-
Notifications
You must be signed in to change notification settings - Fork 256
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Ref/remote download #598
Ref/remote download #598
Changes from 4 commits
97e4488
102d76f
e5c149f
9d4172e
88348c2
cc27951
2818916
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
from __future__ import print_function | ||
import numpy as np | ||
from SimPEG import Mesh | ||
import time as tm | ||
import re | ||
import warnings | ||
|
||
|
||
def read_GOCAD_ts(tsfile): | ||
|
@@ -146,35 +146,90 @@ def surface2inds(vrtx, trgl, mesh, boundaries=True, internal=True): | |
return insideGrid | ||
|
||
|
||
def _rename_path(downloadpath):
    """
    Return a sibling of ``downloadpath`` carrying the next ``(num)`` suffix.

    ``data.txt`` becomes ``data(1).txt`` and ``data(1).txt`` becomes
    ``data(2).txt``. Only the final extension is treated as the suffix, so
    ``a.tar.gz`` becomes ``a.tar(1).gz``; a name without an extension simply
    gets ``(num)`` appended.

    :param str downloadpath: path of the file that needs a new name
    :rtype: str
    :return: path in the same directory with the incremented counter
    """
    import os
    import re

    directory, fname = os.path.split(downloadpath)
    stem, ext = os.path.splitext(fname)

    # check if we have already re-numbered, i.e. the stem ends in "(<digits>)"
    numbered = re.match(r'^(.*)\((\d+)\)$', stem)
    if numbered is None:
        base, num = stem, 1
    else:
        base, num = numbered.group(1), int(numbered.group(2)) + 1

    return os.path.join(directory, '{}({}){}'.format(base, num, ext))


def download(
    url, path='.', overwrite=False, verbose=True
):
    """
    Function to download all files stored in a cloud directory

    :param str url: url or list of urls for the file(s) to be downloaded ("https://...")
    :param str path: path to where the directory is created and files downloaded (default is the current directory)
    :param bool overwrite: overwrite if a file with the specified name already exists
    :param bool verbose: print out progress
    :return: the path of the downloaded file if a single url was given,
        otherwise a list of paths in the same order as the urls
    """
    import os

    # grab the correct url retriever. On Python 3 a bare ``import urllib``
    # does not load the ``urllib.request`` submodule, so import it explicitly.
    try:
        from urllib.request import urlretrieve
    except ImportError:  # Python 2
        from urllib import urlretrieve

    # normalise the input to a list but remember what the caller passed so
    # the return value mirrors it (single path vs. list of paths)
    multiple = isinstance(url, (list, tuple))
    urllist = list(url) if multiple else [url]

    # ensure we are working with absolute paths and home directories dealt with
    path = os.path.abspath(os.path.expanduser(path))

    # make the directory if it doesn't currently exist
    if not os.path.exists(path):
        os.makedirs(path)

    # the local file name is the last component of each url
    downloadpath = [os.path.join(path, u.split('/')[-1]) for u in urllist]

    # check if the files already exist. Existing content is never removed
    # implicitly: unless the caller asks for ``overwrite``, a fresh numbered
    # name is picked instead.
    for i, target in enumerate(downloadpath):
        if not os.path.exists(target):
            continue

        if overwrite:
            if verbose:
                print("overwriting {}".format(target))
            continue

        # keep bumping the counter until the name is actually free
        while os.path.exists(target):
            target = _rename_path(target)

        if verbose:
            print(
                "file already exists, new file is called {}".format(target)
            )
        downloadpath[i] = target

    # download files
    for u, f in zip(urllist, downloadpath):
        if verbose:
            print("Downloading {}...".format(u))
        urlretrieve(u, f)
        if verbose:
            print("   saved to: " + f)

    if verbose:
        print("Download completed!")
    return downloadpath if multiple else downloadpath[0]
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,7 @@ | |
import SimPEG.PF as PF | ||
from SimPEG import Maps, Regularization, Optimization, DataMisfit,\ | ||
InvProblem, Directives, Inversion | ||
from SimPEG.Utils.io_utils import remoteDownload | ||
from SimPEG.Utils.io_utils import download | ||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
|
||
|
@@ -25,14 +25,18 @@ def run(plotIt=True, cleanAfterRun=True): | |
|
||
# Start by downloading files from the remote repository | ||
url = "https://storage.googleapis.com/simpeg/Chile_GRAV_4_Miller/" | ||
cloudfiles = ['LdM_grav_obs.grv', 'LdM_mesh.mesh', | ||
'LdM_topo.topo', 'LdM_input_file.inp'] | ||
|
||
basePath = os.path.sep.join(os.path.abspath(os.getenv('HOME')).split | ||
(os.path.sep)+['Downloads']+['SimPEGtemp']) | ||
basePath = os.path.abspath(remoteDownload(url, | ||
cloudfiles, | ||
basePath=basePath+os.path.sep)) | ||
cloudfiles = [ | ||
'LdM_grav_obs.grv', 'LdM_mesh.mesh', | ||
'LdM_topo.topo', 'LdM_input_file.inp' | ||
] | ||
|
||
# Download to Downloads/SimPEGtemp | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here we are not specifying where things get put? I think they should just go directly into Can we just rename this to file_names = download([url1, url2], folder='~/Downloads/mag_stuff', overwrite=True)
# or
file_name = download(url1)
# where
assert isinstance(file_names, list)
assert len(file_names) == 2
assert isinstance(file_name, str) I think that the default should be the pwd of where the python file is run from, appending a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This also would mean that you don't have to reconstruct the file name after the fact, you just loop through the list: with open(file_name, 'r') as f:
f.read()
with open(file_names[0], 'r') as f2:
f2.read() There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that it could be both? You can call folder='here'
folder='or/here'
folder='/Users/rowan/downloads/or/even/here' I see all of these as folders? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, fair enough, folder can work |
||
basePath = download( | ||
[url+f for f in cloudfiles], | ||
path='~/Downloads/simpegtemp', | ||
overwrite=True | ||
) | ||
|
||
input_file = basePath + os.path.sep + 'LdM_input_file.inp' | ||
# %% User input | ||
# Plotting parameters, max and min densities in g/cc | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,14 +2,18 @@ | |
import unittest | ||
import numpy as np | ||
import scipy.sparse as sp | ||
import os | ||
import shutil | ||
from SimPEG.Utils import ( | ||
sdiag, sub2ind, ndgrid, mkvc, inv2X2BlockDiagonal, | ||
inv3X3BlockDiagonal, invPropertyTensor, makePropertyTensor, indexCube, | ||
ind2sub, asArray_N_x_Dim, TensorType, diagEst, count, timeIt, Counter | ||
ind2sub, asArray_N_x_Dim, TensorType, diagEst, count, timeIt, Counter, | ||
download | ||
) | ||
from SimPEG import Mesh | ||
from SimPEG.Tests import checkDerivative | ||
|
||
|
||
TOL = 1e-8 | ||
|
||
|
||
|
@@ -303,5 +307,30 @@ def testProbing(self): | |
self.assertTrue(err < TOL) | ||
|
||
|
||
class TestDownload(unittest.TestCase):
    """Check the return types of ``download`` for one url and a list of urls.

    NOTE: this test needs network access to the SimPEG cloud bucket.
    """

    def test_downloads(self):
        url = "https://storage.googleapis.com/simpeg/Chile_GRAV_4_Miller/"
        cloudfiles = [
            'LdM_grav_obs.grv', 'LdM_mesh.mesh',
            'LdM_topo.topo', 'LdM_input_file.inp'
        ]

        url1 = url + cloudfiles[0]
        url2 = url + cloudfiles[1]

        # clean up: registered before downloading so the directories are
        # removed even when a download or an assertion below fails
        for folder in ('./test_urls', './test_url'):
            self.addCleanup(shutil.rmtree, folder, ignore_errors=True)

        # a list of urls gives back a list of file names ...
        file_names = download(
            [url1, url2], path='./test_urls', overwrite=True
        )
        # ... while a single url gives back a single file name
        file_name = download(url1, path='./test_url', overwrite=True)

        self.assertIsInstance(file_names, list)
        self.assertEqual(len(file_names), 2)
        self.assertIsInstance(file_name, str)
|
||
if __name__ == '__main__': | ||
unittest.main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is just white-space