Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ref/remote download #598

Merged
merged 7 commits into from
May 5, 2017
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 5 additions & 3 deletions SimPEG/EM/Base.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,9 @@ def MeSigmaI(self):
Inverse of the edge inner product matrix for \\(\\sigma\\).
"""
if getattr(self, '_MeSigmaI', None) is None:
self._MeSigmaI = self.mesh.getEdgeInnerProduct(self.sigma, invMat=True)
self._MeSigmaI = self.mesh.getEdgeInnerProduct(
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is just white-space

self.sigma, invMat=True
)
return self._MeSigmaI

# TODO: This should take a vector
Expand Down Expand Up @@ -425,7 +427,7 @@ def s_e(self, prob):
"""
return Utils.Zero()

def s_mDeriv(self, prob, v, adjoint = False):
def s_mDeriv(self, prob, v, adjoint=False):
"""
Derivative of magnetic source term with respect to the inversion model

Expand All @@ -438,7 +440,7 @@ def s_mDeriv(self, prob, v, adjoint = False):

return Utils.Zero()

def s_eDeriv(self, prob, v, adjoint = False):
def s_eDeriv(self, prob, v, adjoint=False):
"""
Derivative of electric source term with respect to the inversion model

Expand Down
1 change: 1 addition & 0 deletions SimPEG/Utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@
from .coordutils import rotatePointsFromNormals, rotationMatrixFromNormals
from .modelutils import surface2ind_topo
from .PlotUtils import plot2Ddata, plotLayer
from .io_utils import download
91 changes: 73 additions & 18 deletions SimPEG/Utils/io_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from __future__ import print_function
import numpy as np
from SimPEG import Mesh
import time as tm
import re
import warnings


def read_GOCAD_ts(tsfile):
Expand Down Expand Up @@ -146,35 +146,90 @@ def surface2inds(vrtx, trgl, mesh, boundaries=True, internal=True):
return insideGrid


def download(
    url, path='.', overwrite=False, verbose=True
):
    """
    Download one or more files and save them in a local directory.

    The local file name is the last ``/``-separated component of each url.

    :param url: url (str) or list of urls for the file(s) to be downloaded ("https://...")
    :param str path: directory in which the files are saved; it is created if it does not exist (default is the current directory)
    :param bool overwrite: if True, replace a file that already has the target name; if False, keep it and save the new file as ``name(1).ext``, ``name(2).ext``, ...
    :param bool verbose: print out progress
    :return: path of the saved file (str) when ``url`` is a single string, otherwise a list of paths in the same order as the urls
    :raises TypeError: if ``url`` is neither a string nor a list/tuple
    :raises ValueError: if a url does not end in a file name
    """

    import os
    import sys

    # grab the correct url retriever; the submodule has to be imported
    # explicitly on Python 3 (``import urllib`` alone does not expose it)
    if sys.version_info < (3,):
        from urllib import urlretrieve
        string_types = (basestring,)  # noqa: F821 (Python 2 only)
    else:
        from urllib.request import urlretrieve
        string_types = (str,)

    def unique_path(target):
        """Return the first of target, name(1).ext, name(2).ext, ... that is free."""
        # Mimic what a web browser does: rather than clobbering an existing
        # download, bump an integer suffix until the name is unique.
        root, ext = os.path.splitext(target)
        candidate = target
        num = 0
        while os.path.exists(candidate):
            num += 1
            candidate = '{}({}){}'.format(root, num, ext)
        return candidate

    single = isinstance(url, string_types)
    if single:
        urllist = [url]
    elif isinstance(url, (list, tuple)):
        urllist = list(url)
    else:
        raise TypeError(
            "url must be a string or a list of strings, not {}".format(
                type(url).__name__
            )
        )

    # ensure we are working with absolute paths and home directories dealt with
    path = os.path.abspath(os.path.expanduser(path))

    # make the directory if it doesn't currently exist
    if not os.path.isdir(path):
        os.makedirs(path)

    downloadpath = []
    for u in urllist:
        fname = u.split('/')[-1]
        if not fname:
            raise ValueError(
                "cannot infer a file name from url {!r}".format(u)
            )
        target = os.path.join(path, fname)

        # check if the file already exists
        if os.path.exists(target):
            if overwrite:
                if verbose:
                    print("overwriting {}".format(target))
            else:
                target = unique_path(target)
                if verbose:
                    print(
                        "file already exists, new file is called {}".format(
                            target
                        )
                    )

        # Download immediately so that a later url with the same file name
        # in this call sees the file and is renamed instead of clobbering it.
        if verbose:
            print("Downloading {}...".format(u))
        urlretrieve(u, target)
        if verbose:
            print(" saved to: " + target)
        downloadpath.append(target)

    if verbose:
        print("Download completed!")
    return downloadpath[0] if single else downloadpath
22 changes: 13 additions & 9 deletions examples/04-grav/plot_laguna_del_maule_inversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import SimPEG.PF as PF
from SimPEG import Maps, Regularization, Optimization, DataMisfit,\
InvProblem, Directives, Inversion
from SimPEG.Utils.io_utils import remoteDownload
from SimPEG.Utils.io_utils import download
import matplotlib.pyplot as plt
import numpy as np

Expand All @@ -25,14 +25,18 @@ def run(plotIt=True, cleanAfterRun=True):

# Start by downloading files from the remote repository
url = "https://storage.googleapis.com/simpeg/Chile_GRAV_4_Miller/"
cloudfiles = ['LdM_grav_obs.grv', 'LdM_mesh.mesh',
'LdM_topo.topo', 'LdM_input_file.inp']

basePath = os.path.sep.join(os.path.abspath(os.getenv('HOME')).split
(os.path.sep)+['Downloads']+['SimPEGtemp'])
basePath = os.path.abspath(remoteDownload(url,
cloudfiles,
basePath=basePath+os.path.sep))
cloudfiles = [
'LdM_grav_obs.grv', 'LdM_mesh.mesh',
'LdM_topo.topo', 'LdM_input_file.inp'
]

# Download to Downloads/SimPEGtemp
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we are not specifying where things get put? I think they should just go directly into pwd? Unless you specify a folder or something?

Can we just rename this to download? seems a bit redundant.

file_names = download([url1, url2], folder='~/Downloads/mag_stuff', overwrite=True)
# or 
file_name = download(url1)
# where
assert isinstance(file_names, list)
assert len(file_names) == 2
assert isinstance(file_name, str)

I think that the default should be the pwd of where the python file is run from; appending a SimPEGtemp seems odd to me. Downloads are downloads.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also would mean that you don't have to reconstruct the file name after the fact, you just loop through the list:

with open(file_name, 'r') as f:
    f.read()

with open(file_names[0], 'r') as f2:
    f2.read()

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • download works for me. @fourndo ?
  • SimPEGtemp was mainly motivated because we make heavy use of this in examples, so for someone unfamiliar it is easy to find (we can still do this and specify it in the example script rather than the util). However, I agree that can be cleared up by specifying the folder (I am tempted to call it path or directory instead as it is not just a name of a folder but does include path information)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that it could be both? You can call abspath on the thing and it should expand it out regardless.

folder='here'
folder='or/here'
folder='/Users/rowan/downloads/or/even/here'

I see all of these as folders?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, fair enough, folder can work

basePath = download(
[url+f for f in cloudfiles],
path='~/Downloads/simpegtemp',
overwrite=True
)

input_file = basePath + os.path.sep + 'LdM_input_file.inp'
# %% User input
# Plotting parameters, max and min densities in g/cc
Expand Down
31 changes: 7 additions & 24 deletions examples/07-fdem/plot_heagyetal2016_casing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from SimPEG import Mesh, Utils, Maps, Tests
from SimPEG.EM import mu_0, FDEM, Analytics
from SimPEG.EM.Utils import omega
from SimPEG.Utils.io_utils import remoteDownload
from SimPEG.Utils.io_utils import download
# try:
# from pymatsolver import MumpsSolver as Solver
# print('using MumpsSolver')
Expand Down Expand Up @@ -1243,42 +1243,25 @@ class PrimSecCasingStoredResults(PrimSecCasingExample):
'J_PrimSec_5e6Casing_50Mu_05Hz_LargeCondBody',
]

@property
def filepath(self):
return os.path.sep.join(
os.path.abspath(os.getenv('HOME')).split(os.path.sep) +
['Downloads'] + ['SimPEGtemp']
)

def downloadStoredResults(self):
# download the results from where they are stored on google app engine

return os.path.abspath(
remoteDownload(
self.url, [self.cloudfile], basePath=self.filepath+os.path.sep
)
)

def removeStoredResults(self):
import shutil
print('Removing {}'.format(self.filepath))
shutil.rmtree(self.filepath)

def run(self, plotIt=False, runTests=False, saveFig=False):

self.downloadStoredResults()
filepath = download(
self.url + self.cloudfile, path='~/Downloads/simpegtemp',
overwrite=True
)
self.filepath = os.path.sep.join(filepath.split(os.path.sep)[:-1])

# resultsFiles = ['{filepath}{slash}{file}'.format(
# filepath=self.filepath, slash=os.path.sep, file=file)
# for file in self.cloudfiles]
# results = [np.load(file, encoding='bytes') for file in resultsFiles]

h5f = h5py.File(
'{filepath}{slash}{file}'.format(
filepath=self.filepath, slash=os.path.sep, file=self.cloudfile
), 'r'
)

h5f = h5py.File(filepath, 'r')
results = [h5f[entry_name][:] for entry_name in self.entry_names]
results = dict(zip(['primfields', 'dpredback', 'dpred', 'J'], results))

Expand Down
31 changes: 30 additions & 1 deletion tests/base/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@
import unittest
import numpy as np
import scipy.sparse as sp
import os
import shutil
from SimPEG.Utils import (
sdiag, sub2ind, ndgrid, mkvc, inv2X2BlockDiagonal,
inv3X3BlockDiagonal, invPropertyTensor, makePropertyTensor, indexCube,
ind2sub, asArray_N_x_Dim, TensorType, diagEst, count, timeIt, Counter
ind2sub, asArray_N_x_Dim, TensorType, diagEst, count, timeIt, Counter,
download
)
from SimPEG import Mesh
from SimPEG.Tests import checkDerivative


TOL = 1e-8


Expand Down Expand Up @@ -303,5 +307,30 @@ def testProbing(self):
self.assertTrue(err < TOL)


class TestDownload(unittest.TestCase):
    """Check what ``download`` returns for a single url and for a list of urls."""

    def test_downloads(self):
        """A list of urls yields a list of paths; a single url yields a str."""
        url = "https://storage.googleapis.com/simpeg/Chile_GRAV_4_Miller/"
        cloudfiles = [
            'LdM_grav_obs.grv', 'LdM_mesh.mesh',
            'LdM_topo.topo', 'LdM_input_file.inp'
        ]

        url1 = url + cloudfiles[0]
        url2 = url + cloudfiles[1]

        # Register the clean-up before downloading so the folders are removed
        # even when a download or an assertion below fails.
        for folder in ('./test_urls', './test_url'):
            self.addCleanup(shutil.rmtree, folder, ignore_errors=True)

        file_names = download(
            [url1, url2], path='./test_urls', overwrite=True
        )
        # or
        file_name = download(url1, path='./test_url', overwrite=True)

        # where
        self.assertIsInstance(file_names, list)
        self.assertEqual(len(file_names), 2)
        self.assertIsInstance(file_name, str)

        # every returned path points at a file that was actually written
        for saved in file_names + [file_name]:
            self.assertTrue(os.path.isfile(saved))

if __name__ == '__main__':
unittest.main()
18 changes: 14 additions & 4 deletions tests/pf/test_gravity_IO.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from SimPEG.Utils import io_utils
from scipy.constants import mu_0
import shutil
import os


class MagSensProblemTests(unittest.TestCase):
Expand All @@ -13,11 +14,16 @@ def setUp(self):
cloudfiles = ['GravData.obs', 'Gaussian.topo', 'Mesh_10m.msh',
'ModelStart.sus', 'SimPEG_Grav_Input.inp']

self.basePath = io_utils.remoteDownload(url, cloudfiles)
self.basePath = os.path.expanduser('~/Downloads/simpegtemp')
self.files = io_utils.download(
[url + f for f in cloudfiles],
path=self.basePath,
overwrite=True
)

def test_magnetics_inversion(self):

inp_file = self.basePath + 'SimPEG_Grav_Input.inp'
inp_file = os.path.sep.join([self.basePath, 'SimPEG_Grav_Input.inp'])

driver = PF.GravityDriver.GravityDriver_Inv(inp_file)

Expand All @@ -36,8 +42,12 @@ def test_magnetics_inversion(self):
print(driver.eps)

# Write obs to file
PF.Gravity.writeUBCobs(self.basePath + 'FWR_data.dat',
driver.survey, driver.survey.dobs)
PF.Gravity.writeUBCobs(
os.path.sep.join(
[self.basePath, 'FWR_data.dat']
),
driver.survey, driver.survey.dobs
)

# Clean up the working directory
shutil.rmtree(self.basePath)
Expand Down
14 changes: 10 additions & 4 deletions tests/pf/test_magnetics_IO.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from SimPEG.Utils import io_utils
from scipy.constants import mu_0
import shutil
import os


class MagSensProblemTests(unittest.TestCase):
Expand All @@ -13,11 +14,14 @@ def setUp(self):
cloudfiles = ['MagData.obs', 'Gaussian.topo', 'Mesh_10m.msh',
'ModelStart.sus', 'SimPEG_Mag_Input.inp']

self.basePath = io_utils.remoteDownload(url, cloudfiles)
self.basePath = os.path.expanduser('~/Downloads/simpegtemp')
self.files = io_utils.download(
[url+f for f in cloudfiles], path=self.basePath, overwrite=True
)

def test_magnetics_inversion(self):

inp_file = self.basePath + 'SimPEG_Mag_Input.inp'
inp_file = os.path.sep.join([self.basePath, 'SimPEG_Mag_Input.inp'])

driver = PF.MagneticsDriver.MagneticsDriver_Inv(inp_file)

Expand All @@ -36,8 +40,10 @@ def test_magnetics_inversion(self):
print(driver.eps)

# Write obs to file
PF.Magnetics.writeUBCobs(self.basePath + 'FWR_data.dat',
driver.survey, driver.survey.dobs)
PF.Magnetics.writeUBCobs(
os.path.sep.join([self.basePath, 'FWR_data.dat']),
driver.survey, driver.survey.dobs
)

# Clean up the working directory
shutil.rmtree(self.basePath)
Expand Down