Skip to content

Commit

Permalink
Merge pull request #49 from sciris/develop
Browse files Browse the repository at this point in the history
Parallel processing and unpickling fixes
  • Loading branch information
cliffckerr committed Dec 12, 2018
2 parents 16876c7 + 7b42b72 commit bccb3bc
Show file tree
Hide file tree
Showing 7 changed files with 202 additions and 23 deletions.
4 changes: 3 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
sudo: required
dist: xenial
python: "3.7"
python:
- "2.7"
- "3.7"
language: python
install: pip install tox-travis
script: tox
150 changes: 140 additions & 10 deletions sciris/sc_fileio.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
__all__ = ['loadobj', 'loadstr', 'saveobj', 'dumpstr']


def loadobj(filename=None, folder=None, verbose=True, die=None):
def loadobj(filename=None, folder=None, verbose=False, die=None):
'''
Load a file that has been saved as a gzipped pickle file. Accepts either
a filename (standard usage) or a file object as the first argument.
Expand All @@ -68,16 +68,16 @@ def loadobj(filename=None, folder=None, verbose=True, die=None):
kwargs = {'mode': 'rb', argtype: filename}
with GzipFile(**kwargs) as fileobj:
filestr = fileobj.read() # Convert it to a string
obj = unpickler(filestr, die=die) # Actually load it
obj = unpickler(filestr, filename=filename, verbose=verbose, die=die) # Actually load it
if verbose: print('Object loaded from "%s"' % filename)
return obj


def loadstr(string=None, die=None):
def loadstr(string=None, verbose=False, die=None):
with closing(IO(string)) as output: # Open a "fake file" with the Gzip string pickle in it.
with GzipFile(fileobj=output, mode='rb') as fileobj: # Set a Gzip reader to pull from the "file."
picklestring = fileobj.read() # Read the string pickle from the "file" (applying Gzip decompression).
obj = unpickler(picklestring, die=die) # Return the object gotten from the string pickle.
obj = unpickler(picklestring, filestring=string, verbose=verbose, die=die) # Return the object gotten from the string pickle.
return obj


Expand Down Expand Up @@ -757,9 +757,20 @@ def savespreadsheet(filename=None, data=None, folder=None, sheetnames=None, clos
class Failed(object):
''' An empty class to represent a failed object loading '''
failure_info = odict()

def __init__(self, *args, **kwargs):
pass


class Empty(object):
''' Another empty class to represent a failed object loading '''

def __init__(self, *args, **kwargs):
pass

def __setstate__(self, state):
pass


def makefailed(module_name=None, name=None, error=None):
''' Create a class -- not an object! -- that contains the failure info '''
Expand All @@ -773,6 +784,10 @@ def makefailed(module_name=None, name=None, error=None):

class RobustUnpickler(pickle.Unpickler):
''' Try to import an object, and if that fails, return a Failed object rather than crashing '''

def __init__(self, tmpfile, fix_imports=True, encoding="latin1", errors="ignore"):
pickle.Unpickler.__init__(self, tmpfile, fix_imports=fix_imports, encoding=encoding, errors=errors)

def find_class(self, module_name, name, verbose=False):
obj = makefailed(module_name, name, 'Unknown error') # This should get overwritten unless something goes terribly wrong
try:
Expand All @@ -788,20 +803,41 @@ def find_class(self, module_name, name, verbose=False):
return obj


def unpickler(string=None, die=None):
import dill # Optional Sciris dependency
def unpickler(string=None, filename=None, filestring=None, die=None, verbose=False):

if die is None: die = False

try: # Try pickle first
obj = pkl.loads(string) # Actually load it -- main usage case
except Exception as E:
except Exception as E1:
if die:
raise E
raise E1
else:
try: obj = dill.loads(string) # If that fails, try dill
except: obj = RobustUnpickler(io.BytesIO(string)).load() # And if that trails, throw everything at it
try:
if verbose: print('Standard unpickling failed (%s), trying encoding...' % str(E1))
obj = pkl.loads(string, encoding='latin1') # Try loading it again with different encoding
except Exception as E2:
try:
if verbose: print('Encoded unpickling failed (%s), trying dill...' % str(E2))
import dill # Optional Sciris dependency
obj = dill.loads(string) # If that fails, try dill
except Exception as E3:
try:
if verbose: print('Dill failed (%s), trying robust unpickler...' % str(E3))
obj = RobustUnpickler(io.BytesIO(string)).load() # And if that trails, throw everything at it
except Exception as E4:
try:
if verbose: print('Robust unpickler failed (%s), trying Python 2->3 conversion...' % str(E4))
obj = loadobj2to3(filename=filename, filestring=filestring)
except Exception as E5:
if verbose: print('Python 2->3 conversion failed (%s), giving up...' % str(E5))
errormsg = 'All available unpickling methods failed:\n Standard: %s\n Encoded: %s\n Dill: %s\n Robust: %s\n Python2->3: %s' % (E1, E2, E3, E4, E5)
raise Exception(errormsg)

if isinstance(obj, Failed):
print('Warning, the following errors were encountered during unpickling:')
print(obj.failure_info)

return obj


Expand All @@ -818,6 +854,100 @@ def savedill(fileobj=None, obj=None):
return None



##############################################################################
### Python 2 legacy support
##############################################################################

not_string_pickleable = ['datetime', 'BytesIO']
byte_objects = ['datetime', 'BytesIO', 'odict', 'spreadsheet', 'blobject']

def loadobj2to3(filename=None, filestring=None):
''' Used automatically by loadobj() to load Python2 objects in Python3 if all other loading methods fail '''

class Placeholder():
''' Replace these corrupted classes with properly loaded ones '''
def __init__(*args):
return

def __setstate__(self, state):
if isinstance(state,dict):
self.__dict__ = state
else:
self.state = state
return

class StringUnpickler(pickle.Unpickler):
def find_class(self, module, name):
if name in not_string_pickleable:
return Empty
else:
return pickle.Unpickler.find_class(self,module,name)

class BytesUnpickler(pickle.Unpickler):
def find_class(self, module, name):
if name in byte_objects:
return pickle.Unpickler.find_class(self,module,name)
else:
return Placeholder

def recursive_substitute(obj1, obj2, track=None):
if track is None:
track = []

if isinstance(obj1, Blobject): # Handle blobjects (usually spreadsheets)
obj1.blob = obj2.__dict__[b'blob']
obj1.bytes = obj2.__dict__[b'bytes']

if isinstance(obj2, dict): # Handle dictionaries
for k,v in obj2.items():
if isinstance(v, datetime.datetime):
setattr(obj1, k.decode('latin1'), v)
elif isinstance(v, dict) or hasattr(v,'__dict__'):
if isinstance(k, (bytes, bytearray)):
k = k.decode('latin1')
track2 = track.copy()
track2.append(k)
recursive_substitute(obj1[k], v, track2)
else:
for k,v in obj2.__dict__.items():
if isinstance(v,datetime.datetime):
setattr(obj1,k.decode('latin1'), v)
elif isinstance(v,dict) or hasattr(v,'__dict__'):
if isinstance(k, (bytes, bytearray)):
k = k.decode('latin1')
track2 = track.copy()
track2.append(k)
recursive_substitute(getattr(obj1,k), v, track2)

# Load either from file or from string
if filename:
with GzipFile(filename) as fileobj:
unpickler1 = StringUnpickler(fileobj, encoding='latin1')
stringout = unpickler1.load()
with GzipFile(filename) as fileobj:
unpickler2 = BytesUnpickler(fileobj, encoding='bytes')
bytesout = unpickler2.load()
elif filestring:
with closing(IO(filestring)) as output:
with GzipFile(fileobj=output, mode='rb') as fileobj:
unpickler1 = StringUnpickler(fileobj, encoding='latin1')
stringout = unpickler1.load()
with closing(IO(filestring)) as output:
with GzipFile(fileobj=output, mode='rb') as fileobj:
unpickler2 = BytesUnpickler(fileobj, encoding='bytes')
bytesout = unpickler2.load()
else:
errormsg = 'You must supply either a filename or a filestring for loadobj() or loadstr(), respectively'
raise Exception(errormsg)

# Actually do the load, with correct substitution
recursive_substitute(stringout, bytesout)
return stringout




##############################################################################
### Twisted pickling methods
##############################################################################
Expand Down
49 changes: 44 additions & 5 deletions sciris/sc_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,58 @@
from . import sc_utils as ut

##############################################################################
### FIND FUNCTIONS
### FIND AND APPROXIMATION FUNCTIONS
##############################################################################

__all__ = ['approx', 'findinds', 'findnearest', 'dataindex', 'getvalidinds', 'sanitize', 'getvaliddata', 'isprime']
__all__ = ['approx', 'safedivide', 'findinds', 'findnearest', 'dataindex', 'getvalidinds', 'sanitize', 'getvaliddata', 'isprime']


def approx(val1=None, val2=None, eps=None):
''' Determine whether two scalars approximately match '''
if eps is None: eps = 1e-9
output = abs(val1-val2)<eps
'''
Determine whether two scalars approximately match. Example:
sc.approx(2*6, 11.9999999, eps=1e-6) # Returns True
sc.approx([3,12,11.9], 12) # Returns array([False, True, False], dtype=bool)
'''
if val2 is None: val2 = 0.0
if eps is None: eps = 1e-9
if isinstance(val1, list): val1 = np.array(val1) # If it's a list, convert to an array first
output = abs(val1-val2)<=eps
return output



def safedivide(numerator=None, denominator=None, default=None, eps=None, warn=False):
'''
Handle divide-by-zero and divide-by-nan elegantly. Examples:
sc.safedivide(numerator=0, denominator=0, default=1, eps=0) # Returns 1
sc.safedivide(numerator=5, denominator=2.0, default=1, eps=1e-3) # Returns 2.5
sc.safedivide(3,array([1,3,0]),-1, warn=True) # Returns array([ 3, 1, -1])
'''
# Set some defaults
if numerator is None: numerator = 1.0
if denominator is None: denominator = 1.0
if default is None: default = 0.0

# Handle the logic
invalid = approx(denominator, 0.0, eps=eps)
if ut.isnumber(denominator): # The denominator is a scalar
if invalid:
output = default
else:
output = numerator/denominator
elif ut.checktype(denominator, 'array'):
if not warn:
denominator[invalid] = 1.0 # Replace invalid values with 1
output = numerator/denominator
output[invalid] = default
else: # Unclear input, raise exception
errormsg = 'Input type %s not understood: must be number or array' % type(denominator)
raise Exception(errormsg)

return output



def findinds(val1, val2=None, eps=1e-6):
'''
Little function to find matches even if two things aren't eactly equal (eg.
Expand Down
3 changes: 3 additions & 0 deletions sciris/sc_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,9 @@ def f(x,y):
# Run simply using map -- no advantage here to using Process/Queue
multipool = mp.Pool(processes=ncpus)
outputlist = multipool.map(parallel_task, argslist)
multipool.close()
multipool.join()

return outputlist


Expand Down
11 changes: 8 additions & 3 deletions sciris/sc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,10 @@ def sha(string, encoding='utf-8', *args, **kwargs):
return output


def wget(url):
def wget(url, convert=True):
''' Download a URL '''
output = urlrequester.urlopen(url).read()
if convert and six.PY3: output = output.decode()
return output


Expand Down Expand Up @@ -735,11 +736,15 @@ def flexstr(arg, force=True):
output = arg # If anything goes wrong, just return as-is
else:
if isinstance(arg, six.binary_type):
output = arg.decode() # If it's bytes, decode to unicode
try:
output = arg.decode() # If it's bytes, decode to unicode
except:
if force: output = repr(arg) # If that fails, just print its representation
else: output = arg
else:
output = arg # Otherwise, return as-is
else:
if force: output = str(arg)
if force: output = repr(arg)
else: output = arg # Optionally don't do anything for non-strings
return output

Expand Down
4 changes: 2 additions & 2 deletions sciris/sc_version.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__all__ = ['__version__', '__versiondate__', '__license__']

__version__ = '0.12.0'
__versiondate__ = '2018-11-24'
__version__ = '0.12.1'
__versiondate__ = '2018-12-11'
__license__ = 'Sciris %s (%s) -- (c) Sciris.org' % (__version__, __versiondate__)
4 changes: 2 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tox]
envlist = py37
envlist = py27, py37

[testenv]
description = Run basic usage tests
Expand All @@ -15,7 +15,7 @@ commands =
setenv = MPLBACKEND = agg

[pytest]
addopts = -ra -v -n 2
addopts = -ra -v
console_output_style = count
testpaths = tests
python_files = test_tox_*.py

0 comments on commit bccb3bc

Please sign in to comment.