Skip to content

Commit

Permalink
Merge pull request #108 from sciris/util-updates-2020apr27
Browse files Browse the repository at this point in the history
Test and utility updates
  • Loading branch information
cliffckerr committed Apr 28, 2020
2 parents bc76a22 + ead9e82 commit b452be7
Show file tree
Hide file tree
Showing 17 changed files with 723 additions and 565 deletions.
27 changes: 17 additions & 10 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,23 @@

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
By import convention, components of the Sciris library are listed beginning with `sc.`, e.g. `sc.odict()`.

By import convention, components of the Sciris library are listed beginning with `sc.`, e.g. `sc.odict`.
## Version 0.17.0 (2020-04-27)
1. `sc.mprofile()` has been added, which does memory profiling just like `sc.profile()`.
1. `sc.progressbar()` has been added, which prints a progress bar.
1. `sc.jsonpickle()` and `sc.jsonunpickle()` have been added, wrapping the module of the same name, to convert arbitrary objects to JSON.
1. `sc.jsonify()` checks objects for a `to_json()` method, handling e.g. Pandas dataframes, and falls back to `sc.jsonpickle()` instead of raising an exception for unknown object types.
1. `sc.suggest()` now uses `jellyfish` instead of `python-levenshtein` for fuzzy string matching.
1. `sc.saveobj()` now uses protocol 4 instead of the latest by default, to avoid backwards incompatibility issues caused by using protocol 5 (only available in Python 3.8 and later).
1. `sc.odict()` and related classes now raise `sc.KeyNotFound` exceptions. These are derived from `KeyError`, but fix a bug in the string representation (https://stackoverflow.com/questions/34051333/strange-error-message-printed-out-for-keyerror) to allow multi-line error messages.
1. Rewrote all tests to be pytest-compatible.

## Version 0.16.8 (2020-04-11)
- Added a [Code of Conduct](CODE_OF_CONDUCT.md).
- `sc.makefilepath()` now has a `checkexists` flag, which will optionally raise an exception if the file does (or doesn't) exist.
- `sc.sanitizejson()` now handles `datetime.date` and `datetime.time`.
- `sc.uuid()` and `sc.fast_uuid()` now work with non-integer inputs, e.g., `sc.uuid(n=10e3)`.
- `sc.thisdir()` now accepts additional arguments, so can be used to form a full path, e.g. `sc.thisdir(__file__, 'myfile.txt')`.
- `sc.checkmem()` has better parsing of objects.
- `sc.prepr()` now lists properties of objects, and has some aesthetic improvements.
1. Added a [Code of Conduct](CODE_OF_CONDUCT.md).
1. `sc.makefilepath()` now has a `checkexists` flag, which will optionally raise an exception if the file does (or doesn't) exist.
1. `sc.sanitizejson()` now handles `datetime.date` and `datetime.time`.
1. `sc.uuid()` and `sc.fast_uuid()` now work with non-integer inputs, e.g., `sc.uuid(n=10e3)`.
1. `sc.thisdir()` now accepts additional arguments, so can be used to form a full path, e.g. `sc.thisdir(__file__, 'myfile.txt')`.
1. `sc.checkmem()` has better parsing of objects.
1. `sc.prepr()` now lists properties of objects, and has some aesthetic improvements.
86 changes: 72 additions & 14 deletions sciris/sc_fileio.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from gzip import GzipFile
from zipfile import ZipFile
from contextlib import closing
from collections import OrderedDict
from pathlib import Path
from io import BytesIO as IO
import pickle as pkl
Expand Down Expand Up @@ -82,10 +81,21 @@ def loadstr(string=None, verbose=False, die=None):
return obj


def saveobj(filename=None, obj=None, compresslevel=5, verbose=0, folder=None, method='pickle'):
def saveobj(filename=None, obj=None, compresslevel=5, verbose=0, folder=None, method='pickle', *args, **kwargs):
'''
Save an object to file -- use compression 5 by default, since more is much slower but not much smaller.
Once saved, can be loaded with loadobj() (q.v.).
Once saved, can be loaded with sc.loadobj().
Args:
filename (str or Path): the filename to save to; if str, passed to sc.makefilepath()
obj (literally anything): the object to save
compresslevel (int): the level of gzip compression
verbose (int): detail to print
folder (str): passed to sc.makefilepath()
method (str): whether to use pickle (default) or dill
args (list): passed to pickle.dumps()
kwargs (dict): passed to pickle.dumps()
Usage:
myobj = ['this', 'is', 'a', 'weird', {'object':44}]
Expand All @@ -106,12 +116,13 @@ def saveobj(filename=None, obj=None, compresslevel=5, verbose=0, folder=None, me
else: # Otherwise, try pickle
try:
if verbose>=2: print('Saving as pickle...')
savepickle(fileobj, obj) # Use pickle
savepickle(fileobj, obj, *args, **kwargs) # Use pickle
except Exception as E:
if verbose>=2: print('Exception when saving as pickle (%s), saving as dill...' % repr(E))
savedill(fileobj, obj) # ...but use Dill if that fails
savedill(fileobj, obj, *args, **kwargs) # ...but use Dill if that fails

if verbose and filename: print('Object saved to "%s"' % filename)
if verbose and filename:
print('Object saved to "%s"' % filename)

if filename:
return filename
Expand All @@ -121,6 +132,7 @@ def saveobj(filename=None, obj=None, compresslevel=5, verbose=0, folder=None, me


def dumpstr(obj=None):
''' Dump an object as a bytes-like string '''
with closing(IO()) as output: # Open a "fake file."
with GzipFile(fileobj=output, mode='wb') as fileobj: # Open a Gzip-compressing way to write to this "file."
try: savepickle(fileobj, obj) # Use pickle
Expand Down Expand Up @@ -326,7 +338,7 @@ def thisdir(file, *args, **kwargs):
### JSON functions
##############################################################################

__all__ += ['sanitizejson', 'jsonify', 'loadjson', 'savejson']
__all__ += ['sanitizejson', 'jsonify', 'loadjson', 'savejson', 'jsonpickle', 'jsonunpickle']


def sanitizejson(obj, verbose=True, die=False, tostring=False, **kwargs):
Expand Down Expand Up @@ -356,8 +368,10 @@ def sanitizejson(obj, verbose=True, die=False, tostring=False, **kwargs):
if np.isnan(obj): # It's nan, so return None
output = None
else:
if isinstance(obj, (int, np.int64)): output = int(obj) # It's an integer
else: output = float(obj)# It's something else, treat it as a float
if isinstance(obj, (int, np.int64)):
output = int(obj) # It's an integer
else:
output = float(obj)# It's something else, treat it as a float

elif ut.isstring(obj): # It's a string of some kind
try: string = str(obj) # Try to convert it to ascii
Expand All @@ -380,9 +394,18 @@ def sanitizejson(obj, verbose=True, die=False, tostring=False, **kwargs):
elif isinstance(obj, uuid.UUID):
output = str(obj)

elif callable(getattr(obj, 'to_dict', None)): # Handle e.g. pandas, where we want to return the object, not the string
output = obj.to_dict()

elif callable(getattr(obj, 'to_json', None)):
output = obj.to_json()

elif callable(getattr(obj, 'toJSON', None)):
output = obj.toJSON()

else: # None of the above
try:
output = json.loads(json.dumps(obj)) # Try passing it through jsonification
output = jsonpickle(obj)
except Exception as E:
errormsg = 'Could not sanitize "%s" %s (%s), converting to string instead' % (obj, type(obj), str(E))
if die: raise Exception(errormsg)
Expand Down Expand Up @@ -442,6 +465,39 @@ def savejson(filename=None, obj=None, folder=None, **kwargs):
return None


def jsonpickle(obj, tostring=False):
    '''
    Use the jsonpickle library to return a representation of an object.

    Args:
        obj (any): the object to convert
        tostring (bool): if True, return the string produced by jsonpickle's dumps();
            otherwise return the result of jsonpickle's Pickler.flatten()

    Returns:
        The string or flattened representation, depending on tostring
    '''
    import jsonpickle as jp
    import jsonpickle.ext.numpy as jp_numpy
    import jsonpickle.ext.pandas as jp_pandas

    # Register the numpy and pandas extension handlers (numpy first, then pandas)
    for extension in (jp_numpy, jp_pandas):
        extension.register_handlers()

    if tostring:
        return jp.dumps(obj)
    return jp.pickler.Pickler().flatten(obj)


def jsonunpickle(json):
    '''
    Use the jsonpickle library to restore an object.

    Args:
        json (str or other): if a string, it is decoded with jsonpickle's loads();
            anything else is handed to jsonpickle's Unpickler.restore()

    Returns:
        The restored object
    '''
    # NB: the argument name shadows any module-level "json" inside this function
    import jsonpickle as jp
    import jsonpickle.ext.numpy as jp_numpy
    import jsonpickle.ext.pandas as jp_pandas

    # Register the numpy and pandas extension handlers (numpy first, then pandas)
    for extension in (jp_numpy, jp_pandas):
        extension.register_handlers()

    if isinstance(json, str):
        return jp.loads(json)
    return jp.unpickler.Unpickler().restore(json)


##############################################################################
### Spreadsheet functions
Expand Down Expand Up @@ -1012,19 +1068,21 @@ def unpickler(string=None, filename=None, filestring=None, die=None, verbose=Fal
return obj


def savepickle(fileobj=None, obj=None, protocol=None, *args, **kwargs):
    '''
    Use pickle to do the salty work: pickle an object and write the bytes to a file object.

    Args:
        fileobj (file-like): an open, writable binary file object
        obj (any): the object to pickle
        protocol (int): the pickle protocol to use; if None, protocol 4 is used
        args (list): passed to pickle.dumps()
        kwargs (dict): passed to pickle.dumps()

    Returns:
        None
    '''
    if protocol is None:
        protocol = 4
    # Pass protocol positionally so it comes before *args; supplying it as a keyword
    # ahead of the star-unpacking makes any extra positional argument fail with a
    # misleading "multiple values for 'protocol'" error.
    fileobj.write(pkl.dumps(obj, protocol, *args, **kwargs))
    return None


def savedill(fileobj=None, obj=None, *args, **kwargs):
    '''
    Use dill to do the sour work: serialize an object with dill and write the bytes to a file object.

    Args:
        fileobj (file-like): an open, writable binary file object
        obj (any): the object to serialize
        args (list): passed to dill.dumps()
        kwargs (dict): passed to dill.dumps(); if no protocol is supplied, -1 is used

    Returns:
        None

    Raises:
        Exception: if the optional "dill" package is not installed
    '''
    try:
        import dill # Optional Sciris dependency
    except ModuleNotFoundError as e:
        raise Exception('The "dill" Python package is not available; please install manually') from e
    # Only default the protocol when the caller has not supplied one. Hard-coding
    # protocol=-1 alongside **kwargs raised a TypeError whenever "protocol" was also
    # given as a keyword, and likewise for any positional argument.
    # NOTE(review): assumes the first positional parameter of dill.dumps() after obj is protocol -- confirm
    if not args:
        kwargs.setdefault('protocol', -1)
    fileobj.write(dill.dumps(obj, *args, **kwargs))
    return None


Expand Down

0 comments on commit b452be7

Please sign in to comment.