Merge pull request #377 from sciris/rc2.0.1

Version 2.0.1
sciris · Oct 22, 2022 · 6e2c623 · 6e2c623
2 parents e5e883c + 436d21b
commit 6e2c623
Show file tree

Hide file tree

Showing 15 changed files with 239 additions and 74 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -6,6 +6,34 @@ All major updates to Sciris are documented here.
 By import convention, components of the Sciris library are listed beginning with ``sc.``, e.g. ``sc.odict()``.
 
 
+Version 2.0.1 (2022-10-21)
+--------------------------
+
+New features
+~~~~~~~~~~~~
+#. ``sc.asciify()`` converts a Unicode input string to the closest ASCII equivalent.
+#. ``sc.dataframe().disp()`` flexibly prints a dataframe (by default, all rows/columns).
+
+Improvements
+~~~~~~~~~~~~
+#. ``sc.findinds()`` now allows a wider variety of numeric-but-non-array inputs.
+#. ``sc.sanitizefilename()`` now handles more characters, including Unicode, and has many new options.
+#. ``sc.odict()`` now allows you to delete by index as well as by key.
+#. ``sc.download()`` now creates folders if they do not already exist.
+#. ``sc.checktype(obj, 'arraylike')`` now returns ``True`` for pandas ``Series`` objects.
+#. ``sc.promotetoarray()`` now converts pandas ``Series`` or ``DataFrame`` objects into arrays.
+#. ``sc.savetext()`` can now save arrays (like ``np.savetxt()``).
+
+Bugfixes
+~~~~~~~~
+#. Fixed a bug with addition (concatenation) for ``sc.autolist()``.
+#. Fixed a bug with the ``_copy`` argument for ``sc.mergedicts()`` being ignored.
+#. ``sc.checkmem()`` no longer uses compression, giving more accurate estimates.
+#. Fixed a bug with ``sc.options()`` setting the plot style automatically; a ``'default'`` style was also added that restores Matplotlib defaults (which is now the Sciris default as well; use ``'sciris'`` or ``'simple'`` for the Sciris style).
+#. Fixed a bug with ``packaging.version`` not being found on some systems.
+#. Fixed an issue with colormaps attempting to be re-registered, which caused warnings.
+
+
 Version 2.0.0 (2022-08-18)
 --------------------------
 

diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2014-2021 by the Sciris Development Team
+Copyright (c) 2014-2022 by the Sciris Development Team
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/sciris/sc_colors.py b/sciris/sc_colors.py
@@ -783,13 +783,15 @@ def orangebluecolormap(apply=False):
 
 
 # Register colormaps
-pl.cm.register_cmap('alpine',     alpinecolormap())
-pl.cm.register_cmap('parula',     parulacolormap())
-pl.cm.register_cmap('banded',     bandedcolormap())
-pl.cm.register_cmap('bi',         bicolormap())
-pl.cm.register_cmap('orangeblue', orangebluecolormap())
-try:
-    pl.cm.register_cmap('turbo',      turbocolormap())
-except: # Included since Matplotlib 3.4.0
-    pass
-
+existing = pl.colormaps()
+colormap_map = dict(
+    alpine     = alpinecolormap(),
+    parula     = parulacolormap(),
+    banded     = bandedcolormap(),
+    bi         = bicolormap(),
+    orangeblue = orangebluecolormap(),
+    turbo      = turbocolormap(),
+)
+for name,cmap in colormap_map.items():
+    if name not in existing:
+        pl.cm.register_cmap(name, cmap)
diff --git a/sciris/sc_dataframe.py b/sciris/sc_dataframe.py
@@ -255,6 +255,39 @@ def flexget(self, cols=None, rows=None, asarray=False, cast=True, default=None):
 
         return output
 
+
+    def disp(self, nrows=None, ncols=None, width=999, precision=4, options=None):
+        '''
+        Flexible display of a dataframe, showing all rows/columns by default.
+        
+        Args:
+            nrows (int): maximum number of rows to show (default: all)
+            ncols (int): maximum number of columns to show (default: all)
+            width (int): maximum screen width (default: 999)
+            precision (int): number of decimal places to show (default: 4)
+            options (dict): any additional options, passed to ``pd.option_context()``
+        
+        **Examples**::
+            
+            df = sc.dataframe(data=np.random.rand(100,10))
+            df.disp()
+            df.disp(precision=1, ncols=5, options={'display.colheader_justify': 'left'})
+        
+        New in version 2.0.1.
+        '''
+        opts = scu.mergedicts({
+            'display.max_rows': nrows,
+            'display.max_columns': ncols,
+            'display.width': width,
+            'display.precision': precision,
+            }, options
+        )
+        optslist = [item for pair in opts.items() for item in pair] # Convert from dict to list
+        with pd.option_context(*optslist):
+            print(self)
+        return
+
+
     def poprow(self, key, returnval=True):
         ''' Remove a row from the data frame '''
         rowindex = int(key)

diff --git a/sciris/sc_fileio.py b/sciris/sc_fileio.py
@@ -18,10 +18,10 @@
 # Basic imports
 import io
 import os
-import re
 import json
 import shutil
 import uuid
+import string
 import inspect
 import importlib
 import traceback
@@ -286,19 +286,33 @@ def loadtext(filename=None, folder=None, splitlines=False):
     return output
 
 
-def savetext(filename=None, string=None):
+def savetext(filename=None, string=None, **kwargs):
     '''
-    Convenience function for saving a text file -- accepts a string or list of strings.
+    Convenience function for saving a text file -- accepts a string or list of strings;
+    can also save an arbitrary object, in which case it will first convert to a string.
+    
+    Args:
+        filename (str): the filename to save to
+        string (str): the string (or object) to save
+        kwargs (dict): passed to ``np.savetxt()`` if saving an array
 
     **Example**::
 
         text = ['Here', 'is', 'a', 'poem']
-        sc.savetext('my-document.txt', text)
+        sc.savetext('my-poem.txt', text)
     '''
-    if isinstance(string, list): string = '\n'.join(string) # Convert from list to string)
-    if not scu.isstring(string):  string = str(string)
+    is_array = scu.isarray(string)
+    if isinstance(string, list):
+        string = '\n'.join(string) # Convert from list to string
+    elif not is_array and not scu.isstring(string):
+        string = str(string)
     filename = makefilepath(filename=filename)
-    with open(filename, 'w') as f: f.write(string)
+    if is_array: # Shortcut to Numpy for saving arrays -- basic CSV
+        kw = scu.mergedicts(dict(fmt='%s', delimiter=', '), kwargs)
+        np.savetxt(filename, string, **kw)
+    else: # Main use case: save text
+        with open(filename, 'w') as f:
+            f.write(string)
     return
 
 
@@ -429,19 +443,48 @@ def getfilelist(folder=None, pattern=None, abspath=False, nopath=False, filesonl
     return filelist
 
 
-def sanitizefilename(rawfilename):
+def sanitizefilename(filename, sub='_', allowspaces=False, asciify=True, strict=False, disallowed=None):
     '''
     Takes a potentially Linux- and Windows-unfriendly candidate file name, and
     returns a "sanitized" version that is more usable.
+    
+    Args:
+        filename (str): the filename to sanitize
+        sub (str): the character to substitute unsafe input characters with
+        allowspaces (bool): whether to allow spaces in the filename
+        asciify (bool): whether to convert the string from Unicode to ASCII
+        strict (bool): whether to remove (almost) all non-alphanumeric characters
+        disallowed (str): optionally supply a custom list of disallowed characters
 
     **Example**::
 
-        bad_name = 'How*is*this*even*a*filename?!.doc'
-        good_name = sc.sanitizefilename(bad_name) # Returns 'How_is_this_even_a_filename.doc'
+        bad = 'Nöt*a   file&name?!.doc'
+        good = sc.sanitizefilename(bad)
+        
+    New in version 2.0.1: arguments "sub", "allowspaces", "asciify", "strict", and "disallowed"
     '''
-    filtername = re.sub(r'[\!\?\"\'<>]', '', rawfilename) # Erase certain characters we don't want at all: !, ?, ", ', <, >
-    filtername = re.sub(r'[:/\\\*\|,]', '_', filtername) # Change certain characters that might be being used as separators from what they were to underscores: space, :, /, \, *, |, comma
-    return filtername # Return the sanitized file name.
+
+    # Handle options
+    if asciify:
+        filename = scu.asciify(filename) # Ensure it's ASCII compatible
+    if disallowed is None:
+        if strict:
+            disallowed = '''!"#$%&\'()*+,/:;<=>?@[\\]^`{|}~\t\n\r\x0b\x0c'''
+        else:
+            disallowed = '''\\/:*?!"'<>|'''
+    if not allowspaces:
+        disallowed += ' '
+
+    # Create the filename
+    sanitized = ''
+    for letter in filename:
+        if letter in string.printable or not asciify:
+            if letter in disallowed:
+                sanitized += sub
+            else:
+                sanitized += letter
+
+    return sanitized # Return the sanitized file name.
 
 
 def makefilepath(filename=None, folder=None, ext=None, default=None, split=False, aspath=None, abspath=True, makedirs=True, checkexists=None, sanitize=False, die=True, verbose=False):

diff --git a/sciris/sc_math.py b/sciris/sc_math.py
@@ -140,9 +140,10 @@ def findinds(arr=None, val=None, *args, eps=1e-6, first=False, last=False, ind=N
                 boolarr = np.isclose(a=arr, b=val, atol=atol, **kwargs) # If absolute difference between the two values is less than a certain amount
             elif scu.checktype(val, 'arraylike'): # It's not actually a value, it's another array
                 boolarr = arr
-                arglist.append(val)
+                arglist.append(scu.promotetoarray(val))
             else:
-                raise Exception
+                errormsg = f'Cannot understand input {type(val)}: must be number or array-like'
+                raise TypeError(errormsg)
 
     # Handle any additional inputs
     for arg in arglist:

diff --git a/sciris/sc_odict.py b/sciris/sc_odict.py
@@ -71,6 +71,7 @@ class odict(OD):
 
     | New in version 1.1.0: "defaultdict" argument
     | New in version 1.3.1: allow integer keys via ``makefrom()``; removed ``to_OD``; performance improvements
+    | New in version 2.0.1: allow deletion by index
     '''
 
     def __init__(self, *args, defaultdict=None, **kwargs):
@@ -160,7 +161,7 @@ def __getitem__(self, key, allow_default=True):
                 return output
 
             else: # pragma: no cover # Handle everything else (rare)
-                return OD.__getitem__(self,key)
+                return OD.__getitem__(self, key)
 
 
     def __setitem__(self, key, value):
@@ -358,10 +359,17 @@ def __radd__(self, dict2):
         else:         return self.__add__(dict2)
 
 
-    def __delitem__(self, *args, **kwargs):
-        ''' Default delitem, except set stale to true '''
+    def __delitem__(self, key):
+        ''' Default delitem, except set stale to true and allow numeric values; slices etc are not supported '''
         self._setattr('_stale', True) # Flag to refresh the cached keys
-        return OD.__delitem__(self, *args, **kwargs)
+        try:
+            return OD.__delitem__(self, key)
+        except Exception as E:
+            if isinstance(key, scu._numtype): # If it's a number, use that
+                thiskey = self._ikey(key)
+                return OD.__delitem__(self, thiskey) # Note that defaultdict behavior isn't supported for non-string lookup
+            else:
+                raise E
 
 
     def disp(self, maxlen=None, showmultilines=True, divider=False, dividerthresh=10, numindents=0, sigfigs=5, numformat=None, maxitems=20, **kwargs):

diff --git a/sciris/sc_plotting.py b/sciris/sc_plotting.py
@@ -1286,7 +1286,7 @@ def savefigs(figs=None, filetype=None, filename=None, folder=None, savefigargs=N
         defaultsavefigargs = {'dpi':200, 'bbox_inches':'tight'} # Specify a higher default DPI and save the figure tightly
         defaultsavefigargs.update(savefigargs) # Update the default arguments with the user-supplied arguments
         if filetype == 'fig':
-            scf.saveobj(fullpath, plt)
+            scf.save(fullpath, plt)
             filenames.append(fullpath)
             if verbose: print(f'Figure object saved to {fullpath}')
         else: # pragma: no cover

diff --git a/sciris/sc_profiling.py b/sciris/sc_profiling.py
@@ -70,7 +70,7 @@ def memload():
 
 
 
-def checkmem(var, descend=None, alphabetical=False, plot=False, doprint=True, verbose=False):
+def checkmem(var, descend=True, alphabetical=False, compresslevel=0, plot=False, verbose=False, **kwargs):
     '''
     Checks how much memory the variable or variables in question use by dumping
     them to file.
@@ -85,14 +85,16 @@ def checkmem(var, descend=None, alphabetical=False, plot=False, doprint=True, ve
         var (any): the variable being checked
         descend (bool): whether or not to descend one level into the object
         alphabetical (bool): if descending into a dict or object, whether to list items by name rather than size
+        compresslevel (int): level of compression to use when saving to file (typically 0)
         plot (bool): if descending, show the results as a pie chart
-        doprint (bool): whether to print out results
         verbose (bool or int): detail to print, if >1, print repr of objects along the way
+        **kwargs (dict): passed to :func:`load`
 
     **Example**::
 
+        import numpy as np
         import sciris as sc
-        sc.checkmem(['spiffy',rand(2483,589)], descend=True)
+        sc.checkmem(['spiffy', np.random.rand(2483,589)])
     '''
 
     def check_one_object(variable):
@@ -103,7 +105,7 @@ def check_one_object(variable):
 
         # Create a temporary file, save the object, check the size, remove it
         filename = tempfile.mktemp()
-        scf.saveobj(filename, variable, die=False)
+        scf.save(filename, variable, die=False, compresslevel=compresslevel)
         filesize = os.path.getsize(filename)
         os.remove(filename)
 

diff --git a/sciris/sc_settings.py b/sciris/sc_settings.py
@@ -202,8 +202,8 @@ def get_orig_options():
         optdesc.aspath = 'Set whether to return Path objects instead of strings by default'
         options.aspath = parse_env('SCIRIS_ASPATH', False, 'bool')
 
-        optdesc.style = 'Set the default plotting style -- options are "simple" and "fancy" plus those in pl.style.available; see also options.rc'
-        options.style = parse_env('SCIRIS_STYLE', 'simple', 'str')
+        optdesc.style = 'Set the default plotting style -- options are "default", "simple", and "fancy", plus those in pl.style.available; see also options.rc'
+        options.style = parse_env('SCIRIS_STYLE', 'default', 'str')
 
         optdesc.dpi = 'Set the default DPI -- the larger this is, the larger the figures will be'
         options.dpi = parse_env('SCIRIS_DPI', pl.rcParams['figure.dpi'], 'int')
@@ -224,7 +224,7 @@ def get_orig_options():
         options.backend = parse_env('SCIRIS_BACKEND', pl.get_backend(), 'str')
 
         optdesc.rc = 'Matplotlib rc (run control) style parameters used during plotting -- usually set automatically by "style" option'
-        options.rc = scu.dcp(rc_simple)
+        options.rc = {}
 
         return optdesc, options
 
@@ -468,11 +468,17 @@ def _handle_style(self, style=None, reset=False, copy=True):
             rc = scu.dcp(style)
         elif style is not None: # Usual use case
             stylestr = str(style).lower()
-            if   stylestr in ['simple', 'default']: rc = scu.dcp(rc_simple)
-            elif stylestr in ['fancy', 'covasim']:  rc = scu.dcp(rc_fancy)
-            elif style in pl.style.library:         rc = scu.dcp(pl.style.library[style])
+            if stylestr in ['default', 'matplotlib', 'reset']:
+                pl.style.use('default') # Need special handling here since not in pl.style.library...ugh
+                rc = {}
+            elif stylestr in ['simple', 'sciris']:
+                rc = scu.dcp(rc_simple)
+            elif stylestr in ['fancy', 'covasim']:
+                rc = scu.dcp(rc_fancy)
+            elif style in pl.style.library:
+                rc = scu.dcp(pl.style.library[style])
             else:
-                errormsg = f'Style "{style}"; not found; options are "simple" (default), "fancy", plus:\n{scu.newlinejoin(pl.style.available)}'
+                errormsg = f'Style "{style}"; not found; options are "default", "simple", "fancy", plus:\n{scu.newlinejoin(pl.style.available)}'
                 raise ValueError(errormsg)
         if reset:
             self.rc = rc
@@ -516,7 +522,7 @@ def with_style(self, style_args=None, use=False, **kwargs):
         kwargs = scu.mergedicts(style_args, kwargs)
 
         # Handle style, overwriting existing
-        style = kwargs.pop('style', None)
+        style = kwargs.pop('style', self.style)
         rc = self._handle_style(style, reset=False)
 
         def pop_keywords(sourcekeys, rckey):