Merge pull request #403 from sciris/rc2.1.0

Version 2.1.0
sciris · Dec 24, 2022 · e9958fa · e9958fa
2 parents 10b362f + e340b92
commit e9958fa
Show file tree

Hide file tree

Showing 26 changed files with 874 additions and 424 deletions.
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
@@ -21,7 +21,8 @@ jobs:
       - name: Install Sciris
         run: pip install -e .
       - name: Install tests
-        run: pip install pytest
+        working-directory: ./tests
+        run: pip install -r requirements_test.txt
       - name: Run API tests
         working-directory: ./tests
         run: pytest test_*.py --durations=0 --junitxml=test-results.xml # Run actual tests

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -5,11 +5,37 @@ All major updates to Sciris are documented here.
 
 By import convention, components of the Sciris library are listed beginning with ``sc.``, e.g. ``sc.odict()``.
 
+Version 2.1.0 (2022-12-23)
+--------------------------
+
+New features
+~~~~~~~~~~~~
+#. ``sc.save()``/``sc.load()`` now allow files to be saved/loaded in `zstandard <https://github.com/indygreg/python-zstandard>`_ (instead of ``gzip``) format, since the former is usually faster for the same level of compression. ``sc.save()`` still uses ``gzip`` by default; the equivalent ``sc.zsave()`` uses ``zstandard`` by default. ``sc.save()`` also now has the option of not using any compression via ``sc.save(..., compression='none')``. (Thanks to `Fabio Mazza <https://github.com/fabmazz>`_ for the suggestion.)
+#. Functions that returned paths as strings by default -- ``sc.thisdir()``, ``sc.getfilelist()``, ``sc.makefilepath()``, ``sc.sanitizefilename()`` -- now all have aliases that return ``Path`` objects by default: ``sc.thispath()``, ``sc.getfilepaths()``, ``sc.makepath()``, and ``sc.sanitizepath()``.
+#. ``sc.thisfile()`` gets the path of the current file.
+#. ``sc.sanitizecolor()`` will convert any form of color specification (e.g. ``'g'``, ``'crimson'``) into an RGB tuple.
+#. ``sc.tryexcept()`` silences all (or some) exceptions in a ``with`` block.
+
+Bugfixes
+~~~~~~~~
+#. Fixed bug where ``sc.save(filename=None)`` would incorrectly result in creation of a file on disk in addition to returning an ``io.BytesIO`` stream.
+#. Fixed bug where ``sc.checkmem()`` would sometimes raise an exception when saving a ``None`` object to check its size.
+#. Fixed bug where ``sc.loadbalancer()`` would sometimes fail if ``interval`` was 0 (it is now required to be at least 1 ms).
+
+Other changes
+~~~~~~~~~~~~~
+#. ``sc.vectocolor()`` now has a ``nancolor`` argument to handle NaN values; NaNs are also now handled correctly.
+#. ``sc.timer()`` now has a more compact default string representation; use ``timer.disp()`` to display the full object. In addition, ``timer.total`` is now a property instead of a function.
+#. ``sc.thisdir()`` now takes a ``frame`` argument, in case the folder of a file *other* than the calling script is desired.
+#. ``sc.getfilelist()`` now has a ``fnmatch`` argument, which allows for Unix-style file matching via the `fnmatch <https://docs.python.org/3/library/fnmatch.html>`_ module.
+#. ``sc.importbyname()`` now has a ``verbose`` argument.
+#. ``sc.promotetolist()`` and ``sc.promotetoarray()`` are now aliases of ``sc.tolist()`` and ``sc.toarray()``, rather than vice versa.
+
 
 Version 2.0.4 (2022-10-25)
 --------------------------
 #. ``sc.stackedbar()`` will automatically plot a 2D array as a stacked bar chart.
-#. ``sc.parallelize()`` now always tries ``multiprocess`` if an exception is encountered and ``die=False`` (unless ``parallelizer`` already was ``'multiprocess'``).
+#. ``sc.parallelize()`` now uses ``multiprocess`` again by default (due to issues with ``concurrent.futures``).
 #. Added a ``die`` argument to ``sc.save()``.
 #. Added a ``prefix`` argument to ``sc.urlopen()``, allowing e.g. ``http://`` to be omitted from the URL.
 

diff --git a/docs/conf.py b/docs/conf.py
@@ -46,20 +46,20 @@
     "recommonmark",
 ]
 
-autodoc_default_options = {
-    'member-order': 'bysource',
-    'members': None,
-}
+# autodoc_default_options = {
+#     'member-order': 'bysource',
+#     'members': None,
+# }
 
 autodoc_mock_imports = []
 napoleon_google_docstring = True
 
 # Configure autosummary
 autosummary_generate = True  # Turn on sphinx.ext.autosummary
-autoclass_content = "both"  # Add __init__ doc (ie. params) to class summaries
+autoclass_content = "init"  # Add __init__ doc (ie. params) to class summaries
 html_show_sourcelink = False  # Remove 'view source code' from top of page (for html, not python)
 autodoc_member_order = 'bysource' # Keep original ordering
-add_module_names = False  # NB, does not work
+# add_module_names = False  # NB, does not work
 autodoc_inherit_docstrings = False # Stops subclasses from including docs from parent classes
 
 # Add any paths that contain templates here, relative to this directory.

diff --git a/sciris/sc_colors.py b/sciris/sc_colors.py
@@ -17,17 +17,18 @@
 import numpy as np
 from matplotlib import colors as mplc
 from . import sc_utils as scu
+from . import sc_math as scm
 
 
 ##############################################################################
 #%% Color functions
 ##############################################################################
 
-__all__ = ['shifthue', 'rgb2hex', 'hex2rgb', 'rgb2hsv', 'hsv2rgb']
+__all__ = ['sanitizecolor', 'shifthue', 'rgb2hex', 'hex2rgb', 'rgb2hsv', 'hsv2rgb']
 
 
 def _listify_colors(colors, origndim=None):
-    ''' Do standard transformation on colors -- internal helpfer function '''
+    ''' Do standard transformation on colors -- internal helper function '''
     if not origndim:
         colors = scu.dcp(colors) # So we don't overwrite the original
         origndim = np.ndim(colors) # Original dimensionality
@@ -41,6 +42,46 @@ def _listify_colors(colors, origndim=None):
         return colors
 
 
+def sanitizecolor(color, asarray=False, alpha=None, normalize=True):
+    '''
+    Alias to ``matplotlib.colors.to_rgb``, but also handles numeric inputs.
+    
+    Args:
+        color (str/list/etc): the input color to sanitize into an RGB tuple (or array)
+        asarray (bool): whether to return an array instead of a tuple
+        alpha (float): if not None, include the alpha channel with this value
+        normalize (bool): whether to divide by 255 if any values are greater than 1
+    
+    **Examples**::
+        
+        green1 = sc.sanitizecolor('g')
+        green2 = sc.sanitizecolor('tab:green')
+        crimson1 = sc.sanitizecolor('crimson')
+        crimson2 = sc.sanitizecolor((220, 20, 60))
+        midgrey = sc.sanitizecolor(0.5)
+    '''
+    if isinstance(color, str):
+        try:
+            color = mplc.to_rgb(color)
+        except ValueError as E:
+            errormsg = f'Could not understand "{color}" as a valid color: must be a standard Matplotlib color string'
+            raise ValueError(errormsg) from E
+    elif isinstance(color, float):
+        color = [color]*3 # Consider it grey
+
+    color = scu.toarray(color).astype(float) # Get it into consistent format for further operations
+    if len(color) not in [3,4]:
+        errormsg = f'Cannot parse {color} as a color: expecting length 3 (RGB) or 4 (RGBA)'
+        raise ValueError(errormsg)
+    if normalize and color.max()>1:
+        color /= 255
+    if alpha is not None and len(color) == 3:
+        color = scm.cat(color, float(alpha))
+    if not asarray:
+        color = tuple(color) # Convert back to tuple if desired
+    return color
+
+
 def _processcolors(colors=None, asarray=False, ashex=False, reverse=False):
     '''
     Small helper function to do common transformations on the colors, once generated.
@@ -159,7 +200,7 @@ def hsv2rgb(colors=None):
 __all__ += ['vectocolor', 'arraycolors', 'gridcolors', 'midpointnorm', 'colormapdemo']
 
 
-def vectocolor(vector, cmap=None, asarray=True, reverse=False, minval=None, maxval=None, midpoint=None):
+def vectocolor(vector, cmap=None, asarray=True, reverse=False, minval=None, maxval=None, midpoint=None, nancolor=None):
     """
     This function converts a vector (i.e., 1D array) of N values into an Nx4 matrix
     of color values according to the current colormap. It automatically scales the
@@ -175,26 +216,28 @@ def vectocolor(vector, cmap=None, asarray=True, reverse=False, minval=None, maxv
         minval (float): the minimum value to use
         maxval (float): the maximum value to use
         midpoint (float): the midpoint value to use
+        nancolor (color): if supplied, use this color for NaN entries
 
     Returns:
         colors (array): Nx4 array of RGB-alpha color values
 
     **Example**::
 
         n = 1000
-        x = randn(n,1);
-        y = randn(n,1);
+        x = pl.randn(n,1);
+        y = pl.randn(n,1);
         c = sc.vectocolor(y);
         pl.scatter(x, y, c=c, s=50)
 
-    New in version 1.2.0: midpoint argument.
+    | New in version 1.2.0: midpoint argument.
+    | New in version 2.1.0: ``nancolor`` argument; NaN values are now ignored when computing the default minimum and maximum
     """
 
     from numpy import array, zeros
 
     if cmap is None:
         cmap = pl.get_cmap() # Get current colormap
-    elif type(cmap) == str:
+    elif isinstance(cmap, str):
         try:
             cmap = pl.get_cmap(cmap)
         except: # pragma: no cover
@@ -210,9 +253,9 @@ def vectocolor(vector, cmap=None, asarray=True, reverse=False, minval=None, maxv
     vector = np.array(vector) # Just to be sure
     if len(vector):
         if minval is None:
-            minval = vector.min()
+            minval = np.nanmin(vector)
         if maxval is None:
-            maxval = vector.max()
+            maxval = np.nanmax(vector)
 
         vector = vector-minval # Subtract minimum
         vector = vector/float(maxval-minval) # Divide by maximum
@@ -222,11 +265,16 @@ def vectocolor(vector, cmap=None, asarray=True, reverse=False, minval=None, maxv
         nelements = len(vector) # Count number of elements
         colors = zeros((nelements,4))
         for i in range(nelements):
-            colors[i,:] = array(cmap(vector[i]))
+            point = vector[i]
+            if np.isnan(point) and nancolor is not None:
+                color = sanitizecolor(nancolor, alpha=True) # If it's NaN
+            else:
+                color = array(cmap(point)) # Main use case
+            colors[i,:] = color
 
     # The vector is empty; just return black
     else:
-        colors=(0,0,0,1)
+        colors = (0,0,0,1)
 
     # Process output
     output = _processcolors(colors=colors, asarray=asarray, reverse=reverse)

diff --git a/sciris/sc_dataframe.py b/sciris/sc_dataframe.py
@@ -243,7 +243,7 @@ def flexget(self, cols=None, rows=None, asarray=False, cast=True, default=None):
             colindices = Ellipsis
         else:
             colindices = []
-            for col in scu.promotetolist(cols):
+            for col in scu.tolist(cols):
                 colindices.append(self._sanitizecol(col))
         if rows is None:
             rowindices = Ellipsis
@@ -374,7 +374,7 @@ def concat(self, data, *args, columns=None, reset_index=True, inplace=False, dfa
         dfs = [self]
         if columns is None:
             columns = self.columns
-        for arg in scu.promotetolist(data, coerce='tuple') + list(args):
+        for arg in scu.tolist(data, coerce='tuple') + list(args):
             if isinstance(arg, pd.DataFrame):
                 df = arg
             else:
@@ -432,7 +432,7 @@ def addcol(self, key=None, value=None):
 
     def rmcol(self, key, die=True):
         ''' Remove a column or columns from the data frame '''
-        cols = scu.promotetolist(key)
+        cols = scu.tolist(key)
         for col in cols:
             if col not in self.cols: # pragma: no cover
                 errormsg = 'sc.dataframe(): cannot remove column %s: columns are:\n%s' % (col, '\n'.join(self.cols))
@@ -573,7 +573,7 @@ def filterout(self, inds=None, value=None, col=None, verbose=False, reset_index=
     def filtercols(self, cols=None, die=True, reset_index=True, inplace=False):
         ''' Filter columns keeping only those specified -- note, by default, do not perform in place '''
         if cols is None: cols = scu.dcp(self.cols) # By default, do nothing
-        cols = scu.promotetolist(cols)
+        cols = scu.tolist(cols)
         order = []
         notfound = []
         for col in cols:
@@ -595,7 +595,7 @@ def sortrows(self, col=None, reverse=False, returninds=False):
         ''' Sort the dataframe rows in place by the specified column(s)'''
         if col is None:
             col = 0 # Sort by first column by default
-        cols = scu.promotetolist(col)[::-1] # Ensure it's a list and reverse order
+        cols = scu.tolist(col)[::-1] # Ensure it's a list and reverse order
         sortorder = [] # In case there are no columns
         for col in cols:
             col = self._sanitizecol(col)

diff --git a/sciris/sc_datetime.py b/sciris/sc_datetime.py
@@ -17,6 +17,7 @@
 import dateutil as du
 from . import sc_utils as scu
 from . import sc_math as scm
+from . import sc_printing as scp
 
 
 ###############################################################################
@@ -180,7 +181,7 @@ def readdate(datestr=None, *args, dateformat=None, return_defaults=False, verbos
         return formats_to_try
 
     # Handle date formats
-    format_list = scu.promotetolist(dateformat, keepnone=True) # Keep none which signifies default
+    format_list = scu.tolist(dateformat, keepnone=True) # Keep none which signifies default
     if dateformat is not None:
         if dateformat == 'dmy':
             formats_to_try = dmy_formats
@@ -671,7 +672,7 @@ def toctic(returntic=False, returntoc=False, *args, **kwargs):
     else:           return
 
 
-class timer(scu.prettyobj):
+class timer:
     '''
     Simple timer class. Note: ``sc.timer()`` and ``sc.Timer()`` are aliases.
 
@@ -715,6 +716,7 @@ class timer(scu.prettyobj):
     | New in version 1.3.0: ``sc.timer()`` alias, and allowing the label as first argument.
     | New in version 1.3.2: ``toc()`` passes label correctly; ``tt()`` method; ``auto`` argument
     | New in version 2.0.0: ``plot()`` method; ``total()`` method; ``indivtimings`` and ``cumtimings`` properties
+    | New in version 2.1.0: ``total`` as property instead of method; updated repr; added disp() method
     '''
     def __init__(self, label=None, auto=False, start=True, **kwargs):
         from . import sc_odict as sco # Here to avoid circular import
@@ -743,6 +745,18 @@ def __exit__(self, *args):
         ''' Print elapsed time when leaving a with-as block '''
         self.toc()
         return
+
+    def __repr__(self):
+        string = scp.objectid(self)
+        string += 'Timings:\n'
+        string += str(self.timings)
+        string += f'\nTotal time: {self.total:n} s'
+        return string
+
+
+    def disp(self):
+        ''' Display the full representation of the object '''
+        return scp.pr(self)
 
 
     def tic(self):
@@ -792,6 +806,7 @@ def toc(self, label=None, **kwargs):
         return output
 
 
+    @property
     def total(self):
         ''' Calculate total time '''