Merge pull request #339 from sciris/rc1.4.0

Initial v2.0.0 updates
sciris · Aug 11, 2022 · 9c272d8 · 9c272d8
2 parents fc0148f + e4d4d47
commit 9c272d8
Show file tree

Hide file tree

Showing 10 changed files with 203 additions and 76 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -6,6 +6,36 @@ All notable changes to this project will be documented in this file.
 By import convention, components of the Sciris library are listed beginning with ``sc.``, e.g. ``sc.odict()``.
 
 
+Version 2.0.0 (2022-08-12)
+--------------------------
+
+This version contains a number of major improvements, including:
+
+#. **TBC**
+
+New functions and methods
+~~~~~~~~~~~~~~~~~~~~~~~~~
+#. ``sc.count()`` counts the number of matching elements in an array (similar to ``np.count_nonzero()``, but more flexible with e.g. float vs. int mismatches).
+#. ``sc.strsplit()`` will automatically split common types of delimited strings (e.g. ``sc.strsplit('a b c')``).
+
+Bugfixes
+~~~~~~~~
+#. ``Spreadsheet`` objects no longer pickle the binary spreadsheet (in some cases reducing size by 50%).
+#. ``sc.loadspreadsheet()`` has been updated to match current ``pd.read_excel()`` syntax.
+
+Improvements
+~~~~~~~~~~~~
+#. If a copy/deepcopy is not possible, ``sc.cp()``/``sc.dcp()`` now raise an exception by default (previously, they silenced it).
+
+Housekeeping
+~~~~~~~~~~~~
+#. ``DeprecationWarning``\ s have been changed to ``FutureWarning``\ s.
+
+Regression information
+~~~~~~~~~~~~~~~~~~~~~~
+#. The default for ``sc.cp()`` and ``sc.dcp()`` changed from ``die=False`` to ``die=True``, which may cause exceptions that were previously silenced to be raised. For previous behavior, use ``sc.dcp(..., die=False)``.
+
+
 Version 1.3.3 (2022-01-16)
 --------------------------
 

diff --git a/README.rst b/README.rst
@@ -367,5 +367,5 @@ A very simple test case of Sciris. In the ``examples/helloworld`` folder, type `
 
 See the directions `here <https://github.com/sciris/scirisweb/tree/develop/examples/helloworld>`__ on how to install and run this example.
 
-.. |Sciris showcase| image:: docs/sciris-showcase-code.png
-.. |Sciris output| image:: docs/sciris-showcase-output.png
+.. |Sciris showcase| image:: https://github.com/sciris/sciris/raw/develop/docs/sciris-showcase-code.png
+.. |Sciris output| image:: https://github.com/sciris/sciris/raw/develop/docs/sciris-showcase-output.png
diff --git a/docs/conf.py b/docs/conf.py
@@ -19,7 +19,7 @@
 # -- Project information -----------------------------------------------------
 
 project = 'Sciris'
-copyright = f'2021 by the Sciris Development Team (version {sc.__version__})'
+copyright = f'2014–2022 by the Sciris Development Team (version {sc.__version__})'
 author = 'Sciris Development Team'
 
 # The short X.Y version

diff --git a/sciris/sc_datetime.py b/sciris/sc_datetime.py
@@ -261,7 +261,7 @@ def date(obj, *args, start_date=None, readformat=None, outformat=None, as_date=T
     if dateformat is not None: # pragma: no cover
         outformat = dateformat
         warnmsg = 'sc.date() argument "dateformat" has been deprecated as of v1.2.2; use "outformat" instead'
-        warnings.warn(warnmsg, category=DeprecationWarning, stacklevel=2)
+        warnings.warn(warnmsg, category=FutureWarning, stacklevel=2)
 
     # Convert to list and handle other inputs
     if obj is None:
@@ -335,7 +335,7 @@ def day(obj, *args, start_date=None, **kwargs):
     if start_day is not None: # pragma: no cover
         start_date = start_day
         warnmsg = 'sc.day() argument "start_day" has been deprecated as of v1.2.2; use "start_date" instead'
-        warnings.warn(warnmsg, category=DeprecationWarning, stacklevel=2)
+        warnings.warn(warnmsg, category=FutureWarning, stacklevel=2)
 
     # Do not process a day if it's not supplied, and ensure it's a list
     if obj is None:
@@ -598,7 +598,7 @@ def toc(start=None, label=None, baselabel=None, sigfigs=None, reset=False, outpu
     else:
         if baselabel is None:
             if label:
-                base = f'Elapsed time for {label}: '
+                base = f'{label}: '
             else: # Handles case toc(label='')
                 base = ''
         else:

diff --git a/sciris/sc_fileio.py b/sciris/sc_fileio.py
@@ -877,6 +877,10 @@ def __init__(self, *args, **kwargs):
         self.wb = None
         return
 
+    def __getstate__(self):
+        d = self.__dict__.copy() # Shallow copy
+        d['wb'] = None
+        return d
 
     def _reload_wb(self, reload=None):
         ''' Helper function to check if workbook is already loaded '''
@@ -885,29 +889,33 @@ def _reload_wb(self, reload=None):
 
 
     def xlrd(self, reload=False, store=True, **kwargs): # pragma: no cover
-        ''' Return a book as opened by xlrd '''
-        wb = self.wb
+        ''' Legacy method to load from xlrd '''
         if self._reload_wb(reload=reload):
             try:
                 import xlrd # Optional import
             except ModuleNotFoundError as e:
                 raise ModuleNotFoundError('The "xlrd" Python package is not available; please install manually') from e
             wb = xlrd.open_workbook(file_contents=self.tofile().read(), **kwargs)
+        else:
+            wb = self.wb
+
         if store:
             self.wb = wb
         return wb
 
 
     def openpyxl(self, reload=False, store=True, **kwargs):
         ''' Return a book as opened by openpyxl '''
-        wb = self.wb
         if self._reload_wb(reload=reload):
             import openpyxl # Optional import
             if self.blob is not None:
                 self.tofile(output=False)
                 wb = openpyxl.load_workbook(self.bytes, **kwargs) # This stream can be passed straight to openpyxl
             else:
                 wb = openpyxl.Workbook(**kwargs)
+        else:
+            wb = self.wb
+
         if store:
             self.wb = wb
         return wb
@@ -925,21 +933,24 @@ def openpyexcel(self, *args, **kwargs):
 
 Falling back to openpyxl, which is identical except for how cached cell values are handled.
 '''
-        warnings.warn(warnmsg, category=DeprecationWarning, stacklevel=2)
+        warnings.warn(warnmsg, category=FutureWarning, stacklevel=2)
         return self.openpyxl(*args, **kwargs)
 
 
     def pandas(self, reload=False, store=True, **kwargs): # pragma: no cover
         ''' Return a book as opened by pandas '''
-        wb = self.wb
+
         if self._reload_wb(reload=reload):
-            import pandas as pd # Optional import
+            import pandas as pd # Optional (slow) import
             if self.blob is not None:
                 self.tofile(output=False)
                 wb = pd.ExcelFile(self.bytes, **kwargs)
             else:
                 errormsg = 'For pandas, must load an existing workbook; use openpyxl to create a new workbook'
                 raise FileNotFoundError(errormsg)
+        else:
+            wb = self.wb
+
         if store:
             self.wb = wb
         return wb
@@ -1075,7 +1086,7 @@ def save(self, filename='spreadsheet.xlsx'):
 
 
 
-def loadspreadsheet(filename=None, folder=None, fileobj=None, sheet=0, asdataframe=None, header=True, method='pandas', **kwargs):
+def loadspreadsheet(filename=None, folder=None, fileobj=None, sheet=0, header=1, asdataframe=None, method='pandas', **kwargs):
     '''
     Load a spreadsheet as a dataframe or a list of lists.
 
@@ -1107,12 +1118,9 @@ def loadspreadsheet(filename=None, folder=None, fileobj=None, sheet=0, asdatafra
 
     # Load using pandas
     if method == 'pandas':
-        import pandas as pd # Optional import
+        import pandas as pd # Optional import, here for loading speed
         if fileobj is not None: fullpath = fileobj # Substitute here for reading
-        if header  is not None: header = np.arange(header)
         data = pd.read_excel(fullpath, sheet_name=sheet, header=header, **kwargs)
-        if asdataframe is False:
-            pass
         return data
 
     # Load using openpyxl

diff --git a/sciris/sc_math.py b/sciris/sc_math.py
@@ -18,7 +18,7 @@
 ##############################################################################
 
 __all__ = ['approx', 'safedivide', 'findinds', 'findfirst', 'findlast', 'findnearest',
-           'dataindex', 'getvalidinds', 'sanitize', 'getvaliddata', 'isprime']
+           'count', 'dataindex', 'getvalidinds', 'sanitize', 'getvaliddata', 'isprime']
 
 
 def approx(val1=None, val2=None, eps=None, **kwargs):
@@ -83,27 +83,29 @@ def safedivide(numerator=None, denominator=None, default=None, eps=None, warn=Fa
 
 def findinds(arr=None, val=None, eps=1e-6, first=False, last=False, die=True, **kwargs):
     '''
-    Little function to find matches even if two things aren't eactly equal (eg.
-    due to floats vs. ints). If one argument, find nonzero values. With two arguments,
-    check for equality using eps. Returns a tuple of arrays if val1 is multidimensional,
-    else returns an array. Similar to calling np.nonzero(np.isclose(arr, val))[0].
+    Find matches even if two things aren't exactly equal (e.g. floats vs. ints).
+
+    If one argument, find nonzero values. With two arguments, check for equality
+    using eps. Returns a tuple of arrays if ``arr`` is multidimensional, else returns
+    an array. Similar to calling ``np.nonzero(np.isclose(arr, val))[0]``.
 
     Args:
-        arr (array): the array to find values in
-        val (float): if provided, the value to match
-        eps (float): the precision for matching (default 1e-6, equivalent to np.isclose's atol)
-        first (bool): whether to return the first matching value
-        last (bool): whether to return the last matching value
-        die (bool): whether to raise an exception if first or last is true and no matches were found
-        kwargs (dict): passed to np.isclose()
+        arr    (array): the array to find values in
+        val    (float): if provided, the value to match
+        eps    (float): the precision for matching (default 1e-6, equivalent to ``np.isclose()``'s atol)
+        first  (bool):  whether to return the first matching value
+        last   (bool):  whether to return the last matching value
+        die    (bool):  whether to raise an exception if first or last is true and no matches were found
+        kwargs (dict):  passed to ``np.isclose()``
 
     **Examples**::
 
         sc.findinds(rand(10)<0.5) # returns e.g. array([2, 4, 5, 9])
         sc.findinds([2,3,6,3], 3) # returns array([1,3])
         sc.findinds([2,3,6,3], 3, first=True) # returns 1
 
-    New in version 1.2.3: "die" argument
+    | New in version 1.2.3: "die" argument
+    | New in version 2.0.0: fix string matching
     '''
 
     # Handle first or last
@@ -118,7 +120,7 @@ def findinds(arr=None, val=None, eps=1e-6, first=False, last=False, die=True, **
         arr = kwargs.pop('val1', arr)
         val = kwargs.pop('val2', val)
         warnmsg = 'sc.findinds() arguments "val1" and "val2" have been deprecated as of v1.0.0; use "arr" and "val" instead'
-        warnings.warn(warnmsg, category=DeprecationWarning, stacklevel=2)
+        warnings.warn(warnmsg, category=FutureWarning, stacklevel=2)
 
     # Calculate matches
     arr = scu.promotetoarray(arr)
@@ -127,13 +129,14 @@ def findinds(arr=None, val=None, eps=1e-6, first=False, last=False, die=True, **
     else:
         if scu.isstring(val):
             output = np.nonzero(arr==val)
-        try: # Standard usage, use nonzero
-            output = np.nonzero(np.isclose(a=arr, b=val, atol=atol, **kwargs)) # If absolute difference between the two values is less than a certain amount
-        except Exception as E: # pragma: no cover # As a fallback, try simpler comparison
-            output = np.nonzero(abs(arr-val) < atol)
-            if kwargs: # Raise a warning if and only if special settings were passed
-                warnmsg = f'{str(E)}\nsc.findinds(): np.isclose() encountered an exception (above), falling back to direct comparison'
-                warnings.warn(warnmsg, category=RuntimeWarning, stacklevel=2)
+        else:
+            try: # Standard usage, use nonzero
+                output = np.nonzero(np.isclose(a=arr, b=val, atol=atol, **kwargs)) # If absolute difference between the two values is less than a certain amount
+            except Exception as E: # pragma: no cover # As a fallback, try simpler comparison
+                output = np.nonzero(abs(arr-val) < atol)
+                if kwargs: # Raise a warning if and only if special settings were passed
+                    warnmsg = f'{str(E)}\nsc.findinds(): np.isclose() encountered an exception (above), falling back to direct comparison'
+                    warnings.warn(warnmsg, category=RuntimeWarning, stacklevel=2)
 
     # Process output
     try:
@@ -189,6 +192,31 @@ def findnearest(series=None, value=None):
     return output
 
 
+def count(arr=None, val=None, eps=1e-6, **kwargs):
+    '''
+    Count the number of matching elements.
+
+    Similar to ``np.count_nonzero()``, but allows for slight mismatches (e.g.,
+    floats vs. ints). Equivalent to ``len(sc.findinds())``.
+
+    Args:
+        arr (array): the array to find values in
+        val (float): if provided, the value to match
+        eps (float): the precision for matching (default 1e-6, equivalent to np.isclose's atol)
+        kwargs (dict): passed to ``np.isclose()``
+
+    **Examples**::
+
+        sc.count(rand(10)<0.5) # returns e.g. 4
+        sc.count([2,3,6,3], 3) # returns 2
+
+    New in version 2.0.0.
+    '''
+    output = len(findinds(arr=arr, val=val, eps=eps, **kwargs))
+    return output
+
+
+
 def dataindex(dataarray, index): # pragma: no cover
     '''
     Take an array of data and return either the first or last (or some other) non-NaN entry.

diff --git a/sciris/sc_nested.py b/sciris/sc_nested.py
@@ -179,25 +179,27 @@ def flattendict(nesteddict, sep=None, _prefix=None):
         {'a_b': 1, 'a_c_d': 2, 'a_c_e': 3}
 
     Args:
-        d: Input dictionary potentially containing dicts as values
-        sep: Concatenate keys using string separator. If ``None`` the returned dictionary will have tuples as keys
+        nesteddict (dict): Input dictionary potentially containing dicts as values
+        sep        (str): Concatenate keys using string separator. If ``None`` the returned dictionary will have tuples as keys
         _prefix: Internal argument for recursively accumulating the nested keys
 
     Returns:
         A flat dictionary where no values are dicts
+
+    New in version 2.0.0: handle non-string keys.
     """
     output_dict = {}
     for k, v in nesteddict.items():
-        if sep is None:
+        if sep is None: # Create tuples
             if _prefix is None:
                 k2 = (k,)
             else:
                 k2 = _prefix + (k,)
-        else:
+        else: # Create strings
             if _prefix is None:
                 k2 = k
             else:
-                k2 = _prefix + sep + k
+                k2 = str(_prefix) + str(sep) + str(k)
 
         if isinstance(v, dict):
             output_dict.update(flattendict(nesteddict[k], sep=sep, _prefix=k2))