Skip to content

Commit

Permalink
Merge pull request #339 from sciris/rc1.4.0
Browse files Browse the repository at this point in the history
Initial v2.0.0 updates
  • Loading branch information
cliffckerr committed Aug 11, 2022
2 parents fc0148f + e4d4d47 commit 9c272d8
Show file tree
Hide file tree
Showing 10 changed files with 203 additions and 76 deletions.
30 changes: 30 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,36 @@ All notable changes to this project will be documented in this file.
By import convention, components of the Sciris library are listed beginning with ``sc.``, e.g. ``sc.odict()``.


Version 2.0.0 (2022-08-12)
--------------------------

This version contains a number of major improvements, including:

#. **TBC**

New functions and methods
~~~~~~~~~~~~~~~~~~~~~~~~~
#. ``sc.count()`` counts the number of matching elements in an array (similar to ``np.count_nonzero()``, but more flexible with e.g. float vs. int mismatches).
#. ``sc.strsplit()`` will automatically split common types of delimited strings (e.g. ``sc.strsplit('a b c')``).

Bugfixes
~~~~~~~~
#. ``Spreadsheet`` objects no longer pickle the binary spreadsheet (in some cases reducing size by 50%).
#. ``sc.loadspreadsheet()`` has been updated to match current ``pd.read_excel()`` syntax.

Improvements
~~~~~~~~~~~~
#. If a copy/deepcopy is not possible, ``sc.cp()``/``sc.dcp()`` now raise an exception by default (previously, they silenced it).

Housekeeping
~~~~~~~~~~~~
#. ``DeprecationWarning``\ s have been changed to ``FutureWarning``\ s.

Regression information
~~~~~~~~~~~~~~~~~~~~~~
#. The default for ``sc.cp()`` and ``sc.dcp()`` changed from ``die=False`` to ``die=True``, which may cause exceptions that were previously silenced to be raised instead. For previous behavior, use ``sc.dcp(..., die=False)``.


Version 1.3.3 (2022-01-16)
--------------------------

Expand Down
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -367,5 +367,5 @@ A very simple test case of Sciris. In the ``examples/helloworld`` folder, type `

See the directions `here <https://github.com/sciris/scirisweb/tree/develop/examples/helloworld>`__ on how to install and run this example.

.. |Sciris showcase| image:: docs/sciris-showcase-code.png
.. |Sciris output| image:: docs/sciris-showcase-output.png
.. |Sciris showcase| image:: https://github.com/sciris/sciris/raw/develop/docs/sciris-showcase-code.png
.. |Sciris output| image:: https://github.com/sciris/sciris/raw/develop/docs/sciris-showcase-output.png
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# -- Project information -----------------------------------------------------

project = 'Sciris'
copyright = f'2021 by the Sciris Development Team (version {sc.__version__})'
copyright = f'2014–2022 by the Sciris Development Team (version {sc.__version__})'
author = 'Sciris Development Team'

# The short X.Y version
Expand Down
6 changes: 3 additions & 3 deletions sciris/sc_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def date(obj, *args, start_date=None, readformat=None, outformat=None, as_date=T
if dateformat is not None: # pragma: no cover
outformat = dateformat
warnmsg = 'sc.date() argument "dateformat" has been deprecated as of v1.2.2; use "outformat" instead'
warnings.warn(warnmsg, category=DeprecationWarning, stacklevel=2)
warnings.warn(warnmsg, category=FutureWarning, stacklevel=2)

# Convert to list and handle other inputs
if obj is None:
Expand Down Expand Up @@ -335,7 +335,7 @@ def day(obj, *args, start_date=None, **kwargs):
if start_day is not None: # pragma: no cover
start_date = start_day
warnmsg = 'sc.day() argument "start_day" has been deprecated as of v1.2.2; use "start_date" instead'
warnings.warn(warnmsg, category=DeprecationWarning, stacklevel=2)
warnings.warn(warnmsg, category=FutureWarning, stacklevel=2)

# Do not process a day if it's not supplied, and ensure it's a list
if obj is None:
Expand Down Expand Up @@ -598,7 +598,7 @@ def toc(start=None, label=None, baselabel=None, sigfigs=None, reset=False, outpu
else:
if baselabel is None:
if label:
base = f'Elapsed time for {label}: '
base = f'{label}: '
else: # Handles case toc(label='')
base = ''
else:
Expand Down
30 changes: 19 additions & 11 deletions sciris/sc_fileio.py
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,10 @@ def __init__(self, *args, **kwargs):
self.wb = None
return

def __getstate__(self):
    ''' Return the instance state used for pickling, with the workbook entry cleared '''
    state = dict(self.__dict__) # Shallow copy, so the live object keeps its own workbook
    state['wb'] = None # The loaded workbook is not included in the pickled state
    return state

def _reload_wb(self, reload=None):
''' Helper function to check if workbook is already loaded '''
Expand All @@ -885,29 +889,33 @@ def _reload_wb(self, reload=None):


def xlrd(self, reload=False, store=True, **kwargs): # pragma: no cover
''' Return a book as opened by xlrd '''
wb = self.wb
''' Legacy method to load from xlrd '''
if self._reload_wb(reload=reload):
try:
import xlrd # Optional import
except ModuleNotFoundError as e:
raise ModuleNotFoundError('The "xlrd" Python package is not available; please install manually') from e
wb = xlrd.open_workbook(file_contents=self.tofile().read(), **kwargs)
else:
wb = self.wb

if store:
self.wb = wb
return wb


def openpyxl(self, reload=False, store=True, **kwargs):
''' Return a book as opened by openpyxl '''
wb = self.wb
if self._reload_wb(reload=reload):
import openpyxl # Optional import
if self.blob is not None:
self.tofile(output=False)
wb = openpyxl.load_workbook(self.bytes, **kwargs) # This stream can be passed straight to openpyxl
else:
wb = openpyxl.Workbook(**kwargs)
else:
wb = self.wb

if store:
self.wb = wb
return wb
Expand All @@ -925,21 +933,24 @@ def openpyexcel(self, *args, **kwargs):
Falling back to openpyxl, which is identical except for how cached cell values are handled.
'''
warnings.warn(warnmsg, category=DeprecationWarning, stacklevel=2)
warnings.warn(warnmsg, category=FutureWarning, stacklevel=2)
return self.openpyxl(*args, **kwargs)


def pandas(self, reload=False, store=True, **kwargs): # pragma: no cover
''' Return a book as opened by pandas '''
wb = self.wb

if self._reload_wb(reload=reload):
import pandas as pd # Optional import
import pandas as pd # Optional (slow) import
if self.blob is not None:
self.tofile(output=False)
wb = pd.ExcelFile(self.bytes, **kwargs)
else:
errormsg = 'For pandas, must load an existing workbook; use openpyxl to create a new workbook'
raise FileNotFoundError(errormsg)
else:
wb = self.wb

if store:
self.wb = wb
return wb
Expand Down Expand Up @@ -1075,7 +1086,7 @@ def save(self, filename='spreadsheet.xlsx'):



def loadspreadsheet(filename=None, folder=None, fileobj=None, sheet=0, asdataframe=None, header=True, method='pandas', **kwargs):
def loadspreadsheet(filename=None, folder=None, fileobj=None, sheet=0, header=1, asdataframe=None, method='pandas', **kwargs):
'''
Load a spreadsheet as a dataframe or a list of lists.
Expand Down Expand Up @@ -1107,12 +1118,9 @@ def loadspreadsheet(filename=None, folder=None, fileobj=None, sheet=0, asdatafra

# Load using pandas
if method == 'pandas':
import pandas as pd # Optional import
import pandas as pd # Optional import, here for loading speed
if fileobj is not None: fullpath = fileobj # Substitute here for reading
if header is not None: header = np.arange(header)
data = pd.read_excel(fullpath, sheet_name=sheet, header=header, **kwargs)
if asdataframe is False:
pass
return data

# Load using openpyxl
Expand Down
70 changes: 49 additions & 21 deletions sciris/sc_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
##############################################################################

__all__ = ['approx', 'safedivide', 'findinds', 'findfirst', 'findlast', 'findnearest',
'dataindex', 'getvalidinds', 'sanitize', 'getvaliddata', 'isprime']
'count', 'dataindex', 'getvalidinds', 'sanitize', 'getvaliddata', 'isprime']


def approx(val1=None, val2=None, eps=None, **kwargs):
Expand Down Expand Up @@ -83,27 +83,29 @@ def safedivide(numerator=None, denominator=None, default=None, eps=None, warn=Fa

def findinds(arr=None, val=None, eps=1e-6, first=False, last=False, die=True, **kwargs):
'''
Little function to find matches even if two things aren't eactly equal (eg.
due to floats vs. ints). If one argument, find nonzero values. With two arguments,
check for equality using eps. Returns a tuple of arrays if val1 is multidimensional,
else returns an array. Similar to calling np.nonzero(np.isclose(arr, val))[0].
Find matches even if two things aren't exactly equal (e.g. floats vs. ints).
If one argument, find nonzero values. With two arguments, check for equality
using eps. Returns a tuple of arrays if val1 is multidimensional, else returns
an array. Similar to calling ``np.nonzero(np.isclose(arr, val))[0]``.
Args:
arr (array): the array to find values in
val (float): if provided, the value to match
eps (float): the precision for matching (default 1e-6, equivalent to np.isclose's atol)
first (bool): whether to return the first matching value
last (bool): whether to return the last matching value
die (bool): whether to raise an exception if first or last is true and no matches were found
kwargs (dict): passed to np.isclose()
arr (array): the array to find values in
val (float): if provided, the value to match
eps (float): the precision for matching (default 1e-6, equivalent to ``np.isclose()``'s atol)
first (bool): whether to return the first matching value
last (bool): whether to return the last matching value
die (bool): whether to raise an exception if first or last is true and no matches were found
kwargs (dict): passed to ``np.isclose()``
**Examples**::
sc.findinds(rand(10)<0.5) # returns e.g. array([2, 4, 5, 9])
sc.findinds([2,3,6,3], 3) # returns array([1,3])
sc.findinds([2,3,6,3], 3, first=True) # returns 1
New in version 1.2.3: "die" argument
| New in version 1.2.3: "die" argument
| New in version 2.0.0: fix string matching
'''

# Handle first or last
Expand All @@ -118,7 +120,7 @@ def findinds(arr=None, val=None, eps=1e-6, first=False, last=False, die=True, **
arr = kwargs.pop('val1', arr)
val = kwargs.pop('val2', val)
warnmsg = 'sc.findinds() arguments "val1" and "val2" have been deprecated as of v1.0.0; use "arr" and "val" instead'
warnings.warn(warnmsg, category=DeprecationWarning, stacklevel=2)
warnings.warn(warnmsg, category=FutureWarning, stacklevel=2)

# Calculate matches
arr = scu.promotetoarray(arr)
Expand All @@ -127,13 +129,14 @@ def findinds(arr=None, val=None, eps=1e-6, first=False, last=False, die=True, **
else:
if scu.isstring(val):
output = np.nonzero(arr==val)
try: # Standard usage, use nonzero
output = np.nonzero(np.isclose(a=arr, b=val, atol=atol, **kwargs)) # If absolute difference between the two values is less than a certain amount
except Exception as E: # pragma: no cover # As a fallback, try simpler comparison
output = np.nonzero(abs(arr-val) < atol)
if kwargs: # Raise a warning if and only if special settings were passed
warnmsg = f'{str(E)}\nsc.findinds(): np.isclose() encountered an exception (above), falling back to direct comparison'
warnings.warn(warnmsg, category=RuntimeWarning, stacklevel=2)
else:
try: # Standard usage, use nonzero
output = np.nonzero(np.isclose(a=arr, b=val, atol=atol, **kwargs)) # If absolute difference between the two values is less than a certain amount
except Exception as E: # pragma: no cover # As a fallback, try simpler comparison
output = np.nonzero(abs(arr-val) < atol)
if kwargs: # Raise a warning if and only if special settings were passed
warnmsg = f'{str(E)}\nsc.findinds(): np.isclose() encountered an exception (above), falling back to direct comparison'
warnings.warn(warnmsg, category=RuntimeWarning, stacklevel=2)

# Process output
try:
Expand Down Expand Up @@ -189,6 +192,31 @@ def findnearest(series=None, value=None):
return output


def count(arr=None, val=None, eps=1e-6, **kwargs):
    '''
    Count the number of matching elements.

    Similar to ``np.count_nonzero()``, but allows for slight mismatches (e.g.,
    floats vs. ints). For 1D input, equivalent to ``len(sc.findinds())``.

    Args:
        arr (array): the array to find values in
        val (float): if provided, the value to match
        eps (float): the precision for matching (default 1e-6, equivalent to ``np.isclose()``'s atol)
        kwargs (dict): passed to ``sc.findinds()``, and from there to ``np.isclose()``

    Returns:
        The number of matching elements (int)

    **Examples**::

        sc.count(rand(10)<0.5) # returns e.g. 4
        sc.count([2,3,6,3], 3) # returns 2

    New in version 2.0.0.
    '''
    inds = findinds(arr=arr, val=val, eps=eps, **kwargs)

    # Per the findinds() docstring, multidimensional input yields a tuple of
    # index arrays (one per dimension). Each of those arrays has one entry per
    # match, so len() of the tuple itself would give the number of dimensions
    # rather than the number of matches.
    if isinstance(inds, tuple):
        return len(inds[0]) if inds else 0
    return len(inds)



def dataindex(dataarray, index): # pragma: no cover
'''
Take an array of data and return either the first or last (or some other) non-NaN entry.
Expand Down
12 changes: 7 additions & 5 deletions sciris/sc_nested.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,25 +179,27 @@ def flattendict(nesteddict, sep=None, _prefix=None):
{'a_b': 1, 'a_c_d': 2, 'a_c_e': 3}
Args:
d: Input dictionary potentially containing dicts as values
sep: Concatenate keys using string separator. If ``None`` the returned dictionary will have tuples as keys
nesteddict (dict): Input dictionary potentially containing dicts as values
sep (str): Concatenate keys using string separator. If ``None`` the returned dictionary will have tuples as keys
_prefix: Internal argument for recursively accumulating the nested keys
Returns:
A flat dictionary where no values are dicts
New in version 2.0.0: handle non-string keys.
"""
output_dict = {}
for k, v in nesteddict.items():
if sep is None:
if sep is None: # Create tuples
if _prefix is None:
k2 = (k,)
else:
k2 = _prefix + (k,)
else:
else: # Create strings
if _prefix is None:
k2 = k
else:
k2 = _prefix + sep + k
k2 = str(_prefix) + str(sep) + str(k)

if isinstance(v, dict):
output_dict.update(flattendict(nesteddict[k], sep=sep, _prefix=k2))
Expand Down

0 comments on commit 9c272d8

Please sign in to comment.