Merge pull request #54 from sciris/develop

Python3 fixes, etc.
sciris · Feb 6, 2019 · 2e2bef6 · 2e2bef6
2 parents 294ea90 + 89685b5
commit 2e2bef6
Show file tree

Hide file tree

Showing 7 changed files with 128 additions and 28 deletions.
diff --git a/sciris/sc_fileio.py b/sciris/sc_fileio.py
@@ -427,7 +427,7 @@ def tofile(self, output=True):
         '''
         Return a file-like object with the contents of the file.
         This can then be used to open the workbook from memory without writing anything to disk e.g.
-        - book = openpyxl.load_workbook(self.tofile())
+        - book = openpyexcel.load_workbook(self.tofile())
         - book = xlrd.open_workbook(file_contents=self.tofile().read())
         '''
         bytesblob = io.BytesIO(self.blob)
@@ -447,7 +447,7 @@ def freshbytes(self):
 class Spreadsheet(Blobject):
     '''
     A class for reading and writing Excel files in binary format.No disk IO needs 
-    to happen to manipulate the spreadsheets with openpyxl (or xlrd or pandas).
+    to happen to manipulate the spreadsheets with openpyexcel (or xlrd or pandas).
 
     Version: 2018sep03
     '''
@@ -458,11 +458,11 @@ def xlrd(self, *args, **kwargs):
         book = xlrd.open_workbook(file_contents=self.tofile().read(), *args, **kwargs)
         return book
 
-    def openpyxl(self, *args, **kwargs):
-        ''' Return a book as opened by openpyxl '''
-        import openpyxl # Optional iport
+    def openpyexcel(self, *args, **kwargs):
+        ''' Return a book as opened by openpyexcel '''
+        import openpyexcel # Optional import
         self.tofile(output=False)
-        book = openpyxl.load_workbook(self.bytes, *args, **kwargs) # This stream can be passed straight to openpyxl
+        book = openpyexcel.load_workbook(self.bytes, *args, **kwargs) # This stream can be passed straight to openpyexcel
         return book
 
     def pandas(self, *args, **kwargs):
@@ -486,7 +486,7 @@ def _getsheet(book, sheetname=None, sheetnum=None):
         else:                       sheet = book.active
         return sheet
 
-    def readcells(self, *args, **kwargs):
+    def readcells(self, wbargs=None, *args, **kwargs):
         ''' Alias to loadspreadsheet() '''
         if 'method' in kwargs:
             method = kwargs['method']
@@ -496,33 +496,46 @@ def readcells(self, *args, **kwargs):
         if method is None: method = 'xlrd'
         f = self.tofile()
         kwargs['fileobj'] = f
+
+        # Read in sheetoutput (sciris dataframe object for xlrd, 2D numpy array for openpyexcel).
         if method == 'xlrd':
-            output = loadspreadsheet(*args, **kwargs)
-        elif method == 'openpyxl':
-            book = self.openpyxl()
+            sheetoutput = loadspreadsheet(*args, **kwargs)  # returns sciris dataframe object
+        elif method == 'openpyexcel':
+            if wbargs is None: wbargs = {}
+            book = self.openpyexcel(**wbargs)
             ws = self._getsheet(book=book, sheetname=kwargs.get('sheetname'), sheetnum=kwargs.get('sheetname'))
             rawdata = tuple(ws.rows)
-            output = np.empty(np.shape(rawdata), dtype=object)
+            sheetoutput = np.empty(np.shape(rawdata), dtype=object)
             for r,rowdata in enumerate(rawdata):
                 for c,val in enumerate(rowdata):
-                    output[r][c] = rawdata[r][c].value
+                    sheetoutput[r][c] = rawdata[r][c].value
         else:
-            errormsg = 'Reading method not found; must be one of xlrd, openpyxl, or pandas, not %s' % method
+            errormsg = 'Reading method not found; must be one of xlrd, openpyexcel, or pandas, not %s' % method
             raise Exception(errormsg)
-        return output
+
+        # Return the appropriate output.
+        cells = kwargs.get('cells')
+        if cells is None:  # If no cells specified, return the whole sheet.
+            return sheetoutput
+        else:
+            results = []
+            for cell in cells:  # Loop over all cells
+                rownum = cell[0]
+                colnum = cell[1]
+                if method == 'xlrd':  # If we're using xlrd, reduce the row number by 1.
+                    rownum -= 1
+                results.append(sheetoutput[rownum][colnum])  # Grab and append the result at the cell.
+            return results
 
     def writecells(self, cells=None, startrow=None, startcol=None, vals=None, sheetname=None, sheetnum=None, verbose=False, wbargs=None):
         '''
         Specify cells to write. Can supply either a list of cells of the same length
         as the values, or else specify a starting row and column and write the values
         from there.
         '''
-        import openpyxl # Optional import
-
         # Load workbook
         if wbargs is None: wbargs = {}
-        self.tofile(output=False) # Convert to bytes
-        wb = openpyxl.load_workbook(self.bytes, **wbargs)
+        wb = self.openpyexcel(**wbargs)
         if verbose: print('Workbook loaded: %s' % wb)
 
         # Get right worksheet
@@ -539,13 +552,18 @@ def writecells(self, cells=None, startrow=None, startcol=None, vals=None, sheetn
             for cell,val in zip(cells,vals):
                 try:
                     if ut.isstring(cell): # Handles e.g. cell='A1'
-                        ws[cell] = val
+                        cellobj = ws[cell]
                     elif ut.checktype(cell, 'arraylike','number') and len(cell)==2: # Handles e.g. cell=(0,0)
-                        ws.cell(row=cell[0], column=cell[1], value=val)
+                        cellobj = ws.cell(row=cell[0], column=cell[1])
                     else:
                         errormsg = 'Cell must be formatted as a label or row-column pair, e.g. "A1" or (3,5); not "%s"' % cell
                         raise Exception(errormsg)
                     if verbose: print('  Cell %s = %s' % (cell,val))
+                    if isinstance(val,tuple):
+                        cellobj.value = val[0]
+                        cellobj.cached_value = val[1]
+                    else:
+                        cellobj.value = val
                 except Exception as E:
                     errormsg = 'Could not write "%s" to cell "%s": %s' % (val, cell, repr(E))
                     raise Exception(errormsg)
@@ -575,7 +593,7 @@ def writecells(self, cells=None, startrow=None, startcol=None, vals=None, sheetn
     pass
 
 
-def loadspreadsheet(filename=None, folder=None, fileobj=None, sheetname=None, sheetnum=None, asdataframe=None, header=True):
+def loadspreadsheet(filename=None, folder=None, fileobj=None, sheetname=None, sheetnum=None, asdataframe=None, header=True, cells=None):
     '''
     Load a spreadsheet as a list of lists or as a dataframe. Read from either a filename or a file object.
     '''
@@ -606,7 +624,7 @@ def loadspreadsheet(filename=None, folder=None, fileobj=None, sheetname=None, sh
             val = sheet.cell_value(rownum+header,colnum)
             try:
                 val = float(val) # Convert it to a number if possible
-            except: 
+            except:
                 try:    val = str(val)  # But give up easily and convert to a string (not Unicode)
                 except: pass # Still no dice? Fine, we tried
             rawdata[rownum][str(attr)] = val

diff --git a/sciris/sc_odict.py b/sciris/sc_odict.py
@@ -61,7 +61,7 @@ def __sanitize_items(self, items):
         ''' Try to convert the output of a slice to an array, but give up easily and return a list '''
         try: 
             output = np.array(items) # Try standard Numpy array...
-            if 'S' in str(output.dtype): # ...but instead of converting to string, convert to object array
+            if 'S' in str(output.dtype) or 'U' in str(output.dtype): # ...but instead of converting to string, convert to object array for Python 2 or 3 -- WARNING, fragile!
                 output = np.array(items, dtype=object)
         except:
             output = items # If that fails, just give up and return the list

diff --git a/sciris/sc_utils.py b/sciris/sc_utils.py
@@ -966,7 +966,6 @@ def toc(start=None, output=False, label=None, sigfigs=None, filename=None):
         if filename is not None: printtologfile(logmessage, filename) # If we passed in a filename, append the message to that file.
         else: print(logmessage) # Otherwise, print the message.
         return None
-
 
 
 def percentcomplete(step=None, maxsteps=None, indent=1):
@@ -1299,7 +1298,7 @@ def flattendict(inputdict=None, basekey=None, subkeys=None, complist=None, keyli
 ### CLASSES
 ##############################################################################
 
-__all__ += ['prettyobj', 'LinkException', 'Link']
+__all__ += ['prettyobj', 'LinkException', 'Link', 'Timer']
 
 class prettyobj(object):
     def __repr__(self):
@@ -1354,3 +1353,63 @@ def __copy__(self, *args, **kwargs):
     def __deepcopy__(self, *args, **kwargs):
         ''' Same as copy '''
         return self.__copy__(*args, **kwargs)
+
+
+class Timer(object):
+    '''
+    Simple timer class
+
+    This wraps the module-level `tic` and `toc`, storing the formatting
+    arguments and the start time (set at construction). Use this in a
+    ``with...as`` block to automatically print the elapsed time when the
+    block finishes; entering the block resets the start time.
+
+    Implementation based on https://preshing.com/20110924/timing-your-code-using-pythons-with-statement/
+
+    Example making repeated calls to the same Timer:
+
+    >>> timer = Timer()
+    >>> timer.toc()
+    Elapsed time: 2.63 s
+    >>> timer.toc()
+    Elapsed time: 5.00 s
+
+    Example wrapping code using with-as:
+
+    >>> with Timer(label='mylabel') as t:
+    ...     foo()
+
+    '''
+
+    def __init__(self, **kwargs):
+        '''
+        Start timing and store kwargs to pass to :func:`toc` on each call
+        '''
+        self.kwargs = kwargs
+        self.tic()
+
+    def __enter__(self):
+        '''
+        Reset start time when entering with-as block
+        '''
+        self.tic()
+        return self
+
+    def __exit__(self, *args):
+        '''
+        Print elapsed time when leaving a with-as block
+        '''
+        self.toc() # Result deliberately not returned: a truthy return here would suppress exceptions raised in the block
+
+    def tic(self):
+        '''
+        Set start time
+        '''
+        self.start = tic()
+
+    def toc(self):
+        '''
+        Print elapsed time via the module-level toc(), forwarding the stored
+        kwargs, and return its result so that any value it returns is not lost
+        '''
+        return toc(self.start, **self.kwargs)
diff --git a/sciris/sc_version.py b/sciris/sc_version.py
@@ -1,5 +1,5 @@
 __all__ = ['__version__', '__versiondate__', '__license__']
 
-__version__      = '0.12.5'
-__versiondate__  = '2019-01-15'
+__version__      = '0.13.0'
+__versiondate__  = '2019-02-07'
 __license__      = 'Sciris %s (%s) -- (c) Sciris.org' % (__version__, __versiondate__)
diff --git a/setup.py b/setup.py
@@ -10,7 +10,7 @@
         'numpy>=1.10.1',     # Numerical functions
         'dill',              # File I/O
         'gitpython',         # Version information
-        'openpyxl>=2.5',     # Spreadsheet functions
+        'openpyexcel>=2.5',  # Spreadsheet functions -- fork of openpyxl
         'pandas',            # Spreadsheet input
         'psutil',            # Load monitoring
         'xlrd',              # Spreadsheet input

diff --git a/tests/nutrition_databook.xlsx b/tests/nutrition_databook.xlsx
diff --git a/tests/test_readcells.py b/tests/test_readcells.py
@@ -0,0 +1,23 @@
+import os
+
+import sciris as sc
+
+# Load a sample Nutrition databook to try pulling cells from. The path is resolved relative
+# to this file (not the current working directory) so the script can be run from anywhere.
+datapath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'nutrition_databook.xlsx')
+wb = sc.Spreadsheet(filename=datapath)
+
+sheetname = 'Baseline year population inputs'
+
+# Grab cells using xlrd.
+celltest = wb.readcells(method='xlrd', sheetname=sheetname, cells=[[46, 2], [47, 2]])
+
+# Grab cells using openpyexcel; wbargs={'data_only': True} pulls out cached values instead of formula strings.
+celltest2 = wb.readcells(method='openpyexcel', wbargs={'data_only': True},
+                         sheetname=sheetname, cells=[[46, 2], [47, 2]])
+
+print('Cell test')
+print(celltest)
+print('Cell test2')
+print(celltest2)
+print('Done.')