Skip to content

Commit

Permalink
Merge pull request #54 from sciris/develop
Browse files Browse the repository at this point in the history
Python3 fixes, etc.
  • Loading branch information
cliffckerr committed Feb 6, 2019
2 parents 294ea90 + 89685b5 commit 2e2bef6
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 28 deletions.
62 changes: 40 additions & 22 deletions sciris/sc_fileio.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ def tofile(self, output=True):
'''
Return a file-like object with the contents of the file.
This can then be used to open the workbook from memory without writing anything to disk e.g.
- book = openpyxl.load_workbook(self.tofile())
- book = openpyexcel.load_workbook(self.tofile())
- book = xlrd.open_workbook(file_contents=self.tofile().read())
'''
bytesblob = io.BytesIO(self.blob)
Expand All @@ -447,7 +447,7 @@ def freshbytes(self):
class Spreadsheet(Blobject):
'''
A class for reading and writing Excel files in binary format.No disk IO needs
to happen to manipulate the spreadsheets with openpyxl (or xlrd or pandas).
to happen to manipulate the spreadsheets with openpyexcel (or xlrd or pandas).
Version: 2018sep03
'''
Expand All @@ -458,11 +458,11 @@ def xlrd(self, *args, **kwargs):
book = xlrd.open_workbook(file_contents=self.tofile().read(), *args, **kwargs)
return book

def openpyxl(self, *args, **kwargs):
''' Return a book as opened by openpyxl '''
import openpyxl # Optional iport
def openpyexcel(self, *args, **kwargs):
''' Return a book as opened by openpyexcel '''
import openpyexcel # Optional import
self.tofile(output=False)
book = openpyxl.load_workbook(self.bytes, *args, **kwargs) # This stream can be passed straight to openpyxl
book = openpyexcel.load_workbook(self.bytes, *args, **kwargs) # This stream can be passed straight to openpyexcel
return book

def pandas(self, *args, **kwargs):
Expand All @@ -486,7 +486,7 @@ def _getsheet(book, sheetname=None, sheetnum=None):
else: sheet = book.active
return sheet

def readcells(self, *args, **kwargs):
def readcells(self, wbargs=None, *args, **kwargs):
''' Alias to loadspreadsheet() '''
if 'method' in kwargs:
method = kwargs['method']
Expand All @@ -496,33 +496,46 @@ def readcells(self, *args, **kwargs):
if method is None: method = 'xlrd'
f = self.tofile()
kwargs['fileobj'] = f

# Read in sheetoutput (sciris dataframe object for xlrd, 2D numpy array for openpyexcel).
if method == 'xlrd':
output = loadspreadsheet(*args, **kwargs)
elif method == 'openpyxl':
book = self.openpyxl()
sheetoutput = loadspreadsheet(*args, **kwargs) # returns sciris dataframe object
elif method == 'openpyexcel':
if wbargs is None: wbargs = {}
book = self.openpyexcel(**wbargs)
ws = self._getsheet(book=book, sheetname=kwargs.get('sheetname'), sheetnum=kwargs.get('sheetname'))
rawdata = tuple(ws.rows)
output = np.empty(np.shape(rawdata), dtype=object)
sheetoutput = np.empty(np.shape(rawdata), dtype=object)
for r,rowdata in enumerate(rawdata):
for c,val in enumerate(rowdata):
output[r][c] = rawdata[r][c].value
sheetoutput[r][c] = rawdata[r][c].value
else:
errormsg = 'Reading method not found; must be one of xlrd, openpyxl, or pandas, not %s' % method
errormsg = 'Reading method not found; must be one of xlrd, openpyexcel, or pandas, not %s' % method
raise Exception(errormsg)
return output

# Return the appropriate output.
cells = kwargs.get('cells')
if cells is None: # If no cells specified, return the whole sheet.
return sheetoutput
else:
results = []
for cell in cells: # Loop over all cells
rownum = cell[0]
colnum = cell[1]
if method == 'xlrd': # If we're using xlrd, reduce the row number by 1.
rownum -= 1
results.append(sheetoutput[rownum][colnum]) # Grab and append the result at the cell.
return results

def writecells(self, cells=None, startrow=None, startcol=None, vals=None, sheetname=None, sheetnum=None, verbose=False, wbargs=None):
'''
Specify cells to write. Can supply either a list of cells of the same length
as the values, or else specify a starting row and column and write the values
from there.
'''
import openpyxl # Optional import

# Load workbook
if wbargs is None: wbargs = {}
self.tofile(output=False) # Convert to bytes
wb = openpyxl.load_workbook(self.bytes, **wbargs)
wb = self.openpyexcel(**wbargs)
if verbose: print('Workbook loaded: %s' % wb)

# Get right worksheet
Expand All @@ -539,13 +552,18 @@ def writecells(self, cells=None, startrow=None, startcol=None, vals=None, sheetn
for cell,val in zip(cells,vals):
try:
if ut.isstring(cell): # Handles e.g. cell='A1'
ws[cell] = val
cellobj = ws[cell]
elif ut.checktype(cell, 'arraylike','number') and len(cell)==2: # Handles e.g. cell=(0,0)
ws.cell(row=cell[0], column=cell[1], value=val)
cellobj = ws.cell(row=cell[0], column=cell[1])
else:
errormsg = 'Cell must be formatted as a label or row-column pair, e.g. "A1" or (3,5); not "%s"' % cell
raise Exception(errormsg)
if verbose: print(' Cell %s = %s' % (cell,val))
if isinstance(val,tuple):
cellobj.value = val[0]
cellobj.cached_value = val[1]
else:
cellobj.value = val
except Exception as E:
errormsg = 'Could not write "%s" to cell "%s": %s' % (val, cell, repr(E))
raise Exception(errormsg)
Expand Down Expand Up @@ -575,7 +593,7 @@ def writecells(self, cells=None, startrow=None, startcol=None, vals=None, sheetn
pass


def loadspreadsheet(filename=None, folder=None, fileobj=None, sheetname=None, sheetnum=None, asdataframe=None, header=True):
def loadspreadsheet(filename=None, folder=None, fileobj=None, sheetname=None, sheetnum=None, asdataframe=None, header=True, cells=None):
'''
Load a spreadsheet as a list of lists or as a dataframe. Read from either a filename or a file object.
'''
Expand Down Expand Up @@ -606,7 +624,7 @@ def loadspreadsheet(filename=None, folder=None, fileobj=None, sheetname=None, sh
val = sheet.cell_value(rownum+header,colnum)
try:
val = float(val) # Convert it to a number if possible
except:
except:
try: val = str(val) # But give up easily and convert to a string (not Unicode)
except: pass # Still no dice? Fine, we tried
rawdata[rownum][str(attr)] = val
Expand Down
2 changes: 1 addition & 1 deletion sciris/sc_odict.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def __sanitize_items(self, items):
''' Try to convert the output of a slice to an array, but give up easily and return a list '''
try:
output = np.array(items) # Try standard Numpy array...
if 'S' in str(output.dtype): # ...but instead of converting to string, convert to object array
if 'S' in str(output.dtype) or 'U' in str(output.dtype): # ...but instead of converting to string, convert to object array for Python 2 or 3 -- WARNING, fragile!
output = np.array(items, dtype=object)
except:
output = items # If that fails, just give up and return the list
Expand Down
63 changes: 61 additions & 2 deletions sciris/sc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -966,7 +966,6 @@ def toc(start=None, output=False, label=None, sigfigs=None, filename=None):
if filename is not None: printtologfile(logmessage, filename) # If we passed in a filename, append the message to that file.
else: print(logmessage) # Otherwise, print the message.
return None



def percentcomplete(step=None, maxsteps=None, indent=1):
Expand Down Expand Up @@ -1299,7 +1298,7 @@ def flattendict(inputdict=None, basekey=None, subkeys=None, complist=None, keyli
### CLASSES
##############################################################################

__all__ += ['prettyobj', 'LinkException', 'Link']
__all__ += ['prettyobj', 'LinkException', 'Link', 'Timer']

class prettyobj(object):
def __repr__(self):
Expand Down Expand Up @@ -1354,3 +1353,63 @@ def __copy__(self, *args, **kwargs):
def __deepcopy__(self, *args, **kwargs):
''' Same as copy '''
return self.__copy__(*args, **kwargs)


class Timer(object):
    '''
    Simple timer class

    This wraps `tic` and `toc` with the formatting arguments and
    the start time (at construction).

    Use this in a ``with...as`` block to automatically print
    elapsed time when the block finishes.

    Implementation based on https://preshing.com/20110924/timing-your-code-using-pythons-with-statement/

    Example making repeated calls to the same Timer:

    >>> timer = Timer()
    >>> timer.toc()
    Elapsed time: 2.63 s
    >>> timer.toc()
    Elapsed time: 5.00 s

    Example wrapping code using with-as:

    >>> with Timer(label='mylabel') as t:
    ...     foo()

    Args:
        **kwargs: keyword arguments stored on the instance and forwarded
            unchanged to the module-level :func:`toc` on every call to
            :meth:`toc` (e.g. ``label``, ``sigfigs``, ``filename``, ``output``).
    '''

    def __init__(self, **kwargs):
        self.tic()
        self.kwargs = kwargs #: Store kwargs to pass to :func:`toc` at the end of the block

    def __enter__(self):
        '''
        Reset start time when entering with-as block

        Returns the timer itself so it can be bound with ``as``.
        '''
        self.tic()
        return self

    def __exit__(self, *args):
        '''
        Print elapsed time when leaving a with-as block
        '''
        # Deliberately do not return the result of toc(): a truthy return value
        # from __exit__ would suppress any exception raised inside the block.
        self.toc()

    def tic(self):
        '''
        Set start time
        '''
        self.start = tic()

    def toc(self):
        '''
        Print elapsed time

        Returns whatever the module-level :func:`toc` returns for the stored
        keyword arguments, so that options such as ``output=True`` are not
        silently discarded.
        '''
        return toc(self.start, **self.kwargs)
4 changes: 2 additions & 2 deletions sciris/sc_version.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__all__ = ['__version__', '__versiondate__', '__license__']

__version__ = '0.12.5'
__versiondate__ = '2019-01-15'
__version__ = '0.13.0'
__versiondate__ = '2019-02-07'
__license__ = 'Sciris %s (%s) -- (c) Sciris.org' % (__version__, __versiondate__)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
'numpy>=1.10.1', # Numerical functions
'dill', # File I/O
'gitpython', # Version information
'openpyxl>=2.5', # Spreadsheet functions
'openpyexcel>=2.5', # Spreadsheet functions -- fork of openpyxl
'pandas', # Spreadsheet input
'psutil', # Load monitoring
'xlrd', # Spreadsheet input
Expand Down
Binary file added tests/nutrition_databook.xlsx
Binary file not shown.
23 changes: 23 additions & 0 deletions tests/test_readcells.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Smoke test: read individual cells from a sample databook with Spreadsheet.readcells()."""
import sciris as sc

# Worksheet in the sample Nutrition databook that the cells are pulled from.
sheet = 'Baseline year population inputs'

# Load the sample Nutrition databook.
book = sc.Spreadsheet(filename='nutrition_databook.xlsx')

# Leaving out ``cells`` reads the whole sheet instead, e.g.:
#   book.readcells(method='xlrd', sheetname=sheet)
#   book.readcells(method='openpyexcel', sheetname=sheet, wbargs={'data_only': True})

# Read the cells through the xlrd backend.
xlrd_values = book.readcells(
    method='xlrd',
    sheetname=sheet,
    cells=[[46, 2], [47, 2]],
)

# Read the same cells through the openpyexcel backend. Passing
# wbargs={'data_only': True} is required to get cached values rather than
# formula strings.
openpyexcel_values = book.readcells(
    method='openpyexcel',
    wbargs={'data_only': True},
    sheetname=sheet,
    cells=[[46, 2], [47, 2]],
)

# Report both results, heading first and values second.
for heading, values in (('Cell test', xlrd_values), ('Cell test2', openpyexcel_values)):
    print(heading)
    print(values)

print('Done.')

0 comments on commit 2e2bef6

Please sign in to comment.