Skip to content

Commit

Permalink
Merge pull request #323 from sciris/rc1.4.0-psl-rmpath
Browse files Browse the repository at this point in the history
Rc1.4.0 psl rmpath and spreadsheet methods
  • Loading branch information
cliffckerr committed Aug 13, 2022
2 parents ee659cb + 0cb6c7e commit 5e7720a
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 46 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ New functions and methods
#. ``sc.count()`` counts the number of matching elements in an array (similar to ``np.count_nonzero()``, but more flexible with e.g. float vs. int mismatches).
#. ``sc.strsplit()`` will automatically split common types of delimited strings (e.g. ``sc.strsplit('a b c')``).
#. ``sc.rmnans()`` and ``sc.fillnans()`` have been added as aliases of ``sc.sanitize()`` with default options.
#. ``sc.rmpath()`` removes both files and folders, with an optional interactive mode.
#. ``sc.ispath()`` is an alias for ``isinstance(obj, pathlib.Path)``.

Bugfixes
~~~~~~~~
Expand Down
106 changes: 90 additions & 16 deletions sciris/sc_fileio.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
Functions for reading/writing to files, including pickles, JSONs, and Excel.
Highlights:
- ``sc.saveobj()/sc.loadobj()``: efficiently save/load any Python object (via pickling)
- ``sc.savejson()/sc.loadjson()``: likewise, for JSONs
- ``sc.thisdir()``: get current folder
- ``sc.getfilelist()``: easy way to access glob
- ``sc.save()/sc.load()``: efficiently save/load any Python object (via pickling)
- ``sc.savejson()/sc.loadjson()``: likewise, for JSONs
- ``sc.savetext()/sc.loadtext()``: likewise, for text
- ``sc.thisdir()``: get current folder
- ``sc.getfilelist()``: easy way to access glob
- ``sc.rmpath()``: remove files and folders
"""

##############################################################################
Expand All @@ -17,6 +19,7 @@
import os
import re
import json
import shutil
import uuid
import types
import inspect
Expand Down Expand Up @@ -45,7 +48,7 @@
#%% Pickling functions
##############################################################################

__all__ = ['loadobj', 'loadstr', 'saveobj', 'dumpstr', 'load', 'save']
__all__ = ['loadobj', 'loadstr', 'saveobj', 'dumpstr', 'load', 'save', 'rmpath']


def loadobj(filename=None, folder=None, verbose=False, die=None, remapping=None, method='pickle', **kwargs):
Expand Down Expand Up @@ -262,7 +265,7 @@ def dumpstr(obj=None):
#%% Other file functions
##############################################################################

__all__ += ['loadtext', 'savetext', 'savezip', 'getfilelist', 'sanitizefilename', 'makefilepath', 'path', 'thisdir']
__all__ += ['loadtext', 'savetext', 'savezip', 'getfilelist', 'sanitizefilename', 'makefilepath', 'path', 'ispath', 'thisdir']


def loadtext(filename=None, folder=None, splitlines=False):
Expand Down Expand Up @@ -499,6 +502,11 @@ def path(*args, **kwargs):
path.__doc__ += '\n\n' + Path.__doc__


def ispath(obj):
    '''
    Check whether an object is a ``Path`` instance.

    Alias to ``isinstance(obj, Path)``.

    Args:
        obj (any): the object to check

    Returns:
        bool: True if ``obj`` is an instance of ``Path``, False otherwise
    '''
    is_path_instance = isinstance(obj, Path)
    return is_path_instance


def thisdir(file=None, path=None, *args, aspath=None, **kwargs):
'''
Tiny helper function to get the folder for a file, usually the current file.
Expand Down Expand Up @@ -540,6 +548,67 @@ def thisdir(file=None, path=None, *args, aspath=None, **kwargs):
return filepath


def rmpath(path=None, *args, die=True, verbose=True, interactive=False, **kwargs):
    """
    Remove file(s) and folder(s). Alias to ``os.remove()`` (for files) and ``shutil.rmtree()``
    (for folders).

    Arguments:
        path (str/Path/list): file, folder, or list to remove
        args (list): additional paths to remove
        die (bool): whether or not to raise an exception if cannot remove
        verbose (bool): how much detail to print
        interactive (bool): whether to confirm prior to each deletion
        kwargs (dict): passed to ``os.remove()``/``shutil.rmtree()``

    Raises:
        FileNotFoundError: if ``die=True`` and a path does not exist, or exists but is neither a file nor a folder
        Exception: if ``die=True``, any exception raised by the removal function is re-raised

    **Examples**::

        sc.rmpath('myobj.obj') # Remove a single file
        sc.rmpath('myobj1.obj', 'myobj2.obj', 'myobj3.obj') # Remove multiple files
        sc.rmpath(['myobj.obj', 'tests'], interactive=True) # Remove a file and a folder interactively
        sc.rmpath(sc.getfilelist('tests/*.obj')) # Example of removing multiple files
    """
    paths = scu.mergelists(path, *args)
    for thispath in paths: # Use a distinct loop name so the "path" argument is not shadowed

        # Choose the removal function; reset it each iteration so a value from
        # a previous path can never be reused by accident
        rm_func = None
        if not os.path.exists(thispath):
            errormsg = f'Path "{thispath}" does not exist'
        elif os.path.isfile(thispath):
            rm_func = os.remove
        elif os.path.isdir(thispath):
            rm_func = shutil.rmtree
        else:
            errormsg = f'Path "{thispath}" exists, but is neither a file nor a folder: unable to remove'

        # Nothing can be removed for this path: raise or report, then move on
        if rm_func is None:
            if die:
                raise FileNotFoundError(errormsg)
            elif verbose:
                print(errormsg)
            continue

        # Only an explicit "y" confirms the deletion; anything else skips it
        if interactive:
            ans = input(f'Remove "{thispath}"? (y/[n]) ')
            if ans != 'y':
                print(f'  Skipping "{thispath}"')
                continue

        try:
            rm_func(thispath, **kwargs) # Forward kwargs as documented
            if verbose or interactive:
                print(f'Removed "{thispath}"')
        except Exception as E:
            if die:
                raise # Bare raise preserves the original traceback
            elif verbose:
                errormsg = f'Could not remove "{thispath}": {str(E)}'
                print(errormsg)

    return


##############################################################################
#%% JSON functions
##############################################################################
Expand All @@ -549,15 +618,15 @@ def thisdir(file=None, path=None, *args, aspath=None, **kwargs):

def sanitizejson(obj, verbose=True, die=False, tostring=False, **kwargs):
"""
This is the main conversion function for Python data-structures into
JSON-compatible data structures (note: sanitizejson/jsonify are identical).
This is the main conversion function for Python data-structures into JSON-compatible
data structures (note: ``sc.sanitizejson()/sc.jsonify()`` are identical).
Args:
obj (any): almost any kind of data structure that is a combination of list, numpy.ndarray, odicts, etc.
verbose (bool): level of detail to print
die (bool): whether or not to raise an exception if conversion failed (otherwise, return a string)
obj (any): almost any kind of data structure that is a combination of list, numpy.ndarray, odicts, etc.
verbose (bool): level of detail to print
die (bool): whether or not to raise an exception if conversion failed (otherwise, return a string)
tostring (bool): whether to return a string representation of the sanitized object instead of the object itself
kwargs (dict): passed to json.dumps() if tostring=True
kwargs (dict): passed to json.dumps() if tostring=True
Returns:
object (any or str): the converted object that should be JSON compatible, or its representation as a string if tostring=True
Expand Down Expand Up @@ -980,9 +1049,16 @@ def readcells(self, wbargs=None, *args, **kwargs):
f = self.tofile()
kwargs['fileobj'] = f

# Return the appropriate output
cells = kwargs.pop('cells', None)

# Read in sheetoutput (sciris dataframe object for xlrd, 2D numpy array for openpyxl).
load_args = scu.mergedicts(dict(header=None), kwargs)
if method == 'xlrd': # pragma: no cover
sheetoutput = loadspreadsheet(*args, **kwargs, method='xlrd') # returns sciris dataframe object
sheetoutput = loadspreadsheet(*args, **load_args, method='xlrd') # returns sciris dataframe object
elif method == 'pandas':
pandas_sheet = loadspreadsheet(*args, **load_args, method='pandas')
sheetoutput = pandas_sheet.values
elif method in ['openpyxl', 'openpyexcel']:
wb_reader = self.openpyxl if method == 'openpyxl' else self.openpyexcel
wb_reader(**wbargs)
Expand All @@ -996,16 +1072,14 @@ def readcells(self, wbargs=None, *args, **kwargs):
errormsg = f'Reading method not found; must be openpyxl or xlrd, not {method}'
raise ValueError(errormsg)

# Return the appropriate output
cells = kwargs.get('cells')
if cells is None: # If no cells specified, return the whole sheet.
return sheetoutput
else:
results = []
for cell in cells: # Loop over all cells
rownum = cell[0]
colnum = cell[1]
if method == 'xlrd': # If we're using xlrd, reduce the row number by 1.
if method in ['xlrd']: # If we're using xlrd/pandas, reduce the row number by 1.
rownum -= 1
results.append(sheetoutput[rownum][colnum]) # Grab and append the result at the cell.
return results
Expand Down
67 changes: 37 additions & 30 deletions tests/test_fileio.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,29 @@
import openpyxl
import sciris as sc

# Define filenames
filedir = 'files' + os.sep
files = sc.prettyobj()
files.excel = filedir + 'test.xlsx'
files.binary = filedir + 'test.obj'
files.text = filedir + 'text.txt'
files.zip = filedir + 'test.zip'
tidyup = True

# Define the test data
nrows = 15
ncols = 3
testdata = pl.zeros((nrows+1, ncols), dtype=object) # Includes header row
testdata[0,:] = ['A', 'B', 'C'] # Create header
testdata[1:,:] = pl.rand(nrows,ncols) # Create data


def test_spreadsheets():
'''
Preserved for completeness, but fairly fragile since relies on not-well-trodden
Excel libraries.
'''

# Define filenames
filedir = 'files' + os.sep
files = sc.prettyobj()
files.excel = filedir + 'test.xlsx'
files.binary = filedir + 'test.obj'
files.text = filedir + 'text.txt'
files.zip = filedir + 'test.zip'
tidyup = True

# Define the test data
nrows = 15
ncols = 3
testdata = pl.zeros((nrows+1, ncols), dtype=object) # Includes header row
testdata[0,:] = ['A', 'B', 'C'] # Create header
testdata[1:,:] = pl.rand(nrows,ncols) # Create data

# Test spreadsheet writing, and create the file for later
formats = {
'header':{'bold':True, 'bg_color':'#3c7d3e', 'color':'#ffffff'},
Expand All @@ -41,7 +42,6 @@ def test_spreadsheets():
formatdata[0,:] = 'header' # Format header
sc.savespreadsheet(filename=files.excel, data=testdata, formats=formats, formatdata=formatdata)


# Test loading
sc.heading('Loading spreadsheet')
data = sc.loadspreadsheet(files.excel)
Expand All @@ -51,12 +51,15 @@ def test_spreadsheets():
if os.path.exists(excel_path):
sc.heading('Reading cells')
wb = sc.Spreadsheet(filename=excel_path) # Load a sample databook to try pulling cells from
celltest = wb.readcells(method='openpyxl', wbargs={'data_only': True}, sheetname='Baseline year population inputs', cells=[[46, 2], [47, 2]]) # Grab cells using openpyxl. You have to set wbargs={'data_only': True} to pull out cached values instead of formula strings
print(f'openpyxl output: {celltest}')
kw = dict(sheetname='Baseline year population inputs', cells=[[46, 2], [47, 2]])
celltest_opyxl = wb.readcells(method='openpyxl', **kw, wbargs={'data_only': True}) # Grab cells using openpyxl. You have to set wbargs={'data_only': True} to pull out cached values instead of formula strings
celltest_pd = wb.readcells(method='pandas', **kw) # Grab cells using pandas
print(f'openpyxl output: {celltest_opyxl}')
print(f'pandas output: {celltest_pd}')
assert celltest_opyxl == celltest_pd
else:
print(f'{excel_path} not found, skipping...')


sc.heading('Loading a blobject')
blob = sc.Blobject(files.excel)
f = blob.tofile()
Expand All @@ -82,6 +85,16 @@ def test_spreadsheets():
print(S)
sc.pp(data)

if tidyup:
sc.rmpath(files.excel)

return S


def test_fileio():
'''
Test other file I/O functions
'''
sc.heading('Saveobj/loadobj')
sc.saveobj(files.binary, testdata)

Expand All @@ -101,7 +114,7 @@ def test_spreadsheets():
sc.pp(sc.getfilelist(abspath=tf, filesonly=tf, foldersonly=not(tf), nopath=tf, aspath=tf))

sc.heading('Save zip')
sc.savezip(files.zip, [files.text, files.excel])
sc.savezip(files.zip, [files.text, files.binary])


'''
Expand Down Expand Up @@ -145,16 +158,9 @@ def __init__(self, x):
if tidyup:
sc.blank()
sc.heading('Tidying up')
for fn in [files.excel, files.binary, files.text, files.zip, 'spreadsheet.xlsx']:
try:
os.remove(fn)
print('Removed %s' % fn)
except:
pass

print('Done, all fileio tests succeeded')
sc.rmpath([files.binary, files.text, files.zip, 'spreadsheet.xlsx'], die=False)

return S
return obj


def test_json():
Expand Down Expand Up @@ -221,6 +227,7 @@ def test_load_dump_str():
sc.tic()

spread = test_spreadsheets()
fileio = test_fileio()
json = test_json()
jp = test_jsonpickle()
string = test_load_dump_str()
Expand Down

0 comments on commit 5e7720a

Please sign in to comment.