Merge pull request #323 from sciris/rc1.4.0-psl-rmpath

Rc1.4.0 psl rmpath and spreadsheet methods
sciris · Aug 13, 2022 · 5e7720a · 5e7720a
2 parents ee659cb + 0cb6c7e
commit 5e7720a
Show file tree

Hide file tree

Showing 3 changed files with 129 additions and 46 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -18,6 +18,8 @@ New functions and methods
 #. ``sc.count()`` counts the number of matching elements in an array (similar to ``np.count_nonzero()``, but more flexible with e.g. float vs. int mismatches).
 #. ``sc.strsplit()`` will automatically split common types of delimited strings (e.g. ``sc.strsplit('a b c')``).
 #. ``sc.rmnans()`` and ``sc.fillnans()`` have been added as aliases of ``sc.sanitize()`` with default options.
+#. ``sc.rmpath()`` removes both files and folders, with an optional interactive mode.
+#. ``sc.ispath()`` is an alias for ``isinstance(obj, pathlib.Path)``.
 
 Bugfixes
 ~~~~~~~~

diff --git a/sciris/sc_fileio.py b/sciris/sc_fileio.py
@@ -2,10 +2,12 @@
 Functions for reading/writing to files, including pickles, JSONs, and Excel.
 
 Highlights:
-    -  ``sc.saveobj()/sc.loadobj()``: efficiently save/load any Python object (via pickling)
-    -  ``sc.savejson()/sc.loadjson()``: likewise, for JSONs
-    -  ``sc.thisdir()``: get current folder
-    -  ``sc.getfilelist()``: easy way to access glob
+    - ``sc.save()/sc.load()``: efficiently save/load any Python object (via pickling)
+    - ``sc.savejson()/sc.loadjson()``: likewise, for JSONs
+    - ``sc.savetext()/sc.loadtext()``: likewise, for text
+    - ``sc.thisdir()``: get current folder
+    - ``sc.getfilelist()``: easy way to access glob
+    - ``sc.rmpath()``: remove files and folders
 """
 
 ##############################################################################
@@ -17,6 +19,7 @@
 import os
 import re
 import json
+import shutil
 import uuid
 import types
 import inspect
@@ -45,7 +48,7 @@
 #%% Pickling functions
 ##############################################################################
 
-__all__ = ['loadobj', 'loadstr', 'saveobj', 'dumpstr', 'load', 'save']
+__all__ = ['loadobj', 'loadstr', 'saveobj', 'dumpstr', 'load', 'save', 'rmpath']
 
 
 def loadobj(filename=None, folder=None, verbose=False, die=None, remapping=None, method='pickle', **kwargs):
@@ -262,7 +265,7 @@ def dumpstr(obj=None):
 #%% Other file functions
 ##############################################################################
 
-__all__ += ['loadtext', 'savetext', 'savezip', 'getfilelist', 'sanitizefilename', 'makefilepath', 'path', 'thisdir']
+__all__ += ['loadtext', 'savetext', 'savezip', 'getfilelist', 'sanitizefilename', 'makefilepath', 'path', 'ispath', 'thisdir']
 
 
 def loadtext(filename=None, folder=None, splitlines=False):
@@ -499,6 +502,11 @@ def path(*args, **kwargs):
 path.__doc__ += '\n\n' + Path.__doc__
 
 
+def ispath(obj):
+    ''' Check whether an object is a Path instance; shorthand for isinstance(obj, Path) '''
+    result = isinstance(obj, Path)
+    return result
+
+
 def thisdir(file=None, path=None, *args, aspath=None, **kwargs):
     '''
     Tiny helper function to get the folder for a file, usually the current file.
@@ -540,6 +548,67 @@ def thisdir(file=None, path=None, *args, aspath=None, **kwargs):
     return filepath
 
 
+def rmpath(path=None, *args, die=True, verbose=True, interactive=False, **kwargs):
+    """
+    Remove file(s) and folder(s). Alias to ``os.remove()`` (for files) and ``shutil.rmtree()``
+    (for folders).
+
+    Arguments:
+        path (str/Path/list): file, folder, or list to remove
+        args (list): additional paths to remove
+        die (bool): whether or not to raise an exception if cannot remove
+        verbose (bool): how much detail to print
+        interactive (bool): whether to confirm prior to each deletion
+        kwargs (dict): passed to ``os.remove()``/``shutil.rmtree()``
+
+    Raises:
+        FileNotFoundError: if ``die`` is true and a path does not exist, or exists but is neither a file nor a folder
+        Exception: if ``die`` is true, any error raised by the underlying removal function is re-raised
+
+    **Examples**::
+
+       sc.rmpath('myobj.obj') # Remove a single file
+       sc.rmpath('myobj1.obj', 'myobj2.obj', 'myobj3.obj') # Remove multiple files
+       sc.rmpath(['myobj.obj', 'tests'], interactive=True) # Remove a file and a folder interactively
+       sc.rmpath(sc.getfilelist('tests/*.obj')) # Example of removing multiple files
+    """
+
+    paths = scu.mergelists(path, *args)
+    for path in paths:
+
+        # Work out which function can remove this path, or why it cannot be removed
+        rm_func = None
+        if not os.path.exists(path):
+            errormsg = f'Path "{path}" does not exist'
+        elif os.path.isfile(path):
+            rm_func = os.remove
+        elif os.path.isdir(path):
+            rm_func = shutil.rmtree
+        else:
+            errormsg = f'Path "{path}" exists, but is neither a file nor a folder: unable to remove'
+
+        # Nothing can be removed: report it and move on to the next path, rather than
+        # falling through to the prompt/removal step with no valid removal function
+        if rm_func is None:
+            if die:
+                raise FileNotFoundError(errormsg)
+            elif verbose:
+                print(errormsg)
+            continue
+
+        if interactive:
+            ans = input(f'Remove "{path}"? (y/[n]) ')
+            if ans != 'y':
+                print(f'  Skipping "{path}"')
+                continue
+
+        try:
+            rm_func(path, **kwargs)
+            if verbose or interactive:
+                print(f'Removed "{path}"')
+        except Exception as E:
+            if die:
+                raise
+            elif verbose:
+                errormsg = f'Could not remove "{path}": {str(E)}'
+                print(errormsg)
+
+    return
+
+
 ##############################################################################
 #%% JSON functions
 ##############################################################################
@@ -549,15 +618,15 @@ def thisdir(file=None, path=None, *args, aspath=None, **kwargs):
 
 def sanitizejson(obj, verbose=True, die=False, tostring=False, **kwargs):
     """
-    This is the main conversion function for Python data-structures into
-    JSON-compatible data structures (note: sanitizejson/jsonify are identical).
+    This is the main conversion function for Python data-structures into JSON-compatible
+    data structures (note: ``sc.sanitizejson()/sc.jsonify()`` are identical).
 
     Args:
-        obj (any): almost any kind of data structure that is a combination of list, numpy.ndarray, odicts, etc.
-        verbose (bool): level of detail to print
-        die (bool): whether or not to raise an exception if conversion failed (otherwise, return a string)
+        obj      (any):  almost any kind of data structure that is a combination of list, numpy.ndarray, odicts, etc.
+        verbose  (bool): level of detail to print
+        die      (bool): whether or not to raise an exception if conversion failed (otherwise, return a string)
         tostring (bool): whether to return a string representation of the sanitized object instead of the object itself
-        kwargs (dict): passed to json.dumps() if tostring=True
+        kwargs   (dict): passed to json.dumps() if tostring=True
 
     Returns:
         object (any or str): the converted object that should be JSON compatible, or its representation as a string if tostring=True
@@ -980,9 +1049,16 @@ def readcells(self, wbargs=None, *args, **kwargs):
         f = self.tofile()
         kwargs['fileobj'] = f
 
+        # Pull out the requested cells (if any) so they are not passed on to the spreadsheet loader
+        cells = kwargs.pop('cells', None)
+
         # Read in sheetoutput (sciris dataframe object for xlrd, 2D numpy array for openpyxl).
+        load_args = scu.mergedicts(dict(header=None), kwargs)
         if method == 'xlrd': # pragma: no cover
-            sheetoutput = loadspreadsheet(*args, **kwargs, method='xlrd')  # returns sciris dataframe object
+            sheetoutput = loadspreadsheet(*args, **load_args, method='xlrd')  # returns sciris dataframe object
+        elif method == 'pandas':
+            pandas_sheet = loadspreadsheet(*args, **load_args, method='pandas')
+            sheetoutput = pandas_sheet.values
         elif method in ['openpyxl', 'openpyexcel']:
             wb_reader = self.openpyxl if method == 'openpyxl' else self.openpyexcel
             wb_reader(**wbargs)
@@ -996,16 +1072,14 @@ def readcells(self, wbargs=None, *args, **kwargs):
             errormsg = f'Reading method not found; must be openpyxl or xlrd, not {method}'
             raise ValueError(errormsg)
 
-        # Return the appropriate output
-        cells = kwargs.get('cells')
         if cells is None:  # If no cells specified, return the whole sheet.
             return sheetoutput
         else:
             results = []
             for cell in cells:  # Loop over all cells
                 rownum = cell[0]
                 colnum = cell[1]
-                if method == 'xlrd':  # If we're using xlrd, reduce the row number by 1.
+                if method in ['xlrd']:  # If we're using xlrd, reduce the row number by 1.
                     rownum -= 1
                 results.append(sheetoutput[rownum][colnum])  # Grab and append the result at the cell.
             return results

diff --git a/tests/test_fileio.py b/tests/test_fileio.py
@@ -8,28 +8,29 @@
 import openpyxl
 import sciris as sc
 
+# Define filenames
+filedir = 'files' + os.sep
+files = sc.prettyobj()
+files.excel  = filedir + 'test.xlsx'
+files.binary = filedir + 'test.obj'
+files.text   = filedir + 'text.txt'
+files.zip    = filedir + 'test.zip'
+tidyup = True
+
+# Define the test data
+nrows = 15
+ncols = 3
+testdata   = pl.zeros((nrows+1, ncols), dtype=object) # Includes header row
+testdata[0,:] = ['A', 'B', 'C'] # Create header
+testdata[1:,:] = pl.rand(nrows,ncols) # Create data
+
+
 def test_spreadsheets():
     '''
     Preserved for completeness, but fairly fragile since relies on not-well-trodden
     Excel libraries.
     '''
 
-    # Define filenames
-    filedir = 'files' + os.sep
-    files = sc.prettyobj()
-    files.excel  = filedir + 'test.xlsx'
-    files.binary = filedir + 'test.obj'
-    files.text   = filedir + 'text.txt'
-    files.zip    = filedir + 'test.zip'
-    tidyup = True
-
-    # Define the test data
-    nrows = 15
-    ncols = 3
-    testdata   = pl.zeros((nrows+1, ncols), dtype=object) # Includes header row
-    testdata[0,:] = ['A', 'B', 'C'] # Create header
-    testdata[1:,:] = pl.rand(nrows,ncols) # Create data
-
     # Test spreadsheet writing, and create the file for later
     formats = {
         'header':{'bold':True, 'bg_color':'#3c7d3e', 'color':'#ffffff'},
@@ -41,7 +42,6 @@ def test_spreadsheets():
     formatdata[0,:] = 'header' # Format header
     sc.savespreadsheet(filename=files.excel, data=testdata, formats=formats, formatdata=formatdata)
 
-
     # Test loading
     sc.heading('Loading spreadsheet')
     data = sc.loadspreadsheet(files.excel)
@@ -51,12 +51,15 @@ def test_spreadsheets():
     if os.path.exists(excel_path):
         sc.heading('Reading cells')
         wb = sc.Spreadsheet(filename=excel_path) # Load a sample databook to try pulling cells from
-        celltest = wb.readcells(method='openpyxl', wbargs={'data_only': True}, sheetname='Baseline year population inputs', cells=[[46, 2], [47, 2]]) # Grab cells using openpyxl.  You have to set wbargs={'data_only': True} to pull out cached values instead of formula strings
-        print(f'openpyxl output: {celltest}')
+        kw = dict(sheetname='Baseline year population inputs', cells=[[46, 2], [47, 2]])
+        celltest_opyxl = wb.readcells(method='openpyxl', **kw, wbargs={'data_only': True}) # Grab cells using openpyxl.  You have to set wbargs={'data_only': True} to pull out cached values instead of formula strings
+        celltest_pd    = wb.readcells(method='pandas',   **kw)  # Grab cells using pandas
+        print(f'openpyxl output: {celltest_opyxl}')
+        print(f'pandas output: {celltest_pd}')
+        assert celltest_opyxl == celltest_pd
     else:
         print(f'{excel_path} not found, skipping...')
 
-
     sc.heading('Loading a blobject')
     blob = sc.Blobject(files.excel)
     f = blob.tofile()
@@ -82,6 +85,16 @@ def test_spreadsheets():
     print(S)
     sc.pp(data)
 
+    if tidyup:
+        sc.rmpath(files.excel)
+
+    return S
+
+
+def test_fileio():
+    '''
+    Test other file I/O functions
+    '''
     sc.heading('Saveobj/loadobj')
     sc.saveobj(files.binary, testdata)
 
@@ -101,7 +114,7 @@ def test_spreadsheets():
         sc.pp(sc.getfilelist(abspath=tf, filesonly=tf, foldersonly=not(tf), nopath=tf, aspath=tf))
 
     sc.heading('Save zip')
-    sc.savezip(files.zip, [files.text, files.excel])
+    sc.savezip(files.zip, [files.text, files.binary])
 
 
     '''
@@ -145,16 +158,9 @@ def __init__(self, x):
     if tidyup:
         sc.blank()
         sc.heading('Tidying up')
-        for fn in [files.excel, files.binary, files.text, files.zip, 'spreadsheet.xlsx']:
-            try:
-                os.remove(fn)
-                print('Removed %s' % fn)
-            except:
-                pass
-
-    print('Done, all fileio tests succeeded')
+        sc.rmpath([files.binary, files.text, files.zip, 'spreadsheet.xlsx'], die=False)
 
-    return S
+    return obj
 
 
 def test_json():
@@ -221,6 +227,7 @@ def test_load_dump_str():
     sc.tic()
 
     spread = test_spreadsheets()
+    fileio = test_fileio()
     json   = test_json()
     jp     = test_jsonpickle()
     string = test_load_dump_str()