Skip to content

Commit

Permalink
Merge pull request #74 from kaklise/main
Browse files Browse the repository at this point in the history
Minor updates to address pandas and yaml deprecation warnings
  • Loading branch information
kaklise committed Apr 13, 2022
2 parents 8d95ba4 + cec6041 commit 3f62dd2
Show file tree
Hide file tree
Showing 11 changed files with 65 additions and 41 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/build_tests.yml
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.9
- name: Install dependencies
run: |
python --version
Expand Down Expand Up @@ -166,7 +166,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.9
- uses: actions/checkout@v2
- name: Install coverage
run: |
Expand Down Expand Up @@ -212,7 +212,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.9
- uses: actions/checkout@v2
- name: Install coverage
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/quick_check.yml
Expand Up @@ -19,7 +19,7 @@ jobs:
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.9
- name: Install packages
run: |
python -m pip install --upgrade pip
Expand Down
8 changes: 4 additions & 4 deletions documentation/automation.rst
Expand Up @@ -50,13 +50,13 @@ The following code could be used as a Python driver that runs using a task sched
>>> df1 = {'A': np.random.normal(size=N),'B': np.random.normal(size=N)}
>>> df1 = pd.DataFrame(df1, index=index)
>>> df1.index.name = 'timestamp'
>>> df1.to_sql('data', engine, dtype={'timestamp': DateTime(), 'A': Float(), 'B': Float()})
>>> nrows = df1.to_sql('data', engine, dtype={'timestamp': DateTime(), 'A': Float(), 'B': Float()})

>>> index = pd.date_range(date-datetime.timedelta(days=1), periods=N, freq='Min')
>>> df2 = {'A': np.random.normal(size=N),'B': np.random.normal(size=N)}
>>> df2 = pd.DataFrame(df2, index=index)
>>> df2.index.name = 'timestamp'
>>> df2.to_sql('qc_data', engine, dtype={'timestamp': DateTime(), 'A': Float(), 'B': Float()})
>>> nrows = df2.to_sql('qc_data', engine, dtype={'timestamp': DateTime(), 'A': Float(), 'B': Float()})

>>> #data1 = engine.execute("SELECT * FROM data").fetchall()
>>> #history1 = engine.execute("SELECT * FROM qc_data").fetchall()
Expand Down Expand Up @@ -91,8 +91,8 @@ The following code could be used as a Python driver that runs using a task sched
>>> pm.check_outlier([-3, 3], window=3600, streaming=True)

>>> # Save the cleaned data and test results to the database
>>> pm.cleaned_data.to_sql('qc_data', engine, if_exists='append')
>>> pm.test_results.to_sql('qc_summary', engine, if_exists='append')
>>> nrows = pm.cleaned_data.to_sql('qc_data', engine, if_exists='append')
>>> nrows = pm.test_results.to_sql('qc_summary', engine, if_exists='append')

>>> # Create a monitoring report with test results and graphics
>>> test_results_graphics = pecos.graphics.plot_test_results(data, pm.test_results)
Expand Down
2 changes: 1 addition & 1 deletion documentation/environment.yml
@@ -1,6 +1,6 @@
name: pecos
dependencies:
- python=3.8
- python=3.9
- numpy
- pandas
- pytz
Expand Down
1 change: 1 addition & 0 deletions documentation/whatsnew/v0.2.1.rst
Expand Up @@ -4,6 +4,7 @@ v0.2.1 (main)
--------------------------

* Bug fix in custom static and streaming quality control tests to use a specific column
* Minor updates to address pandas and yaml deprecation warnings
* Minor updates for testing and documentation
* Added GitHub Actions and Python 3.9 tests

2 changes: 1 addition & 1 deletion examples/dashboard/dashboard_example_1.py
Expand Up @@ -29,7 +29,7 @@
# Open config file and extract information
config_file = system_name + '_config.yml'
fid = open(config_file, 'r')
config = yaml.load(fid)
config = yaml.safe_load(fid)
fid.close()
trans = config['Translation']
specs = config['Specifications']
Expand Down
2 changes: 1 addition & 1 deletion examples/pv/pv_example.py
Expand Up @@ -27,7 +27,7 @@
# Open config file and extract information
config_file = 'Baseline_config.yml'
fid = open(config_file, 'r')
config = yaml.load(fid)
config = yaml.safe_load(fid)
fid.close()
location = config['Location']
sapm_parameters = config['SAPM Parameters']
Expand Down
2 changes: 1 addition & 1 deletion examples/simple/simple_example_using_config.py
Expand Up @@ -13,7 +13,7 @@
# Open configuration file
config_file = 'simple_config.yml'
fid = open(config_file, 'r')
config = yaml.load(fid)
config = yaml.safe_load(fid)
fid.close()

# Create a Pecos PerformanceMonitoring data object
Expand Down
58 changes: 38 additions & 20 deletions pecos/io.py
Expand Up @@ -55,7 +55,8 @@ def read_campbell_scientific(filename, index_col='TIMESTAMP', encoding=None):
logger.info("Reading Campbell Scientific CSV file " + filename)

try:
df = pd.read_csv(filename, skiprows=1, encoding=encoding, index_col=index_col, parse_dates=True, dtype ='unicode', error_bad_lines=False) #, low_memory=False)
df = pd.read_csv(filename, skiprows=1, encoding=encoding, index_col=index_col,
parse_dates=True, dtype ='unicode', error_bad_lines=False) #, low_memory=False)
df = df[2:]
index = pd.to_datetime(df.index)
Unnamed = df.filter(regex='Unnamed')
Expand Down Expand Up @@ -385,23 +386,33 @@ def write_dashboard(column_names, row_names, content, title='Pecos Dashboard',
Parameters
----------
column_names : list of strings
Column names listed in the order they should appear in the dashboard, i.e. ['location1', 'location2']
Column names listed in the order they should appear in the dashboard,
i.e. ['location1', 'location2']
row_names : list of strings
Row names listed in the order they should appear in the dashboard, i.e. ['system1', 'system2']
Row names listed in the order they should appear in the dashboard,
i.e. ['system1', 'system2']
content : dictionary
Dashboard content for each cell.
Dictionary keys are tuples indicating the row name and column name,
i.e. ('row name', 'column name'), where 'row name' is in the list
row_names and 'column name' is in the list column_names.
Dictionary keys are tuples indicating the row name and column name, i.e. ('row name', 'column name'), where 'row name' is in the list row_names and 'column name' is in the list column_names.
For each ('row name', 'column name') key, another dictionary is defined
that contains the content to be included in each cell of the dashboard.
Each cell can contain text, graphics, a table, and an html link. These
are defined using the following **case sensitive** keys:
For each key, another dictionary is defined that contains the content to be included in each cell of the dashboard.
Each cell can contain text, graphics, a table, and an html link. These are defined using the following keys:
- text (string) = text at the top of each cell
- graphics (list of strings) = a list of graphics file names. Each file name includes the full path
- table (string) = a table in html format, for example a table of performance metrics. DataFrames can be converted to an html string using df.to_html() or df.transpose().to_html(). Values in the table can be color coded using pandas Styler class.
- link (dict) = a dictionary where keys define the name of the link and values define the html link (with full path)
- **text** (string) = text at the top of each cell
- **graphics** (list of strings) = a list of graphics file names. Each
file name includes the full path
- **table** (string) = a table in html format, for example a table of
performance metrics. DataFrames can be converted to an html string
using df.to_html() or df.transpose().to_html(). Values in the table
can be color coded using pandas Styler class.
- **link** (dict) = a dictionary where keys define the name of the link
and values define the html link (with full path)
For example::
Expand All @@ -424,7 +435,8 @@ def write_dashboard(column_names, row_names, content, title='Pecos Dashboard',
Image width in the HTML report, default = 250
datatables : boolean, optional
Use datatables.net to format the dashboard, default = False. See https://datatables.net/ for more information.
Use datatables.net to format the dashboard, default = False.
See https://datatables.net/ for more information.
encode : boolean, optional
Encode graphics in the html, default = False
Expand All @@ -442,10 +454,12 @@ def write_dashboard(column_names, row_names, content, title='Pecos Dashboard',
logger.info("Writing dashboard")

# Set pandas display option
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', 40)

html_string = _html_template_dashboard(column_names, row_names, content, title, footnote, logo, im_width, datatables, encode)
html_string = _html_template_dashboard(column_names, row_names, content,
title, footnote, logo, im_width,
datatables, encode)

# Write html file
if os.path.dirname(filename) == '':
Expand All @@ -460,7 +474,8 @@ def write_dashboard(column_names, row_names, content, title='Pecos Dashboard',

return full_filename

def _latex_template_monitoring_report(content, title, logo, im_width_test_results, im_width_custom, im_width_logo):
def _latex_template_monitoring_report(content, title, logo, im_width_test_results,
im_width_custom, im_width_logo):

template = env.get_template('monitoring_report.tex')

Expand All @@ -471,7 +486,8 @@ def _latex_template_monitoring_report(content, title, logo, im_width_test_result

return template.render(**locals())

def _html_template_monitoring_report(content, title, logo, im_width_test_results, im_width_custom, im_width_logo, encode):
def _html_template_monitoring_report(content, title, logo, im_width_test_results,
im_width_custom, im_width_logo, encode):

# if encode == True, encode the images
img_dic = {}
Expand All @@ -492,7 +508,8 @@ def _html_template_monitoring_report(content, title, logo, im_width_test_results

return template.render(**locals())

def _html_template_dashboard(column_names, row_names, content, title, footnote, logo, im_width, datatables, encode):
def _html_template_dashboard(column_names, row_names, content, title, footnote,
logo, im_width, datatables, encode):

# if encode == True, encode the images
img_dic = {}
Expand All @@ -517,9 +534,10 @@ def _html_template_dashboard(column_names, row_names, content, title, footnote,

def device_to_client(config):
"""
Read channels on modbus device, scale and calibrate the values, and store the data in a MySQL database.
The inputs are provided by a configuration dictionary that describe general information for
data acquisition and the devices.
Read channels on modbus device, scale and calibrate the values, and store
the data in a MySQL database. The inputs are provided by a configuration
dictionary that describe general information for data acquisition and the
devices.
Parameters
----------
Expand Down
11 changes: 8 additions & 3 deletions pecos/monitoring.py
Expand Up @@ -57,10 +57,14 @@ def __init__(self):
"""
self.df = pd.DataFrame()
self.trans = {}
self.tfilter = pd.Series()
self.tfilter = pd.Series(dtype='float64')
self.test_results = pd.DataFrame(columns=['Variable Name',
'Start Time', 'End Time',
'Timesteps', 'Error Flag'])
self.test_results['Start Time'] = self.test_results['Start Time'].astype('datetime64[ns]')
self.test_results['End Time'] = self.test_results['End Time'].astype('datetime64[ns]')
self.test_results['Timesteps'] = self.test_results['Timesteps'].astype('int64')


@property
def data(self):
Expand Down Expand Up @@ -212,7 +216,7 @@ def _append_test_results(self, mask, error_msg, min_failures=1, timestamp_test=F
counter = counter + 1

test_results = pd.DataFrame(test_results).T
self.test_results = self.test_results.append(test_results, ignore_index=True)
self.test_results = pd.concat([self.test_results, test_results], ignore_index=True)

def add_dataframe(self, data):
"""
Expand Down Expand Up @@ -859,7 +863,8 @@ def check_custom_streaming(self, quality_control_func, window, key=None,

for i, t in enumerate(np.arange(ti,np_data.shape[0],1)):

t_start = df.index.get_loc(df.index[t]-history_window, method='nearest')
#t_start = df.index.get_loc(df.index[t]-history_window, method='nearest')
t_start = df.index.get_indexer([df.index[t]-history_window], method='nearest')[0]
t_timestamp = df.index[t]

data_pt = pd.Series(np_data[t], index=df.columns)
Expand Down
12 changes: 6 additions & 6 deletions pecos/tests/test_metrics.py
Expand Up @@ -121,21 +121,21 @@ def test_qci_with_test_results():
pm.add_dataframe(df)
pm.add_translation_dictionary(trans)

test_result = {
test_result = pd.DataFrame({
'Variable Name': 'A',
'Start Time': '2016-01-01 01:00:00',
'End Time': '2016-01-01 04:00:00',
'Timesteps': 4,
'Error Flag': 'Error Flag'}
pm.test_results = pm.test_results.append(pd.DataFrame(test_result, index=[1]))
'Error Flag': 'Error Flag'}, index=[1])
pm.test_results = pd.concat([pm.test_results, test_result])

test_result = {
test_result = pd.DataFrame({
'Variable Name': 'B',
'Start Time': '2016-01-01 01:00:00',
'End Time': '2016-01-01 01:00:00',
'Timesteps': 1,
'Error Flag': 'Error Flag'}
pm.test_results = pm.test_results.append(pd.DataFrame(test_result, index=[2]))
'Error Flag': 'Error Flag'}, index=[2])
pm.test_results = pd.concat([pm.test_results, test_result])
mask = pm.mask
QCI = pecos.metrics.qci(mask)

Expand Down

0 comments on commit 3f62dd2

Please sign in to comment.