implement unit testing for conversion

- OOP HBN and HDF5 file access classes - unit testing framework setup - HSPF test UCI file and output HBN file updates
respec · Apr 22, 2021 · c556ff7 · c556ff7
1 parent bdb8b5a
commit c556ff7
Show file tree

Hide file tree

Showing 13 changed files with 755 additions and 22 deletions.
diff --git a/.gitignore b/.gitignore
@@ -31,3 +31,4 @@ __pycache__/*
 .tox/*
 stats.dat
 .ropeproject
+.idea
diff --git a/HSP2tools/HBNOutput.py b/HSP2tools/HBNOutput.py
@@ -0,0 +1,130 @@
+from struct import unpack
+from numpy import fromfile
+from pandas import DataFrame
+from datetime import datetime, timedelta
+from collections import defaultdict
+
+class HBNOutput:
+    """Reader for a binary HBN output file created by HSPF.
+
+    ``read_data`` parses the file into one DataFrame per
+    (operation, segment id, activity, time code) group, and
+    ``get_time_series`` looks up a single constituent column in those frames.
+    """
+
+    def __init__(self, file_name):
+        """Store the HBN path and start with empty result containers.
+
+        Parameters
+        ----------
+        file_name : str
+            Name/path of the HBN file. It is not read until ``read_data``.
+        """
+        self.data_frames = []
+        self.file_name = file_name
+        self.simulation_duration_count = 0
+        self.summary = []
+        self.summarycols = []
+        self.summaryindx = []
+
+        self.tcodes = {1: 'Minutely', 2: 'Hourly', 3: 'Daily', 4: 'Monthly', 5: 'Yearly'}
+
+    def read_data(self):
+        """
+        Read ALL data from ``self.file_name`` into DataFrames.
+
+        Populates
+        ---------
+        data_frames : list of DataFrame
+            One frame per (operation, id, activity, tcode) group, sorted by time.
+        summaryindx : list of str
+            Frame names ``{operation}_{activity}_{id:03d}_{tcode}``, parallel
+            to ``data_frames``.
+        summary, summarycols : list
+            One summary tuple per frame and the matching column labels.
+        simulation_duration_count : int
+            Row count of the first group read (only set while it is still 0).
+
+        Returns
+        -------
+        None
+            A message is printed and nothing is populated when the file is
+            empty or does not start with the magic number 0xFD.
+        """
+
+        data = fromfile(self.file_name, 'B')
+        # An empty file has no first byte to inspect; treat it as a bad file
+        # instead of failing with an IndexError.
+        if len(data) == 0 or data[0] != 0xFD:
+            print('BAD HBN FILE - must start with magic number 0xFD')
+            return
+
+        # Build layout maps of the file's contents
+        mapn = defaultdict(list)  # (operation, id, activity) -> constituent names
+        mapd = defaultdict(list)  # (operation, id, activity, tcode) -> [(offset, reclen)]
+        index = 1  # already used first byte (magic number)
+        while index < len(data):
+            rc1, rc2, rc3, rc, rectype, operation, opn_id, activity = unpack('4BI8sI8s', data[index:index + 28])
+            # The record length is spread over the four leading bytes.
+            rc1 = int(rc1 >> 2)
+            rc2 = int(rc2) * 64 + rc1  # 2**6
+            rc3 = int(rc3) * 16384 + rc2  # 2**14
+            reclen = int(rc) * 4194304 + rc3 - 24  # 2**22
+
+            operation = operation.decode('ascii').strip()  # unpack yields bytes, not str
+            activity = activity.decode('ascii').strip()
+
+            if operation not in {'PERLND', 'IMPLND', 'RCHRES'}:
+                print('ALIGNMENT ERROR', operation)
+
+            if rectype == 1:  # data record
+                tcode = unpack('I', data[index + 32: index + 36])[0]
+                mapd[operation, opn_id, activity, tcode].append((index, reclen))
+            elif rectype == 0:  # data names record
+                i = index + 28
+                slen = 0
+                while slen < reclen:
+                    # each name is stored as a 4-byte length followed by the text
+                    ln = unpack('I', data[i + slen: i + slen + 4])[0]
+                    n = unpack(f'{ln}s', data[i + slen + 4: i + slen + 4 + ln])[0].decode('ascii').strip()
+                    mapn[operation, opn_id, activity].append(n.replace('-', ''))
+                    slen += 4 + ln
+            else:
+                print('UNKNOW RECTYPE', rectype)
+            if reclen < 36:
+                index += reclen + 29  # found by trial and error
+            else:
+                index += reclen + 30
+
+        self.data_frames = []
+        self.summary = []
+        self.summarycols = ['Operation', 'Activity', 'segment', 'Frequency', 'Shape', 'Start', 'Stop']
+        self.summaryindx = []
+        for (operation, opn_id, activity, tcode), records in mapd.items():
+            names = mapn[operation, opn_id, activity]
+            nvals = len(names)
+            rows = []
+            times = []
+            for (offset, reclen) in records:
+                yr, mo, dy, hr, mn = unpack('5I', data[offset + 36: offset + 56])
+                # The hour is added as a timedelta rather than passed to
+                # datetime(); presumably so an hour of 24 rolls over to the
+                # next day - TODO confirm against the HBN format.
+                times.append(datetime(yr, mo, dy, 0, mn) + timedelta(hours=hr))
+
+                start = offset + 56
+                rows.append(unpack(f'{nvals}f', data[start:start + (4 * nvals)]))
+            dfname = f'{operation}_{activity}_{opn_id:03d}_{tcode}'
+            if self.simulation_duration_count == 0:
+                self.simulation_duration_count = len(times)
+            # sort_index() sorts the row index by default; passing the axis
+            # positionally is rejected by pandas 2.x.
+            df = DataFrame(rows, index=times, columns=names).sort_index()
+            self.data_frames.append(df)
+
+            self.summaryindx.append(dfname)
+            self.summary.append((operation, activity, str(opn_id), self.tcodes[tcode], str(df.shape), df.index[0], df.index[-1]))
+
+    def get_time_series(self, name, time_unit):
+        """
+        Get a single time series from the frames loaded by ``read_data``.
+
+        Parameters
+        ----------
+        name : str
+            Constituent (column) name to look for.
+        time_unit : str
+            One of the ``tcodes`` labels, matched case-insensitively
+            (e.g. 'hourly', 'Monthly'). Any other value falls back to
+            time code 2 (hourly).
+
+        Returns
+        -------
+        Series or None
+            The first matching column, or None when no frame with the
+            requested time code has a column called ``name``.
+        """
+        wanted = time_unit.lower() if isinstance(time_unit, str) else time_unit
+        target_tcode = next(
+            (code for code, label in self.tcodes.items() if label.lower() == wanted),
+            2,
+        )
+
+        # Frame names end in '_<tcode>' (see read_data); summaryindx and
+        # data_frames are parallel lists, so walk them together.
+        suffix = f'_{target_tcode}'
+        for frame_name, data_frame in zip(self.summaryindx, self.data_frames):
+            if frame_name.endswith(suffix) and name in data_frame.columns:
+                return data_frame[name]
+
+        return None
+
+    @staticmethod
+    def save_time_series_to_file(file_name, time_series):
+        """Write ``time_series`` to ``file_name`` as 'timestamp,value' lines.
+
+        Any existing file of that name is overwritten.
+        """
+        with open(file_name, 'w') as f:
+            for dt, dv in zip(time_series.index, time_series.values):
+                f.write(f'{dt},{dv}\n')
diff --git a/HSP2tools/HDF5.py b/HSP2tools/HDF5.py
@@ -0,0 +1,163 @@
+from struct import unpack
+
+import h5py
+from numpy import fromfile
+from pandas import DataFrame
+import pandas as pd
+from datetime import datetime, timedelta
+from collections import defaultdict
+
+class HDF5:
+    """Reader for the /RESULTS tables of an HDF5 file holding HSP2 outputs.
+
+    Typical use: ``open_output()`` to build the data dictionary,
+    ``read_output(opn_type, opn_ids)`` to load the tables of interest, then
+    ``get_time_series(name, duration)`` to pull out one column.
+    """
+
+    def __init__(self, file_name):
+        """Store the HDF5 path and start with empty result containers.
+
+        Parameters
+        ----------
+        file_name : str
+            Name/path of the HDF5 file. It is not opened until
+            ``open_output`` or ``read_output`` is called.
+        """
+        self.data_frames = []
+        self.file_name = file_name
+        self.simulation_duration_count = 0
+        self.summary = []
+        self.summarycols = []
+        self.summaryindx = []
+
+        self.time_index = []  # this will be shared with all time series
+        # '<operation>:<activity>' -> {field index: field name}
+        # '<operation>:<activity>:values' -> DataFrame once loaded, else None
+        self.data_dictionary = {}
+        self.dd_key_separator = ':'
+
+        self.tcodes = {1: 'Minutely', 2: 'Hourly', 3: 'Daily', 4: 'Monthly', 5: 'Yearly'}
+
+    def _values_key(self, table_key):
+        """Return the data_dictionary key that holds the DataFrame for ``table_key``."""
+        return table_key + f'{self.dd_key_separator}values'
+
+    @staticmethod
+    def _read_field_names(data_table):
+        """Map field index -> field name from a table's FIELD_<n>_NAME attributes."""
+        field_indices = {}
+        for table_attr in data_table.attrs:
+            if not (table_attr.startswith('FIELD') and table_attr.endswith('NAME')):
+                continue
+            try:
+                # 'U' is NumPy's unicode string type code
+                attr_value = data_table.attrs[table_attr].astype('U')  # e.g. table_attr = FIELD_2_NAME
+            except Exception:
+                # best effort: an attribute that cannot be converted is skipped
+                continue
+            if attr_value != '':
+                # FIELD_n_NAME -> n
+                field_indices[int(table_attr.split('_')[1])] = attr_value
+        return field_indices
+
+    def open_output(self):
+        """
+        Read the data dictionary of every table under the file's /RESULTS group.
+
+        Populates
+        ---------
+        data_dictionary : dict
+            For each '<operation>:<activity>' table, the field index -> field
+            name lookup, plus a '<operation>:<activity>:values' placeholder
+            set to None until ``read_output`` loads the table.
+        time_index : list
+            Hourly timestamps built from the start and end times stored in
+            /CONTROL/GLOBAL (end time excluded); shared by all time series.
+        """
+        with h5py.File(self.file_name, "r") as f:
+            global_info = f.get('/CONTROL/GLOBAL')['table'].fields('Info')
+            start_time = pd.to_datetime(global_info[1].astype('datetime64[D]'))
+            end_time = pd.to_datetime(global_info[2].astype('datetime64[D]'))
+            section = f.get('/RESULTS')
+            for opn_key in section.keys():  # e.g. opn_key = IMPLND_I001
+                opn_output_grp = section[opn_key]
+                for opn_output_key in opn_output_grp.keys():  # e.g. opn_output_key = IQUAL
+                    dd_key = opn_key + self.dd_key_separator + opn_output_key
+                    data_table = opn_output_grp[opn_output_key]['table']
+                    self.data_dictionary[dd_key] = self._read_field_names(data_table)
+                    self.data_dictionary[self._values_key(dd_key)] = None
+                    if len(self.time_index) == 0:
+                        # Built from the start and end times rather than the
+                        # table's own index column (issue in HDF5 table!).
+                        # A Timedelta is used for the frequency instead of the
+                        # deprecated 'H' alias.
+                        self.time_index = list(
+                            pd.date_range(start_time, end_time, freq=pd.Timedelta(hours=1))[:-1])
+
+    def screen_dd_key(self, opn_type, opn_ids):
+        """
+        List the data dictionary table keys that belong to one operation type.
+
+        Parameters
+        ----------
+        opn_type : str
+            Operation type, e.g. 'IMPLND', 'PERLND' or 'RCHRES'.
+        opn_ids : collection of int or None
+            Operation ids to keep. None or empty keeps every id.
+
+        Returns
+        -------
+        list of str
+            Matching '<operation>:<activity>' keys. Keys whose operation id
+            cannot be parsed as an integer are left out.
+        """
+        prefix_suffixes = {'IMPLND': '_I', 'PERLND': '_P', 'RCHRES': '_R'}
+        key_prefix = opn_type + prefix_suffixes.get(opn_type, '')
+        keep_all = opn_ids is None or len(opn_ids) == 0
+
+        dd_keys_to_read = []
+        for key in self.data_dictionary:
+            if not key.startswith(key_prefix) or key.endswith('values'):
+                continue
+            opn_name = key.split(self.dd_key_separator)[0]
+            try:
+                opn_id = int(opn_name[len(key_prefix):])
+                if keep_all or opn_id in opn_ids:
+                    dd_keys_to_read.append(key)
+            except (ValueError, TypeError):
+                # best effort: skip keys whose id is not numeric
+                pass
+
+        return dd_keys_to_read
+
+    def read_output_from_table(self, table_key):
+        """
+        Load one /RESULTS table into a DataFrame.
+
+        The frame is stored in ``data_dictionary`` under
+        '<table_key>:values', indexed by ``time_index``. The first field of
+        every row (and the first field name) is dropped.
+
+        Parameters
+        ----------
+        table_key : str
+            '<operation>:<activity>' key as produced by ``open_output``.
+        """
+        opn_key, activity_key = table_key.split(self.dd_key_separator)
+        field_names = self.data_dictionary[table_key]
+        columns = [field_names[field_index] for field_index in sorted(field_names)]
+        with h5py.File(self.file_name, "r") as f:
+            data_table = f.get('/RESULTS')[opn_key][activity_key]['table']  # e.g. activity_key = IQUAL
+            # read the whole table in one call instead of one read per row
+            rows = [list(row)[1:] for row in data_table[:]]
+        self.data_dictionary[self._values_key(table_key)] = \
+            DataFrame(rows, index=self.time_index, columns=columns[1:])
+
+    def read_output(self, opn_type, opn_ids=None):
+        """
+        Load every table of ``opn_type`` (optionally limited to ``opn_ids``).
+
+        Does nothing when ``open_output`` has not populated the data
+        dictionary yet.
+        """
+        if not self.data_dictionary:
+            return
+        for dd_key_to_read in self.screen_dd_key(opn_type, opn_ids):
+            self.read_output_from_table(dd_key_to_read)
+
+    def _loaded_frames(self):
+        """Yield every DataFrame held in data_frames or loaded by read_output."""
+        yield from self.data_frames
+        for value in self.data_dictionary.values():
+            if isinstance(value, DataFrame):
+                yield value
+
+    def get_time_series(self, name, duration):
+        """
+        Get a single time series from the loaded tables.
+
+        Parameters
+        ----------
+        name : str
+            Constituent (column) name to look for.
+        duration : str
+            'yearly' looks for a 366-row series and 'monthly' for a 12-row
+            series. Any other value looks for a full-length series:
+            ``simulation_duration_count`` rows or, when that is still 0, the
+            length of ``time_index``.
+
+        Returns
+        -------
+        Series or None
+            The first column called ``name`` with the expected row count, or
+            None when nothing matches.
+        """
+        search_shape = self.simulation_duration_count or len(self.time_index)
+        if duration == 'yearly':
+            search_shape = 366
+        elif duration == 'monthly':
+            search_shape = 12
+
+        # Frames loaded by read_output live in data_dictionary, so they must
+        # be searched as well as data_frames.
+        for data_frame in self._loaded_frames():
+            if name in data_frame.columns and data_frame[name].shape[0] == search_shape:
+                return data_frame[name]
+
+        return None
+
+    @staticmethod
+    def save_time_series_to_file(file_name, time_series):
+        """Write ``time_series`` to ``file_name`` as 'timestamp,value' lines.
+
+        Any existing file of that name is overwritten.
+        """
+        with open(file_name, 'w') as f:
+            for dt, dv in zip(time_series.index, time_series.values):
+                f.write(f'{dt},{dv}\n')
diff --git a/HSP2tools/readUCI.py b/HSP2tools/readUCI.py
@@ -481,7 +481,7 @@ def operation(info, llines, op):
             if cat == 'SKIP':
                 continue
             if cat in {'PARAMETERS', 'STATES', 'FLAGS', 'ACTIVITY','INFO'}:
-                df = concat([temp[1] for temp in history[path,cat]], axis='columns')
+                df = concat([temp[1] for temp in history[path,cat]], axis='columns', sort=False)
                 df = fix_df(df, op, path, ddfaults, valid)
                 if cat == 'ACTIVITY' and op == 'PERLND':
                     df = df.rename(columns = {'AIRTFG':'ATEMP', 'SNOWFG':'SNOW',
-Original file line number
+Diff line change
@@ Expand Up / @@ -31,3 +31,4 @@ __pycache__/* @@
     .tox/*
     stats.dat
     .ropeproject
+    .idea