Commit
implement unit testing for conversion
- OOP HBN and HDF5 file access classes
- unit testing framework setup
- HSPF test UCI file and output HBN file updates
Showing 13 changed files with 755 additions and 22 deletions.
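The conversion test pattern these changes set up reads the same constituent from an HSPF-written HBN file and from an HSP2-written HDF5 file, then compares the two series. A minimal pytest-style sketch of that comparison, using the two access classes added below; the module names, file paths, segment id, and constituent/activity keys are all assumptions, not names taken from this commit:

import numpy as np
from HBN import HBNOutput  # hypothetical module names
from HDF5 import HDF5

def test_hourly_conversion_matches():
    # HSPF side: parse the binary output and pull one hourly series
    hbn = HBNOutput('test10.hbn')  # hypothetical test file
    hbn.read_data()
    hspf_series = hbn.get_time_series('SUPY', 'hourly')  # hypothetical constituent

    # HSP2 side: read the matching operation's table from /RESULTS
    h5 = HDF5('test10.h5')  # hypothetical test file
    h5.open_output()
    h5.read_output('IMPLND', [1])
    hsp2_df = h5.data_dictionary['IMPLND_I001:IWATER:values']  # hypothetical keys

    assert np.allclose(hspf_series.values, hsp2_df['SUPY'].values, rtol=1e-3)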
@@ -31,3 +31,4 @@ __pycache__/*
 .tox/*
 stats.dat
 .ropeproject
+.idea
@@ -0,0 +1,130 @@
from struct import unpack
from numpy import fromfile
from pandas import DataFrame
from datetime import datetime, timedelta
from collections import defaultdict


class HBNOutput:
    def __init__(self, file_name):
        self.data_frames = []
        self.file_name = file_name
        self.simulation_duration_count = 0
        self.summary = []
        self.summarycols = []
        self.summaryindx = []

        self.tcodes = {1: 'Minutely', 2: 'Hourly', 3: 'Daily', 4: 'Monthly', 5: 'Yearly'}

    def read_data(self):
        """
        Read ALL data from the HBN file (self.file_name) created by HSPF.
        Populates self.data_frames with one DataFrame per
        (operation, id, activity, tcode) group found in the file, and
        self.summary / self.summaryindx with summary information for
        each group.
        """

        data = fromfile(self.file_name, 'B')
        if data[0] != 0xFD:
            print('BAD HBN FILE - must start with magic number 0xFD')
            return

        # Build layout maps of the file's contents
        mapn = defaultdict(list)  # (operation, id, activity) -> variable names
        mapd = defaultdict(list)  # (operation, id, activity, tcode) -> (offset, reclen) of data records
        index = 1  # already used first byte (magic number)
        while index < len(data):
            rc1, rc2, rc3, rc, rectype, operation, id, activity = unpack('4BI8sI8s', data[index:index + 28])
            # reassemble the record length from its packed 6-bit pieces
            rc1 = int(rc1 >> 2)
            rc2 = int(rc2) * 64 + rc1  # 2**6
            rc3 = int(rc3) * 16384 + rc2  # 2**14
            reclen = int(rc) * 4194304 + rc3 - 24  # 2**22

            operation = operation.decode('ascii').strip()  # Python 3 unpacks to bytes, not str
            activity = activity.decode('ascii').strip()

            if operation not in {'PERLND', 'IMPLND', 'RCHRES'}:
                print('ALIGNMENT ERROR', operation)

            if rectype == 1:  # data record
                tcode = unpack('I', data[index + 32: index + 36])[0]
                mapd[operation, id, activity, tcode].append((index, reclen))
            elif rectype == 0:  # data names record
                i = index + 28
                slen = 0
                while slen < reclen:
                    ln = unpack('I', data[i + slen: i + slen + 4])[0]
                    n = unpack(f'{ln}s', data[i + slen + 4: i + slen + 4 + ln])[0].decode('ascii').strip()
                    mapn[operation, id, activity].append(n.replace('-', ''))
                    slen += 4 + ln
            else:
                print('UNKNOWN RECTYPE', rectype)
            if reclen < 36:
                index += reclen + 29  # found by trial and error
            else:
                index += reclen + 30

        self.data_frames = []
        self.summary = []
        self.summarycols = ['Operation', 'Activity', 'segment', 'Frequency', 'Shape', 'Start', 'Stop']
        self.summaryindx = []
        for (operation, id, activity, tcode) in mapd:
            rows = []
            times = []
            nvals = len(mapn[operation, id, activity])
            for (index, reclen) in mapd[operation, id, activity, tcode]:
                yr, mo, dy, hr, mn = unpack('5I', data[index + 36: index + 56])
                # HSPF writes hour 24; adding it as a timedelta rolls over to the next day
                dt = datetime(yr, mo, dy, 0, mn) + timedelta(hours=hr)
                times.append(dt)

                index += 56
                row = unpack(f'{nvals}f', data[index:index + (4 * nvals)])
                rows.append(row)
            dfname = f'{operation}_{activity}_{id:03d}_{tcode}'
            if self.simulation_duration_count == 0:
                self.simulation_duration_count = len(times)
            df = DataFrame(rows, index=times, columns=mapn[operation, id, activity]).sort_index()
            self.data_frames.append(df)

            self.summaryindx.append(dfname)
            self.summary.append((operation, activity, str(id), self.tcodes[tcode], str(df.shape), df.index[0], df.index[-1]))

    def get_time_series(self, name, time_unit):
        """
        Get a single time series by:
        1. constituent name
        2. time_unit: 'minutely', 'hourly', 'daily', 'monthly', or 'yearly'
           (falls back to hourly if no match is found)
        """
        target_tcode = 2  # hourly unless a tcode name matches time_unit
        for tcode_key in self.tcodes.keys():
            if self.tcodes[tcode_key].lower() == time_unit:
                target_tcode = tcode_key
                break

        target_data_frames = []
        for index_group_key in self.summaryindx:
            if index_group_key.endswith(str(target_tcode)):
                group_index = self.summaryindx.index(index_group_key)
                target_data_frames.append(self.data_frames[group_index])

        for data_frame in target_data_frames:
            for key in data_frame.keys():
                if key == name:
                    return data_frame[key]

        return None

    @staticmethod
    def save_time_series_to_file(file_name, time_series):
        with open(file_name, 'w+') as f:
            for row in range(len(time_series.index)):
                dt = time_series.index[row]
                dv = time_series.values[row]
                # f.write(f'{dt},{"{:.2f}".format(dv)}\n')
                f.write(f'{dt},{dv}\n')
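A short usage sketch for the HBN reader above; the file path and constituent name are placeholders, not files from this commit:

hbn = HBNOutput('test10.hbn')  # hypothetical path
hbn.read_data()  # parse every record into per-group DataFrames
series = hbn.get_time_series('SUPY', 'hourly')  # hypothetical constituent
if series is not None:
    HBNOutput.save_time_series_to_file('supy_hourly.csv', series)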
@@ -0,0 +1,163 @@
from struct import unpack

import h5py
from numpy import fromfile
from pandas import DataFrame
import pandas as pd
from datetime import datetime, timedelta
from collections import defaultdict


class HDF5:
    def __init__(self, file_name):
        self.data_frames = []
        self.file_name = file_name
        self.simulation_duration_count = 0
        self.summary = []
        self.summarycols = []
        self.summaryindx = []

        self.time_index = []  # this will be shared with all time series
        self.data_dictionary = {}
        # self.dd_implnd = {}
        # self.dd_perlnd = {}
        # self.dd_rchres = {}
        self.dd_key_separator = ':'

        self.tcodes = {1: 'Minutely', 2: 'Hourly', 3: 'Daily', 4: 'Monthly', 5: 'Yearly'}

    def open_output(self):
        """
        Read the data dictionary from the HDF5 file's /RESULTS group
        (HSP2 outputs).
        Populates
        ----------
        data_dictionary : {}
            Summary information of the data found in the HDF5 file.
        """
        with h5py.File(self.file_name, "r") as f:
            str_starttime = f.get('/CONTROL/GLOBAL')['table'].fields('Info')[1].astype('datetime64[D]')
            str_endtime = f.get('/CONTROL/GLOBAL')['table'].fields('Info')[2].astype('datetime64[D]')
            start_time = pd.to_datetime(str_starttime)
            end_time = pd.to_datetime(str_endtime)
            section = f.get('/RESULTS')
            opn_keys = list(section.keys())
            for opn_key in opn_keys:
                opn_output_grp = section[opn_key]  # e.g. opn_key = IMPLND_I001
                opn_output_keys = list(opn_output_grp.keys())
                for opn_output_key in opn_output_keys:
                    dd_key = opn_key + self.dd_key_separator + opn_output_key
                    data_table = section[opn_key][opn_output_key]['table']  # e.g. opn_output_key = IQUAL
                    all_table_attrs = list(data_table.attrs)
                    field_indices = {}
                    for table_attr in all_table_attrs:
                        try:
                            str_attr_value = data_table.attrs[table_attr].astype('unicode')  # e.g. table_attr = FIELD_2_NAME
                        except Exception:
                            str_attr_value = ''
                        if str_attr_value != '' and table_attr.startswith('FIELD') and table_attr.endswith('NAME'):
                            # convert FIELD_n_NAME attributes to a field index -> field name lookup
                            name_parts = table_attr.split('_')
                            field_indices[int(name_parts[1])] = str_attr_value
                    self.data_dictionary[dd_key] = field_indices
                    self.data_dictionary[dd_key + f'{self.dd_key_separator}values'] = None
                    if len(self.time_index) == 0:
                        # alternatively, could construct the time index from the start and end times above
                        self.time_index = list(pd.date_range(start_time, end_time, freq='H')[:-1])  # issue in HDF5 table!
                        '''
                        for row in range(data_table.attrs['NROWS']):
                            dt = pd.to_datetime(data_table.fields('index')[row].astype('datetime64[D]'))
                            self.time_index.append(dt)
                        '''

    def screen_dd_key(self, opn_type, opn_ids):
        dd_keys_to_read = []
        key_prefix = opn_type
        if opn_type == 'IMPLND':
            key_prefix += '_I'
        elif opn_type == 'PERLND':
            key_prefix += '_P'
        elif opn_type == 'RCHRES':
            key_prefix += '_R'

        for key in self.data_dictionary.keys():
            if not key.startswith(opn_type):
                continue
            if key.endswith('values'):
                continue
            parts = key.split(self.dd_key_separator)
            try:
                opn_id = int(parts[0][len(key_prefix):])
                if opn_ids is None or len(opn_ids) == 0:
                    dd_keys_to_read.append(key)
                elif opn_id in opn_ids:
                    dd_keys_to_read.append(key)
            except ValueError:
                pass  # skip keys whose segment id is not numeric

        return dd_keys_to_read

    def read_output_from_table(self, table_key):
        (opn_key, activity_key) = table_key.split(self.dd_key_separator)
        # recover the column names in field order
        mapn = []
        mapn_keys = list(self.data_dictionary[table_key].keys())
        mapn_keys.sort()
        for mapn_key in mapn_keys:
            mapn.append(self.data_dictionary[table_key][mapn_key])
        with h5py.File(self.file_name, "r") as f:
            section = f.get('/RESULTS')
            data_table = section[opn_key][activity_key]['table']  # e.g. activity_key = IQUAL
            data_table_rows = list(data_table)
            rows = []
            for row in data_table_rows:
                rows.append(list(row)[1:])  # drop the index field; time_index is used instead
            self.data_dictionary[table_key + f'{self.dd_key_separator}values'] = \
                DataFrame(rows, index=self.time_index, columns=mapn[1:])

    def read_output(self, opn_type, opn_ids=None):
        if len(self.data_dictionary) == 0:
            return
        dd_keys_to_read = self.screen_dd_key(opn_type, opn_ids)
        for dd_key_to_read in dd_keys_to_read:
            self.read_output_from_table(dd_key_to_read)

    def get_time_series(self, name, duration):
        """
        Get a single time series by:
        1. constituent name
        2. duration: yearly, monthly, full (default is 'full' simulation duration)
        """
        search_shape = self.simulation_duration_count
        if duration == 'yearly':
            search_shape = 366
        elif duration == 'monthly':
            search_shape = 12

        for data_frame in self.data_frames:
            for key in data_frame.keys():
                if key == name and data_frame[key].shape[0] == search_shape:
                    return data_frame[key]

        return None

    @staticmethod
    def save_time_series_to_file(file_name, time_series):
        with open(file_name, 'w+') as f:
            for row in range(len(time_series.index)):
                dt = time_series.index[row]
                dv = time_series.values[row]
                # f.write(f'{dt},{"{:.2f}".format(dv)}\n')
                f.write(f'{dt},{dv}\n')
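And the matching usage sketch for the HDF5 reader above; again the path, segment id, and dictionary keys are placeholders:

h5 = HDF5('test10.h5')  # hypothetical path
h5.open_output()  # build the field-name dictionary from /RESULTS
h5.read_output('IMPLND', [1])  # load tables for IMPLND segment 1 only
df = h5.data_dictionary['IMPLND_I001:IWATER:values']  # hypothetical keys
print(df.head())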