Skip to content


implement unit testing for conversion
Browse files Browse the repository at this point in the history
- OOP hbn, hdf5 file access classes
- unit testing framework setup
- hspf test uci file and output hbn files updates
  • Loading branch information
TongZhai committed Apr 22, 2021
1 parent bdb8b5a commit c556ff7
Show file tree
Hide file tree
Showing 13 changed files with 755 additions and 22 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ __pycache__/*
130 changes: 130 additions & 0 deletions HSP2tools/
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
from struct import unpack
from numpy import fromfile
from pandas import DataFrame
from datetime import datetime, timedelta
from collections import defaultdict

class HBNOutput:
    """Reader for an HSPF binary output (HBN) file.

    Call :meth:`read_data` once to parse the file; afterwards
    ``data_frames`` holds one DataFrame per (operation, segment, activity,
    time code) group, ``summaryindx`` holds the matching group names
    (``'<operation>_<activity>_<segment:03d>_<tcode>'``) in the same order,
    and ``summary`` holds one descriptive tuple per group laid out as named
    in ``summarycols``.
    """

    def __init__(self, file_name):
        """Remember the file to read; nothing is opened until read_data().

        Parameters
        ----------
        file_name : str
            Name/path of the HBN file created by HSPF.
        """
        self.data_frames = []
        self.file_name = file_name
        self.simulation_duration_count = 0
        self.summary = []
        self.summarycols = []
        self.summaryindx = []

        # Time-code value stored in each data record -> readable frequency.
        self.tcodes = {1: 'Minutely', 2: 'Hourly', 3: 'Daily', 4: 'Monthly', 5: 'Yearly'}

    def read_data(self):
        """Read ALL data from the HBN file into ``self.data_frames``.

        The file is scanned twice: the first pass records where every data
        record and every names record lives, the second pass decodes the
        data records of each group into a DataFrame indexed by timestamp.

        Returns
        -------
        None
            Results are stored on the instance (``data_frames``,
            ``summary``, ``summarycols``, ``summaryindx`` and
            ``simulation_duration_count``). If the file is empty or does
            not start with the magic number, a message is printed and the
            instance is left untouched.
        """
        data = fromfile(self.file_name, 'B')
        if len(data) == 0 or data[0] != 0xFD:
            print('BAD HBN FILE - must start with magic number 0xFD')
            return

        # Build layout maps of the file's contents
        mapn = defaultdict(list)  # (operation, segment, activity) -> constituent names
        mapd = defaultdict(list)  # (operation, segment, activity, tcode) -> [(offset, reclen)]
        index = 1  # already used first byte (magic number)
        while index < len(data):
            rc1, rc2, rc3, rc, rectype, operation, segment, activity = unpack(
                '4BI8sI8s', data[index:index + 28])
            # The record length is spread over the four leading bytes; the
            # 24 bytes of rectype/operation/segment/activity are not payload.
            rc1 = int(rc1 >> 2)
            rc2 = int(rc2) * 64 + rc1  # 2**6
            rc3 = int(rc3) * 16384 + rc2  # 2**14
            reclen = int(rc) * 4194304 + rc3 - 24  # 2**22

            operation = operation.decode('ascii').strip()  # unpack yields bytes, not str
            activity = activity.decode('ascii').strip()

            if operation not in {'PERLND', 'IMPLND', 'RCHRES'}:
                print('ALIGNMENT ERROR', operation)

            if rectype == 1:  # data record
                tcode = unpack('I', data[index + 32: index + 36])[0]
                mapd[operation, segment, activity, tcode].append((index, reclen))
            elif rectype == 0:  # data names record
                i = index + 28
                slen = 0
                # Payload is a run of (length, name) pairs.
                while slen < reclen:
                    ln = unpack('I', data[i + slen: i + slen + 4])[0]
                    n = unpack(f'{ln}s', data[i + slen + 4: i + slen + 4 + ln])[0].decode('ascii').strip()
                    mapn[operation, segment, activity].append(n.replace('-', ''))
                    slen += 4 + ln
            else:
                print('UNKNOW RECTYPE', rectype)

            # Advance exactly once per record; the two branches are mutually
            # exclusive (offsets were found by trial and error).
            if reclen < 36:
                index += reclen + 29
            else:
                index += reclen + 30

        self.data_frames = []
        self.summary = []
        self.summarycols = ['Operation', 'Activity', 'segment', 'Frequency', 'Shape', 'Start', 'Stop']
        self.summaryindx = []
        for (operation, segment, activity, tcode), records in mapd.items():
            names = mapn[operation, segment, activity]
            nvals = len(names)
            rows = []
            times = []
            for (offset, _reclen) in records:
                yr, mo, dy, hr, mn = unpack('5I', data[offset + 36: offset + 56])
                # The hour is added as a timedelta rather than passed to
                # datetime(), so an hour value of 24 rolls over to the next day.
                times.append(datetime(yr, mo, dy, 0, mn) + timedelta(hours=hr))

                offset += 56
                rows.append(unpack(f'{nvals}f', data[offset:offset + (4 * nvals)]))

            dfname = f'{operation}_{activity}_{segment:03d}_{tcode}'
            if self.simulation_duration_count == 0:
                self.simulation_duration_count = len(times)
            # sort_index() defaults to the row axis; newer pandas rejects a
            # positional axis argument.
            df = DataFrame(rows, index=times, columns=names).sort_index()
            self.data_frames.append(df)

            self.summaryindx.append(dfname)
            self.summary.append((operation, activity, str(segment), self.tcodes[tcode],
                                 str(df.shape), df.index[0], df.index[-1]))

    def get_time_series(self, name, time_unit):
        """Return a single time series selected by constituent and frequency.

        Parameters
        ----------
        name : str
            Constituent (column) name to look for.
        time_unit : str
            One of the frequency names in ``self.tcodes`` ('hourly',
            'monthly', 'yearly', ...), matched case-insensitively. Any
            other value selects the hourly data (time code 2).

        Returns
        -------
        pandas.Series or None
            The first matching column, or None when no group with the
            requested frequency has a column called ``name``.
        """
        wanted = time_unit.lower() if isinstance(time_unit, str) else None
        target_tcode = 2
        for tcode_key, tcode_name in self.tcodes.items():
            if tcode_name.lower() == wanted:
                target_tcode = tcode_key
                break

        # Group names end with the time code; summaryindx and data_frames
        # are filled in lockstep by read_data().
        suffix = str(target_tcode)
        for group_name, data_frame in zip(self.summaryindx, self.data_frames):
            if group_name.endswith(suffix) and name in data_frame.keys():
                return data_frame[name]

        return None

    @staticmethod
    def save_time_series_to_file(file_name, time_series):
        """Write a time series to ``file_name`` as ``<timestamp>,<value>`` lines.

        Parameters
        ----------
        file_name : str
            Path of the text file to create (an existing file is overwritten).
        time_series : pandas.Series
            Series whose index and values are written row by row.
        """
        with open(file_name, 'w+') as f:
            for dt, dv in zip(time_series.index, time_series.values):
                f.write(f'{dt},{dv}\n')
163 changes: 163 additions & 0 deletions HSP2tools/
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
from struct import unpack

import h5py
from numpy import fromfile
from pandas import DataFrame
import pandas as pd
from datetime import datetime, timedelta
from collections import defaultdict

class HDF5:
    """Reader for the ``/RESULTS`` group of an HSP2 HDF5 output file.

    ``data_dictionary`` maps ``'<operation key>:<activity>'`` (for example
    ``'IMPLND_I001:IQUAL'``) to a ``{field index: field name}`` lookup, and
    ``'<operation key>:<activity>:values'`` to the DataFrame holding that
    table's values (None until the table has been read).
    """

    def __init__(self, file_name):
        """Remember the file to read; nothing is opened until it is needed.

        Parameters
        ----------
        file_name : str
            Name/path of the HDF5 file.
        """
        self.data_frames = []
        self.file_name = file_name
        self.simulation_duration_count = 0
        self.summary = []
        self.summarycols = []
        self.summaryindx = []

        self.time_index = []  # this will be shared with all time series
        self.data_dictionary = {}
        self.dd_key_separator = ':'

        self.tcodes = {1: 'Minutely', 2: 'Hourly', 3: 'Daily', 4: 'Monthly', 5: 'Yearly'}

    def open_output(self):
        """Build the data dictionary from the file's ``/RESULTS`` group.

        For every ``/RESULTS/<operation key>/<activity>/table`` the
        ``FIELD_<n>_NAME`` attributes are collected into a
        ``{n: field name}`` lookup stored under
        ``'<operation key>:<activity>'``; the matching ``':values'`` entry
        is initialised to None. The shared hourly ``time_index`` is built
        once from the start and end times found in ``/CONTROL/GLOBAL``.
        """
        with h5py.File(self.file_name, "r") as f:
            global_info = f.get('/CONTROL/GLOBAL')['table'].fields('Info')
            start_time = pd.to_datetime(global_info[1].astype('datetime64[D]'))
            end_time = pd.to_datetime(global_info[2].astype('datetime64[D]'))
            section = f.get('/RESULTS')
            for opn_key in list(section.keys()):  # e.g. opn_key = IMPLND_I001
                opn_output_grp = section[opn_key]
                for opn_output_key in list(opn_output_grp.keys()):  # e.g. IQUAL
                    dd_key = opn_key + self.dd_key_separator + opn_output_key
                    data_table = opn_output_grp[opn_output_key]['table']
                    field_indices = {}
                    for table_attr in list(data_table.attrs):  # e.g. FIELD_2_NAME
                        if not (table_attr.startswith('FIELD') and table_attr.endswith('NAME')):
                            continue
                        try:
                            str_attr_value = data_table.attrs[table_attr].astype('unicode')
                        except (AttributeError, TypeError, ValueError):
                            # Attribute value cannot be converted to text; skip it.
                            continue
                        if str_attr_value == '':
                            continue
                        # convert FIELD_n_NAME to lookup of field index <-> field name
                        name_parts = table_attr.split('_')
                        field_indices[int(name_parts[1])] = str_attr_value
                    self.data_dictionary[dd_key] = field_indices
                    self.data_dictionary[dd_key + f'{self.dd_key_separator}values'] = None
                    if len(self.time_index) == 0:
                        # The index is constructed from the start and end
                        # times rather than read row by row from the table's
                        # 'index' field (issue in HDF5 table!).
                        # NOTE(review): the row-by-row alternative appears to
                        # have been disabled in the original; it is not run
                        # here because its result would duplicate the index.
                        self.time_index = list(pd.date_range(start_time, end_time, freq='H')[:-1])

    def screen_dd_key(self, opn_type, opn_ids):
        """Select the table keys of one operation type, optionally by id.

        Parameters
        ----------
        opn_type : str
            'IMPLND', 'PERLND' or 'RCHRES'.
        opn_ids : list of int or None
            Operation ids to keep; None or an empty list keeps every id.

        Returns
        -------
        list of str
            Matching ``'<operation key>:<activity>'`` keys, in dictionary
            order; the ``':values'`` entries are never returned.
        """
        dd_keys_to_read = []
        # Operation keys look like IMPLND_I001 / PERLND_P001 / RCHRES_R001.
        key_prefix = opn_type + {'IMPLND': '_I', 'PERLND': '_P', 'RCHRES': '_R'}.get(opn_type, '')

        for key in self.data_dictionary:
            if not key.startswith(opn_type):
                continue
            if key.endswith('values'):
                continue
            parts = key.split(self.dd_key_separator)
            opn_id = int(parts[0][len(key_prefix):])
            if opn_ids is None or len(opn_ids) == 0 or opn_id in opn_ids:
                dd_keys_to_read.append(key)

        return dd_keys_to_read

    def read_output_from_table(self, table_key):
        """Load one results table into the ``':values'`` dictionary entry.

        Parameters
        ----------
        table_key : str
            A ``'<operation key>:<activity>'`` key produced by open_output().
        """
        (opn_key, activity_key) = table_key.split(self.dd_key_separator)
        field_names = self.data_dictionary[table_key]
        # Column names ordered by their FIELD_<n>_NAME index.
        mapn = [field_names[field_index] for field_index in sorted(field_names)]
        with h5py.File(self.file_name, "r") as f:
            section = f.get('/RESULTS')
            data_table = section[opn_key][activity_key]['table']  # e.g. activity_key = IQUAL
            # NOTE(review): assumes the first field of every row is the
            # table's own index, mirroring the mapn[1:] column selection
            # below - confirm against the table layout.
            rows = [list(row)[1:] for row in data_table]
        self.data_dictionary[table_key + f'{self.dd_key_separator}values'] = \
            DataFrame(rows, index=self.time_index, columns=mapn[1:])

    def read_output(self, opn_type, opn_ids=None):
        """Read the tables of one operation type into the data dictionary.

        The data dictionary is built first if it is still empty.

        Parameters
        ----------
        opn_type : str
            'IMPLND', 'PERLND' or 'RCHRES'.
        opn_ids : list of int, optional
            Operation ids to read; all ids when omitted or empty.
        """
        if len(self.data_dictionary) == 0:
            self.open_output()
        for dd_key_to_read in self.screen_dd_key(opn_type, opn_ids):
            self.read_output_from_table(dd_key_to_read)

    def get_time_series(self, name, duration):
        """Return a single time series selected by constituent and length.

        Parameters
        ----------
        name : str
            Constituent (column) name to look for.
        duration : str
            'yearly' looks for a 366-row series, 'monthly' for a 12-row
            series; anything else looks for ``simulation_duration_count``
            rows.

        Returns
        -------
        pandas.Series or None
            The first column in ``self.data_frames`` with that name and
            row count, or None.
            NOTE(review): none of the methods visible here fill
            ``data_frames`` or ``simulation_duration_count``.
        """
        search_shape = {'yearly': 366, 'monthly': 12}.get(duration, self.simulation_duration_count)

        for data_frame in self.data_frames:
            if name in data_frame.keys() and data_frame[name].shape[0] == search_shape:
                return data_frame[name]

        return None

    @staticmethod
    def save_time_series_to_file(file_name, time_series):
        """Write a time series to ``file_name`` as ``<timestamp>,<value>`` lines.

        Parameters
        ----------
        file_name : str
            Path of the text file to create (an existing file is overwritten).
        time_series : pandas.Series
            Series whose index and values are written row by row.
        """
        with open(file_name, 'w+') as f:
            for dt, dv in zip(time_series.index, time_series.values):
                f.write(f'{dt},{dv}\n')
2 changes: 1 addition & 1 deletion HSP2tools/
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ def operation(info, llines, op):
if cat == 'SKIP':
df = concat([temp[1] for temp in history[path,cat]], axis='columns')
df = concat([temp[1] for temp in history[path,cat]], axis='columns', sort=False)
df = fix_df(df, op, path, ddfaults, valid)
if cat == 'ACTIVITY' and op == 'PERLND':
df = df.rename(columns = {'AIRTFG':'ATEMP', 'SNOWFG':'SNOW',
Expand Down

0 comments on commit c556ff7

Please sign in to comment.