Skip to content

Commit

Permalink
Feat denseblockread (pypest#512)
Browse files Browse the repository at this point in the history
* refactored dense binary reading into in info fxn and read fxn, with ability to seek and read a subset of rows

* small fix in Pst.parrep()

* fix in parrep

---------

Co-authored-by: Brioch Hemmings <briochh@gmail.com>
  • Loading branch information
jtwhite79 and briochh committed Jun 10, 2024
1 parent d0804f6 commit c9cdb34
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 44 deletions.
20 changes: 18 additions & 2 deletions autotest/mat_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,22 @@ def dense_mat_format_test(setup_empty_mat_temp):
matfile = os.path.join(wd, "dense.bin")
m = pyemu.Matrix(x=arr, row_names=rnames, col_names=cnames)
f = m.to_dense(matfile, close=True)
row_names,row_offsets,col_names,success = pyemu.Matrix.get_dense_binary_info(matfile)
assert success
assert col_names == cnames
assert row_names == rnames
assert len(row_offsets) == len(row_names)

only_rows = row_names[::5]
data_rows_only, row_names_only,col_names_only = pyemu.Matrix.read_dense(matfile,only_rows=only_rows)

assert len(row_names_only) == len(only_rows)
assert data_rows_only.shape == (len(only_rows),ncol)
assert len(col_names_only) == len(col_names)
dif = np.abs(arr[::5,:] - data_rows_only)
print(dif.max())
assert dif.max() == 0.0

m1 = pyemu.Matrix.from_binary(matfile)
print(m1.shape)
assert m1.shape == (nrow, ncol)
Expand Down Expand Up @@ -746,7 +762,7 @@ def trunc_names_test():
# sparse_extend_test()
# sparse_get_test()
# sparse_get_sparse_test()
#dense_mat_format_test()
dense_mat_format_test(".")
#icode_minus_one_test()
#from_uncfile_firstlast_test()
trunc_names_test()
#trunc_names_test()
31 changes: 16 additions & 15 deletions autotest/pst_from_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -5147,7 +5147,8 @@ def plot_thresh(m_d):
#pst.control_data.noptmax = 10
phidf = pd.read_csv(os.path.join(m_d,"freyberg.phi.actual.csv"))
mxiter = phidf.iteration.max()
print(mxiter)
#mxiter = 1
#print(mxiter)
pr_oe = pyemu.ObservationEnsemble.from_csv(pst=pst,filename=os.path.join(m_d,"freyberg.0.obs.csv"))
pr_oe.index = pr_oe.index.map(str)
pr_pv = pr_oe.phi_vector
Expand Down Expand Up @@ -5233,15 +5234,15 @@ def plot_thresh(m_d):
#mx = max(np.nanmax(prarr),np.nanmax(ptarr))
#mn = max(np.nanmin(prarr), np.nanmin(ptarr))
fig,axes = plt.subplots(2,3,figsize=(10,10))
cb = axes[0,0].imshow(tarray, vmin=mn, vmax=mx, cmap="plasma")
cb = axes[0,2].imshow(tarray, vmin=mn, vmax=mx, cmap="plasma")
plt.colorbar(cb, ax=axes[0,0])
cb = axes[0,1].imshow(np.log10(prarr),vmin=mn,vmax=mx,cmap="plasma")
cb = axes[0,0].imshow(np.log10(prarr),vmin=mn,vmax=mx,cmap="plasma")
plt.colorbar(cb,ax=axes[0,1])
cb = axes[0,2].imshow(np.log10(ptarr), vmin=mn, vmax=mx,cmap="plasma")
cb = axes[0,1].imshow(np.log10(ptarr), vmin=mn, vmax=mx,cmap="plasma")
plt.colorbar(cb,ax=axes[0,2])
axes[0,2].set_title("post real: {1}, phi: {0:4.1f}".format(pv[real], real), loc="left")
axes[0,1].set_title("prior real: {1}, phi: {0:4.1f}".format(pr_pv[real],real),loc="left")
axes[0,0].set_title("truth", loc="left")
axes[0,1].set_title("post real: {1}, phi: {0:4.1f}".format(pv[real], real), loc="left")
axes[0,0].set_title("prior real: {1}, phi: {0:4.1f}".format(pr_pv[real],real),loc="left")
axes[0,2].set_title("truth", loc="left")


prarr = np.zeros((nrow,ncol)) - 1
Expand All @@ -5250,15 +5251,15 @@ def plot_thresh(m_d):
ptarr = np.zeros((nrow, ncol)) - 1
ptarr[kcobs.i, kcobs.j] = pt_oe.loc[real, kcobs.obsnme]
ptarr[ib == 0] = np.nan
cb = axes[1,0].imshow(tcarray,vmin=cmn,vmax=cmx,cmap="plasma")
cb = axes[1,0].imshow(prarr,vmin=cmn,vmax=cmx,cmap="plasma")
plt.colorbar(cb, ax=axes[1,0])
cb = axes[1,1].imshow(prarr,vmin=cmn,vmax=cmx,cmap="plasma")
plt.colorbar(cb,ax=axes[1,1])
cb = axes[1,2].imshow(ptarr, vmin=cmn,vmax=cmx,cmap="plasma")
cb = axes[1,2].imshow(tcarray, vmin=cmn,vmax=cmx,cmap="plasma")
plt.colorbar(cb,ax=axes[1,2])
axes[1,2].set_title("post real: {1}, phi: {0:4.1f}".format(pv[real], real), loc="left")
axes[1,1].set_title("prior real: {1}, phi: {0:4.1f}".format(pr_pv[real],real),loc="left")
axes[1,0].set_title("truth", loc="left")
axes[1,1].set_title("post real: {1}, phi: {0:4.1f}".format(pv[real], real), loc="left")
axes[1,0].set_title("prior real: {1}, phi: {0:4.1f}".format(pr_pv[real],real),loc="left")
axes[1,2].set_title("truth", loc="left")

plt.tight_layout()
pdf.savefig()
Expand Down Expand Up @@ -5389,7 +5390,7 @@ def test_array_fmt_pst_from(tmp_path):


if __name__ == "__main__":
mf6_freyberg_pp_locs_test()
#mf6_freyberg_pp_locs_test()
# invest()
#freyberg_test(os.path.abspath("."))
# freyberg_prior_build_test()
Expand All @@ -5399,9 +5400,9 @@ def test_array_fmt_pst_from(tmp_path):
#mf6_freyberg_shortnames_test()
#mf6_freyberg_direct_test()

mf6_freyberg_thresh_test(".")
#mf6_freyberg_thresh_test(".")

#plot_thresh("master_thresh")
plot_thresh("master_thresh")
#plot_thresh("master_thresh_mm")
#mf6_freyberg_varying_idomain()
# xsec_test()
Expand Down
5 changes: 3 additions & 2 deletions autotest/pst_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1515,7 +1515,8 @@ def interface_check_test():
with this.
"""
d = 'temp'
interface_check_test()
parrep_test(d)
#interface_check_test()
# new_format_test_2()
#write2_nan_test()
#process_output_files_test()
Expand Down Expand Up @@ -1557,7 +1558,7 @@ def interface_check_test():
# rectify_pgroup_test()
#sanity_check_test()
#change_limit_test()
write_tables_test('.')
#write_tables_test('.')
#pi_helper_test()
#ctrl_data_test()
#new_format_test_2()
Expand Down
6 changes: 3 additions & 3 deletions autotest/pst_tests_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ def parrep_test(tmp_path):
# flip the parameter ensemble back around
parens = parens[parens.columns.sort_values()]
assert np.allclose(pst.parameter_data.parval1.values[:-1],parens.T[0].values[:-1],atol=0.0001)

print(pyemu.__file__)
pst.parrep('fake.par.0.csv', real_name=3)
# flip the parameter ensemble back around
parens = parens[parens.columns.sort_values()]
Expand Down Expand Up @@ -912,10 +912,10 @@ def test_pstfromflopy_deprecation():
# from_flopy_zone_pars()
#from_flopy_pp_test()
#from_flopy()
#parrep_test()
parrep_test(".")
#from_flopy_kl_test()
#from_flopy_reachinput()
ineq_phi_test()
#ineq_phi_test()



122 changes: 103 additions & 19 deletions pyemu/mat/mat_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2098,7 +2098,7 @@ def to_binary(self, filename, droptol=None, chunk=None):
f.close()

@staticmethod
def read_dense(filename, forgive=False, close=True):
def read_dense(filename, forgive=False, close=True, only_rows=None):
"""read a dense-format binary file.
Args:
Expand All @@ -2108,6 +2108,7 @@ def read_dense(filename, forgive=False, close=True):
records are returned. If False, an exception is raised for an
incomplete record
close (`bool`): flag to close the filehandle. Default is True
only_rows (`iterable`): rows to read. If None, all rows are read
Returns:
tuple containing
Expand All @@ -2126,27 +2127,26 @@ def read_dense(filename, forgive=False, close=True):
f = open(filename, "rb")
else:
f = filename
# the header datatype
itemp1, itemp2, icount = np.fromfile(f, Matrix.binary_header_dt, 1)[0]
# print(itemp1,itemp2,icount)
if itemp1 != 0:
raise Exception("Matrix.read_dense() itemp1 != 0")
if itemp2 != icount:
raise Exception("Matrix.read_dense() itemp2 != icount")
ncol = np.abs(itemp2)

col_names = []
row_names = []
data_rows = []
col_slens = np.fromfile(f, Matrix.integer, ncol)

row_names,row_offsets,col_names,success = Matrix.get_dense_binary_info(filename)
if not forgive and not success:
raise Exception("Matrix.read_dense(): error reading dense binary info")
if only_rows is not None:
missing = list(set(only_rows)-set(row_names))
if len(missing) > 0:
raise Exception("the following only_rows are missing:{0}".format(",".join(missing)))
only_offsets = [row_offsets[row_names.index(only_row)] for only_row in only_rows]
row_names = only_rows
row_offsets = only_offsets
ncol = len(col_names)

i = 0
# for j in range(ncol):
for slen in col_slens:
# slen = np.fromfile(f, Matrix.integer,1)[0]
name = (
struct.unpack(str(slen) + "s", f.read(slen))[0].strip().lower().decode()
)
col_names.append(name)
while True:
for row_name,offset in zip(row_names,row_offsets):
f.seek(offset)
try:
slen = np.fromfile(f, Matrix.integer, 1)[0]
except Exception as e:
Expand All @@ -2172,7 +2172,6 @@ def read_dense(filename, forgive=False, close=True):
break
else:
raise Exception("error reading row {0}: {1}".format(i, str(e)))
row_names.append(name)
data_rows.append(data_row)
i += 1

Expand All @@ -2181,6 +2180,91 @@ def read_dense(filename, forgive=False, close=True):
f.close()
return data_rows, row_names, col_names



@staticmethod
def get_dense_binary_info(filename):
"""read the header and row and offsets for a dense binary file.
Parameters
----------
fileanme (`str`): dense binary filename
Returns:
tuple containing
- **['str']**: list of row names
- **['int']**: list of row offsets
- **[`str`]**: list of col names
- **bool**: flag indicating successful reading of all records found
"""
if not os.path.exists(filename):
raise Exception(
"Matrix.read_dense(): filename '{0}' not found".format(filename)
)
if isinstance(filename, str):
f = open(filename, "rb")
else:
f = filename
# the header datatype
itemp1, itemp2, icount = np.fromfile(f, Matrix.binary_header_dt, 1)[0]
# print(itemp1,itemp2,icount)
if itemp1 != 0:
raise Exception("Matrix.read_dense() itemp1 != 0")
if itemp2 != icount:
raise Exception("Matrix.read_dense() itemp2 != icount")
ncol = np.abs(itemp2)
col_slens = np.fromfile(f, Matrix.integer, ncol)
i = 0
col_names = []
for slen in col_slens:
name = (
struct.unpack(str(slen) + "s", f.read(slen))[0].strip().lower().decode()
)
col_names.append(name)
row_names = []
row_offsets = []
data_len = np.array(1,dtype=Matrix.double).itemsize * ncol
success = True
while True:
curr_pos = f.tell()
try:
slen = np.fromfile(f, Matrix.integer, 1)[0]
except Exception as e:
break
try:
name = (
struct.unpack(str(slen) + "s", f.read(slen))[0]
.strip()
.lower()
.decode()
)
except Exception as e:
print("error reading row name {0}: {1}".format(i, str(e)))
success = False
break
try:
data_row = np.fromfile(f, Matrix.double, ncol)
if data_row.shape[0] != ncol:
raise Exception(
"incomplete data in row {0}: {1} vs {2}".format(
i, data_row.shape[0], ncol))
except Exception as e:
print("error reading row data record {0}: {1}".format(i, str(e)))
success = False
break

row_offsets.append(curr_pos)
row_names.append(name)

i += 1
f.close()
return row_names,row_offsets,col_names,success


@classmethod
def from_binary(cls, filename, forgive=False):
"""class method load from PEST-compatible binary file into a
Expand Down
4 changes: 1 addition & 3 deletions pyemu/pst/pst_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2290,9 +2290,7 @@ def parrep(
real_name, parfile
)
)
self.parameter_data.parval1 = parens.loc[real_name].T.loc[
self.parameter_data.parnme
]
self.parameter_data["parval1"] = parens.loc[real_name,self.par_names]

if enforce_bounds:
par = self.parameter_data
Expand Down

0 comments on commit c9cdb34

Please sign in to comment.