Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion autotest/en_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ def mixed_par_draw_test():
#phi_vector_test()
#add_base_test()
#nz_test()
# deviations_test()
deviations_test()
# as_pyemu_matrix_test()
# dropna_test()
#enforce_test()
Expand Down
28 changes: 25 additions & 3 deletions autotest/utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1787,6 +1787,27 @@ def run_test():
raise Exception("should have failed")


def maha_pdc_test():
import pyemu
l1_critical_value = 6.4 #chi squared value at df=1,p=0.01
l2_critical_value = 9.2 #chi sqaured value at df=2,p=0.01
pst = pyemu.Pst(os.path.join("la", "pest.pst"))
en = pyemu.ObservationEnsemble.from_gaussian_draw(pst=pst,num_reals=20)
level_1,level_2 = pyemu.helpers.get_maha_obs_summary(en)
assert level_1.max() < l1_critical_value
assert level_2.sq_distance.max() < l2_critical_value

# pst = pyemu.Pst(os.path.join("pst","zoned_nz_64.pst"))
# en = pyemu.ObservationEnsemble.from_gaussian_draw(pst=pst, num_reals=20)
# level_1, level_2 = pyemu.helpers.get_maha_obs_summary(en)
# level_1.sort_values(inplace=True)
# level_2.sort_values(by="sq_distance",inplace=True)
# print(level_1)
# print(level_2)
# assert level_1.max() < l1_critical_value
# assert level_2.sq_distance.max() < l2_critical_value


if __name__ == "__main__":

#run_test()
Expand All @@ -1805,13 +1826,13 @@ def run_test():
# sfr_obs_test()
#sfr_reach_obs_test()
#gage_obs_test()
setup_pp_test()
#setup_pp_test()
# sfr_helper_test()
# gw_sft_ins_test()
#par_knowledge_test()
# grid_obs_test()
#hds_timeseries_test()
postprocess_inactive_conc_test()
#postprocess_inactive_conc_test()
#plot_summary_test()
# load_sgems_expvar_test()
# read_hydmod_test()
Expand Down Expand Up @@ -1856,4 +1877,5 @@ def run_test():
# ok_grid_test()
# ok_grid_zone_test()
# ppk2fac_verf_test()
#ok_grid_invest()
#ok_grid_invest()
maha_pdc_test()
71 changes: 71 additions & 0 deletions pyemu/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5970,3 +5970,74 @@ def _set_vertices(self):
# "instead.", category=DeprecationWarning)
#
# return get_spatialreference(epsg, text='proj4')

def get_maha_obs_summary(sim_en):
""" calculate the 1-D and 2-D mahalanobis distance

Args:
sim_en (`pyemu.ObservationEnsemble`): a simulated outputs ensemble

Returns:

tuple containing

- **pandas.DataFrame**: 1-D subspace squared mahalanobis distances
- **pandas.DataFrame**: 2-D subspace squared mahalanobis distances

Note:
Noise realizations are added to `sim_en` to account for measurement
noise.




"""

if not isinstance(sim_en,pyemu.ObservationEnsemble):
raise Exception("'sim_en' must be a "+\
" pyemu.ObservationEnsemble instance")
if sim_en.pst.nnz_obs < 1:
raise Exception(" at least one non-zero weighted obs is needed")

# process the simulated ensemblet to only have non-zero weighted obs
obs = sim_en.pst.observation_data
nz_names =sim_en.pst.nnz_obs_names
# get the full cov matrix
nz_cov_df = sim_en.covariance_matrix().to_dataframe()
nnz_en = sim_en.loc[:,nz_names].copy()
nz_cov_df = nz_cov_df.loc[nz_names,nz_names]
# get some noise realizations
nnz_en.reseed()
obsmean = obs.loc[nnz_en.columns.values, "obsval"]
noise_en = pyemu.ObservationEnsemble.from_gaussian_draw(sim_en.pst,num_reals=sim_en.shape[0])
noise_en -= obsmean #subtract off the obs val bc we just want the noise
noise_en.index = nnz_en.index
nnz_en += noise_en



#obsval_dict = obs.loc[nnz_en.columns.values,"obsval"].to_dict()

# first calculate the 1-D subspace maha distances
sim_mean = nnz_en.mean()
obs_mean = obs.loc[nnz_en.columns.values,"obsval"]
simvar_inv = 1. / (nnz_en.std()**2)
res_mean = sim_mean - obs_mean
l1_maha_sq_df = res_mean**2 * simvar_inv

# now calculate the 2-D subspace maha distances
onames1,onames2,l2_maha_sq_vals = [],[],[]
for i1,o1 in enumerate(nz_names):
r1 = res_mean[o1]
print("{0} of {1}".format(i1+1,len(nz_names)))
for i2,o2 in enumerate(nz_names[i1+1:]):
c_inv = np.linalg.inv(nz_cov_df.loc[[o1,o2],[o1,o2]].values)
r2 = res_mean[o2]
r_vec = np.array([r1,r2])
l2_maha_sq_val = np.dot(np.dot(r_vec,c_inv),r_vec.transpose())
onames1.append(o1)
onames2.append(o2)
l2_maha_sq_vals.append(l2_maha_sq_val)
l2_maha_sq_df = pd.DataFrame({"obsnme_1":onames1,"obsnme_2":onames2,"sq_distance":l2_maha_sq_vals})

return l1_maha_sq_df,l2_maha_sq_df