diff --git a/autotest/en_tests.py b/autotest/en_tests.py index f0faafb54..5270c008d 100644 --- a/autotest/en_tests.py +++ b/autotest/en_tests.py @@ -592,7 +592,7 @@ def mixed_par_draw_test(): #phi_vector_test() #add_base_test() #nz_test() - # deviations_test() + deviations_test() # as_pyemu_matrix_test() # dropna_test() #enforce_test() diff --git a/autotest/utils_tests.py b/autotest/utils_tests.py index f5fec372c..9bfd3b2a7 100644 --- a/autotest/utils_tests.py +++ b/autotest/utils_tests.py @@ -1787,6 +1787,27 @@ def run_test(): raise Exception("should have failed") +def maha_pdc_test(): + import pyemu + l1_critical_value = 6.4 #chi squared value at df=1,p=0.01 + l2_critical_value = 9.2 #chi sqaured value at df=2,p=0.01 + pst = pyemu.Pst(os.path.join("la", "pest.pst")) + en = pyemu.ObservationEnsemble.from_gaussian_draw(pst=pst,num_reals=20) + level_1,level_2 = pyemu.helpers.get_maha_obs_summary(en) + assert level_1.max() < l1_critical_value + assert level_2.sq_distance.max() < l2_critical_value + + # pst = pyemu.Pst(os.path.join("pst","zoned_nz_64.pst")) + # en = pyemu.ObservationEnsemble.from_gaussian_draw(pst=pst, num_reals=20) + # level_1, level_2 = pyemu.helpers.get_maha_obs_summary(en) + # level_1.sort_values(inplace=True) + # level_2.sort_values(by="sq_distance",inplace=True) + # print(level_1) + # print(level_2) + # assert level_1.max() < l1_critical_value + # assert level_2.sq_distance.max() < l2_critical_value + + if __name__ == "__main__": #run_test() @@ -1805,13 +1826,13 @@ def run_test(): # sfr_obs_test() #sfr_reach_obs_test() #gage_obs_test() - setup_pp_test() + #setup_pp_test() # sfr_helper_test() # gw_sft_ins_test() #par_knowledge_test() # grid_obs_test() #hds_timeseries_test() - postprocess_inactive_conc_test() + #postprocess_inactive_conc_test() #plot_summary_test() # load_sgems_expvar_test() # read_hydmod_test() @@ -1856,4 +1877,5 @@ def run_test(): # ok_grid_test() # ok_grid_zone_test() # ppk2fac_verf_test() - #ok_grid_invest() \ No newline at end of file + #ok_grid_invest() + maha_pdc_test() \ No newline at end of file diff --git a/pyemu/utils/helpers.py b/pyemu/utils/helpers.py index 2d0e712c1..d85d0123a 100644 --- a/pyemu/utils/helpers.py +++ b/pyemu/utils/helpers.py @@ -5970,3 +5970,74 @@ def _set_vertices(self): # "instead.", category=DeprecationWarning) # # return get_spatialreference(epsg, text='proj4') + +def get_maha_obs_summary(sim_en): + """ calculate the 1-D and 2-D mahalanobis distance + + Args: + sim_en (`pyemu.ObservationEnsemble`): a simulated outputs ensemble + + Returns: + + tuple containing + + - **pandas.DataFrame**: 1-D subspace squared mahalanobis distances + - **pandas.DataFrame**: 2-D subspace squared mahalanobis distances + + Note: + Noise realizations are added to `sim_en` to account for measurement + noise. + + + + + """ + + if not isinstance(sim_en,pyemu.ObservationEnsemble): + raise Exception("'sim_en' must be a "+\ + " pyemu.ObservationEnsemble instance") + if sim_en.pst.nnz_obs < 1: + raise Exception(" at least one non-zero weighted obs is needed") + + # process the simulated ensemblet to only have non-zero weighted obs + obs = sim_en.pst.observation_data + nz_names =sim_en.pst.nnz_obs_names + # get the full cov matrix + nz_cov_df = sim_en.covariance_matrix().to_dataframe() + nnz_en = sim_en.loc[:,nz_names].copy() + nz_cov_df = nz_cov_df.loc[nz_names,nz_names] + # get some noise realizations + nnz_en.reseed() + obsmean = obs.loc[nnz_en.columns.values, "obsval"] + noise_en = pyemu.ObservationEnsemble.from_gaussian_draw(sim_en.pst,num_reals=sim_en.shape[0]) + noise_en -= obsmean #subtract off the obs val bc we just want the noise + noise_en.index = nnz_en.index + nnz_en += noise_en + + + + #obsval_dict = obs.loc[nnz_en.columns.values,"obsval"].to_dict() + + # first calculate the 1-D subspace maha distances + sim_mean = nnz_en.mean() + obs_mean = obs.loc[nnz_en.columns.values,"obsval"] + simvar_inv = 1. / (nnz_en.std()**2) + res_mean = sim_mean - obs_mean + l1_maha_sq_df = res_mean**2 * simvar_inv + + # now calculate the 2-D subspace maha distances + onames1,onames2,l2_maha_sq_vals = [],[],[] + for i1,o1 in enumerate(nz_names): + r1 = res_mean[o1] + print("{0} of {1}".format(i1+1,len(nz_names))) + for i2,o2 in enumerate(nz_names[i1+1:]): + c_inv = np.linalg.inv(nz_cov_df.loc[[o1,o2],[o1,o2]].values) + r2 = res_mean[o2] + r_vec = np.array([r1,r2]) + l2_maha_sq_val = np.dot(np.dot(r_vec,c_inv),r_vec.transpose()) + onames1.append(o1) + onames2.append(o2) + l2_maha_sq_vals.append(l2_maha_sq_val) + l2_maha_sq_df = pd.DataFrame({"obsnme_1":onames1,"obsnme_2":onames2,"sq_distance":l2_maha_sq_vals}) + + return l1_maha_sq_df,l2_maha_sq_df