diff --git a/.gitignore b/.gitignore index 12b75bd25..236a3e870 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,11 @@ __pycache__/ *.py[cod] +# Editor files +#mac +.DS_Store +*~ + # C extensions *.so diff --git a/autotest/moouu/.DS_Store b/autotest/moouu/.DS_Store deleted file mode 100644 index 0572e1baf..000000000 Binary files a/autotest/moouu/.DS_Store and /dev/null differ diff --git a/autotest/moouu/10par_xsec/.DS_Store b/autotest/moouu/10par_xsec/.DS_Store deleted file mode 100644 index a82aa79bb..000000000 Binary files a/autotest/moouu/10par_xsec/.DS_Store and /dev/null differ diff --git a/autotest/moouu/10par_xsec/template/.DS_Store b/autotest/moouu/10par_xsec/template/.DS_Store deleted file mode 100644 index e9f990873..000000000 Binary files a/autotest/moouu/10par_xsec/template/.DS_Store and /dev/null differ diff --git a/autotest/pst_from_tests.py b/autotest/pst_from_tests.py index 05f052ed0..e9d33d398 100644 --- a/autotest/pst_from_tests.py +++ b/autotest/pst_from_tests.py @@ -1,18 +1,21 @@ import os import sys +from pathlib import Path import platform # sys.path.append(os.path.join("..","pyemu")) +import numpy as np +import pandas as pd import pyemu from pyemu import os_utils -from pyemu.utils import PstFrom +from pyemu.utils import PstFrom, pp_file_to_dataframe, write_pp_file import shutil ext = '' bin_path = os.path.join("..", "..", "bin") if "linux" in platform.platform().lower(): bin_path = os.path.join(bin_path, "linux") -elif "darwin" in platform.platform().lower() or "macos" in platform.platform().lower(): +elif "darwin" in platform.platform().lower() or 'macos' in platform.platform().lower(): bin_path = os.path.join(bin_path, "mac") else: bin_path = os.path.join(bin_path, "win") @@ -201,8 +204,9 @@ def freyberg_test(): # check mult files are in pst input files csv = os.path.join(template_ws, "mult2model_info.csv") df = pd.read_csv(csv, index_col=0) + pst_input_files = {str(f) for f in pst.input_files} mults_not_linked_to_pst = ((set(df.mlt_file.unique()) - - set(pst.input_files)) - + pst_input_files) - set(df.loc[df.pp_file.notna()].mlt_file)) assert len(mults_not_linked_to_pst) == 0, print(mults_not_linked_to_pst) @@ -427,8 +431,9 @@ def freyberg_prior_build_test(): # check mult files are in pst input files csv = os.path.join(template_ws, "mult2model_info.csv") df = pd.read_csv(csv, index_col=0) + pst_input_files = {str(f) for f in pst.input_files} mults_not_linked_to_pst = ((set(df.mlt_file.unique()) - - set(pst.input_files)) - + pst_input_files) - set(df.loc[df.pp_file.notna()].mlt_file)) assert len(mults_not_linked_to_pst) == 0, print(mults_not_linked_to_pst) @@ -823,8 +828,9 @@ def mf6_freyberg_test(): # check mult files are in pst input files csv = os.path.join(template_ws, "mult2model_info.csv") df = pd.read_csv(csv, index_col=0) + pst_input_files = {str(f) for f in pst.input_files} mults_not_linked_to_pst = ((set(df.mlt_file.unique()) - - set(pst.input_files)) - + pst_input_files) - set(df.loc[df.pp_file.notna()].mlt_file)) assert len(mults_not_linked_to_pst) == 0, print(mults_not_linked_to_pst) @@ -838,6 +844,7 @@ def mf6_freyberg_test(): assert np.abs(float(df.upper_bound.min()) - 30.) < 1.0e-6,df.upper_bound.min() assert np.abs(float(df.lower_bound.max()) - -0.3) < 1.0e-6,df.lower_bound.max() + def mf6_freyberg_shortnames_test(): import numpy as np import pandas as pd @@ -974,8 +981,9 @@ def mf6_freyberg_shortnames_test(): # check mult files are in pst input files csv = os.path.join(template_ws, "mult2model_info.csv") df = pd.read_csv(csv, index_col=0) + pst_input_files = {str(f) for f in pst.input_files} mults_not_linked_to_pst = ((set(df.mlt_file.unique()) - - set(pst.input_files)) - + pst_input_files) - set(df.loc[df.pp_file.notna()].mlt_file)) assert len(mults_not_linked_to_pst) == 0, print(mults_not_linked_to_pst) @@ -1149,8 +1157,9 @@ def mf6_freyberg_da_test(): # check mult files are in pst input files csv = os.path.join(template_ws, "mult2model_info.csv") df = pd.read_csv(csv, index_col=0) + pst_input_files = {str(f) for f in pst.input_files} mults_not_linked_to_pst = ((set(df.mlt_file.unique()) - - set(pst.input_files)) - + pst_input_files) - set(df.loc[df.pp_file.notna()].mlt_file)) assert len(mults_not_linked_to_pst) == 0, print(mults_not_linked_to_pst) @@ -1333,6 +1342,7 @@ def mf6_freyberg_direct_test(): if np.abs(arr.max() - rch_val) > 1.0e-6 or np.abs(arr.min() - rch_val) > 1.0e-6: raise Exception("recharge too diff") + def mf6_freyberg_varying_idomain(): import numpy as np import pandas as pd @@ -1512,13 +1522,452 @@ def xsec_test(): assert pst.phi < 1.0e-7 - - +class TestPstFrom(): + """Test class for some PstFrom functionality + """ + @classmethod + def setup(cls): + + # record the original wd + cls.original_wd = Path().cwd() + + cls.sim_ws = Path('temp/pst-from-small/') + external_files_folders = [cls.sim_ws / 'external', + cls.sim_ws / '../external_files'] + for folder in external_files_folders: + folder.mkdir(parents=True, exist_ok=True) + + cls.dest_ws = Path('temp/pst-from-small-template') + + cls.sr = pyemu.helpers.SpatialReference(delr=np.ones(3), + delc=np.ones(3), + rotation=0, + epsg=3070, + xul=0., + yul=0., + units='meters', # gis units of meters? + lenuni=2 # model units of meters + ) + # make some fake external data + # array data + cls.array_file = cls.sim_ws / 'hk.dat' + cls.array_data = np.ones((3, 3)) + np.savetxt(cls.array_file, cls.array_data) + # list data + cls.list_file = cls.sim_ws / 'wel.dat' + cls.list_data = pd.DataFrame({'#k': [1, 1, 1], + 'i': [2, 3, 3], + 'j': [2, 2, 1], + 'flux': [1., 10., 100.] + }, columns=['#k', 'i', 'j', 'flux']) + cls.list_data.to_csv(cls.list_file, sep=' ', index=False) + + # set up the zones + zone_array = np.ones((3, 3)) # default of zone 1 + zone_array[2:, 2:] = 0 # position 3, 3 is not parametrized (no zone) + #zone_array[0, :2] = 2 # 0, 0 and 0, 1 are in zone 2 + zone_array[1, 1] = 2 # 1, 1 is in zone 2 + cls.zone_array = zone_array + + # "geostatistical structure(s)" + v = pyemu.geostats.ExpVario(contribution=1.0, a=1000) + cls.grid_gs = pyemu.geostats.GeoStruct(variograms=v, transform='log') + + cls.pf = pyemu.utils.PstFrom(original_d=cls.sim_ws, new_d=cls.dest_ws, + remove_existing=True, + longnames=True, spatial_reference=cls.sr, + zero_based=False, tpl_subfolder='tpl') + + def test_add_array_parameters(self): + """test setting up array parameters with different external file + configurations and path formats. + """ + tag = 'hk' + # test with different array input configurations + array_file_input = [ + Path('hk0.dat'), # sim_ws; just file name as Path instance + 'hk1.dat', # sim_ws; just file name as string + Path(self.sim_ws, 'hk2.dat'), # sim_ws; full path as Path instance + 'external/hk3.dat', # subfolder; relative file path as string + Path('external/hk4.dat'), # subfolder; relative path as Path instance + '../external_files/hk5.dat', # subfolder up one level + ] + for i, array_file in enumerate(array_file_input): + par_name_base = f'{tag}_{i:d}' + + # create the file + # dest_file is the data file relative to the sim or dest ws + dest_file = Path(array_file) + if self.sim_ws in dest_file.parents: + dest_file = dest_file.relative_to(self.sim_ws) + shutil.copy(self.array_file, Path(self.dest_ws, dest_file)) + + self.pf.add_parameters(filenames=array_file, par_type='zone', + zone_array=self.zone_array, + par_name_base=par_name_base, # basename for parameters that are set up + pargp=f'{tag}_zone', # Parameter group to assign pars to. + ) + + assert (self.dest_ws / dest_file).exists() + assert (self.dest_ws / f'org/{dest_file.name}').exists() + # mult file name is par_name_base + `instance` identifier + part_type + mult_filename = f'{par_name_base}_inst0_zone.csv' + assert (self.dest_ws / f'mult/{mult_filename}').exists() + # for now, assume tpl file should be in main folder + template_file = (self.pf.tpl_d / f'{mult_filename}.tpl') + assert template_file.exists() + + # make the PEST control file + pst = self.pf.build_pst() + assert pst.filename == Path('temp/pst-from-small-template/pst-from-small.pst') + assert pst.filename.exists() + rel_tpl = pyemu.utils.pst_from.get_relative_filepath(self.pf.new_d, template_file) + assert rel_tpl in pst.template_files + + # make the PEST control file (just filename) + pst = self.pf.build_pst('junk.pst') + assert pst.filename == Path('temp/pst-from-small-template/junk.pst') + assert pst.filename.exists() + + # make the PEST control file (file path) + pst = self.pf.build_pst('temp/pst-from-small-template/junk2.pst') + assert pst.filename == Path('temp/pst-from-small-template/junk2.pst') + assert pst.filename.exists() + + # check the mult2model info + df = pd.read_csv(self.dest_ws / 'mult2model_info.csv') + # org data file relative to dest_ws + org_file = Path(df['org_file'].values[i]) + assert org_file == Path(f'org/{dest_file.name}') + # model file relative to dest_ws + model_file = Path(df['model_file'].values[i]) + assert model_file == dest_file + # mult file + mult_file = Path(df['mlt_file'].values[i]) + assert mult_file == Path(f'mult/{mult_filename}') + + # check applying the parameters (in the dest or template ws) + os.chdir(self.dest_ws) + # first delete the model file in the template ws + model_file.unlink() + # manually apply a multipler + mult = 4 + mult_values = np.loadtxt(mult_file) + mult_values[:] = mult + np.savetxt(mult_file, mult_values) + # apply the multiplier + pyemu.helpers.apply_list_and_array_pars(arr_par_file='mult2model_info.csv') + # model file should have been remade by apply_list_and_array_pars + assert model_file.exists() + result = np.loadtxt(model_file) + # results should be the same with default multipliers of 1 + # assume details of parameterization are handled by other tests + assert np.allclose(result, self.array_data * mult) + + # revert to original wd + os.chdir(self.original_wd) + + def test_add_list_parameters(self): + """test setting up list parameters with different external file + configurations and path formats. + """ + tag = 'wel' + # test with different array input configurations + list_file_input = [ + Path('wel0.dat'), # sim_ws; just file name as Path instance + 'wel1.dat', # sim_ws; just file name as string + Path(self.sim_ws, 'wel2.dat'), # sim_ws; full path as Path instance + 'external/wel3.dat', # subfolder; relative file path as string + Path('external/wel4.dat'), # subfolder; relative path as Path instance + '../external_files/wel5.dat', # subfolder up one level + ] + par_type = 'constant' + for i, list_file in enumerate(list_file_input): + par_name_base = f'{tag}_{i:d}' + + # create the file + # dest_file is the data file relative to the sim or dest ws + dest_file = Path(list_file) + if self.sim_ws in dest_file.parents: + dest_file = dest_file.relative_to(self.sim_ws) + shutil.copy(self.list_file, Path(self.dest_ws, dest_file)) + + self.pf.add_parameters(filenames=list_file, par_type=par_type, + par_name_base=par_name_base, + index_cols=[0, 1, 2], use_cols=[3], + pargp=f'{tag}_{i}', + comment_char='#', + ) + + assert (self.dest_ws / dest_file).exists() + assert (self.dest_ws / f'org/{dest_file.name}').exists() + # mult file name is par_name_base + `instance` identifier + part_type + mult_filename = f'{par_name_base}_inst0_{par_type}.csv' + assert (self.dest_ws / f'mult/{mult_filename}').exists() + # for now, assume tpl file should be in main folder + template_file = (self.pf.tpl_d / f'{mult_filename}.tpl') + assert template_file.exists() + + # make the PEST control file + pst = self.pf.build_pst() + rel_tpl = pyemu.utils.pst_from.get_relative_filepath(self.pf.new_d, template_file) + assert rel_tpl in pst.template_files + + # check the mult2model info + df = pd.read_csv(self.dest_ws / 'mult2model_info.csv') + # org data file relative to dest_ws + org_file = Path(df['org_file'].values[i]) + assert org_file == Path(f'org/{dest_file.name}') + # model file relative to dest_ws + model_file = Path(df['model_file'].values[i]) + assert model_file == dest_file + # mult file + mult_file = Path(df['mlt_file'].values[i]) + assert mult_file == Path(f'mult/{mult_filename}') + + # check applying the parameters (in the dest or template ws) + os.chdir(self.dest_ws) + # first delete the model file in the template ws + model_file.unlink() + # manually apply a multipler + mult = 4 + mult_df = pd.read_csv(mult_file) + # no idea why '3' is the column with multipliers and 'parval1_3' isn't + # what is the purpose of 'parval1_3'? + parval_col = '3' + mult_df[parval_col] = mult + mult_df.to_csv(mult_file, index=False) + # apply the multiplier + pyemu.helpers.apply_list_and_array_pars(arr_par_file='mult2model_info.csv') + # model file should have been remade by apply_list_and_array_pars + assert model_file.exists() + result = pd.read_csv(model_file, delim_whitespace=True) + # results should be the same with default multipliers of 1 + # assume details of parameterization are handled by other tests + assert np.allclose(result['flux'], self.list_data['flux'] * mult) + + # revert to original wd + os.chdir(self.original_wd) + + def test_add_array_parameters_pps_grid(self): + """test setting up array parameters with a list of array text + files in a subfolder. + """ + tag = 'hk' + par_styles = ['multiplier', #'direct' + ] + array_files = ['hk_{}_{}.dat', 'external/hk_{}_{}.dat'] + for par_style in par_styles: + mult2model_row = 0 + for j, array_file in enumerate(array_files): + + par_types = {'pilotpoints': 'pp', + 'grid': 'gr'} + for i, (par_type, suffix) in enumerate(par_types.items()): + # (re)create the file + dest_file = array_file.format(mult2model_row, suffix) + shutil.copy(self.array_file, Path(self.dest_ws, dest_file)) + # add the parameters + par_name_base = f'{tag}_{suffix}' + self.pf.add_parameters(filenames=dest_file, par_type=par_type, + zone_array=self.zone_array, + par_name_base=par_name_base, + pargp=f'{tag}_zone', + pp_space=1, geostruct=self.grid_gs, + par_style=par_style + ) + if par_type != 'pilotpoints': + template_file = (self.pf.tpl_d / f'{par_name_base}_inst0_grid.csv.tpl') + assert template_file.exists() + else: + template_file = (self.pf.tpl_d / f'{par_name_base}_inst0pp.dat.tpl') + assert template_file.exists() + + # make the PEST control file + pst = self.pf.build_pst() + rel_tpl = pyemu.utils.pst_from.get_relative_filepath(self.pf.new_d, template_file) + assert rel_tpl in pst.template_files + + # check the mult2model info + df = pd.read_csv(self.dest_ws / 'mult2model_info.csv') + mult_file = Path(df['mlt_file'].values[mult2model_row]) + + # check applying the parameters (in the dest or template ws) + os.chdir(self.dest_ws) + # first delete the model file in the template ws + model_file = df['model_file'].values[mult2model_row] + os.remove(model_file) + # manually apply a multipler + mult = 4 + if par_type != "pilotpoints": + mult_values = np.loadtxt(mult_file) + mult_values[:] = mult + np.savetxt(mult_file, mult_values) + else: + ppdata = pp_file_to_dataframe(df['pp_file'].values[mult2model_row]) + ppdata['parval1'] = mult + write_pp_file(df['pp_file'].values[mult2model_row], ppdata) + # apply the multiplier + pyemu.helpers.apply_list_and_array_pars(arr_par_file='mult2model_info.csv') + # model files should have been remade by apply_list_and_array_pars + for model_file in df['model_file']: + assert os.path.exists(model_file) + result = np.loadtxt(model_file) + # results should be the same with default multipliers of 1 + # assume details of parameterization are handled by other tests + + # not sure why zone 2 is coming back as invalid (1e30) + zone1 = self.zone_array == 1 + assert np.allclose(result[zone1], self.array_data[zone1] * mult) + + # revert to original wd + os.chdir(self.original_wd) + mult2model_row += 1 + + def test_add_direct_array_parameters(self): + """test setting up array parameters with a list of array text + files in a subfolder. + """ + tag = 'hk' + par_styles = ['direct', #'direct' + ] + array_files = ['hk_{}_{}.dat', 'external/hk_{}_{}.dat'] + for par_style in par_styles: + mult2model_row = 0 + for j, array_file in enumerate(array_files): + + par_types = {#'constant': 'cn', + 'zone': 'zn', + 'grid': 'gr'} + for i, (par_type, suffix) in enumerate(par_types.items()): + # (re)create the file + dest_file = array_file.format(mult2model_row, suffix) + + # make a new input array file with initial values + arr = np.loadtxt(self.array_file) + parval = 8 + arr[:] = parval + np.savetxt(Path(self.dest_ws, dest_file), arr) + + # add the parameters + par_name_base = f'{tag}_{suffix}' + self.pf.add_parameters(filenames=dest_file, par_type=par_type, + zone_array=self.zone_array, + par_name_base=par_name_base, + pargp=f'{tag}_zone', + par_style=par_style + ) + template_file = (self.pf.tpl_d / f'{Path(dest_file).name}.tpl') + assert template_file.exists() + + # make the PEST control file + pst = self.pf.build_pst() + rel_tpl = pyemu.utils.pst_from.get_relative_filepath(self.pf.new_d, template_file) + assert rel_tpl in pst.template_files + + # check the mult2model info + df = pd.read_csv(self.dest_ws / 'mult2model_info.csv') + + # check applying the parameters (in the dest or template ws) + os.chdir(self.dest_ws) + + # first delete the model file that was in the template ws + model_file = df['model_file'].values[mult2model_row] + assert model_file == dest_file + os.remove(model_file) + + # pretend that PEST created the input files + # values from dest_file above formed basis for parval in PEST control data + # PEST input file is set up as the org/ version + # apply_list_and_array_pars then takes the org/ version and writes model_file + np.savetxt(pst.input_files[mult2model_row], arr) + + pyemu.helpers.apply_list_and_array_pars(arr_par_file='mult2model_info.csv') + # model files should have been remade by apply_list_and_array_pars + for model_file in df['model_file']: + assert os.path.exists(model_file) + result = np.loadtxt(model_file) + # results should be the same with default multipliers of 1 + # assume details of parameterization are handled by other tests + + # not sure why zone 2 is coming back as invalid (1e30) + zone1 = self.zone_array == 1 + assert np.allclose(result[zone1], parval) + + # revert to original wd + os.chdir(self.original_wd) + mult2model_row += 1 + + def test_add_array_parameters_to_file_list(self): + """test setting up array parameters with a list of array text + files in a subfolder. + """ + tag = 'r' + array_file_input = ['external/r0.dat', + 'external/r1.dat', + 'external/r2.dat'] + for file in array_file_input: + shutil.copy(self.array_file, Path(self.dest_ws, file)) + + self.pf.add_parameters(filenames=array_file_input, par_type='zone', + zone_array=self.zone_array, + par_name_base=tag, # basename for parameters that are set up + pargp=f'{tag}_zone', # Parameter group to assign pars to. + ) + # make the PEST control file + pst = self.pf.build_pst() + # check the mult2model info + df = pd.read_csv(self.dest_ws / 'mult2model_info.csv') + mult_file = Path(df['mlt_file'].values[0]) + + # check applying the parameters (in the dest or template ws) + os.chdir(self.dest_ws) + # first delete the model file in the template ws + for model_file in df['model_file']: + os.remove(model_file) + # manually apply a multipler + mult = 4 + mult_values = np.loadtxt(mult_file) + mult_values[:] = mult + np.savetxt(mult_file, mult_values) + # apply the multiplier + pyemu.helpers.apply_list_and_array_pars(arr_par_file='mult2model_info.csv') + # model files should have been remade by apply_list_and_array_pars + for model_file in df['model_file']: + assert os.path.exists(model_file) + result = np.loadtxt(model_file) + # results should be the same with default multipliers of 1 + # assume details of parameterization are handled by other tests + assert np.allclose(result, self.array_data * mult) + + # revert to original wd + os.chdir(self.original_wd) + + @classmethod + def teardown(cls): + # cleanup + os.chdir(cls.original_wd) + shutil.rmtree(cls.sim_ws / '../external_files') + shutil.rmtree(cls.sim_ws) + shutil.rmtree(cls.dest_ws) + + +def test_get_filepath(): + from pyemu.utils.pst_from import get_filepath + + input_expected = [(('folder', 'file.txt'), Path('folder/file.txt')), + ((Path('folder'), 'file.txt'), Path('folder/file.txt')), + (('folder', Path('file.txt')), Path('folder/file.txt')), + ((Path('folder'), Path('file.txt')), Path('folder/file.txt')), + ] + for input, expected in input_expected: + result = get_filepath(*input) + assert result == expected if __name__ == "__main__": - # freyberg_test() - # freyberg_prior_build_test() + #freyberg_test() + #freyberg_prior_build_test() #mf6_freyberg_test() #mf6_freyberg_shortnames_test() # mf6_freyberg_da_test() diff --git a/autotest/smoother/10par_xsec/template copy/.DS_Store b/autotest/smoother/10par_xsec/template copy/.DS_Store deleted file mode 100644 index e9f990873..000000000 Binary files a/autotest/smoother/10par_xsec/template copy/.DS_Store and /dev/null differ diff --git a/autotest/smoother/10par_xsec/template/.DS_Store b/autotest/smoother/10par_xsec/template/.DS_Store deleted file mode 100644 index e9f990873..000000000 Binary files a/autotest/smoother/10par_xsec/template/.DS_Store and /dev/null differ diff --git a/bin/.DS_Store b/bin/.DS_Store deleted file mode 100644 index d2ac4a334..000000000 Binary files a/bin/.DS_Store and /dev/null differ diff --git a/bin/linux/.DS_Store b/bin/linux/.DS_Store deleted file mode 100644 index 5008ddfcf..000000000 Binary files a/bin/linux/.DS_Store and /dev/null differ diff --git a/bin/win/.DS_Store b/bin/win/.DS_Store deleted file mode 100644 index 5008ddfcf..000000000 Binary files a/bin/win/.DS_Store and /dev/null differ diff --git a/docs/.DS_Store b/docs/.DS_Store deleted file mode 100644 index 76fbb4dc9..000000000 Binary files a/docs/.DS_Store and /dev/null differ diff --git a/docs/_build/.DS_Store b/docs/_build/.DS_Store deleted file mode 100644 index ab7a57b08..000000000 Binary files a/docs/_build/.DS_Store and /dev/null differ diff --git a/docs/_build/html/.DS_Store b/docs/_build/html/.DS_Store deleted file mode 100644 index 579828c8e..000000000 Binary files a/docs/_build/html/.DS_Store and /dev/null differ diff --git a/pyemu/utils/helpers.py b/pyemu/utils/helpers.py index 2e4ede8d9..eb05fecaf 100644 --- a/pyemu/utils/helpers.py +++ b/pyemu/utils/helpers.py @@ -3613,36 +3613,37 @@ def _process_model_file(model_file, df): raise Exception("wrong number of org_files for {0}".format(model_file)) org_arr = np.loadtxt(org_file[0]) - for mlt in df_mf.mlt_file: - if pd.isna(mlt): - continue - mlt_data = np.loadtxt(mlt) - if org_arr.shape != mlt_data.shape: - raise Exception( - "shape of org file {}:{} differs from mlt file {}:{}".format( - org_file, org_arr.shape, mlt, mlt_data.shape + if 'mlt_file' in df_mf.columns: + for mlt in df_mf.mlt_file: + if pd.isna(mlt): + continue + mlt_data = np.loadtxt(mlt) + if org_arr.shape != mlt_data.shape: + raise Exception( + "shape of org file {}:{} differs from mlt file {}:{}".format( + org_file, org_arr.shape, mlt, mlt_data.shape + ) ) - ) - org_arr *= np.loadtxt(mlt) - if "upper_bound" in df.columns: - ub_vals = df_mf.upper_bound.value_counts().dropna().to_dict() - if len(ub_vals) == 0: - pass - elif len(ub_vals) > 1: - print(ub_vals) - raise Exception("different upper bound values for {0}".format(org_file)) - else: - ub = float(list(ub_vals.keys())[0]) - org_arr[org_arr > ub] = ub - if "lower_bound" in df.columns: - lb_vals = df_mf.lower_bound.value_counts().dropna().to_dict() - if len(lb_vals) == 0: - pass - elif len(lb_vals) > 1: - raise Exception("different lower bound values for {0}".format(org_file)) - else: - lb = float(list(lb_vals.keys())[0]) - org_arr[org_arr < lb] = lb + org_arr *= np.loadtxt(mlt) + if "upper_bound" in df.columns: + ub_vals = df_mf.upper_bound.value_counts().dropna().to_dict() + if len(ub_vals) == 0: + pass + elif len(ub_vals) > 1: + print(ub_vals) + raise Exception("different upper bound values for {0}".format(org_file)) + else: + ub = float(list(ub_vals.keys())[0]) + org_arr[org_arr > ub] = ub + if "lower_bound" in df.columns: + lb_vals = df_mf.lower_bound.value_counts().dropna().to_dict() + if len(lb_vals) == 0: + pass + elif len(lb_vals) > 1: + raise Exception("different lower bound values for {0}".format(org_file)) + else: + lb = float(list(lb_vals.keys())[0]) + org_arr[org_arr < lb] = lb np.savetxt(model_file, np.atleast_2d(org_arr), fmt="%15.6E", delimiter="") diff --git a/pyemu/utils/pst_from.py b/pyemu/utils/pst_from.py index e47496c4d..6d5880f7a 100644 --- a/pyemu/utils/pst_from.py +++ b/pyemu/utils/pst_from.py @@ -1,5 +1,6 @@ from __future__ import print_function, division import os +from pathlib import Path from datetime import datetime import shutil import inspect @@ -17,7 +18,7 @@ # the tolerable percent difference (100 * (max - min)/mean) # used when checking that constant and zone type parameters are in fact constant (within # a given zone) -# DIRECT_PAR_PERCENT_DIFF_TOL = 1.0 +DIRECT_PAR_PERCENT_DIFF_TOL = 1.0 def _get_datetime_from_str(sdt): @@ -58,6 +59,8 @@ class PstFrom(object): zero_based (`bool`): flag if the model uses zero-based indices, Default is True start_datetime (`str`): a string that can be case to a datatime instance the represents the starting datetime of the model + tpl_subfolder (`str`): option to write template files to a subfolder within ``new_d``. + Default is False (write template files to ``new_d``). """ @@ -70,12 +73,16 @@ def __init__( spatial_reference=None, zero_based=True, start_datetime=None, + tpl_subfolder=None, ): - self.original_d = original_d - self.new_d = new_d + self.original_d = Path(original_d) + self.new_d = Path(new_d) self.original_file_d = None self.mult_file_d = None + self.tpl_d = self.new_d + if tpl_subfolder is not None: + self.tpl_d = Path(self.new_d, tpl_subfolder) self.remove_existing = bool(remove_existing) self.zero_based = bool(zero_based) self._spatial_reference = spatial_reference @@ -287,7 +294,7 @@ def write_forward_run(self): self.logger.statement("forward_run line:{0}".format(new_sys_cmd)) alist.append(new_sys_cmd) - with open(os.path.join(self.new_d, self.py_run_file), "w") as f: + with open(self.new_d / self.py_run_file, "w") as f: f.write( "import os\nimport multiprocessing as mp\nimport numpy as np" + "\nimport pandas as pd\n" @@ -366,7 +373,7 @@ def build_prior( cov = pyemu.Cov.from_parameter_data(self.pst, sigma_range=sigma_range) if filename is None: - filename = self.pst.filename.replace(".pst", ".prior.cov") + filename = self.pst.filename.with_suffix('.prior.cov') if fmt != "none": self.logger.statement( "saving prior covariance matrix to file {0}".format(filename) @@ -510,12 +517,14 @@ def build_pst(self, filename=None, update=False, version=1): update = False else: if filename is None: - filename = os.path.join(self.new_d, self.pst.filename) + filename = get_filepath(self.new_d, self.pst.filename) else: if filename is None: - filename = os.path.join(self.new_d, self.original_d) - if os.path.dirname(filename) in ["", "."]: - filename = os.path.join(self.new_d, filename) + filename = Path(self.new_d, self.original_d.name).with_suffix('.pst') + filename = get_filepath(self.new_d, filename) + + #if os.path.dirname(filename) in ["", "."]: + # filename = os.path.join(self.new_d, filename) if update: pst = self.pst @@ -537,9 +546,7 @@ def build_pst(self, filename=None, update=False, version=1): par_data = pd.concat(self.par_dfs).loc[:, par_data_cols] # info relating parameter multiplier files to model input files parfile_relations = self.parfile_relations - parfile_relations.to_csv( - os.path.join(self.new_d, "mult2model_info.csv") - ) + parfile_relations.to_csv(self.new_d / "mult2model_info.csv") if not any( ["apply_list_and_array_pars" in s for s in self.pre_py_cmds] ): @@ -597,7 +604,7 @@ def _setup_dirs(self): self.logger.lraise( "original_d '{0}' is not a directory" "".format(self.original_d) ) - if os.path.exists(self.new_d): + if self.new_d.exists(): if self.remove_existing: self.logger.log("removing existing new_d '{0}'" "".format(self.new_d)) shutil.rmtree(self.new_d) @@ -620,19 +627,21 @@ def _setup_dirs(self): "".format(self.original_d, self.new_d) ) - self.original_file_d = os.path.join(self.new_d, "org") - if os.path.exists(self.original_file_d): + self.original_file_d = self.new_d / "org" + if self.original_file_d.exists(): self.logger.lraise( "'org' subdir already exists in new_d '{0}'" "".format(self.new_d) ) - os.makedirs(self.original_file_d) + self.original_file_d.mkdir(exist_ok=True) - self.mult_file_d = os.path.join(self.new_d, "mult") - if os.path.exists(self.mult_file_d): + self.mult_file_d = self.new_d / "mult" + if self.mult_file_d.exists(): self.logger.lraise( "'mult' subdir already exists in new_d '{0}'" "".format(self.new_d) ) - os.makedirs(self.mult_file_d) + self.mult_file_d.mkdir(exist_ok=True) + + self.tpl_d.mkdir(exist_ok=True) self.logger.log("setting up dirs") @@ -665,10 +674,18 @@ def _par_prep( ) if index_cols is not None: for filename, sep, fmt, skip in zip(filenames, seps, fmts, skip_rows): - file_path = os.path.join(self.new_d, filename) - self.logger.log("loading list {0}".format(file_path)) + # cast to pathlib.Path instance + # input file path may or may not include original_d + #input_filepath = get_filepath(self.original_d, filename) + rel_filepath = get_relative_filepath(self.original_d, filename) + dest_filepath = self.new_d / rel_filepath + + # data file in dest_ws/org/ folder + org_file = self.original_file_d / rel_filepath.name + + self.logger.log("loading list {0}".format(dest_filepath)) df, storehead = self._load_listtype_file( - filename, index_cols, use_cols, fmt, sep, skip, c_char + rel_filepath, index_cols, use_cols, fmt, sep, skip, c_char ) # Currently just passing through comments in header (i.e. before the table data) stkeys = np.array( @@ -682,7 +699,7 @@ def _par_prep( if fmt.lower() == "free": if sep is None: sep = " " - if filename.lower().endswith(".csv"): + if rel_filepath.suffix.lower() == ".csv": sep = "," if df.columns.is_integer(): hheader = False @@ -690,7 +707,7 @@ def _par_prep( hheader = df.columns self.logger.statement( - "loaded list '{0}' of shape {1}" "".format(file_path, df.shape) + "loaded list '{0}' of shape {1}" "".format(dest_filepath, df.shape) ) # TODO BH: do we need to be careful of the format of the model # files? -- probs not necessary for the version in @@ -702,11 +719,15 @@ def _par_prep( # input file format (and sep), right? # write orig version of input file to `org` (e.g.) dir + # make any subfolders if they don't exist + #org_path = Path(self.original_file_d, rel_file_path.parent) + #org_path.mkdir(exist_ok=True) + if len(storehead) != 0: kwargs = {} if "win" in platform.platform().lower(): kwargs = {"line_terminator": "\n"} - with open(os.path.join(self.original_file_d, filename), "w") as fp: + with open(org_file, "w") as fp: lc = 0 fr = 0 for key in sorted(storehead.keys()): @@ -742,17 +763,16 @@ def _par_prep( **kwargs ) else: - df.to_csv( - os.path.join(self.original_file_d, filename), + df.to_csv(org_file, index=False, sep=",", header=hheader, ) - file_dict[filename] = df - fmt_dict[filename] = fmt - sep_dict[filename] = sep - skip_dict[filename] = skip - self.logger.log("loading list {0}".format(file_path)) + file_dict[rel_filepath] = df + fmt_dict[rel_filepath] = fmt + sep_dict[rel_filepath] = sep + skip_dict[rel_filepath] = skip + self.logger.log("loading list {0}".format(dest_filepath)) # check for compatibility fnames = list(file_dict.keys()) @@ -770,9 +790,13 @@ def _par_prep( ) else: # load array type files # loop over model input files - for filename, sep, fmt, skip in zip(filenames, seps, fmts, skip_rows): + for input_filena, sep, fmt, skip in zip(filenames, seps, fmts, skip_rows): + # cast to pathlib.Path instance + # input file path may or may not include original_d + input_filena = get_filepath(self.original_d, input_filena) if fmt.lower() == "free": - if filename.lower().endswith(".csv"): + # cast to string to work with pathlib objects + if input_filena.suffix.lower() == ".csv": if sep is None: sep = "," else: @@ -780,26 +804,27 @@ def _par_prep( raise NotImplementedError( "Only free format array " "par files currently supported" ) - file_path = os.path.join(self.new_d, filename) - self.logger.log("loading array {0}".format(file_path)) - if not os.path.exists(file_path): + # file path relative to model workspace + rel_filepath = input_filena.relative_to(self.original_d) + dest_filepath = self.new_d / rel_filepath + self.logger.log("loading array {0}".format(dest_filepath)) + if not dest_filepath.exists(): self.logger.lraise( - "par filename '{0}' not found ".format(file_path) + "par filename '{0}' not found ".format(dest_filepath) ) # read array type input file - arr = np.loadtxt( - os.path.join(self.new_d, filename), delimiter=sep, ndmin=2 - ) - self.logger.log("loading array {0}".format(file_path)) + arr = np.loadtxt(dest_filepath, delimiter=sep, ndmin=2) + self.logger.log("loading array {0}".format(dest_filepath)) self.logger.statement( - "loaded array '{0}' of shape {1}".format(filename, arr.shape) + "loaded array '{0}' of shape {1}".format(input_filena, arr.shape) ) # save copy of input file to `org` dir - np.savetxt(os.path.join(self.original_file_d, filename), arr) - file_dict[filename] = arr - fmt_dict[filename] = fmt - sep_dict[filename] = sep - skip_dict[filename] = skip + # make any subfolders if they don't exist + np.savetxt(self.original_file_d / rel_filepath.name, arr) + file_dict[rel_filepath] = arr + fmt_dict[rel_filepath] = fmt + sep_dict[rel_filepath] = sep + skip_dict[rel_filepath] = skip # check for compatibility fnames = list(file_dict.keys()) for i in range(len(fnames)): @@ -1009,7 +1034,7 @@ def add_observations( obsgp = _check_var_len(obsgp, ncol, fill=True) df_ins = pyemu.pst_utils.csv_to_ins_file( df.set_index("idx_str"), - ins_filename=os.path.join(self.new_d, insfile), + ins_filename=self.new_d / insfile, only_cols=use_cols, only_rows=use_rows, marker="~", @@ -1025,7 +1050,7 @@ def add_observations( "building insfile for tabular output file {0}" "".format(filename) ) new_obs = self.add_observations_from_ins( - ins_file=insfile, out_file=os.path.join(self.new_d, filename) + ins_file=insfile, out_file=self.new_d / filename ) if "obgnme" in df_ins.columns: new_obs.loc[:, "obgnme"] = df_ins.loc[new_obs.index, "obgnme"] @@ -1080,14 +1105,14 @@ def add_observations_from_ins( """ # lifted almost completely from `Pst().add_observation()` if os.path.dirname(ins_file) in ["", "."]: - ins_file = os.path.join(self.new_d, ins_file) + ins_file = self.new_d / ins_file pst_path = "." if not os.path.exists(ins_file): self.logger.lraise( "ins file not found: {0}, {1}" "".format(os.getcwd(), ins_file) ) if out_file is None: - out_file = ins_file.replace(".ins", "") + out_file = str(ins_file).replace(".ins", "") if ins_file == out_file: self.logger.lraise("ins_file == out_file, doh!") @@ -1180,7 +1205,7 @@ def add_parameters( rebuild_pst=False, alt_inst_str="inst", comment_char=None, - par_style="multiplier", + par_style="multiplier" ): """ Add list or array style model input files to PstFrom object. @@ -1275,8 +1300,11 @@ def add_parameters( par_style ) ) - if isinstance(filenames, str): + if isinstance(filenames, str) or isinstance(filenames, Path): filenames = [filenames] + # data file paths relative to the model_ws + filenames = [get_relative_filepath(self.original_d, filename) + for filename in filenames] if par_style == "direct": if len(filenames) != 1: self.logger.lraise( @@ -1422,19 +1450,19 @@ def add_parameters( if par_style == "multiplier": mlt_filename = "{0}_{1}.csv".format(par_name_store, par_type) # pst input file (for tpl->in pair) is multfile (in mult dir) - in_filepst = os.path.relpath( - os.path.join(self.mult_file_d, mlt_filename), self.new_d - ) - tpl_filename = mlt_filename + ".tpl" - in_fileabs = os.path.join(self.mult_file_d, mlt_filename) + in_fileabs = self.mult_file_d / mlt_filename + # pst input file (for tpl->in pair) is multfile (in mult dir) + in_filepst = in_fileabs.relative_to(self.new_d) + tpl_filename = self.tpl_d / (mlt_filename + ".tpl") else: mlt_filename = np.NaN - # pst input file (for tpl->in pair) is multfile (in mult dir) - in_filepst = os.path.relpath( - os.path.join(self.original_file_d, filenames[0]), self.new_d - ) - tpl_filename = filenames[0] + ".tpl" - in_fileabs = os.path.join(self.new_d, in_filepst) + # absolute path to org/datafile + in_fileabs = self.original_file_d / filenames[0].name + # pst input file (for tpl->in pair) is orgfile (in org dir) + # relative path to org/datafile (relative to dest model workspace): + in_filepst = in_fileabs.relative_to(self.new_d) + tpl_filename = self.tpl_d / (filenames[0].name + ".tpl") + pp_filename = None # setup placeholder variables fac_filename = None @@ -1459,12 +1487,13 @@ def add_parameters( "writing list-based template file '{0}'".format(tpl_filename) ) # Generate tabular type template - also returns par data - dfs = [file_dict[filename] for filename in filenames] + # relative file paths are in file_dict as Path instances (kludgey) + dfs = [file_dict[Path(filename)] for filename in filenames] df = write_list_tpl( filenames, dfs, par_name_base, - tpl_filename=os.path.join(self.new_d, tpl_filename), + tpl_filename=tpl_filename, par_type=par_type, suffix="", index_cols=index_cols, @@ -1504,7 +1533,7 @@ def add_parameters( # Generate array type template - also returns par data df = write_array_tpl( name=par_name_base[0], - tpl_filename=os.path.join(self.new_d, tpl_filename), + tpl_filename=tpl_filename, suffix="", par_type=par_type, zone_array=zone_array, @@ -1576,7 +1605,8 @@ def add_parameters( # pst inputfile (for tpl->in pair) is # par_name_storepp.dat table (in pst ws) in_filepst = pp_filename - tpl_filename = pp_filename + ".tpl" + tpl_filename = self.tpl_d / (pp_filename + ".tpl") + #tpl_filename = get_relative_filepath(self.new_d, tpl_filename) if pp_space is None: # default spacing if not passed self.logger.warn("pp_space is None, using 10...\n") pp_space = 10 @@ -1630,10 +1660,8 @@ def add_parameters( prefix_dict=pp_dict, every_n_cell=pp_space, pp_dir=self.new_d, - tpl_dir=self.new_d, - shapename=os.path.join( - self.new_d, "{0}.shp".format(par_name_store) - ), + tpl_dir=self.tpl_d, + shapename=str(self.new_d / "{0}.shp".format(par_name_store)), longnames=self.longnames, ) df.set_index("parnme", drop=False, inplace=True) @@ -1672,10 +1700,8 @@ def add_parameters( if not fac_processed: # TODO need better way of naming squential fac_files? self.logger.log("calculating factors for pargp={0}".format(pg)) - fac_filename = os.path.join( - self.new_d, "{0}pp.fac".format(par_name_store) - ) - var_filename = fac_filename.replace(".fac", ".var.dat") + fac_filename = self.new_d / "{0}pp.fac".format(par_name_store) + var_filename = fac_filename.with_suffix('.var.dat') self.logger.statement( "saving krige variance file:{0}".format(var_filename) ) @@ -1719,9 +1745,7 @@ def add_parameters( relate_parfiles = [] for mod_file in file_dict.keys(): mult_dict = { - "org_file": os.path.join( - *os.path.split(self.original_file_d)[1:], mod_file - ), + "org_file": Path(self.original_file_d.name, mod_file.name), "model_file": mod_file, "use_cols": use_cols, "index_cols": index_cols, @@ -1732,9 +1756,7 @@ def add_parameters( "lower_bound": ult_lbound, } if par_style == "multiplier": - mult_dict["mlt_file"] = os.path.join( - *os.path.split(self.mult_file_d)[1:], mlt_filename - ) + mult_dict["mlt_file"] = Path(self.mult_file_d.name, mlt_filename) if pp_filename is not None: # if pilotpoint need to store more info @@ -1754,7 +1776,7 @@ def add_parameters( # df.loc[:,"tpl_filename"] = tpl_filename # store tpl --> in filename pair - self.tpl_filenames.append(tpl_filename) + self.tpl_filenames.append(get_relative_filepath(self.new_d, tpl_filename)) self.input_filenames.append(in_filepst) for file_name in file_dict.keys(): # store mult --> original file pairs @@ -1889,14 +1911,14 @@ def _load_listtype_file( "use_cols also listed in " "index_cols: {0}".format(str(i)) ) - file_path = os.path.join(self.new_d, filename) + file_path = self.new_d / filename if not os.path.exists(file_path): self.logger.lraise("par filename '{0}' not found " "".format(file_path)) self.logger.log("reading list {0}".format(file_path)) if fmt.lower() == "free": if sep is None: sep = "\s+" - if filename.lower().endswith(".csv"): + if Path(filename).suffix == ".csv": sep = "," else: # TODO support reading fixed-format @@ -2822,7 +2844,7 @@ def grid_namer(i, j): def _check_diff(org_arr, input_filename, zval=None): percent_diff = 100.0 * np.abs( - np.nanmax(org_arr) - np.nanmin(org_arr) / np.nanmean(org_arr) + (np.nanmax(org_arr) - np.nanmin(org_arr)) / np.nanmean(org_arr) ) if percent_diff > DIRECT_PAR_PERCENT_DIFF_TOL: message = "_check_diff() error: direct par for file '{0}'".format( @@ -2833,3 +2855,21 @@ def _check_diff(org_arr, input_filename, zval=None): if zval is not None: message += " in zone {0}".format(zval) raise Exception(message) + + +def get_filepath(folder, filename): + """Return a path to a file within a folder, + without repeating the folder in the output path, + if the input filename (path) already contains the folder.""" + filename = Path(filename) + folder = Path(folder) + if folder not in filename.parents: + filename = folder / filename + return filename + + +def get_relative_filepath(folder, filename): + """Like :func:`~pyemu.utils.pst_from.get_filepath`, except + return path for filename relative to folder. + """ + return get_filepath(folder, filename).relative_to(folder) \ No newline at end of file