In [None]:
import os
import sys
sys.path.insert(0,"..")
import numpy as np
import matplotlib.pyplot as plt
import pyemu
print(pyemu.__file__)
import flopy
import platform
from pathlib import Path
import shutil
import pandas as pd

# IPython magics: automatically reload imported modules (e.g. plot_helpers) when they change
%reload_ext autoreload
%autoreload 2
import plot_helpers as ph

In [None]:
from pyemu.emulators import DSI

In [None]:
# Template directory expected to exist already; stop early with a hint
# about which notebook to run when it does not.
t_d = "template"
template_exists = os.path.exists(t_d)
if not template_exists:
    raise Exception("need to run simple_pstfrom notebook")

# Generate some training data
## load in the FOM pest control file

In [None]:
# Load the control file "at.pst" from the template directory.
pst = pyemu.Pst(os.path.join(t_d,"at.pst"))
# Bare last expression so the notebook displays the loaded object.
pst

In [None]:
# Read "prior.csv" from the template directory as a ParameterEnsemble tied to `pst`.
pr_pe = pyemu.ParameterEnsemble.from_csv(pst, os.path.join(t_d,"prior.csv"))
# Show the ensemble dimensions; the first entry is used as the number of realizations below.
pr_pe.shape

In [None]:
# Request as many realizations as there are rows in the prior ensemble.
pst.pestpp_options["ies_num_reals"] = pr_pe.shape[0]

In [None]:
# NOTE(review): noptmax = -1 presumably makes pestpp-ies evaluate the ensemble
# once without any update iterations — confirm against the PEST++ manual.
pst.control_data.noptmax = -1

In [None]:
# Write the modified control file back over template/at.pst (version=2 format)
# so the run launched from the template directory picks up the changes above.
pst.write(os.path.join(t_d,"at.pst"),version=2)


In [None]:
# Directory name passed as `master_dir` to the training run below.
m_d = "master_train"

In [None]:
# Run pestpp-ies on at.pst from the template directory using 15 workers
# rooted in the current directory; `m_d` is passed as the master directory.
pyemu.os_utils.start_workers(
    t_d,
    "pestpp-ies",
    "at.pst",
    num_workers=15,
    worker_root=".",
    master_dir=m_d,
)

In [None]:
# Echo the master directory name used for the training run.
m_d

# let's read the oe (observation ensemble)

In [None]:
# Re-load the control file, this time from the master directory of the
# training run; `pst.ies` is queried from this object in the next cell.
pst = pyemu.Pst(os.path.join(m_d,"at.pst"))
# Display the re-loaded object.
pst



# use the prior oe as training data

In [None]:
# Take a copy of the `obsen0` results table; this is the training data
# handed to the DSI emulator further down.
data = pst.ies.obsen0.copy()

# Preview the first rows.
data.head()

# TODO: discuss transforming the data to a Gaussian distribution

In [None]:
# Histogram of one of the "oname:k_" columns in its original (untransformed) units.
data.loc[:, 'oname:k_aq_otype:arr_i:0_j:0'].hist()

In [None]:
# Columns whose name starts with "oname:k_"; these are the ones given the
# log10 transform when the DSI object is built.
logcols = [name for name in data.columns if name.startswith("oname:k_")]
print(logcols)

# generate the dsi object...

In [None]:
# Transforms are supplied as an ordered list: log10 on the "oname:k_" columns
# first, then a normal-score transform (given without a "columns" key).
transforms = [
    {"type": "log10", "columns": logcols},
    {"type": "normal_score"},  # MUST BE SEQUENTIAL!
]

# Build the emulator from the training data and fit it.
# NOTE(review): energy_threshold presumably controls how much of the
# singular-value spectrum is retained — confirm in the pyemu DSI docs.
dsi = DSI(
    pst=pst,
    data=data,
    transforms=transforms,
    energy_threshold=0.999,
)

dsi.fit()

In [None]:
# Copy of the emulator's transformed training data, kept for the
# before/after histogram comparison in the next cell.
dt = dsi.data_transformed.copy()
dt.head()

In [None]:
# Side-by-side histograms of the first log-transformed column:
# raw training data on the left, transformed data on the right.
fig, axs = plt.subplots(1, 2, figsize=(6, 4))

col = logcols[0]
panels = (("original", data), ("transformed", dt))
for ax, (title, frame) in zip(axs, panels):
    ax.set_title(title)
    ax.hist(frame.loc[:, col])

fig.tight_layout()

In [None]:
# Shape of the emulator's `s` attribute; its length is used below as the
# size of the input vector given to dsi.predict().
dsi.s.shape #singular values

In [None]:
# All-zero vector with the same shape and dtype as dsi.s; it is the input
# to dsi.predict() in the next cell.
pvals = np.zeros_like(dsi.s)
pvals

In [None]:
# Emulator prediction for the all-zero input vector.
svals = dsi.predict(pvals)
svals

In [None]:
# Column means of the training data, for comparison with the zero-input prediction.
data.mean()

In [None]:
# Difference between the zero-input prediction and the training-data column
# means, sorted ascending so the largest deviations sit at the two ends.
(svals - data.mean()).sort_values()

In [None]:
# Build an emulator-generated ensemble with the same index and columns as the
# training data: one dsi.predict() call per row, each fed a standard-normal
# draw of length dsi.s.shape[0].
# A seeded generator keeps the ensemble identical across re-runs.
rng = np.random.default_rng(42)
n_inputs = dsi.s.shape[0]

dsiprior = data.copy()
dsiprior.loc[:,:] = np.nan
for i in data.index.values:
    # Deliberately NOT named `svals`: that name holds the zero-input
    # prediction from an earlier cell and is read again by the plotting cell,
    # so overwriting it here would make the plotted column depend on the
    # last random draw.
    real_vals = dsi.predict(rng.normal(0, 1, size=n_inputs))
    dsiprior.loc[i, real_vals.index] = real_vals.values


In [None]:
# Overlay histograms of the training data and the emulator-generated ensemble
# for a single column, with each sample's mean marked by a vertical line.
fig, ax = plt.subplots(1, 1, figsize=(4, 4))

# Column with the largest value of (svals - training-data column mean).
col = (svals - data.mean()).sort_values().index[-1]

org_vals = data.loc[:, col]
dsi_vals = dsiprior.loc[:, col]

# Shared bin edges spanning both samples.
bmin = min(org_vals.min(), dsi_vals.min())
bmax = max(org_vals.max(), dsi_vals.max())
bins = np.linspace(bmin, bmax, 20)

for vals, label in ((org_vals, "org"), (dsi_vals, "dsi")):
    ax.hist(vals, alpha=0.5, label=label, bins=bins)

# Read the y-limits after the histograms are drawn so the mean markers
# span the plotted height.
ymin, ymax = ax.get_ylim()
ax.vlines(org_vals.mean(), ymin, ymax, label="org mean")
ax.vlines(dsi_vals.mean(), ymin, ymax, label="dsi mean", color='orange', linestyle='--')
ax.legend()
ax.set_title(col)
fig.tight_layout()

# prepare the dsi pestpp folder

In [None]:
# Template directory for the DSI run; passed as `t_d` to dsi.prepare_pestpp().
dsi_t_d="template_dsi"

In [None]:
# Set up the DSI run in `dsi_t_d`; the returned control-file object is
# adjusted and written out as dsi.pst in the cells below.
dpst = dsi.prepare_pestpp(t_d=dsi_t_d)

In [None]:
# Display the control-file object returned by prepare_pestpp().
dpst

In [None]:
# Run settings for the DSI case: noptmax of 3 and 100 realizations.
# NOTE(review): noptmax is presumably the number of pestpp-ies iterations — confirm.
dpst.control_data.noptmax = 3
dpst.pestpp_options["ies_num_reals"] = 100

In [None]:

# Write the DSI control file (version=2 format) into the DSI template directory.
dpst.write(os.path.join(dsi_t_d, "dsi.pst"), version=2)

# Table read from dsi_pars.csv (first column as index); it is handed to the
# python worker through `ppw_kwargs` in the next cell.
# NOTE: this rebinds `pvals`, which earlier held the all-zero numpy vector.
pvals = pd.read_csv(os.path.join(dsi_t_d, "dsi_pars.csv"), index_col=0)

# Settings for the worker launch in the next cell.
md = "master_dsi"
num_workers = 50
worker_root = "."

In [None]:
# Run pestpp-ies on dsi.pst. pyemu.helpers.dsi_pyworker is supplied as the
# python worker function and receives the fitted emulator and the dsi_pars
# table through `ppw_kwargs`.
pyemu.os_utils.start_workers(
    dsi_t_d,
    "pestpp-ies",
    "dsi.pst",
    num_workers=num_workers,
    worker_root=worker_root,
    master_dir=md,
    ppw_function=pyemu.helpers.dsi_pyworker,
    ppw_kwargs={"dsi": dsi, "pvals": pvals},
)

In [None]:
# Load the control file from the DSI master directory and preview the
# `phiactual` results table of the run.
dpst = pyemu.Pst(os.path.join(md, "dsi.pst"))
dpst.ies.phiactual.head()

In [None]:
# Plot the IES results stored in the DSI master directory.
# `ph` (plot_helpers) and the autoreload magics are set up once in the first
# cell of the notebook, so they are not repeated here.
ph.plot_ies_results(md, casename="dsi", ptiter=1)

# check influence of training data size

In [None]:
# NOTE(review): nnz_obs_names is presumably the list of non-zero-weight
# observation names in the DSI control file — confirm in the pyemu docs.
nzobsnmes = dpst.nnz_obs_names

In [None]:
# Observation names whose `usecol` equals "chd(chd-1)", and histograms of
# the matching training-data columns.
obs = dpst.observation_data
is_chd = obs.usecol == "chd(chd-1)"
obsnmes = obs.loc[is_chd, "obsnme"].tolist()
df = data.loc[:, obsnmes].copy()
df.hist(bins=20, alpha=0.5, label="prior", zorder=0)


In [None]:
# Names present in both lists (an empty list means they do not overlap).
[name for name in nzobsnmes if name in obsnmes]

In [None]:
# Training-set sizes to test: for each one, fit an emulator on the first
# `nreal` rows of the training data, run pestpp-ies on it, and leave the
# results in master_dsi_<nreal> for the comparison plot.
realseq = [10,50,100,150,200,250,300]

# Columns / observations kept in the reduced problem. dict.fromkeys() drops
# any name that occurs in both lists (keeping first-seen order), so neither
# the data columns nor the observation rows end up with duplicate labels.
redux_names = list(dict.fromkeys(nzobsnmes + obsnmes))

for nreal in realseq:

    # log10 on the chd columns first, then the normal-score transform.
    transforms = [
        {"type":"log10", "columns":obsnmes},
        {"type":"normal_score"},
    ]

    # First `nreal` rows of the training data, restricted to the kept columns.
    data_redux = data.loc[:, redux_names].iloc[:nreal].copy()

    # Matching rows of the observation table, as an independent copy.
    obs_redux = obs.loc[redux_names].copy()
    obs_redux.index.name = "index"

    # No `pst` is passed here; the observation table is supplied to
    # prepare_pestpp() instead.
    dsi = DSI(
        data=data_redux,
        transforms=transforms,
        energy_threshold=.999,
        verbose=False,
    )
    dsi.fit()
    dsi_t_d = f"template_dsi_{nreal}"

    dpst = dsi.prepare_pestpp(t_d=dsi_t_d, observation_data=obs_redux)

    dpst.control_data.noptmax = 2 #NOTE THIS!
    dpst.pestpp_options["ies_num_reals"] = 500 #NOTE this!

    dpst.write(os.path.join(dsi_t_d, "dsi.pst"), version=2)

    pvals = pd.read_csv(os.path.join(dsi_t_d, "dsi_pars.csv"), index_col=0)
    md = f"master_dsi_{nreal}"
    num_workers = 50
    worker_root = "."
    pyemu.os_utils.start_workers(
        dsi_t_d, "pestpp-ies", "dsi.pst", num_workers=num_workers,
        worker_root=worker_root, master_dir=md,
        ppw_function=pyemu.helpers.dsi_pyworker,
        ppw_kwargs={
            "dsi": dsi, "pvals": pvals,
        }
    )
    dpst = pyemu.Pst(os.path.join(md, "dsi.pst"))
    # A bare expression inside a loop is not displayed by the notebook,
    # so print the preview explicitly.
    print(f"nreal={nreal}")
    print(dpst.ies.phiactual.head())


In [None]:
# Compare the runs for the training-set sizes listed in `realseq`, using the
# helper from plot_helpers (imported as `ph`).
fig,axs = ph.plot_dsi_compare_traindata(realseq)