## Table of Contents
###  [1.Pre-Processing](pre-processing.ipynb)
###  [2.Data Assimilation](Data Assimilation.ipynb)
<div class="toc" style="margin-top: 1em;">
   <ul class="toc-item" id="toc-level0">
      <li><span><a href='#part 2.1' data-toc-modified-id="part 1"><span class="toc-item-num">&nbsp;&nbsp;</span>2.1 Submit batch job</a></span></li>
      <li><span><a href='#part 2.2' data-toc-modified-id="part 2"><span class="toc-item-num">&nbsp;&nbsp;</span>2.2 Input Setting</a></span></li>
      <li><span><a href='#part 2.3' data-toc-modified-id="part 3"><span class="toc-item-num">&nbsp;&nbsp;</span>2.3 Configuration </a></span></li>
      <li><span><a href='#part 2.4' data-toc-modified-id="part 4"><span class="toc-item-num">&nbsp;&nbsp;</span>2.4 ES-MDA Workflow </a></span></li>
      <li><span><a href='#part 2.5' data-toc-modified-id="part 4"><span class="toc-item-num">&nbsp;&nbsp;</span>2.5 PFLOTRAN related functions </a></span></li>  
   </ul>
</div>
###  [3.Post-Processing](post-processing.ipynb)






<a id='part 2.1'></a>
# 2.1 Submit batch job
The shell script located at ./src can be submitted to Cori at the National Energy Research Scientific Computing Center (NERSC) without modification when using the sample data. This script needs to be modified to run on other supercomputers.

The following sections describe the implementation of the entire workflow, which uses the ES-MDA method to estimate the hydrologic exchange flux, together with its associated functions.

<a id='part 2.2'></a>
# 2.2 Input Setting 
The inputs are entered in user_specified_parameters.txt located at ./src. All of these parameters can be modified to accommodate the user's needs.



<a id='part 2.3'></a>
# 2.3 Configuration 
This section does the following: reads the input file, loads the Python packages, declares variables, and creates the folder that stores the PFLOTRAN checkpoint files and the HDF5 file containing permeability and thermal conductivity at each time step.

In [None]:
# ------------Initialize Python packages to be used------------

import os
import numpy as np
import scipy.linalg as la
import shutil as shutil
from matplotlib.ticker import FormatStrFormatter
import matplotlib
import matplotlib.pyplot as plt
import math
import h5py
import util as util
import subprocess 
import random
import time
import datetime

#--------Read In User Specified Input------------------
time_start = datetime.datetime.now()
print('\n Reading in User Specified Parameters.')
# Context managers guarantee both input files are closed even if reading fails.
with open("./src/user_specified_parameters.txt", 'r') as finput:
    input_array = finput.readlines()
with open("./dainput/1dthermal.in", 'r') as fpflotran:
    pflotranin = fpflotran.readlines()
# Debug log used by the ES-MDA workflow cell; it must stay open after this cell.
ftest = open("./src/test.txt",'w')

# Run lines of text file to define variables.
# NOTE(review): exec() runs every line of the parameter file as Python code in
# this namespace, so the file must come from a trusted source.
for line in input_array:
    if "obs_coord" in line:
        # "name = [a, b]" -> "name = np.array([a, b])"
        exec(line.replace('= [','= np.array([') + ')')
    elif "path_to_obs_data" in line:
        # Load the observation table into `obs` ...
        exec(line.replace('path_to_obs_data = ','obs = np.loadtxt(') + ')')
        # ... and keep the path variable itself defined as well.
        exec(line)
    elif "perm_range" in line or "th_cond_range" in line:
        exec(line.replace('= [','= np.array([') + ')')
    else:
        # Every other line is executed verbatim. The array-valued settings
        # above are deliberately NOT re-executed: doing so would overwrite
        # the np.array that was just created with a plain list.
        exec(line)
print(' Done.')

# create folder ./pflotran to store the PFLOTRAN related files
# (start from an empty directory on every run)
shutil.rmtree("./pflotran", ignore_errors=True)
os.makedirs("./pflotran", exist_ok=True)
# np.savetxt below does not create missing directories.
os.makedirs("./figure", exist_ok=True)

# declare the intermediate variables
nz = int(hz/dz) # number of grid blocks
# midpoints between consecutive dz-spaced levels from -hz up to 0
z = (np.arange(-hz,0,dz)+np.arange(-hz+dz,dz,dz))/2
init_hy_cond = np.random.normal(hy_cond_mean,hy_cond_sd,nreaz)
np.savetxt("./figure/init_hy_cond.txt",init_hy_cond)
init_th_cond = np.random.normal(th_cond_mean,th_cond_sd,nreaz)
# presumably converts hydraulic conductivity (per day) to permeability -- TODO confirm units
init_perm = init_hy_cond*w_vis/w_den/g/day_to_sec

# keep the initial ensemble inside the admissible permeability range
init_perm = np.clip(init_perm, perm_range[0], perm_range[1])

init_logperm = np.log(init_perm)
init_logperm_sd = np.std(init_logperm)
nobs = len(obs_coord)
ntime = np.shape(obs)[0]-1
obs_time = obs[:,0]  # first column of the observation table holds the times

# one row per assimilation step, one column per realization
perm = np.zeros((ntime,nreaz))
perm[0,:] = init_perm
th_cond = np.zeros((ntime,nreaz))
th_cond[0,:] = init_th_cond
simu_time = np.zeros((ntime,2))  # [start, end] of each assimilation window
kalman_gain_output = np.zeros((mw_length,ntime))
FNULL = open(os.devnull,'w')  # sink for the PFLOTRAN launcher's stdout



<a id='part 2.4'></a>
# 2.4 ES-MDA Workflow
This section shows the entire workflow for using ES-MDA to estimate the hydrologic exchange flux.

In [None]:
# ES-MDA driver: for every assimilation window run `niter` forecast/update
# cycles on the (log-permeability, thermal conductivity) ensemble, then seed
# the next window from the updated ensemble.
for itime in range(0,ntime):
    # [start, end] of the current assimilation window
    simu_time[itime,:] = np.array([itime*da_interval,(itime+1)*da_interval])

    collect_start = simu_time[itime-1,1] if itime > 0 else 0

    collect_index = np.where((obs_time > collect_start) & (obs_time <= simu_time[itime,1]))
    collect_times = obs_time[collect_index]
    # np.where returns a 1-tuple, so len(collect_index) is always 1;
    # the number of observation times is the length of the selection.
    ncollect = len(collect_times)
    nmeas = nobs*ncollect

    # Observations in this window with the time column dropped: (ncollect, nobs).
    obs_window = np.delete(obs[collect_index],0,1)
    # Flattened time-major, presumably matching the row order of the
    # simulated ensemble returned by util.GenerateSimuEnsemble.
    obs_values = obs_window.flatten('C')
    # Measurement standard deviation, inflated by sqrt(alpha).
    obs_sd = obs_sd_ratio*obs_window
    obs_sd = obs_sd*(math.sqrt(alpha))
    obs_sd_vec = obs_sd.flatten('C')
    # Build the diagonal observation-error covariance from the flattened vector.
    # np.diag() on the 2-D obs_sd would *extract* a diagonal instead of building
    # one, and the result would then broadcast onto every covariance entry.
    obs_err_cov = np.diag(np.square(obs_sd_vec))

    for itera in range(0,niter):
        util.GenerateDbase(itime,itera,nreaz,perm,th_cond)

        util.MakePflotranInput(pflotranin,simu_time,itime,ncollect,collect_times,da_interval)

        subprocess.call("./src/pflotran.sh {} {} {} ".format(nreaz,ncore,pflotran_exe),stdin=None, stdout=FNULL,stderr=None,shell=True)

        simu_ensemble = util.GenerateSimuEnsemble(nobs,obs_coord,z,nreaz,collect_times)
        ftest.write("simu_ensemble is:{} \n".format(simu_ensemble))

        # Perturbed observations: one noisy copy of the data per realization.
        noise = np.random.normal(0,1,nreaz*nmeas).reshape(nmeas,nreaz)
        obs_ensemble = np.repeat(obs_values.reshape(-1,1),nreaz,1)+obs_sd_vec[:,np.newaxis]*noise
        ftest.write("obs_sd is:{} \n".format(obs_sd))

        # update state vector
        state_vector = np.zeros((2,nreaz))
        state_vector[0,:] = np.log(perm[itime,:])
        state_vector[1,:] = th_cond[itime,:]

        # Joint covariance of [state; simulated data]; keep the state/data block.
        cov_state_simu = np.cov(state_vector,simu_ensemble)[0:2,2:]
        cov_simu = np.cov(simu_ensemble)
        ftest.write("cov_state-simu is: {} \n".format(cov_state_simu))
        ftest.write("cov_simu is: {}".format(cov_simu))

        # np.cov returns a 0-d array for a single measurement; atleast_2d lets
        # one code path handle any number of measurements.
        inv_cov_simuAddobs = la.inv(np.atleast_2d(cov_simu)+obs_err_cov)

        kalman_gain = cov_state_simu@inv_cov_simuAddobs

        state_vector = state_vector+kalman_gain@(obs_ensemble-simu_ensemble)

        # Write the update back, clamped to the admissible parameter ranges.
        perm[itime,:] = np.clip(np.exp(state_vector[0,:]),perm_range[0],perm_range[1])
        th_cond[itime,:] = np.clip(state_vector[1,:],th_cond_range[0],th_cond_range[1])

    # Prepare the next time step once, after all iterations have finished, so
    # that the last time step also receives all `niter` updates (a `break`
    # inside the iteration loop would stop it after the first one).
    if itime+1 < ntime:
        perm[itime+1,:] = perm[itime,:]
        ftest.write("perm[itime+1,:] is: {} \n".format(perm[itime+1,:]))

        # disturb perm: add noise so the log-permeability spread is restored
        # towards its initial standard deviation
        perm_temp = np.log(perm[itime,:])
        perm_temp = perm_temp+np.random.normal(0,np.sqrt(max(np.square(init_logperm_sd)-np.square(np.std(perm_temp)),0)),nreaz)
        perm[itime+1,:] = np.clip(np.exp(perm_temp),perm_range[0],perm_range[1])

        # same spread restoration for thermal conductivity
        th_cond_spread = np.sqrt(max(np.square(th_cond_sd)-np.square(np.std(th_cond[itime,:])),0))
        th_cond[itime+1,:] = np.clip(th_cond[itime,:]+np.random.normal(0,th_cond_spread,nreaz),th_cond_range[0],th_cond_range[1])

    # Saved after every time step, including the last one.
    np.savetxt("./figure/perm.txt",perm)

time_end = datetime.datetime.now()
time_cost = time_end-time_start
with open("timecost.txt", mode='w') as fout:
    fout.write('{}.\n'.format(time_cost))
fpflotran.close()
# Push the debug log to disk; the handle stays open so the cell can be re-run.
ftest.flush()

<a id='part 2.5'></a>
# 2.5 PFLOTRAN related functions
1. MakePflotranInput: update PFLOTRAN input file at each time step. The updated input file is put in the directory of ./pflotran along with the checkpoint files at each time step and HDF5 file storing permeability and thermal conductivity
2. GenerateDbase: create two new datasets for the estimated permeability and thermal conductivity at each time step in the Dbase.h5 file.
3. GenerateSimuEnsemble: read the simulated data (temperatures at the sensor depths) from the hdf5 file generated by PFLOTRAN at each time step.

In [None]:
def MakePflotranInput(pflotranin,simu_time,itime,ncollect,collect_times,da_interval):
    """Write ./pflotran/1dthermal.in for assimilation step `itime`.

    The template lines are copied (the caller's list is not modified) and the
    following cards are rewritten: RESTART, FINAL_TIME, the TIMES line that
    follows SNAPSHOT_FILE, PERM_ISO, THERMAL_CONDUCTIVITY_WET and STRATA.

    Parameters
    ----------
    pflotranin : list of str
        Lines of the template PFLOTRAN input file.
    simu_time : array indexed as [itime, 0|1]
        Start and end time of every assimilation window.
    itime : int
        Index of the current assimilation step.
    ncollect : int
        Number of leading entries of `collect_times` to request as snapshots.
    collect_times : sequence of float
        Observation times inside the current window.
    da_interval : number
        Length of one assimilation window.

    Raises
    ------
    IndexError
        If one of the expected keywords is missing from the template.
    ValueError
        If there is no observation time to write.
    """
    restart_lindex = [i for i, s in enumerate(pflotranin) if "RESTART" in s][0]
    restart_card = ["  RESTART \n",
                    "    FILENAME 1dthermal-restart.chk \n",
                    "    REALIZATION_DEPENDENT \n",
                    "/ \n"]
    # The first window starts from scratch; later windows restart from the
    # checkpoint. Either way a new list is built, so the template is preserved.
    if simu_time[itime,0] == 0:
        pflotranin = pflotranin[0:restart_lindex]+pflotranin[restart_lindex+1:]
    else:
        pflotranin = pflotranin[0:restart_lindex]+restart_card+pflotranin[restart_lindex+1:]

    # write the end time of simulation
    # (np.asarray: np.array_str is documented for arrays, not NumPy scalars)
    finaltime_lindex = [i for i, s in enumerate(pflotranin) if "FINAL_TIME" in s][0]
    pflotranin[finaltime_lindex] = "  FINAL_TIME "+np.array_str(np.asarray(simu_time[itime,1]))+" sec"+"\n"

    # generate observation: list every requested snapshot time, `npl` per
    # line, joined with a trailing backslash as line continuation
    snapshot_lindex = [i for i, s in enumerate(pflotranin) if "SNAPSHOT_FILE" in s][0]
    times = [np.array_str(np.asarray(t)) for t in collect_times[:ncollect]]
    if not times:
        raise ValueError("no observation times to write to the TIMES card")
    npl = 3
    rows = [" ".join(times[i:i+npl]) for i in range(0,len(times),npl)]
    header = "    TIMES sec   "
    continuation = " \\\n"+" "*(len(header)+1)
    obs_card = header+" "+continuation.join(rows)+"\n"

    # add observation time (replaces the line right after SNAPSHOT_FILE)
    pflotranin = pflotranin[0:snapshot_lindex+1]+[obs_card]+pflotranin[snapshot_lindex+2:]

    # point the material properties at this step's datasets in the Dbase file
    permiso_lindex = [i for i, s in enumerate(pflotranin) if "PERM_ISO" in s][0]
    pflotranin[permiso_lindex] = "    PERM_ISO DBASE_VALUE Permeability{} \n".format(itime)

    thercondwet_lindex = [i for i, s in enumerate(pflotranin) if "THERMAL_CONDUCTIVITY_WET" in s][0]
    pflotranin[thercondwet_lindex] = "  THERMAL_CONDUCTIVITY_WET DBASE_VALUE ThermalConductivity{} \n".format(itime)

    #prepare strata_card
    # NOTE(review): the replaced span starts one line BEFORE the first line
    # containing "STRATA" and covers len(strata_card) lines; this presumably
    # relies on the template's layout (a blank line before STRATA) -- confirm.
    strata_card_lindex = [i for i, s in enumerate(pflotranin) if "STRATA" in s][0]-1
    strata_card = ["STRATA\n",
                   "  REGION all\n",
                   "  MATERIAL Alluvium\n",
                   "  START_TIME 0 sec\n",
                   "  FINAL_TIME "+str(da_interval*(itime+1))+" sec"+"\n",
                   "END\n",
                   "\n"]

    pflotranin = pflotranin[0:strata_card_lindex]+strata_card+pflotranin[strata_card_lindex+len(strata_card):]

    with open("./pflotran/1dthermal.in",'w') as new_pflotranin:
        new_pflotranin.writelines(pflotranin)
    return

def GenerateDbase(itime,itera,nreaz,perm,th_cond):
    """Store the ensemble parameters of step `itime` in ./pflotran/Dbase.h5.

    Two datasets are written, "Permeability<itime>" and
    "ThermalConductivity<itime>", holding row `itime` of `perm` and
    `th_cond`. The file is created from scratch when `itime` is 0 and opened
    for update otherwise. An existing dataset of the same name is replaced,
    so the call can be repeated for every iteration of a time step.

    Parameters
    ----------
    itime : int
        Assimilation step; selects the row to store and the dataset suffix.
    itera : int
        Iteration index within the step. Kept for interface compatibility;
        replacement of existing datasets no longer depends on it.
    nreaz : int
        Number of realizations (not used here).
    perm, th_cond : 2-D arrays indexed as [itime, realization]
        Permeability and thermal conductivity ensembles.
    """
    filename = "./pflotran/Dbase.h5"
    mode = 'w' if itime == 0 else 'r+'

    datasets = [("Permeability{}".format(itime), perm[itime,:]),
                ("ThermalConductivity{}".format(itime), th_cond[itime,:])]

    # The context manager closes the file even if a write fails.
    with h5py.File(filename,mode) as h5file:
        for name, values in datasets:
            if name in h5file:
                del h5file[name]
            h5file.create_dataset(name,data=values)
    return
                          
def GenerateSimuEnsemble(nobs,obs_coord,z,nreaz,collect_times):
    """Read the simulated temperatures at the observation points.

    For every realization r (1-based) the file ./pflotran/1dthermalR<r>.h5 is
    opened once and, for every time in `collect_times`, the
    "Temperature [C]" dataset of the group "Time: <%12.5E> s" is sampled at
    the grid cells closest to the observation coordinates.

    Parameters
    ----------
    nobs : int
        Number of observation points.
    obs_coord : sequence of float
        Coordinates of the observation points.
    z : 1-D array
        Grid coordinates used to locate the nearest cell.
    nreaz : int
        Number of realizations.
    collect_times : sequence of float
        Snapshot times to read.

    Returns
    -------
    ndarray of shape (nobs*len(collect_times), nreaz)
        One column per realization; rows are ordered time by time, with the
        `nobs` observation points of one time stored contiguously.
    """
    # index of the grid cell nearest to each observation coordinate
    obs_cell = np.array([np.argmin(np.absolute(z-obs_coord[i])) for i in range(nobs)],dtype=int)

    simu_ensemble = np.zeros((nobs*len(collect_times),nreaz))
    for ireaz in range(nreaz):
        # Open each realization's file once; `with` closes it even on error
        # and also when there are no times to read.
        with h5py.File("./pflotran/1dthermalR{}.h5".format(ireaz+1),'r') as h5f:
            for jj, collect_itime in enumerate(collect_times):
                grp_time = "Time:"+str(" %12.5E" % collect_itime)+" s"
                dset_temp = "Temperature [C]"
                simu_ensemble[jj*nobs:(jj+1)*nobs,ireaz] = h5f[grp_time][dset_temp][0][0][obs_cell]

    return simu_ensemble