## Table of Contents
###  [1.Pre-Processing](pre-processing.ipynb)
###  [2.Data Assimilation](Data Assimilation.ipynb)
<div class="toc" style="margin-top: 1em;">
   <ul class="toc-item" id="toc-level0">
      <li><span><a href='#part 2.1' data-toc-modified-id="part 1"><span class="toc-item-num">&nbsp;&nbsp;</span>2.1 Submit batch job</a></span></li>
      <li><span><a href='#part 2.2' data-toc-modified-id="part 2"><span class="toc-item-num">&nbsp;&nbsp;</span>2.2 Input Setting</a></span></li>
      <li><span><a href='#part 2.3' data-toc-modified-id="part 3"><span class="toc-item-num">&nbsp;&nbsp;</span>2.3 Configuration </a></span></li>
      <li><span><a href='#part 2.4' data-toc-modified-id="part 4"><span class="toc-item-num">&nbsp;&nbsp;</span>2.4 ES-MDA Workflow </a></span></li>
      <li><span><a href='#part 2.5' data-toc-modified-id="part 5"><span class="toc-item-num">&nbsp;&nbsp;</span>2.5 PFLOTRAN related functions </a></span></li>
   </ul>
</div>
###  [3.Post-Processing](post-processing.ipynb)






<a id='part 2.1'></a>
# 2.1 Submit batch job
The shell script located at ./src can be submitted to Cori at the National Energy Research Scientific Computing Center (NERSC) without modification when using the sample data. This script needs to be modified to accommodate other supercomputers.

The following sections describe the implementation of the entire workflow, which uses the ES-MDA method to estimate the hydrologic exchange flux, together with the associated functions.

<a id='part 2.2'></a>
# 2.2 Input Setting 
The inputs are entered in user_specified_parameters.txt located at ./src. All of these parameters can be modified to accommodate the user's needs.



<a id='part 2.3'></a>
# 2.3 Configuration 
This section performs the following tasks: reading the input file, loading Python packages, declaring variables, and creating folders to store the PFLOTRAN checkpoint files and the HDF5 file containing permeability and thermal conductivity at each time step.

In [None]:
# ------------Initialize Python packages to be used------------

import os
import numpy as np
import scipy.linalg as la
import shutil as shutil
from matplotlib.ticker import FormatStrFormatter
import matplotlib
import matplotlib.pyplot as plt
import math
import h5py
import util as util
import subprocess 
import random
import time
import datetime

#--------Read In User Specified Input------------------
# Configuration cell: reads the user parameter file and the PFLOTRAN input
# deck, turns the parameter file into Python variables, recreates the
# ./pflotran working folder and allocates the arrays used by the ES-MDA loop.
time_start = datetime.datetime.now()
print('\n Reading in User Specified Parameters.')
# Context managers close the files even if reading fails. The names
# `finput` and `fpflotran` remain bound (to closed files) afterwards, so a
# later `fpflotran.close()` is still a harmless no-op.
with open("./src/user_specified_parameters.txt", 'r') as finput:
    input_array = finput.readlines()
with open("./dainput/1dthermal.in", 'r') as fpflotran:
    pflotranin = fpflotran.readlines()

# NOTE(review): this handle is never written to or closed in the code visible
# here; it only creates/truncates ./src/test.txt. Kept as-is in case another
# cell relies on it -- confirm and remove if unused.
ftest = open("./src/test.txt",'w')

#------- Run lines of text file to define variables-------
# SECURITY NOTE: every line of the parameter file is passed to exec(), so the
# file must come from a trusted source.
for line in input_array:
    # Execute the raw assignment first; the derived definitions below must run
    # afterwards, otherwise the raw list assignment overwrites the NumPy
    # array that was just created.
    exec(line)
    if "obs_coord" in line:
        exec(line.replace('= [','= np.array([') + ')')
    elif "path_to_obs_data" in line:
        # Defines `obs` by loading the file named on this line.
        exec(line.replace('path_to_obs_data = ','obs = np.loadtxt(') + ')')
    elif "perm_range" in line or "th_cond_range" in line:
        exec(line.replace('= [','= np.array([') + ')')
print(' Done.')

#------- create folders to store pflotran files-------
# Start from an empty ./pflotran folder and copy the PFLOTRAN input deck in.
# Using shutil/os instead of shell commands makes a failed copy raise instead
# of being silently ignored.
shutil.rmtree("./pflotran", ignore_errors=True)
os.makedirs("./pflotran", exist_ok=True)
shutil.copy("./dainput/1dthermal.in", "./pflotran/")

#------- variables declaration-------
day_to_sec = 24*3600
nz = int(hz/dz)
# Midpoints between consecutive grid nodes from -hz to 0 with spacing dz.
z = (np.arange(-hz,0,dz)+np.arange(-hz+dz,dz,dz))/2
# Initial ensembles: log-permeability and thermal conductivity are drawn from
# normal distributions, one sample per realization.
init_logperm = np.random.normal(logperm_mean,logperm_sd,nreaz)
init_perm = np.exp(init_logperm)
init_th_cond = np.random.normal(th_cond_mean,th_cond_sd,nreaz)

nobs = len(obs_coord)
ntime = np.shape(obs)[0]
# First column of the observation table is time, remaining columns are the
# observed temperatures.
obs_time = obs[:,0]
obs_temp = obs[:,1:]
np.savetxt("./figure/obs_temp.txt",obs_temp)
pflotran_exe = "~/pflotran-edison"

# One row per ES-MDA iteration, one column per realization.
perm = np.zeros((niter,nreaz))
perm[0,:] = init_perm
th_cond = np.zeros((niter,nreaz))
th_cond[0,:] = init_th_cond
simu_time = np.array([obs_interval,nobs*obs_interval])
FNULL = open(os.devnull,'w')
# Placeholder with the shape of the simulated-observation matrix; it is
# replaced by the forward-simulation output inside the ES-MDA loop.
simu_ensemble = np.zeros((nobs*ntime,nreaz))

avg_perm = list()
avg_th_cond = list()


<a id='part 2.4'></a>
# 2.4 ES-MDA Workflow
This section shows the entire workflow for implementing ES-MDA to estimate the hydrologic exchange flux.

In [None]:
# ES-MDA main loop. Each iteration runs the forward model for the current
# ensemble, perturbs the observations, applies a Kalman-type update to
# (log-permeability, thermal conductivity), clamps the result to the
# user-specified ranges and seeds the next iteration with it.
for i in range(niter):
    print(i)

    # ---- forward simulation ----
    # Pass only the current iteration's ensembles (one value per realization).
    util.GenerateDbase(i, nreaz, perm[i, :], th_cond[i, :])

    # shell=True is kept on purpose: pflotran_exe may contain '~', which only
    # the shell expands.
    subprocess.call("./src/pflotran.sh {} {} {} ".format(nreaz,ncore,pflotran_exe),stdin=None,stdout=FNULL,stderr=None,shell=True)

    simu_ensemble = util.GenerateSimuEnsemble(nobs,obs_coord,z,nreaz,obs_time)
    np.savetxt("./figure/simu_ensemble{}.txt".format(i),simu_ensemble)

    # ---- perturbed observations ----
    # Observation standard deviation is proportional to the observed value,
    # scaled by sqrt(alpha).
    obs_sd = math.sqrt(alpha)*obs_sd_ratio*obs_temp
    obs_sd_flat = obs_sd.flatten('C')
    obs_flat = obs_temp.flatten('C').reshape(ntime*nobs, 1)
    noise = np.random.normal(0, 1, nreaz*nobs*ntime).reshape(nobs*ntime, nreaz)
    # Row-wise scaling is equivalent to diag(obs_sd_flat) @ noise without
    # building the dense diagonal matrix; broadcasting replicates the
    # observations across realizations.
    obs_ensemble = obs_flat + obs_sd_flat[:, None]*noise

    # ---- update state vector ----
    state_vector = np.zeros((2, nreaz))
    state_vector[0, :] = np.log(perm[i, :])
    state_vector[1, :] = th_cond[i, :]

    # Cross-covariance between the two state rows and the simulated data.
    cov_state_simu = np.cov(state_vector, simu_ensemble)[0:2, 2:]

    # np.cov returns a 0-d array when there is a single observation row;
    # atleast_2d keeps the algebra below valid for that case as well.
    cov_simu = np.atleast_2d(np.cov(simu_ensemble))
    obs_err_cov = np.diag(np.square(obs_sd_flat))
    inv_cov_simuAddobs = la.inv(cov_simu + obs_err_cov)

    kalman_gain = cov_state_simu@inv_cov_simuAddobs
    state_vector = state_vector+kalman_gain@(obs_ensemble-simu_ensemble)

    # Store the updated ensembles, clamped to the user-specified ranges.
    perm[i, :] = np.clip(np.exp(state_vector[0, :]), perm_range[0], perm_range[1])
    th_cond[i, :] = np.clip(state_vector[1, :], th_cond_range[0], th_cond_range[1])

    # Ensemble averages are taken from the (unclamped) updated state vector;
    # permeability is averaged in log space.
    avg_perm.append(np.exp(np.mean(state_vector[0, :])))
    avg_th_cond.append(np.mean(state_vector[1, :]))

    # The updated ensemble becomes the starting point of the next iteration.
    if i < niter-1:
        perm[i+1, :] = perm[i, :]
        th_cond[i+1, :] = th_cond[i, :]

np.savetxt("./figure/avg_perm.txt",avg_perm)
np.savetxt("./figure/avg_th_cond.txt",avg_th_cond)
np.savetxt("./figure/perm.txt",perm)
np.savetxt("./figure/th_cond.txt",th_cond)
np.savetxt("./figure/init_perm.txt",init_perm)
np.savetxt("./figure/init_th_cond.txt",init_th_cond)

# Record the wall-clock duration measured from `time_start`.
time_end = datetime.datetime.now()
time_cost = time_end-time_start
with open("timecost.txt", mode='w') as file:
    file.write('{}.\n'.format(time_cost))
fpflotran.close()


<a id='part 2.5'></a>
# 2.5 PFLOTRAN related functions
1. GenerateDbase: creates two new datasets for the estimated permeability and thermal conductivity at each time step in the Dbase.h5 file.
2. GenerateSimuEnsemble: reads the simulated data (temperatures at the sensor depths) from the HDF5 file generated by PFLOTRAN at each time step.

In [None]:
def GenerateDbase(itera,nreaz,perm,poro):
    """Write the current parameter ensembles to ./pflotran/Dbase.h5.

    The file is created (truncating any existing one) when ``itera`` is 0 and
    opened for in-place update otherwise. Any existing dataset with the same
    name is deleted before the new one is written.

    Parameters
    ----------
    itera : int
        Iteration index; 0 means "start a new file".
    nreaz : int
        Number of realizations. Not used by this function.
    perm : sliceable array-like
        Values stored in the "Permeability" dataset.
    poro : sliceable array-like
        Values stored in the "Porosity" dataset.
        NOTE(review): the workflow loop passes thermal conductivity here --
        confirm the dataset name matches what the PFLOTRAN input deck reads.

    Returns
    -------
    None
    """
    filename = "./pflotran/Dbase.h5"
    mode = 'w' if itera == 0 else 'r+'
    datasets = (
        ("Permeability", perm[:]),
        ("Porosity", poro[:]),
    )
    # The context manager closes the file even if a write fails.
    with h5py.File(filename, mode) as h5file:
        for name, data in datasets:
            # A dataset cannot be overwritten in place; drop the old one first.
            if name in h5file:
                del h5file[name]
            h5file.create_dataset(name, data=data)
    return

def GenerateSimuEnsemble(nobs,obs_coord,z,nreaz,obs_time):
    """Collect simulated temperatures at the observation points.

    For every realization ``r`` (1-based in the file name) the file
    ./pflotran/1dthermalR{r}.h5 is opened once, and for every entry of
    ``obs_time`` the "Temperature [C]" dataset of the group
    "Time: <time formatted as %12.5E> s" is sampled at the grid cells closest
    to ``obs_coord``.

    Parameters
    ----------
    nobs : int
        Number of observation points.
    obs_coord : sequence of float
        Coordinates of the observation points, in the same frame as ``z``.
    z : array-like of float
        Cell-centre coordinates of the model grid.
    nreaz : int
        Number of realizations.
    obs_time : sequence of float
        Output times used to build the HDF5 group names.

    Returns
    -------
    numpy.ndarray
        Array of shape (nobs*len(obs_time), nreaz). Column ``r`` holds, time
        block after time block, the ``nobs`` values read for realization
        ``r``.
    """
    ntime = len(obs_time)
    z = np.asarray(z)
    # Index of the grid cell nearest to each observation coordinate.
    obs_cell = np.array(
        [np.argmin(np.absolute(z - obs_coord[i])) for i in range(nobs)],
        dtype=int,
    )
    dset_temp = "Temperature [C]"
    simu_ensemble = np.zeros((nobs*ntime, nreaz))
    for ireaz in range(nreaz):
        # Open each realization's output once (instead of once per time step)
        # and let the context manager close it, so no handles are leaked.
        with h5py.File("./pflotran/1dthermalR{}.h5".format(ireaz+1), 'r') as h5f:
            for j, itime in enumerate(obs_time):
                grp_time = "Time:"+str(" %12.5E" % itime)+" s"
                # NOTE(review): [0][0] assumes the dataset's last axis runs
                # along z -- confirm against the PFLOTRAN output layout.
                simu_ensemble[j*nobs:(j+1)*nobs, ireaz] = h5f[grp_time][dset_temp][0][0][obs_cell]

    return simu_ensemble