## Table of Contents
###  [1.Pre-Processing](pre-processing.ipynb)
<div class="toc" style="margin-top: 1em;">
   <ul class="toc-item" id="toc-level0">
      <li><span><a href='#part 1.1' data-toc-modified-id="part 1"><span class="toc-item-num">&nbsp;&nbsp;</span>1.1 Read HDF5 file generated by PFLOTRAN</a></span></li>
      <li><span><a href='#part 1.2' data-toc-modified-id="part 2"><span class="toc-item-num">&nbsp;&nbsp;</span>1.2 Extract geometry and properties</a></span></li>
      <li><span><a href='#part 1.3' data-toc-modified-id="part 3"><span class="toc-item-num">&nbsp;&nbsp;</span>1.3 Output data for 1D columns</a></span></li>
   </ul>
</div>
###  [2.Data Assimilation](Data%20Assimilation.ipynb)
###  [3.Post-Processing](post-processing.ipynb)






<a id='part 1.1'></a>
# 1.1 Read HDF5 file generated by PFLOTRAN 


In [None]:
import h5py as h5
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
# Every relative path used below is resolved against this working directory.
# (Alternative location used previously: "/Users/song884/flux_estimation/")
os.chdir("/global/cscratch1/sd/xhsong/flux_estimation")

# Case folder, base names of the two HDF5 inputs, and the output folder.
case_name = "homo_2"
pflotran_name = "pflotran_ERT"
material_name = "ERT_material"
out_dir = "results/"

# Output times to extract; each value is later formatted into a
# "Time:  ... h" group name when reading the simulation output.
# (A short range such as range(8760, 8770) is handy for a quick trial run.)
output_times = range(8760, 21889)
ntime = len(output_times)

# Open the simulation output read-only; it stays open for the later cells.
datafile = h5.File("/".join([case_name, pflotran_name + ".h5"]), "r")


<a id='part 1.2'></a>
# 1.2 Extract geometry and properties


In [None]:
# Read the coordinate arrays from the PFLOTRAN output file. They are treated
# as cell-edge coordinates below: np.diff gives the cell sizes and the
# midpoints become the cell centres.
x = datafile["Coordinates"]["X [m]"][:]
y = datafile["Coordinates"]["Y [m]"][:]
z = datafile["Coordinates"]["Z [m]"][:]

# grid dimension (cell size along each axis)
dx = np.diff(x)
dy = np.diff(y)
dz = np.diff(z)

# grid center coordinates
x = x[0:-1] + 0.5 * dx
y = y[0:-1] + 0.5 * dy
z = z[0:-1] + 0.5 * dz

# grid number in each dimension
nx = len(x)
ny = len(y)
nz = len(z)

# 3-D arrays of cell-centre coordinates, each of shape (nx, ny, nz)
coordx, coordy, coordz = np.meshgrid(x, y, z, indexing="ij")

# Read material and region info. Every dataset is copied into memory with
# [:], so the material file can be closed as soon as the reads are done.
with h5.File(case_name + "/" + material_name + ".h5", "r") as materialfile:
    cell_ids = materialfile["Materials"]["Cell Ids"][:]
    material_ids = materialfile["Materials"]["Material Ids"][:]
    face_cells = materialfile["Regions"]["River"]["Cell Ids"][:]

# The flat id arrays are reshaped onto the grid in Fortran (x-fastest) order.
cell_ids = np.reshape(cell_ids, (nx, ny, nz), order="F")
material_ids = np.reshape(material_ids, (nx, ny, nz), order="F")

# Columns to extract: [x, y] location and the material id to keep in each.
column_loc_all = list()
material_type_all = list()
column_loc_all.append([366.5181, 214.5487])
material_type_all.append([5])

# Flatten the coordinate grids in the same Fortran order used for the ids so
# that a flat index addresses the same cell in all of them.
# (ravel replaces np.product-based reshape; np.product was removed in NumPy 2.0.)
coordx_flat = coordx.ravel(order="F")
coordy_flat = coordy.ravel(order="F")
coordz_flat = coordz.ravel(order="F")

<a id='part 1.3'></a>
# 1.3 Output data for 1D columns
1. Output the column depth, location and time series.


In [None]:
# NOTE: this cell appends to column_loc_all / material_type_all, so it should
# be run exactly once after the geometry cell (re-run that cell first if this
# one has to be repeated).

# np.savetxt does not create directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

# Keep only the river cells whose cell-centre z rounds to 104.75.
# Cell ids are used as 1-based here, hence the "- 1" whenever they index the
# flattened coordinate arrays.
face_cells = face_cells[np.round(coordz_flat[face_cells - 1], 2) == 104.75]

# For each target y (50, 100, ..., 350) add the river cell closest in y.
for selected_y in range(50, 400, 50):
    index_selected = np.argmin(abs(coordy_flat[face_cells - 1] - selected_y))
    # Convert the selected 1-based cell id to a 0-based flat index so the
    # stored location is that of the selected cell itself.
    flat_index = face_cells[index_selected] - 1
    column_loc_all.append([coordx_flat[flat_index],
                           coordy_flat[flat_index]])
    material_type_all.append([5])

# Three additional hand-picked columns, extracted for material id 1.
column_loc_all.append([317.7934, 99.3544])
column_loc_all.append([280.8869, 276.3164])
column_loc_all.append([256.7615, 218.3866])
material_type_all.append([1])
material_type_all.append([1])
material_type_all.append([1])

# Dataset names are taken from the first requested output time.
first_group = datafile["Time:  " + "{0:.5E}".format(output_times[0]) + " h"]
datatype = list(first_group.keys())

ncolumn = len(material_type_all)
for icolumn in range(ncolumn):
    column_loc = column_loc_all[icolumn]
    material_type = material_type_all[icolumn]

    # (i, j) indices of the grid column closest to the requested location
    column_index = (np.argmin(abs(x - column_loc[0])),
                    np.argmin(abs(y - column_loc[1])))

    # Boolean mask over z selecting the cells of the requested material
    column_material = (
        material_ids[column_index[0], column_index[1], :] == material_type)
    if not np.any(column_material):
        raise ValueError(
            "column " + str(icolumn) + " at " + str(column_loc[:2]) +
            " contains no cells of material " + str(material_type))

    column_z = z[column_material]
    ndepth = len(column_z)
    half_top_dz = 0.5 * dz[column_material][-1]

    # Depth relative to the upper face of the last selected cell
    # (zero at that face, negative below it).
    column_depth = column_z - column_z[-1] - half_top_dz
    # Store the elevation of that upper face as a third entry of the location.
    column_loc_all[icolumn].append(column_z[-1] + half_top_dz)

    print(str(column_depth[-1]) + str(half_top_dz))
    column_data = np.empty([ndepth, ntime])

    for itype in datatype:
        print(itype)
        for itime in range(ntime):
            print(str(itime) + " in " + str(ntime))
            groupname = "Time:  " + "{0:.5E}".format(output_times[itime]) + " h"
            dataset = datafile[groupname][itype]
            # Read the full vertical column, then keep the selected material.
            column_data[:, itime] = dataset[column_index[0],
                                            column_index[1], :][column_material]
        # One file per column and dataset: rows are times, columns are depths.
        np.savetxt(out_dir + case_name + "_c" + str(icolumn) + "_" + itype + ".txt",
                   np.transpose(column_data), fmt="%.6e", delimiter=",")
    np.savetxt(out_dir + case_name + "_c" + str(icolumn) + "_depth.txt",
               np.transpose(column_depth), fmt="%.6e", delimiter=",")

# Shared outputs: the time axis and the [x, y, top elevation] of every column.
np.savetxt(out_dir + case_name + "_time.txt",
           np.transpose(output_times), fmt="%.6e", delimiter=",")
np.savetxt(out_dir + case_name + "_col_loc.txt",
           column_loc_all, fmt="%.6e", delimiter=",")

datafile.close()

# fid = open(out_dir + case_name + "column.txt", "w")
# fid.write(column_data)
# fid.close()
# plt.plot(column_depth, column_data[:, 1])
# plt.show()

# plt.plot(range(ntime), column_data[1, :])
# plt.show()