## Lawrence Livermore National Laboratory Automized Surface Titration Model

### Code written by Sol-chan Han (LLNL), Elliot Chang (LLNL), Mavrik Zavarin (LLNL)
### Code distribution license obtained from LLNL Intellectual Property Office

In [None]:
# Import package dependencies
import csv
import pandas as pd
import numpy as np
import math
import re
import subprocess
import matplotlib.pyplot as plt
from pathlib import Path
import os
import shutil
import openpyxl
from numpy import average
import sys

In [None]:
# -----------------------------------------------------------------------
# Type of mineral
# -----------------------------------------------------------------------
# Short tag of the mineral being modelled. Other tags offered by this
# notebook: "Goe", "Hem", "Mag" (presumably goethite, hematite and
# magnetite, with "Ferri" = ferrihydrite -- confirm against the data set).
Mineral_type = "Ferri"

In [None]:
# -----------------------------------------------------------------------
# Type of surface complexation model (SCM)
# -----------------------------------------------------------------------
# Accepted tags: "DDL", "CCM", "NEM".
# The template writers in this notebook emit '-no_edl' for "NEM" and a
# '-ccm' line for "CCM"; the PEST control file gets a third parameter
# ('capac') for "CCM".
SCM_type = "CCM"

In [None]:
# -----------------------------------------------------------------------
# Import potentiometric titration data file
# -----------------------------------------------------------------------

# Name of the potentiometric titration CSV. It is handed unchanged to
# pd.read_csv, so a bare file name is resolved against the working directory.
database = "Ferrihydrite_Potentiometric_Titration.csv"

In [None]:
# Lower-case spellings of the adjustable parameter names that the PEST
# control file declares ('Fe_wOH2+', 'difK', 'capac').
# NOTE(review): presumably used to look the parameters up in PEST output,
# which is not visible in this part of the notebook -- confirm.
Ka1_symbol, Ka2_symbol, Capac_symbol = "fe_woh2+", "difk", "capac"

In [None]:
# Display floats with 15 decimal places so printed tables do not hide digits.
# This only affects how DataFrames are shown, not the stored values.
pd.set_option("display.precision", 15)

# Load the titration table named by `database` into `orgdata`.
orgdata = pd.read_csv(database)

# Empty cells become 0. The input writers below rely on this: an electrolyte
# name equal to 0 is treated as "not present".
orgdata = orgdata.replace(np.nan, 0)

# Site concentration = Mineral_val * MineralSA * Mineralsites * 1e18 / 6.022e23.
# NOTE(review): the factors look like nm^2 -> m^2 and Avogadro's number, which
# would make the result mol of sites per litre -- confirm the column units.
# The scale factor is written as a float on purpose: with the integer 10**18,
# three integer-typed columns would be multiplied in int64 and overflow
# before the division.
orgdata["Site_Conc"] = (
    orgdata["Mineral_val"] * orgdata["MineralSA"] * orgdata["Mineralsites"] * 1e18 / 6.022e23
)

In [None]:
# Check the status of the data import: as the last expression of the cell,
# this displays the first rows of `orgdata` in the notebook output.
orgdata.head()

In [None]:
# Working-directory handles: the plain string is reused later to build the
# PHREEQC executable paths, the Path objects to join sub-folders.
Current_path = os.getcwd()
Initial_path = Current_path
path_temp = Path(Current_path)
path_temp_org = Path(Current_path)

# Start every run from an empty 'individual_dataset' folder.
individual_dataset_path = path_temp / 'individual_dataset'
if os.path.exists(individual_dataset_path):
    shutil.rmtree(individual_dataset_path)
os.makedirs(individual_dataset_path, exist_ok=True)

# One CSV per value of the "Set" column, plus an index table listing the
# 1-based group number, the set identifier and its number of rows.
_sets = orgdata.groupby("Set")
tot_group_no = len(_sets)
dataset_list = pd.DataFrame(index=range(tot_group_no), columns=['group_num', 'dataset', 'No.Data'])
for group_num, (Ref, selection) in enumerate(_sets):
    dataset_list.iat[group_num, 0] = group_num + 1
    dataset_list.iat[group_num, 1] = Ref
    dataset_list.iat[group_num, 2] = len(selection)
    selection.to_csv(individual_dataset_path / f'dataset_{Ref}.csv', index=False, header=True)
# Leave the counter equal to the number of exported sets, as before.
group_num = tot_group_no
dataset_list.to_csv(individual_dataset_path / '0.dataset_list.csv', index=False, header=True)

In [None]:
# Group the rows by titration set, report how many rows each set has, and
# keep a {set identifier: sub-DataFrame} lookup for the later cells.
titration_groups = orgdata.groupby("Set")
print(titration_groups.size())
titration_dic = {set_id: frame for set_id, frame in titration_groups}

In [None]:
# Locations inside the bundled PHREEQC distribution (Windows-style paths,
# kept as plain strings because they go straight onto the command line).
phreeqc_dir = rf"{Initial_path}\phreeqc-3.7.3-15968-x64"
phreeqc_path = rf"{phreeqc_dir}\bin\phreeqc.bat"
phreeqc_db = rf"{phreeqc_dir}\database\phreeqc_dif.dat"

# Scratch folder for the pre-run simulations; recreated empty on every run.
pre_phreeqc_sim_path = path_temp_org / 'Phreeqc_pre_run'
if os.path.exists(pre_phreeqc_sim_path):
    shutil.rmtree(pre_phreeqc_sim_path)
os.makedirs(pre_phreeqc_sim_path, exist_ok=True)

In [None]:
#########################################################################
#                                                                                                                                              #
#                                         Formatter for Phreeqc pre-processing                                         #
#                                                                                                                                              #
#########################################################################

def phreeqcpreprocessing(orgdata_temp):
    """Write the pre-run PHREEQC input for one dataset and execute PHREEQC.

    The generated file holds a SELECTED_OUTPUT block (solution number, state,
    pH, charge balance, percent error, ionic strength), a PHASES block that
    defines ``Fix_H+`` and one SOLUTION block per row of ``orgdata_temp``.
    No SURFACE or EQUILIBRIUM_PHASES blocks are written here; the percent
    error reported by this run is what the template writers later use to
    decide which electrolyte carries ``-charge``.

    The function reads module-level names that the calling cell must set
    first: ``phreeqc_input`` (file to write), ``phreeqc_results`` (selected
    output file named inside the input), and ``phreeqc_path``,
    ``phreeqc_output``, ``phreeqc_db`` (command line).

    Parameters
    ----------
    orgdata_temp : pandas.DataFrame
        Rows of one titration set with a 0..N-1 index (rows are read with
        ``column[n]``). Columns read: ``pH``, ``Electrolyte1`` ..
        ``Electrolyte7`` and the matching ``Electrolyte<k>_val`` columns.
        For Electrolyte3..7 a name equal to 0 means "not present".

    Returns
    -------
    None
        A warning is printed when PHREEQC exits with a non-zero status, so a
        failed run is visible before the missing ``.sel`` file breaks a later
        cell.
    """

    def element_symbol(label):
        # Keep only the stand-alone alphabetic words of the label, joined
        # together (unchanged extraction rule for Electrolyte1).
        return str(re.sub('[^a-zA-Z]+', '', str(re.findall(r"(?i)\b[a-z]+\b", label))))

    header = (
        'SELECTED_OUTPUT\n',
        '   ' + fr'-file {phreeqc_results}' + '\n',
        '   -reset false\n',
        '   -solution\n',
        '   -state true\n',
        # Written as a PHREEQC comment; edit the species list if enabled.
        '   #-molalities Fe_wOH Fe_wO- Fe_wOH2+\n',
        '   -pH\n',
        '   -charge_balance\n',
        '   -percent_error\n',
        '   -ionic_strength true\n',
        '   -high_precision false\n',
        'END\n\n',
        # Additional phases can be appended to this PHASES block.
        'PHASES\n',
        'Fix_H+\n',
        '   H+ = H+\n',
        '   log_k 0.0\n',
        'END\n\n',
    )

    with open(phreeqc_input, 'w') as t:
        t.writelines(header)

        for n in range(orgdata_temp.shape[0]):
            t.write('SOLUTION %d\n' % (n + 1))
            t.write('   -units mol/L\n')
            # Initial solution pH, 12 decimal places.
            t.write('   pH %0.12f\n' % orgdata_temp.pH[n])

            # The first two electrolytes are always written.
            t.write('   ' + element_symbol(orgdata_temp.Electrolyte1[n])
                    + ' %0.12f' % orgdata_temp.Electrolyte1_val[n] + '\n')
            t.write('   ' + orgdata_temp.Electrolyte2[n]
                    + ' %0.12f' % orgdata_temp.Electrolyte2_val[n] + '\n')

            # Electrolytes 3-7 are optional; 0 marks an empty slot.
            for k in range(3, 8):
                name = orgdata_temp.loc[n, 'Electrolyte%d' % k]
                if name != 0:
                    val = orgdata_temp.loc[n, 'Electrolyte%d_val' % k]
                    t.write('   ' + str(name) + ' %0.12f' % val + '\n')

    # Positional arguments: input file, output file, database.
    cmd = [phreeqc_path, phreeqc_input, phreeqc_output, phreeqc_db]
    completed = subprocess.run(cmd)
    if completed.returncode != 0:
        print('WARNING: PHREEQC exited with code {} for {}'.format(
            completed.returncode, phreeqc_input))


In [None]:
#########################################################################
#                                                                                                                                              #
#                                                 Run pre Phreeqc Simulations                                              #
#                                                                                                                                              #
#########################################################################

# Run the pre-simulation once per titration set. The three path variables
# assigned here are module-level names that phreeqcpreprocessing() reads.
for pre_num in range(tot_group_no):
    dataset_ID_pre = dataset_list.dataset[pre_num]
    orgdata_temp = pd.DataFrame(titration_dic[dataset_ID_pre])
    # The writer addresses rows as 0..N-1, so drop the original row labels.
    orgdata_temp.reset_index(inplace=True, drop=True)
    phreeqc_input = pre_phreeqc_sim_path.joinpath('Titration_input_{}.txt'.format(dataset_ID_pre))
    phreeqc_output = pre_phreeqc_sim_path.joinpath('Titration_output_{}.out'.format(dataset_ID_pre))
    phreeqc_results = pre_phreeqc_sim_path.joinpath('Titration_selout_{}.sel'.format(dataset_ID_pre))
    phreeqcpreprocessing(orgdata_temp)
    # Formatting instead of '+' so numeric set identifiers do not raise.
    print('{} done'.format(dataset_ID_pre))

In [None]:
#########################################################################
#                                                                                                                                              #
#                                                        Formatter for Phreeqc                                                   #
#                                                                                                                                              #
#########################################################################

def importingphreeqc(orgdata_temp):
    """Write the PHREEQC template file (PEST ``ptf``) for one titration set.

    The file starts with ``ptf !`` and contains USER_PUNCH, SELECTED_OUTPUT
    and PHASES blocks, followed by SOLUTION / SURFACE / EQUILIBRIUM_PHASES
    blocks (closed by END) for every row of ``orgdata_temp``.

    Charge balancing: the sign of ``df_phreeqc_pre.pct_err[n]`` decides which
    of the first two electrolytes gets the ``-charge`` flag (positive:
    Electrolyte2, negative: Electrolyte1). When the value is exactly 0 (or
    not a number) both electrolytes are written without the flag; previously
    they were omitted from the SOLUTION block altogether.

    Module-level names read: ``phreeqc_input`` (file to write),
    ``dataset_ID`` (used in the selected-output file name),
    ``df_phreeqc_pre`` (pre-run results with a ``pct_err`` column, one row
    per row of ``orgdata_temp``) and ``SCM_type``.

    Parameters
    ----------
    orgdata_temp : pandas.DataFrame
        Rows of one titration set with a 0..N-1 index. Columns read: ``pH``,
        ``Electrolyte1`` .. ``Electrolyte7``, ``Electrolyte<k>_val``,
        ``Mineralsites``, ``MineralSA`` and ``Mineral_val``. For
        Electrolyte3..7 a name equal to 0 means "not present".

    Returns
    -------
    None
    """

    def element_symbol(label):
        # Keep only the stand-alone alphabetic words of the label, joined
        # together (unchanged extraction rule for Electrolyte1).
        return str(re.sub('[^a-zA-Z]+', '', str(re.findall(r"(?i)\b[a-z]+\b", label))))

    header = (
        'ptf !\n\n',
        'USER_PUNCH\n',
        '-headings sum\n',
        '10 sum = MOL("Fe_wOH2+") - MOL("Fe_wO-")\n',
        '20 PUNCH sum\n',
        'END\n \n',
        'SELECTED_OUTPUT\n',
        '   -file output_{}.sel\n'.format(dataset_ID),
        '   -reset false\n',
        '   -simulation true\n',
        '   -state true\n',
        # Written as a PHREEQC comment; edit the species list if enabled.
        '   #-molalities Fe_wOH Fe_wO- Fe_wOH2+\n',
        '   -pH\n',
        '   -charge_balance\n',
        '   -percent_error\n',
        '   -ionic_strength true\n',
        '   -high_precision false\n',
        'END\n\n',
        # Additional phases can be appended to this PHASES block.
        'PHASES\n',
        'Fix_H+\n',
        '   H+ = H+\n',
        '   log_k 0.0\n',
        'NaBr\n',
        '   NaBr = Na+ + Br-\n',
        '   log_k -10.0\n',
        'END\n\n',
    )

    with open(phreeqc_input, 'w') as t:
        t.writelines(header)

        for n in range(orgdata_temp.shape[0]):
            number = n + 1

            # ---- SOLUTION ------------------------------------------------
            t.write('SOLUTION %d\n' % number)
            t.write('   -units mol/L\n')
            # Initial solution pH, 12 decimal places.
            t.write('   pH %0.12f\n' % orgdata_temp.pH[n])

            # Exactly one of the two flags is set unless pct_err is 0/NaN.
            pct_err = df_phreeqc_pre.pct_err[n]
            first_flag = ' -charge' if pct_err < 0 else ''
            second_flag = ' -charge' if pct_err > 0 else ''
            t.write('   ' + element_symbol(orgdata_temp.Electrolyte1[n])
                    + ' %0.12f' % orgdata_temp.Electrolyte1_val[n] + first_flag + '\n')
            t.write('   ' + orgdata_temp.Electrolyte2[n]
                    + ' %0.12f' % orgdata_temp.Electrolyte2_val[n] + second_flag + '\n')

            # Electrolytes 3-7 are optional; 0 marks an empty slot.
            for k in range(3, 8):
                name = orgdata_temp.loc[n, 'Electrolyte%d' % k]
                if name != 0:
                    val = orgdata_temp.loc[n, 'Electrolyte%d_val' % k]
                    t.write('   ' + str(name) + ' %0.12f' % val + '\n')

            # ---- SURFACE -------------------------------------------------
            t.write('SURFACE %d\n' % number)
            # For models other than "NEM" the option is written commented out.
            t.write('   -no_edl\n' if SCM_type == "NEM" else '   #-no_edl\n')
            t.write('   -equilibrate with SOLUTION %d\n' % number)
            t.write('   -site_units    density\n')
            # Site density, specific surface area, mineral amount (5 decimals).
            t.write('   Fe_wOH  %0.5f %0.5f %0.5f\n\n' % (
                orgdata_temp.Mineralsites[n],
                orgdata_temp.MineralSA[n],
                orgdata_temp.Mineral_val[n],
            ))
            if SCM_type == "CCM":
                # '!capac!' is the placeholder for the 'capac' parameter.
                t.write('   -ccm !capac! \n')

            # ---- EQUILIBRIUM_PHASES --------------------------------------
            t.write('EQUILIBRIUM_PHASES %d\n' % number)
            # Target saturation index of Fix_H+ is -pH, adjusted with NaOH.
            t.write('   Fix_H+ %0.4f NaOH 10.0\n' % (orgdata_temp.pH[n] * -1))
            t.write('   NaBr  0  10\n')
            t.write('END\n\n')

In [None]:
#########################################################################
#                                                                                                                                              #
#                                                Formatter for Phreeqc - Fitting                                              #
#                                                                                                                                              #
#########################################################################

def importingphreeqcFitting(orgdata_temp, dataset_ID_Fitting):
    """Write the PHREEQC template file (PEST ``ptf``) for one titration set.

    Same layout as the template written by ``importingphreeqc``, except that
    the dataset identifier used in the selected-output file name is passed
    in explicitly instead of being read from a module-level variable.

    Charge balancing: the sign of ``df_phreeqc_pre.pct_err[n]`` decides which
    of the first two electrolytes gets the ``-charge`` flag (positive:
    Electrolyte2, negative: Electrolyte1). When the value is exactly 0 (or
    not a number) both electrolytes are written without the flag; previously
    they were omitted from the SOLUTION block altogether.

    Module-level names read: ``phreeqc_input`` (file to write),
    ``df_phreeqc_pre`` (pre-run results with a ``pct_err`` column, one row
    per row of ``orgdata_temp``) and ``SCM_type``.

    Parameters
    ----------
    orgdata_temp : pandas.DataFrame
        Rows of one titration set with a 0..N-1 index. Columns read: ``pH``,
        ``Electrolyte1`` .. ``Electrolyte7``, ``Electrolyte<k>_val``,
        ``Mineralsites``, ``MineralSA`` and ``Mineral_val``. For
        Electrolyte3..7 a name equal to 0 means "not present".
    dataset_ID_Fitting
        Identifier inserted into ``-file output_<id>.sel``.

    Returns
    -------
    None
    """

    def element_symbol(label):
        # Keep only the stand-alone alphabetic words of the label, joined
        # together (unchanged extraction rule for Electrolyte1).
        return str(re.sub('[^a-zA-Z]+', '', str(re.findall(r"(?i)\b[a-z]+\b", label))))

    def optional_electrolytes(row):
        # Yield the ready-to-write lines for Electrolyte3..7; 0 = empty slot.
        for k in range(3, 8):
            name = orgdata_temp.loc[row, 'Electrolyte%d' % k]
            if name != 0:
                val = orgdata_temp.loc[row, 'Electrolyte%d_val' % k]
                yield '   ' + str(name) + ' %0.12f' % val + '\n'

    with open(phreeqc_input, 'w') as t:
        # ---- file header: PEST marker, USER_PUNCH, SELECTED_OUTPUT, PHASES
        t.writelines([
            'ptf !\n\n',
            'USER_PUNCH\n',
            '-headings sum\n',
            '10 sum = MOL("Fe_wOH2+") - MOL("Fe_wO-")\n',
            '20 PUNCH sum\n',
            'END\n \n',
            'SELECTED_OUTPUT\n',
            '   -file output_{}.sel\n'.format(dataset_ID_Fitting),
            '   -reset false\n',
            '   -simulation true\n',
            '   -state true\n',
            # Written as a PHREEQC comment; edit the species list if enabled.
            '   #-molalities Fe_wOH Fe_wO- Fe_wOH2+\n',
            '   -pH\n',
            '   -charge_balance\n',
            '   -percent_error\n',
            '   -ionic_strength true\n',
            '   -high_precision false\n',
            'END\n\n',
            # Additional phases can be appended to this PHASES block.
            'PHASES\n',
            'Fix_H+\n',
            '   H+ = H+\n',
            '   log_k 0.0\n',
            'NaBr\n',
            '   NaBr = Na+ + Br-\n',
            '   log_k -10.0\n',
            'END\n\n',
        ])

        for n in range(orgdata_temp.shape[0]):
            number = n + 1

            # ---- SOLUTION ------------------------------------------------
            t.write('SOLUTION %d\n' % number)
            t.write('   -units mol/L\n')
            # Initial solution pH, 12 decimal places.
            t.write('   pH %0.12f\n' % orgdata_temp.pH[n])

            # Exactly one of the two flags is set unless pct_err is 0/NaN.
            pct_err = df_phreeqc_pre.pct_err[n]
            first_flag = ' -charge' if pct_err < 0 else ''
            second_flag = ' -charge' if pct_err > 0 else ''
            t.write('   ' + element_symbol(orgdata_temp.Electrolyte1[n])
                    + ' %0.12f' % orgdata_temp.Electrolyte1_val[n] + first_flag + '\n')
            t.write('   ' + orgdata_temp.Electrolyte2[n]
                    + ' %0.12f' % orgdata_temp.Electrolyte2_val[n] + second_flag + '\n')
            t.writelines(optional_electrolytes(n))

            # ---- SURFACE -------------------------------------------------
            t.write('SURFACE %d\n' % number)
            # For models other than "NEM" the option is written commented out.
            t.write('   -no_edl\n' if SCM_type == "NEM" else '   #-no_edl\n')
            t.write('   -equilibrate with SOLUTION %d\n' % number)
            t.write('   -site_units    density\n')
            # Site density, specific surface area, mineral amount (5 decimals).
            t.write('   Fe_wOH  %0.5f %0.5f %0.5f\n\n' % (
                orgdata_temp.Mineralsites[n],
                orgdata_temp.MineralSA[n],
                orgdata_temp.Mineral_val[n],
            ))
            if SCM_type == "CCM":
                # '!capac!' is the placeholder for the 'capac' parameter.
                t.write('   -ccm !capac! \n')

            # ---- EQUILIBRIUM_PHASES --------------------------------------
            t.write('EQUILIBRIUM_PHASES %d\n' % number)
            # Target saturation index of Fix_H+ is -pH, adjusted with NaOH.
            t.write('   Fix_H+ %0.4f NaOH 10.0\n' % (orgdata_temp.pH[n] * -1))
            t.write('   NaBr  0  10\n')
            t.write('END\n\n')

In [None]:
# Fresh run folder for the per-dataset PEST/PHREEQC fits.
individual_dataset_run_path = path_temp / 'individual_dataset_run'
if os.path.exists(individual_dataset_run_path):
    shutil.rmtree(individual_dataset_run_path)
os.makedirs(individual_dataset_run_path, exist_ok=True)

# Files expected next to the notebook: both executables and the database
# template. Each is copied under the same name into the run folder.
PEST_EXE = path_temp / 'PEST.EXE'
copy_PEST_EXE = individual_dataset_run_path / 'PEST.EXE'
shutil.copyfile(PEST_EXE, copy_PEST_EXE)

Phreeqc_EXE = path_temp / 'phreeqc.exe'
copy_Phreeqc_EXE = individual_dataset_run_path / 'phreeqc.exe'
shutil.copyfile(Phreeqc_EXE, copy_Phreeqc_EXE)

TDB = path_temp / 'phreeqc_dif.tpt'
copy_TDB = individual_dataset_run_path / 'phreeqc_dif.tpt'
shutil.copyfile(TDB, copy_TDB)

In [None]:
#########################################################################
#                                                                                                                                              #
#                                           Exports Phreeqc Input Files for PEST                                       #
#                                                                                                                                              #
#########################################################################
# For every titration set: load the selected output of its pre-run, then
# write the matching template. importingphreeqc() reads `dataset_ID`,
# `df_phreeqc_pre` and `phreeqc_input` as module-level names.
for gN in range(tot_group_no):
    dataset_ID = dataset_list.dataset[gN]
    phreeqc_results_temp = pre_phreeqc_sim_path.joinpath('Titration_selout_{}.sel'.format(dataset_ID))
    # Whitespace-delimited table. The separator is a raw string because
    # '\s' is not a valid escape sequence in a normal string literal.
    df_phreeqc_pre = pd.read_csv(phreeqc_results_temp, sep=r'\s+')
    phreeqc_input = individual_dataset_run_path.joinpath('input_{}.tpt'.format(dataset_ID))
    orgdata_temp = pd.DataFrame(titration_dic[dataset_ID])
    # The writer addresses rows as 0..N-1, so drop the original row labels.
    orgdata_temp.reset_index(inplace=True, drop=True)
    importingphreeqc(orgdata_temp)

In [None]:
#########################################################################
#                                                                                                                                              #
#                                                 Formatter for PEST Instruction                                            #
#                                                                                                                                              #
#########################################################################
def formattingPESTinstruction(orgdata_temp):
    """Write the PEST instruction file for one titration set.

    The file named by the module-level ``PEST_instructions`` is created (or
    overwritten) with the header ``pif @`` followed by one line per row of
    ``orgdata_temp``: ``@react@ w w w w w !Obs<k>!`` with k counting from 1.
    NOTE(review): in PEST instruction syntax this presumably means "find the
    text 'react', skip five whitespace-separated fields, read the next value
    as observation Obs<k>" -- confirm against the selected-output layout.

    Parameters
    ----------
    orgdata_temp : pandas.DataFrame
        Only its number of rows is used.

    Returns
    -------
    None
    """
    with open(PEST_instructions, 'w+') as pi:
        total = orgdata_temp.shape[0]
        body = ['@react@ w w w w w !Obs%d!\n' % obs_no for obs_no in range(1, total + 1)]
        pi.write('pif @\n')
        pi.writelines(body)

In [None]:
#########################################################################
#                                                                                                                                              #
#                                                 Exports PEST Instruction Files                                            #
#                                                                                                                                              #
#########################################################################

# One instruction file per titration set, written into the run folder.
# `PEST_instructions` is read by formattingPESTinstruction() as a
# module-level name, so it must be assigned before each call.
for gN in range(tot_group_no):
    dataset_ID = dataset_list.dataset[gN]
    PEST_instructions = individual_dataset_run_path / 'instructions_{}.ins'.format(dataset_ID)
    orgdata_temp = pd.DataFrame(titration_dic[dataset_ID])
    orgdata_temp.reset_index(inplace=True, drop=True)
    formattingPESTinstruction(orgdata_temp)

In [None]:
#########################################################################
#                                                                                                                                              #
#                                                 Formatter for PEST Control                                                 #
#                                                                                                                                              #
#########################################################################

# def formattingPESTcontrol(orgdata_temp, pK1_int, m_pK2_int): 

def formattingPESTcontrol(orgdata_temp, pK1_int):
    """Write the PEST control (.pst) file for one individually fitted dataset.

    Three module-level names are read: ``PEST_control`` (path of the file to
    write), ``SCM_type`` ("CCM" adds ``capac`` as a third adjustable
    parameter) and ``dataset_ID`` (used to build the model file names).

    Parameters
    ----------
    orgdata_temp : table with ``SurfCharge_val`` and ``Site_Conc`` columns
        that can be looked up by the labels 0..n-1.  Each row becomes one
        observation; its weight is written as 1 / (Site_Conc / 10).
    pK1_int : initial value of the ``Fe_wOH2+`` parameter.  Its bounds are
        written as ``pK1_int - 5`` and ``pK1_int + 3``.
    """
    # 'w+' creates the control file when it does not exist yet
    with open(PEST_control, 'w+') as pct:
        n_obs = orgdata_temp.shape[0]
        with_capacitance = SCM_type == "CCM"
        n_par = '3' if with_capacitance else '2'  # number of adjustable parameters

        records = [
            'pcf',
            '* control data',
            'norestart estimation',
            n_par + ' %d' % n_obs + ' 1 0 1',
            '2 1 single point 1 0 0',
            '5.0 3.0 0.3 0.01 8',
            '3.0 3.0 0.001',
            '.1 aui',
            '30 .01 4 4 .01 3',
            '1 1 1',
            '* automatic user intervention',
            '8 1 0.9 0',
            '80.0 0 3',
            '0.8 0.99 4',
            '* parameter groups',
            'rates relative 0.1 0.01 switch 1.0 parabolic',
            '* parameter data',
            # Modify the following entries for system specific surface species
            'Fe_wOH2+ none relative %0.2f %0.2f %0.2f rates 1.0 0.0 1'
            % (pK1_int, pK1_int - 5, pK1_int + 3),
            'difK none relative 1 0.01 10 rates 1.0 0.0 1',
        ]
        if with_capacitance:
            records.append('capac none relative 0.85 0.01 10 rates 1.0 0.0 1')
        records += ['* observation groups', 'group_1', '* observation data']

        for n in range(n_obs):
            records.append(
                'Obs%d %0.12f %0.12f group_1'
                % (n + 1,
                   orgdata_temp.SurfCharge_val[n],
                   1 / ((orgdata_temp.Site_Conc[n]) / 10))
            )

        model_in = 'input_{}.txt'.format(dataset_ID)
        model_out = 'output_{}.txt'.format(dataset_ID)
        database = 'phreeqc_dif_{}.txt'.format(dataset_ID)
        records += [
            '* model command line',
            ' '.join(['phreeqc.exe', model_in, model_out, database]),
            '* model input/output',
            'phreeqc_dif.tpt ' + database,
            'input_{}.tpt'.format(dataset_ID) + ' ' + model_in,
            'instructions_{}.ins'.format(dataset_ID) + ' ' + 'output_{}.sel'.format(dataset_ID),
            '* prior information',
        ]
        # The last section header is written without a trailing newline
        pct.write('\n'.join(records))

In [None]:
#########################################################################
#                                                                                                                                              #
#                                            Formatter for PEST Control - fitting                                           #
#                                                                                                                                              #
#########################################################################

# def formattingPESTcontrol(orgdata_temp, pK1_int, m_pK2_int): 

def formattingPESTcontrolFitting(orgdata_temp, pK1_int, difK2, dataset_ID_Fitting):
    """Write the PEST control (.pst) file for the post-fitting run.

    Compared with ``formattingPESTcontrol`` the initial ``difK`` value is
    supplied by the caller, the first field of the ninth control line is
    ``-1`` instead of ``30``, and the model file names are built from the
    ``dataset_ID_Fitting`` argument.  ``PEST_control`` (output path) and
    ``SCM_type`` are still read from module level.

    Parameters
    ----------
    orgdata_temp : table with ``SurfCharge_val`` and ``Site_Conc`` columns
        that can be looked up by the labels 0..n-1; one observation per row,
        weighted by 1 / (Site_Conc / 10).
    pK1_int : initial ``Fe_wOH2+`` value, bounded by ``pK1_int - 5`` and
        ``pK1_int + 3``.
    difK2 : initial ``difK`` value (bounds 0.01 and 10).
    dataset_ID_Fitting : identifier inserted into the model file names.
    """
    # 'w+' creates the control file when it does not exist yet
    with open(PEST_control, 'w+') as pct:
        n_obs = orgdata_temp.shape[0]
        with_capacitance = SCM_type == "CCM"
        n_par = '3' if with_capacitance else '2'  # number of adjustable parameters

        records = [
            'pcf',
            '* control data',
            'norestart estimation',
            n_par + ' %d' % n_obs + ' 1 0 1',
            '2 1 single point 1 0 0',
            '5.0 3.0 0.3 0.01 8',
            '3.0 3.0 0.001',
            '.1 aui',
            '-1 .01 4 4 .01 3',
            '1 1 1',
            '* automatic user intervention',
            '8 1 0.9 0',
            '80.0 0 3',
            '0.8 0.99 4',
            '* parameter groups',
            'rates relative 0.1 0.01 switch 1.0 parabolic',
            '* parameter data',
            # Modify the following entries for system specific surface species
            'Fe_wOH2+ none relative %0.2f %0.2f %0.2f rates 1.0 0.0 1'
            % (pK1_int, pK1_int - 5, pK1_int + 3),
            'difK none relative %0.2f 0.01 10 rates 1.0 0.0 1' % (difK2),
        ]
        if with_capacitance:
            records.append('capac none relative 0.85 0.01 10 rates 1.0 0.0 1')
        records += ['* observation groups', 'group_1', '* observation data']

        for n in range(n_obs):
            records.append(
                'Obs%d %0.12f %0.12f group_1'
                % (n + 1,
                   orgdata_temp.SurfCharge_val[n],
                   1 / ((orgdata_temp.Site_Conc[n]) / 10))
            )

        model_in = 'input_{}.txt'.format(dataset_ID_Fitting)
        model_out = 'output_{}.txt'.format(dataset_ID_Fitting)
        database = 'phreeqc_dif_{}.txt'.format(dataset_ID_Fitting)
        records += [
            '* model command line',
            ' '.join(['phreeqc.exe', model_in, model_out, database]),
            '* model input/output',
            'phreeqc_dif.tpt ' + database,
            'input_{}.tpt'.format(dataset_ID_Fitting) + ' ' + model_in,
            'instructions_{}.ins'.format(dataset_ID_Fitting) + ' '
            + 'output_{}.sel'.format(dataset_ID_Fitting),
            '* prior information',
        ]
        # The last section header is written without a trailing newline
        pct.write('\n'.join(records))

In [None]:
#########################################################################
#                                                                                                                                              #
#                               Formatter for PEST Control - fitting (capacitance input)                          #
#                                                                                                                                              #
#########################################################################

# def formattingPESTcontrol(orgdata_temp, pK1_int, m_pK2_int): 

def formattingPESTcontrolFittingCapac(orgdata_temp, pK1_int, difK2, capacitacne_input, dataset_ID_Fitting):
    """Write the PEST control (.pst) file for the post-fitting run with a
    caller-supplied capacitance.

    ``PEST_control`` (output path) and ``SCM_type`` are read from module
    level; the ``capac`` parameter line is only written for "CCM".

    Parameters
    ----------
    orgdata_temp : table with ``SurfCharge_val`` and ``Site_Conc`` columns
        that can be looked up by the labels 0..n-1; one observation per row,
        weighted by 1 / (Site_Conc / 10).
    pK1_int : initial ``Fe_wOH2+`` value, bounded by ``pK1_int - 3`` and
        ``pK1_int + 3``.
    difK2 : initial ``difK`` value (bounds 0.01 and 8).
    capacitacne_input : initial ``capac`` value (bounds 0.01 and 10).
    dataset_ID_Fitting : identifier inserted into the model file names.
    """
    # 'w+' creates the control file when it does not exist yet
    with open(PEST_control, 'w+') as pct:
        n_obs = orgdata_temp.shape[0]
        with_capacitance = SCM_type == "CCM"
        n_par = '3' if with_capacitance else '2'  # number of adjustable parameters

        records = [
            'pcf',
            '* control data',
            'norestart estimation',
            n_par + ' %d' % n_obs + ' 1 0 1',
            '2 1 single point 1 0 0',
            '5.0 3.0 0.3 0.01 8',
            '3.0 3.0 0.001',
            '.1 aui',
            '-1 .01 4 4 .01 3',
            '1 1 1',
            '* automatic user intervention',
            '8 1 0.9 0',
            '80.0 0 3',
            '0.8 0.99 4',
            '* parameter groups',
            'rates relative 0.1 0.01 switch 1.0 parabolic',
            '* parameter data',
            # Modify the following entries for system specific surface species
            'Fe_wOH2+ none relative %0.2f %0.2f %0.2f rates 1.0 0.0 1'
            % (pK1_int, pK1_int - 3, pK1_int + 3),
            'difK none relative %0.2f 0.01 8 rates 1.0 0.0 1' % (difK2),
        ]
        if with_capacitance:
            records.append(
                'capac none relative %0.2f 0.01 10 rates 1.0 0.0 1' % (capacitacne_input)
            )
        records += ['* observation groups', 'group_1', '* observation data']

        for n in range(n_obs):
            records.append(
                'Obs%d %0.12f %0.12f group_1'
                % (n + 1,
                   orgdata_temp.SurfCharge_val[n],
                   1 / ((orgdata_temp.Site_Conc[n]) / 10))
            )

        model_in = 'input_{}.txt'.format(dataset_ID_Fitting)
        model_out = 'output_{}.txt'.format(dataset_ID_Fitting)
        database = 'phreeqc_dif_{}.txt'.format(dataset_ID_Fitting)
        records += [
            '* model command line',
            ' '.join(['phreeqc.exe', model_in, model_out, database]),
            '* model input/output',
            'phreeqc_dif.tpt ' + database,
            'input_{}.tpt'.format(dataset_ID_Fitting) + ' ' + model_in,
            'instructions_{}.ins'.format(dataset_ID_Fitting) + ' '
            + 'output_{}.sel'.format(dataset_ID_Fitting),
            '* prior information',
        ]
        # The last section header is written without a trailing newline
        pct.write('\n'.join(records))

In [None]:
# Initial Fe_wOH2+ value and the window (-5 / +3) that formattingPESTcontrol()
# writes around it; the post-processor compares estimates against these bounds.
pK1_int = 7.29
Lower_pK1_int, Upper_pK1_int = pK1_int - 5, pK1_int + 3
# m_pK2_int = -8.93

# Bounds of the difK and capac parameters as written to the control file
Lower_difK, Upper_difK = 0.01, 10
Lower_capac, Upper_capac = 0.01, 10

In [None]:
#########################################################################
#                                                                                                                                              #
#                                                    Exports PEST Control Files                                              #
#                                                                                                                                              #
#########################################################################
# One control file per dataset, written into the individual run folder.
for gN in range(tot_group_no):
    dataset_ID = dataset_list.dataset[gN]
    # formattingPESTcontrol() reads PEST_control and dataset_ID as globals
    PEST_control = individual_dataset_run_path / f'control_{dataset_ID}.pst'
    # Renumber the rows 0..n-1 before handing the table over
    orgdata_temp = pd.DataFrame(titration_dic[dataset_ID]).reset_index(drop=True)
    pK1_int = 7.29
    # The earlier three-argument variant (with m_pK2_int = -8.93) is no longer used
    formattingPESTcontrol(orgdata_temp, pK1_int)

In [None]:
#########################################################################
#                                                                                                                                              #
#                                                   Running PEST Simulation                                                  #
#                                                                                                                                              #
#########################################################################
# Run PEST once per dataset from inside the individual run folder.
for gN in range(tot_group_no):
    dataset_ID = dataset_list.dataset[gN]
    os.chdir(individual_dataset_run_path)
    os.system(f'pest control_{dataset_ID}.pst')
    print(f'{dataset_ID} done')

In [None]:
#########################################################################
#                                                                                                                                              #
#                                     Post Processor  -  Imports R and Phi values                                   #
#                                                                                                                                              #
#########################################################################
os.chdir(individual_dataset_run_path)
results_xlsx = 'PEST_Titration_Results_{}_{}.xlsx'.format(Mineral_type, SCM_type)

# An empty workbook is first created through pandas/xlsxwriter and then
# reopened with openpyxl, which is used for all cell-level edits below.
Post_writer = pd.ExcelWriter(results_xlsx, engine='xlsxwriter')
Post_writer.close()
Post_wb = openpyxl.load_workbook(results_xlsx)
Post_wb.create_sheet(title = "R_and_Phi_vals")
#Post_wb.create_sheet(title = "Good_PEST_Simulations")
#Post_wb.create_sheet(title = "logK_Bound_Analysis")
sheetPost = Post_wb["R_and_Phi_vals"]

# Header row; the capacitance columns only exist for the CCM model
header_titles = [
    "Dataset_ID", "R_value", "Phi_value",
    "Fe_wOH2+(pK_1)", "SD_Fe_wOH2+(pK_1)", "Boundary_Eval_1",
    "Fe_wO-(-pK_2)", "SD_Fe_wO-(-pK_2)", "Boundary_Eval_2",
]
if SCM_type == "CCM":
    header_titles += ["Capacitance", "SD_Capacitance", "Boundary_Eval_Capacitance"]
for header_col, header_title in enumerate(header_titles, start=1):
    sheetPost.cell(row=1, column=header_col).value = header_title
Post_wb.save(results_xlsx)


def _first_fields_containing(text_lines, token):
    """Return the whitespace-split fields of the first line containing token, or None."""
    for text in text_lines:
        if token in text:
            return text.split()
    return None


def _boundary_note(estimate, lower, upper, subject):
    """Return the boundary-evaluation text for an estimate and its two bounds.

    The estimate is parsed from PEST text output while the bounds are Python
    floats (the pK1 bounds are written to the control file with two
    decimals), so an exact ``==`` can miss a hit.  The bounds are therefore
    rounded to two decimals and compared with a small tolerance.
    """
    def _hits(bound):
        return math.isclose(estimate, round(float(bound), 2), rel_tol=1e-6, abs_tol=1e-9)

    if _hits(lower):
        return "{} hits a lower boundary".format(subject)
    if _hits(upper):
        return "{} hits a upper boundary".format(subject)
    return "No {} hits a boundary".format(subject)


Row_post = 2
# Names searched for in the .MTT file (the first two in lower case)
Surf1 = "Fe_wOH2+"
Surf1_lower = Surf1.lower()
Surf2 = "difK"
Surf2_lower = Surf2.lower()
capac = "capac"

for D_Post in range (0, tot_group_no):
    dataset_ID_post = dataset_list.dataset[D_Post]
    dataset_ID_post_upper = dataset_ID_post.upper()
    row_no = D_Post + 2  # row 1 holds the header
    sheetPost.cell(row=row_no, column = 1).value = dataset_ID_post

    # R and phi come from the run record; the file is closed as soon as it is read
    with open('CONTROL_{}.REC'.format(dataset_ID_post_upper), 'r') as GControlREC:
        lines = GControlREC.readlines()
    for line in lines:
        if "  Correlation coefficient                                   =" in line:
            R_val = line.split()[3]
            sheetPost.cell(row=row_no, column = 2).value = float(R_val)
        elif "  Sum of squared weighted residuals (ie phi)                =" in line:
            Phi_val = line.split()[8]
            sheetPost.cell(row=row_no, column = 3).value = float(Phi_val)

    mtt_name = 'CONTROL_{}.MTT'.format(dataset_ID_post_upper)
    if os.path.exists(mtt_name):
        with open(mtt_name, 'r') as GControlMTT:
            lines2 = GControlMTT.readlines()

        # Reset per dataset so a value from the previous dataset is never reused
        Estimated_K1 = None
        item_post = _first_fields_containing(lines2, Surf1_lower)
        if item_post is not None:
            Estimated_K1 = float(item_post[2])
            Estimated_K1_SD = float(item_post[3])
            sheetPost.cell(row=row_no, column = 4).value = Estimated_K1
            sheetPost.cell(row=row_no, column = 5).value = Estimated_K1_SD
            sheetPost.cell(row=row_no, column = 6).value = _boundary_note(
                Estimated_K1, Lower_pK1_int, Upper_pK1_int, "EST.K")

        item_post = _first_fields_containing(lines2, Surf2_lower)
        if item_post is not None:
            Estimated_difK = float(item_post[2])
            Estimated_difK_SD = float(item_post[3])
            # The second constant is reported as pK1 + difK; it is left blank
            # when no pK1 line was found for this dataset.
            if Estimated_K1 is not None:
                Estimated_K2 = Estimated_difK + Estimated_K1
                sheetPost.cell(row=row_no, column = 7).value = Estimated_K2
            sheetPost.cell(row=row_no, column = 8).value = Estimated_difK_SD
            sheetPost.cell(row=row_no, column = 9).value = _boundary_note(
                Estimated_difK, Lower_difK, Upper_difK, "EST.K")

        if SCM_type == "CCM":
            item_post = _first_fields_containing(lines2, capac)
            if item_post is not None:
                Estimated_capac = float(item_post[2])
                Estimated_capac_SD = float(item_post[3])
                sheetPost.cell(row=row_no, column = 10).value = Estimated_capac
                sheetPost.cell(row=row_no, column = 11).value = Estimated_capac_SD
                sheetPost.cell(row=row_no, column = 12).value = _boundary_note(
                    Estimated_capac, Lower_capac, Upper_capac, "Capacitance")

    # Save once per dataset so finished rows survive a failure on a later one
    Post_wb.save(results_xlsx)

In [None]:
#########################################################################
#                                                                                                                                              #
#                                              Check the integrity of simulation                                              #
#                                                                                                                                              #
#########################################################################
# Stop the workflow when any dataset has no correlation coefficient in the
# results workbook written by the post-processor.
os.chdir(individual_dataset_run_path)
df_integrity = pd.read_excel("PEST_Titration_Results_{}_{}.xlsx".format(Mineral_type, SCM_type), engine = "openpyxl", sheet_name="R_and_Phi_vals")
check_for_nan = df_integrity['R_value'].isnull().values.any()
if check_for_nan:
    print('Some simulation has no R value. Check simulations')
    print('Downstream no longer proceed')
    # Abort with a non-zero status: this is a failure, not a normal exit
    sys.exit(1)
else:
    print('All simulations successfully ran!!!')

In [None]:
#########################################################################
#                                                                                                                                              #
#                                                Titration Results Plotting Tool                                               #
#                                                                                                                                              #
#########################################################################


# Start from an empty '1.Figures' folder inside the individual run folder
path_Figures = individual_dataset_run_path.joinpath('1.Figures')
if os.path.exists(path_Figures):
    shutil.rmtree(path_Figures)
if not os.path.isdir(path_Figures):
    os.makedirs(path_Figures)

for gN in range(0, tot_group_no):
    dataset_ID = dataset_list.dataset[gN]

    # Model output: read the selected-output file once (raw string for the
    # regex separator), keep the "react" rows and prefix their columns with r_
    df = pd.read_csv(individual_dataset_run_path.joinpath('output_{}.sel'.format(dataset_ID)), sep=r'\s+')
    df2 = df.loc[df.state == "react"]
    df2 = df2.add_prefix('r_')
    df2 = df2.reset_index(drop=True)
    df2 = df2.rename(columns={'r_sum': 'r_SurfCharge_val'})

    # Measured data and model output are placed side by side, matched by row position
    df4 = pd.read_csv(individual_dataset_path.joinpath('dataset_{}.csv'.format(dataset_ID)))
    df_col = pd.concat([df4, df2], axis=1)

    # Same conversion for measured charge, modeled charge and the measured SD;
    # the y-axis label below reports the result as microC/cm^2
    df_col["SurfCharge_val_C"] = df_col["SurfCharge_val"] / df_col["Mineral_val"] / df_col["MineralSA"] * (96485) * 100
    df_col["r_SurfCharge_val_C"] = df_col["r_SurfCharge_val"] / df_col["Mineral_val"] / df_col["MineralSA"] * (96485) * 100
    df_col["SurfCharge_SD_C"] = df_col["SurfCharge_SD"] / df_col["Mineral_val"] / df_col["MineralSA"] * (96485) * 100
    df_col["Abs_SurfCharge_SD_C"] = df_col["SurfCharge_SD_C"].abs()
    # df_col = pd.concat([df_col,df1], axis=1)
    df_col.to_csv (individual_dataset_run_path.joinpath('export_dataframe_plot_{}.csv'.format(dataset_ID)), index = False, header=True)

    # Fixed pH window; the charge axis spans both measured and modeled values
    max_pH = 12
    min_pH = 2
    max_charge = max(df_col["SurfCharge_val_C"].max(), df_col["r_SurfCharge_val_C"].max())
    min_charge = min(df_col["SurfCharge_val_C"].min(), df_col["r_SurfCharge_val_C"].min())

    plt.figure(figsize=(4.4,4), facecolor='white')
    plot_number = 1
    for Ref, selection in df_col.groupby("Set"):
        # NOTE(review): the grid is 1x1, so a second "Set" group would ask for
        # subplot index 2 and fail - confirm every dataset holds a single Set.
        ax = plt.subplot(1, 1, plot_number)
        # Sort a copy instead of sorting the groupby slice in place
        selection = selection.sort_values(by='pH', ascending=True)
        selection.plot(x='pH', y='SurfCharge_val_C', ax=ax, label=Ref, legend=Ref, kind = 'scatter', yerr = "Abs_SurfCharge_SD_C")
        selection.plot(x='pH', y='r_SurfCharge_val_C', marker='o', markersize=3, ax=ax, label='modeled', color='orange')
        plt.xlabel("pH")
        plt.ylabel("Charge" +  "(" + r'$\mu$' + "C/cm" + r'$^2$' + ")")
        plt.ylim(min_charge*1.2, max_charge*1.2)
        plt.xlim(min_pH,max_pH)
        plot_number = plot_number + 1
    plt.tight_layout()
    plt.savefig(path_Figures.joinpath('{}.png'.format(dataset_ID)),dpi = 100)

In [None]:
def RECprocessing(REC_file):
    """Extract the fitted parameters and their standard deviations from a PEST .rec file.

    Only the text from an "OPTIMISATION RESULTS" line up to its paired
    "Note:" line is searched (the last such pair when there are several).
    On a parameter line the second field is taken as the estimate; the
    standard deviation is computed as (fourth field - third field) / 4
    (presumably the upper and lower 95% confidence limits - TODO confirm
    against the PEST record layout).

    The lines are identified through the module-level ``Ka1_symbol``,
    ``Ka2_symbol`` and ``Capac_symbol``; ``SCM_type`` decides whether the
    capacitance is read as well.

    Returns
    -------
    tuple
        ``(pKa1, SD_pKa1, pKa2, SD_dif_K)`` with ``pKa2 = pKa1 + dif_K``;
        for SCM_type "CCM" the tuple is extended by ``(Capac_val, SD_capac)``.

    Raises
    ------
    ValueError
        If the file has no results section or a required parameter line is
        missing from it.
    """
    # Read everything up front so the handle is closed before parsing
    with open(REC_file) as raw_data:
        data = raw_data.readlines()

    starts = [idx for idx, text in enumerate(data) if "OPTIMISATION RESULTS" in text]
    stops = [idx for idx, text in enumerate(data) if "Note:" in text]
    sections = list(zip(starts, stops))
    if not sections:
        raise ValueError(
            'No "OPTIMISATION RESULTS" ... "Note:" section found in {}'.format(REC_file))
    first, last = sections[-1]
    result = data[first:last + 1]

    pKa1 = SD_pKa1 = dif_K = SD_dif_K = None
    for line in result:
        if Ka1_symbol in line:
            item = line.split()
            pKa1 = float(item[1])
            SD_pKa1 = (float(item[3]) - float(item[2]))/4
        elif Ka2_symbol in line:
            item2 = line.split()
            dif_K = float(item2[1])
            SD_dif_K = (float(item2[3]) - float(item2[2]))/4
    if pKa1 is None or dif_K is None:
        raise ValueError(
            'Parameter lines for {!r} and/or {!r} are missing in {}'.format(
                Ka1_symbol, Ka2_symbol, REC_file))
    # Computed after the scan so it does not depend on the order of the two lines
    pKa2 = pKa1 + dif_K

    if SCM_type != "CCM":
        return pKa1, SD_pKa1, pKa2, SD_dif_K

    Capac_val = SD_capac = None
    for line2 in result:
        if Capac_symbol in line2:
            item3 = line2.split()
            Capac_val = float(item3[1])
            SD_capac = (float(item3[3]) - float(item3[2]))/4
    if Capac_val is None:
        raise ValueError(
            'Parameter line for {!r} is missing in {}'.format(Capac_symbol, REC_file))
    return pKa1, SD_pKa1, pKa2, SD_dif_K, Capac_val, SD_capac

In [None]:
datasets = dataset_list['dataset']
Num_datasets = len(datasets)

# Empty summary table: one row per dataset plus five extra rows for the
# statistics appended afterwards.  The capacitance and weight columns are
# only pre-declared for the CCM model.
summary_columns = ['Dataset_ID',
                   'Fe_wOH2+(pK_1)', 'SD_Fe_wOH2+(pK_1)',
                   'Fe_wO-(-pK_2)', 'SD_Fe_wO-(-pK_2)']
if SCM_type == "CCM":
    summary_columns += ['Capacitance', 'SD_Capacitance', 'W1', 'W2', 'WCP']
df_pKas = pd.DataFrame(index=range(0, Num_datasets + 5), columns=summary_columns)

In [None]:
# Fill one row per dataset from its PEST record file.  The record file name is
# relative, so it is resolved against the current working directory.
for ID_Num in range (0, Num_datasets):
    dataset_ID =datasets[ID_Num]
    df_pKas.loc[ID_Num, 'Dataset_ID'] = dataset_ID
    Rec_file = 'control_{}.rec'.format(dataset_ID)
    if SCM_type == "CCM":
        pKa1, SD_pKa1, pKa2, SD_dif_K, Capac_val, SD_capac = RECprocessing(Rec_file)
    else:
        pKa1, SD_pKa1, pKa2, SD_dif_K = RECprocessing(Rec_file)
    df_pKas.loc[ID_Num, 'Fe_wOH2+(pK_1)'] = pKa1
    df_pKas.loc[ID_Num, 'SD_Fe_wOH2+(pK_1)'] = SD_pKa1
    df_pKas.loc[ID_Num, 'Fe_wO-(-pK_2)'] = pKa2
    df_pKas.loc[ID_Num, 'SD_Fe_wO-(-pK_2)'] = SD_dif_K
    # Inverse-variance weights (1 / SD^2) used for the weighted statistics below
    df_pKas.loc[ID_Num, 'W1'] = 1/(SD_pKa1**2)
    df_pKas.loc[ID_Num, 'W2'] = 1/(SD_dif_K**2)
    if SCM_type == "CCM":
        df_pKas.loc[ID_Num, 'Capacitance'] = Capac_val
        df_pKas.loc[ID_Num, 'SD_Capacitance'] = SD_capac
        df_pKas.loc[ID_Num, 'WCP'] = 1/(SD_capac**2)

# Statistics rows: row Num_datasets is never written (blank separator); rows
# +1..+4 hold Average, SD, Weighted_Average and Weighted_SD.  Only the first
# Num_datasets rows enter each statistic.
# pK1: labels go into 'Dataset_ID', values into 'Fe_wOH2+(pK_1)'.
df_pKas.loc[Num_datasets + 1, 'Dataset_ID'] = "Average"
df_pKas.loc[Num_datasets + 1, 'Fe_wOH2+(pK_1)'] = round(df_pKas['Fe_wOH2+(pK_1)'][0:Num_datasets].mean(), 2)
df_pKas.loc[Num_datasets + 2, 'Dataset_ID'] = "SD"
df_pKas.loc[Num_datasets + 2, 'Fe_wOH2+(pK_1)'] = round(df_pKas['Fe_wOH2+(pK_1)'][0:Num_datasets].std(), 2)
df_pKas.loc[Num_datasets + 3, 'Dataset_ID'] = "Weighted_Average"
pKa1_wav = round(average(df_pKas['Fe_wOH2+(pK_1)'][0:Num_datasets], weights = df_pKas['W1'][0:Num_datasets] ),2)
df_pKas.loc[Num_datasets + 3, 'Fe_wOH2+(pK_1)'] = pKa1_wav
df_pKas.loc[Num_datasets + 4, 'Dataset_ID'] = "Weighted_SD"

# pK2: the row labels are written into the 'SD_Fe_wOH2+(pK_1)' column and the
# values into 'Fe_wO-(-pK_2)'.
df_pKas.loc[Num_datasets + 1, 'SD_Fe_wOH2+(pK_1)'] = "Average"
df_pKas.loc[Num_datasets + 1, 'Fe_wO-(-pK_2)'] = round(df_pKas['Fe_wO-(-pK_2)'][0:Num_datasets].mean(), 2)
df_pKas.loc[Num_datasets + 2, 'SD_Fe_wOH2+(pK_1)'] = "SD"
df_pKas.loc[Num_datasets + 2, 'Fe_wO-(-pK_2)'] = round(df_pKas['Fe_wO-(-pK_2)'][0:Num_datasets].std(), 2)
df_pKas.loc[Num_datasets + 3, 'SD_Fe_wOH2+(pK_1)'] = "Weighted_Average"
pKa2_wav = round(average(df_pKas['Fe_wO-(-pK_2)'][0:Num_datasets], weights = df_pKas['W2'][0:Num_datasets] ),2)
df_pKas.loc[Num_datasets + 3, 'Fe_wO-(-pK_2)'] = pKa2_wav
df_pKas.loc[Num_datasets + 4, 'SD_Fe_wOH2+(pK_1)'] = "Weighted_SD"

# Capacitance (CCM only): labels go into 'SD_Fe_wO-(-pK_2)', values into
# 'Capacitance'.
if SCM_type == "CCM":
    df_pKas.loc[Num_datasets + 1, 'SD_Fe_wO-(-pK_2)'] = "Average"
    df_pKas.loc[Num_datasets + 1, 'Capacitance'] = round(df_pKas['Capacitance'][0:Num_datasets].mean(), 2)
    df_pKas.loc[Num_datasets + 2, 'SD_Fe_wO-(-pK_2)'] = "SD"
    df_pKas.loc[Num_datasets + 2, 'Capacitance'] = round(df_pKas['Capacitance'][0:Num_datasets].std(), 2)
    df_pKas.loc[Num_datasets + 3, 'SD_Fe_wO-(-pK_2)'] = "Weighted_Average"
    Capac_wav = round(average(df_pKas['Capacitance'][0:Num_datasets], weights = df_pKas['WCP'][0:Num_datasets] ),2)
    df_pKas.loc[Num_datasets + 3, 'Capacitance'] = Capac_wav
    df_pKas.loc[Num_datasets + 4, 'SD_Fe_wO-(-pK_2)'] = "Weighted_SD"


########################################################################################################
# Weighted standard deviation, computed the same way for each parameter:
#     sqrt( sum(w * (x - weighted_average)^2) / ((NZ - 1) * sum(w) / NZ) )
# where NZ is the number of non-zero weights.  The weighted average used here
# is the value already rounded to two decimals above.


Weights1_entire  = df_pKas["W1"][0:Num_datasets]
Weights2_entire  = df_pKas["W2"][0:Num_datasets]
Weights1_sum = Weights1_entire.sum()
Weights2_sum = Weights2_entire.sum()
NW = Num_datasets
NZ1 = len(Weights1_entire.loc[Weights1_entire != 0])
NZ2 = len(Weights2_entire.loc[Weights2_entire != 0])


# NOTE(review): with a single non-zero weight (NZ == 1) the denominator below
# is zero - confirm that at least two datasets are always present.
Diff_K1_SQ = (df_pKas["Fe_wOH2+(pK_1)"][0:Num_datasets] - pKa1_wav)**(2)
W_Diff_K1_SQ = Diff_K1_SQ*Weights1_entire
W_Diff_K1_SQ_sum = W_Diff_K1_SQ.sum()
Weighted_SD1 = (W_Diff_K1_SQ_sum/((NZ1-1)*Weights1_sum/NZ1))**(1/2)
df_pKas.loc[Num_datasets + 4, 'Fe_wOH2+(pK_1)'] = round(Weighted_SD1, 2)

Diff_K2_SQ = (df_pKas["Fe_wO-(-pK_2)"][0:Num_datasets] - pKa2_wav)**(2)
W_Diff_K2_SQ = Diff_K2_SQ*Weights2_entire
W_Diff_K2_SQ_sum = W_Diff_K2_SQ.sum()
Weighted_SD2 = (W_Diff_K2_SQ_sum/((NZ2-1)*Weights2_sum/NZ2))**(1/2)
df_pKas.loc[Num_datasets + 4, 'Fe_wO-(-pK_2)'] = round(Weighted_SD2, 2)

if SCM_type == "CCM":
    Weight_Capac_entire = df_pKas["WCP"][0:Num_datasets]
    Weight_Capac_sum = Weight_Capac_entire.sum()
    NZC = len(Weight_Capac_entire.loc[Weight_Capac_entire != 0])
    Diff_Capac_SQ = (df_pKas["Capacitance"][0:Num_datasets] - Capac_wav)**(2)
    W_Diff_Capac_SQ = Diff_Capac_SQ*Weight_Capac_entire
    W_Diff_Capac_SQ_sum = W_Diff_Capac_SQ.sum()
    Weighted_SD_Capac = (W_Diff_Capac_SQ_sum/((NZC-1)*Weight_Capac_sum/NZC))**(1/2)
    df_pKas.loc[Num_datasets + 4, 'Capacitance'] = round(Weighted_SD_Capac, 2)


# Append the R_value column from the results workbook as an extra column
# (axis=1, i.e. rows are matched by index label).
df_R_value_temp = pd.read_excel("PEST_Titration_Results_{}_{}.xlsx".format(Mineral_type, SCM_type), engine = "openpyxl", sheet_name="R_and_Phi_vals")
df_R_value = df_R_value_temp["R_value"]
df_pKas = pd.concat([df_pKas, df_R_value], axis=1)

# Debug output, kept disabled:
# print(format(pKa1_wav,".2f"))
# print(format(Weighted_SD1,".2f"))
# print(format(pKa2_wav,".2f"))
# print(format(Weighted_SD2,".2f"))

# if SCM_type == "CCM":
#     print(format(Capac_wav,".2f"))
#     print(format(Weighted_SD_Capac,".2f"))

# print(NW)
# print(NZ1)
# print(NZ2)

# if SCM_type == "CCM":
#     print(NZC)

# Written with a relative name, i.e. into the current working directory
df_pKas.to_csv ('0.Simulation_Summary.csv', index = False, header=True)

In [None]:
#########################################################################
#                                                                                                                                              #
#                                         Post PEST Fitting - Folder Generation                                         #
#                                                                                                                                              #
#########################################################################

# Start from an empty 'Dataset_Fitting' folder below the original folder
path_fitting = path_temp_org / 'Dataset_Fitting'
if os.path.exists(path_fitting):
    shutil.rmtree(path_fitting)
if not os.path.isdir(path_fitting):
    os.makedirs(path_fitting)

# The two executables and the database template are copied under the same names
_fitting_files = ('PEST.EXE', 'phreeqc.exe', 'phreeqc_dif.tpt')
PEST_EXE, Phreeqc_EXE, TDB = [path_temp_org / _fname for _fname in _fitting_files]
copy_PEST_EXE, copy_Phreeqc_EXE, copy_TDB = [path_fitting / _fname for _fname in _fitting_files]
for _src, _dst in zip((PEST_EXE, Phreeqc_EXE, TDB),
                      (copy_PEST_EXE, copy_Phreeqc_EXE, copy_TDB)):
    shutil.copyfile(_src, _dst)


In [None]:
#########################################################################
#                                                                                                                                              #
#                               Post PEST Fitting - Run pre Phreeqc Simulations                                  #
#                                                                                                                                              #
#########################################################################
dataset_ID_Entire = 'Entire_datasets'
# Input, output and selected-output paths for the run over the whole data set.
# NOTE(review): phreeqcpreprocessing() presumably reads these module-level
# names - verify against its definition.
phreeqc_input = pre_phreeqc_sim_path / f'Titration_input_{dataset_ID_Entire}.txt'
phreeqc_output = pre_phreeqc_sim_path / f'Titration_output_{dataset_ID_Entire}.out'
phreeqc_results = pre_phreeqc_sim_path / f'Titration_selout_{dataset_ID_Entire}.sel'
phreeqcpreprocessing(orgdata)

In [None]:
#########################################################################
#                                                                                                                                              #
#                          Post PEST Fitting -  Exports Phreeqc Input Files for PEST                         #
#                                                                                                                                              #
#########################################################################
# Load the selected-output table of the 'Entire_datasets' pre-simulation and
# write the PHREEQC template input for the combined fit into the fitting folder.
# NOTE(review): df_phreeqc_pre and phreeqc_input are presumably read as
# globals by importingphreeqcFitting - confirm against its definition.
dataset_ID_Fitting = 'orgdata_Fitting'
phreeqc_results_temp_post = pre_phreeqc_sim_path.joinpath('Titration_selout_{}.sel'.format(dataset_ID_Entire))
# Raw string: '\s' inside a normal string literal is an invalid escape
# sequence and triggers a warning on current Python versions.
df_phreeqc_pre = pd.read_csv(phreeqc_results_temp_post, sep=r'\s+')
phreeqc_input = path_fitting.joinpath('input_{}.tpt'.format(dataset_ID_Fitting))
importingphreeqcFitting(orgdata, dataset_ID_Fitting)

In [None]:
#########################################################################
#                                                                                                                                              #
#                                Post PEST Fitting - Exports PEST Instruction Files                               #
#                                                                                                                                              #
#########################################################################

# Set the target path of the PEST instruction file inside the fitting folder
# and generate it from orgdata.
# NOTE(review): formattingPESTinstruction presumably reads PEST_instructions
# as a global - confirm against its definition.
dataset_ID_Fitting = 'orgdata_Fitting'
PEST_instructions = path_fitting / f'instructions_{dataset_ID_Fitting}.ins'
formattingPESTinstruction(orgdata)

In [None]:
#########################################################################
#                                                                                                                                              #
#                                Post PEST Fitting - Exports PEST Control Files                                    #
#                                                                                                                                              #
#########################################################################
# Write the PEST control file for the combined fit.
dataset_ID_Fitting = 'orgdata_Fitting'
PEST_control = path_fitting / f'control_{dataset_ID_Fitting}.pst'

# Starting values: the first pK is passed as is, the second one as its
# offset from the first.
pK1_int = pKa1_wav
difK2 = pKa2_wav - pK1_int

# Only the CCM variant passes a capacitance value; every other SCM type
# uses the two-pK control writer.
if SCM_type != "CCM":
    formattingPESTcontrolFitting(orgdata, pK1_int, difK2, dataset_ID_Fitting)
else:
    capacitacne_input = Capac_wav
    formattingPESTcontrolFittingCapac(orgdata, pK1_int, difK2, capacitacne_input, dataset_ID_Fitting)

In [None]:
#########################################################################
#                                                                                                                                              #
#                                     Post PEST Fitting - Running PEST Simulation                                  #
#                                                                                                                                              #
#########################################################################
# Run PEST on the fitting control file.
dataset_ID_Fitting = 'orgdata_Fitting'
# The control file is passed by bare name, so the process must be started
# from the fitting folder. The working directory stays changed afterwards.
os.chdir(path_fitting)
pest_exit_status = os.system('pest' + ' ' + 'control_{}.pst'.format(dataset_ID_Fitting))
# Surface a failed run here instead of letting a later cell fail on a
# missing or stale output file. Deliberately a warning, not an exception.
if pest_exit_status != 0:
    print(
        'WARNING: PEST exited with status {} for control_{}.pst'.format(
            pest_exit_status, dataset_ID_Fitting
        ),
        file=sys.stderr,
    )

In [None]:
# Compare measured and modeled surface charge for every titration set:
# load the model output, merge it with orgdata, export the merged table,
# and draw one pH-vs-charge panel per "Set".
# The current working directory is used as the input/output folder.
Current_path_post_fig = Path(os.getcwd())

# Raw string: '\s' inside a normal string literal is an invalid escape sequence.
df = pd.read_csv(Current_path_post_fig.joinpath('output_{}.sel'.format(dataset_ID_Fitting)), sep=r'\s+')

# Keep only the rows whose state is "react"; modeled columns get an 'r_'
# prefix and the modeled 'sum' column becomes 'r_SurfCharge_val'.
df2 = df.loc[df.state == "react"]
df2 = df2.add_prefix('r_')
df2 = df2.reset_index(drop=True)
df2 = df2.rename(columns={'r_sum': 'r_SurfCharge_val'})

# Place experimental and modeled columns side by side (aligned on the row
# index). concat returns a new frame, so the columns added below do not end
# up in orgdata.
df4 = orgdata
df_col = pd.concat([df4, df2], axis=1)

# Normalise each charge column by mineral amount and surface area.
# NOTE(review): 96485 is presumably the Faraday constant (C/mol) and the
# factor 100 a unit conversion matching the axis label below - confirm.
for source_col, charge_col in (
    ("SurfCharge_val", "SurfCharge_val_C"),
    ("r_SurfCharge_val", "r_SurfCharge_val_C"),
    ("SurfCharge_SD", "SurfCharge_SD_C"),
):
    df_col[charge_col] = df_col[source_col] / df_col["Mineral_val"] / df_col["MineralSA"] * (96485) * 100
# Error bars need non-negative magnitudes.
df_col["Abs_SurfCharge_SD_C"] = df_col["SurfCharge_SD_C"].abs()

# NOTE(review): this export is keyed by dataset_ID_post while the rest of
# this cell uses dataset_ID_Fitting - confirm the file name is intended.
df_col.to_csv(Current_path_post_fig.joinpath('export_dataframe_plot_{}.csv'.format(dataset_ID_post)), index=False, header=True)

# Five panels per row; all panels share the same integer-rounded pH range.
sets_grouped = df_col.groupby("Set")
rows_fig = math.ceil(sets_grouped.ngroups / 5)
column = df_col["pH"]
max_pH = math.ceil(column.max())
min_pH = math.floor(column.min())

plt.figure(figsize=(22, 4 * rows_fig), facecolor='white')
plot_number = 1
for Ref, selection in sets_grouped:
    ax = plt.subplot(rows_fig, 5, plot_number)
    # Sort into a new frame: sorting the group slice in place triggers
    # pandas' SettingWithCopyWarning.
    selection = selection.sort_values(by='pH', ascending=True)
    # legend expects a boolean; the set name is supplied through label.
    selection.plot(x='pH', y='SurfCharge_val_C', ax=ax, label=Ref, legend=True, kind='scatter', yerr="Abs_SurfCharge_SD_C")
    selection.plot(x='pH', y='r_SurfCharge_val_C', marker='o', markersize=3, ax=ax, label='modeled', color='orange')

    max_charge = max(selection["SurfCharge_val_C"].max(), selection["r_SurfCharge_val_C"].max())
    min_charge = min(selection["SurfCharge_val_C"].min(), selection["r_SurfCharge_val_C"].min())
    # Pad both limits by 10 % away from the data. Multiplying by 1.1
    # unconditionally would cut into the data whenever the minimum is
    # positive or the maximum is negative.
    lower_limit = min_charge * (1.1 if min_charge < 0 else 0.9)
    upper_limit = max_charge * (1.1 if max_charge > 0 else 0.9)

    ax.set_xlabel("pH")
    ax.set_ylabel(r"Charge($\mu$C/cm$^2$)")
    ax.set_ylim(lower_limit, upper_limit)
    ax.set_xlim(min_pH, max_pH)
    # Go to the next plot for the next loop
    plot_number = plot_number + 1
plt.tight_layout()
plt.savefig(Current_path_post_fig.joinpath('Charge_{}.png'.format(dataset_ID_Fitting)), dpi=100)