# Script to add atomization energy (AE) corrections for new level of theory in Arkane
## Start Here (do AE before BAC)

See documentation for current levels of theory: https://github.com/ReactionMechanismGenerator/RMG-Py/blob/2a1f9100390fc2ef1b8a8102925d06b941695a25/documentation/source/users/arkane/input.rst#L57

This paper explains atomization energy corrections and bond additivity corrections:
https://onlinelibrary.wiley.com/doi/10.1002/kin.21637


# Step 1: Load reference database

In [64]:
import os
# import glob

import ase
import ase.io.gaussian

import rmgpy.species

import arkane.encorr.data as data
import arkane.encorr.reference
import arkane.modelchem

import arkane.encorr.ae


import arkane.ess.gaussian

In [2]:
ref_db = arkane.encorr.reference.ReferenceDatabase()
ref_db.load()


# Step 2: Set up DFT calculations using reference data's geometries

In [25]:
# working_dir = os.path.abspath('./ae_calcs/m062x_ccpvtz')
# working_dir = os.path.abspath('./ae_calcs/b3lyp_6311G3df2p')
working_dir = os.path.abspath('./ae_calcs/ccsdt_ccpvtz')
os.makedirs(working_dir, exist_ok=True)

In [95]:
# adjust these parameters as needed
lot = arkane.modelchem.LevelOfTheory(
    method='ccsd(t)',
    basis='cc-pvtz',
    software='gaussian'
)

sp_indices = []

for i in range(len(ref_db.reference_sets['main'])):
    
    # only do the reference species
    if ref_db.reference_sets['main'][i].label not in arkane.encorr.ae.SPECIES_LABELS:
        continue
    sp_indices.append(i)
    sp_dir = os.path.join(working_dir, f'species_{i:04}')
    os.makedirs(sp_dir, exist_ok=True)
    
    # Use the CCCBDB geometries for energy calculations - see Notes in AEJob class description
    syms = ref_db.reference_sets['main'][i].reference_data['CCCBDB'].xyz_dict['symbols']
    xyz = ref_db.reference_sets['main'][i].reference_data['CCCBDB'].xyz_dict['coords']
    atoms = ase.Atoms(symbols=syms, positions=xyz)
    
    s = rmgpy.species.Species().from_adjacency_list(ref_db.reference_sets['main'][i].adjacency_list)
    
    radical_electrons = any([x.radical_electrons for x in s.molecule[0].atoms])
    
    # specific modification for CCSD(T)
    specific_method = lot.method
    if radical_electrons and lot.method == 'ccsd(t)':
        specific_method = 'uccsd(t)'
    
    with open(os.path.join(sp_dir, 'sp.com'), 'w') as f:
        ase.io.gaussian.write_gaussian_in(
            f,
            atoms,
            properties=['energy'],
            method=specific_method,
            basis=lot.basis,
            mult=s.multiplicity,
        )

# write the slurm script
run_script = os.path.join(working_dir, 'run.sh')
with open(run_script, 'w') as f:
    f.write("""#!/bin/bash
#SBATCH --job-name=g16_aes
#SBATCH --error=error.log
#SBATCH --nodes=1
#SBATCH --partition=west,short
#SBATCH --exclude=c5003
#SBATCH --mem=20Gb
#SBATCH --time=24:00:00
#SBATCH --cpus-per-task=16
""" +
f'#SBATCH --array={str(sp_indices)[1:-1].replace(" ", "")}%30\n' +
"""
export GAUSS_SCRDIR=/scratch/harris.se/guassian_scratch
mkdir -p $GAUSS_SCRDIR
module load gaussian/g16
source /shared/centos7/gaussian/g16/bsd/g16.profile

RUN_i=$(printf "%04.0f" $(($SLURM_ARRAY_TASK_ID)))

cd "species_${RUN_i}"
g16 sp.com

""")


# Step 3 - run the calculations

In [59]:
os.chdir(working_dir)
# os.system(f'chmod +x {run_script}') <-- might need to change file permissions
os.system(f'sbatch {run_script}')

# Step 4 - grab the energies for the species_energies dictionary

In [77]:
species_energies = {}
for i in sp_indices:
    # get the gaussian logfile
    logfile = os.path.join(working_dir, f'species_{i:04}', 'sp.log')
    
    with open(logfile, 'r') as f:
        gaussian_log = arkane.ess.gaussian.GaussianLog(logfile)
        # Notes from arkane.encorr.ae say ZPE should not be included
        # arkane.ess.gaussian comments say ZPE is not included
        energy = gaussian_log.load_energy() / 2625500  # convert to Hartrees
        species_energies[ref_db.reference_sets['main'][i].label] = energy
        

In [81]:
species_energies

{'Ammonia': -56.473138146461636,
 'Chloromethane': -499.56425118016676,
 'Dibromine': -5145.3984137228335,
 'Dichlorine': -919.4262424500058,
 'Difluorine': -199.2960925886373,
 'Dihydrogen': -1.1723356328563836,
 'Dinitrogen': -109.37383052964486,
 'Dioxygen': -150.12899776103887,
 'Disulfur': -795.4547689278554,
 'Hydrogen bromide': -2573.3082102785634,
 'Hydrogen chloride': -460.3371865863367,
 'Hydrogen fluoride': -100.338350667079,
 'Hydrogen sulfide': -398.9353544287617,
 'Methane': -40.4380873954994,
 'Methyl': -39.76097421302741,
 'Water': -76.3321899731317}

In [86]:
# write the arkane ae job file (or I could do this in the code...)
ae_jobfile = os.path.join(working_dir, 'input.py')


with open(ae_jobfile, 'w') as f:
    f.write("""title = 'Atom energy correction fitting for """ + f'{lot.method}/{lot.basis} ' + """level of theory'

lot = LevelOfTheory(\n""" +
f"    method='{lot.method}',\n" +
f"    basis='{lot.basis}',\n" +
"""    software=gaussian
)

""" + 

f"species_energies={species_energies}\n" + 

"""
ae(
    species_energies=species_energies,
    level_of_theory=lot,
    write_to_database=False,
    overwrite=False
)

""")




In [96]:
# same thing as the job above, but in code
ae_job = arkane.encorr.ae.AEJob(
    species_energies=species_energies,
    level_of_theory=lot,
    write_to_database=False,
    overwrite=False
)
ae_job.execute()

In [97]:
ae_job.ae.atom_energies

{'H': -0.4984018068562638,
 'C': -37.7753179083434,
 'N': -54.504742750163096,
 'O': -74.96800652928341,
 'F': -99.6170283217903,
 'S': -397.64659434301717,
 'Cl': -459.66706171381963,
 'Br': -2572.663251330409}

In [99]:
ae_job.ae.confidence_intervals

{'H': 0.001050460972303508,
 'C': 0.004479971009883298,
 'N': 0.00223272929700218,
 'O': 0.0021827474574089606,
 'F': 0.002152201262035069,
 'S': 0.0021827474574089606,
 'Cl': 0.0020157776593258906,
 'Br': 0.0021522012620350684}

In [101]:
ae_job.ae.atom_energies.keys()

dict_keys(['H', 'C', 'N', 'O', 'F', 'S', 'Cl', 'Br'])

In [None]:
# Copy formatted energies into 

In [125]:
# add these to the atom_energies dictionary in RMG-database/input/quantum_corrections/data.py

for atom in ae_job.ae.atom_energies.keys():
    print(f'# {atom}\t{ae_job.ae.atom_energies[atom]:.8f}\t+/- {ae_job.ae.confidence_intervals[atom]:.8f} Hartree')

print(f'"{lot} ":','{')


for atom in ae_job.ae.atom_energies.keys():
    print(f'    "{atom}": {ae_job.ae.atom_energies[atom]},')

print('}')
# ae_job.ae.atom_energies



# H	-0.49840181	+/- 0.00105046 Hartree
# C	-37.77531791	+/- 0.00447997 Hartree
# N	-54.50474275	+/- 0.00223273 Hartree
# O	-74.96800653	+/- 0.00218275 Hartree
# F	-99.61702832	+/- 0.00215220 Hartree
# S	-397.64659434	+/- 0.00218275 Hartree
# Cl	-459.66706171	+/- 0.00201578 Hartree
# Br	-2572.66325133	+/- 0.00215220 Hartree
"LevelOfTheory(method='ccsd(t)',basis='ccpvtz',software='gaussian') ": {
    "H": -0.4984018068562638,
    "C": -37.7753179083434,
    "N": -54.504742750163096,
    "O": -74.96800652928341,
    "F": -99.6170283217903,
    "S": -397.64659434301717,
    "Cl": -459.66706171381963,
    "Br": -2572.663251330409,
}
