In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

%load_ext autotime
import os
import shutil
import yaml
import matplotlib.pyplot as plt
from scipy import interpolate
from scipy.optimize import minimize
from matplotlib.colors import ListedColormap
from matplotlib.colors import to_hex
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import scipy.ndimage.filters as filters
import scipy.ndimage.morphology as morphology

from arc.species.species import ARCSpecies
from arc.species.converter import modify_coords, xyz_to_xyz_file_format, cluster_confs_by_rmsd, compare_confs
from arc.plotter import show_sticks
from arc.common import key_by_val
from arc.parser import parse_e_elect, parse_geometry, parse_frequencies
import rmgpy.constants as constants 
from arc.exceptions import ParserError
import re

In [2]:
# Directory the kernel was started in; the results folder used below is joined onto it.
cwd = os.getcwd()
print(cwd)

/home/oscarwu/Dropbox (MIT)/Scripts/TS_conf_search
time: 491 µs


In [3]:
def get_energy(logfile):
    """
    Parse the electronic energy from an output file and express it in several units.

    The value returned by ``parse_e_elect`` is treated as kJ/mol: the Hartree
    conversion below divides by ``E_h * Na / 1000`` (kJ/mol per Hartree), so the
    'scf' entry is only a Hartree energy when the parsed value is in kJ/mol.
    The other entries are derived from that same unit so that all four keys
    describe the same energy (previously they were labelled as if the parsed
    value were in J/mol, which made them inconsistent with 'scf' by a factor
    of 1000).

    Args:
        logfile (str): Path to the output file handed to ``parse_e_elect``.

    Returns:
        dict: The energy under the keys 'J/mol', 'kJ/mol', 'kcal/mol' and
        'scf' (Hartree, rounded to 9 decimals).

    Raises:
        TypeError: If ``parse_e_elect`` returns ``None`` (the arithmetic on
        the parsed value fails); callers in this notebook catch and skip.
    """
    e_kj_mol = parse_e_elect(logfile)
    # One Hartree expressed in kJ/mol (E_h is in J, Na in 1/mol).
    hartree_in_kj_mol = constants.E_h * constants.Na / 1000
    return {
        'J/mol': e_kj_mol * 1000,
        'kJ/mol': e_kj_mol,
        'kcal/mol': e_kj_mol / 4.184,  # 1 kcal = 4.184 kJ
        'scf': round(e_kj_mol / hartree_in_kj_mol, 9),
    }

time: 11.5 ms


In [4]:
def get_geometry(logfile, plot=False):
    """
    Parse the geometry from an output file, optionally rendering it.

    Args:
        logfile (str): Path passed to ``parse_geometry``.
        plot (bool, optional): When truthy, the parsed geometry is also passed
            to ``show_sticks`` before being returned.

    Returns:
        Whatever ``parse_geometry`` returned for ``logfile``.
    """
    geometry = parse_geometry(logfile)
    if not plot:
        return geometry
    show_sticks(geometry)
    return geometry

time: 6.48 ms


In [5]:
# Name of the folder (relative to cwd) holding the single-point output files to process.
# The commented-out line is an alternative dataset; switch by swapping the comment.
# sp_results_folder = 'ts2005_solvation_correction_comparison_results'
sp_results_folder = 'imipramine_4_oo_sp_after_opt_results'

time: 778 µs


In [6]:
# subfolders = os.listdir(os.path.join(cwd, sp_results_folder))
# subfolders.remove('dlpno_wb97xd_def2svp_liq')
# subfolders.remove('dlpno_wb97xd_def2tzvp_liq')
# subfolders

time: 2.04 ms


In [7]:
# Map every output file in the results folder to its parsed geometry.
# Keys are (d1, deg1, d2, deg2, findex) tuples decoded from the file name:
#   findex     - integer before the first underscore
#   d1 / d2    - tuple of the integers found between 'd1'...'deg1' and 'd2'...'deg2'
#   deg1 / deg2 - first integer found after 'deg1' (up to the last 'n') and
#                 after 'deg2' (up to the last 'log')
# NOTE(review): presumably d* are dihedral atom indices and deg* angles in degrees - confirm.
index_to_coord = dict()
results_dir = os.path.join(cwd, sp_results_folder)
# for f in os.listdir(os.path.join(cwd, sp_results_folder, subfolders[0])):
for f in os.listdir(results_dir):
    findex = int(f.split('_')[0])
    d1 = tuple([int(x) for x in re.search('d1(.*)deg1', f).group(1).split('_') if x.isnumeric()])
    deg1 = int(tuple([x for x in re.search('deg1(.*)n', f).group(1).split('_') if x.isnumeric()])[0])
    d2 = tuple([int(x) for x in re.search('d2(.*)deg2', f).group(1).split('_') if x.isnumeric()])
    deg2 = int(tuple([x for x in re.search('deg2(.*)log', f).group(1).split('_') if x.isnumeric()])[0])

    try:
        # The files are listed directly from the results folder, so they are read
        # from there too. The previous path went through `subfolders[0]`, a name
        # that is only defined in commented-out code; the resulting NameError was
        # hidden by a bare `except:` and this dict silently stayed empty.
        index_to_coord[(d1, deg1, d2, deg2, findex)] = get_geometry(os.path.join(results_dir, f))
    except Exception as err:
        # Stay best-effort, but say which file was skipped and why.
        print('Skipping geometry of {}: {!r}'.format(f, err))
        continue

time: 4.16 ms


In [8]:
# index_to_coord

time: 924 µs


In [9]:
# Seed the energy table with one fresh, empty dict per geometry key.
all_energy_dict = {coord_key: dict() for coord_key in index_to_coord.keys()}

time: 2.12 ms


In [10]:
# all_energy_dict

time: 2.12 ms


In [11]:
# Store the 'scf' energy of every output file in the results folder under the
# (d1, deg1, d2, deg2, findex) key decoded from its file name (same decoding
# as used for index_to_coord).
results_dir = os.path.join(cwd, sp_results_folder)
for f in os.listdir(results_dir):
    findex = int(f.split('_')[0])
    d1 = tuple([int(x) for x in re.search('d1(.*)deg1', f).group(1).split('_') if x.isnumeric()])
    deg1 = int(tuple([x for x in re.search('deg1(.*)n', f).group(1).split('_') if x.isnumeric()])[0])
    d2 = tuple([int(x) for x in re.search('d2(.*)deg2', f).group(1).split('_') if x.isnumeric()])
    deg2 = int(tuple([x for x in re.search('deg2(.*)log', f).group(1).split('_') if x.isnumeric()])[0])
    key = (d1, deg1, d2, deg2, findex)

    try:
        all_energy_dict[key] = get_energy(os.path.join(results_dir, f))['scf']
    except Exception as err:
        # Best-effort: skip files whose energy cannot be parsed, but report them
        # instead of hiding the failure (a bare `except:` would also swallow
        # KeyboardInterrupt). Drop any empty-dict placeholder seeded for this key
        # so the table built from all_energy_dict only holds parsed energies.
        all_energy_dict.pop(key, None)
        print('Skipping energy of {}: {!r}'.format(f, err))
        continue

time: 72.1 ms


In [12]:
# for d in subfolders:
#     for f in os.listdir(os.path.join(cwd, sp_results_folder, d)):
#         findex = int(f.split('_')[0])
#         d1 = tuple([int(x) for x in re.search('d1(.*)deg1', f).group(1).split('_') if x.isnumeric()])
#         deg1 = int(tuple([x for x in re.search('deg1(.*)n', f).group(1).split('_') if x.isnumeric()])[0])
#         d2 = tuple([int(x) for x in re.search('d2(.*)deg2', f).group(1).split('_') if x.isnumeric()])
#         deg2 = int(tuple([x for x in re.search('deg2(.*)log', f).group(1).split('_') if x.isnumeric()])[0])

#         try:
#             all_energy_dict[(d1, deg1, d2, deg2, findex)][d] = get_energy(os.path.join(cwd, sp_results_folder, d, f))['scf']
#         except:
#             continue

time: 265 µs


In [13]:
# Inspect the collected energies: {(d1, deg1, d2, deg2, findex): scf energy}.
all_energy_dict

{((3, 1, 2, 4), 294, (1, 3, 10, 18), 46, 78): -457.395312107,
 ((3, 1, 2, 4), 284, (2, 4, 11, 21), 303, 55): -457.395976413,
 ((1, 2, 4, 11), 91, (2, 4, 11, 21), 82, 70): -457.399797001,
 ((2, 1, 7, 14), 243, (1, 2, 4, 11), 308, 2): -457.390507673,
 ((1, 3, 10, 18), 48, (2, 4, 11, 21), 299, 82): -457.394509827,
 ((2, 1, 7, 14), 369, (1, 2, 4, 11), 179, 91): -457.400987843,
 ((2, 1, 3, 9), 75, (2, 1, 7, 14), 355, 51): -457.391721305,
 ((1, 3, 9, 15), 182, (1, 3, 10, 18), 340, 72): -457.40196993,
 ((1, 2, 4, 11), 181, (1, 3, 10, 18), 338, 41): -457.392658469,
 ((1, 3, 9, 15), 0, (1, 3, 10, 18), 56, 95): -457.396452294,
 ((3, 1, 2, 4), 293, (2, 1, 7, 14), 44, 37): -457.393067666,
 ((2, 1, 3, 9), 335, (2, 4, 11, 21), 179, 76): -457.402194615,
 ((1, 2, 4, 11), 0, (2, 4, 11, 21), 0, 3): -457.400700633,
 ((2, 1, 3, 9), 302, (2, 1, 7, 14), 109, 28): -457.403534233,
 ((1, 3, 10, 18), 47, (2, 4, 11, 21), 82, 29): -457.396267037,
 ((1, 2, 4, 11), 124, (2, 4, 11, 21), 294, 113): -457.400393795,
 (

time: 31.2 ms


In [14]:
# One row per (d1, deg1, d2, deg2, findex) key; the energies land in a single column labelled 0.
df = pd.DataFrame.from_dict(all_energy_dict, orient='index')

time: 2.2 ms


In [15]:
# Show the energy table.
df

Unnamed: 0,0
"((3, 1, 2, 4), 294, (1, 3, 10, 18), 46, 78)",-457.395312
"((3, 1, 2, 4), 284, (2, 4, 11, 21), 303, 55)",-457.395976
"((1, 2, 4, 11), 91, (2, 4, 11, 21), 82, 70)",-457.399797
"((2, 1, 7, 14), 243, (1, 2, 4, 11), 308, 2)",-457.390508
"((1, 3, 10, 18), 48, (2, 4, 11, 21), 299, 82)",-457.394510
...,...
"((3, 1, 2, 4), 281, (1, 2, 4, 11), 79, 85)",-457.394561
"((2, 1, 3, 9), 30, (1, 2, 4, 11), 178, 21)",-457.394929
"((3, 1, 2, 4), 296, (1, 3, 10, 18), 0, 18)",-457.395370
"((1, 3, 10, 18), 48, (2, 4, 11, 21), 210, 48)",-457.394249


time: 14.5 ms


In [16]:
# Export the energy table; the relative file name is resolved against the current working directory.
df.to_excel('imipramine_4_oo_dlpno_sp.xlsx')

time: 145 ms
