In [1]:
import psi4
import os
import time

In [2]:
def get_data_folder_path():
    return os.path.join(os.getcwd(), '..', '..', 'dsgdb9nsd')

In [61]:
def get_molecule_from_file(filenum):
    filepath = os.path.join(get_data_folder_path(), 
                           'dsgdb9nsd_' + str(filenum).zfill(6) + '.xyz')
    f = open(filepath, 'r')
    lines = f.readlines()
    f.close()
    num_atoms = int(lines[0])
    atom_list = lines[2:2+num_atoms]
    for i in range(len(atom_list)):
        atom_list[i] = atom_list[i][:atom_list[i].rfind("\t")] + "\n"
    return psi4.geometry("".join(atom_list))

In [62]:
def generate_output_file_path(filenum):
    return os.path.join('psi4_output', 'output_'+str(filenum)+'.dat')

In [63]:
def process_molecule(filenum, thermochemical=False):
    psi4.core.set_output_file(generate_output_file_path(filenum), False)
    psi4.set_memory('2 GB')
    molecule = get_molecule_from_file(filenum)
    if thermochemical:
        e, wfn = psi4.freq('b3lyp/cc-pvqz', molecule=molecule, return_wfn=True)
    else:
        e, wfn = psi4.energy('b3lyp/cc-pvqz', molecule=molecule, return_wfn=True)
    return wfn

In [64]:
def extract_rotational_constants(filenum, wfn):
    f = open(generate_output_file_path(filenum), 'r')
    lines = f.readlines()
    f.close()
    for i in range(len(lines)-1, -1, -1):
        if lines[i].find('Rotational constants:') > -1 and lines[i].find('[MHz]') > -1:
            words = lines[i].split()
            return float(words[4])/1000, float(words[7])/1000, float(words[10])/1000

In [65]:
def extract_dipole_moment(filenum, wfn):
    f = open(generate_output_file_path(filenum), 'r')
    lines = f.readlines()
    f.close()
    for i in range(len(lines)-1, -1, -1):
        if lines[i].find("Dipole Moment: [D]") > -1:
            return float(lines[i+1][lines[i+1].find("Total:") + 6:])

In [66]:
def extract_homo_lumo(filenum, wfn):
    homo = wfn.epsilon_a_subset("AO", "ALL").get(wfn.nalpha())
    lumo = wfn.epsilon_a_subset("AO", "ALL").get(wfn.nalpha() + 1)
    return homo, lumo

In [67]:
def extract_gap(filenum, wfn):
    homo, lumo = extract_homo_lumo_gap(fileum, wfn)
    return lumo - homo

In [68]:
def extract_zpve(filenum, wfn):
    f = open(generate_output_file_path(filenum), 'r')
    lines = f.readlines()
    f.close()
    for i in range(len(lines)-1, -1, -1):
        if lines[i].find("Total ZPE, Electronic energy at 0 [K]") > -1:
            words = lines[i].split()
            return float(words[-2])

In [69]:
def extract_zero_point_internal_energy(filenum, wfn):
    f = open(generate_output_file_path(filenum), 'r')
    lines = f.readlines()
    f.close()
    for i in range(len(lines)-1, -1, -1):
        if lines[i].find("Total E0, Electronic energy") > -1:
            words = lines[i].split()
            return float(words[-2])

In [70]:
def extract_internal_energy(filenum, wfn):
    f = open(generate_output_file_path(filenum), 'r')
    lines = f.readlines()
    f.close()
    for i in range(len(lines)-1, -1, -1):
        if lines[i].find("Total E, Electronic energy at  298.15 [K]") > -1:
            words = lines[i].split()
            return float(words[-2])

In [71]:
def extract_enthalpy(filenum, wfn):
    f = open(generate_output_file_path(filenum), 'r')
    lines = f.readlines()
    f.close()
    for i in range(len(lines)-1, -1, -1):
        if lines[i].find("Total H, Enthalpy at  298.15 [K]") > -1:
            words = lines[i].split()
            return float(words[-2])

In [72]:
def extract_gibbs_free_energy(filenum, wfn):
    f = open(generate_output_file_path(filenum), 'r')
    lines = f.readlines()
    f.close()
    for i in range(len(lines)-1, -1, -1):
        if lines[i].find("Total G,") > -1:
            words = lines[i].split()
            return float(words[-2])

In [73]:
def extract_cv(filenum, wfn):
    f = open(generate_output_file_path(filenum), 'r')
    lines = f.readlines()
    f.close()
    for i in range(len(lines)-1, -1, -1):
        if lines[i].find('Total Cv') > -1:
            words = lines[i].split()
            return float(words[2])

In [74]:
def batch_process(start_num, end_num, thermochemical=False):
    f = open("output.csv", "w")
    output_header = "Index,A,B,C,Dipole,HOMO,LUMO"
    if thermochemical:
        output_header += ",zpve,H 298.15,G 298.15"
    output_header += "\n"
    f.write(output_header)
    for filenum in range(start_num, end_num+1):
        wfn = process_molecule(filenum, thermochemical=thermochemical)
        a, b, c = extract_rotational_constants(filenum, wfn)
        dipole = extract_dipole_moment(filenum, wfn)
        homo, lumo = extract_homo_lumo(filenum, wfn)
        output = str(filenum) + "," + str(a) + "," + str(b) + "," + str(c) + "," + str(dipole) + "," + str(homo) + "," + str(lumo)
        if thermochemical:
            zpve = extract_zpve(filenum, wfn)
            enthalpy = extract_enthalpy(filenum, wfn)
            gibbs_free_energy = extract_gibbs_free_energy(filenum, wfn)
            output += "," + str(zpve) + "," + str(enthalpy) + "," + str(gibbs_free_energy)
        output += "\n" 
        f.write(output)
    f.close()

In [78]:
def predict_properties(filenum, thermochemical=False, print_output=True):
    wfn = process_molecule(filenum, thermochemical=thermochemical)
    a, b, c = extract_rotational_constants(filenum, wfn)
    dipole = extract_dipole_moment(filenum, wfn)
    homo, lumo = extract_homo_lumo(filenum, wfn)
    if print_output:
        print('A', a, '\nB', b, '\nC', c, '\nDipole', dipole)
        print('homo', homo, '\nlumo', lumo, 'gap', lumo-homo)
    ret_dict = dict()
    ret_dict['a'] = a
    ret_dict['b'] = b
    ret_dict['c'] = c
    ret_dict['mu'] = dipole
    ret_dict['homo'] = homo
    ret_dict['lumo'] = lumo
    ret_dict['gap'] = lumo-homo
    if thermochemical:
        zpve = extract_zpve(filenum, wfn)
        internal_energy = extract_internal_energy(filenum, wfn)
        u0 = extract_zero_point_internal_energy(filenum, wfn)
        enthalpy = extract_enthalpy(filenum, wfn)
        gibbs_free_energy = extract_gibbs_free_energy(filenum, wfn)
        cv = extract_cv(filenum, wfn)
        if print_output:
            print('zpve', zpve, '\nU0', u0,'\nU298', internal_energy, '\nH298', enthalpy, '\nG298', gibbs_free_energy, 'Cv', cv)
        ret_dict['zpve'] = zpve
        ret_dict['u0'] = u0
        ret_dict['u298'] = internal_energy
        ret_dict['h298'] = enthalpy
        ret_dict['g298'] = gibbs_free_energy
        ret_dict['cv'] = cv
    return ret_dict

In [79]:
def prompt_user_for_calculation():
    num = -1
    while num < 1 or num > 133885:
        num = input('Choose a molecule index (1-133885): ')
    properties = ['a', 'b', 'c', 'mu', 'homo', 'lumo', 'gap', 'zpve', 'u0', 'u298', 'h298', 'g298', 'cv']
    prop = None
    while prop not in properties:
        prop = input('Choose a property: ')
    
    print('Beginning DFT calculation')
    start = time.time()
    thermochemical = prop in properties[7:]
    ret_dict = predict_properties(num, thermochemical=thermochemical)
    print(ret_dict[prop])
    end = time.time()
    print('DFT calculation took', end-start, 'ms')

In [76]:
# FOR TESTING PURPOSES
start = time.time()
# batch_process(1, 3, thermochemical=True)
# predict_properties(1, thermochemical=True)
# end = time.time()
print(end-start)

4.315376281738281e-05


In [77]:
# FOR TESTING PURPOSES
filenum = 1
wfn = None
a, b, c = extract_rotational_constants(filenum, wfn)
dipole = extract_dipole_moment(filenum, wfn)
# homo, lumo = extract_homo_lumo(filenum, wfn)
print('A', a, '\nB', b, '\nC', c, '\nDipole', dipole)
# print('homo', homo, '\nlumo', lumo, 'gap', lumo-homo)
zpve = extract_zpve(filenum, wfn)
print('zpve', zpve)
internal_energy = extract_internal_energy(filenum, wfn)
print('U298', internal_energy)
u0 = extract_zero_point_internal_energy(filenum, wfn)
enthalpy = extract_enthalpy(filenum, wfn)
gibbs_free_energy = extract_gibbs_free_energy(filenum, wfn)
cv = extract_cv(filenum, wfn)
print('U0', u0, '\nH298', enthalpy, '\nG298', gibbs_free_energy, '\nCv', cv)

A 157.71179902 
B 157.70996942000002 
C 157.70699448 
Dipole 0.0
zpve -40.4971856
U298 -40.49431803
U0 -40.54146019 
H298 -40.49337384 
G298 -40.5168522 
Cv 6.459


In [32]:
# FOR TESTING PURPOSES
f = open(generate_output_file_path(filenum), 'r')
lines = f.readlines()
f.close()
print(lines[-2])
for i in range(len(lines)):
    if lines[i].find("Total G") > -1:
        print(lines[i])
        words = lines[i].split()
        print(float(words[-2]))
print('unsuccessful')

  Correction G             15.442 [kcal/mol]       64.608 [kJ/mol]       0.02460799 [Eh]

  -Total Gradient:



ValueError: could not convert string to float: '-Total'

In [17]:
generate_output_file_path(1)

'/mnt/c/Users/Sohom/Documents/senior_research/research_shared/multitask/psi4_output/output_1.dat'

In [29]:
psi4.core.set_output_file('psi4_output/output_1.dat')

In [30]:
psi4.set_memory('2 GB')
molecule = get_molecule_from_file(1)
e, wfn = psi4.energy('b3lyp/cc-pvqz', molecule=molecule, return_wfn=True)