In [1]:
import psi4
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
import pandas as pd
from rdkit.Chem import Descriptors
from rdkit.Chem.rdMolDescriptors import CalcMolFormula

# Import CSV file containing fuels

In [2]:
# Load the fuel table, discard the Fahrenheit boiling-point column (the Kelvin
# column is kept) and remove the rows at positions 39 through 101.
df = (
    pd.read_csv('FuelList1.csv')
    .drop(columns=['Boiling Point (F)'])
    .pipe(lambda frame: frame.drop(index=frame.index[39:102]))
)
df

Unnamed: 0,ID,Fuel,Boiling Point (K)
0,1,"1,3,5-trimethylbenzene",437.872222
1,2,1-Butene,267.038889
2,3,1-Pentene,303.038889
3,4,"2,3-Dimethylpentane",363.15
4,5,"2,5-dimethylhexane",382.261111
5,6,"2,6,10-trimethyldodecane",522.25
6,7,2-Butene,277.15
7,8,2-Hexanone,401.15
8,9,2-methylheptane,389.15
9,10,2-Pentanone,374.15


# Function that queries the NIH/NCI Chemical Identifier Resolver (cactus.nci.nih.gov) for the molecule matching the input name and converts the chemical identifier from IUPAC name to SMILES

In [3]:
from urllib.request import urlopen
from urllib.parse import quote
from rdkit.Chem import AllChem

#Chemical name to SMILES:

def toSmiles(ids, timeout=30):
    """Resolve a chemical name to a SMILES string via the NCI resolver service.

    Parameters
    ----------
    ids : str
        Chemical name (e.g. an IUPAC name) to look up.
    timeout : float, optional
        Seconds to wait for the web service before giving up (default 30).

    Returns
    -------
    str
        The decoded response body of the service (the SMILES string), or the
        sentinel 'Enter a valid IUPAC name' when the name is not a usable
        string or the lookup fails for any ordinary reason (network error,
        HTTP error, undecodable response, ...).
    """
    failure = 'Enter a valid IUPAC name'

    # Missing values (e.g. NaN from pandas) and blank names cannot be looked up;
    # answer immediately instead of issuing a pointless request.
    if not isinstance(ids, str) or not ids.strip():
        return failure

    try:
        url = 'https://cactus.nci.nih.gov/chemical/structure/' + quote(ids) + '/smiles'
        # The context manager closes the HTTP connection even if read() fails.
        with urlopen(url, timeout=timeout) as response:
            return response.read().decode('utf8')
    except Exception:
        # Deliberately best-effort: any ordinary failure maps to the sentinel.
        # Unlike a bare `except:`, KeyboardInterrupt and SystemExit still
        # propagate, so a long batch of lookups can be interrupted.
        return failure

# Generate SMILES column and add it to existing dataframe

In [4]:
# Look up a SMILES string for every fuel name and report the ones that failed.
fuel_name = df['Fuel']
smiles_list = []
for index, ids in enumerate(fuel_name):
    # Query the web service exactly once per fuel: the stored value and the
    # value checked for failure are guaranteed to be the same response, and
    # the number of network requests is halved.
    fuel_smiles = toSmiles(ids)
    smiles_list.append(fuel_smiles)
    if fuel_smiles == 'Enter a valid IUPAC name':
        print(f"{ids} failed at index {index}")

# Define function that will optimize a molecule's 3D structure to obtain the structure with the lowest energy - Round 1
# **Important for descriptor calculations

In [5]:
def optimize(m):
    """Generate a force-field-relaxed 3D structure and export it as text.

    Explicit hydrogens are added to the molecule, a 3D conformer is embedded
    with a fixed random seed, and the geometry is relaxed with the Merck
    molecular force field 94 (MMFF94).

    Parameters
    ----------
    m : RDKit molecule object
        The molecule to process.

    Returns
    -------
    totAtoms : int
        Total number of atoms of the hydrogen-added molecule.
    string : str
        Geometry block: a leading blank line, one "symbol x y z" line per
        atom, followed by the lines "units angstrom" and "symmetry c1".
    m : RDKit molecule object
        The hydrogen-added molecule carrying the relaxed 3D conformer.

    Raises
    ------
    ValueError
        If RDKit cannot embed a 3D conformer for the molecule.
    """
    m = Chem.AddHs(m)  # add hydrogens to structure

    # Convert from 2D to 3D. EmbedMolecule returns the new conformer id, or -1
    # when the embedding failed and no conformer was added.
    conf_id = AllChem.EmbedMolecule(m, randomSeed=0xf00d,
                                    useExpTorsionAnglePrefs=True,
                                    useBasicKnowledge=True)
    if conf_id == -1:
        # Starting from random coordinates is the usual remedy for molecules
        # the default distance-geometry start cannot embed.
        conf_id = AllChem.EmbedMolecule(m, randomSeed=0xf00d,
                                        useRandomCoords=True,
                                        useExpTorsionAnglePrefs=True,
                                        useBasicKnowledge=True)
    if conf_id == -1:
        # Fail here with a readable message instead of letting the later
        # conformer access fail with an obscure "bad conformer" error.
        raise ValueError("RDKit could not embed a 3D conformer for "
                         + Chem.MolToSmiles(Chem.RemoveHs(m)))

    AllChem.MMFFOptimizeMolecule(m, 'MMFF94')  # apply force field

    # Serialise the relaxed coordinates, one atom per line.
    conformer = m.GetConformer()
    string = "\n"
    for atom in m.GetAtoms():
        pos = conformer.GetAtomPosition(atom.GetIdx())
        string += "{} {} {} {}\n".format(atom.GetSymbol(), pos.x, pos.y, pos.z)
    string += "units angstrom\n"
    string += "symmetry c1\n"

    totAtoms = m.GetNumAtoms()

    return totAtoms, string, m

# Calculate descriptors: Number of atoms, molecular weight, IC0, PJI3, SIC0, GATS1v, Wiener Index, Zagreb Index

In [6]:
from mordred import InformationContent
from mordred import GeometricalIndex
from mordred import Autocorrelation
from mordred import WienerIndex
from mordred import ZagrebIndex

# Descriptor calculators (mordred), created once and reused for every molecule
IC0_calc = InformationContent.InformationContent(order=0)
petitjean_calc = GeometricalIndex.PetitjeanIndex3D()
SIC0_calc = InformationContent.StructuralIC(order=0)
GATS1v_calc = Autocorrelation.GATS(order=1, prop='v')
wiener_index = WienerIndex.WienerIndex()
zagreb_index1 = ZagrebIndex.ZagrebIndex(version=1)

# One result list per descriptor; entry i belongs to smiles_list[i]
IC0_list = []  # Information content index (neighborhood symmetry of 0-order)
petitjean_3D = []  # 3D petitjean shape index
SIC0_list = []  # structural information content (0-order)
GATS1v_list = []  # GATS autocorrelation, order 1, property 'v'
wiener_list = []  # stores wiener indices
Z1_list = []  # stores zagreb1 indices

MW_list = []  # stores molecular weight
mol_form = []  # stores molecular formulas
N_atoms = []  # stores number of atoms
xyzgeom_list = []  # stores xyz coordinates

# Parse every SMILES string first. Chem.MolFromSmiles returns None for text it
# cannot parse (for example the failure message produced by a failed name
# lookup), which would otherwise crash deep inside the descriptor code with an
# unhelpful error. Report all offending entries at once, before any expensive
# 3D embedding is started.
parsed_mols = [Chem.MolFromSmiles(smiles) for smiles in smiles_list]
invalid_entries = [
    (position, smiles)
    for position, (smiles, parsed) in enumerate(zip(smiles_list, parsed_mols))
    if parsed is None
]
if invalid_entries:
    raise ValueError(
        "Cannot compute descriptors; these smiles_list entries are not valid "
        "SMILES (index, value): {}".format(invalid_entries)
    )

for mol in parsed_mols:
    # Properties taken from the molecule as parsed (before 3D embedding)
    MW_list.append(Descriptors.MolWt(mol))
    mol_form.append(CalcMolFormula(mol))

    # Hydrogen-added, force-field-relaxed 3D structure
    totN_atoms, xyzcoords, mol2 = optimize(mol)
    xyzgeom_list.append(xyzcoords)
    N_atoms.append(totN_atoms)

    # Descriptors evaluated on the 3D molecule
    IC0_list.append(IC0_calc(mol2))
    petitjean_3D.append(petitjean_calc(mol2))
    SIC0_list.append(SIC0_calc(mol2))
    GATS1v_list.append(GATS1v_calc(mol2))
    wiener_list.append(wiener_index(mol2))
    Z1_list.append(zagreb_index1(mol2))

# Add columns with new properties to existing dataframe

In [7]:
# Attach each computed property list to the fuel table as a new column.
# The dict preserves insertion order, so the columns appear in this order.
new_columns = {
    'Molecular Formula': mol_form,
    'N_Atoms': N_atoms,
    'Molecular Weight (g/mol)': MW_list,
    'IC0': IC0_list,
    'PJ3': petitjean_3D,
    'SIC0': SIC0_list,
    'GATS1v': GATS1v_list,
    'Wiener': wiener_list,
    'Z1': Z1_list,
}
for column_name, column_values in new_columns.items():
    df[column_name] = column_values
df

Unnamed: 0,ID,Fuel,Boiling Point (K),Molecular Formula,N_Atoms,Molecular Weight (g/mol),IC0,PJ3,SIC0,GATS1v,Wiener,Z1
0,1,"1,3,5-trimethylbenzene",437.872222,C9H12,21,120.195,0.985228,0.318633,0.224307,1.111111,84,42.0
1,2,1-Butene,267.038889,C4H8,12,56.108,0.918296,0.363684,0.256152,1.5,10,10.0
2,3,1-Pentene,303.038889,C5H10,15,70.135,0.918296,0.372016,0.235045,1.5,20,14.0
3,4,"2,3-Dimethylpentane",363.15,C7H16,23,100.205,0.886541,0.454902,0.195983,1.642857,46,26.0
4,5,"2,5-dimethylhexane",382.261111,C8H18,26,114.232,0.890492,0.374392,0.189449,1.625,74,30.0
5,6,"2,6,10-trimethyldodecane",522.25,C15H32,47,212.421,0.903454,0.479094,0.16265,1.566667,484,60.0
6,7,2-Butene,277.15,C4H8,12,56.108,0.918296,0.392235,0.256152,1.5,10,10.0
7,8,2-Hexanone,401.15,C6H12O,19,100.161,1.167437,0.423096,0.274825,1.492303,52,24.0
8,9,2-methylheptane,389.15,C8H18,26,114.232,0.890492,0.459535,0.189449,1.625,79,28.0
9,10,2-Pentanone,374.15,C5H10O,16,86.134,1.198192,0.446664,0.299548,1.491124,32,20.0


# Import molecular geometries from cartesian xyz coordinates

In [8]:
# Turn every geometry string into a Psi4 molecule object, in list order
psi4_mols = [psi4.geometry(geo) for geo in xyzgeom_list]

# Empty containers for the per-molecule quantum-chemistry results
E_list, wfn_list, homo_list, lumo_list, DM_list = [], [], [], [], []

# Conversion factor for dipoles: atomic units to Debye
const = psi4.constants.dipmom_au2debye

# Molecular structure optimization round 2 + calculate HOMO, LUMO, and DM

In [9]:
psi4.set_options({'reference': 'uhf'})

# Start from empty result lists every time this cell runs, so that re-running
# it never appends a second copy of the results (which would silently misalign
# the HOMO/LUMO/DM table with the fuel table).
E_list = []
wfn_list = []
homo_list = []
lumo_list = []
DM_list = []

for k, molec in enumerate(psi4_mols, start=1):  # iterate over list of molecular geometries

    # Geometry optimization; returns energy and wavefunction of the molecule
    E, wfn = psi4.optimize("B3LYP/6-31G*", molecule=molec, return_wfn=True)

    E_list.append(E)
    wfn_list.append(wfn)

    # Fetch the alpha orbital energies once and index them twice.
    # NOTE(review): indexing by nalpha assumes the energies come back sorted in
    # ascending order -- confirm for the Psi4 version in use.
    orbital_energies = np.array(wfn.epsilon_a_subset("AO", "ALL"))
    n_alpha = wfn.nalpha()
    HOMO = orbital_energies[n_alpha - 1]  # last occupied alpha orbital
    LUMO = orbital_energies[n_alpha]  # first unoccupied alpha orbital

    # Dipole magnitude, converted from atomic units to Debye
    dipole_xyz = wfn.variable("SCF DIPOLE")
    dipole_debye = np.linalg.norm(dipole_xyz) * const

    homo_list.append(HOMO)
    lumo_list.append(LUMO)
    DM_list.append(dipole_debye)

    strID = "fuel" + str(k) + ".xyz"
    molec.save_xyz_file(strID, 1)  # write final optimized geometry to XYZ file
    print("Fuel "+str(k)+" done, "+ "k: "+str(k))

Optimizer: Optimization complete!
Fuel 1 done, k: 1
Optimizer: Optimization complete!
Fuel 2 done, k: 2
Optimizer: Optimization complete!
Fuel 3 done, k: 3
Optimizer: Optimization complete!
Fuel 4 done, k: 4
Optimizer: Optimization complete!
Fuel 5 done, k: 5
Optimizer: Optimization complete!
Fuel 6 done, k: 6
Optimizer: Optimization complete!
Fuel 7 done, k: 7
Optimizer: Optimization complete!
Fuel 8 done, k: 8
Optimizer: Optimization complete!
Fuel 9 done, k: 9
Optimizer: Optimization complete!
Fuel 10 done, k: 10
Optimizer: Optimization complete!
Fuel 11 done, k: 11
Optimizer: Optimization complete!
Fuel 12 done, k: 12
Optimizer: Optimization complete!
Fuel 13 done, k: 13
Optimizer: Optimization complete!
Fuel 14 done, k: 14
Optimizer: Optimization complete!
Fuel 15 done, k: 15
Optimizer: Optimization complete!
Fuel 16 done, k: 16
Optimizer: Optimization complete!
Fuel 17 done, k: 17
Optimizer: Optimization complete!
Fuel 18 done, k: 18
Optimizer: Optimization complete!
Fuel 19 done

# Add HOMO, LUMO, DM lists to dataframe and save as CSV file

In [10]:
# One row per molecule: (HOMO energy, LUMO energy, dipole moment)
electronic_rows = [
    (homo, lumo, dipole)
    for homo, lumo, dipole in zip(homo_list, lumo_list, DM_list)
]
df2 = pd.DataFrame(electronic_rows, columns=['HOMO', 'LUMO', 'DM'])
df2

Unnamed: 0,HOMO,LUMO,DM
0,-0.227243,0.009793,0.03760093
1,-0.249853,0.02589,0.3367899
2,-0.249613,0.025174,0.328765
3,-0.296727,0.086465,0.0560097
4,-0.299486,0.081422,0.07813802
5,-0.286436,0.079358,0.1693138
6,-0.234996,0.036312,1.167217e-06
7,-0.242486,-0.009403,2.647785
8,-0.294644,0.076233,0.06105559
9,-0.240036,-0.011356,2.704513


In [None]:
# Combine the fuel/descriptor table with the electronic-property table.
# pd.concat(axis=1) aligns rows on index labels. df2 has a fresh 0..n-1 index,
# while df keeps its original CSV row labels after rows were dropped. Both
# tables are ordered by fuel position, so give df a positional index first;
# otherwise any gap in df's labels would pair fuels with the wrong values or
# create NaN-filled rows.
df3 = pd.concat([df.reset_index(drop=True), df2], axis=1)
df3

In [None]:
# Write the combined descriptor table to disk without the index column
output_csv = 'FuelDescriptors1.csv'
df3.to_csv(output_csv, index=False, encoding='utf-8')