# Imports

**NOTE: Make sure to use the get_properties_environment file to set your conda environment.**

In [1]:
import datetime
import glob
import itertools
import os
import pickle
import random
import re
import sys
import time
import warnings

warnings.filterwarnings("ignore")

import numpy as np
import openpyxl
import pandas as pd
from rdkit import Chem

import get_properties_functions as gp

#imports and patterns for amine specific function to get LP energy and occupancy

#section markers that are searched for in the log files
npa_pattern = re.compile("Summary of Natural Population Analysis:")
nbo_os_pattern = re.compile("beta spin orbitals")
#plain-text header of the NBO summary table, used with the "in" operator (alternative spelling: "Natural Bond Orbitals (Summary)")
nborbs_pattern = "NATURAL BOND ORBITALS (Summary):"
#compiled form of the same header; the text is escaped so that "(Summary)" is matched literally instead of being read as a regex group
nborbs2_pattern = re.compile(re.escape(nborbs_pattern))

_LP_NO_DATA = "no data" #placeholder written when a value could not be obtained
_LP_ATOM_TOKEN = re.compile(r"([A-Z][a-z]? *[0-9]+)") #element symbol followed by a number, e.g. "N   6"
_LP_DECIMAL = re.compile(r"[-+]?\d*\.\d+") #signed decimal numbers on a line


def _lp_column_names(atom):
    """Return the (occupancy, energy) column names used for the atom label ``atom``."""
    return 'NBO_LP_occupancy_' + str(atom), 'NBO_LP_energy_' + str(atom)


def _lp_no_data_row(a_list):
    """Build a result row that marks every requested atom as "no data"."""
    row_i = {}
    for atom in a_list:
        for name in _lp_column_names(atom):
            row_i[name] = _LP_NO_DATA
    return row_i


def _find_lone_pairs(filecont, start, atom_num):
    """Return an (occupancy, energy) pair of strings for every "LP" line of ``atom_num`` from line ``start`` on."""
    wanted = str(atom_num)
    found = []
    for line in filecont[start:]:
        if "LP" not in line:
            continue
        #collect the element/number tokens on the line (e.g. ["N", "6"]) and look for the atom number among them
        tokens = " ".join(_LP_ATOM_TOKEN.findall(line)).split()
        if wanted not in tokens:
            continue
        #the first two decimal numbers on the line are taken as occupancy and energy
        numbers = _LP_DECIMAL.findall(line)
        found.append((numbers[0], numbers[1]))
    return found


def _lp_row_for_log(row, a_list):
    """Read the log file of one dataframe row and return its lone-pair result row."""
    log_file = row['log_name'] #read file name from df
    filecont, error = gp.get_filecont(log_file) #read the contents of the log file
    if error != "":
        print(error)
        return _lp_no_data_row(a_list)

    #find the first line (index 0 is never considered) that holds the NB orbitals summary header
    nborbsstart = 0
    for i in range(1, len(filecont)):
        if nborbs_pattern in filecont[i]:
            nborbsstart = i
            break
    if nborbsstart == 0:
        print("****no Natural Bond Orbitals found in: " + str(log_file) + ".log")
        return _lp_no_data_row(a_list)

    row_i = {}
    for atom in a_list:
        occ_name, energy_name = _lp_column_names(atom)
        atom_num = row[str(atom)] #the atom number (i.e., 16) stored in this row for the labeled atom (i.e., "C1")
        lone_pairs = _find_lone_pairs(filecont, nborbsstart, atom_num)
        if len(lone_pairs) == 1:
            occ, energy = lone_pairs[0]
        else:
            #zero or several lone pairs are ambiguous, so no value is reported for this atom
            if not lone_pairs:
                print("****no LPs for atom " + str(atom) + " in: " + str(log_file) + ".log")
            else:
                print("****more than one LP for atom " + str(atom) + " in: " + str(log_file) + ".log")
            occ = energy = _LP_NO_DATA
        row_i[occ_name] = occ
        row_i[energy_name] = energy
    return row_i


def get_one_lp_energy(dataframe, a_list):
    """Append the NBO lone-pair (LP) occupancy and energy of the labeled atoms to a dataframe.

    For every row, the log file named in the ``log_name`` column is read with
    ``gp.get_filecont`` and the lines from the "NATURAL BOND ORBITALS (Summary):"
    header onwards are scanned for "LP" lines that mention the atom number
    stored in that row under each label of ``a_list``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``log_name`` column and one column per label in
        ``a_list`` holding the atom number in that log file.
    a_list : list
        Atom labels (column headers), form ["C1", "C4", "O2"].

    Returns
    -------
    pandas.DataFrame
        ``dataframe`` with the columns ``NBO_LP_occupancy_<label>`` and
        ``NBO_LP_energy_<label>`` added for every label. Exactly one result
        row is produced per input row (sharing its index), so the new columns
        always line up with the input. Values are the strings found in the
        log file; an atom with no lone pair or with more than one lone pair,
        and every atom of a row whose file could not be processed, gets the
        string "no data".
    """
    column_order = []
    for atom in a_list:
        column_order.extend(_lp_column_names(atom))

    rows = []
    for index, row in dataframe.iterrows(): #iterate over the dataframe
        try: #try to get the data
            rows.append(_lp_row_for_log(row, a_list))
        except Exception as exc:
            #best effort: report the problem and keep going with the next file
            print('****Unable to acquire NBO orbitals for:', row.get('log_name'), ".log", "(" + repr(exc) + ")")
            rows.append(_lp_no_data_row(a_list))

    #reuse the input index so that the column-wise concat below cannot misalign rows
    nborbs_dataframe = pd.DataFrame(rows, index=dataframe.index, columns=column_order)
    print("NBOrbs function has completed for", a_list)
    return pd.concat([dataframe, nborbs_dataframe], axis=1)

D3 import failed


# Atom Inputs Dataframe

## Generate dataframe with atom numbers

### Use command line or bash script to prepare files

To create files: navigate to folder that contains all the log files you wish to analyze.

    obabel *.log -osdf -m  
    ls *.log > log_ids.txt
    cat *.sdf >> molecules.sdf

### Define SMARTS substructure


We recommend drawing the common substructure (with general atoms) in ChemDraw and copying it as SMILES (this will generate a SMARTS string).

More information about SMARTS and available characters here: https://www.daylight.com/dayhtml/doc/theory/theory.smarts.html


In [2]:
#this is the common smarts substructure for the molecules you will analyze
#you have to explicitly draw hydrogens into the SMARTS structure if you want to collect properties for hydrogen atoms
substructure_smarts = '[H]N([*])[*]'
substructure = Chem.MolFromSmarts(substructure_smarts)

#MolFromSmarts returns None instead of raising when the SMARTS cannot be parsed, so stop here with a clear message
if substructure is None:
    raise ValueError("Could not parse the SMARTS substructure: " + substructure_smarts)

### Generate preliminary dataframe

In [3]:
#generate a list of molecules using RDkit
#molecules.sdf is generated with the instructions above
#it is a single sdf that contains the structures/atom numbers etc. for every molecule you will analyze
all_compounds = Chem.SDMolSupplier('molecules.sdf', removeHs=False)

#uses RDKit to search for the substructure in each compound you will analyze
atoms = []
unreadable_entries = [] #1-indexed positions of the molecules that RDKit could not read
for position, molecule in enumerate(all_compounds, start=1):
    if molecule is None:
        unreadable_entries.append(position)
        continue
    submatch = molecule.GetSubstructMatches(substructure) #find substructure
    #flatten all matches and change from 0-indexed to 1-indexed atom numbers (for Gaussian)
    atoms.append([atom_idx + 1 for match in submatch for atom_idx in match])

#this part extracts log names from log_ids.txt (generate this with the instructions above)
#it is a text file that contains the file name for every molecule you will analyze
with open("log_ids.txt", "r") as filenames:
    list_of_filenames = [line.split() for line in filenames if line.strip()] #blank lines are skipped
#only the final extension is removed, so names that contain extra dots are kept intact
list_of_files = [os.path.splitext(filename[0])[0] for filename in list_of_filenames]

#every log file needs exactly one readable molecule, otherwise names and atom numbers cannot be paired
if len(atoms) != len(list_of_files):
    raise ValueError(
        "Found " + str(len(atoms)) + " readable molecules in molecules.sdf but "
        + str(len(list_of_files)) + " file names in log_ids.txt"
        + " (unreadable molecule positions: " + str(unreadable_entries) + ")"
    )

#put the atom numbers for the substructure for each log file into a dataframe
prelim_df = pd.DataFrame(atoms)
prelim_df.insert(0, column='log_name', value=list_of_files)
display(prelim_df)

Unnamed: 0,log_name,0,1,2,3
0,SecN1_conf-1,15,6,1,5
1,SecN1_conf-2,15,6,1,5
2,SecN2_conf-1,13,4,5,3
3,SecN3_conf-1,12,4,5,3
4,SecN4_conf-1,7,2,1,3
5,SecN5_clust-1,24,2,1,3
6,SecN5_clust-10,24,2,3,1
7,SecN5_clust-11,24,2,3,1
8,SecN5_clust-12,24,2,1,3
9,SecN5_clust-13,24,2,1,3


### Define column headers using GaussView

Using the preliminary dataframe displayed above, open one of your files and check the atom numbers. 

Assign labels to each column using the cell below. You will call these column headers when you select your properties. 

**User input required:**

In [4]:
#labels for the substructure atoms, listed in the order of the numbered columns (0, 1, 2, ...) of the preliminary dataframe
substructure_atom_names = ['H4', 'N1', 'C2', 'C3']

#maps every column of the preliminary dataframe to its new header; 'log_name' keeps its name
atom_labels = {'log_name': 'log_name'}
atom_labels.update(enumerate(substructure_atom_names))

### Generate labeled dataframe

**NOTE: it is very important you assign these correctly otherwise the properties you collect will be for the wrong atoms and not produce meaningful correlations.** 

We recommend checking the numbering/headers for at least two different compounds. 

Numbering for different conformers of the same compound will likely be the same (but may not be for some symmetrical groups).

In [5]:
#rename columns using the user input above
atom_map_df = prelim_df.rename(columns=atom_labels)
display(atom_map_df.head())

#you can use this to clean up the table if you have more atoms in your substructure than you want to collect descriptors for
#atom_map_df = atom_map_df.drop(columns= ['C4', 'C1']) 
#display(atom_map_df.head())

#df is what properties will be appended to
#an explicit copy is made so that in-place changes to df can never alter the original atom map
df = atom_map_df.copy()

Unnamed: 0,log_name,H4,N1,C2,C3
0,SecN1_conf-1,15,6,1,5
1,SecN1_conf-2,15,6,1,5
2,SecN2_conf-1,13,4,5,3
3,SecN3_conf-1,12,4,5,3
4,SecN4_conf-1,7,2,1,3


### Save atom map to Excel (if desired)

In [6]:
#the context manager writes and closes the workbook when the block ends
#(the separate writer.save() call no longer exists in pandas 2.0 and later)
with pd.ExcelWriter('SecN_example_atom_map.xlsx') as writer:
    atom_map_df.to_excel(writer)

## Import a manually-generated atom mapping dataframe

If you need to manually generate the atom mapping dataframe, check out the atom_map_sample.xlsx to make sure you have the desired format. 

In [7]:
#read the atom map from the first sheet; the first spreadsheet column is used as the index
atom_map_df = pd.read_excel('SecN_example_atom_map.xlsx', sheet_name='Sheet1', index_col=0, header=0, engine='openpyxl')
display(atom_map_df.head())

#df is what properties will be appended to
#an explicit copy is made so that in-place changes to df can never alter the original atom map
df = atom_map_df.copy()

Unnamed: 0,log_name,H4,N1,C2,C3
0,SecN1_conf-1,15,6,1,5
1,SecN1_conf-2,15,6,1,5
2,SecN2_conf-1,13,4,5,3
3,SecN3_conf-1,12,4,5,3
4,SecN4_conf-1,7,2,1,3


# Define Properties to Collect

## Collect properties: 

In [8]:
#Collects the selected properties for every log file listed in the atom map.
#Each step below takes the current dataframe and rebinds df to the returned dataframe,
#so the order of the steps is the order in which the property columns appear in the output.
#Comment out any step you do not need.
df = atom_map_df

#---------------GoodVibes Energies---------------
#uses the GoodVibes 2021 Branch (Jupyter Notebook Compatible)
#calculates the quasi harmonic corrected G(T) and single point corrected G(T) as well as other thermodynamic properties
#inputs: dataframe, temperature
df = gp.get_goodvibes_e(df, 298.15)

#---------------Frontier Orbitals-----------------
#E(HOMO), E(LUMO), mu(chemical potential or negative of molecular electronegativity), eta(hardness/softness), omega(electrophilicity index)
df = gp.get_frontierorbs(df)

#---------------Polarizability--------------------
#Exact polarizability
df = gp.get_polarizability(df)

#---------------Dipole----------------------------
#Total dipole moment magnitude in Debye
df = gp.get_dipole(df)

#---------------Volume----------------------------
#Molar volume
#requires the Gaussian keyword = "volume" in the .com file
df = gp.get_volume(df)

#---------------SASA------------------------------
#Uses morfeus to calculate solvent accessible surface area and the volume under the SASA
df = gp.get_SASA(df)

#---------------NBO-------------------------------
#natural charge from NBO
#requires the Gaussian keyword = "pop=nbo7" in the .com file
#the list holds the atom labels (column headers of the atom map) to collect the property for
nbo_list = ["N1", "H4"]
df = gp.get_nbo(df, nbo_list) 

#---------------NMR-------------------------------
#isotropic NMR shift
#requires the Gaussian keyword = "nmr=giao" in the .com file
nmr_list = ["N1", "H4"]
df = gp.get_nmr(df, nmr_list) 

#---------------Pyramidalization------------------
#uses morfeus to calculate pyramidalization based on the 3 atoms in closest proximity to the defined atom
#collects values based on two definitions of pyramidalization
#details on these values can be found here: https://kjelljorner.github.io/morfeus/pyramidalization.html
pyr_list = ["N1"]
df = gp.get_pyramidalization(df, pyr_list)

#---------------Lone Pair Energy (custom from first cell)------------------
#uses get_one_lp_energy, which is defined in the first cell of this notebook (not part of gp)
lp_list = ["N1"]
df = get_one_lp_energy(df, lp_list) 

#---------------Vbur Scan-------------------------
#uses morfeus to calculate the buried volume at a series of radii (including hydrogens)
#inputs: dataframe, list of atoms, start_radius, end_radius, and step_size
#if you only want a single radius, put the same value for start_radius and end_radius (keep step_size > 0)
vbur_list = ["N1"]
df = gp.get_vbur_scan(df, vbur_list, 2, 6, 0.5)

#show every column when the collected dataframe is displayed
pd.options.display.max_columns = None
display(df)



   Using vibrational scale factor 1.0 for B3LYP/6-31G(d,p) level of theory

   Using vibrational scale factor 1.0 for B3LYP/6-31G(d,p) level of theory

   Using vibrational scale factor 1.0 for B3LYP/6-31G(d,p) level of theory

   Using vibrational scale factor 1.0 for B3LYP/6-31G(d,p) level of theory

   Using vibrational scale factor 1.0 for B3LYP/6-31G(d,p) level of theory

   Using vibrational scale factor 1.0 for B3LYP/6-31G(d,p) level of theory

   Using vibrational scale factor 1.0 for B3LYP/6-31G(d,p) level of theory

   Using vibrational scale factor 1.0 for B3LYP/6-31G(d,p) level of theory

   Using vibrational scale factor 1.0 for B3LYP/6-31G(d,p) level of theory

   Using vibrational scale factor 1.0 for B3LYP/6-31G(d,p) level of theory

   Using vibrational scale factor 1.0 for B3LYP/6-31G(d,p) level of theory

   Using vibrational scale factor 1.0 for B3LYP/6-31G(d,p) level of theory

   Using vibrational scale factor 1.0 for B3LYP/6-31G(d,p) level of theory

   Using v

Unnamed: 0,log_name,H4,N1,C2,C3,E_spc (Hartree),G(T)_spc(Hartree),H_spc(Hartree),T,T*S,T*qh_S,ZPE(Hartree),qh_G(T)_spc(Hartree),HOMO,LUMO,η,μ,ω,polar_aniso(au),polar_iso(au),dipole(Debye),volume(Bohr_radius³/mol),SASA_sphericity,SASA_surface_area(Å²),SASA_volume(Å³),NBO_charge_H4,NBO_charge_N1,NMR_shift_H4,NMR_shift_N1,pyramidalization_Agranat-Radhakrishnan_N1,pyramidalization_Gavrish_N1(°),NBO_LP_energy_N1,NBO_LP_occupancy_N1,%Vbur_N1_2.0Å,%Vbur_N1_2.5Å,%Vbur_N1_3.0Å,%Vbur_N1_3.5Å,%Vbur_N1_4.0Å,%Vbur_N1_4.5Å,%Vbur_N1_5.0Å,%Vbur_N1_5.5Å,%Vbur_N1_6.0Å
0,SecN1_conf-1,15,6,1,5,-287.779498,-287.672317,-287.637676,298.15,0.03464,0.034648,0.135601,-287.672324,-0.29546,0.0652,0.36066,-0.11513,0.01838,10.5042,57.5451,1.5653,826.759,0.944516,241.089202,323.113008,0.35902,-0.62627,31.2247,212.0578,0.74747,5.414665,-0.35077,1.92171,87.855114,74.907214,63.245866,51.823984,39.858637,30.140674,22.171526,16.653332,12.820876
1,SecN1_conf-2,15,6,1,5,-287.778043,-287.671033,-287.636313,298.15,0.03472,0.034727,0.135473,-287.671041,-0.28099,0.06473,0.34572,-0.10813,0.01691,10.2184,57.391,1.2632,947.22,0.947154,240.259201,322.793529,0.34349,-0.60971,31.9182,214.348,0.775895,5.679226,-0.35034,1.92431,86.944731,73.90448,62.120083,50.679044,39.214279,30.002279,22.163642,16.646542,12.81362
2,SecN2_conf-1,13,4,5,3,-251.873606,-251.742636,-251.707548,298.15,0.035088,0.035096,0.15958,-251.742644,-0.28732,0.06475,0.35207,-0.111285,0.01759,9.72385,65.4228,0.9071,924.428,0.940465,252.080035,343.237939,0.35665,-0.62796,31.3113,205.8491,0.740505,5.35424,-0.33915,1.91864,88.022986,75.333702,63.843676,52.673663,41.075197,31.831799,23.942205,18.045206,13.895188
3,SecN3_conf-1,12,4,5,3,-212.556203,-212.454903,-212.420234,298.15,0.03467,0.033928,0.130011,-212.454162,-0.29612,0.05754,0.35366,-0.11929,0.02012,7.224,53.3828,1.3713,663.23,0.94679,229.810124,301.792073,0.34111,-0.60432,31.5362,195.234,0.841016,6.34749,-0.35224,1.9246,85.32735,71.684139,59.364757,47.819023,36.455295,27.482416,20.144626,15.129046,11.648034
4,SecN4_conf-1,7,2,1,3,-135.141358,-135.07407,-135.043335,298.15,0.030734,0.03074,0.092713,-135.074076,-0.28878,0.06707,0.35585,-0.110855,0.01727,6.30066,34.9797,1.044,584.556,0.952869,195.900335,239.815138,0.34639,-0.61689,32.2691,235.08,0.744175,5.390829,-0.33957,1.91752,87.451575,72.646178,57.926103,42.800736,28.679939,20.138015,14.676098,11.019851,8.483831
5,SecN5_clust-1,24,2,1,3,-808.590252,-808.268805,-808.205473,298.15,0.063332,0.061224,0.366414,-808.266697,-0.26193,0.01198,0.27391,-0.124975,0.02851,85.0725,216.798,2.0638,2448.886,0.861015,477.432701,783.714925,0.37033,-0.64036,29.5562,228.1101,0.734131,5.306498,-0.32746,1.91541,89.114153,81.138495,72.950499,62.241428,50.76966,42.637146,36.178692,30.977869,26.854166
6,SecN5_clust-10,24,2,3,1,-808.592155,-808.27225,-808.207028,298.15,0.065221,0.062234,0.366506,-808.269262,-0.23886,0.01766,0.25652,-0.1106,0.02384,110.061,223.022,1.8151,2720.059,0.837504,500.252847,806.3782,0.3489,-0.63175,32.0514,215.2016,0.739686,5.355733,-0.35123,1.91705,92.561983,82.289361,70.755736,58.304603,45.762212,37.145602,30.812258,26.006153,22.375618
7,SecN5_clust-11,24,2,3,1,-808.595723,-808.276272,-808.210944,298.15,0.065328,0.062312,0.366139,-808.273257,-0.24686,0.01759,0.26445,-0.114635,0.02485,81.0436,221.098,1.5588,2410.425,0.82727,510.542693,816.193158,0.34771,-0.62739,32.5806,216.7862,0.742702,5.380388,-0.34607,1.91796,90.340909,78.73421,66.529164,53.438898,40.658556,32.37208,26.783096,23.007623,20.320649
8,SecN5_clust-12,24,2,1,3,-808.59463,-808.274851,-808.209758,298.15,0.065093,0.062083,0.366284,-808.271841,-0.25028,0.01551,0.26579,-0.117385,0.02592,62.099,218.783,1.2585,2357.256,0.840884,497.002578,803.370189,0.35343,-0.6197,31.6556,224.1577,0.736022,5.324942,-0.3374,1.91621,88.226369,74.998372,62.099598,49.664555,38.676603,31.670068,27.323918,24.359345,22.049526
9,SecN5_clust-13,24,2,1,3,-808.595101,-808.274238,-808.210081,298.15,0.064157,0.06155,0.366611,-808.271631,-0.25188,0.01422,0.2661,-0.11883,0.02653,58.8495,218.384,0.8629,2572.319,0.850554,488.342724,796.00124,0.36391,-0.63351,31.1705,213.3209,0.734327,5.302361,-0.33673,1.9098,92.523244,84.022985,75.022348,64.188292,52.894373,45.064476,39.081864,34.286098,29.859971


## Save collected properties to Excel

It is helpful to save here in case the notebook crashes or if you want to add more properties before post-processing. The saved file can be read back in at 5.1.1.

In [9]:
#the context manager writes and closes the workbook when the block ends
#(the separate writer.save() call no longer exists in pandas 2.0 and later)
with pd.ExcelWriter('All_Conformer_Properties_for_SecN_example.xlsx') as writer:
    df.to_excel(writer)

# Post-processing

## User input for data processing

In [9]:
#log names are expected to look like <prefix><compound number><suffix><conformer label>, e.g. "SecN5_clust-10"
#prefix: text shared by all names BEFORE the compound number
#suffix: text shared by all names directly AFTER the compound number
prefix, suffix = "SecN", "_"

#header of the energy column used for boltzmann averaging and for finding the lowest E conformer
energy_col_header = "G(T)_spc(Hartree)"

#atom mapping columns, these are removed before the properties are summarized
atom_columns_to_drop = ["C2", "N1", "C3", "H4"]

### Option to import an Excel sheet if you're using properties or energies collected outside of this notebook

If you would like to use post-processing functionality (i.e. Boltzmann averaging, lowest E conformers, etc.) you can read in a dataframe with properties (e.g. QikProp properties) or energies (e.g. if you don't/can't run linked jobs) collected outside of this notebook. 

Check out the dataframe_sample.xlsx to make sure you have the desired format. 

In [10]:
#reload the saved conformer properties from the first sheet; the first spreadsheet column becomes the index
properties_workbook = 'All_Conformer_Properties_for_SecN_example.xlsx'
df = pd.read_excel(
    properties_workbook,
    sheet_name='Sheet1',
    index_col=0,
    header=0,
    engine='openpyxl',
)
display(df.head())

Unnamed: 0,log_name,H4,N1,C2,C3,E_spc (Hartree),G(T)_spc(Hartree),H_spc(Hartree),T,T*S,T*qh_S,ZPE(Hartree),qh_G(T)_spc(Hartree),HOMO,LUMO,η,μ,ω,polar_aniso(au),polar_iso(au),dipole(Debye),volume(Bohr_radius³/mol),SASA_sphericity,SASA_surface_area(Å²),SASA_volume(Å³),NBO_charge_H4,NBO_charge_N1,NMR_shift_H4,NMR_shift_N1,pyramidalization_Agranat-Radhakrishnan_N1,pyramidalization_Gavrish_N1(°),NBO_LP_energy_N1,NBO_LP_occupancy_N1,%Vbur_N1_2.0Å,%Vbur_N1_2.5Å,%Vbur_N1_3.0Å,%Vbur_N1_3.5Å,%Vbur_N1_4.0Å,%Vbur_N1_4.5Å,%Vbur_N1_5.0Å,%Vbur_N1_5.5Å,%Vbur_N1_6.0Å
0,SecN1_conf-1,15,6,1,5,-287.779498,-287.672317,-287.637676,298.15,0.03464,0.034648,0.135601,-287.672324,-0.29546,0.0652,0.36066,-0.11513,0.01838,10.5042,57.5451,1.5653,826.759,0.944516,241.089202,323.113008,0.35902,-0.62627,31.2247,212.0578,0.74747,5.414665,-0.35077,1.92171,87.855114,74.907214,63.245866,51.823984,39.858637,30.140674,22.171526,16.653332,12.820876
1,SecN1_conf-2,15,6,1,5,-287.778043,-287.671033,-287.636313,298.15,0.03472,0.034727,0.135473,-287.671041,-0.28099,0.06473,0.34572,-0.10813,0.01691,10.2184,57.391,1.2632,947.22,0.947154,240.259201,322.793529,0.34349,-0.60971,31.9182,214.348,0.775895,5.679226,-0.35034,1.92431,86.944731,73.90448,62.120083,50.679044,39.214279,30.002279,22.163642,16.646542,12.81362
2,SecN2_conf-1,13,4,5,3,-251.873606,-251.742636,-251.707548,298.15,0.035088,0.035096,0.15958,-251.742644,-0.28732,0.06475,0.35207,-0.111285,0.01759,9.72385,65.4228,0.9071,924.428,0.940465,252.080035,343.237939,0.35665,-0.62796,31.3113,205.8491,0.740505,5.35424,-0.33915,1.91864,88.022986,75.333702,63.843676,52.673663,41.075197,31.831799,23.942205,18.045206,13.895188
3,SecN3_conf-1,12,4,5,3,-212.556203,-212.454903,-212.420234,298.15,0.03467,0.033928,0.130011,-212.454162,-0.29612,0.05754,0.35366,-0.11929,0.02012,7.224,53.3828,1.3713,663.23,0.94679,229.810124,301.792073,0.34111,-0.60432,31.5362,195.234,0.841016,6.34749,-0.35224,1.9246,85.32735,71.684139,59.364757,47.819023,36.455295,27.482416,20.144626,15.129046,11.648034
4,SecN4_conf-1,7,2,1,3,-135.141358,-135.07407,-135.043335,298.15,0.030734,0.03074,0.092713,-135.074076,-0.28878,0.06707,0.35585,-0.110855,0.01727,6.30066,34.9797,1.044,584.556,0.952869,195.900335,239.815138,0.34639,-0.61689,32.2691,235.08,0.744175,5.390829,-0.33957,1.91752,87.451575,72.646178,57.926103,42.800736,28.679939,20.138015,14.676098,11.019851,8.483831


## Generating a list of compounds that have conformational ensembles

In [11]:
#this is a template for files that are all named "<prefix>XXX<suffix>clust-X.log", e.g. "SecN5_clust-10"

def _natural_sort_key(text):
    #split into alternating text / digit runs and compare the digit runs as integers, so that "10" sorts after "2"
    parts = re.split(r"(\d+)", text)
    return [int(part) if position % 2 else part for position, part in enumerate(parts)]

compound_set = set() #a set removes the duplicates that result from having several conformers of each compound

for log_file in df['log_name']: #read file name from df
    name_before_suffix = log_file.split(str(suffix))[0] #"SecN5_clust-10" -> "SecN5" (the conformer label is not used)
    #the prefix has to be at the start of the name, this matches the startswith() selection used in post-processing
    if not name_before_suffix.startswith(str(prefix)):
        raise ValueError("log name '" + str(log_file) + "' does not start with the prefix '" + str(prefix) + "'")
    compound_set.add(name_before_suffix[len(str(prefix)):]) #"SecN5" -> "5"

compound_list = sorted(compound_set, key=_natural_sort_key) #numeric order: ['1', '2', '10'] instead of ['1', '10', '2']
print(compound_list)

#this should generate a list that looks like this: ['24', '27', '34', '48']

['1', '2', '3', '4', '5']


## Post-processing to get properties for each compound

In [12]:
all_df_master = pd.DataFrame(columns=[])
properties_df_master = pd.DataFrame(columns=[])

for compound in compound_list: 
    #defines the common start to all files using the input above 
    substring = str(prefix) + str(compound) + str(suffix)
    
    #makes a data frame for one compound at a time for post-processing
    valuesdf = df[df["log_name"].str.startswith(substring)]
    valuesdf = valuesdf.drop(columns = atom_columns_to_drop)
    valuesdf = valuesdf.reset_index(drop = True)  #you must re-index otherwise the 2nd, 3rd, etc. compounds fail
   
    #define columns that won't be included in summary properties or are treated differently because they don't make sense to Boltzmann average
    non_boltz_columns = ["G(Hartree)","∆G(Hartree)","∆G(kcal/mol)", "e^(-∆G/RT)","Mole Fraction"] #don't boltzman average columns containing these strings in the column label
    reg_avg_columns = ['CPU_time_total(hours)', 'Wall_time_total(hours)'] #don't boltzmann average these either, we average them in case that is helpful
    gv_extra_columns = ['E_spc (Hartree)', 'H_spc(Hartree)', 'T', 'T*S', 'T*qh_S', 'ZPE(Hartree)', 'qh_G(T)_spc(Hartree)', "G(T)_spc(Hartree)"]
    gv_extra_columns.remove(str(energy_col_header))
    
    #calculate the summary properties based on all conformers (Boltzmann Average, Minimum, Maximum, Boltzmann Weighted Std)
    valuesdf["∆G(Hartree)"] = valuesdf[energy_col_header] - valuesdf[energy_col_header].min()
    valuesdf["∆G(kcal/mol)"] = valuesdf["∆G(Hartree)"] * 627.5
    valuesdf["e^(-∆G/RT)"] = np.exp((valuesdf["∆G(kcal/mol)"] * -1000) / (1.987204 * 298.15)) #R is in cal/(K*mol)
    valuesdf["Mole Fraction"] = valuesdf["e^(-∆G/RT)"] / valuesdf["e^(-∆G/RT)"].sum()
    values_boltz_row = []
    values_min_row = []
    values_max_row = []
    values_boltz_stdev_row =[]
    #values_range_row = []
    values_exclude_columns = []
    
    for column in valuesdf:
        if "log_name" in column:
            values_boltz_row.append("Boltzmann Averages")
            values_min_row.append("Ensemble Minimum")
            values_max_row.append("Ensemble Maximum")
            values_boltz_stdev_row.append("Boltzmann Standard Deviation")
            #values_range_row.append("Ensemble Range")
            values_exclude_columns.append(column) #used later to build final dataframe
        elif any(phrase in column for phrase in non_boltz_columns) or any(phrase in column for phrase in gv_extra_columns):
            values_boltz_row.append("")
            values_min_row.append("")
            values_max_row.append("")
            values_boltz_stdev_row.append("")
            #values_range_row.append("")
        elif any(phrase in column for phrase in reg_avg_columns):
            values_boltz_row.append(valuesdf[column].mean()) #intended to print the average CPU/wall time in the boltz column
            values_min_row.append("")
            values_max_row.append("")
            values_boltz_stdev_row.append("")
            #values_range_row.append("")
        else:
            valuesdf[column] = pd.to_numeric(valuesdf[column]) #to hopefully solve the error that sometimes occurs where the float(Mole Fraction) cannot be mulitplied by the string(property)
            values_boltz_row.append((valuesdf[column] * valuesdf["Mole Fraction"]).sum())
            values_min_row.append(valuesdf[column].min())
            values_max_row.append(valuesdf[column].max())
            #values_range_row.append(valuesdf[column].max() - valuesdf[column].min())

            
            # this section generates the weighted std deviation (weighted by mole fraction) 
            # formula: https://www.statology.org/weighted-standard-deviation-excel/
    
            boltz = (valuesdf[column] * valuesdf["Mole Fraction"]).sum() #number
            delta_values_sq = []
    
            #makes a list of the "deviation" for each conformer           
            for index, row in valuesdf.iterrows(): 
                value = row[column]
                delta_value_sq = (value - boltz)**2
                delta_values_sq.append(delta_value_sq)
            
            #w is list of weights (i.e. mole fractions)
            w = list(valuesdf["Mole Fraction"])
            wstdev = np.sqrt( (np.average(delta_values_sq, weights=w)) / (((len(w)-1)/len(w))*np.sum(w)) )
            #np.average(delta_values_sq, weights=w) generates sum of each (delta_value_sq * mole fraction)
           
            #if there is only one conformer in the ensemble, set the weighted standard deviation to 0 
            if len(w) == 1:
                wstdev = 0
            values_boltz_stdev_row.append(wstdev)
            
            
    valuesdf.loc[len(valuesdf)] = values_boltz_row
    valuesdf.loc[len(valuesdf)] = values_boltz_stdev_row
    valuesdf.loc[len(valuesdf)] = values_min_row
    valuesdf.loc[len(valuesdf)] = values_max_row
    #valuesdf.loc[len(valuesdf)] = values_range_row

    #final output format is built here:
    explicit_order_front_columns = ["log_name", energy_col_header,"∆G(Hartree)","∆G(kcal/mol)","e^(-∆G/RT)","Mole Fraction"]
    
    #reorders the dataframe using front columns defined above
    valuesdf = valuesdf[explicit_order_front_columns + [col for col in valuesdf.columns if col not in explicit_order_front_columns and col not in values_exclude_columns]]
    
    #determine the index of the lowest energy conformer
    low_e_index = valuesdf[valuesdf["∆G(Hartree)"] == 0].index.tolist()
    
    #copy the row to a new_row with the name of the log changed to Lowest E Conformer
    new_row = valuesdf.loc[low_e_index[0]]
    new_row['log_name'] = "Lowest E Conformer"   
    valuesdf =  valuesdf.append(new_row, ignore_index=True)

#------------------------------EDIT THIS SECTION IF YOU WANT A SPECIFIC CONFORMER----------------------------------  
    #if you want all properties for a conformer with a particular property (i.e. all properties for the Vbur_min conformer)
    #this template can be adjusted for min/max/etc. 
    
    #find the index for the min or max column:
    #ensemble_min_index = valuesdf[valuesdf["log_name"] == "Ensemble Minimum"].index.tolist()
    
    #find the min or max value of the property (based on index above)
    #saves the value in a list (min_value) with one entry (this is why we call min_value[0])
    #min_value = valuesdf.loc[ensemble_min_index, "%Vbur_C4_3.0Å"].tolist()   
    #vbur_min_index = valuesdf[valuesdf["%Vbur_C4_3.0Å"] == min_value[0]].index.tolist()
    
    #copy the row to a new_row with the name of the log changed to Property_min_conformer
    #new_row = valuesdf.loc[vbur_min_index[0]]
    #new_row['log_name'] = "%Vbur_C4_3.0Å_min_Conformer"   
    #valuesdf =  valuesdf.append(new_row, ignore_index=True)
#--------------------------------------------------------------------------------------------------------------------    
    
    #appends the frame to the master output
    all_df_master = pd.concat([all_df_master, valuesdf])
    
    #drop all the individual conformers
    dropindex = valuesdf[valuesdf["log_name"].str.startswith(substring)].index
    valuesdf = valuesdf.drop(dropindex)
    valuesdf = valuesdf.reset_index(drop = True)
    
    #display(valuesdf)   
    
    #drop the columns created to determine the mole fraction and some that 
    valuesdf = valuesdf.drop(columns = explicit_order_front_columns)
    try:
        valuesdf = valuesdf.drop(columns = gv_extra_columns)
    except:
        pass
    try:
        valuesdf = valuesdf.drop(columns = reg_avg_columns)
    except:
        pass
        
#---------------------THIS MAY NEED TO CHANGE DEPENDING ON HOW YOU LABEL YOUR COMPOUNDS------------------------------  
    compound_name = prefix + str(compound) 
#--------------------------------------------------------------------------------------------------------------------      

    properties_df = pd.DataFrame({'Compound_Name': [compound_name]})
    
    #builds a dataframe (for each compound) by adding summary properties as new columns
    for (columnName, columnData) in valuesdf.iteritems():
        #the indexes need to match the values dataframe - display it to double check if you need to make changes 
        #(uncomment the display(valuesdf) in row 124 of this cell)
        properties_df[str(columnName) + "_Boltz"] = [columnData.values[0]]
        properties_df[str(columnName) + "_Boltz_stdev"] = [columnData.values[1]]
        properties_df[str(columnName) + "_min"] = [columnData.values[2]]
        properties_df[str(columnName) + "_max"] = [columnData.values[3]]
        #properties_df[str(columnName) + "_range"] = [columnData.values[4]]
        properties_df[str(columnName) + "_low_E"] = [columnData.values[4]]
        
        #if you're collecting properties for a specific conformer, add these here (note the index)
        #example:
        #properties_df[str(columnName) + "_V_bur_min"] = [columnData.values[6]]
        
        #if you only want a table with Boltz, you can comment out the other summary properties to generate a Boltz spreadsheet
        #of if you don't want to collect range, etc.
    #concatenates the individual acid properties df into the master properties df
    properties_df_master = pd.concat([properties_df_master, properties_df], axis = 0)

#replace whatever row labels the concatenations left behind with a fresh 0..n-1 index
#(drop = True discards the old labels instead of keeping them as a column)
all_df_master = all_df_master.reset_index(drop = True)
properties_df_master = properties_df_master.reset_index(drop = True)


### Peek at your new dataframes

In [13]:
#show the first rows of the per-compound summary table, then the full conformer-level table
display(properties_df_master.head())
display(all_df_master)

Unnamed: 0,Compound_Name,HOMO_Boltz,HOMO_Boltz_stdev,HOMO_min,HOMO_max,HOMO_low_E,LUMO_Boltz,LUMO_Boltz_stdev,LUMO_min,LUMO_max,LUMO_low_E,η_Boltz,η_Boltz_stdev,η_min,η_max,η_low_E,μ_Boltz,μ_Boltz_stdev,μ_min,μ_max,μ_low_E,ω_Boltz,ω_Boltz_stdev,ω_min,ω_max,ω_low_E,polar_aniso(au)_Boltz,polar_aniso(au)_Boltz_stdev,polar_aniso(au)_min,polar_aniso(au)_max,polar_aniso(au)_low_E,polar_iso(au)_Boltz,polar_iso(au)_Boltz_stdev,polar_iso(au)_min,polar_iso(au)_max,polar_iso(au)_low_E,dipole(Debye)_Boltz,dipole(Debye)_Boltz_stdev,dipole(Debye)_min,dipole(Debye)_max,dipole(Debye)_low_E,volume(Bohr_radius³/mol)_Boltz,volume(Bohr_radius³/mol)_Boltz_stdev,volume(Bohr_radius³/mol)_min,volume(Bohr_radius³/mol)_max,volume(Bohr_radius³/mol)_low_E,SASA_sphericity_Boltz,SASA_sphericity_Boltz_stdev,SASA_sphericity_min,SASA_sphericity_max,SASA_sphericity_low_E,SASA_surface_area(Å²)_Boltz,SASA_surface_area(Å²)_Boltz_stdev,SASA_surface_area(Å²)_min,SASA_surface_area(Å²)_max,SASA_surface_area(Å²)_low_E,SASA_volume(Å³)_Boltz,SASA_volume(Å³)_Boltz_stdev,SASA_volume(Å³)_min,SASA_volume(Å³)_max,SASA_volume(Å³)_low_E,NBO_charge_H4_Boltz,NBO_charge_H4_Boltz_stdev,NBO_charge_H4_min,NBO_charge_H4_max,NBO_charge_H4_low_E,NBO_charge_N1_Boltz,NBO_charge_N1_Boltz_stdev,NBO_charge_N1_min,NBO_charge_N1_max,NBO_charge_N1_low_E,NMR_shift_H4_Boltz,NMR_shift_H4_Boltz_stdev,NMR_shift_H4_min,NMR_shift_H4_max,NMR_shift_H4_low_E,NMR_shift_N1_Boltz,NMR_shift_N1_Boltz_stdev,NMR_shift_N1_min,NMR_shift_N1_max,NMR_shift_N1_low_E,pyramidalization_Agranat-Radhakrishnan_N1_Boltz,pyramidalization_Agranat-Radhakrishnan_N1_Boltz_stdev,pyramidalization_Agranat-Radhakrishnan_N1_min,pyramidalization_Agranat-Radhakrishnan_N1_max,pyramidalization_Agranat-Radhakrishnan_N1_low_E,pyramidalization_Gavrish_N1(°)_Boltz,pyramidalization_Gavrish_N1(°)_Boltz_stdev,pyramidalization_Gavrish_N1(°)_min,pyramidalization_Gavrish_N1(°)_max,pyramidalization_Gavrish_N1(°)_low_E,NBO_LP_energy_N1_Boltz,NBO_LP_energy_N1_Boltz_stdev,NBO_LP_energy_N1_
min,NBO_LP_energy_N1_max,NBO_LP_energy_N1_low_E,NBO_LP_occupancy_N1_Boltz,NBO_LP_occupancy_N1_Boltz_stdev,NBO_LP_occupancy_N1_min,NBO_LP_occupancy_N1_max,NBO_LP_occupancy_N1_low_E,%Vbur_N1_2.0Å_Boltz,%Vbur_N1_2.0Å_Boltz_stdev,%Vbur_N1_2.0Å_min,%Vbur_N1_2.0Å_max,%Vbur_N1_2.0Å_low_E,%Vbur_N1_2.5Å_Boltz,%Vbur_N1_2.5Å_Boltz_stdev,%Vbur_N1_2.5Å_min,%Vbur_N1_2.5Å_max,%Vbur_N1_2.5Å_low_E,%Vbur_N1_3.0Å_Boltz,%Vbur_N1_3.0Å_Boltz_stdev,%Vbur_N1_3.0Å_min,%Vbur_N1_3.0Å_max,%Vbur_N1_3.0Å_low_E,%Vbur_N1_3.5Å_Boltz,%Vbur_N1_3.5Å_Boltz_stdev,%Vbur_N1_3.5Å_min,%Vbur_N1_3.5Å_max,%Vbur_N1_3.5Å_low_E,%Vbur_N1_4.0Å_Boltz,%Vbur_N1_4.0Å_Boltz_stdev,%Vbur_N1_4.0Å_min,%Vbur_N1_4.0Å_max,%Vbur_N1_4.0Å_low_E,%Vbur_N1_4.5Å_Boltz,%Vbur_N1_4.5Å_Boltz_stdev,%Vbur_N1_4.5Å_min,%Vbur_N1_4.5Å_max,%Vbur_N1_4.5Å_low_E,%Vbur_N1_5.0Å_Boltz,%Vbur_N1_5.0Å_Boltz_stdev,%Vbur_N1_5.0Å_min,%Vbur_N1_5.0Å_max,%Vbur_N1_5.0Å_low_E,%Vbur_N1_5.5Å_Boltz,%Vbur_N1_5.5Å_Boltz_stdev,%Vbur_N1_5.5Å_min,%Vbur_N1_5.5Å_max,%Vbur_N1_5.5Å_low_E,%Vbur_N1_6.0Å_Boltz,%Vbur_N1_6.0Å_Boltz_stdev,%Vbur_N1_6.0Å_min,%Vbur_N1_6.0Å_max,%Vbur_N1_6.0Å_low_E
0,SecN1,-0.292504,0.008251,-0.29546,-0.28099,-0.29546,0.065104,0.000268,0.06473,0.0652,0.0652,0.357608,0.008519,0.34572,0.36066,0.36066,-0.1137,0.003991,-0.11513,-0.10813,-0.11513,0.01808,0.000838,0.01691,0.01838,0.01838,10.445814,0.162959,10.2184,10.5042,10.5042,57.513619,0.087865,57.391,57.5451,57.5451,1.503584,0.172253,1.2632,1.5653,1.5653,851.367925,68.685028,826.759,947.22,826.759,0.945055,0.001504,0.944516,0.947154,0.944516,240.919641,0.473254,240.259201,241.089202,241.089202,323.047741,0.182162,322.793529,323.113008,323.113008,0.355847,0.008855,0.34349,0.35902,0.35902,-0.622887,0.009442,-0.62627,-0.60971,-0.62627,31.366375,0.395423,31.2247,31.9182,31.2247,212.525664,1.305837,212.0578,214.348,212.0578,0.753277,0.016208,0.74747,0.775895,0.74747,5.468712,0.150849,5.414665,5.679226,5.414665,-0.350682,0.000245,-0.35077,-0.35034,-0.35077,1.922241,0.001482,1.92171,1.92431,1.92171,87.669132,0.519086,86.944731,87.855114,87.855114,74.702366,0.571744,73.90448,74.907214,74.907214,63.01588,0.641904,62.120083,63.245866,63.245866,51.590085,0.652828,50.679044,51.823984,51.823984,39.727001,0.367403,39.214279,39.858637,39.858637,30.112401,0.07891,30.002279,30.140674,30.140674,22.169915,0.004495,22.163642,22.171526,22.171526,16.651945,0.003872,16.646542,16.653332,16.653332,12.819394,0.004138,12.81362,12.820876,12.820876
1,SecN2,-0.28732,0.0,-0.28732,-0.28732,-0.28732,0.06475,0.0,0.06475,0.06475,0.06475,0.35207,0.0,0.35207,0.35207,0.35207,-0.111285,0.0,-0.111285,-0.111285,-0.111285,0.01759,0.0,0.01759,0.01759,0.01759,9.72385,0.0,9.72385,9.72385,9.72385,65.4228,0.0,65.4228,65.4228,65.4228,0.9071,0.0,0.9071,0.9071,0.9071,924.428,0.0,924.428,924.428,924.428,0.940465,0.0,0.940465,0.940465,0.940465,252.080035,0.0,252.080035,252.080035,252.080035,343.237939,0.0,343.237939,343.237939,343.237939,0.35665,0.0,0.35665,0.35665,0.35665,-0.62796,0.0,-0.62796,-0.62796,-0.62796,31.3113,0.0,31.3113,31.3113,31.3113,205.8491,0.0,205.8491,205.8491,205.8491,0.740505,0.0,0.740505,0.740505,0.740505,5.35424,0.0,5.35424,5.35424,5.35424,-0.33915,0.0,-0.33915,-0.33915,-0.33915,1.91864,0.0,1.91864,1.91864,1.91864,88.022986,0.0,88.022986,88.022986,88.022986,75.333702,0.0,75.333702,75.333702,75.333702,63.843676,0.0,63.843676,63.843676,63.843676,52.673663,0.0,52.673663,52.673663,52.673663,41.075197,0.0,41.075197,41.075197,41.075197,31.831799,0.0,31.831799,31.831799,31.831799,23.942205,0.0,23.942205,23.942205,23.942205,18.045206,0.0,18.045206,18.045206,18.045206,13.895188,0.0,13.895188,13.895188,13.895188
2,SecN3,-0.29612,0.0,-0.29612,-0.29612,-0.29612,0.05754,0.0,0.05754,0.05754,0.05754,0.35366,0.0,0.35366,0.35366,0.35366,-0.11929,0.0,-0.11929,-0.11929,-0.11929,0.02012,0.0,0.02012,0.02012,0.02012,7.224,0.0,7.224,7.224,7.224,53.3828,0.0,53.3828,53.3828,53.3828,1.3713,0.0,1.3713,1.3713,1.3713,663.23,0.0,663.23,663.23,663.23,0.94679,0.0,0.94679,0.94679,0.94679,229.810124,0.0,229.810124,229.810124,229.810124,301.792073,0.0,301.792073,301.792073,301.792073,0.34111,0.0,0.34111,0.34111,0.34111,-0.60432,0.0,-0.60432,-0.60432,-0.60432,31.5362,0.0,31.5362,31.5362,31.5362,195.234,0.0,195.234,195.234,195.234,0.841016,0.0,0.841016,0.841016,0.841016,6.34749,0.0,6.34749,6.34749,6.34749,-0.35224,0.0,-0.35224,-0.35224,-0.35224,1.9246,0.0,1.9246,1.9246,1.9246,85.32735,0.0,85.32735,85.32735,85.32735,71.684139,0.0,71.684139,71.684139,71.684139,59.364757,0.0,59.364757,59.364757,59.364757,47.819023,0.0,47.819023,47.819023,47.819023,36.455295,0.0,36.455295,36.455295,36.455295,27.482416,0.0,27.482416,27.482416,27.482416,20.144626,0.0,20.144626,20.144626,20.144626,15.129046,0.0,15.129046,15.129046,15.129046,11.648034,0.0,11.648034,11.648034,11.648034
3,SecN4,-0.28878,0.0,-0.28878,-0.28878,-0.28878,0.06707,0.0,0.06707,0.06707,0.06707,0.35585,0.0,0.35585,0.35585,0.35585,-0.110855,0.0,-0.110855,-0.110855,-0.110855,0.01727,0.0,0.01727,0.01727,0.01727,6.30066,0.0,6.30066,6.30066,6.30066,34.9797,0.0,34.9797,34.9797,34.9797,1.044,0.0,1.044,1.044,1.044,584.556,0.0,584.556,584.556,584.556,0.952869,0.0,0.952869,0.952869,0.952869,195.900335,0.0,195.900335,195.900335,195.900335,239.815138,0.0,239.815138,239.815138,239.815138,0.34639,0.0,0.34639,0.34639,0.34639,-0.61689,0.0,-0.61689,-0.61689,-0.61689,32.2691,0.0,32.2691,32.2691,32.2691,235.08,0.0,235.08,235.08,235.08,0.744175,0.0,0.744175,0.744175,0.744175,5.390829,0.0,5.390829,5.390829,5.390829,-0.33957,0.0,-0.33957,-0.33957,-0.33957,1.91752,0.0,1.91752,1.91752,1.91752,87.451575,0.0,87.451575,87.451575,87.451575,72.646178,0.0,72.646178,72.646178,72.646178,57.926103,0.0,57.926103,57.926103,57.926103,42.800736,0.0,42.800736,42.800736,42.800736,28.679939,0.0,28.679939,28.679939,28.679939,20.138015,0.0,20.138015,20.138015,20.138015,14.676098,0.0,14.676098,14.676098,14.676098,11.019851,0.0,11.019851,11.019851,11.019851,8.483831,0.0,8.483831,8.483831,8.483831
4,SecN5,-0.247399,0.002025,-0.26604,-0.23318,-0.24607,0.017401,0.001296,0.00935,0.01878,0.01841,0.2648,0.000917,0.2478,0.28014,0.26448,-0.114999,0.001637,-0.12597,-0.1072,-0.11383,0.024978,0.000645,0.0228,0.02851,0.0245,70.344991,8.332563,58.8495,131.781,64.334,219.189637,1.413487,216.798,225.528,218.184,1.490042,0.237971,0.8056,2.2425,1.6516,2194.300595,273.327839,1968.923,3264.839,1968.923,0.8437,0.013829,0.825953,0.874461,0.853926,494.976345,13.059111,464.472428,511.662562,485.462595,802.02131,11.870618,769.705367,816.924825,793.666324,0.34874,0.004018,0.34679,0.37033,0.34699,-0.62777,0.007624,-0.64637,-0.6145,-0.62979,32.456943,0.416591,29.5562,32.7169,32.7169,219.996201,4.000945,213.3209,231.8234,220.3757,0.739724,0.007414,0.69942,0.746438,0.7415,5.354085,0.063911,5.003268,5.417212,5.368792,-0.342116,0.002975,-0.35132,-0.32138,-0.34181,1.91743,0.001979,1.9098,1.92267,1.91688,90.283146,1.121692,88.058497,92.561983,90.980114,79.475045,2.552335,74.84373,84.022985,81.431501,68.304337,3.804207,62.016724,75.022348,71.355408,56.315748,4.689439,48.845742,64.188292,60.14897,44.420832,5.109388,36.559261,52.894373,48.622059,36.526249,5.150221,28.675864,45.064476,40.759324,31.122983,4.920919,23.612469,39.485313,35.188696,27.243381,4.5664,20.187118,34.650563,31.034849,24.206528,4.063828,17.82235,30.303415,27.600005


Unnamed: 0,log_name,G(T)_spc(Hartree),∆G(Hartree),∆G(kcal/mol),e^(-∆G/RT),Mole Fraction,E_spc (Hartree),H_spc(Hartree),T,T*S,T*qh_S,ZPE(Hartree),qh_G(T)_spc(Hartree),HOMO,LUMO,η,μ,ω,polar_aniso(au),polar_iso(au),dipole(Debye),volume(Bohr_radius³/mol),SASA_sphericity,SASA_surface_area(Å²),SASA_volume(Å³),NBO_charge_H4,NBO_charge_N1,NMR_shift_H4,NMR_shift_N1,pyramidalization_Agranat-Radhakrishnan_N1,pyramidalization_Gavrish_N1(°),NBO_LP_energy_N1,NBO_LP_occupancy_N1,%Vbur_N1_2.0Å,%Vbur_N1_2.5Å,%Vbur_N1_3.0Å,%Vbur_N1_3.5Å,%Vbur_N1_4.0Å,%Vbur_N1_4.5Å,%Vbur_N1_5.0Å,%Vbur_N1_5.5Å,%Vbur_N1_6.0Å
0,SecN1_conf-1,-287.672317,0.0,0.0,1.0,0.79571,-287.779498,-287.637676,298.15,0.03464,0.034648,0.135601,-287.672324,-0.29546,0.0652,0.36066,-0.11513,0.01838,10.5042,57.5451,1.5653,826.759,0.944516,241.089202,323.113008,0.35902,-0.62627,31.2247,212.0578,0.74747,5.414665,-0.35077,1.92171,87.855114,74.907214,63.245866,51.823984,39.858637,30.140674,22.171526,16.653332,12.820876
1,SecN1_conf-2,-287.671033,0.001284,0.8056,0.256739,0.20429,-287.778043,-287.636313,298.15,0.03472,0.034727,0.135473,-287.671041,-0.28099,0.06473,0.34572,-0.10813,0.01691,10.2184,57.391,1.2632,947.22,0.947154,240.259201,322.793529,0.34349,-0.60971,31.9182,214.348,0.775895,5.679226,-0.35034,1.92431,86.944731,73.90448,62.120083,50.679044,39.214279,30.002279,22.163642,16.646542,12.81362
2,Boltzmann Averages,,,,,,,,,,,,,-0.292504,0.065104,0.357608,-0.1137,0.01808,10.445814,57.513619,1.503584,851.367925,0.945055,240.919641,323.047741,0.355847,-0.622887,31.366375,212.525664,0.753277,5.468712,-0.350682,1.922241,87.669132,74.702366,63.01588,51.590085,39.727001,30.112401,22.169915,16.651945,12.819394
3,Boltzmann Standard Deviation,,,,,,,,,,,,,0.008251,0.000268,0.008519,0.003991,0.000838,0.162959,0.087865,0.172253,68.685028,0.001504,0.473254,0.182162,0.008855,0.009442,0.395423,1.305837,0.016208,0.150849,0.000245,0.001482,0.519086,0.571744,0.641904,0.652828,0.367403,0.07891,0.004495,0.003872,0.004138
4,Ensemble Minimum,,,,,,,,,,,,,-0.29546,0.06473,0.34572,-0.11513,0.01691,10.2184,57.391,1.2632,826.759,0.944516,240.259201,322.793529,0.34349,-0.62627,31.2247,212.0578,0.74747,5.414665,-0.35077,1.92171,86.944731,73.90448,62.120083,50.679044,39.214279,30.002279,22.163642,16.646542,12.81362
5,Ensemble Maximum,,,,,,,,,,,,,-0.28099,0.0652,0.36066,-0.10813,0.01838,10.5042,57.5451,1.5653,947.22,0.947154,241.089202,323.113008,0.35902,-0.60971,31.9182,214.348,0.775895,5.679226,-0.35034,1.92431,87.855114,74.907214,63.245866,51.823984,39.858637,30.140674,22.171526,16.653332,12.820876
6,Lowest E Conformer,-287.672317,0.0,0.0,1.0,0.79571,-287.779498,-287.637676,298.15,0.03464,0.034648,0.135601,-287.672324,-0.29546,0.0652,0.36066,-0.11513,0.01838,10.5042,57.5451,1.5653,826.759,0.944516,241.089202,323.113008,0.35902,-0.62627,31.2247,212.0578,0.74747,5.414665,-0.35077,1.92171,87.855114,74.907214,63.245866,51.823984,39.858637,30.140674,22.171526,16.653332,12.820876
7,SecN2_conf-1,-251.742636,0.0,0.0,1.0,1.0,-251.873606,-251.707548,298.15,0.035088,0.035096,0.15958,-251.742644,-0.28732,0.06475,0.35207,-0.111285,0.01759,9.72385,65.4228,0.9071,924.428,0.940465,252.080035,343.237939,0.35665,-0.62796,31.3113,205.8491,0.740505,5.35424,-0.33915,1.91864,88.022986,75.333702,63.843676,52.673663,41.075197,31.831799,23.942205,18.045206,13.895188
8,Boltzmann Averages,,,,,,,,,,,,,-0.28732,0.06475,0.35207,-0.111285,0.01759,9.72385,65.4228,0.9071,924.428,0.940465,252.080035,343.237939,0.35665,-0.62796,31.3113,205.8491,0.740505,5.35424,-0.33915,1.91864,88.022986,75.333702,63.843676,52.673663,41.075197,31.831799,23.942205,18.045206,13.895188
9,Boltzmann Standard Deviation,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Save to Microsoft Excelᵀᴹ 

In [14]:
#write both tables to .xlsx files (relative paths, so they land in the current working directory)
#index = False leaves the dataframe row index out of the spreadsheets
all_df_master.to_excel('All_Conformer_and_Summary_Properties_for_SecN_example.xlsx', index = False)
properties_df_master.to_excel('Summary_Properties_for_SecN_example.xlsx', index = False)