<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Generate-.com-file-from-SMILES-(using-rdkit-to-generate-initial-geometry)" data-toc-modified-id="Generate-.com-file-from-SMILES-(using-rdkit-to-generate-initial-geometry)-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Generate .com file from SMILES (using rdkit to generate initial geometry)</a></span></li><li><span><a href="#Generate-SLURM-file" data-toc-modified-id="Generate-SLURM-file-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Generate SLURM file</a></span></li><li><span><a href="#Extract-optimised-coordinates-from-a-.log-file" data-toc-modified-id="Extract-optimised-coordinates-from-a-.log-file-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Extract optimised coordinates from a .log file</a></span></li><li><span><a href="#Write-a-.com-file-using-final-geometry-of-a-.log-file" data-toc-modified-id="Write-a-.com-file-using-final-geometry-of-a-.log-file-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Write a .com file using final geometry of a .log file</a></span></li><li><span><a href="#Read-energy-from-successfully-optimised-.log-file" data-toc-modified-id="Read-energy-from-successfully-optimised-.log-file-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Read energy from successfully optimised .log file</a></span><ul class="toc-item"><li><span><a href="#Just-1-file" data-toc-modified-id="Just-1-file-5.1"><span class="toc-item-num">5.1&nbsp;&nbsp;</span>Just 1 file</a></span></li><li><span><a href="#All-files-in-a-folder,-and-write-to-csv" data-toc-modified-id="All-files-in-a-folder,-and-write-to-csv-5.2"><span class="toc-item-num">5.2&nbsp;&nbsp;</span>All files in a folder, and write to csv</a></span></li></ul></li><li><span><a href="#Extract-coordinates-from-.log-files-to-copy-paste-into-SI" data-toc-modified-id="Extract-coordinates-from-.log-files-to-copy-paste-into-SI-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Extract coordinates from .log files to copy-paste into 
SI</a></span></li></ul></div>

# Generate .com file from SMILES (using rdkit to generate initial geometry)

In [1]:
from DFT_utilities import generate_com_file_from_smiles
from DFT_utilities import generate_k2Ar_minus_com_files
from DFT_utilities import generate_k2Ar_TS_com_files
import pandas as pd

In [2]:
# Example of how to use the generators, for the k2Ar intermediate (Ar-) and the k2Ar TS

df = pd.read_csv('data/Cox-molecules/Cox-molecules-overview.csv')

# Both generator calls share the same label and the same output folder
k2Ar_label = 'k2Ar'
k2Ar_com_folder = 'data/Cox-molecules/DFT/example_com_files_k2Ar'

k2Ar_intermediate_molecules = generate_k2Ar_minus_com_files(
    df, k2Ar_label, k2Ar_com_folder
)
k2Ar_TS_molecules = generate_k2Ar_TS_com_files(
    df, k2Ar_label, k2Ar_com_folder
)


In [3]:
# Display the value returned by generate_k2Ar_TS_com_files in the previous cell
k2Ar_TS_molecules

['COc1ccc([B-](O)(O)O)cc1',
 'O[B-](O)(O)c1c(F)cccc1F',
 'O[B-](O)(O)c1c(F)c(F)c(F)c(F)c1F',
 'O[B-](O)(O)c1ccccc1',
 'O[B-](O)(O)c1ccccc1F',
 'O[B-](O)(O)c1cccc(F)c1',
 'O[B-](O)(O)c1ccc(F)cc1',
 'O[B-](O)(O)c1cccc(F)c1F',
 'O[B-](O)(O)c1ccc(F)cc1F',
 'O[B-](O)(O)c1cc(F)ccc1F',
 'O[B-](O)(O)c1ccc(F)c(F)c1',
 'O[B-](O)(O)c1cc(F)cc(F)c1',
 'O[B-](O)(O)c1ccc(F)c(F)c1F',
 'O[B-](O)(O)c1cc(F)cc(F)c1F',
 'O[B-](O)(O)c1cc(F)c(F)cc1F',
 'O[B-](O)(O)c1cc(F)c(F)c(F)c1',
 'O[B-](O)(O)c1cc(F)c(F)c(F)c1F',
 'O[B-](O)(O)c1c(F)ccc(F)c1F',
 'O[B-](O)(O)c1c(F)cc(F)cc1F',
 'O[B-](O)(O)c1c(F)cc(F)c(F)c1F',
 'O[B-](O)(O)c1c(F)c(F)cc(F)c1F',
 'COc1ccc([B-](O)(O)O)c(F)c1',
 'O[B-](O)(O)c1ccc(C(F)(F)F)cc1F',
 'FC1=C([B-](O)(O)O)C=C([N+]([O-])=O)C=C1',
 'COc1cc(F)c([B-](O)(O)O)c(F)c1',
 'COc1c(F)c(F)c([B-](O)(O)O)c(F)c1F',
 'O[B-](O)(O)c1cc(C(F)(F)F)cc(C(F)(F)F)c1',
 'O[B-](O)(O)C1=CC([N+]([O-])=O)=CC([N+]([O-])=O)=C1',
 'Cc1ccc([B-](O)(O)O)cc1',
 'O[B-](O)(O)c1cccc(Cl)c1']

# Generate SLURM file

In [4]:
from os import walk
from shutil import copyfile

In [5]:
# NB: You need to change the template to include the name of the project to be charged:

#! Which project should be charged:
#SBATCH -A < >

def slurm_file_generation(folder_path, your_name,
                          template_path='data/Cox-molecules/DFT/slurm_submit_template.peta4-cclake',
                          remote_folder=None):
    """
    Create one SLURM submission script for every .com file in a folder.

    For each ``<name>.com`` found directly inside ``folder_path`` the template
    is copied to ``<folder_path>/slurm_submit_<name>.peta4-cclake``; two lines
    of the copy are then overwritten (the ``#SBATCH -J`` job name and the
    ``application=`` command that runs g16 on the .com file), and the matching
    ``sbatch`` command is printed.

    folder_path: path to a folder containing .com files
    your_name: user name inserted into the ``/home/<your_name>/...`` paths
    template_path: SLURM template to copy; it must contain at least 61 lines,
        because lines 13 and 61 (1-based) are replaced
    remote_folder: folder below ``/home/<your_name>/`` that holds the .com
        files on the cluster. When None (default) it is ``folder_path`` with
        its first 19 characters removed, i.e. with a leading
        'data/Cox-molecules/' stripped; pass it explicitly for any
        ``folder_path`` that does not start with that prefix.

    Raises IndexError if the template has fewer than 61 lines.
    """
    # Zero-based positions of the template lines that are overwritten
    job_name_line = 12
    application_line = 60
    # len('data/Cox-molecules/') -- the local prefix dropped by default
    local_prefix_length = 19

    if remote_folder is None:
        folder_names = folder_path[local_prefix_length:]
    else:
        folder_names = remote_folder

    # Get all the filenames in the folder (top level only)
    filenames = next(walk(folder_path), (None, None, []))[2]  # [] if no file
    for filename in filenames:
        # Only .com files get a slurm script (this also skips e.g. '.DS_Store')
        if not filename.endswith('.com'):
            continue
        job_name = filename[:-4]
        slurm_path = f'{folder_path}/slurm_submit_{job_name}.peta4-cclake'

        # Copy the template to make an appropriately named new slurm file
        copyfile(template_path, slurm_path)

        # Edit the slurm file so the job name and command match the .com file
        with open(slurm_path, "r") as slurm_file:
            list_of_lines = slurm_file.readlines()
        list_of_lines[application_line] = (
            f'application="g16 /home/{your_name}/{folder_names}/{job_name}.com '
            f'/home/{your_name}/{folder_names}/{job_name}.log"\n'
        )
        list_of_lines[job_name_line] = f'#SBATCH -J {job_name}\n'
        with open(slurm_path, "w") as slurm_file:
            slurm_file.writelines(list_of_lines)

        print(f'sbatch slurm_submit_{job_name}.peta4-cclake')


In [6]:
# Write a slurm script next to every example k2Ar .com file and print the sbatch commands
slurm_file_generation(
    folder_path='data/Cox-molecules/DFT/example_com_files_k2Ar',
    your_name='dsw46',
)

sbatch slurm_submit_91_k2Ar_TS.peta4-cclake
sbatch slurm_submit_84_k2Ar_TS.peta4-cclake
sbatch slurm_submit_79_k2Ar_TS.peta4-cclake
sbatch slurm_submit_77_k2Ar_Ar-.peta4-cclake
sbatch slurm_submit_88_k2Ar_Ar-.peta4-cclake
sbatch slurm_submit_98_k2Ar_TS.peta4-cclake
sbatch slurm_submit_81_k2Ar_TS.peta4-cclake
sbatch slurm_submit_89_k2Ar_Ar-.peta4-cclake
sbatch slurm_submit_76_k2Ar_Ar-.peta4-cclake
sbatch slurm_submit_71_k2Ar_Ar-.peta4-cclake
sbatch slurm_submit_88_k2Ar_TS.peta4-cclake
sbatch slurm_submit_75_k2Ar_TS.peta4-cclake
sbatch slurm_submit_94_k2Ar_TS.peta4-cclake
sbatch slurm_submit_96_k2Ar_Ar-.peta4-cclake
sbatch slurm_submit_91_k2Ar_Ar-.peta4-cclake
sbatch slurm_submit_97_k2Ar_TS.peta4-cclake
sbatch slurm_submit_84_k2Ar_Ar-.peta4-cclake
sbatch slurm_submit_76_k2Ar_TS.peta4-cclake
sbatch slurm_submit_83_k2Ar_Ar-.peta4-cclake
sbatch slurm_submit_28_k2Ar_TS.peta4-cclake
sbatch slurm_submit_82_k2Ar_TS.peta4-cclake
sbatch slurm_submit_82_k2Ar_Ar-.peta4-cclake
sbatch slurm_submit_28

# Extract optimised coordinates from a .log file

In [7]:
from DFT_utilities import extract_coordinates

In [8]:
# Extract the optimised coordinates of molecule 61: k1 intermediate
# The displayed result is a tuple: a list of 'element x y z' text lines plus an integer
# NOTE(review): the meaning of the trailing integer is not visible here -- confirm in DFT_utilities
extract_coordinates('data/Cox-molecules/DFT/k1/61_int.log')

(['O  1.566958   -1.162343   -0.42686\n',
  'C  1.353330   -0.112653    0.27273\n',
  'C -0.056507    0.000306    0.61830\n',
  'C -0.576068   -1.272102    0.04604\n',
  'N  0.321882   -1.943142   -0.58815\n',
  'C -1.964465   -1.772433    0.11399\n',
  'H -2.042348   -2.757085   -0.34161\n',
  'H -2.638342   -1.088044   -0.40723\n',
  'H -2.291781   -1.827110    1.15444\n',
  'C  2.462767    0.783205    0.57111\n',
  'H  2.472594    1.006920    1.64029\n',
  'H  2.279838    1.732055    0.05639\n',
  'H  3.413656    0.364598    0.25349\n',
  'B -0.767315    1.311710   -0.05224\n',
  'O -2.003425    1.707101    0.29669\n',
  'O -0.040855    1.929251   -1.00217\n',
  'H -0.473071    2.699977   -1.38757\n',
  'H -2.421110    1.258442    1.03685\n',
  'H -0.211796    0.103688    1.69877\n'],
 1)

# Write a .com file using final geometry of a .log file

In [9]:
from os import walk
from DFT_utilities import write_com_file

In [10]:
# Pick exactly one header: leave the wanted line active and keep the others commented out

#header = ['#n M06L/6-311++G** SCRF=(Solvent=Water) Opt']
header = ['#n M06L/6-311++G** SCRF=(Solvent=Water) Opt=(TS,CalcFC,noeigentest)']
#header = ['#n B3LYP/6-31G(d) Opt']
#header = ['#n B3LYP/6-31G(d) Opt=(TS,CalcFC,noeigentest)']

# NOTE(review): the logs are read from the k2 folder but the new .com files go to
# example_com_files_k1 -- confirm that this k2/k1 mismatch is intended
folder_path = 'data/Cox-molecules/DFT/k2'
output_folder = 'data/Cox-molecules/DFT/example_com_files_k1'

# File names directly inside folder_path ([] if the folder does not exist)
filenames = next(walk(folder_path), (None, None, []))[-1]
for filename in filenames:
    # Only .log files whose name contains 'TS' are turned into new .com files
    if not filename.endswith('.log') or 'TS' not in filename:
        continue
    write_com_file(f'{folder_path}/{filename}', filename, output_folder, header)


# Read energy from successfully optimised .log file

## Just 1 file

In [11]:
from DFT_utilities import read_energy

In [12]:
# Path to a single .log file; the value returned by read_energy is shown as the cell output
file_path = 'data/Cox-molecules/DFT/k2/61_TS.log'
read_energy(file_path)

'E(RM06L) =  -653.204896475'

## All files in a folder, and write to csv

In [13]:
from DFT_utilities import read_energy_in_whole_folder

In [14]:
# Folder whose .log files are processed in one go
# NOTE(review): per the section heading the energies are written to a csv; the csv location is
# decided inside read_energy_in_whole_folder and is not visible here -- confirm in DFT_utilities
folder_path = 'data/Cox-molecules/DFT/k2'
read_energy_in_whole_folder(folder_path)

0  errors out of  35  calculations.


# Extract coordinates from .log files to copy-paste into SI

In [15]:
from DFT_utilities import prepare_coordinates_for_SI
from DFT_utilities import generate_coordinates_file_for_SI

In [16]:
# Example of what it looks like for 1 molecular optimisation
# (the arguments 61, 'k2' and 'TS' show up in the 'Molecule 61: k2 - TS' title line of the output)
filepath = 'data/Cox-molecules/DFT/k2/61_TS.log'
prepare_coordinates_for_SI(61, 'k2', 'TS', filepath)

# Note: the "\begin{lstlisting}" and "\end{lstlisting}" lines are included so that the block
# can be read by LaTeX on Overleaf

['\\begin{lstlisting}',
 'Molecule 61: k2 - TS',
 '#n M06L/6-311++G** SCRF=(Solvent=Water) Opt=(TS,CalcFC,noeigentest)',
 'E(RM06L) =  -653.204896475',
 'Charge, multiplicity: -1, 1',
 'Geometry:',
 'C  0.432700   -0.033705    0.32222',
 'B -1.729075    0.302128   -0.67570',
 'O -2.303357    1.179482    0.23382',
 'O -1.281550    0.811182   -1.89278',
 'H -1.040741    1.735949   -1.80619',
 'O -2.162617   -1.028647   -0.67105',
 'H -2.094696   -1.405022    1.61996',
 'H -0.870217   -0.505235    1.57167',
 'O -1.617715   -0.788558    2.18656',
 'H -1.702043   -1.536514   -1.34326',
 'H -2.374446    0.731291    1.08964',
 'C  1.394611    1.033825    0.26957',
 'N  2.616833    0.675355   -0.09040',
 'O  2.533179   -0.707277   -0.30226',
 'C  1.232319   -1.079574   -0.04419',
 'C  1.152549    2.475374    0.57079',
 'C  0.975527   -2.530302   -0.23038',
 'H  0.421706    2.906731   -0.11994',
 'H  2.070973    3.062225    0.50190',
 'H  0.737471    2.600228    1.57498',
 'H  1.693993   -3.147

In [17]:
# Generate the SI coordinates file for every DFT folder of Cox's molecules

cox_dft_folders = ['k1', 'k2', 'k2Ar', 'k2cat', 'k3', 'k4', 'k5']
for dft_folder in cox_dft_folders:
    generate_coordinates_file_for_SI(f'data/Cox-molecules/DFT/{dft_folder}')


Coordinates created successfully
Coordinates created successfully
Coordinates created successfully
Coordinates created successfully
Coordinates created successfully
Coordinates created successfully
Coordinates created successfully


In [18]:
# Generate the SI coordinates file for every DFT folder of the novel molecules

novel_dft_folders = ['k1', 'k2', 'k2Ar', 'k2cat', 'k3', 'k4', 'k5']
for dft_folder in novel_dft_folders:
    generate_coordinates_file_for_SI(f'data/novel-molecules/DFT/{dft_folder}')


Coordinates created successfully
Coordinates created successfully
Coordinates created successfully
Coordinates created successfully
Coordinates created successfully
Coordinates created successfully
Coordinates created successfully
