### End-to-end workflow to calculate conformer proportions of strychnine with AQME from SMILES

In [3]:
# cell with import, system name and PATHs
import os, glob, subprocess
from pathlib import Path                                                                                                                                                          
from aqme.csearch import csearch
from aqme.qprep import qprep
from aqme.qcorr import qcorr

# name of the system (reused below to label the SDF and COM folders)
name = 'Strychnine'

# folder layout, all relative to the directory the notebook is launched from
w_dir_main = Path.cwd()
sdf_path = w_dir_main / f'{name}_sdf-files'
com_path = w_dir_main / f'{name}_com-files'
fixed_inp_folder = com_path / 'unsuccessful_QM_outputs' / 'run_1' / 'fixed_QM_inputs'
success_dir = w_dir_main / 'successful_QM_outputs'

###### Step 1: CSEARCH conformational sampling (creates SDF files)

In [None]:
# SMILES string of the molecule to sample
smi = 'C1CN2CC3=CCOC4CC(=O)N5C6C4C3CC2C61C7=CC=CC=C75'

# conformer sampling program:
#   'rdkit' - fast sampling, only works for systems with one molecule
#   'crest' - slower sampling, works for noncovalent complexes and transition
#             structures (a TS example is in the CSEARCH_CREST_TS.ipynb notebook
#             from the CSEARCH_CMIN_conformer_generation folder)
program = 'rdkit'

# CSEARCH conformational sampling, with:
#   w_dir_main  - working directory
#   destination - PATH where the new SDF files are created
#   program     - program used for the conformer sampling
#   smi         - SMILES string
#   name        - name for the output SDF files
#   cregen      - include CREGEN post-analysis for CREST sampling
csearch(
    w_dir_main=w_dir_main,
    destination=sdf_path,
    program=program,
    smi=smi,
    name=name,
    cregen=True,
)

###### Step 2: Writing Gaussian input files with the SDF obtained from CSEARCH

In [None]:
# SDF files found in the CSEARCH output folder (these are converted to QM inputs)
sdf_pattern = f'{sdf_path}/*.sdf'
sdf_rdkit_files = glob.glob(sdf_pattern)

# program for the input files ('gaussian' or 'orca'), followed by the
# keywords line, memory and number of processors written to each input
program = 'gaussian'
qm_input = 'wb97xd/6-31+G(d,p) scrf=(solvent=chloroform,smd) opt freq'
mem = '24GB'
nprocs = 12

# QPREP input files generator, with:
#   w_dir_main  - working directory (the SDF folder)
#   destination - PATH where the new input files are created (com_path)
#   files       - files to convert
#   program     - QM program for the inputs
#   qm_input    - keywords line for the Gaussian inputs
#   mem         - memory to use in the calculations
#   nprocs      - processors to use in the calculations
qprep(
    w_dir_main=sdf_path,
    destination=com_path,
    files=sdf_rdkit_files,
    program=program,
    qm_input=qm_input,
    mem=mem,
    nprocs=nprocs,
)
 

###### Step 3: run the input files with the command line you normally use

In [None]:
# pattern of the input files to submit
input_files = '*.com'

# submit to the HPC or local computer (replace 'qsub' with the command you normally use).
# subprocess.run() with an argument list does not go through a shell, so a literal
# '*.com' argument is never expanded: the pattern is expanded here instead and
# every matching file is submitted with its own call.
# cwd=com_path launches each submission from the folder with the input files
# without changing the working directory of the notebook, so there is nothing
# to restore afterwards (even if the submission command cannot be started).
for input_file in sorted(com_path.glob(input_files)):
    command = ['qsub', input_file.name]
    subprocess.run(command, cwd=com_path)

###### Step 4: QCORR analysis including isomerization filter

In [None]:
# QCORR analysis of the QM outputs, with:
#   w_dir_main  - working directory (folder with the COM inputs)
#   files       - names of the QM output files
#   freq_conv   - detect and fix calcs that converged during geometry optimization
#                 but didn't converge during frequency calcs
#   isom        - type of initial input files where the LOG files come from
#   isom_inputs - folder with the initial input files
qcorr(
    w_dir_main=com_path,
    files='*.log',
    freq_conv='opt=(calcfc,maxstep=5)',
    isom='com',
    isom_inputs=com_path,
)

###### Step 5: resubmission of unsuccessful calculations with suggestions from AQME (if any)

In [None]:
# pattern of the input files to submit
input_files = '*.com'

# collect the fixed inputs; the folder is only looked into when it exists, so
# this cell does nothing (instead of raising) when there is nothing to resubmit
if fixed_inp_folder.is_dir():
    fixed_inputs = sorted(fixed_inp_folder.glob(input_files))
else:
    fixed_inputs = []

# submit to the HPC or local computer (replace 'qsub' with the command you normally use).
# subprocess.run() with an argument list does not go through a shell, so a literal
# '*.com' argument is never expanded: each matching file is submitted with its own call.
# cwd=fixed_inp_folder launches each submission from the folder with the input
# files without changing the working directory of the notebook.
for input_file in fixed_inputs:
    command = ['qsub', input_file.name]
    subprocess.run(command, cwd=fixed_inp_folder)

###### Step 6: analyze the new jobs (if any)

In [None]:
# type of files to analyze with QCORR
qm_files = '*.log'

# QCORR analysis of the resubmitted jobs, with:
#   w_dir_main  - working directory (folder with the fixed inputs)
#   files       - names of the QM output files
#   freq_conv   - detect and fix calcs that converged during geometry optimization
#                 but didn't converge during frequency calcs
#   isom        - type of initial input files where the LOG files come from
#   isom_inputs - folder with the initial input files
qcorr(
    w_dir_main=fixed_inp_folder,
    files=qm_files,
    freq_conv='opt=(calcfc,maxstep=5)',
    isom='com',
    isom_inputs=fixed_inp_folder,
)

###### Step 8: creation of DLPNO input files for ORCA single-point energy calculations

In [None]:
# output files that provide the atoms and coordinates for the
# single-point energy inputs
qm_files = '*.log'

# program for the input files generated by QPREP ('gaussian' or 'orca')
program = 'orca'

# a DLPNO example of keywords for ORCA calculations (one entry per line of the input)
orca_lines = [
    'Extrapolate(2/3,cc) def2/J cc-pVTZ/C DLPNO-CCSD(T) NormalPNO TightSCF RIJCOSX',
    '%cpcm',
    'smd true',
    'SMDsolvent "Chloroform"',
    'end',
    '%scf maxiter 500',
    'end',
    '% mdci',
    'Density None',
    'end',
    '% output',
    'printlevel mini',
    'print[ P_SCFInfo ] 1',
    'print[ P_SCFIterInfo ] 1',
    'print[ P_OrbEn ] 0',
    'print[ P_Cartesian ] 0',
    'end',
    '% elprop',
    'Dipole False',
    'end',
]
qm_input = '\n'.join(orca_lines)

# memory and processors written to each input
mem = '4GB'
nprocs = 12

# QPREP input files generator, with:
#   w_dir_main  - working directory (success_dir, where the LOG files are read from)
#   destination - PATH where the new ORCA input files are created (success_dir)
#   files       - files to convert (the '*.log' pattern above)
#   program     - QM program for the inputs
#   qm_input    - keywords for the ORCA inputs
#   mem         - memory to use in the calculations
#   nprocs      - processors to use in the calculations
qprep(
    w_dir_main=success_dir,
    destination=success_dir,
    files=qm_files,
    program=program,
    qm_input=qm_input,
    mem=mem,
    nprocs=nprocs,
)
 

###### Step 9: run DLPNO calculations

In [None]:
# pattern of the input files to submit
input_files = '*.inp'

# submit to the HPC or local computer (replace 'qsub' with the command you normally use).
# subprocess.run() with an argument list does not go through a shell, so a literal
# '*.inp' argument is never expanded: the pattern is expanded here instead and
# every matching file is submitted with its own call.
# cwd=success_dir launches each submission from the folder with the input files
# without changing the working directory of the notebook, so there is nothing
# to restore afterwards (even if the submission command cannot be started).
for input_file in sorted(success_dir.glob(input_files)):
    command = ['qsub', input_file.name]
    subprocess.run(command, cwd=success_dir)

###### Step 10: calculate population distribution with GoodVibes

In [None]:
# track all the output files from Gaussian (*.log) and ORCA (*.out)
opt_files = glob.glob(f'{success_dir}/*.log')
spc_files = glob.glob(f'{success_dir}/*.out')
all_files = opt_files + spc_files

# move all the output files together to a folder called "GoodVibes_analysis" for simplicity
GV_folder = w_dir_main.joinpath('GoodVibes_analysis')
GV_folder.mkdir(exist_ok=True, parents=True)

for file in all_files:
    # Path.name gives the file name on any OS, no manual handling of
    # '/' and '\\' separators is needed
    filepath = Path(file)
    filepath.rename(GV_folder / filepath.name)

# this command runs GoodVibes, including the population % of each conformer
# (final results in the GoodVibes.out file).
# cwd=GV_folder runs it inside the analysis folder without changing the
# working directory of the notebook.
# NOTE(review): '*.log' is passed literally (no shell is involved), which relies
# on GoodVibes expanding the pattern itself - confirm.
# NOTE(review): '--spc DLPNO' presumably makes GoodVibes pair each LOG file with
# a <name>_DLPNO.out file - confirm the ORCA outputs are named that way.
subprocess.run(
    ['python', '-m', 'goodvibes', '--boltz', '--spc', 'DLPNO', '*.log'],
    cwd=GV_folder,
)