### End-to-end workflow to calculate conformer proportions of strychnine with AQME from SMILES

In [None]:
import os, glob, subprocess
from pathlib import Path 
import shutil
from aqme.csearch import csearch
from aqme.qprep import qprep
from aqme.qcorr import qcorr

###### Step 1: CSEARCH conformational sampling (creates SDF files)

In [None]:
# name of the system; reused below for the SDF folder and the output SDF files
name = 'Strychnine'
# SMILES string of the molecule whose conformers will be sampled
smi = 'C1CN2CC3=CCO[C@H]4CC(=O)N5[C@H]6[C@H]4[C@H]3C[C@H]2[C@@]61C7=CC=CC=C75'

# PATHs: the current folder is the working directory, and the SDF files
# go into a "<name>_sdf-files" subfolder of it
w_dir_main = Path.cwd()
sdf_path = w_dir_main / f'{name}_sdf-files'

# program used for the conformer sampling:
# 1) RDKit ('rdkit'): fast sampling, only works for systems with one molecule
# 2) CREST ('crest'): slower sampling, works for noncovalent complexes and
#    transition structures (see the TS example in the CSEARCH_CREST_TS.ipynb
#    notebook from the CSEARCH_CMIN_conformer_generation folder)
program = 'rdkit'

# run the CSEARCH conformational sampling
csearch(
    w_dir_main=w_dir_main,   # working directory
    destination=sdf_path,    # PATH where the new SDF files are created
    program=program,         # program for conformer sampling
    smi=smi,                 # SMILES string
    name=name,               # name for the output SDF files
)

###### Step 2: Writing Gaussian input files with the SDF obtained from CSEARCH

In [None]:
# folder where the new com files will be created, and the SDF files
# (found in the CSEARCH output folder) that will be converted
com_path = w_dir_main / f'{name}_com-files'
sdf_rdkit_files = glob.glob(f'{sdf_path}/*.sdf')

# program for input file generation:
# 1) Gaussian ('gaussian')
# 2) ORCA ('orca')
program = 'gaussian'
# keywords line, memory and number of processors written to the inputs
qm_input = 'wB97xd/def2TZVPP scrf=(solvent=chloroform,smd) opt freq'
mem = '24GB'
nprocs = 12

# run the QPREP input files generator
qprep(
    w_dir_main=sdf_path,     # working directory (here, the folder with the SDF files)
    destination=com_path,    # PATH where the new com files are created
    files=sdf_rdkit_files,   # files to convert
    program=program,         # QM program for the inputs
    qm_input=qm_input,       # keyword line for the Gaussian inputs
    mem=mem,                 # memory to use in the calculations
    nprocs=nprocs,           # processors to use in the calculations
)
 

###### Step 3: run the input files with the command line you normally use

In [None]:
# files to submit (glob pattern, resolved inside the folder with the input files)
input_files = '*.com'

# move to the folder with the input files
os.chdir(com_path)
try:
    # subprocess.run() with an argument list does not go through a shell, so
    # the wildcard must be expanded in Python; otherwise the literal string
    # '*.com' would be handed to the submission command
    files_to_submit = sorted(glob.glob(input_files))
    if not files_to_submit:
        print(f'No files matching {input_files} were found in {com_path}')

    # submit to the HPC or local computer, one job per input file
    # (replace 'qsub' with the command line you normally use)
    for input_file in files_to_submit:
        command = ['qsub', input_file]
        subprocess.run(command)
finally:
    # returns to working dir, even if the submission raised an error
    os.chdir(w_dir_main)

###### Step 4: QCORR analysis including isomerization filter

In [None]:
# run the QCORR analyzer on the QM outputs located in the com files folder
qcorr(
    # working directory
    w_dir_main=com_path,
    # names of the QM output files
    files='*.log',
    # detect and fix calcs that converged during geometry optimization
    # but didn't converge during frequency calcs
    freq_conv='opt=(calcfc,maxstep=5)',
    # type of initial input files where the LOG files come from
    isom_type='com',
    # folder with the initial input files
    isom_inputs=com_path,
    # processors and memory settings passed to QCORR
    # (presumably used for the regenerated input files -- confirm in the AQME docs)
    nprocs=24,
    mem='96GB',
)

###### Step 5: resubmission of unsuccessful calculations with suggestions from AQME (if any)

In [None]:
# folder where the fixed input files are expected (inside the com files folder)
fixed_inp_folder = com_path.joinpath('unsuccessful_QM_outputs/run_1/fixed_QM_inputs')

# files to submit (glob pattern, resolved inside the folder with the fixed inputs)
input_files = '*.com'

if fixed_inp_folder.is_dir():
    # move to the folder with the input files
    os.chdir(fixed_inp_folder)
    try:
        # subprocess.run() with an argument list does not go through a shell,
        # so the wildcard must be expanded in Python; otherwise the literal
        # string '*.com' would be handed to the submission command
        files_to_submit = sorted(glob.glob(input_files))
        if not files_to_submit:
            print(f'No files matching {input_files} were found in {fixed_inp_folder}')

        # submit to the HPC or local computer, one job per input file
        # (replace 'qsub' with the command line you normally use)
        for input_file in files_to_submit:
            command = ['qsub', input_file]
            subprocess.run(command)
    finally:
        # returns to working dir, even if the submission raised an error
        os.chdir(w_dir_main)
else:
    # this step only applies "if any" calculation needed fixing; without the
    # folder there is nothing to resubmit
    print(f'{fixed_inp_folder} does not exist, there are no jobs to resubmit')

###### Step 6: analyze the new jobs (if any)

In [None]:
# type of files to analyze with QCORR
qm_files = '*.log'

# run the QCORR analyzer, with:
# 1) Working directory (w_dir_main)
# 2) Names of the QM output files (files)
# 3) Detect and fix calcs that converged during geometry optimization but didn't converge during frequency calcs (freq_conv)
# 4) Type of initial input files where the LOG files come from (isom_type)
# 5) Folder with the initial input files (isom_inputs)
# NOTE: the keyword is isom_type, the same one used in the first QCORR
# analysis of this workflow, so both analyses apply the isomerization filter
qcorr(w_dir_main=fixed_inp_folder,files=qm_files,freq_conv='opt=(calcfc,maxstep=5)',
      isom_type='com',isom_inputs=fixed_inp_folder,nprocs=24, mem='96GB')

###### Step 7: calculate population distribution with GoodVibes

In [None]:
# collect the Gaussian output files found in the successful_QM_outputs folder
success_dir = com_path / 'successful_QM_outputs'
opt_files = glob.glob(f'{success_dir}/*.log')

# gather copies of all the output files in a single folder called
# "GoodVibes_analysis" for simplicity (created if it does not exist yet)
GV_folder = w_dir_main / 'GoodVibes_analysis'
GV_folder.mkdir(parents=True, exist_ok=True)

for log_file in opt_files:
    shutil.copy(log_file, GV_folder)

# run GoodVibes from inside the analysis folder, including the population %
# of each conformer (final results in the GoodVibes.out file); the '*.log'
# pattern is passed to GoodVibes as written, without shell expansion
gv_command = ['python', '-m', 'goodvibes', '--boltz', '--xyz', '*.log']
os.chdir(GV_folder)
subprocess.run(gv_command)
# return to the working directory
os.chdir(w_dir_main)