### Startup imports

In [1]:
import pandas as pd

from autoqchem.molecule import molecule
from autoqchem.slurm_manager import slurm_manager
from autoqchem.helper_classes import slurm_status
from autoqchem.draw_utils import draw

import logging
logging.basicConfig(level=logging.INFO)

### Initialize the slurm manager

The slurm manager keeps track of the jobs you have created or currently have running. It caches this information, so you can kill the notebook, turn off your computer, go on vacation, and pick it up later. It will remember which jobs you haven't retrieved from the cluster, etc.

Note: it won't ask you for login unless you execute a method that requires remote access.

Here, input your Rice NetID for the user name (e.g. user = 'yournetID99')

In [2]:
# Replace `netid` with your own Rice NetID; `cluster_host` is the cluster login host.
netid = 'rs216'
cluster_host = 'nots.rice.edu'
sm = slurm_manager(user=netid, host=cluster_host)

This cell will prompt you to login to NOTS with your Rice password.

In [3]:
# Open the remote session to the cluster as the configured user;
# this is the step that asks for your password.
sm.connect()


INFO:autoqchem.slurm_manager:Creating connection to nots.rice.edu as rs216
INFO:paramiko.transport:Connected (version 2.0, client OpenSSH_8.7)
INFO:paramiko.transport:Authentication (password) successful!
INFO:autoqchem.slurm_manager:Connected to nots.rice.edu as rs216.


### Provide some smiles string(s)

We are going to use a CSV file with the SMILES strings in it. This can be easily downloaded from Reaxys or wherever you are getting your molecules from. Just input the name of your CSV file in the parentheses after `pd.read_csv`. Please note that the strings must be in the first column and there should be no header row.

In [12]:
# Header-less CSV of SMILES strings; the column is labelled 'smiles' on load.
smiles_csv = "tertacid_smiles_filtered.csv"
df = pd.read_csv(smiles_csv, names=['smiles'])
df

Unnamed: 0,smiles
0,O=C(O)C(F)(F)c1ccc(OC(F)(F)F)c(Br)c1
1,O=C(O)C(F)(F)c1ccccc1OC(F)(F)F
2,CCOC(=O)C1(C(=O)O)CC1
3,CC(C)(C)OC(=O)N1CC2CC2(C(=O)O)C1
4,CC(C)(C(=O)O)c1ccc(O)cc1
...,...
527,CC(C)(Oc1ccc(Cl)cc1Cl)C(=O)O
528,CC1(C(=O)O)CN(C(=O)OCc2ccccc2)C1
529,CC(C)CC(C)(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O
530,CC(C)(Oc1ccc(C(=O)c2ccc(Cl)cc2)cc1)C(=O)O


### Initialize the molecules and generate conformations

We will use ```for``` loops to iterate over the SMILES strings.

This step can sometimes raise errors: if your molecules have stereochemistry, the code may not be able to parse it. If the stereochemistry is not important, you can single out the molecules with chirality and edit their SMILES to remove the stereochemistry.

In [13]:
# Build one autoqchem molecule per SMILES string, requesting 8 conformers each.
# A SMILES that raises during construction is logged and recorded in
# `failed_smiles` instead of aborting the whole loop, so a single problematic
# input does not throw away the remaining molecules.
mols = []
failed_smiles = []  # (smiles, error message) pairs for inputs that raised
for smile in df['smiles']:
    print(smile)
    try:
        mols.append(molecule(smile, num_conf=8))
    except Exception as err:  # KeyboardInterrupt still propagates
        logging.warning("Could not build molecule for %s: %s", smile, err)
        failed_smiles.append((smile, str(err)))

# Make failures impossible to miss: they need to be fixed (or dropped) by hand.
if failed_smiles:
    logging.warning("%d of %d SMILES failed; inspect `failed_smiles`.",
                    len(failed_smiles), len(df))

O=C(O)C(F)(F)c1ccc(OC(F)(F)F)c(Br)c1
O=C(O)C(F)(F)c1ccccc1OC(F)(F)F
CCOC(=O)C1(C(=O)O)CC1
CC(C)(C)OC(=O)N1CC2CC2(C(=O)O)C1
CC(C)(C(=O)O)c1ccc(O)cc1
Cc1cccc(OC(C)(C)C(=O)O)c1C
O=C(O)C(F)(Cl)C(F)(F)F
O=C(O)C1(c2ccccc2Br)CC1
CC(C)(C)OCC(C)(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O
CC(C)(C(=O)O)c1ccc([N+](=O)[O-])cc1
CCC(F)(F)C(=O)O
NC1(C(=O)O)CCCCCC1
CC(C)(C(=O)O)c1cccc(Cl)c1





CCC(NS(=O)(=O)c1ccccc1)(C(=O)O)c1ccccc1
CC(C)(C)OC(=O)N1CCCC1(C)C(=O)O
COc1ccc(C2(C(=O)O)CC2)cc1OC
O=C(O)C(F)(F)OC(F)(F)C(F)(F)OC(F)(F)C(F)(F)C(F)(F)C(F)(F)F
CC(C)(C(=O)O)c1ccc(F)cc1
O=C(O)C1(c2ccc(Cl)cc2)CC1
CCC(C)(N)C(=O)O
O=C(O)C1(C(F)(F)F)CC(F)(F)C1
CC1=CCC(NC(=O)OCC2c3ccccc3-c3ccccc32)(C(=O)O)CC1
CC(CO)(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O
CCC1(C(=O)O)CCCN(C(=O)OC(C)(C)C)C1
O=C(O)C12CC(C(F)(F)F)(C1)C2
CC(C)(C)OC(=O)NC1(C(=O)O)CCCC1
CC(C)(CCC(=O)O)C(=O)O
CC(C)(Oc1ccc(F)cc1)C(=O)O
CC(C)(ON1C(=O)CCC1=O)C(=O)O
CC(C)(O)C(=O)O
CC(C)(Cc1cccc(CC(C)(C)C(=O)O)c1)C(=O)O
CC(C)(CO[Si](c1ccccc1)(c1ccccc1)C(C)(C)C)C(=O)O
CC(C)(C)OC(=O)N1CCCC1(Cc1ccccc1)C(=O)O
CC(C)(C)OC(=O)NCC1(C(=O)O)CCOCC1
CC1=CC=CC2=NC(=O)CC(C)(C(=O)O)N12
CC(C)(C)OC(=O)N[C@@H]1CN(C(=O)OCc2ccccc2)[C@](C)(C(=O)O)C1
COc1c(C)c(C(C)(C)C(=O)O)c(C)c(C)c1O
O=C(O)C1(O)CC1
O=C(NC1(C(=O)O)Cc2ccccc2C1)OCC1c2ccccc2-c2ccccc21
CN1CCC(N)(C(=O)O)CC1
CC(C)(Br)C(=O)O
COC(C(=O)O)(c1ccccc1)C(F)(F)F
C=C(C)[C@@H]1CC[C@]2(C(=O)O)CC[C@]3(C)[C@H](CC[C@




NC1(C(=O)O)CCC1
CC1(C)CC1(C(=O)O)c1ccccc1F
CC1(C)CC[C@]2(C(=O)O)CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]2C1
CCC(N)(CC)C(=O)O
CC(C)(Oc1ccc(F)cc1F)C(=O)O
CC12CCC(C(=O)O)(OC1=O)C2(C)C
O=C(O)C1(O)CCN(Cc2ccccc2)CC1
O=C(O)C1(c2ccccc2)CCC1
O=C(O)C12CCC(CC1)CC2
O=C(O)C12CC3CC(CC(O)(C3)C1)C2
O=C(O)C(F)(F)c1ccc([N+](=O)[O-])cc1
CC(C)(C)OC(=O)NCCCCC(C)(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O





COc1ccc(OC(C)(C)C(=O)O)cc1
CC(C)(Oc1ccc(C(C)(C)c2ccccc2)cc1)C(=O)O
O=C(O)C1(O)c2ccccc2-c2ccccc21
CC(C)(C)OC(=O)NC1(C(=O)O)CCNCC1
COC(C)(C(=O)O)c1cccc2ccccc12
O=C(O)C12CC3CC(CC(Cl)(C3)C1)C2
CC1(C(=O)O)CCOCC1
CC(C)(CN)C(=O)O
C=CCCCC(C)(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O
NC1(C(=O)O)CCN(Cc2ccccc2)C1
O=C(O)C12CCC(C(=O)O)(CC1)CC2
NC1(C(=O)O)CCC2(CC1)OCCO2
O=C(O)C1(CO)CC1
Cc1c(C)c2c(c(C)c1O)CCC(C)(C(=O)O)O2
N#CC1(C(=O)O)CCOCC1
CC(C)C1=CC2=CC[C@@H]3[C@](C)(CCC[C@@]3(C)C(=O)O)[C@H]2CC1
Cc1ccc(C(C)(C)C(=O)O)cc1
O=C(O)C(F)(F)c1ccc2ncccc2c1
O=C(O)C(F)(F)c1ccc(Cl)cc1
C=CCC(C(=O)O)(c1ccccc1)c1ccccc1
O=C(O)C12CC(F)(C1)C2
CC(C)(C(=O)O)c1ccc(C(F)(F)F)cc1
O=C(O)C1(c2ccccc2F)CCCCC1
O=C(O)C1(c2ccc(C(F)(F)F)cc2)CC1
O=C(O)C1(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C1(F)F
CCCCCCCCCCC(C(=O)O)C1(C(=O)O)CCC(=O)O1
C=C[C@@H]1C[C@]1(NC(=O)OC(C)(C)C)C(=O)O
C[C@H]1[C@H](C)CC[C@]2(C(=O)O)CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)C(C)(C)[C@@H]5CC[C@]43C)[C@H]12
CC(C)(Oc1ccc(Br)cc1Cl)C(=O)O
COc1ccc(C2(C(=O)O)CCCC2)cc1
O=C(O)C(F)





CC(C)(C)OC(=O)N1CC(N)(C(=O)O)C1
CCCCCCCC(C)(C)C(=O)O
O=C(O)C(F)(F)c1cccc(Br)c1
O=C(O)CC(F)(F)C(=O)O
O=C(O)C1(C(=O)O)CC(OCc2ccccc2)C1
Nc1ccc(C2(C(=O)O)CC2)cc1Cl
O=C(O)C1(S(=O)(=O)c2ccc(Cl)cc2)CCCC1
O=C(O)C(O)(C(F)(F)F)C(F)(F)F
O=C(O)C1(c2ccc(Cl)cc2F)CC1
CC(Cc1ccccc1F)(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O
CCC(N)(C(=O)O)c1ccccc1
O=C(OCC1c2ccccc2-c2ccccc21)N1CCC(C(=O)O)(c2ccccc2)CC1
CCC1(C(=O)O)COC(C)(C)OC1
NC1(C(=O)O)CCOCC1
CC(C)(C)OC(=O)NC1(C(=O)O)CCOCC1
O=C(O)C1(c2c(F)cccc2F)CCC1
CC(C)(Oc1ccc2c(c1)CCC2)C(=O)O
CC(C)(C)OC(=O)NC12CCC(C(=O)O)(CC1)CC2
Cc1ccc(OC(C)(C)C(=O)O)cc1
COC(=O)CC(C)(C)C(=O)O
CC(CCl)(CCl)C(=O)O
CC(=O)NC(C)(C(=O)O)c1ccccc1F
CC(C)(CC(=O)O)C(=O)O
CC(O)(C(=O)O)c1ccccc1
COc1cccc(C2(C(=O)O)CC2)c1
CC1(C)OCC(C)(C(=O)O)CO1
COC(=O)C1(C(=O)O)CCN(C(=O)OC(C)(C)C)CC1
CC(N)(C(=O)O)c1ccco1
CC(C)(Oc1ccc(C(C)(C)C)cc1)C(=O)O
O=C(O)C(F)(F)c1ccc(F)c(Cl)c1
CC(C)(Oc1ccc(C#N)cc1)C(=O)O
CC(C)c1ccc(OC(C)(C)C(=O)O)cc1
CC1(C(=O)O)CC(F)(F)C1
O=C(O)C1(c2ccc(OC(F)(F)F)cc2)CC1
O=C(O)C1(C2CC2)CC1
N#CC1




CCC(C)(NC(=O)OC(C)(C)C)C(=O)O
O=C(O)C(CCl)(CCl)CCl
CCOC(=O)C1(C(=O)O)CCC1
O=C(O)C1(C(=O)Nc2ccc(F)cc2)CC1
CC(C)(C)OC(=O)NC(Cc1ccc(OC(C)(C)C)cc1)C(=O)NC(C)(C)C(=O)O
CC(N)(Cc1ccccc1)C(=O)O
CC(C)(C)c1ccc(C(F)(F)C(=O)O)cc1
O=C(O)C1(c2cccc(F)c2)CCC1
O=C(O)C1(Br)CCCCC1
CC(C)(C)OC(=O)NC(C)(Cc1ccccc1)C(=O)O
CC(C)(C)OC(=O)NC(C)(C)C(=O)O
O=C(O)C1(c2ccc(Cl)c(Cl)c2)CC1
CC(C)(C)OC(=O)NCC1(C(=O)O)CCCC1
O=C(O)C(CCBr)(c1ccccc1)c1ccccc1
NC1(C(=O)O)CCCC1
CCc1ccc(C(C)(C)C(=O)O)cc1I
CC(C)(Cc1cccc(Br)c1)C(=O)O
CCC(C)(CC)C(=O)O
CC(C)(Oc1ccc(C(N)=O)cc1)C(=O)O
CC(C)(C)OC(=O)NC12CC(C(=O)O)(C1)C2
O=C(O)C12CC3CC(C1)C(O)C(C3)C2
Cc1ccc(C2(C(=O)O)CCCC2)cc1
CC(C)(C)OC(=O)N1CC(F)(C(=O)O)C1
O=C(O)CCC(CC(=O)O)(C(=O)O)P(=O)(O)O
O=C(O)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F
O=C(O)CC1(C(=O)O)CCCC1
CCCCC(C)(O)C(=O)O
CC(C)(C(=O)O)c1cccc(Br)c1
NC(C(=O)O)(c1ccccc1)c1ccccc1
C=C(Cl)C(C)(C)C(=O)O
COC1(C(=O)O)CCN(C(=O)OC(C)(C)C)CC1
O=C(O)C(F)(F)OC(F)(F)C(F)(F)OC(F)(F)C(=O)O
NC(=O)C1(C(=O)O)CC1
O=C(O)C(F)(F)C(F)(Cl)C(F)(F)C(F)(Cl)C(F)(F)C(F)




O=C(O)C(F)(F)S(=O)(=O)F
COC(=O)C12CC(C(=O)O)(C1)C2
CC(C)(C)OC(=O)NCC1(C(=O)O)CC1
CCN(C(=O)OC(C)(C)C)C(C)(C)C(=O)O
O=C(O)C1(c2ccccc2)CCCCC1
COc1cccc(C2(C(=O)O)CCCC2)c1
CC[C@H](C)[C@H](NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)NC(C)(C)C(=O)O
O=C(O)C1(c2ccccc2F)CCCC1
CCC(C)(O)C(=O)O
CC(C)(C)OC(=O)N1CCC(C#N)(C(=O)O)CC1
O=C(O)C1(c2cccs2)CC1
CCCC(CCC)(C(=O)O)C(=O)O
CCc1ccc(C(C)(C)C(=O)O)cc1
CC(C)(SC(=S)SC(C)(C)C(=O)O)C(=O)O
C=CCC(C)(C)C(=O)O
CC(N)(CO)C(=O)O
Cc1cc(OC(C)(C)C(=O)O)ccc1[N+](=O)[O-]
O=C(O)C(c1ccccc1)(c1ccccc1)c1ccccc1
O=C(O)C1(c2ccc(O)cc2)CC1
CC(=O)N[C@@]12C[C@H]3C[C@@H](C1)C[C@](C(=O)O)(C3)C2
CC(C(=O)O)(c1ccccc1)c1ccccc1
CCCC(C(=O)O)(c1ccccc1)c1ccccc1





CC(C)C(C)(NC(=O)OC(C)(C)C)C(=O)O
O=C(O)C(F)(F)c1cccc(Cl)c1
CC(C)(C)OC(=O)NC1(C(=O)O)CC1
CCC(C)(C)C(=O)O
O=C(O)C12CC3CC(CC(Br)(C3)C1)C2
CC(C)(C)OC(=O)N1CCCC(C)(C(=O)O)C1
CC(C)(C)OC(=O)NC1(C(=O)O)CCc2ccccc2C1
NC1(C(=O)O)CCCCC1
CC1(C(=O)O)COC(=O)OC1
CC(C)(C)OC(=O)N1CCCCC1(C)C(=O)O
CC(CO)(CO)C(=O)O
O=C(O)C1(c2ccccc2)CC1(F)F
O=C(O)C1(c2ccc3c(c2)OC(F)(F)O3)CC1
N#CC1(C(=O)O)CC1
CC(=O)N[C@@H]1[C@@H](O)C[C@@](O)(C(=O)O)O[C@H]1C(O)[C@H](O)CO
O=C(O)C1(c2cccs2)CCCC1
O=C(O)C1(C(=O)O)CCC1
C=CCC(CC=C)(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O
C=CCCCC(CCCC=C)(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O
CC(C)(Oc1ccc(C2CC2(Cl)Cl)cc1)C(=O)O
O=C(O)CC12CC3CC(C1)CC(C(=O)O)(C3)C2
CC1(C(=O)O)CC(=O)C1
O=C(O)C1(O)CCC1
O=C(O)C(F)(F)C(F)(Br)C(F)(F)Cl
CC(C)=CCC(C#N)(CC=C(C)C)C(=O)O
Cc1ccc(OC(C)(C)C(=O)O)nc1
O=C(O)C1(C(=O)O)CC1
O=C(O)C1(c2cccc(Br)c2)CCC1
O=C(O)C1(c2ccccc2)CC1
Cc1ccc(C2(C(=O)O)CC2)cc1
O=C(O)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)F
Cc1cc(C)c(C(F)(F)C(=O)O)c(C)c1
O=C(O)C1(O)CCCCC1
O=C(O)C1(c2ccc(F)c(F)c2)CC1
CC(C)(

Draw the molecules -- note that an error will appear at first. Simply change the confId to one of the other numbers using the dropdown to see the molecule.

In [None]:
# Show the interactive viewer for the first molecule in `mols`;
# the confId dropdown picks which conformer is displayed.
draw(mols[0].mol)

interactive(children=(Dropdown(description='confId', options=(0, 1, 2, 3, 4, 5, 6, 7), value=0), Output()), _d…

<function autoqchem.draw_utils._graph_conf(m, confId=0, energies=[])>

### Create gaussian jobs for each conformation

This can be modified according to what level of theory you want, which solvent you want (lookup how Gaussian encodes solvents), and heavy/light basis sets.  

In [15]:
# Gaussian settings applied to every molecule: level of theory, basis sets
# and solvent. Edit them here to change the calculation setup.
gaussian_settings = dict(
    theory="APFD",
    heavy_basis_set="def2tzvp",
    light_basis_set='def2svp',
    # presumably the atomic-number cutoff between the light and heavy
    # basis sets -- confirm against the autoqchem documentation
    max_light_atomic_number=10,
    solvent='Acetonitrile',
)

# Create the Gaussian jobs for each molecule built above.
for mol in mols:
    sm.create_jobs_for_molecule(mol, **gaussian_settings)

INFO:autoqchem.gaussian_input_generator:Generating Gaussian input files for 8 conformations.
INFO:autoqchem.gaussian_input_generator:Generating Gaussian input files for 8 conformations.
INFO:autoqchem.gaussian_input_generator:Generating Gaussian input files for 7 conformations.
INFO:autoqchem.gaussian_input_generator:Generating Gaussian input files for 8 conformations.
INFO:autoqchem.gaussian_input_generator:Generating Gaussian input files for 6 conformations.
INFO:autoqchem.gaussian_input_generator:Generating Gaussian input files for 7 conformations.
INFO:autoqchem.gaussian_input_generator:Generating Gaussian input files for 7 conformations.
INFO:autoqchem.gaussian_input_generator:Generating Gaussian input files for 6 conformations.
INFO:autoqchem.gaussian_input_generator:Generating Gaussian input files for 8 conformations.
INFO:autoqchem.gaussian_input_generator:Generating Gaussian input files for 7 conformations.
INFO:autoqchem.gaussian_input_generator:Generating Gaussian input file

In [16]:
# View the workflow status of all jobs known to the slurm manager.
# With split_by_can=True the counts are broken down per molecule
# (the "can" index in the output), with one column per job status.
sm.get_job_stats(split_by_can=True)

status,created
can,Unnamed: 1_level_1
C#CC(C)(C)C(=O)O,3
C#CC1(C(=O)O)CC1,2
C=C(C)C(C)(C)C(=O)O,5
C=C(C)[C@@H]1CC[C@]2(C(=O)O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12,6
C=C(Cl)C(C)(C)C(=O)O,5
...,...
O=C(OCC1c2ccccc2-c2ccccc21)N1CCC(C(=O)O)(c2ccccc2)CC1,8
O=C(OCc1ccccc1)N1CCC(F)(C(=O)O)C1,8
O=C1C2CC3CC1CC(C(=O)O)(C3)C2,2
O=C1CCC(C(=O)O)(c2cccc(F)c2)CC1,8


### Submit the jobs to the server 

If you are not connected, it will ask for your password and Duo authentication.

In [63]:
# Submit the created jobs to the cluster; the log line reports how many were submitted.
sm.submit_jobs()

INFO:autoqchem.slurm_manager:Submitting 0 jobs.


Check the status of the jobs on the server

In [9]:
# Show the cluster queue as a per-job table (JOBID, PARTITION, NAME, ...).
# NOTE(review): summary=True presumably returns an aggregated view instead -- confirm.
sm.squeue(summary=False)

Unnamed: 0,JOBID,PARTITION,NAME,USER,ST,TIME,NODES,NODELIST(REASON)


### Retrieve jobs

If there are some finished jobs the log files will be checked for completion and downloaded locally.

In [103]:
# Check the log files of finished jobs for completion and download them locally.
sm.retrieve_jobs()

INFO:autoqchem.slurm_manager:There are no jobs submitted to cluster. Nothing to retrieve.


If at this point your jobs have failed or did not submit properly due to errors in the GJF file or SH script, you have to locate the old jobs and delete them before you can create them again (after troubleshooting slurm_manager.py). On a Mac, go to your Library folder and find Application Support. There should be a folder in there called "nots". Open this folder and delete everything. Then restart your autoqchem Jupyter kernel and try again.

### Resubmit incomplete jobs

In [104]:
# Resubmit the jobs that were flagged as incomplete; the log lists which ones.
sm.resubmit_incomplete_jobs()

INFO:autoqchem.slurm_manager:Resubmitting incomplete jobs:


### Upload molecules to the database

In [78]:
# Upload the descriptors of finished molecules to the database, labelled with
# the given tags. Per the log output, jobs holding duplicate conformers are
# removed as part of this step.
sm.upload_done_molecules_to_db(tags=["test_AMZ"])

INFO:autoqchem.slurm_manager:There are 1 finished molecules ['C(C1C(C(C(C(O1)O)O)O)O)O'].
INFO:autoqchem.slurm_manager:Molecule C(C1C(C(C(C(O1)O)O)O)O)O has 0 / 8 duplicate conformers.
INFO:autoqchem.slurm_manager:Removing 0 / 8 jobs and log files that contain duplicate conformers.
INFO:autoqchem.slurm_manager:Uploaded descriptors to DB for smiles: C(C1C(C(C(C(O1)O)O)O)O)O, number of conformers: 8, DB molecule id 61bbad58bb6b37fd936249b9.
