In [1]:
import qcportal as ptl
import pandas as pd
import datetime
import time

In [1]:
# connect without auth
# read only
client = ptl.FractalClient()

In [2]:
# connect with authentication, therefore write access
# don't use unless you plan to submit things
client = ptl.FractalClient.from_file()

In [3]:
client

## Dataset Tracking

### OpenFF Fragmenter Validation 1.0

In [4]:
ds = client.get_collection("TorsionDriveDataset", "OpenFF Fragmenter Validation 1.0")

In [5]:
ds.list_specifications()

Unnamed: 0_level_0,Description
Name,Unnamed: 1_level_1
default,Standard OpenFF torsiondrive specification.
B3LYP-d3bj/aug-cc-pvdz,OpenFF model B3LYP-d3bj/aug-cc-pvdz TorsionDri...
B3LYP-d3bj/aug-cc-pvtz,OpenFF model B3LYP-d3bj/aug-cc-pvtz TorsionDri...
B3LYP-d3bj/def2-tzvppd,OpenFF model B3LYP-d3bj/def2-tzvppd TorsionDri...
BLYP-d3bj/dzvp,OpenFF model BLYP-d3bj/dzvp TorsionDrive explo...
BLYP-d3bj/aug-cc-pvdz,OpenFF model BLYP-d3bj/aug-cc-pvdz TorsionDriv...
BLYP-d3bj/aug-cc-pvtz,OpenFF model BLYP-d3bj/aug-cc-pvtz TorsionDriv...
BLYP-d3bj/def2-tzvppd,OpenFF model BLYP-d3bj/def2-tzvppd TorsionDriv...
LRC-WPBEH/dzvp,OpenFF model LRC-WPBEH/dzvp TorsionDrive explo...
LRC-WPBEH/aug-cc-pvdz,OpenFF model LRC-WPBEH/aug-cc-pvdz TorsionDriv...


'default' always means 'b3lyp-d3bj' for us.

In [45]:
ds.status('B3LYP-d3bj/aug-cc-pvtz')

Unnamed: 0,B3LYP-d3bj/aug-cc-pvtz
ERROR,18
COMPLETE,3
RUNNING,1


In [7]:
ds.status("default")

Unnamed: 0,default
COMPLETE,16
ERROR,3
RUNNING,3


In [60]:
dataset_torsiondrives = {key: value.object_map for key, value in ds.data.records.items()}#list(ds.data.records.values())[0].object_map

In [70]:
ids = set()
for val in dataset_torsiondrives.values():
    ids.update(set(val.values()))

In [72]:
len(ids)

440

In [73]:
res = client.query_procedures(ids)

In [78]:
res[0].dict()

{'id': '19712153',
 'hash_index': '6f2f9b115349695840b5c3b05c289a61e1d823df',
 'procedure': 'torsiondrive',
 'program': 'torsiondrive',
 'version': 1,
 'protocols': None,
 'extras': {},
 'stdout': None,
 'stderr': None,
 'error': None,
 'task_id': None,
 'manager_name': None,
 'status': <RecordStatusEnum.complete: 'COMPLETE'>,
 'modified_on': datetime.datetime(2020, 5, 1, 19, 25, 49, 169132),
 'created_on': datetime.datetime(2020, 5, 1, 19, 25, 49, 169134),
 'provenance': {'creator': 'torsiondrive',
  'version': 'v0.9.8.1',
  'routine': 'torsiondrive.td_api'},
 'initial_molecule': ['13285308'],
 'keywords': {'dihedrals': [(5, 8, 17, 19)],
  'grid_spacing': [15],
  'dihedral_ranges': None,
  'energy_decrease_thresh': None,
  'energy_upper_limit': 0.1},
 'optimization_spec': {'program': 'geometric',
  'keywords': {'coordsys': 'tric',
   'enforce': 0.1,
   'reset': True,
   'qccnv': True,
   'epsilon': 0}},
 'qc_spec': {'driver': <DriverEnum.gradient: 'gradient'>,
  'method': 'b3lyp-d3bj'

In [81]:
angle_optimizations = set()
for tdr in res:
    if tdr.status == 'COMPLETE':
        continue
    
    for val in tdr.optimization_history.values():
        angle_optimizations.update(set(val))

In [136]:
tdr.dict()

{'id': '20551205',
 'hash_index': 'e8749dd2ab56c182ddc792f6131677f816159c14',
 'procedure': 'torsiondrive',
 'program': 'torsiondrive',
 'version': 1,
 'protocols': None,
 'extras': {},
 'stdout': None,
 'stderr': None,
 'error': None,
 'task_id': None,
 'manager_name': None,
 'status': <RecordStatusEnum.error: 'ERROR'>,
 'modified_on': datetime.datetime(2020, 5, 15, 18, 5, 13, 473407),
 'created_on': datetime.datetime(2020, 5, 15, 18, 5, 13, 473409),
 'provenance': {'creator': 'torsiondrive',
  'version': 'v0.9.8.1',
  'routine': 'torsiondrive.td_api'},
 'initial_molecule': ['13285326'],
 'keywords': {'dihedrals': [(9, 13, 8, 17)],
  'grid_spacing': [15],
  'dihedral_ranges': None,
  'energy_decrease_thresh': None,
  'energy_upper_limit': 0.1},
 'optimization_spec': {'program': 'geometric',
  'keywords': {'coordsys': 'tric',
   'enforce': 0.1,
   'reset': True,
   'qccnv': True,
   'epsilon': 0}},
 'qc_spec': {'driver': <DriverEnum.gradient: 'gradient'>,
  'method': 'tpssh-d3bj',
  'b

In [85]:
angle_optimizations = list(angle_optimizations)

In [87]:
len(angle_optimizations[7000:8000])

621

In [88]:
res_angle_optimizations = []
for i in range(0,8000,1000):
    ids_i = angle_optimizations[i:i+1000]
    res_i = client.query_procedures(ids_i)
    res_angle_optimizations.extend(res_i)
    
len(res_angle_optimizations)

7621

In [90]:
opt_rec = res_angle_optimizations[0]

In [96]:
opt_rec = res_angle_optimizations[7]

In [107]:
print(opt_rec.get_error().error_message)

geomeTRIC run_json error:
Traceback (most recent call last):
  File "/opt/conda/envs/qcfractal/lib/python3.6/site-packages/geometric/run_json.py", line 225, in geometric_run_json
    geometric.optimize.Optimize(coords, M, IC, engine, None, params)
  File "/opt/conda/envs/qcfractal/lib/python3.6/site-packages/geometric/optimize.py", line 1331, in Optimize
    return optimizer.optimizeGeometry()
  File "/opt/conda/envs/qcfractal/lib/python3.6/site-packages/geometric/optimize.py", line 1293, in optimizeGeometry
    self.calcEnergyForce()
  File "/opt/conda/envs/qcfractal/lib/python3.6/site-packages/geometric/optimize.py", line 1002, in calcEnergyForce
    spcalc = self.engine.calc(self.X, self.dirname)
  File "/opt/conda/envs/qcfractal/lib/python3.6/site-packages/geometric/engine.py", line 873, in calc
    return self.calc_new(coords, dirname)
  File "/opt/conda/envs/qcfractal/lib/python3.6/site-packages/geometric/engine.py", line 865, in calc_new
    raise QCEngineAPIEngineError("QCEngin

In [122]:
client.query_results('20152193')

[]

In [None]:
client.query_tasks(program='psi4', )

In [121]:
opt_rec.dict()

{'id': '20152193',
 'hash_index': '05528ad428fbdabf22071fbe140657ef97c9c3fe',
 'procedure': 'optimization',
 'program': 'geometric',
 'version': 1,
 'protocols': {},
 'extras': {},
 'stdout': None,
 'stderr': None,
 'error': '23402848',
 'task_id': None,
 'manager_name': 'PacificResearchPlatform3-openff-qca3-768458b8cf-gtj4t-c4e106ed-b8ef-4e02-af4f-afdf4925c8c3',
 'status': <RecordStatusEnum.error: 'ERROR'>,
 'modified_on': datetime.datetime(2020, 5, 11, 20, 45, 19, 873046),
 'created_on': datetime.datetime(2020, 5, 10, 22, 24, 7, 311897),
 'provenance': None,
 'schema_version': 1,
 'initial_molecule': '13285325',
 'qc_spec': {'driver': <DriverEnum.gradient: 'gradient'>,
  'method': 'b3lyp-d3bj',
  'basis': 'aug-cc-pvtz',
  'keywords': '2',
  'program': 'psi4'},
 'keywords': {'coordsys': 'tric',
  'enforce': 0.1,
  'reset': True,
  'qccnv': True,
  'epsilon': 0,
  'constraints': {'set': [{'type': 'dihedral',
     'indices': [9, 13, 8, 20],
     'value': 180}]},
  'program': 'psi4'},
 '

In [103]:
len([i for i in res_angle_optimizations if i.status == 'ERROR'])

294

In [109]:
error_messages = [i.get_error().error_message for i in res_angle_optimizations if i.status == 'ERROR']

In [114]:
review = (i for i in error_messages)

In [273]:
print(next(review))

geomeTRIC run_json error:
Traceback (most recent call last):
  File "/data/homezvol0/tgokey/.local/miniconda3/envs/qcf/lib/python3.7/site-packages/geometric/run_json.py", line 225, in geometric_run_json
    geometric.optimize.Optimize(coords, M, IC, engine, None, params)
  File "/data/homezvol0/tgokey/.local/miniconda3/envs/qcf/lib/python3.7/site-packages/geometric/optimize.py", line 1331, in Optimize
    return optimizer.optimizeGeometry()
  File "/data/homezvol0/tgokey/.local/miniconda3/envs/qcf/lib/python3.7/site-packages/geometric/optimize.py", line 1293, in optimizeGeometry
    self.calcEnergyForce()
  File "/data/homezvol0/tgokey/.local/miniconda3/envs/qcf/lib/python3.7/site-packages/geometric/optimize.py", line 1002, in calcEnergyForce
    spcalc = self.engine.calc(self.X, self.dirname)
  File "/data/homezvol0/tgokey/.local/miniconda3/envs/qcf/lib/python3.7/site-packages/geometric/engine.py", line 873, in calc
    return self.calc_new(coords, dirname)
  File "/data/homezvol0/tgo

In [97]:
opt_rec.dict()

{'id': '20152193',
 'hash_index': '05528ad428fbdabf22071fbe140657ef97c9c3fe',
 'procedure': 'optimization',
 'program': 'geometric',
 'version': 1,
 'protocols': {},
 'extras': {},
 'stdout': None,
 'stderr': None,
 'error': '23402848',
 'task_id': None,
 'manager_name': 'PacificResearchPlatform3-openff-qca3-768458b8cf-gtj4t-c4e106ed-b8ef-4e02-af4f-afdf4925c8c3',
 'status': <RecordStatusEnum.error: 'ERROR'>,
 'modified_on': datetime.datetime(2020, 5, 11, 20, 45, 19, 873046),
 'created_on': datetime.datetime(2020, 5, 10, 22, 24, 7, 311897),
 'provenance': None,
 'schema_version': 1,
 'initial_molecule': '13285325',
 'qc_spec': {'driver': <DriverEnum.gradient: 'gradient'>,
  'method': 'b3lyp-d3bj',
  'basis': 'aug-cc-pvtz',
  'keywords': '2',
  'program': 'psi4'},
 'keywords': {'coordsys': 'tric',
  'enforce': 0.1,
  'reset': True,
  'qccnv': True,
  'epsilon': 0,
  'constraints': {'set': [{'type': 'dihedral',
     'indices': [9, 13, 8, 20],
     'value': 180}]},
  'program': 'psi4'},
 '

In [25]:
ds.df

Unnamed: 0,default
Cc1c[c:2]([c:1]2ccccc2c1OP(=O)([O-])[O-])[O:3][P:4](=O)([O-])[O-],"TorsionDriveRecord(id='19712153', status='COMP..."
c1cc[c:1]2c(c1)ccc[c:2]2[O:3][P:4](=O)([O-])[O-],"TorsionDriveRecord(id='20548975', status='RUNN..."
c1ccc(cc1)C[CH2:1][NH:2]/[C:3](=[NH+:4]/C(=[NH2+])N)/N,"TorsionDriveRecord(id='19712155', status='COMP..."
C[CH2:1][NH:2]/[C:3](=[NH+:4]/C=[NH2+])/N,"TorsionDriveRecord(id='20548976', status='COMP..."
C[CH2:1][NH:2]/[C:3](=[NH+:4]/C)/N,"TorsionDriveRecord(id='20548977', status='COMP..."
Cc1c([nH]cn1)CSC[CH2:1][NH:2]/[C:3](=N\C#N)/[NH:4]C,"TorsionDriveRecord(id='19712158', status='RUNN..."
C[CH2:1][NH:2]/[C:3](=[N:4]\C#N)/NC,"TorsionDriveRecord(id='20548978', status='COMP..."
C[CH2:1][NH:2]/[C:3](=[N:4]\C)/NC,"TorsionDriveRecord(id='20548979', status='COMP..."
c1cc(oc1)[CH2:1][NH:2][c:3]2cc(c(c[c:4]2C(=O)[O-])S(=O)(=O)N)Cl,"TorsionDriveRecord(id='19712161', status='RUNN..."
c1cc(oc1)[CH2:4][NH:3][c:2]2cc(c(c[c:1]2C(=O)[O-])S(=O)(=O)N)Cl,"TorsionDriveRecord(id='19712162', status='COMP..."


In [38]:
erred = client.query_procedures('20548994')[0]

In [41]:
erred.dict()

{'id': '20548994',
 'hash_index': 'b84cdd6e0d9d10f46169aebdc95dc63047f60939',
 'procedure': 'torsiondrive',
 'program': 'torsiondrive',
 'version': 1,
 'protocols': None,
 'extras': {},
 'stdout': None,
 'stderr': None,
 'error': None,
 'task_id': None,
 'manager_name': None,
 'status': <RecordStatusEnum.error: 'ERROR'>,
 'modified_on': datetime.datetime(2020, 5, 15, 17, 44, 23, 194510),
 'created_on': datetime.datetime(2020, 5, 15, 17, 44, 23, 194512),
 'provenance': {'creator': 'torsiondrive',
  'version': 'v0.9.8.1',
  'routine': 'torsiondrive.td_api'},
 'initial_molecule': ['13285318'],
 'keywords': {'dihedrals': [(5, 4, 9, 7)],
  'grid_spacing': [15],
  'dihedral_ranges': None,
  'energy_decrease_thresh': None,
  'energy_upper_limit': 0.1},
 'optimization_spec': {'program': 'geometric',
  'keywords': {'coordsys': 'tric',
   'enforce': 0.1,
   'reset': True,
   'qccnv': True,
   'epsilon': 0}},
 'qc_spec': {'driver': <DriverEnum.gradient: 'gradient'>,
  'method': 'b3lyp-d3bj',
  'b

In [33]:
ds.df.default.iloc[]

TorsionDriveRecord(id='19712155', status='COMPLETE')

In [26]:
len(ds.df)

22

In [22]:
ds.get_records("b3lyp-d3bj")

AttributeError: 'TorsionDriveDataset' object has no attribute 'get_records'

In [143]:
client.query_results(task_id='10482724')

AttributeError: 'dict' object has no attribute '__dict__'

In [145]:
failed_task = client.query_tasks('10482724')[0]

In [148]:
failed_task.dict()

{'id': '10482724',
 'spec': {'function': 'qcengine.compute_procedure',
  'args': [{'id': None,
    'hash_index': '01ff1e7a13b2dd05275b6bf6bee4da02b2fb93bb',
    'schema_name': 'qcschema_optimization_input',
    'schema_version': 1,
    'keywords': {'coordsys': 'tric',
     'enforce': 0.1,
     'reset': True,
     'qccnv': True,
     'epsilon': 0,
     'constraints': {'set': [{'type': 'dihedral',
        'indices': [8, 15, 4, 12],
        'value': 0}]},
     'program': 'psi4'},
    'extras': {},
    'protocols': {},
    'input_specification': {'schema_name': 'qcschema_input',
     'schema_version': 1,
     'driver': 'gradient',
     'model': {'method': 'blyp-d3bj', 'basis': 'aug-cc-pvdz'},
     'keywords': {'maxiter': 200,
      'scf_properties': ['dipole',
       'quadrupole',
       'wiberg_lowdin_indices',
       'mayer_indices']},
     'extras': {'_qcfractal_tags': {'program': 'psi4', 'keywords': '2'}}},
    'initial_molecule': {'schema_name': 'qcschema_molecule',
     'schema_versi

In [149]:
client.query_results('20213737')

[]

In [155]:
print(client.query_procedures('20213737')[0].get_error().error_message)

geomeTRIC run_json error:
Traceback (most recent call last):
  File "/data/homezvol0/tgokey/.local/miniconda3/envs/qcf/lib/python3.7/site-packages/geometric/run_json.py", line 225, in geometric_run_json
    geometric.optimize.Optimize(coords, M, IC, engine, None, params)
  File "/data/homezvol0/tgokey/.local/miniconda3/envs/qcf/lib/python3.7/site-packages/geometric/optimize.py", line 1331, in Optimize
    return optimizer.optimizeGeometry()
  File "/data/homezvol0/tgokey/.local/miniconda3/envs/qcf/lib/python3.7/site-packages/geometric/optimize.py", line 1301, in optimizeGeometry
    raise GeomOptNotConvergedError("Optimizer.optimizeGeometry() failed to converge.")
geometric.errors.GeomOptNotConvergedError: Optimizer.optimizeGeometry() failed to converge.



#### Restarting failed tasks

In [159]:
ds = client.get_collection("TorsionDriveDataset", "OpenFF Fragmenter Validation 1.0")

In [181]:
resubmit = []
for i in ds.df.default.values:
    if i.status == 'ERROR':
        resubmit.append(i.id)

In [194]:
resubmit = []

for spec in ds.list_specifications().index.tolist():
    print(spec)
    ds.status(spec)
    ds.status(spec)
    for i in ds.df[spec].values:
        if i.status == 'ERROR':
            resubmit.append(i.id)

default
B3LYP-d3bj/aug-cc-pvdz
B3LYP-d3bj/aug-cc-pvtz
B3LYP-d3bj/def2-tzvppd
BLYP-d3bj/dzvp
BLYP-d3bj/aug-cc-pvdz
BLYP-d3bj/aug-cc-pvtz
BLYP-d3bj/def2-tzvppd
LRC-WPBEH/dzvp
LRC-WPBEH/aug-cc-pvdz
LRC-WPBEH/aug-cc-pvtz
LRC-WPBEH/def2-tzvppd
MN15-d3bJ/dzvp
MN15-d3bJ/aug-cc-pvdz
MN15-d3bJ/aug-cc-pvtz
MN15-d3bJ/def2-tzvppd
TPSSH-d3bJ/dzvp
TPSSH-d3bJ/aug-cc-pvdz
TPSSH-d3bJ/aug-cc-pvtz
TPSSH-d3bJ/def2-tzvppd


In [202]:
resubmit[:10]

['20548994',
 '20548997',
 '20548998',
 '19950384',
 '20550574',
 '19950392',
 '20550577',
 '19950408',
 '20550598',
 '19950411']

In [236]:
len(resubmit)

237

In [204]:
for r in resubmit:
    client.modify_services(operation='restart', procedure_id=r)

In [232]:
ds_new = client.get_collection("TorsionDriveDataset", "OpenFF Fragmenter Validation 1.0")

In [233]:
sanity_check = []

for spec in ds_new.list_specifications().index.tolist():
    print(spec)
    try:
        ds_new.status(spec)
    except:
        pass
    ds_new.status(spec)
    for i in ds_new.df[spec].values:
        if i.status == 'ERROR':
            sanity_check.append(i.id)

default
B3LYP-d3bj/aug-cc-pvdz
B3LYP-d3bj/aug-cc-pvtz
B3LYP-d3bj/def2-tzvppd
BLYP-d3bj/dzvp
BLYP-d3bj/aug-cc-pvdz
BLYP-d3bj/aug-cc-pvtz
BLYP-d3bj/def2-tzvppd
LRC-WPBEH/dzvp
LRC-WPBEH/aug-cc-pvdz
LRC-WPBEH/aug-cc-pvtz
LRC-WPBEH/def2-tzvppd
MN15-d3bJ/dzvp
MN15-d3bJ/aug-cc-pvdz
MN15-d3bJ/aug-cc-pvtz
MN15-d3bJ/def2-tzvppd
TPSSH-d3bJ/dzvp
TPSSH-d3bJ/aug-cc-pvdz
TPSSH-d3bJ/aug-cc-pvtz
TPSSH-d3bJ/def2-tzvppd


In [238]:
len(sanity_check)

236

In [239]:
sanity_check[:10]

['20548997',
 '20548998',
 '19950384',
 '20550574',
 '19950392',
 '20550577',
 '19950408',
 '20550598',
 '19950411',
 '20550600']

In [240]:
for r in sanity_check:
    print(client.modify_services(operation='restart', procedure_id=int(r)))

Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_updated=1)
Data(n_upd

In [251]:
ds_new2 = client.get_collection("TorsionDriveDataset", "OpenFF Fragmenter Validation 1.0")

In [252]:
sanity_check2 = []

for spec in ds_new2.list_specifications().index.tolist():
    print(spec)
    try:
        ds_new2.status(spec)
    except:
        pass
    ds_new2.status(spec)
    for i in ds_new2.df[spec].values:
        if i.status == 'ERROR':
            sanity_check2.append(i.id)

default
B3LYP-d3bj/aug-cc-pvdz
B3LYP-d3bj/aug-cc-pvtz
B3LYP-d3bj/def2-tzvppd
BLYP-d3bj/dzvp
BLYP-d3bj/aug-cc-pvdz
BLYP-d3bj/aug-cc-pvtz
BLYP-d3bj/def2-tzvppd
LRC-WPBEH/dzvp
LRC-WPBEH/aug-cc-pvdz
LRC-WPBEH/aug-cc-pvtz
LRC-WPBEH/def2-tzvppd
MN15-d3bJ/dzvp
MN15-d3bJ/aug-cc-pvdz
MN15-d3bJ/aug-cc-pvtz
MN15-d3bJ/def2-tzvppd
TPSSH-d3bJ/dzvp
TPSSH-d3bJ/aug-cc-pvdz
TPSSH-d3bJ/aug-cc-pvtz
TPSSH-d3bJ/def2-tzvppd


In [253]:
len(sanity_check2)

237

In [254]:
sanity_check2[:10]

['20548994',
 '20548997',
 '20548998',
 '19950384',
 '20550574',
 '19950392',
 '20550577',
 '19950408',
 '20550598',
 '19950411']

In [250]:
ds_new2.status('default')

Unnamed: 0,default
COMPLETE,11
RUNNING,8
ERROR,3


### Fragmenter paper

In [5]:
ds = client.get_collection("TorsionDriveDataset", "Fragmenter paper")

In [7]:
ds.status("default")

Unnamed: 0,default
COMPLETE,8
ERROR,8
RUNNING,6


In [None]:
ds.df

In [8]:
ds.df

Unnamed: 0,default
Cc1c[c:2]([c:1]2ccccc2c1OP(=O)([O-])[O-])[O:3][P:4](=O)([O-])[O-],"TorsionDriveRecord(id='19712153', status='RUNN..."
c1cc[c:1]2c(c1)ccc[c:2]2[O:3][P:4](=O)([O-])[O-],"TorsionDriveRecord(id='19712154', status='RUNN..."
c1ccc(cc1)C[CH2:1][NH:2]/[C:3](=[NH+:4]/C(=[NH2+])N)/N,"TorsionDriveRecord(id='19712155', status='RUNN..."
C[CH2:1][NH:2]/[C:3](=[NH+:4]/C=[NH2+])/N,"TorsionDriveRecord(id='19712156', status='RUNN..."
C[CH2:1][NH:2]/[C:3](=[NH+:4]/C)/N,"TorsionDriveRecord(id='19712157', status='RUNN..."
Cc1c([nH]cn1)CSC[CH2:1][NH:2]/[C:3](=N\C#N)/[NH:4]C,"TorsionDriveRecord(id='19712158', status='RUNN..."
C[CH2:1][NH:2]/[C:3](=[N:4]\C#N)/NC,"TorsionDriveRecord(id='19712159', status='RUNN..."
C[CH2:1][NH:2]/[C:3](=[N:4]\C)/NC,"TorsionDriveRecord(id='19712160', status='RUNN..."
c1cc(oc1)[CH2:1][NH:2][c:3]2cc(c(c[c:4]2C(=O)[O-])S(=O)(=O)N)Cl,"TorsionDriveRecord(id='19712161', status='RUNN..."
c1cc(oc1)[CH2:4][NH:3][c:2]2cc(c(c[c:1]2C(=O)[O-])S(=O)(=O)N)Cl,"TorsionDriveRecord(id='19712162', status='RUNN..."


In [21]:
tdr = ds.df.default.iloc[0]
tdr

TorsionDriveRecord(id='19712153', status='INCOMPLETE')

In [25]:
tdr.detailed_status()

{'status': 'INCOMPLETE',
 'total_points': 24,
 'computed_points': 0,
 'complete_tasks': 0,
 'incomplete_tasks': 0,
 'error_tasks': 0,
 'current_tasks': 0,
 'percent_complete': 0.0,
 'errors': []}

## Data model

In [27]:
dm = ds.data

In [35]:
list(dm.records.values())[0]

TDEntry(name='Cc1c[c:2]([c:1]2ccccc2c1OP(=O)([O-])[O-])[O:3][P:4](=O)([O-])[O-]', initial_molecules={'13285308'}, td_keywords=TDKeywords(dihedrals=[(5, 8, 17, 19)], grid_spacing=[15], dihedral_ranges=None, energy_decrease_thresh=None, energy_upper_limit=0.1), attributes={'canonical_smiles': 'Cc1cc(c2ccccc2c1OP(=O)([O-])[O-])OP(=O)([O-])[O-]', 'canonical_isomeric_smiles': 'Cc1cc(c2ccccc2c1OP(=O)([O-])[O-])OP(=O)([O-])[O-]', 'canonical_explicit_hydrogen_smiles': '[H]c1c(c(c2c(c1[H])c(c(c(c2OP(=O)([O-])[O-])C([H])([H])[H])[H])OP(=O)([O-])[O-])[H])[H]', 'canonical_isomeric_explicit_hydrogen_smiles': '[H]c1c(c(c2c(c1[H])c(c(c(c2OP(=O)([O-])[O-])C([H])([H])[H])[H])OP(=O)([O-])[O-])[H])[H]', 'canonical_isomeric_explicit_hydrogen_mapped_smiles': '[H:22][c:1]1[c:2]([c:4]([c:7]2[c:6]([c:3]1[H:24])[c:9]([c:5]([c:8]([c:10]2[O:19][P:21](=[O:17])([O-:14])[O-:15])[C:11]([H:27])([H:28])[H:29])[H:26])[O:18][P:20](=[O:16])([O-:12])[O-:13])[H:25])[H:23]', 'molecular_formula': 'C11H8O8P2', 'standard_inchi

In [37]:
mol = client.query_molecules('13285308')[0]

In [41]:
mol.__class__

qcelemental.models.molecule.Molecule

In [43]:
client.query_results?

In [44]:
client.query_results(molecule='13285308')

[]

In [47]:
client.query_procedures('19712153')

[TorsionDriveRecord(id='19712153', status='INCOMPLETE')]

In [51]:
ds = client.get_collection("OptimizationDataset", "OpenFF Gen 2 Opt Set 5 Bayer")

In [52]:
ds

<OptimizationDataset(name=`OpenFF Gen 2 Opt Set 5 Bayer`, id='270', client='https://api.qcarchive.molssi.org:443/') >

In [58]:
ds.status("default")

Unnamed: 0,default
COMPLETE,1783
INCOMPLETE,59
ERROR,8


In [59]:
ds = client.get_collection("Dataset", "OpenFF Gen 2 Opt Set 5 Bayer")

In [61]:
ds.status('default')

AttributeError: 'Dataset' object has no attribute 'status'

In [63]:
recs = ds.get_records('b3lyp-d3bj')

In [64]:
recs

Unnamed: 0_level_0,record
index,Unnamed: 1_level_1
CC(C)C(=O)c1cc(cc(c1OC)Cl)O-0,"ResultRecord(id='19453263', status='COMPLETE')"
C[C@@H](c1ccccc1F)/[NH+]=C\2/NCCO2-3,"ResultRecord(id='19519307', status='COMPLETE')"
CCc1cccc(c1)N(CC)C(=O)CO-1,"ResultRecord(id='19519329', status='COMPLETE')"
CC(C)c1ccc(cc1C(=O)O)O-0,
CCCOc1ccc(cc1OC)CCN-0,"ResultRecord(id='19453440', status='COMPLETE')"
...,...
CC(C)Nc1c2cc(c(cc2ncn1)OCCCC[S@@](=N)(=O)C)OC-3,"ResultRecord(id='19453499', status='COMPLETE')"
CC(C)Nc1c2cc(c(cc2ncn1)OCCCC[S@@](=N)(=O)C)OC-4,"ResultRecord(id='19453514', status='COMPLETE')"
Cc1c2c3c(cccc3[nH]1)[C@@H]4C[C@@H](CN([C@@H]4C2)C#N)C(=O)OC-1,"ResultRecord(id='19453586', status='COMPLETE')"
Cc1c2c3c(cccc3[nH]1)[C@@H]4C[C@@H](CN([C@@H]4C2)C#N)C(=O)OC-2,"ResultRecord(id='19453279', status='COMPLETE')"


In [65]:
ds.data.records

[MoleculeEntry(name='CC(C)C(=O)c1cc(cc(c1OC)Cl)O-0', molecule_id='13038409', comment=None, local_results={}),
 MoleculeEntry(name='C[C@@H](c1ccccc1F)/[NH+]=C\\2/NCCO2-3', molecule_id='13130068', comment=None, local_results={}),
 MoleculeEntry(name='CCc1cccc(c1)N(CC)C(=O)CO-1', molecule_id='13131469', comment=None, local_results={}),
 MoleculeEntry(name='CC(C)c1ccc(cc1C(=O)O)O-0', molecule_id='13112153', comment=None, local_results={}),
 MoleculeEntry(name='CCCOc1ccc(cc1OC)CCN-0', molecule_id='13043359', comment=None, local_results={}),
 MoleculeEntry(name='Cc1c2c3c([nH]1)ccc(c3CC[C@@H]2C(=O)[O-])F-0', molecule_id='13131543', comment=None, local_results={}),
 MoleculeEntry(name='Cc1c2c3c(cccc3[nH]1)[C@@H]4C[C@@H](CN([C@@H]4C2)C#N)C(=O)OC-0', molecule_id='13060839', comment=None, local_results={}),
 MoleculeEntry(name='C[C@@H](Cc1ccc(cc1)OCCO)N-0', molecule_id='13038609', comment=None, local_results={}),
 MoleculeEntry(name='Cc1cc(cc(c1CC(=O)O)C)C2CC2-1', molecule_id='13039289', comment=

In [None]:
recs.iloc[0,0].get_stdout()

## Torsion coverage

In [36]:
ds = client.get_collection("TorsionDriveDataset", "OpenFF Substituted Phenyl Set 1")

In [37]:
len(ds.df)

154

In [38]:
ds.data.records["[H:4][O:3][c:2]1ccc(n[cH:1]1)[N+](C)(C)C".lower()]

TDEntry(name='[H:4][O:3][c:2]1ccc(n[cH:1]1)[N+](C)(C)C', initial_molecules={'108604'}, td_keywords=TDKeywords(dihedrals=[(2, 3, 10, 23)], grid_spacing=[15], dihedral_ranges=None, energy_decrease_thresh=None, energy_upper_limit=0.1), attributes={'canonical_explicit_hydrogen_smiles': '[H]c1c(c(nc(c1O[H])[H])[N+](C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H])[H]', 'canonical_isomeric_explicit_hydrogen_mapped_smiles': '[H:12][c:1]1[c:2]([c:5]([n:9][c:3]([c:4]1[O:11][H:24])[H:14])[N+:10]([C:6]([H:15])([H:16])[H:17])([C:7]([H:18])([H:19])[H:20])[C:8]([H:21])([H:22])[H:23])[H:13]', 'canonical_isomeric_explicit_hydrogen_smiles': '[H]c1c(c(nc(c1O[H])[H])[N+](C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H])[H]', 'canonical_isomeric_smiles': 'C[N+](C)(C)c1ccc(cn1)O', 'canonical_smiles': 'C[N+](C)(C)c1ccc(cn1)O', 'inchi_key': 'LCSQOVUGHRUNTD-UHFFFAOYSA-O', 'molecular_formula': 'C8H13N2O', 'provenance': 'cmiles_v0.1.4+9.g7ed3036_openeye_2019.Apr.2', 'standard_inchi': 'InChI=1S/C8H12N2O/c1-10(2,3)8-5-

In [28]:
import json

In [29]:
with open('qca-dataset-submission/2019-07-25-phenyl-set/phenyl_set_torsiondrive_inputs.json', 'r') as f:
    recs = json.load(f)

In [35]:
len(recs.keys())

144

In [39]:
with open('qca-dataset-submission/2019-07-25-phenyl-set/biphenyls_set_input.json', 'r') as f:
    biphenyls = json.load(f)

In [40]:
len(biphenyls.keys())

15