In [1]:
from qcsubmit.factories import TorsiondriveDatasetFactory
from qcsubmit.datasets import TorsiondriveDataset
from qcsubmit import workflow_components 
from qcsubmit.common_structures import TorsionIndexer 
from openforcefield.topology import Molecule as OFFMolecule

# from qcelemental.models import Molecule as QCEMolecule
import os, json, tqdm



In [2]:
# Start from the factory defaults, then state explicitly which SCF properties
# should be requested for every calculation.
factory = TorsiondriveDatasetFactory()
factory.scf_properties = [
    'dipole',
    'quadrupole',
    'wiberg_lowdin_indices',
    'mayer_indices',
]
factory  # bare expression: display the resulting factory settings

TorsiondriveDatasetFactory(method='B3LYP-D3BJ', basis='DZVP', program='psi4', maxiter=200, driver=<DriverEnum.gradient: 'gradient'>, scf_properties=['dipole', 'quadrupole', 'wiberg_lowdin_indices', 'mayer_indices'], spec_name='default', spec_description='Standard OpenFF optimization quantum chemistry specification.', priority='normal', dataset_tags=['openff'], compute_tag='openff', workflow={}, optimization_program=GeometricProcedure(program='geometric', coordsys='tric', enforce=0.1, epsilon=0.0, reset=True, qccnv=True, molcnv=False, check=0, trust=0.1, tmax=0.3, maxiter=300, convergence_set='GAU', constraints={}), grid_spacings=[15], energy_upper_limit=0.05, dihedral_ranges=None, energy_decrease_thresh=None)

In [3]:
# Persist the factory settings to a YAML file next to the notebook.
# NOTE(review): the file name is spelled "setttings" (three t's). It is kept
# as-is here because other files may refer to it — confirm before renaming.
factory.export_settings("theory-bm-set_setttings.yaml")

In [4]:
# Create the dataset shell: no molecules are passed in here (molecules=[]),
# entries are attached one at a time with dataset.add_molecule afterwards.
dataset = factory.create_dataset(
    dataset_name="Theory benchmarking set B3LYP-D3BJ DZVP",
    molecules=[],
    description="A torsiondrive dataset for benchmarking B3LYP-D3BJ/DZVP",
    tagline="Torsiondrives for benchmarking B3LYP-D3BJ/DZVP",
)

In [5]:
# Read the torsion selection from disk; the parsed top-level JSON value is
# used as a mapping further down the notebook.
with open('input_torsions.json') as handle:
    selected_torsions = json.loads(handle.read())

In [6]:
# Add one dataset entry per selected torsion.
# Each value of `selected_torsions` provides:
#   "attributes"   - passed straight through to dataset.add_molecule
#   "atom_indices" - the dihedral(s) to drive for this entry
# and the whole record is handed to OFFMolecule.from_qcschema to rebuild the molecule.
# NOTE: OFFMolecule.from_qcschema(torsion_data, client=client) was tried and did
# not work (cause not investigated yet), so the molecule is rebuilt without a
# client and conformers are generated locally instead.
for idx, torsion_data in enumerate(tqdm.tqdm(selected_torsions.values())):
    # Look the required keys up first so a malformed record fails before any
    # toolkit work is done.
    attributes = torsion_data["attributes"]
    torsion_atom_indices = torsion_data["atom_indices"]

    molecule = OFFMolecule.from_qcschema(torsion_data)
    # Ask for 5 conformers; the count reported below is how many were produced.
    molecule.generate_conformers(n_conformers=5)

    # tqdm.write instead of print() so the per-entry line is not fused onto the
    # progress bar line.
    tqdm.tqdm.write(f'{idx}: {molecule.n_conformers}')

    dataset.add_molecule(
        index=idx,
        molecule=molecule,
        attributes=attributes,
        dihedrals=torsion_atom_indices,
    )

8%|▊         | 3/36 [00:00<00:08,  3.67it/s]0: 2
1: 1
2: 1
 14%|█▍        | 5/36 [00:00<00:06,  4.76it/s]3: 5
4: 2
 19%|█▉        | 7/36 [00:00<00:05,  5.62it/s]5: 2
6: 1
7: 2
 31%|███       | 11/36 [00:01<00:03,  8.02it/s]8: 1
9: 2
10: 2
 36%|███▌      | 13/36 [00:01<00:02,  8.39it/s]11: 2
12: 5
 42%|████▏     | 15/36 [00:01<00:02,  9.30it/s]13: 1
14: 2
15: 1
 53%|█████▎    | 19/36 [00:01<00:01, 10.75it/s]16: 2
17: 1
18: 1
 64%|██████▍   | 23/36 [00:02<00:01, 12.72it/s]19: 1
20: 4
21: 4
22: 1
 69%|██████▉   | 25/36 [00:02<00:00, 12.69it/s]23: 4
24: 1
25: 3
26: 3
 81%|████████  | 29/36 [00:02<00:00, 14.18it/s]27: 2
28: 2
29: 1
 92%|█████████▏| 33/36 [00:02<00:00, 12.38it/s]30: 2
31: 3
32: 2
 97%|█████████▋| 35/36 [00:03<00:00, 12.23it/s]33: 3
34: 5
100%|██████████| 36/36 [00:03<00:00, 10.63it/s]35: 5



In [8]:
# Display the specification name stored on the dataset
# (presumably carried over from the factory's spec_name — confirm).
dataset.spec_name

'default'

In [9]:
# Display the dataset's molecule count.
# NOTE(review): presumably this counts unique molecules, so it can be smaller
# than n_records when one molecule carries several torsions — confirm.
dataset.n_molecules

31

In [10]:
# Display the dataset's record count.
# NOTE(review): expected to equal the number of dataset.add_molecule calls made
# while populating the dataset — TODO confirm.
dataset.n_records

36

In [11]:
# Point the dataset metadata at the folder that documents this submission.
submission_url = "https://github.com/openforcefield/qca-dataset-submission/tree/master/submissions/2020-07-27-theory-bm-set-b3lyp-d3bj-dzvp"
dataset.metadata.long_description_url = submission_url

In [12]:

# Write the assembled dataset to dataset.json (path is relative, so it lands in
# the directory the notebook is run from).
dataset.export_dataset("dataset.json")

In [13]:
# Write the dataset's molecules to theory-bm-set-curated.smi; the second
# argument selects the file type ("smi" — presumably SMILES, confirm).
dataset.molecules_to_file("theory-bm-set-curated.smi", "smi")

In [14]:
# Export the molecules to a PDF with the torsions highlighted.
# The toolkit is selected by keyword so the value cannot be bound to a
# different positional parameter of visualize().
dataset.visualize("theory-bm-set-curated.pdf", toolkit='openeye')