In [1]:
from qcsubmit.factories import TorsiondriveDatasetFactory
from qcsubmit.datasets import TorsiondriveDataset
from qcsubmit import workflow_components 
from qcsubmit.common_structures import TorsionIndexer 
from openforcefield.topology import Molecule as OFFMolecule

# from qcelemental.models import Molecule as QCEMolecule
import os, json, tqdm



In [2]:
# Start from a TorsiondriveDatasetFactory with its default settings and
# override only the fields set below.
factory = TorsiondriveDatasetFactory()

# SCF properties requested for the calculations.
factory.scf_properties = [
    "dipole",
    "quadrupole",
    "wiberg_lowdin_indices",
    "mayer_indices",
]

# Replace the factory's basis set with def2-TZVPP.
factory.basis = "def2-TZVPP"

In [3]:
# Write the factory settings configured above to a YAML file next to the notebook.
# NOTE(review): the filename spells "setttings" with three t's; it is left
# untouched here because renaming it changes which file gets written.
factory.export_settings("theory-bm-set_setttings.yaml")

In [4]:
# Create the dataset from the factory settings. It starts out empty
# (molecules=[]); entries are added one by one with dataset.add_molecule
# later in the notebook.
dataset = factory.create_dataset(
    dataset_name="OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVPP v1.0",
    molecules=[],
    description="A torsiondrive dataset for benchmarking B3LYP-D3BJ/def2-TZVPP",
    tagline="Torsiondrives for benchmarking B3LYP-D3BJ/def2-TZVPP",
)

In [5]:
# Read the selected torsions from the JSON file shipped with this notebook.
# The result is a dict; the loop further down walks over its entries.
with open("input_torsions.json") as torsion_file:
    selected_torsions = json.load(torsion_file)

In [6]:
# Add one entry to the dataset per selected torsion.
# Only the values of `selected_torsions` are needed; the keys are not used.
for idx, torsion_data in enumerate(tqdm.tqdm(selected_torsions.values())):
    attributes = torsion_data["attributes"]
    torsion_atom_indices = torsion_data["atom_indices"]

    # TODO: OFFMolecule.from_qcschema(torsion_data, client=client) was not
    # working for some reason and still needs to be dug into; the molecule is
    # built from the stored schema alone instead.
    molecule = OFFMolecule.from_qcschema(torsion_data)

    # Request 5 conformers; the number actually stored can be lower, so it is
    # reported per entry below.
    molecule.generate_conformers(n_conformers=5)

    # tqdm.write emits the message without gluing it onto the progress-bar
    # line, which a bare print() does.
    tqdm.tqdm.write(f'{idx}: {molecule.n_conformers}')

    # NOTE(review): entries are indexed by their position in the JSON file,
    # not by the JSON key — confirm this is the intended dataset index.
    dataset.add_molecule(
        index=idx,
        molecule=molecule,
        attributes=attributes,
        dihedrals=torsion_atom_indices,
    )

11%|█         | 4/36 [00:00<00:06,  4.59it/s]0: 2
1: 1
2: 1
3: 5
 19%|█▉        | 7/36 [00:00<00:04,  6.26it/s]4: 2
5: 2
6: 1
 31%|███       | 11/36 [00:00<00:02,  9.36it/s]7: 2
8: 1
9: 2
10: 2
 42%|████▏     | 15/36 [00:01<00:01, 12.28it/s]11: 2
12: 5
13: 1
14: 2
 53%|█████▎    | 19/36 [00:01<00:01, 15.21it/s]15: 1
16: 2
17: 1
18: 1
19: 1
 64%|██████▍   | 23/36 [00:01<00:00, 16.85it/s]20: 4
21: 4
22: 1
23: 4
 75%|███████▌  | 27/36 [00:01<00:00, 16.70it/s]24: 1
25: 3
26: 3
27: 2
 83%|████████▎ | 30/36 [00:02<00:00, 16.72it/s]28: 2
29: 1
 94%|█████████▍| 34/36 [00:02<00:00, 14.49it/s]30: 2
31: 3
32: 2
33: 3
100%|██████████| 36/36 [00:02<00:00, 13.75it/s]34: 5
35: 5



In [7]:
# Display the dataset's spec_name.
# NOTE(review): presumably the label of the QC specification the dataset will
# be computed with — confirm against the qcsubmit documentation.
dataset.spec_name

'default'

In [8]:
# Sanity check: the molecule count the dataset reports after the loop above.
# NOTE(review): presumably counts unique molecules, so it can be smaller than
# the number of entries added — confirm.
dataset.n_molecules

31

In [9]:
# Sanity check: the record count the dataset reports after the loop above.
# NOTE(review): presumably one record per torsion entry added — confirm.
dataset.n_records

36

In [10]:
# Point the dataset metadata at this submission's folder in the
# qca-dataset-submission repository on GitHub.
dataset.metadata.long_description_url = "https://github.com/openforcefield/qca-dataset-submission/tree/master/submissions/2020-07-30-theory-bm-set-b3lyp-d3bj-def2-tzvpp"

In [11]:

# Export the populated dataset to dataset.json in the working directory.
dataset.export_dataset("dataset.json")

In [12]:
# Write the dataset's molecules to theory-bm-set-curated.smi, passing "smi" as
# the file-format argument (presumably SMILES — confirm).
dataset.molecules_to_file("theory-bm-set-curated.smi", "smi")

In [13]:
# Export the molecules to a PDF with the driven torsions highlighted.
# The second argument, 'openeye', is passed through to dataset.visualize;
# NOTE(review): presumably it selects the toolkit used for drawing — confirm.
dataset.visualize("theory-bm-set-curated.pdf", 'openeye')