In [1]:
from qcsubmit.factories import TorsiondriveDatasetFactory
from qcsubmit.datasets import TorsiondriveDataset
from qcsubmit import workflow_components 
from qcsubmit.common_structures import TorsionIndexer 
from openforcefield.topology import Molecule as OFFMolecule

# from qcelemental.models import Molecule as QCEMolecule
import os, json, tqdm



In [2]:
# Build a torsiondrive factory, then override only the settings this
# benchmark set needs.
factory = TorsiondriveDatasetFactory()

# SCF properties requested for every calculation in the dataset.
factory.scf_properties = [
    'dipole',
    'quadrupole',
    'wiberg_lowdin_indices',
    'mayer_indices',
]

# Basis set used for this benchmark.
factory.basis = '6-31+G**'

In [3]:
# Write the factory settings configured above to a YAML file next to the notebook.
# NOTE(review): the filename spells "setttings" with three t's; it is left as-is
# because other files may reference this exact name -- confirm before renaming.
factory.export_settings("theory-bm-set_setttings.yaml")

In [4]:
# Create the dataset with its descriptive metadata. No molecules are passed in
# here (molecules=[]); the entries are added one by one in a later cell.
dataset = factory.create_dataset(
    dataset_name="OpenFF Theory Benchmarking Set B3LYP-D3BJ 6-31+Gss",
    molecules=[],
    description="A torsiondrive dataset for benchmarking B3LYP-D3BJ/6-31+G**",
    tagline="Torsiondrives for benchmarking B3LYP-D3BJ/6-31+G**",
)

In [5]:
# Load the pre-selected torsions; the result is a mapping with one record per torsion.
# The encoding is given explicitly so the read does not depend on the platform's
# default text encoding (JSON interchange files are UTF-8).
with open('input_torsions.json', encoding='utf-8') as infile:
    selected_torsions = json.load(infile)

In [6]:
# Add one torsiondrive entry to the dataset per selected torsion.
# Only the record values are needed; the running position `idx` is used as the
# entry index, so the dictionary keys are not read here.
#
# NOTE: `OFFMolecule.from_qcschema(torsion_data, client=client)` was tried and did
# not work (cause not yet investigated), so the molecule is built from the record
# alone and conformers are generated locally instead.
for idx, torsion_data in enumerate(tqdm.tqdm(selected_torsions.values())):
    molecule = OFFMolecule.from_qcschema(torsion_data)
    # Request 5 conformers; the number actually obtained is printed for each entry.
    molecule.generate_conformers(n_conformers=5)
    print(f'{idx}: {molecule.n_conformers}')
    dataset.add_molecule(
        index=idx,
        molecule=molecule,
        attributes=torsion_data["attributes"],
        dihedrals=torsion_data["atom_indices"],
    )

 11%|█         | 4/36 [00:00<00:13,  2.41it/s]0: 2
1: 1
2: 1
3: 5
 19%|█▉        | 7/36 [00:01<00:07,  3.84it/s]4: 2
5: 2
6: 1
 31%|███       | 11/36 [00:01<00:03,  6.39it/s]7: 2
8: 1
9: 2
10: 2
 36%|███▌      | 13/36 [00:01<00:03,  7.62it/s]11: 2
12: 5
13: 1
14: 2
 53%|█████▎    | 19/36 [00:01<00:01, 11.72it/s]15: 1
16: 2
17: 1
18: 1
 64%|██████▍   | 23/36 [00:02<00:00, 14.55it/s]19: 1
20: 4
21: 4
22: 1
 75%|███████▌  | 27/36 [00:02<00:00, 16.01it/s]23: 4
24: 1
25: 3
26: 3
 81%|████████  | 29/36 [00:02<00:00, 17.00it/s]27: 2
28: 2
29: 1
 92%|█████████▏| 33/36 [00:02<00:00, 13.82it/s]30: 2
31: 3
32: 2
33: 3
 97%|█████████▋| 35/36 [00:02<00:00, 14.10it/s]34: 5
100%|██████████| 36/36 [00:03<00:00, 11.81it/s]35: 5



In [7]:
# Display the name of the specification stored on the dataset.
dataset.spec_name

'default'

In [8]:
# Display the molecule count reported by the dataset.
# NOTE(review): this can be lower than the number of entries added in the loop,
# presumably because several torsions belong to the same molecule -- confirm.
dataset.n_molecules

31

In [9]:
# Display the record count reported by the dataset
# (expected to match the number of torsions added in the loop -- confirm).
dataset.n_records

36

In [10]:
# Point the dataset metadata at the submission folder that holds the long description.
dataset.metadata.long_description_url = "https://github.com/openforcefield/qca-dataset-submission/tree/master/submissions/2020-10-29-theory-bm-set-b3lyp-d3bj-6-31+Gss"

In [11]:

# Export the populated dataset to "dataset.json" in the working directory.
dataset.export_dataset("dataset.json")

In [12]:
# Write the dataset's molecules to a file, passing "smi" as the file format.
dataset.molecules_to_file("theory-bm-set-curated.smi", "smi")

In [13]:
# Export a PDF of the dataset's molecules with the driven torsions highlighted.
# The second argument selects 'openeye' as the drawing toolkit
# (presumably this requires an OpenEye installation -- confirm).
dataset.visualize("theory-bm-set-curated.pdf", 'openeye')