In [1]:
import json
import os
import pprint

import tqdm
from openforcefield.topology import Molecule as OFFMolecule
from qcsubmit import workflow_components
from qcsubmit.common_structures import TorsionIndexer
from qcsubmit.datasets import TorsiondriveDataset
from qcsubmit.factories import TorsiondriveDatasetFactory



In [2]:
# Build the torsiondrive factory and list the SCF properties that should be
# requested alongside each calculation.
factory = TorsiondriveDatasetFactory()
factory.scf_properties = [
    'dipole',
    'quadrupole',
    'wiberg_lowdin_indices',
    'mayer_indices',
]

In [3]:
# Write the factory settings configured above to a YAML file next to the notebook.
factory.export_settings("theory-bm-set_settings.yaml")

In [4]:
# Create the dataset with no molecules yet; entries are added later, once the
# selected torsions have been loaded.
dataset = factory.create_dataset(
    molecules=[],
    dataset_name="OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVPPD",
    tagline="Torsiondrives for benchmarking B3LYP-D3BJ/def2-TZVPPD",
    description="A torsiondrive dataset for benchmarking B3LYP-D3BJ/def2-TZVPPD",
    verbose=False,
)

# Replace the existing 'default' QC specification with the level of theory
# being benchmarked. The spec keeps the name "default"; only its method,
# basis, program and description are redefined.
del dataset.qc_specifications['default']
dataset.add_qc_spec(
    spec_name="default",
    spec_description="A torsiondrive dataset for benchmarking B3LYP-D3BJ/def2-TZVPPD",
    program="psi4",
    method="B3LYP-D3BJ",
    basis="def2-TZVPPD",
)

Deduplication                 : 0it [00:00, ?it/s]


In [5]:
# Load the pre-selected torsions. The result is a dict whose values are
# per-torsion records (they are iterated and indexed by key further down).
# JSON is UTF-8 by specification, so decode explicitly instead of relying on
# the platform's locale encoding.
with open('input_torsions.json', encoding='utf-8') as infile:
    selected_torsions = json.load(infile)

In [6]:
# Register one torsiondrive entry per selected torsion and record how many
# conformers were generated for each, so the counts can be reviewed below.
output = []
for idx, torsion_data in enumerate(tqdm.tqdm(selected_torsions.values())):
    # TODO: passing `client=client` to from_qcschema did not work when tried;
    # needs investigation. For now the molecule is built from the record alone.
    molecule = OFFMolecule.from_qcschema(torsion_data)

    # Ask for up to five conformers; the molecule may end up with fewer, which
    # is why the actual count is logged.
    molecule.generate_conformers(n_conformers=5)
    output.append(f'{idx}: {molecule.n_conformers}')

    # No per-entry grid spacing is passed here, so whatever the dataset is
    # configured with applies to every torsion.
    dataset.add_molecule(
        index=idx,
        molecule=molecule,
        attributes=torsion_data["attributes"],
        dihedrals=torsion_data["atom_indices"],
    )
pprint.pprint(output)

100%|██████████| 36/36 [00:03<00:00, 10.98it/s]

['0: 2',
 '1: 1',
 '2: 1',
 '3: 5',
 '4: 2',
 '5: 2',
 '6: 1',
 '7: 2',
 '8: 1',
 '9: 2',
 '10: 2',
 '11: 2',
 '12: 5',
 '13: 1',
 '14: 2',
 '15: 1',
 '16: 2',
 '17: 1',
 '18: 1',
 '19: 1',
 '20: 4',
 '21: 4',
 '22: 1',
 '23: 4',
 '24: 1',
 '25: 3',
 '26: 3',
 '27: 2',
 '28: 2',
 '29: 1',
 '30: 2',
 '31: 3',
 '32: 2',
 '33: 3',
 '34: 5',
 '35: 5']





In [7]:
# Display the dataset's molecule count.
# NOTE(review): this is lower than the number of torsions added above,
# presumably because several torsions belong to the same molecule — confirm.
dataset.n_molecules

31

In [8]:
# Display the dataset's record count; this should match the number of
# torsions added in the loop above.
dataset.n_records

36

In [9]:
# Provenance metadata: who is submitting the dataset, and where its long-form
# description lives.
dataset.metadata.submitter = 'hyejang'
dataset.metadata.long_description_url = "https://github.com/openforcefield/qca-dataset-submission/tree/master/submissions/2020-10-29-theory-bm-set-b3lyp-d3bj-def2-tzvppd"

In [10]:
# Export the finished dataset (entries, QC specification and metadata set
# above) to a bz2-compressed JSON file.
dataset.export_dataset("dataset.json.bz2")

In [11]:
# Write the dataset's molecules to a file in "smi" format.
dataset.molecules_to_file("theory-bm-set-curated.smi", "smi")

In [12]:
# Export the molecules to a PDF with the driven torsions highlighted.
# NOTE(review): toolkit='openeye' presumably requires a licensed OpenEye
# installation — confirm before re-running on a new machine.
dataset.visualize("theory-bm-set-curated.pdf", toolkit='openeye')