In [1]:
import os 
from qcsubmit.factories import TorsiondriveDatasetFactory
from qcsubmit.datasets import TorsiondriveDataset
from qcsubmit import workflow_components

In [2]:
# Each target is turned into a fragmented dataset so that the size and number
# of fragments produced can be inspected.
# Start from a torsiondrive dataset factory built with no arguments.
factory = TorsiondriveDatasetFactory()
# Echo the factory so its configuration is shown in the cell output.
factory

TorsiondriveDatasetFactory(qc_specifications={'default': QCSpec(method='B3LYP-D3BJ', basis='DZVP', program='psi4', spec_name='default', spec_description='Standard OpenFF optimization quantum chemistry specification.', store_wavefunction=<WavefunctionProtocolEnum.none: 'none'>)}, maxiter=200, driver=<DriverEnum.gradient: 'gradient'>, scf_properties=['dipole', 'quadrupole', 'wiberg_lowdin_indices', 'mayer_indices'], priority='normal', dataset_tags=['openff'], compute_tag='openff', workflow={}, optimization_program=GeometricProcedure(program='geometric', coordsys='tric', enforce=0.1, epsilon=0.0, reset=True, qccnv=True, molcnv=False, check=0, trust=0.1, tmax=0.3, maxiter=300, convergence_set='GAU', constraints={}), grid_spacings=[15], energy_upper_limit=0.05, dihedral_ranges=None, energy_decrease_thresh=None)

In [3]:
# Build the two workflow components first: a WBO fragmenter and a conformer
# generator capped at 10 conformers.
fragmenter = workflow_components.WBOFragmenter()
conformers = workflow_components.StandardConformerGenerator(max_conformers=10)

# Fragmenter setting: keep ring substituents that are not rotors.
fragmenter.keep_non_rotor_ring_substituents = True

# Register the components on the factory, fragmenter first and conformer
# generator second.
# NOTE(review): presumably the registration order is the execution order — confirm.
factory.add_workflow_component(fragmenter)
factory.add_workflow_component(conformers)

# Echo the factory so the registered workflow is visible in the cell output.
factory

TorsiondriveDatasetFactory(qc_specifications={'default': QCSpec(method='B3LYP-D3BJ', basis='DZVP', program='psi4', spec_name='default', spec_description='Standard OpenFF optimization quantum chemistry specification.', store_wavefunction=<WavefunctionProtocolEnum.none: 'none'>)}, maxiter=200, driver=<DriverEnum.gradient: 'gradient'>, scf_properties=['dipole', 'quadrupole', 'wiberg_lowdin_indices', 'mayer_indices'], priority='normal', dataset_tags=['openff'], compute_tag='openff', workflow={'WBOFragmenter': WBOFragmenter(component_name='WBOFragmenter', component_description='Fragment a molecule across all rotatble bonds using the WBO fragmenter.', component_fail_message='The molecule could not fragmented correctly.', toolkit='openeye', threshold=0.03, keep_non_rotor_ring_substituents=True, functional_groups=None, heuristic='path_length', include_parent=False), 'StandardConformerGenerator': StandardConformerGenerator(component_name='StandardConformerGenerator', component_description='Gene

In [5]:
# Export the factory settings to "fragment_settings.yaml" so the configuration
# used for this dataset can be reloaded or inspected later.
# NOTE(review): this cell's execution count is higher than that of the
# dataset-creation cell that follows it, i.e. it was last run out of order —
# confirm a Restart & Run All produces the same settings file.
factory.export_settings("fragment_settings.yaml")

In [4]:
# Test making a dataset: run the input molecules through the factory workflow
# and collect the result as a named torsiondrive dataset.
# NOTE(review): "sdfs" is presumably a directory of SDF files relative to the
# notebook's working directory — confirm.
# The string arguments are plain literals; they contain no placeholders, so no
# f-string prefix is needed.
dataset = factory.create_dataset(
    dataset_name="OpenFF-benchmark-ligand-fragments-v1.0",
    molecules="sdfs",
    description="Torsiondrives of fragments of the JACS benchmark inhibitors.",
    tagline="Fragmented torsiondrives of JACS benchmark inhibitors.",
)


Problematic atoms are:
Atom atomic num: 16, name: , idx: 44, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 10, aromatic: True, chiral: False
bond order: 2, chiral: False to atom atomic num: 8, name: , idx: 45, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 46, aromatic: False, chiral: False













Problematic atoms are:
Atom atomic num: 16, name: , idx: 44, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 10, aromatic: True, chiral: False
bond order: 2, chiral: False to atom atomic num: 8, name: , idx: 45, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 46, aromatic: False, chiral: False

























  problematic_bonds, oechem.OEMolToSmiles(molecule)))








In [5]:
# Show the element symbols recorded in the dataset metadata.
dataset.metadata.elements

{'Br', 'C', 'Cl', 'F', 'H', 'I', 'N', 'O', 'S'}

In [6]:
# Report how many molecules the dataset holds.
dataset.n_molecules

368

In [7]:
# Report how many records the dataset holds.
# NOTE(review): presumably one record per torsion to be driven, which is why
# this can exceed the molecule count — confirm.
dataset.n_records

481

In [11]:
# Add the first of the two extra specifications: the openff-1.0.0 force field
# evaluated through openmm.
dataset.add_qc_spec(
    method="openff-1.0.0",
    basis="smirnoff",
    program="openmm",
    spec_name="openff-1.0.0",
    spec_description="default openff-1.0.0 spec",
)

In [12]:
# Add the gaff-2.11 specification, evaluated through openmm.
dataset.add_qc_spec(
    method="gaff-2.11",
    basis="antechamber",
    program="openmm",
    spec_name="gaff-2.11",
    spec_description="default gaff-2.11 spec",
)

In [13]:
# Write a visual summary of the dataset to "fragments.pdf".
# NOTE(review): presumably one depiction per molecule — confirm.
dataset.visualize("fragments.pdf")

In [14]:
# Save the dataset to "dataset.json".
dataset.export_dataset("dataset.json")

In [10]:
# Show the filter entries keyed by workflow component name; each entry lists
# the molecules that component filtered out, alongside the component's
# settings and provenance.
dataset.filtered_molecules

{'WBOFragmenter': FilterEntry(component_name='WBOFragmenter', component_description={'component_name': 'WBOFragmenter', 'component_description': 'Fragment a molecule across all rotatble bonds using the WBO fragmenter.', 'component_fail_message': 'The molecule could not fragmented correctly.', 'toolkit': 'openeye', 'threshold': 0.03, 'keep_non_rotor_ring_substituents': True, 'functional_groups': None, 'heuristic': 'path_length', 'include_parent': False}, component_provenance={'OpenforcefieldToolkit': '0.7.1', 'QCSubmit': '0+untagged.147.g792d828', 'openeye': '2019.Oct.2', 'fragmenter': '0.0.6+2.g6bcae53.dirty'}, molecules=['[H]c1c(c(c(c(c1OC2=C(c3c(nc(nc3N(C2=O)C([H])([H])[H])N([H])C([H])(C([H])([H])C([H])([H])O[H])C([H])([H])C([H])([H])O[H])[H])[H])F)[H])F)[H]']),
 'StandardConformerGenerator': FilterEntry(component_name='StandardConformerGenerator', component_description={'component_name': 'StandardConformerGenerator', 'component_description': 'Generate conformations for the given mol