This notebook supplements the `OpenFF-benchmark-ligand-fragments-v1.0` dataset, which was fragmented using fragmenter=0.7.0 (OpenEye only), with new molecules generated using openff-fragmenter=0.1.2 with both OpenEye and AmberTools.

## Workflow
- First we use openeye to fragment the dataset
- Next fragment again using ambertools
- Then we combine the two datasets together using qcsubmit. This will deduplicate molecules and torsion drives but does not check for conformer duplication. 
- Load the original dataset and add the new combined dataset to it
- Now loop over the original dataset and, for any scans which are old, replace the input molecules with the original inputs to help QCFractal deduplicate the tasks.

In [1]:
from openff.qcsubmit.factories import TorsiondriveDatasetFactory
from openff.qcsubmit import workflow_components
from openff.qcsubmit.datasets import TorsiondriveDataset, load_dataset
from openff.qcsubmit.serializers import deserialize
from openff.toolkit.topology import Molecule
from openff.toolkit.utils.toolkits import GLOBAL_TOOLKIT_REGISTRY, OpenEyeToolkitWrapper, AmberToolsToolkitWrapper

In [2]:
# Remove AmberTools from the global toolkit registry so that the first fragmentation pass
# runs with OpenEye; openff-fragmenter should respect the global registry.
# Note: RDKit and the built-in wrapper stay registered (see the printed list below), so
# this is "OpenEye without AmberTools" rather than strictly OpenEye-only.
# This mutates global state, so cell execution order matters for everything that follows.
GLOBAL_TOOLKIT_REGISTRY.deregister_toolkit(AmberToolsToolkitWrapper())
print(GLOBAL_TOOLKIT_REGISTRY.registered_toolkits)

[ToolkitWrapper around OpenEye Toolkit version 2021.1.1, ToolkitWrapper around The RDKit version 2021.03.4, ToolkitWrapper around Built-in Toolkit version None]


In [3]:
# Torsion-drive dataset factory whose workflow is:
#   1. WBO-based fragmentation, keeping non-rotor ring substituents
#   2. conformer generation capped at 4 conformers (max_conformers=4)
factory = TorsiondriveDatasetFactory()
wbo_fragmenter = workflow_components.WBOFragmenter(keep_non_rotor_ring_substituents=True)
conformer_generator = workflow_components.StandardConformerGenerator(max_conformers=4)
# Components are added in the order they should appear in the workflow.
for component in (wbo_fragmenter, conformer_generator):
    factory.add_workflow_components(component)

In [4]:
# Display the full factory configuration (QC specification, workflow components and
# optimization settings) so the settings used for this submission are recorded in the output.
factory.dict()

{'qc_specifications': {'default': {'method': 'B3LYP-D3BJ',
   'basis': 'DZVP',
   'program': 'psi4',
   'spec_name': 'default',
   'spec_description': 'Standard OpenFF optimization quantum chemistry specification.',
   'store_wavefunction': 'none',
   'implicit_solvent': None,
   'maxiter': 200,
   'scf_properties': ['dipole',
    'quadrupole',
    'wiberg_lowdin_indices',
    'mayer_indices'],
   'keywords': None}},
 'driver': 'gradient',
 'priority': 'normal',
 'dataset_tags': ['openff'],
 'compute_tag': 'openff',
 'type': 'TorsiondriveDatasetFactory',
 'workflow': [{'type': 'WBOFragmenter',
   'threshold': 0.03,
   'keep_non_rotor_ring_substituents': True,
   'heuristic': 'path_length'},
  {'type': 'StandardConformerGenerator',
   'rms_cutoff': None,
   'max_conformers': 4,
   'clear_existing': True}],
 'optimization_program': {'program': 'geometric',
  'coordsys': 'dlc',
  'enforce': 0.1,
  'epsilon': 0.0,
  'reset': True,
  'qccnv': True,
  'molcnv': False,
  'check': 0,
  'trust'

In [5]:
# Run the factory workflow on the benchmark ligand SDF directory while OpenEye is the
# active backend (AmberTools was deregistered above) to build the OpenEye-based dataset.
oe_dataset_settings = dict(
    dataset_name="OpenFF-benchmark-ligand-fragments-v2.0",
    molecules="../2020-07-27-OpenFF-Benchmark-Ligands/sdfs/",
    description="Ligand fragments generated via openff-fragmenter using openeye/ambertools for the JACS benchmark systems. These fragments are then used to fit bespoke torsion parameters for the bespokefit paper.",
    tagline="Ligand fragments from the JACS benchmark systems.",
    # Pass the (modified) global registry explicitly so the factory uses the same backends.
    toolkit_registry=GLOBAL_TOOLKIT_REGISTRY,
)
oe_dataset = factory.create_dataset(**oe_dataset_settings)

Problematic atoms are:
Atom atomic num: 16, name: , idx: 44, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 10, aromatic: True, chiral: False
bond order: 2, chiral: False to atom atomic num: 8, name: , idx: 45, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 46, aromatic: False, chiral: False

Deduplication                 : 100%|████████| 199/199 [00:00<00:00, 713.98it/s]
Problematic atoms are:
Atom atomic num: 16, name: , idx: 26, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 6, aromatic: True, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 17, aromatic: False, chiral: False
bond order: 2, chiral: False to atom atomic num: 8, name: , idx: 24, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 16, name: , idx: 26, aromatic: False, chiral: True with bonds:
bond order:

Preparation                   : 100%|█████████| 350/350 [00:08<00:00, 42.95it/s]


In [6]:
# Sanity-check the generated metadata (name, descriptions, element coverage) of the
# OpenEye-based dataset before combining it with the AmberTools one.
oe_dataset.metadata

Metadata(submitter='joshua', creation_date=datetime.date(2021, 8, 23), collection_type='TorsionDriveDataset', dataset_name='OpenFF-benchmark-ligand-fragments-v2.0', short_description='Ligand fragments from the JACS benchmark systems.', long_description_url=None, long_description='Ligand fragments generated via openff-fragmenter using openeye/ambertools for the JACS benchmark systems. These fragments are then used to fit bespoke torsion parameters for the bespokefit paper.', elements={'S', 'N', 'Br', 'C', 'H', 'O', 'Cl', 'F', 'I'})

In [7]:
# Show, per workflow component, which input molecules were filtered out during dataset
# creation, together with the component settings and provenance that were recorded.
oe_dataset.filtered_molecules

{'WBOFragmenter': FilterEntry(component='WBOFragmenter', component_settings={'type': 'WBOFragmenter', 'threshold': 0.03, 'keep_non_rotor_ring_substituents': True, 'heuristic': 'path_length'}, component_provenance={'openff-toolkit': '0.10.0', 'openff-qcsubmit': '0.2.2+4.gbd61e26', 'OpenEyeToolkitWrapper': '2021.1.1', 'RDKitToolkitWrapper': '2021.03.4', 'openff-fragmenter': '0.1.2'}, molecules=['[H]c1c(c(c(c(c1N([H])c2nc3c(c(n2)OC([H])([H])C4(C(C(C(C(C4([H])[H])([H])[H])([H])[H])([H])[H])([H])[H])[H])N=C(N3[H])[H])[H])[H])S(=O)C([H])([H])[H])[H]']),
 'StandardConformerGenerator': FilterEntry(component='StandardConformerGenerator', component_settings={'type': 'StandardConformerGenerator', 'rms_cutoff': None, 'max_conformers': 4, 'clear_existing': True}, component_provenance={'openff-toolkit': '0.10.0', 'openff-qcsubmit': '0.2.2+4.gbd61e26', 'OpenEyeToolkitWrapper': '2021.1.1', 'RDKitToolkitWrapper': '2021.03.4'}, molecules=[])}

In [8]:
# Swap backends for the second pass: remove OpenEye from the global registry and put
# AmberTools back in, so fragmentation now runs without OpenEye (RDKit and the built-in
# wrapper remain registered, see the printed list below).
# This relies on AmberTools having been deregistered earlier in the notebook; the cell
# mutates global state, so it must run after the OpenEye dataset has been created.
GLOBAL_TOOLKIT_REGISTRY.deregister_toolkit(OpenEyeToolkitWrapper())
GLOBAL_TOOLKIT_REGISTRY.register_toolkit(AmberToolsToolkitWrapper())
print(GLOBAL_TOOLKIT_REGISTRY.registered_toolkits)

[ToolkitWrapper around The RDKit version 2021.03.4, ToolkitWrapper around Built-in Toolkit version None, ToolkitWrapper around AmberTools version 21.0]


In [9]:
# Repeat the same factory workflow on the same input SDFs, this time with AmberTools
# registered instead of OpenEye, to produce the AmberTools-based fragments.
# NOTE(review): the dataset name and descriptions are identical to the OpenEye run,
# presumably so that the two datasets can be combined afterwards -- confirm.
am_dataset_settings = dict(
    dataset_name="OpenFF-benchmark-ligand-fragments-v2.0",
    molecules="../2020-07-27-OpenFF-Benchmark-Ligands/sdfs/",
    description="Ligand fragments generated via openff-fragmenter using openeye/ambertools for the JACS benchmark systems. These fragments are then used to fit bespoke torsion parameters for the bespokefit paper.",
    tagline="Ligand fragments from the JACS benchmark systems.",
    # Pass the (modified) global registry explicitly so the factory uses the same backends.
    toolkit_registry=GLOBAL_TOOLKIT_REGISTRY,
)
am_dataset = factory.create_dataset(**am_dataset_settings)

 - Atom S (index 44)

Deduplication                 : 100%|████████| 199/199 [00:00<00:00, 593.78it/s]
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(


  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(


  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
 - Atom S (index 18)

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances 

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
A new stereocenter formed at atom 19
A new stereocenter formed at atom 19
A new stereocenter formed at atom 19
A new stereocenter formed at atom 19
A new stereocenter formed at atom 19
A new stereocenter formed at atom 19
A new stereocenter formed at atom 19
A new stereocenter formed at atom 19
A new stereocenter formed at atom 19
A new stereocenter formed at atom 19
A new stereocenter formed at atom 19
A new stereocenter formed at atom 19
A new stereocenter formed at atom 19
A new stereocenter formed at atom 19
A new stereocenter formed at atom 19
A new stereocenter formed at at

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
Stereochemistry for atom 24 flipped from S to R
Stereochemistry for atom 24 flipped from S to R
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  dista

  distances = np.sqrt(
  distances = np.sqrt(
Stereochemistry for atom 24 flipped from S to R
Stereochemistry for atom 24 flipped from S to R
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  dista

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
Stereochemi

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
A new stere

A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distance

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
A new st

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
A new stereocenter formed at atom 29
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sq

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances = np.sqrt(
  distances

WBOFragmenter                 : 100%|███████| 199/199 [3:26:44<00:00, 62.33s/it]
StandardConformerGenerator    : 100%|█████████| 370/370 [00:13<00:00, 27.05it/s]


ded/removed
Proton(s) added/removed


removed


Preparation                   : 100%|█████████| 370/370 [00:08<00:00, 42.52it/s]


In [10]:
# display the metadata attached to `am_dataset`
# (presumably the dataset fragmented with ambertools, see the workflow notes at the top - confirm)
am_dataset.metadata

Metadata(submitter='joshua', creation_date=datetime.date(2021, 8, 23), collection_type='TorsionDriveDataset', dataset_name='OpenFF-benchmark-ligand-fragments-v2.0', short_description='Ligand fragments from the JACS benchmark systems.', long_description_url=None, long_description='Ligand fragments generated via openff-fragmenter using openeye/ambertools for the JACS benchmark systems. These fragments are then used to fit bespoke torsion parameters for the bespokefit paper.', elements={'S', 'N', 'Br', 'C', 'H', 'O', 'Cl', 'F', 'I'})

In [11]:
# display the molecules which were filtered out of `am_dataset`, grouped by the workflow component that removed them
am_dataset.filtered_molecules

{'WBOFragmenter': FilterEntry(component='WBOFragmenter', component_settings={'type': 'WBOFragmenter', 'threshold': 0.03, 'keep_non_rotor_ring_substituents': True, 'heuristic': 'path_length'}, component_provenance={'openff-toolkit': '0.10.0', 'openff-qcsubmit': '0.2.2+4.gbd61e26', 'RDKitToolkitWrapper': '2021.03.4', 'AmberToolsToolkitWrapper': '21.0', 'openff-fragmenter': '0.1.2'}, molecules=['[H][C]1=[N][c]2[c]([O][C]([H])([H])[C]3([H])[C]([H])([H])[C]([H])([H])[C]([H])([H])[C]([H])([H])[C]3([H])[H])[n][c]([N]([H])[c]3[c]([H])[c]([H])[c]([S](=[O])[C]([H])([H])[H])[c]([H])[c]3[H])[n][c]2[N]1[H]', '[H][c]1[c]([H])[c]([H])[c]([C]([H])([H])[C@]([H])([C](=[O])[N]2[C]([H])([H])[C]([H])([H])[C]([H])([H])[C@@]2([H])[C](=[O])[N]([H])[C]([H])([H])[c]2[c]([H])[c]([H])[c]([H])[c]([C]([H])([H])[H])[c]2[H])[N+]([H])([H])[H])[c]([H])[c]1[H]']),
 'StandardConformerGenerator': FilterEntry(component='StandardConformerGenerator', component_settings={'type': 'StandardConformerGenerator', 'rms_cutoff': Non

In [13]:
# add the new datasets to deduplicate torsiondrives
# as described in the workflow notes, combining via qcsubmit deduplicates molecules and
# torsiondrives but does not check for duplicated conformers
new_dataset = oe_dataset + am_dataset

In [16]:
# read the raw json of the original dataset so its records can be reused for deduplication
old_dataset_data = deserialize("../2020-07-27-OpenFF-Benchmark-Ligands/dataset.json.bz2")
# the layout of the filtered molecules data has changed since that dataset was written,
# so discard it before building the model to avoid parsing errors
old_dataset_data.pop("filtered_molecules")
old_dataset = TorsiondriveDataset.parse_obj(old_dataset_data)





In [18]:
# add the two datasets together
# the freshly fragmented entries (openeye + ambertools) are merged with the entries of the original dataset
combinded_dataset = new_dataset + old_dataset

In [21]:
# save to file
# NOTE(review): this snapshot is written before the old entries are swapped back in below and uses
# a .xz extension, while the final export at the end of the notebook writes "dataset.json.bz2"
combinded_dataset.export_dataset("dataset.json.xz")

In [32]:
# get a list of entries which should be removed from the combined dataset and replaced with old entries
# For every torsiondrive of the old dataset, look up the entries of the combined dataset which hold the
# same molecule and record the first one that scans the same central bond.
replacments = {} # (combined_id: old_entry)
for entry in old_dataset.dataset.values():
    old_molecule = entry.get_off_molecule()
    # `or ()` keeps the loop a no-op when no matching entries are reported
    for id_entry in combinded_dataset.get_molecule_entry(old_molecule) or ():
        new_entry = combinded_dataset.dataset[id_entry]
        new_molecule = new_entry.get_off_molecule()
        iso, atom_map = Molecule.are_isomorphic(old_molecule, new_molecule, return_atom_map=True)
        if not iso or atom_map is None:
            # without an atom mapping the central bonds can not be compared, so skip this candidate
            continue
        # only the two central atoms of the first dihedral define the rotated bond
        old_dihedral = entry.dihedrals[0][1:3]
        new_dihedral = new_entry.dihedrals[0][1:3]
        # now see if the central bond is the same; comparing sets makes the check
        # independent of the direction in which the bond was written
        if {atom_map[atom] for atom in old_dihedral} == set(new_dihedral):
            # log which entry should be replaced
            replacments[id_entry] = entry
            break

In [33]:
# the number of reused torsiondrives
# i.e. how many entries of the combined dataset were matched to an entry of the old dataset
len(replacments)

481

In [37]:
# swap every matched entry of the combined dataset for its counterpart from the old dataset
for index, entry in replacments.items():
    # remove the entry stored under the combined dataset id ...
    combinded_dataset.dataset.pop(index)
    # ... and store the old entry under its own index instead
    combinded_dataset.dataset[entry.index] = entry

In [38]:
# number of unique molecules left in the combined dataset after the replacement
combinded_dataset.n_molecules

490

In [39]:
# number of torsiondrive records in the combined dataset after the replacement
combinded_dataset.n_records

671

In [40]:
# sanity check: every reused old entry must now be stored in the combined dataset under its own index
assert all(entry.index in combinded_dataset.dataset for entry in replacments.values())

In [49]:
# point the metadata at the submission folder and record the submitter name
combinded_dataset.metadata.long_description_url = "https://github.com/openforcefield/qca-dataset-submission/tree/master/submissions/2021-08-10-OpenFF-JACS-Fragments-v2.0"
combinded_dataset.metadata.submitter = "JTHorton"

In [2]:
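# a restart cell: uncomment the line below to reload the combined dataset after a kernel restart,
# the cells which follow need `combinded_dataset` to be defined
# combinded_dataset = TorsiondriveDataset.parse_file("dataset.json.bz2")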







In [3]:
# reduce the max number of conformers from 10 to 4
for entry in combinded_dataset.dataset.values():
    # entries which already hold 4 or fewer starting geometries are left untouched
    if len(entry.initial_molecules) <= 4:
        continue
    # keep only the first 4 starting geometries
    entry.initial_molecules = entry.initial_molecules[:4]



In [4]:
# collect dataset info
from openeye import oechem
import numpy as np

# number of conformers stored on each unique molecule
confs = np.array([len(mol.conformers) for mol in combinded_dataset.molecules])

print("Number of unique molecules       ", combinded_dataset.n_molecules)
print("Number of filtered molecules     ", combinded_dataset.n_filtered)
print("Number of torsiondrives             ", combinded_dataset.n_records)
print("Number of conformers min mean max",
      confs.min(), f"{confs.mean():6.2f}", confs.max())

# molecular weight of every unique molecule, calculated with openeye
masses = [
    oechem.OECalculateMolecularWeight(molecule.to_openeye())
    for molecule in combinded_dataset.molecules
]

print(f'Mean molecular weight: {np.mean(np.array(masses)):.2f}')
print(f'Max molecular weight: {np.max(np.array(masses)):.2f}')
# divide by the unit so that the formal charges are reported as plain numbers
print("Charges:", sorted({m.total_charge/m.total_charge.unit for m in combinded_dataset.molecules}))


Number of unique molecules        490
Number of filtered molecules      1
Number of torsiondrives              671
Number of conformers min mean max 1   2.89 4
Mean molecular weight: 259.64
Max molecular weight: 536.44
Charges: [-2.0, -1.0, 0.0, 1.0]


In [6]:
# pretty-print the dataset metadata as a plain dict
# NOTE(review): the output stored for this cell is a NameError, it was last run in a kernel where
# `combinded_dataset` had not been defined - re-run after loading the dataset
from pprint import pprint
pprint(combinded_dataset.metadata.dict())

NameError: name 'combinded_dataset' is not defined

In [6]:
# print every QC specification stored on the combined dataset, keyed by its name
for spec, obj in combinded_dataset.qc_specifications.items():
    print("Spec:", spec)
    pprint(obj.dict())

Spec: default
{'basis': 'DZVP',
 'implicit_solvent': None,
 'keywords': None,
 'maxiter': 200,
 'method': 'B3LYP-D3BJ',
 'program': 'psi4',
 'scf_properties': ['dipole',
                    'quadrupole',
                    'wiberg_lowdin_indices',
                    'mayer_indices'],
 'spec_description': 'Standard OpenFF optimization quantum chemistry '
                     'specification.',
 'spec_name': 'default',
 'store_wavefunction': 'none'}


In [7]:
# export the final dataset

# the dataset itself as bz2 compressed json, plus the molecules as a smiles file
combinded_dataset.export_dataset("dataset.json.bz2")
combinded_dataset.molecules_to_file("dataset.smi", "smi")

# a pdf overview of the dataset (presumably a grid of molecules with 8 per row - confirm)
combinded_dataset.visualize("dataset.pdf", columns=8)

In [2]:
# load the dataset to add more compute specs
# this reads back the file written by the final export above, so the cells below can run in a fresh kernel
dataset = load_dataset("dataset.json.bz2")







In [4]:
# add all xtb specs
# one specification per xtb method, the method name is also used as the spec name
for xtb_method in ("gfn0xtb", "gfn1xtb", "gfn2xtb", "gfnff"):
    dataset.add_qc_spec(
        method=xtb_method,
        basis=None,
        program="xtb",
        spec_name=xtb_method,
        # built from the method name so the description can not drift from it
        # (the hand written gfn0xtb description was misspelt as "gn0xtb")
        spec_description=f"A default spec for {xtb_method}",
    )

In [5]:
# add ani2x, we know that this will fail for a lot of molecules
# (the spec is still added for the whole dataset, nothing is filtered here)
dataset.add_qc_spec(method="ani2x", basis=None, program="torchani", spec_name="ani2x", spec_description="A default spec for ani2x")

In [8]:
# add all of the forcefields
# the openff force fields only differ by version, they all use the "smirnoff" basis with openmm
for smirnoff_ff in ("openff-1.0.0", "openff-1.1.1", "openff-1.2.1", "openff-1.3.0", "openff-2.0.0"):
    dataset.add_qc_spec(
        method=smirnoff_ff,
        basis="smirnoff",
        spec_name=smirnoff_ff,
        spec_description=f"A default spec for {smirnoff_ff}",
        program="openmm",
    )
# gaff also runs through openmm but uses the "antechamber" basis label
dataset.add_qc_spec(
    method="gaff-2.11",
    basis="antechamber",
    spec_name="gaff-2.11",
    spec_description="A default spec for gaff-2.11",
    program="openmm",
)

In [11]:
# print every QC specification now stored on the reloaded dataset to check the additions
# NOTE(review): `pprint` is only imported in an earlier cell, it must have been run in this kernel first
for spec, obj in dataset.qc_specifications.items():
    print("Spec:", spec)
    pprint(obj.dict())

Spec: default
{'basis': 'DZVP',
 'implicit_solvent': None,
 'keywords': None,
 'maxiter': 200,
 'method': 'B3LYP-D3BJ',
 'program': 'psi4',
 'scf_properties': ['dipole',
                    'quadrupole',
                    'wiberg_lowdin_indices',
                    'mayer_indices'],
 'spec_description': 'Standard OpenFF optimization quantum chemistry '
                     'specification.',
 'spec_name': 'default',
 'store_wavefunction': 'none'}
Spec: gfn0xtb
{'basis': None,
 'implicit_solvent': None,
 'keywords': None,
 'maxiter': 200,
 'method': 'gfn0xtb',
 'program': 'xtb',
 'scf_properties': ['dipole',
                    'quadrupole',
                    'wiberg_lowdin_indices',
                    'mayer_indices'],
 'spec_description': 'A default spec for gn0xtb',
 'spec_name': 'gfn0xtb',
 'store_wavefunction': 'none'}
Spec: gfn1xtb
{'basis': None,
 'implicit_solvent': None,
 'keywords': None,
 'maxiter': 200,
 'method': 'gfn1xtb',
 'program': 'xtb',
 'scf_properties': ['dipol

In [12]:
# write the dataset with the extra specs back to the same file it was loaded from, overwriting it
dataset.export_dataset("dataset.json.bz2")