In [1]:
import logging
import bz2
import sys
import os

In [2]:
import numpy as np

In [9]:
import qcfractal.interface as ptl
import tqdm
import sys
from openff.qcsubmit import workflow_components
from openff.qcsubmit.factories import TorsiondriveDatasetFactory
from openforcefield.topology import Molecule

In [10]:
# Raise the root logger threshold to ERROR so that the undefined
# stereochemistry warnings are not shown.
logging.root.setLevel(logging.ERROR)

In [11]:
def test_submit(qcs_ds, client):
    result = qcs_ds.submit(client)
    print(result)
    print("Total tasks:", sum(result.values()))

def test_submit_to_local_server(qcs_ds):
    """Test-submit ``qcs_ds`` to a Fractal server at ``localhost:7777``.

    Builds a ``FractalClient`` for ``localhost:7777`` and hands it, together
    with ``qcs_ds``, to ``test_submit``, which prints the submission result.

    Parameters
    ----------
    qcs_ds
        Object exposing ``submit(client)``; passed straight to ``test_submit``.
    """
    # Message wording matches test_submit_to_test_server
    # ("Submitting test-run to ...").
    print("Submitting test-run to local server")
    # NOTE(review): verify=False presumably turns off SSL certificate
    # verification for the local connection - confirm against the client docs.
    client = ptl.FractalClient("localhost:7777", verify=False)
    test_submit(qcs_ds, client)
    
def test_submit_to_test_server(qcs_ds):
    """Test-submit ``qcs_ds`` to a freshly started in-memory Fractal server.

    Starts a ``FractalSnowflakeHandler``, passes its client and ``qcs_ds`` to
    ``test_submit`` (which prints the submission result), and stops the
    server again whether or not the submission succeeded.

    Parameters
    ----------
    qcs_ds
        Object exposing ``submit(client)``; passed straight to ``test_submit``.
    """
    # Imported inside the function, so qcfractal's server side is only
    # needed when this helper is actually called.
    from qcfractal import FractalSnowflakeHandler

    print("Submitting test-run to in-memory server")
    server = FractalSnowflakeHandler()
    try:
        test_submit(qcs_ds, server.client())
    finally:
        # Without this every call leaves a running server behind until the
        # interpreter exits.
        server.stop()


In [12]:
sdf_file = "./pubLigsNeutralGoodDensity.sdf"

# Read every record of the SDF file; undefined stereochemistry is allowed
# rather than treated as an error.
molecules = Molecule.from_file(
    sdf_file,
    "sdf",
    allow_undefined_stereo=True,
)
# Molecule.from_file returns a bare Molecule (not a list) when the file holds
# a single record; wrap it so the filter below always iterates over molecules.
if isinstance(molecules, Molecule):
    molecules = [molecules]

# Keep only molecules with more than 3 rotatable bonds; these are the ones
# considered for fragmentation down the line.
mols = [
    molecule
    for molecule in molecules
    if len(molecule.find_rotatable_bonds()) > 3
]
print("\nFiles loaded; molecules generated.")


Files loaded; molecules generated.


In [22]:
# Generate the workflow to apply to the molecules: create the dataset factory
# and register three workflow components on it, in the order below.
# NOTE(review): the factory here is a TorsiondriveDatasetFactory, while later
# cells describe the result as an optimization set and reload the export as
# an OptimizationDataset - confirm which dataset type is intended.
qcs_ds = TorsiondriveDatasetFactory()

# Component 1: WBOFragmenter, with keep_non_rotor_ring_substituents enabled.
# NOTE(review): the recorded output of the create_dataset cell shows a
# RecursionError raised while the WBOFragmenter step was running.
component = workflow_components.WBOFragmenter()
component.keep_non_rotor_ring_substituents = True
qcs_ds.add_workflow_component(component)

# Component 2: EnumerateStereoisomers, limited to 10 isomers, using the
# "openeye" toolkit backend.
component = workflow_components.EnumerateStereoisomers()
component.max_isomers = 10
component.toolkit = "openeye"
qcs_ds.add_workflow_component(component)

# Component 3: StandardConformerGenerator, limited to 10 conformers, using the
# "openeye" toolkit backend.
# NOTE(review): rms_cutoff = 3.0 is presumably in angstrom - confirm.
component = workflow_components.StandardConformerGenerator()
component.max_conformers = 10
component.toolkit = "openeye"
component.rms_cutoff = 3.0
qcs_ds.add_workflow_component(component)

In [23]:
# Apply the workflow registered on the factory to the filtered molecules and
# collect the result as a dataset.
# NOTE(review): the name and tagline say torsiondrive while the description
# says optimization - confirm the intended wording.
dataset = qcs_ds.create_dataset(
    molecules=mols,
    dataset_name="Genentech PDB Ligand Expo fragment torsiondrive neutral v1.0",
    tagline="TorsionDriveDataset",
    description="Genentech PDB Ligand Expo optimization dataset with fragmented neutral molecules",
)
print("Workflow complete; dataset generated.")

Deduplication                 : 100%|████████| 521/521 [00:00<00:00, 570.57it/s]
WBOFragmenter                 :   0%|                   | 0/519 [00:00<?, ?it/s]


MaybeEncodingError: Error sending result: '<multiprocessing.pool.ExceptionWithTraceback object at 0x7f3c4fcc06d0>'. Reason: 'RecursionError('maximum recursion depth exceeded while calling a Python object')'

In [9]:
# Descriptive metadata stored with the dataset.
dataset.metadata.short_description = "Optimization set"
dataset.metadata.long_description_url = "https://github.com/openforcefield/qca-dataset-submission/tree/master/submissions/2020-12-02-Genentech-PDB-Ligand-Expo-fragment-optimization-neutral-v1.0"
dataset.metadata.long_description = "Genentech PDB Ligand Expo neutral molecules set"

# Per-molecule conformer counts. n_conformers reports 0 for a molecule that
# has no conformers, whereas len(mol.conformers) fails when conformers is None.
confs = np.array([mol.n_conformers for mol in dataset.molecules], dtype=int)
print("Number of unique molecules       ", dataset.n_molecules)
print("Number of filtered molecules     ", dataset.n_filtered)
print("Number of conformers             ", dataset.n_records)
if confs.size:
    print("Number of conformers min mean max",
          confs.min(), "{:6.2f}".format(confs.mean()), confs.max())
else:
    # min()/max() raise ValueError on a zero-size array, so report the empty
    # case explicitly instead of crashing before the export below.
    print("Number of conformers min mean max", "n/a (dataset has no molecules)")

# Write the dataset to a bz2-compressed JSON file in the working directory.
dataset.export_dataset("dataset.json.bz2")

Number of unique molecules        2319
Number of filtered molecules      98
Number of conformers              2366
Number of conformers min mean max 1   1.02 3


In [10]:
# Trial-submit the dataset to the in-memory test server; %time reports the
# CPU and wall time of the call.
%time test_submit_to_test_server(dataset)

Submitting test-run to in-memory server
{'default': 2363}
Total tasks: 2363
CPU times: user 49.4 s, sys: 2.46 s, total: 51.9 s
Wall time: 1min 29s


In [13]:
# Reload the exported dataset from disk, replace its long description and
# submitter, and export it again to the same path, overwriting the file it
# was read from. Note that `dataset` is rebound to the reloaded object.
from openff.qcsubmit.datasets import OptimizationDataset
dataset = OptimizationDataset.parse_file('./dataset.json.bz2')
dataset.metadata.long_description = "This dataset contains Genentech PDB Ligand Expo neutral set of molecules that fragments molecules with greater than 3 rotors"
dataset.metadata.submitter = 'pavankum'
dataset.export_dataset("dataset.json.bz2")

In [15]:
# Read the exported dataset back from disk under a separate name (`ds`) and
# call its visualize() with the output path 'visualize.pdf'.
# The import repeats the one in the previous cell, so this cell does not
# depend on that cell having been run.
from openff.qcsubmit.datasets import OptimizationDataset
ds = OptimizationDataset.parse_file('./dataset.json.bz2')
ds.visualize('visualize.pdf')