In [1]:
from openff.qcsubmit.results import TorsionDriveResultCollection
from openff.bespokefit.workflows import BespokeWorkflowFactory
from openff.bespokefit.schema.optimizers import ForceBalanceSchema
from openff.bespokefit.schema.targets import TorsionProfileTargetSchema
from qcportal import FractalClient

In [2]:
# Build the workflow factory that will generate the bespoke fitting schemas.
# The keyword is spelled `initial_force_field` so that it matches the field
# name reported by `workflow.dict()` later in this notebook.
workflow = BespokeWorkflowFactory(initial_force_field="openff_unconstrained-1.3.0.offxml")

In [3]:
# Build the optimizer schema, keeping its default settings.
fb = ForceBalanceSchema()
# Show the optimizer settings as a dictionary.
fb.dict()

{'type': 'ForceBalance',
 'max_iterations': 10,
 'job_type': 'optimize',
 'penalty_type': 'L2',
 'step_convergence_threshold': 0.01,
 'objective_convergence_threshold': 0.01,
 'gradient_convergence_threshold': 0.01,
 'n_criteria': 2,
 'eigenvalue_lower_bound': 0.01,
 'finite_difference_h': 0.01,
 'penalty_additive': 1.0,
 'initial_trust_radius': -0.25,
 'minimum_trust_radius': 0.05,
 'error_tolerance': 1.0,
 'adaptive_factor': 0.2,
 'adaptive_damping': 1.0,
 'normalize_weights': False,
 'extras': {}}

In [4]:
# Make a torsion profile target; the default settings should be fine.
target = TorsionProfileTargetSchema()

In [5]:
# Attach the optimizer and the target template to the workflow.
workflow.optimizer = fb
workflow.target_templates = [target]
# The displayed settings show that proper torsions are being targeted
# with bespoke SMIRKS.
workflow.dict()

{'initial_force_field': 'openff_unconstrained-1.3.0.offxml',
 'optimizer': {'type': 'ForceBalance',
  'max_iterations': 10,
  'job_type': 'optimize',
  'penalty_type': 'L2',
  'step_convergence_threshold': 0.01,
  'objective_convergence_threshold': 0.01,
  'gradient_convergence_threshold': 0.01,
  'n_criteria': 2,
  'eigenvalue_lower_bound': 0.01,
  'finite_difference_h': 0.01,
  'penalty_additive': 1.0,
  'initial_trust_radius': -0.25,
  'minimum_trust_radius': 0.05,
  'error_tolerance': 1.0,
  'adaptive_factor': 0.2,
  'adaptive_damping': 1.0,
  'normalize_weights': False,
  'extras': {}},
 'target_templates': [{'weight': 1.0,
   'reference_data': None,
   'extras': {},
   'type': 'TorsionProfile',
   'attenuate_weights': True,
   'energy_denominator': 1.0,
   'energy_cutoff': 10.0}],
 'parameter_settings': [{'parameter_type': <SmirksType.ProperTorsions: 'ProperTorsions'>,
   'parameter_subtype': 'Proper',
   'target': 'k',
   'prior': 6.0}],
 'target_smirks': [<SmirksType.ProperTors

In [9]:
# Connect to QCArchive and list the torsion drive datasets so that the
# name of the dataset we want can be looked up.
client = FractalClient()
client.list_collections("torsiondrivedataset")

Unnamed: 0_level_0,Unnamed: 1_level_0,tagline
collection,name,Unnamed: 2_level_1
TorsionDriveDataset,Fragment Stability Benchmark,
TorsionDriveDataset,Fragmenter paper,
TorsionDriveDataset,OpenFF Amide Torsion Set v1.0,"Amides, thioamides and amidines diversely func..."
TorsionDriveDataset,OpenFF Aniline 2D Impropers v1.0,Substituted aniline derivatives with various e...
TorsionDriveDataset,OpenFF DANCE 1 eMolecules t142 v1.0,
TorsionDriveDataset,OpenFF Fragmenter Validation 1.0,
TorsionDriveDataset,OpenFF Full TorsionDrive Benchmark 1,
TorsionDriveDataset,OpenFF Gen 2 Torsion Set 1 Roche,
TorsionDriveDataset,OpenFF Gen 2 Torsion Set 1 Roche 2,
TorsionDriveDataset,OpenFF Gen 2 Torsion Set 2 Coverage,


In [10]:
# Pull down the Rowley biaryl set from the server.
# Note: only completed calculations will be collected.
results = TorsionDriveResultCollection.from_server(
    client,
    datasets="OpenFF Rowley Biaryl v1.0",
    spec_name="default",
)

In [11]:
# Build a bespoke fitting schema for each result record.
schema = workflow.optimization_schemas_from_results(
    results=results,
    combine=False,
)

Building Fitting Schema : 100%|█████████████████| 87/87 [00:18<00:00,  4.71it/s]


In [12]:
# We have found 87 unique torsion drives and generated 87 unique fitting tasks.
len(schema)

87

In [13]:
# Each task has SMIRKS patterns that will be fit.
# The `atoms` entries are the atoms in the molecule that each pattern hits.
# Note: the torsion terms have been expanded up to k4. If you do not want
# this, see the workflow options.
schema[0].target_smirks

[BespokeTorsionSmirks(atoms={(3, 8, 9, 4)}, smirks='[#6H1X3x2r6+0a:1](-;!@[#1H0X1x0!r+0A])(:;@[#6H1X3x2r6+0a](-;!@[#1H0X1x0!r+0A]):;@[#6H1X3x2r6+0a](-;!@[#1H0X1x0!r+0A]):;@[#7H0X2x2r6+0a]:;@[#6H1X3x2r6+0a]-;!@[#1H0X1x0!r+0A]):;@[#6H0X3x2r6+0a:2]-;!@[#6H0X3x2r6+0a:3](:;@[#7H0X2x2r6+0a]:;@[#6H1X3x2r6+0a](-;!@[#1H0X1x0!r+0A]):;@[#6H1X3x2r6+0a](-;!@[#1H0X1x0!r+0A]):;@[#6H1X3x2r6+0a]-;!@[#1H0X1x0!r+0A]):;@[#6H1X3x2r6+0a:4]-;!@[#1H0X1x0!r+0A]', type=<SmirksType.ProperTorsions: 'ProperTorsions'>, parameterize=set(), terms={'2': BespokeTorsionTerm(periodicity='2', phase='180.0 * degree', k='1.048715180139 * mole**-1 * kilocalorie', idivf='1'), '1': BespokeTorsionTerm(periodicity='1', phase='0 * degree', k='1e-06 * mole**-1 * kilocalorie', idivf='1.0'), '3': BespokeTorsionTerm(periodicity='3', phase='0 * degree', k='1e-06 * mole**-1 * kilocalorie', idivf='1.0'), '4': BespokeTorsionTerm(periodicity='4', phase='180 * degree', k='1e-06 * mole**-1 * kilocalorie', idivf='1.0')}),
 BespokeTorsionSmir

In [16]:
# We can inspect the molecule targeted by the first task.
# There are 4 dihedrals for this molecule.
schema[0].target_molecule

MoleculeSchema(attributes=MoleculeAttributes(canonical_smiles='c1ccnc(c1)c2cccnc2', canonical_isomeric_smiles='c1ccnc(c1)c2cccnc2', canonical_explicit_hydrogen_smiles='[H]c1c(c(nc(c1[H])c2c(c(c(nc2[H])[H])[H])[H])[H])[H]', canonical_isomeric_explicit_hydrogen_smiles='[H]c1c(c(nc(c1[H])c2c(c(c(nc2[H])[H])[H])[H])[H])[H]', canonical_isomeric_explicit_hydrogen_mapped_smiles='[H:13][c:1]1[c:2]([c:7]([n:12][c:10]([c:5]1[H:17])[c:9]2[c:4]([c:3]([c:6]([n:11][c:8]2[H:20])[H:18])[H:15])[H:16])[H:19])[H:14]', molecular_formula='C10H8N2', standard_inchi='InChI=1S/C10H8N2/c1-2-7-12-10(5-1)9-4-3-6-11-8-9/h1-8H', inchi_key='VEKIYFGCEAJDDT-UHFFFAOYSA-N', fixed_hydrogen_inchi='InChI=1/C10H8N2/c1-2-7-12-10(5-1)9-4-3-6-11-8-9/h1-8H', fixed_hydrogen_inchi_key='VEKIYFGCEAJDDT-UHFFFAOYNA-N'), task_id='[H]c1c(c(nc(c1[H])c2c(c(c(nc2[H])[H])[H])[H])[H])[H]', fragment_data=[], fragmentation_engine=None)

In [15]:
# Save the schemas to file using the utility function, which can serialise
# a list of schemas. Make sure to compress the output (note the `.xz`
# extension on the file name).
from openff.bespokefit.bespoke import serialize_schema
serialize_schema(schema, "rowley.json.xz")