# Fitting QCArchive torsiondrive data using QUBEKit

In [1]:
# Open up an instance of the QCPortal client.
# No address or credentials are passed, so FractalClient uses its default connection settings.
import qcportal as ptl
client = ptl.FractalClient()
# Bare last expression: show the client's repr as the cell output
client

In [2]:
# Gather the torsiondrive data set: fetch the collection of type "TorsionDriveDataset"
# named "OpenFF Fragmenter Phenyl Benchmark" through the client
ds = client.get_collection("TorsionDriveDataset", "OpenFF Fragmenter Phenyl Benchmark")
# Access the B3LYP-D3 data. The value returned by query() is echoed as the cell output;
# the records then show up as a "B3LYP-D3" column when ds.df is displayed.
ds.query("B3LYP-D3")

'B3LYP-D3'

In [3]:
# Preview the first rows of the dataset table: rows are indexed by the labelled
# molecule string and the "B3LYP-D3" column holds the TorsionDriveRecord objects
ds.df.head()

Unnamed: 0,B3LYP-D3
c1c[cH:1][c:2](cc1)[C:3](=[O:4])O,TorsionDriveRecord(id='1761822' status='COMPLE...
c1[cH:1][c:2](cnc1)[C:3](=[O:4])O,TorsionDriveRecord(id='1761823' status='COMPLE...
[cH:1]1cncc[c:2]1[C:3](=[O:4])O,TorsionDriveRecord(id='1761824' status='COMPLE...
[cH:1]1cc(nc[c:2]1[C:3](=[O:4])O)[O-],TorsionDriveRecord(id='1761825' status='COMPLE...
Cc1c[cH:1][c:2](cn1)[C:3](=[O:4])O,TorsionDriveRecord(id='1761826' status='COMPLE...


In [4]:
# Take the first complete surface and check the shape.
# Arguments are the row label (first entry of ds.df) and the specification column to plot.
ds.visualize("c1c[cH:1][c:2](cc1)[C:3](=[O:4])O", "B3LYP-D3")

In [5]:
# Extract the tdrive record to find the initial input molecule.
# .loc[row, column] selects the single TorsionDriveRecord stored for this molecule
# under the "B3LYP-D3" specification.
td = ds.df.loc["c1c[cH:1][c:2](cc1)[C:3](=[O:4])O", "B3LYP-D3"]

In [6]:
# Report which torsion was driven and confirm that a final energy can be read back
# for one grid point (180) of the scan
print(f"Torsion of interest                : {td.keywords.dihedrals}")
print(f"Final optimization energy in hartree: {td.get_final_energies(180)}")

# Work with the first listed dihedral; its two middle atom indices form the
# central bond that is rotated
dihedral = td.keywords.dihedrals[0]
scan = tuple(dihedral[1:3])

Torsion of interest                : [(3, 5, 6, 7)]
Final optimization energy in hartree: -420.53228825229655


In [7]:
# The record's initial_molecule is used as the id to look the molecule up on the server;
# the returned molecule is what the qubekit ligand will be built from
mol_id = td.initial_molecule
matching_molecules = client.query_molecules(id=mol_id)
# Keep the first result of the query
molecule = matching_molecules[0]

In [8]:
# Now make an instance of the qubekit ligand and make sure that the settings for the torsion fitting match
from QUBEKit.ligand import Ligand

# Serialise the molecule once, so the dict that is printed is exactly the one the
# ligand is built from (previously json_dict() was called twice)
molecule_json = molecule.json_dict()
print(molecule_json)
# Instance from a json dict
mol = Ligand(molecule_json, 'torsion_example')

# Now we should reset the basis and theory to match the QM data being fitted
mol.basis = 'dzvp'
mol.theory = 'b3lyp-d3bj'
# Bare last expression: display the rotatable bonds found for the ligand
mol.rotatable

{'symbols': ['C', 'C', 'C', 'C', 'C', 'C', 'C', 'O', 'O', 'H', 'H', 'H', 'H', 'H', 'H'], 'geometry': [3.56240001, -1.95833547, -0.21248155, 5.51989949, -3.08188906, 1.14922468, 1.52417026, -0.8749633, 1.06014828, 5.43917204, -3.12181328, 3.78358601, 1.44342839, -0.91488881, 3.69451784, 3.40094015, -2.03833394, 5.0562946, 3.31669192, -2.07979474, 7.80286246, 1.61026022, -1.18056487, 9.11811383, 5.36563711, -3.24586183, 8.83238218, 3.62521668, -1.9271764, -2.26358995, 7.10664707, -3.92597066, 0.15831427, 0.00041235, 0.00010031, -0.00041224, 6.97514671, -4.00227555, 4.82517315, -0.15387215, -0.06509658, 4.66710276, 5.32585842, -3.28521476, 10.68535458], 'masses': [12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 15.99491461957, 15.99491461957, 1.00782503223, 1.00782503223, 1.00782503223, 1.00782503223, 1.00782503223, 1.00782503223], 'real': [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True], 'fragments': [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]]

[(5, 6), (6, 8)]

In [9]:
from QUBEKit.parametrisation import AnteChamber

# Write the current structure out as a PDB file so it can be checked
mol.write_pdb()

# Assign initial parameters using antechamber; the AnteChamber object itself is not kept,
# it is only constructed with the ligand
AnteChamber(mol)

# Display the rotatable bonds again after parametrisation
mol.rotatable

Unable to load toolkit <openforcefield.utils.toolkits.OpenEyeToolkitWrapper object at 0x152b19ac0ac8>.


[(5, 6), (6, 8)]

In [10]:
# The ligand needs to know which torsion of the two rotatable options it has found is being fit.
# scan_order is set to a one-element list holding the central-bond tuple `scan`.
# NOTE(review): assumes the atom order in `scan` matches how the bond is listed in mol.rotatable — confirm
mol.scan_order = [scan]  # This is the central bond identified by td.keywords.dihedrals


In [11]:
# Next, pull every optimised geometry from the archive. QUBEKit ships a helper that
# packs them into the internal data structure the ligand expects.
from QUBEKit.utils.helpers import collect_archive_tdrive

# qm_scans is a dict whose keys correspond to the central dihedral bond; start from empty
mol.qm_scans = {}

# Hand the torsiondrive record and the client to the helper to get ordered lists of
# energies and geometries, then file the result under the bond being fitted
scan_results = collect_archive_tdrive(td, client)
mol.qm_scans[mol.scan_order[0]] = scan_results


In [12]:
from QUBEKit.dihedrals import TorsionOptimiser

# Quick single point fitting of the molecule: turn off the refinement method on the
# ligand and use no constraints file
mol.refinement_method = None
mol.constraints_file = None

# Build the optimiser from the ligand, switch its refinement off as well, and run the optimisation
optimiser = TorsionOptimiser(mol)
optimiser.refinement = None
optimiser.run()

Running SciPy BFGS optimiser ... 
         Current function value: 0.072605
         Iterations: 13
         Function evaluations: 113
         Gradient evaluations: 103
SciPy optimisation complete


<Figure size 432x288 with 0 Axes>

In [13]:
# This has produced a new folder with a graph of the results, but we can plot the graph again here.
# The output information is stored in the Optimiser_log.txt text file
import plotly
import plotly.graph_objects as go
# Collect the data as four parallel columns: angles, qm, mm, initial mm
data = [[], [], [], []]
with open('Optimiser_log.txt') as opt_log:
    lines = opt_log.readlines()

# Locate the table header (the first line containing 'Angle'); the numeric rows start
# on the line after it. Fail loudly if the header is missing instead of silently
# parsing only the last line of the file (or hitting a NameError on an empty file).
header_index = next((index for index, line in enumerate(lines) if 'Angle' in line), None)
if header_index is None:
    raise ValueError("No 'Angle' header line found in Optimiser_log.txt")
i = header_index + 1

# Split each row once and append every field to the column with the same position
for line in lines[i:]:
    fields = line.split()
    for x, value in enumerate(fields):
        data[x].append(float(value))

# Every trace shares the same x axis: the first parsed column (angles)
angles = data[0]

# QM reference points, drawn as markers only
QM_data = go.Scatter(x=angles, y=data[1], mode='markers', name='QM data', marker={'size': 8})

# MM energies with the final parameters, drawn as a solid line
mm_data = go.Scatter(x=angles, y=data[2], name='Final parameters', mode='lines')

# MM energies with the starting parameters, drawn as a dashed line
initial = go.Scatter(x=angles, y=data[3], name='Starting parameters', mode='lines', line={'dash': 'dash'})

# Layout: the title names the four atom indices of the first dihedral stored for the scanned bond
mol_di = mol.dihedrals[mol.scan_order[0]][0]
plot_title = f'Relative energy surface for dihedral {mol_di[0]}-{mol_di[1]}-{mol_di[2]}-{mol_di[3]}'
layout = go.Layout(
    title=plot_title,
    xaxis_title='Dihedral angle [degrees]',
    yaxis_title='Relative energy [kcal / mol]',
)

# Note: `data` is rebound here from the parsed columns to the list of traces
data = [QM_data, mm_data, initial]
fig = go.Figure(data=data, layout=layout)

# Plot and embed in ipython notebook!
plotly.offline.iplot(fig, filename='TorsionDrive fitting')
