In [1]:
import parmed as pmd
import numpy as np
import os
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.colors import colorConverter
import seaborn as sns

%load_ext autoreload
%autoreload 2
%matplotlib inline

# Dihedral parameter comparison.

Let's iterate through the structure and look at the parameters, one-by-one. This seems to be the only reliable way to compare apples with apples.

First, find the unique sets of four atoms, by atom name, in a single alpha-cyclodextrin residue. Then we'll look at the dihedrals applied to each of these four-atom sets. Thankfully, when we query ParmEd with a mask of four atoms, it matches the atoms in either order (e.g., `a-b-c-d` and `d-c-b-a`); these describe the same dihedral but may be listed separately.

In [210]:
# Load each force field's topology and immediately narrow it to residue 4,
# so that only a single residue's dihedrals are inspected below.
smirnoff_prmtop = pmd.load_file("systems/a-bam-p/smirnoff/a000/hg.prmtop")[":4"]

gaff_prmtop = pmd.load_file("systems/a-bam-p/bgbg-tip3p/hg.topo")[":4"]

In [211]:
def find_dihedrals(structure):
    """Tabulate the atom names of every dihedral in a structure.

    Parameters
    ----------
    structure : object exposing ``dihedrals``
        Every dihedral must provide ``atom1`` ... ``atom4``, each of which
        has a ``name`` attribute.

    Returns
    -------
    pandas.DataFrame
        One row per dihedral, in iteration order, with the columns
        ``atom1``, ``atom2``, ``atom3`` and ``atom4`` and a 0..n-1 index.
        The four columns are present even when there are no dihedrals.
    """
    columns = ["atom1", "atom2", "atom3", "atom4"]
    # Collect plain records and build the frame once: ``DataFrame.append`` was
    # removed in pandas 2.0 and re-copied the whole frame on every iteration.
    records = [
        {column: getattr(dihedral, column).name for column in columns}
        for dihedral in structure.dihedrals
    ]
    return pd.DataFrame(records, columns=columns)

In [212]:
# Tabulate the dihedral atom names of both single-residue structures
# (GAFF first, then SMIRNOFF).
gaff_prmtop_dihedrals, smirnoff_prmtop_dihedrals = map(
    find_dihedrals, (gaff_prmtop, smirnoff_prmtop)
)

These are just labeled by *atom name* so it is safe to drop duplicates by only keeping the unique rows.

In [213]:
# Keep one row per distinct four-atom name set; rebinding the names (instead of
# mutating in place) leaves the original row labels untouched, as before.
gaff_prmtop_dihedrals = gaff_prmtop_dihedrals.drop_duplicates()
smirnoff_prmtop_dihedrals = smirnoff_prmtop_dihedrals.drop_duplicates()

In [215]:
# Both force fields must yield the same number of distinct four-atom sets.
assert gaff_prmtop_dihedrals.shape[0] == smirnoff_prmtop_dihedrals.shape[0]

In [216]:
# Report the number of distinct four-atom name sets, counted from the GAFF table.
print(f"There are {len(gaff_prmtop_dihedrals)} sets of four atoms in the structure.")

There are 54 sets of four atoms in the structure.


In [217]:
# Preview the first rows of the GAFF table, ordered by the four atom names.
gaff_prmtop_dihedrals.sort_values(by=["atom1", "atom2", "atom3", "atom4"]).head()

Unnamed: 0,atom1,atom2,atom3,atom4
26,C1,C2,C3,C4
81,C1,C2,C3,H3
25,C1,C2,C3,O3
79,C1,C2,O2,HO2
29,C1,O5,C5,C4


I think there are 54 unique dihedrals by name in a single `:MGO` residue, and both GAFF v1.7 and SMIRNOFF99Frosst agree on the names. It is a little tricky to test for strict equality between the two DataFrames because everything is a string, but I'm fairly confident that this is correct.

Next, let's loop through each row, and print the dihedral in GAFF v1.7 and SMIRNOFF99Frosst.

In [218]:
def parse_parmed_output(printDihedrals_string):
    """Convert the text produced by ParmEd's ``printDihedrals`` into a DataFrame.

    The first line is treated as the column header and skipped; blank lines
    (such as the one left by a trailing newline) are ignored.

    Parameters
    ----------
    printDihedrals_string : str
        ``str(...)`` of a ``printDihedrals`` action: one header line followed
        by one line per dihedral term.

    Returns
    -------
    pandas.DataFrame
        One row per dihedral term with the columns ``atom1``, ``atom1 serial``,
        ..., ``atom4``, ``atom4 serial``, ``height``, ``periodicity``,
        ``phase``, ``EEL scale`` and ``VDW scale``. The atom-name columns are
        fixed-width slices and therefore keep their padding (e.g. ``"  H1 "``).
        The columns are present even when no dihedral matched.

    Raises
    ------
    ValueError
        If a serial or numeric field of a data line cannot be converted.
    """
    # (column name, character span of the atom name, character span of its serial)
    atom_fields = (
        ("atom1", slice(10, 15), slice(5, 10)),
        ("atom2", slice(30, 35), slice(25, 30)),
        ("atom3", slice(52, 57), slice(47, 52)),
        ("atom4", slice(72, 77), slice(67, 72)),
    )
    float_columns = ("height", "periodicity", "phase", "EEL scale", "VDW scale")
    columns = [
        label
        for name, _, _ in atom_fields
        for label in (name, f"{name} serial")
    ] + list(float_columns)

    records = []
    for line in printDihedrals_string.split("\n")[1:]:
        if not line.strip():
            continue
        record = {}
        for name, name_span, serial_span in atom_fields:
            record[name] = line[name_span]
            record[f"{name} serial"] = int(line[serial_span])
        # Read the numeric columns as the last five whitespace-separated tokens.
        # Slicing fixed character ranges dropped the leading characters of wider
        # values: a phase of 180.0000 was read as 0.0 and a negative height
        # lost its sign.
        numbers = line.split()[-len(float_columns):]
        record.update(zip(float_columns, map(float, numbers)))
        records.append(record)

    # Build the frame once; ``DataFrame.append`` was removed in pandas 2.0.
    return pd.DataFrame(records, columns=columns)

In [238]:
def compare_df(gaff_df, smirnoff_df):
    """Print every parameter mismatch between position-paired rows of two tables.

    Rows are paired in iteration order and pairing stops at the end of the
    shorter frame. For each pair a message is printed when the heights differ
    by more than 0.01, when the periodicities are not equal, or when the phases
    are not equal. Every message is followed by a "Look at ..." line built from
    the GAFF row's four atom-name columns. Nothing is returned.
    """
    atom_columns = ("atom1", "atom2", "atom3", "atom4")

    def report(message, row):
        # Message first, then the dash-joined atom names of the offending row.
        print(message)
        atoms = "-".join(row[column] for column in atom_columns)
        print(f"Look at {atoms}")

    for gaff_item, smirnoff_item in zip(gaff_df.iterrows(), smirnoff_df.iterrows()):
        gaff_row, smirnoff_row = gaff_item[1], smirnoff_item[1]

        height_gap = np.abs(gaff_row["height"] - smirnoff_row["height"])
        if height_gap > 0.01:
            report("Disparity in barrier height.", gaff_row)

        if gaff_row["periodicity"] != smirnoff_row["periodicity"]:
            report("Disparity in periodicity.", gaff_row)

        if gaff_row["phase"] != smirnoff_row["phase"]:
            report("Disparity in phase.", gaff_row)

        # The scaling factors could be checked here too; that is done separately further down.


In [220]:
sort_columns = ["atom1", "atom2", "atom3", "atom4", "periodicity", "phase"]
sorted_gaff_dihedrals = gaff_prmtop_dihedrals.sort_values(by=["atom1", "atom2", "atom3", "atom4"])

for index, row in sorted_gaff_dihedrals.iterrows():
    # Build a ParmEd atom mask that selects the four named atoms.
    mask = f"@{row['atom1']} @{row['atom2']} @{row['atom3']} @{row['atom4']}"

    # Render each force field's matching dihedrals as text, then parse the
    # "printDihedrals" output into a DataFrame.
    gaff_string = str(pmd.tools.actions.printDihedrals(gaff_prmtop, mask))
    gaff_df = parse_parmed_output(gaff_string)
    smirnoff_string = str(pmd.tools.actions.printDihedrals(smirnoff_prmtop, mask))
    smirnoff_df = parse_parmed_output(smirnoff_string)

    # Report atom sets for which one force field has more entries than the other.
    if len(gaff_df) != len(smirnoff_df):
        print(f"GAFF v1.7 has {len(gaff_df)} and SMIRNOFF99Frosst has {len(smirnoff_df)}")
        print(f"{row['atom1']}-{row['atom2']}-{row['atom3']}-{row['atom4']}")

    # Sort both frames the same way, then check row by row that the barrier
    # height, periodicity, and phase agree.
    compare_df(gaff_df.sort_values(by=sort_columns), smirnoff_df.sort_values(by=sort_columns))


GAFF v1.7 has 1 and SMIRNOFF99Frosst has 2
H1-C1-C2-O2
Disparity in barrier height.
Look at   H1 -   C1-  C2 -   O2
Disparity in periodicity.
Look at   H1 -   C1-  C2 -   O2


In [221]:
# Show the SMIRNOFF99Frosst terms for H1-C1-C2-O2, the atom set flagged by the comparison above.
pmd.tools.actions.printDihedrals(smirnoff_prmtop, "@H1 @C1 @C2 @O2")

               Atom 1               Atom 2               Atom 3               Atom 4     Height  Periodic.      Phase  EEL Scale  VDW Scale
        2   H1 (  65)        1   C1 (  64)        4   C2 (  67)        6   O2 (  69)     0.0000     3.0000     0.0000     1.2000     2.0000
M       2   H1 (  65)        1   C1 (  64)        4   C2 (  67)        6   O2 (  69)     0.2500     1.0000     0.0000     1.0000     1.0000

In [223]:
# Show the GAFF terms for the same H1-C1-C2-O2 atom set, for side-by-side comparison.
pmd.tools.actions.printDihedrals(gaff_prmtop, "@H1 @C1 @C2 @O2")

               Atom 1               Atom 2               Atom 3               Atom 4     Height  Periodic.      Phase  EEL Scale  VDW Scale
        2   H1 (  h2)        1   C1 (  c3)        4   C2 (  c3)        6   O2 (  oh)     0.1556     3.0000     0.0000     1.2000     2.0000

We also need to check the torsions that go between two residues.

In [224]:
# Four-atom masks for the torsions that span residues 1 and 2; every entry
# mixes ":1@" and ":2@" atoms. Entries are grouped by their two central atoms.
interresidue_masks = [
# Central bond :1@C1 - :1@O1
":1@C2 :1@C1 :1@O1 :2@C4",
":1@O5 :1@C1 :1@O1 :2@C4",
":1@H1 :1@C1 :1@O1 :2@C4",
# Central bond :1@O1 - :2@C4
":1@C1 :1@O1 :2@C4 :2@H4",
":1@C1 :1@O1 :2@C4 :2@C3",
":1@C1 :1@O1 :2@C4 :2@C5",
# Central bond :2@C4 - :2@C3
":1@O1 :2@C4 :2@C3 :2@H3",
":1@O1 :2@C4 :2@C3 :2@O3",
":1@O1 :2@C4 :2@C3 :2@C2",
# Central bond :2@C4 - :2@C5
":1@O1 :2@C4 :2@C5 :2@H5",
":1@O1 :2@C4 :2@C5 :2@O5",
":1@O1 :2@C4 :2@C5 :2@C6",
]

In [225]:
# Reload the full topologies: `smirnoff_prmtop` and `gaff_prmtop` were rebound
# to their ":4" slices earlier, so the complete structures are needed again
# before selecting residues 4 and 5 together.
smirnoff_prmtop = pmd.load_file("systems/a-bam-p/smirnoff/a000/hg.prmtop")
smirnoff_two_residue = smirnoff_prmtop[":4|:5"]

gaff_prmtop = pmd.load_file("systems/a-bam-p/bgbg-tip3p/hg.topo")
gaff_two_residue = gaff_prmtop[":4|:5"]

In [233]:
def parse_parmed_output_with_marker(printDihedrals_string):
    """Convert ParmEd ``printDihedrals`` text into a DataFrame, keeping the marker.

    The first line is treated as the column header and skipped; blank lines
    (such as the one left by a trailing newline) are ignored.

    Parameters
    ----------
    printDihedrals_string : str
        ``str(...)`` of a ``printDihedrals`` action: one header line followed
        by one line per dihedral term.

    Returns
    -------
    pandas.DataFrame
        One row per dihedral term with the columns ``marker`` (the first
        character of the line, e.g. ``"M"`` or a space), ``atom1``,
        ``atom1 serial``, ..., ``atom4``, ``atom4 serial``, ``height``,
        ``periodicity``, ``phase``, ``EEL scale`` and ``VDW scale``. The
        atom-name columns are fixed-width slices and keep their padding.
        The columns are present even when no dihedral matched.

    Raises
    ------
    ValueError
        If a serial or numeric field of a data line cannot be converted.
    """
    # (column name, character span of the atom name, character span of its serial)
    atom_fields = (
        ("atom1", slice(10, 15), slice(5, 10)),
        ("atom2", slice(30, 35), slice(25, 30)),
        ("atom3", slice(52, 57), slice(47, 52)),
        ("atom4", slice(72, 77), slice(67, 72)),
    )
    float_columns = ("height", "periodicity", "phase", "EEL scale", "VDW scale")
    columns = ["marker"] + [
        label
        for name, _, _ in atom_fields
        for label in (name, f"{name} serial")
    ] + list(float_columns)

    records = []
    for line in printDihedrals_string.split("\n")[1:]:
        if not line.strip():
            continue
        record = {"marker": line[0]}
        for name, name_span, serial_span in atom_fields:
            record[name] = line[name_span]
            record[f"{name} serial"] = int(line[serial_span])
        # Read the numeric columns as the last five whitespace-separated tokens.
        # Slicing fixed character ranges dropped the leading characters of wider
        # values: a phase of 180.0000 was read as 0.0 and a negative height
        # lost its sign.
        numbers = line.split()[-len(float_columns):]
        record.update(zip(float_columns, map(float, numbers)))
        records.append(record)

    # Build the frame once; ``DataFrame.append`` was removed in pandas 2.0.
    return pd.DataFrame(records, columns=columns)

In [235]:
# Compare every inter-residue torsion between the two force fields.
sort_columns = ["atom1", "atom2", "atom3", "atom4", "periodicity", "phase"]

for mask in interresidue_masks:
    # Parse each force field's terms for this mask, keeping the marker column.
    gaff_string = str(pmd.tools.actions.printDihedrals(gaff_two_residue, mask))
    gaff_df = parse_parmed_output_with_marker(gaff_string)
    smirnoff_string = str(pmd.tools.actions.printDihedrals(smirnoff_two_residue, mask))
    smirnoff_df = parse_parmed_output_with_marker(smirnoff_string)

    if len(gaff_df) != len(smirnoff_df):
        print(f"GAFF v1.7 has {len(gaff_df)} and SMIRNOFF99Frosst has {len(smirnoff_df)}")
        # Identify the torsion by the mask being processed. This used to format
        # `row`, a variable left over from an earlier cell's loop, so it named
        # the wrong atoms (or failed on a fresh kernel).
        print("-".join(mask.split()))

    # Sort both frames identically so that rows are paired consistently.
    gaff_sorted = gaff_df.sort_values(by=sort_columns)
    smirnoff_sorted = smirnoff_df.sort_values(by=sort_columns)

    compare_df(gaff_sorted, smirnoff_sorted)

    # NOTE(review): `compare_df_for_scaling` is not defined in the cells shown
    # above this one; confirm it is defined and executed before this cell runs.
    compare_df_for_scaling(gaff_sorted, smirnoff_sorted)


In [237]:
# Inspect the SMIRNOFF frame left over from the final iteration of the loop above
# (i.e. the last entry of `interresidue_masks`).
smirnoff_df

Unnamed: 0,marker,atom1,atom1 serial,atom2,atom2 serial,atom3,atom3 serial,atom4,atom4 serial,height,periodicity,phase,EEL scale,VDW scale
0,,O1,3,C4,33,C5,35,C6,38,0.156,3.0,0.0,1.2,2.0
