In [3]:
import os
import re

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import parmed as pmd
import seaborn as sns
from matplotlib.colors import colorConverter
from matplotlib.gridspec import GridSpec

%load_ext autoreload
%autoreload 2
%matplotlib inline

# Parameter comparisons

I find that a different number of dihedral parameters is applied to the same system depending on whether it is parameterized with GAFF v1.7 or with SMIRNOFF99Frosst.

## Method 1 (use ParmEd)

In [4]:
smirnoff_prmtop = pmd.load_file("systems/a-bam-p/smirnoff/a000/hg.prmtop")
smirnoff_prmtop = smirnoff_prmtop[":MGO"]

gaff_prmtop = pmd.load_file("systems/a-bam-p/bgbg-tip3p/hg.topo")
gaff_prmtop = gaff_prmtop[":MGO"]

In [11]:
print(f"SMIRNOFF99Frosst: {len(smirnoff_prmtop.dihedrals)} dihedrals and {len(smirnoff_prmtop.impropers)} impropers.")

SMIRNOFF99Frosst: 630 dihedrals and 0 impropers.


In [12]:
print(f"GAFF v1.7: {len(gaff_prmtop.dihedrals)} dihedrals and {len(gaff_prmtop.impropers)} impropers.")

GAFF v1.7: 624 dihedrals and 0 impropers.


In [33]:
def find_dihedrals(structure):
    """Tabulate every dihedral term of a structure.

    Parameters
    ----------
    structure
        Object exposing a ``dihedrals`` iterable. Each entry must provide
        ``atom1`` .. ``atom4`` (each with a ``name`` attribute) and a ``type``
        carrying ``phi_k``, ``per`` and ``phase``.

    Returns
    -------
    pandas.DataFrame
        One row per dihedral term, in iteration order, with the columns
        ``atom1``, ``atom2``, ``atom3``, ``atom4``, ``phi_k``, ``per`` and
        ``phase``. An empty input yields an empty frame with these columns.
    """
    columns = ["atom1", "atom2", "atom3", "atom4", "phi_k", "per", "phase"]
    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every iteration.
    records = [
        {
            "atom1": dihedral.atom1.name,
            "atom2": dihedral.atom2.name,
            "atom3": dihedral.atom3.name,
            "atom4": dihedral.atom4.name,
            "phi_k": dihedral.type.phi_k,
            "per": dihedral.type.per,
            "phase": dihedral.type.phase,
        }
        for dihedral in structure.dihedrals
    ]
    return pd.DataFrame(records, columns=columns)

In [37]:
# Tabulate all GAFF dihedral terms, check none were lost, then keep unique rows.
gaff_prmtop_dihedrals = find_dihedrals(gaff_prmtop)
assert len(gaff_prmtop_dihedrals) == len(gaff_prmtop.dihedrals)
gaff_prmtop_dihedrals = gaff_prmtop_dihedrals.drop_duplicates()

In [40]:
print(f"GAFF v1.7: {len(gaff_prmtop_dihedrals)} unique dihedrals.")

GAFF v1.7: 127 unique dihedrals.


In [41]:
# Tabulate all SMIRNOFF99Frosst dihedral terms, check none were lost, then keep unique rows.
smirnoff_prmtop_dihedrals = find_dihedrals(smirnoff_prmtop)
assert len(smirnoff_prmtop_dihedrals) == len(smirnoff_prmtop.dihedrals)
smirnoff_prmtop_dihedrals = smirnoff_prmtop_dihedrals.drop_duplicates()

In [43]:
print(f"SMIRNOFF99Frosst: {len(smirnoff_prmtop_dihedrals)} unique dihedrals.")

SMIRNOFF99Frosst: 131 unique dihedrals.


Where are they different?

In [55]:
# Outer merge with no explicit `on=`, so pandas joins on every column the two
# frames share (atom names plus phi_k, per and phase). The "_merge" indicator
# column records whether a row came from the left frame, the right frame, or both.
# NOTE(review): phi_k is matched by exact equality, so near-identical barrier
# heights (e.g. 0.156 vs 0.1556) are reported as differences.
dihedral_differences = smirnoff_prmtop_dihedrals.merge(gaff_prmtop_dihedrals, 
                                                       indicator=True,
                                                       suffixes=("_smirnoff", "_bgbg_tip3p"),
                                                       how="outer")

In [115]:
all_differences = dihedral_differences[dihedral_differences["_merge"] != "both"]
all_differences.head()

Unnamed: 0,atom1,atom2,atom3,atom4,phi_k,per,phase,_merge
2,O1,C1,C2,C3,0.156,3,0.0,left_only
4,O5,C1,C2,O2,1.175,2,0.0,left_only
5,O5,C1,C2,C3,0.156,3,0.0,left_only
8,O2,C2,C3,C4,0.156,3,0.0,left_only
9,C1,C2,C3,O3,0.156,3,0.0,left_only


In [70]:
smirnoff_only = all_differences[all_differences["_merge"] == "left_only"]
gaff_only = all_differences[all_differences["_merge"] == "right_only"]

In [71]:
print(f"SMIRNOFF99Frosst only: {len(smirnoff_only)} dihedrals.")
print(f"GAFF v1.7 only: {len(gaff_only)} dihedrals.")

SMIRNOFF99Frosst only: 52 dihedrals.
GAFF v1.7 only: 48 dihedrals.


Because of periodicity, some sets of four atoms are listed multiple times. **I am interested in whether there is any set of four atoms to which one force field applies a dihedral and the other force field does not.** If we ignore periodicity, is there a different number of SMIRNOFF99Frosst-only and GAFF v1.7-only dihedrals?

In [78]:
smirnoff_tmp = smirnoff_only.drop(columns=["_merge"])
gaff_tmp = gaff_only.drop(columns=["_merge"])

dihedral_atom_name_merge = smirnoff_tmp.merge(gaff_tmp,
                           on=["atom1", "atom2", "atom3", "atom4"],
                           indicator=True,
                           suffixes=("_smirnoff", "_bgbg_tip3p"),
                           how="outer")

In [82]:
dihedral_atom_name_merge[dihedral_atom_name_merge["_merge"] != "both"]

Unnamed: 0,atom1,atom2,atom3,atom4,phi_k_smirnoff,per_smirnoff,phase_smirnoff,phi_k_bgbg_tip3p,per_bgbg_tip3p,phase_bgbg_tip3p,_merge
1,O5,C1,C2,O2,1.175,2,0.0,,,,left_only
2,O5,C1,C2,C3,0.156,3,0.0,,,,left_only
46,O5,C1,C2,H2,0.25,1,0.0,,,,left_only


It seems that only SMIRNOFF99Frosst applies dihedrals to these three sets of four atoms.

In [87]:
# Atom name -> GAFF atom type. If a name occurs more than once, the last
# atom with that name determines the stored type.
name_type_map = {atom.name: atom.type for atom in gaff_prmtop.atoms}

In [90]:
for index, row in dihedral_atom_name_merge[dihedral_atom_name_merge["_merge"] != "both"].iterrows():
    print(f'{name_type_map[row["atom1"]]}-{name_type_map[row["atom2"]]}-{name_type_map[row["atom3"]]}-{name_type_map[row["atom4"]]}')

os-c3-c3-oh
os-c3-c3-c3
os-c3-c3-h1


I can confirm that I don't see these dihedrals in the `frcmod` file.

## Method 2 (look at the dihedrals through the ParmEd API)

In [129]:
pmd.tools.actions.printDihedrals(smirnoff_prmtop, ":4@O5 :4@C1 :4@C2 :4@C3")

               Atom 1               Atom 2               Atom 3               Atom 4     Height  Periodic.      Phase  EEL Scale  VDW Scale
       71   C3 (  71)       67   C2 (  67)       64   C1 (  64)       79   O5 (  79)     0.1560     3.0000     0.0000     1.2000     2.0000

In [130]:
pmd.tools.actions.printDihedrals(gaff_prmtop, ":4@O5 :4@C1 :4@C2 :4@C3")

               Atom 1               Atom 2               Atom 3               Atom 4     Height  Periodic.      Phase  EEL Scale  VDW Scale
       71   C3 (  c3)       67   C2 (  c3)       64   C1 (  c3)       79   O5 (  os)     0.1556     3.0000     0.0000     1.2000     2.0000

In [124]:
gaff_lines = str(pmd.tools.actions.printDihedrals(gaff_prmtop)).split("\n")
smirnoff_lines = str(pmd.tools.actions.printDihedrals(smirnoff_prmtop)).split("\n")

In [127]:
gaff_lines = str(pmd.tools.actions.printDihedrals(gaff_prmtop[":4"])).split("\n")

In [128]:
gaff_lines

['               Atom 1               Atom 2               Atom 3               Atom 4     Height  Periodic.      Phase  EEL Scale  VDW Scale',
 '       16   O5 (  os)       14   C5 (  c3)       17   C6 (  c3)       20   O6 (  oh)     1.1750     2.0000     0.0000     1.2000     2.0000',
 'M      16   O5 (  os)       14   C5 (  c3)       17   C6 (  c3)       20   O6 (  oh)     0.1440     3.0000     0.0000     1.2000     2.0000',
 '       12   C4 (  c3)       14   C5 (  c3)       17   C6 (  c3)       20   O6 (  oh)     0.1556     3.0000     0.0000     1.2000     2.0000',
 '       10   O3 (  oh)        8   C3 (  c3)       12   C4 (  c3)       14   C5 (  c3)     0.1556     3.0000     0.0000     1.2000     2.0000',
 '        8   C3 (  c3)        4   C2 (  c3)        1   C1 (  c3)       16   O5 (  os)     0.1556     3.0000     0.0000     1.2000     2.0000',
 'M       8   C3 (  c3)       12   C4 (  c3)       14   C5 (  c3)       16   O5 (  os)     0.1556     3.0000     0.0000     1.2000     2

In [261]:
def dihedrals_to_df(lines):
    """Parse dihedral table rows (as printed by ``printDihedrals``) into a DataFrame.

    Each row is expected to look like::

        M      16   O5 (  os)   14   C5 (  c3)   17   C6 (  c3)   20   O6 (  oh)   0.1440   3.0000   0.0000   1.2000   2.0000

    i.e. an optional one-letter flag, four ``serial name (type)`` groups and
    five numeric columns. Rows are parsed by pattern rather than by fixed
    character offsets: the earlier fixed slices kept only part of wide values
    (a phase of ``180.0000`` was read as ``0.000``) and returned atom names
    with inconsistent padding.

    Parameters
    ----------
    lines : iterable of str
        Data rows only; the header row must already be removed. Blank or
        whitespace-only lines are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per parsed line with the columns ``atom1``, ``atom1 serial``,
        ..., ``atom4``, ``atom4 serial``, ``height``, ``periodicity``,
        ``phase``, ``EEL scale`` and ``VDW scale``. Atom names are stripped of
        whitespace, serials are ``int`` and the numeric columns are ``float``.

    Raises
    ------
    ValueError
        If a non-blank line does not match the expected layout or a numeric
        field cannot be converted.
    """
    columns = [
        "atom1", "atom1 serial",
        "atom2", "atom2 serial",
        "atom3", "atom3 serial",
        "atom4", "atom4 serial",
        "height", "periodicity", "phase", "EEL scale", "VDW scale",
    ]
    numeric_columns = columns[8:]

    # serial, atom name, then the parenthesised atom type (not captured).
    atom_field = r"(\d+)\s+([^\s(]+)\s*\([^)]*\)"
    row_pattern = re.compile(
        r"^\s*[A-Za-z]?\s*"                 # optional one-letter row flag, e.g. "M"
        + r"\s*".join([atom_field] * 4)     # four atoms
        + r"\s+(\S+)" * 5                   # five numeric columns
        + r"\s*$"
    )

    records = []
    for line in lines:
        if not line.strip():
            continue
        match = row_pattern.match(line)
        if match is None:
            raise ValueError(f"Unrecognized dihedral line: {line!r}")
        fields = match.groups()

        record = {}
        for position in range(4):
            record[f"atom{position + 1}"] = fields[2 * position + 1]
            record[f"atom{position + 1} serial"] = int(fields[2 * position])
        for column, text in zip(numeric_columns, fields[8:]):
            record[column] = float(text)
        records.append(record)

    # Build the frame once; DataFrame.append was removed in pandas 2.0.
    return pd.DataFrame(records, columns=columns)

In [262]:
# Regenerate the printDihedrals text from the complete structures in this cell.
# An earlier cell rebinds `gaff_lines` to residue 4 only, so relying on that
# variable here would compare a single GAFF residue against the full
# SMIRNOFF99Frosst structure when the notebook is run top to bottom.
# [1:-1] drops the header row and the trailing empty string after the last newline.
gaff_dihedrals = dihedrals_to_df(
    str(pmd.tools.actions.printDihedrals(gaff_prmtop)).split("\n")[1:-1]
)
smirnoff_dihedrals = dihedrals_to_df(
    str(pmd.tools.actions.printDihedrals(smirnoff_prmtop)).split("\n")[1:-1]
)

In [263]:
gaff_dihedrals[gaff_dihedrals["atom1"].str.contains("O5") &
              gaff_dihedrals["atom2"].str.contains("C1") & 
              gaff_dihedrals["atom3"].str.contains("C2") &
              gaff_dihedrals["atom4"].str.contains("C3")]

Unnamed: 0,atom1,atom1 serial,atom2,atom2 serial,atom3,atom3 serial,atom4,atom4 serial,height,periodicity,phase,EEL scale,VDW scale


In [264]:
gaff_dihedrals[gaff_dihedrals["atom1"].str.contains("C3") &
              gaff_dihedrals["atom2"].str.contains("C2") & 
              gaff_dihedrals["atom3"].str.contains("C1") &
              gaff_dihedrals["atom4"].str.contains("O5")]

Unnamed: 0,atom1,atom1 serial,atom2,atom2 serial,atom3,atom3 serial,atom4,atom4 serial,height,periodicity,phase,EEL scale,VDW scale
19,C3,8,C2,4,C1,1,O5,16,0.1556,3.0,0.0,1.2,2.0
70,C3,29,C2,25,C1,22,O5,37,0.1556,3.0,0.0,1.2,2.0
118,C3,50,C2,46,C1,43,O5,58,0.1556,3.0,0.0,1.2,2.0
166,C3,71,C2,67,C1,64,O5,79,0.1556,3.0,0.0,1.2,2.0
214,C3,92,C2,88,C1,85,O5,100,0.1556,3.0,0.0,1.2,2.0
259,C3,113,C2,109,C1,106,O5,121,0.1556,3.0,0.0,1.2,2.0


In [265]:
smirnoff_dihedrals[smirnoff_dihedrals["atom1"].str.contains("O5") &
              smirnoff_dihedrals["atom2"].str.contains("C1") & 
              smirnoff_dihedrals["atom3"].str.contains("C2") &
              smirnoff_dihedrals["atom4"].str.contains("C3")]

Unnamed: 0,atom1,atom1 serial,atom2,atom2 serial,atom3,atom3 serial,atom4,atom4 serial,height,periodicity,phase,EEL scale,VDW scale
5,O5,16,C1,1,C2,4,C3,8,0.156,3.0,0.0,1.2,2.0


In [266]:
smirnoff_dihedrals[smirnoff_dihedrals["atom1"].str.contains("C3") &
              smirnoff_dihedrals["atom2"].str.contains("C2") & 
              smirnoff_dihedrals["atom3"].str.contains("C1") &
              smirnoff_dihedrals["atom4"].str.contains("O5")]

Unnamed: 0,atom1,atom1 serial,atom2,atom2 serial,atom3,atom3 serial,atom4,atom4 serial,height,periodicity,phase,EEL scale,VDW scale
35,C3,29,C2,25,C1,22,O5,37,0.156,3.0,0.0,1.2,2.0
65,C3,50,C2,46,C1,43,O5,58,0.156,3.0,0.0,1.2,2.0
95,C3,71,C2,67,C1,64,O5,79,0.156,3.0,0.0,1.2,2.0
125,C3,92,C2,88,C1,85,O5,100,0.156,3.0,0.0,1.2,2.0
155,C3,113,C2,109,C1,106,O5,121,0.156,3.0,0.0,1.2,2.0


It is odd that GAFF has _no parameters_ for O5-C1-C2-C3 and _six parameters_ for C3-C2-C1-O5, whereas SMIRNOFF99Frosst has _one parameter_ for O5-C1-C2-C3 and _five parameters_ for C3-C2-C1-O5.

From this it is clear that one of the dihedrals in SMIRNOFF99Frosst is listed with its atoms in reversed order.

We still see a different number of total dihedrals applied to the system.

In [259]:
len(gaff_dihedrals)

624

In [260]:
 len(smirnoff_dihedrals)

630

## Method 3 (compare `frcmod`)

In [276]:
pmd.tools.actions.writeFrcmod(gaff_prmtop[":4"], "results/gaff_prmtop.frcmod").execute()

In [277]:
pmd.tools.actions.writeFrcmod(smirnoff_prmtop[":4"], "results/smirnoff_prmtop.frcmod").execute()

In [7]:
# SMIRNOFF99Frosst atom type -> GAFF atom type, pairing atoms by position in
# the two structures (zip stops at the shorter atom list).
atom_map = {
    smirnoff_atom.type: gaff_atom.type
    for gaff_atom, smirnoff_atom in zip(gaff_prmtop.atoms, smirnoff_prmtop.atoms)
}

In [8]:
atom_map

{'1': 'c3',
 '2': 'h2',
 '3': 'os',
 '4': 'c3',
 '5': 'h1',
 '6': 'oh',
 '7': 'ho',
 '8': 'c3',
 '9': 'h1',
 '10': 'oh',
 '11': 'ho',
 '12': 'c3',
 '13': 'h1',
 '14': 'c3',
 '15': 'h1',
 '16': 'os',
 '17': 'c3',
 '18': 'h1',
 '19': 'h1',
 '20': 'oh',
 '21': 'ho',
 '22': 'c3',
 '23': 'h2',
 '24': 'os',
 '25': 'c3',
 '26': 'h1',
 '27': 'oh',
 '28': 'ho',
 '29': 'c3',
 '30': 'h1',
 '31': 'oh',
 '32': 'ho',
 '33': 'c3',
 '34': 'h1',
 '35': 'c3',
 '36': 'h1',
 '37': 'os',
 '38': 'c3',
 '39': 'h1',
 '40': 'h1',
 '41': 'oh',
 '42': 'ho',
 '43': 'c3',
 '44': 'h2',
 '45': 'os',
 '46': 'c3',
 '47': 'h1',
 '48': 'oh',
 '49': 'ho',
 '50': 'c3',
 '51': 'h1',
 '52': 'oh',
 '53': 'ho',
 '54': 'c3',
 '55': 'h1',
 '56': 'c3',
 '57': 'h1',
 '58': 'os',
 '59': 'c3',
 '60': 'h1',
 '61': 'h1',
 '62': 'oh',
 '63': 'ho',
 '64': 'c3',
 '65': 'h2',
 '66': 'os',
 '67': 'c3',
 '68': 'h1',
 '69': 'oh',
 '70': 'ho',
 '71': 'c3',
 '72': 'h1',
 '73': 'oh',
 '74': 'ho',
 '75': 'c3',
 '76': 'h1',
 '77': 'c3',
 '78': '

In [46]:
import re
# Read the SMIRNOFF99Frosst frcmod and isolate the text from the "DIHE" header
# up to the first following "IMPROPER".
with open("results/smirnoff_prmtop.frcmod") as frcmod_file:
    content = frcmod_file.read()
dihe_section = re.search(r'DIHE\n.*?IMPROPER', content, re.DOTALL).group()
# Drop the "DIHE" header line and the last two lines of the matched section.
dihedrals = dihe_section.split("\n")[1:-2]
# The atom-type key is whatever precedes the first run of four spaces.
atoms = [entry.split("    ")[0] for entry in dihedrals]

In [47]:
dihedrals    

['66-64-67-69    1     0.14400000    0.000  -3.0    SCEE=1.20000048 SCNB=2.0',
 '66-64-67-69    1     1.17500000    0.000   2.0    SCEE=1.0 SCNB=1.0',
 '66-64-67-71    1     0.15600000    0.000   3.0    SCEE=1.20000048 SCNB=2.0',
 '69-67-64-79    1     0.14400000    0.000  -3.0    SCEE=1.20000048 SCNB=2.0',
 '69-67-64-79    1     1.17500000    0.000   2.0    SCEE=1.0 SCNB=1.0',
 '71-67-64-79    1     0.15600000    0.000   3.0    SCEE=1.20000048 SCNB=2.0',
 '69-67-71-73    1     0.14400000    0.000  -3.0    SCEE=1.20000048 SCNB=2.0',
 '69-67-71-73    1     1.17500000    0.000   2.0    SCEE=1.0 SCNB=1.0',
 '69-67-71-75    1     0.15600000    0.000   3.0    SCEE=1.20000048 SCNB=2.0',
 '64-67-71-73    1     0.15600000    0.000   3.0    SCEE=1.20000048 SCNB=2.0',
 '64-67-71-75    1     0.18000000    0.000  -3.0    SCEE=1.20000048 SCNB=2.0',
 '64-67-71-75    1     0.25000000  180.000  -2.0    SCEE=1.0 SCNB=1.0',
 '64-67-71-75    1     0.20000000  180.000   1.0    SCEE=1.0 SCNB=1.0',
 '73-71-

In [49]:
# Translate each "a-b-c-d" key of SMIRNOFF99Frosst type indices into the
# corresponding GAFF atom types, keeping the original order of `atoms`.
amber_type_string = [
    "-".join(atom_map[index] for index in atom_string.split("-"))
    for atom_string in atoms
]

In [87]:
# One record per frcmod DIHE line, with the atom-type key replaced by its GAFF
# translation. Values stay as the raw text tokens from the file.
# The frame is built once from the collected records: DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every iteration.
smirnoff_records = []
for index, dihedral in enumerate(dihedrals):
    dihedral_split = dihedral.split()
    smirnoff_records.append(
        {
            "types": amber_type_string[index],
            "div": dihedral_split[1],
            "height": dihedral_split[2],
            "phase": dihedral_split[3],
            "per": dihedral_split[4],
            "SCEE": dihedral_split[5],
            "SCNB": dihedral_split[6],
        }
    )

smirnoff_as_amber = pd.DataFrame(
    smirnoff_records,
    columns=["types", "div", "height", "phase", "per", "SCEE", "SCNB"],
)


In [88]:
smirnoff_as_amber.head()

Unnamed: 0,types,div,height,phase,per,SCEE,SCNB
0,os-c3-c3-oh,1,0.144,0.0,-3.0,SCEE=1.20000048,SCNB=2.0
1,os-c3-c3-oh,1,1.175,0.0,2.0,SCEE=1.0,SCNB=1.0
2,os-c3-c3-c3,1,0.156,0.0,3.0,SCEE=1.20000048,SCNB=2.0
3,oh-c3-c3-os,1,0.144,0.0,-3.0,SCEE=1.20000048,SCNB=2.0
4,oh-c3-c3-os,1,1.175,0.0,2.0,SCEE=1.0,SCNB=1.0


In [86]:
smirnoff_as_amber.drop_duplicates()

Unnamed: 0,types,div,height,phase,per,SCEE,SCNB
0,os-c3-c3-oh,1,0.144,0.0,-3.0,SCEE=1.20000048,SCNB=2.0
1,os-c3-c3-oh,1,1.175,0.0,2.0,SCEE=1.0,SCNB=1.0
2,os-c3-c3-c3,1,0.156,0.0,3.0,SCEE=1.20000048,SCNB=2.0
3,oh-c3-c3-os,1,0.144,0.0,-3.0,SCEE=1.20000048,SCNB=2.0
4,oh-c3-c3-os,1,1.175,0.0,2.0,SCEE=1.0,SCNB=1.0
5,c3-c3-c3-os,1,0.156,0.0,3.0,SCEE=1.20000048,SCNB=2.0
6,oh-c3-c3-oh,1,0.144,0.0,-3.0,SCEE=1.20000048,SCNB=2.0
7,oh-c3-c3-oh,1,1.175,0.0,2.0,SCEE=1.0,SCNB=1.0
8,oh-c3-c3-c3,1,0.156,0.0,3.0,SCEE=1.20000048,SCNB=2.0
9,c3-c3-c3-oh,1,0.156,0.0,3.0,SCEE=1.20000048,SCNB=2.0


In [99]:
# Read the GAFF frcmod and keep the lines of the DIHE section: everything
# between the "DIHE" header and the last two lines of the match ending at
# the first following "IMPROPER".
with open("results/gaff_prmtop.frcmod") as frcmod_file:
    content = frcmod_file.read()
gaff_dihedrals_frcmod = (
    re.search(r'DIHE\n.*?IMPROPER', content, re.DOTALL).group().split("\n")[1:-2]
)

In [100]:
# One record per GAFF frcmod DIHE line; values stay as the raw text tokens.
# The frame is built once from the collected records: DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every iteration.
gaff_records = []
for dihedral in gaff_dihedrals_frcmod:
    dihedral_split = dihedral.split()
    gaff_records.append(
        {
            "types": dihedral_split[0],
            "div": dihedral_split[1],
            "height": dihedral_split[2],
            "phase": dihedral_split[3],
            "per": dihedral_split[4],
            "SCEE": dihedral_split[5],
            "SCNB": dihedral_split[6],
        }
    )

gaff_dihedrals = pd.DataFrame(
    gaff_records,
    columns=["types", "div", "height", "phase", "per", "SCEE", "SCNB"],
)


In [101]:
len(smirnoff_as_amber)

84

In [103]:
len(gaff_dihedrals)

28

In [104]:
len(smirnoff_as_amber.drop_duplicates())

44

In [109]:
from IPython.display import display_html
def display_side_by_side(*args):
    """Render several DataFrames next to each other in the notebook output.

    Parameters
    ----------
    *args : pandas.DataFrame
        Frames to show, left to right, in the order given.

    Returns
    -------
    None
        The combined HTML is passed to ``display_html`` with ``raw=True``.
    """
    html_str = "".join(df.to_html() for df in args)
    # Style only the opening <table ...> tags. Replacing the bare word "table"
    # also rewrote closing tags into '</table style="display:inline">' and
    # altered any header or cell text that happened to contain "table".
    inline_html = html_str.replace("<table", '<table style="display:inline"')
    display_html(inline_html, raw=True)

In [118]:
display_side_by_side(smirnoff_as_amber.drop_duplicates().sort_values(by="types"), gaff_dihedrals.sort_values(by="types"))

Unnamed: 0,types,div,height,phase,per,SCEE,SCNB
12,c3-c3-c3-c3,1,0.2,180.0,1.0,SCEE=1.0,SCNB=1.0
11,c3-c3-c3-c3,1,0.25,180.0,-2.0,SCEE=1.0,SCNB=1.0
10,c3-c3-c3-c3,1,0.18,0.0,-3.0,SCEE=1.20000048,SCNB=2.0
43,c3-c3-c3-h1,1,0.16,0.0,3.0,SCEE=1.20000048,SCNB=2.0
9,c3-c3-c3-oh,1,0.156,0.0,3.0,SCEE=1.20000048,SCNB=2.0
5,c3-c3-c3-os,1,0.156,0.0,3.0,SCEE=1.20000048,SCNB=2.0
17,c3-c3-c3-os,1,0.156,0.0,3.0,SCEE=1.0,SCNB=1.0
71,c3-c3-oh-ho,1,0.25,0.0,1.0,SCEE=1.0,SCNB=1.0
70,c3-c3-oh-ho,1,0.16,0.0,-3.0,SCEE=1.20000048,SCNB=2.0
31,c3-c3-os-c3,1,0.383,0.0,-3.0,SCEE=1.0,SCNB=1.0

Unnamed: 0,types,div,height,phase,per,SCEE,SCNB
4,c3-c3-c3-c3,1,0.2,180.0,-1.0,SCEE=1.2,SCNB=2.0
5,c3-c3-c3-c3,1,0.25,180.0,-2.0,SCEE=1.2,SCNB=2.0
6,c3-c3-c3-c3,1,0.18,0.0,3.0,SCEE=1.2,SCNB=2.0
2,c3-c3-c3-oh,1,0.15555556,0.0,3.0,SCEE=1.2,SCNB=2.0
3,c3-c3-c3-os,1,0.15555556,0.0,3.0,SCEE=1.2,SCNB=2.0
20,c3-c3-oh-ho,1,0.25,0.0,-1.0,SCEE=1.2,SCNB=2.0
21,c3-c3-oh-ho,1,0.16,0.0,3.0,SCEE=1.2,SCNB=2.0
10,c3-c3-os-c3,1,0.383,0.0,3.0,SCEE=1.2,SCNB=2.0
9,c3-c3-os-c3,1,0.1,180.0,-2.0,SCEE=1.2,SCNB=2.0
27,c3-os-c3-h1,1,0.38333333,0.0,3.0,SCEE=1.2,SCNB=2.0


In [120]:
smirnoff_as_amber.drop_duplicates().sort_values(by="types").to_csv("results/smirnoff_as_amber.frcmod.csv")
gaff_dihedrals.sort_values(by="types").to_csv("results/gaff_dihedrals.frcmod.csv")