In [1]:
import parmed as pmd
import numpy as np
import os
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.colors import colorConverter
import seaborn as sns

%load_ext autoreload
%autoreload 2
%matplotlib inline
%load_ext blackcellmagic

  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:
from adjustText import adjust_text
import plotting

# Dihedral parameter comparison.

Let's iterate through the structure and look at the parameters, one-by-one. This seems to be the only reliable way to compare apples with apples.

First, find the unique set of four atoms, by atom name, in a single alpha-cyclodextrin residue. Then we'll look at the dihedrals applied to these 4 atom sets. Thankfully, when we query ParmEd for a mask of four atoms, ParmEd looks for the atoms in either ordering (e.g., `a-b-c-d` and `d-c-b-a`), which are the same dihedral, but may be listed separately.

In [4]:
smirnoff_prmtop = pmd.load_file("systems/a-bam-p/smirnoff/hg.prmtop")

bgbg_prmtop = pmd.load_file("systems/a-bam-p/bgbg-tip3p/hg.topo")

bg2bg2_prmtop = pmd.load_file("systems/a-bam-p/bg2bg2-tip3p/vac.topo")


In [5]:
def find_dihedrals(structure, residue=5):
    df = pd.DataFrame()
    for dihedral in structure[f":{residue}"].dihedrals:
        df = df.append(
            pd.DataFrame(
                {
                    "atom1": dihedral.atom1.name,
                    "atom2": dihedral.atom2.name,
                    "atom3": dihedral.atom3.name,
                    "atom4": dihedral.atom4.name,
                },
                index=[0],
            ),
            ignore_index=True,
        )
    return df

In [6]:
bgbg_prmtop_dihedrals = find_dihedrals(bgbg_prmtop)
bg2bg2_prmtop_dihedrals = find_dihedrals(bg2bg2_prmtop)
smirnoff_prmtop_dihedrals = find_dihedrals(smirnoff_prmtop)

These are just labeled by *atom name* so it is safe to drop duplicates by only keeping the unique rows.

In [7]:
bgbg_prmtop_dihedrals.drop_duplicates(inplace=True)
bg2bg2_prmtop_dihedrals.drop_duplicates(inplace=True)
smirnoff_prmtop_dihedrals.drop_duplicates(inplace=True)

In [8]:
assert len(bgbg_prmtop_dihedrals) == len(smirnoff_prmtop_dihedrals)
assert len(bgbg_prmtop_dihedrals) == len(bg2bg2_prmtop_dihedrals)

In [9]:
print(f"There are {len(bgbg_prmtop_dihedrals)} sets of four atoms in the structure.")

There are 54 sets of four atoms in the structure.


In [10]:
def parse_parmed_output(printDihedrals_string):
    df = pd.DataFrame()
    lines = printDihedrals_string.split("\n")
    for line in lines[1:-1]:
        df = df.append(
            pd.DataFrame(
                {
                    "atom1": "".join(line[10:15]),
                    "atom2": "".join(line[30:35]),
                    "atom3": "".join(line[52:57]),
                    "atom4": "".join(line[72:77]),
                    "height": float("".join(line[89:95])),
                    "periodicity": float("".join(line[100:105])),
                    "phase": float("".join(line[111:116])),
                    },
                index=[0],
            ),
            ignore_index=True,
        )
    return df

In [11]:
smirnoff_dihedrals = find_dihedrals(smirnoff_prmtop)
smirnoff_dihedrals.drop_duplicates(inplace=True)

In [12]:
bgbg_dihedrals = find_dihedrals(bgbg_prmtop)
bgbg_dihedrals.drop_duplicates(inplace=True)

In [13]:
bg2bg2_dihedrals = find_dihedrals(bg2bg2_prmtop)
bg2bg2_dihedrals.drop_duplicates(inplace=True)

In [14]:
def organize_dihedrals(dihedrals, prmtop):
    df = pd.DataFrame()
    for index, row in dihedrals.iterrows():
        atom1 = row["atom1"]
        atom2 = row["atom2"]
        atom3 = row["atom3"]
        atom4 = row["atom4"]
        mask = f":4@{atom1} :4@{atom2} :4@{atom3} :4@{atom4}"
        parmed_output = str(pmd.tools.actions.printDihedrals(prmtop, mask))
        parmed_df = parse_parmed_output(parmed_output)
        df = df.append(parmed_df, ignore_index=True)
        df = df.round(decimals = 2)
    return df

In [15]:
smirnoff = organize_dihedrals(smirnoff_dihedrals, smirnoff_prmtop)
bgbg = organize_dihedrals(bgbg_dihedrals, bgbg_prmtop)
bg2bg2 = organize_dihedrals(bg2bg2_dihedrals, bg2bg2_prmtop)

In [16]:
def differing_rows(df_one, df_two):
    df = pd.merge(df_one, df_two, how='outer', indicator=True)
    rows_in_df1_not_in_df2 = df[df['_merge']=='left_only'][df_one.columns]
    rows_in_df2_not_in_df1 = df[df['_merge']=='right_only'][df_two.columns]
    differences = rows_in_df1_not_in_df2.merge(rows_in_df2_not_in_df1, 
                                           on=["atom1", "atom2", "atom3", "atom4", "periodicity", "phase"],
                                           how="outer",
                                           suffixes = ("_one", "_two"))    
    return differences

In [17]:
differences = differing_rows(smirnoff, bgbg)
differences

Unnamed: 0,atom1,atom2,atom3,atom4,height_one,periodicity,phase,height_two
0,H1,C1,C2,O2,0.0,3.0,0.0,0.16
1,H1,C1,C2,O2,0.25,1.0,0.0,
2,H1,C1,C2,H2,0.15,3.0,0.0,0.16
3,H2,C2,C3,H3,0.15,3.0,0.0,0.16
4,H3,C3,C4,H4,0.15,3.0,0.0,0.16
5,H4,C4,C5,H5,0.15,3.0,0.0,0.16
6,H5,C5,C6,H61,0.15,3.0,0.0,0.16
7,H5,C5,C6,H62,0.15,3.0,0.0,0.16


All of the differences come from H-C-C-O, or H-C-C-H (which is a really tiny difference.) 

For each of these differences, we should find the other terms involving these atoms.

In [18]:
def detail_differences(differences, df_one, df_two):

    comprehensive_differences = pd.DataFrame()

    for index, row in differences.iterrows():
        one = df_one[
            (df_one["atom1"] == row["atom1"])
            & (df_one["atom2"] == row["atom2"])
            & (df_one["atom3"] == row["atom3"])
            & (df_one["atom4"] == row["atom4"])
        ]

        two = df_two[
            (df_two["atom1"] == row["atom1"])
            & (df_two["atom2"] == row["atom2"])
            & (df_two["atom3"] == row["atom3"])
            & (df_two["atom4"] == row["atom4"])
        ]

        this_dihedral_differences = one.merge(
            two,
            how="outer",
            on=["atom1", "atom2", "atom3", "atom4", "periodicity", "phase"],
        )
        comprehensive_differences = comprehensive_differences.append(
            this_dihedral_differences, ignore_index=True
        )
    rows_with_missing_data = comprehensive_differences[
        comprehensive_differences.isnull().any(axis=1)
    ]
    dihedrals_to_exclude = pd.DataFrame()
    for index, row in rows_with_missing_data.iterrows():
        dihedrals_to_exclude = dihedrals_to_exclude.append(
            pd.DataFrame(
                {
                    "atom1": row["atom1"],
                    "atom2": row["atom2"],
                    "atom3": row["atom3"],
                    "atom4": row["atom4"],
                },
                index=[0],
            ),
            ignore_index=True,
        )

    if len(comprehensive_differences) > 1:
        # https://stackoverflow.com/questions/33282119/pandas-filter-dataframe-by-another-dataframe-by-row-elements
        keys = list(dihedrals_to_exclude.columns.values)
        i1 = comprehensive_differences.set_index(keys).index
        i2 = dihedrals_to_exclude.set_index(keys).index
        comprehensive_differences = comprehensive_differences[~i1.isin(i2)]

        comprehensive_differences = comprehensive_differences.sort_values(
            by=["atom1", "atom2", "atom3", "atom4", "periodicity"]
        )
        return comprehensive_differences[
            [
                "atom1",
                "atom2",
                "atom3",
                "atom4",
                "periodicity",
                "phase",
                "height_x",
                "height_y",
            ]
        ]
    else:
        return pd.DataFrame()


In [19]:
detail_differences(differences, smirnoff, bgbg)

Unnamed: 0,atom1,atom2,atom3,atom4,periodicity,phase,height_x,height_y
4,H1,C1,C2,H2,3.0,0.0,0.15,0.16
5,H2,C2,C3,H3,3.0,0.0,0.15,0.16
6,H3,C3,C4,H4,3.0,0.0,0.15,0.16
7,H4,C4,C5,H5,3.0,0.0,0.15,0.16
8,H5,C5,C6,H61,3.0,0.0,0.15,0.16
9,H5,C5,C6,H62,3.0,0.0,0.15,0.16


Now let's print things that aren't *just* differences of in height.

In [20]:
def missing(df):
    missing = df[df.isnull().any(axis=1)]
    return missing

In [21]:
missing_dihedrals = missing(differences)
missing_dihedrals.head()

Unnamed: 0,atom1,atom2,atom3,atom4,height_one,periodicity,phase,height_two
1,H1,C1,C2,O2,0.25,1.0,0.0,


But to tell the complete story, for each of these dihedrals, we should print what either FF has for those given atoms (i.e., not just the missing terms here, but all the terms for these atoms.)

In [22]:
def detail_missing(missing, df_one, df_two):

    comprehensive_missing = pd.DataFrame()

    for index, row in missing.iterrows():
        one = df_one[
            (df_one["atom1"] == row["atom1"])
            & (df_one["atom2"] == row["atom2"])
            & (df_one["atom3"] == row["atom3"])
            & (df_one["atom4"] == row["atom4"])
        ]

        two = df_two[
            (df_two["atom1"] == row["atom1"])
            & (df_two["atom2"] == row["atom2"])
            & (df_two["atom3"] == row["atom3"])
            & (df_two["atom4"] == row["atom4"])
        ]

        this_dihedral_differences = one.merge(
            two,
            how="outer",
            on=["atom1", "atom2", "atom3", "atom4", "periodicity", "phase"],
        )
        comprehensive_missing = comprehensive_missing.append(
            this_dihedral_differences, ignore_index=True
        )
    comprehensive_missing = comprehensive_missing.replace(np.nan, "--", regex=True)


    comprehensive_missing = comprehensive_missing.sort_values(by=["atom1", "atom2", "atom3", "atom4", "periodicity"])
    return comprehensive_missing[["atom1", "atom2", "atom3", "atom4", "periodicity", "phase", "height_x", "height_y"]]

In [23]:
detail_missing(missing_dihedrals, smirnoff, bgbg)

Unnamed: 0,atom1,atom2,atom3,atom4,periodicity,phase,height_x,height_y
1,H1,C1,C2,O2,1.0,0.0,0.25,--
0,H1,C1,C2,O2,3.0,0.0,0.0,0.16


## Work specifically on the interresidue dihedrals

In [24]:
interresidue_masks = [
":5@C2 :5@C1 :5@O1 :6@C4",
":5@O5 :5@C1 :5@O1 :6@C4",
":5@H1 :5@C1 :5@O1 :6@C4",
":5@C1 :5@O1 :6@C4 :6@H4",
":5@C1 :5@O1 :6@C4 :6@C3",
":5@C1 :5@O1 :6@C4 :6@C5",
":5@O1 :6@C4 :6@C3 :6@H3",
":5@O1 :6@C4 :6@C3 :6@O3",
":5@O1 :6@C4 :6@C3 :6@C2",
":5@O1 :6@C4 :6@C5 :6@H5",
":5@O1 :6@C4 :6@C5 :6@O5",
":5@O1 :6@C4 :6@C5 :6@C6",
]

In [25]:
def organize_interresidue_dihedrals(masks, prmtop):
    df = pd.DataFrame()
    for index, mask in enumerate(masks):
        atoms = mask.split(" ")
        mask = f":4@{atoms[0]} :4@{atoms[1]} :4@{atoms[2]} :4@{atoms[3]}"
        parmed_output = str(pmd.tools.actions.printDihedrals(prmtop, mask))
        parmed_df = parse_parmed_output(parmed_output)
        df = df.append(parmed_df, ignore_index=True)
        df = df.round(decimals = 2)
    return df

In [26]:
smirnoff_interresidue = organize_interresidue_dihedrals(interresidue_masks, smirnoff_prmtop)
smirnoff_interresidue.drop_duplicates(inplace=True)

bgbg_interresidue = organize_interresidue_dihedrals(interresidue_masks, bgbg_prmtop)
bgbg_interresidue.drop_duplicates(inplace=True)

In [27]:
interresidue_differences = differing_rows(smirnoff_interresidue, bgbg_interresidue)

In [28]:
interresidue_differences.head()

Unnamed: 0,height_one,atom1,atom2,atom3,atom4,height_two,periodicity,phase


# Now, test on BG2BG2

In [29]:
differences = differing_rows(smirnoff, bg2bg2)

Here are the dihedral differences that don't have any missing terms.

In [30]:
detailed_differences = detail_differences(differences, smirnoff, bg2bg2).drop_duplicates()

In [31]:
detailed_differences.head()

Unnamed: 0,atom1,atom2,atom3,atom4,periodicity,phase,height_x,height_y
20,C1,C2,C3,C4,1.0,0.0,0.2,0.11
19,C1,C2,C3,C4,2.0,0.0,0.25,0.29
18,C1,C2,C3,C4,3.0,0.0,0.18,0.13
17,C1,C2,C3,O3,3.0,0.0,0.16,0.21
114,C1,O5,C5,H5,3.0,0.0,0.38,0.34


In [32]:
missing_dihedrals = missing(differences)
detailed = detail_missing(missing_dihedrals, smirnoff, bg2bg2)

And here are the differences with missing values.

In [33]:
detailed.head()

Unnamed: 0,atom1,atom2,atom3,atom4,periodicity,phase,height_x,height_y
5,C1,C2,O2,HO2,1.0,0.0,0.25,--
4,C1,C2,O2,HO2,3.0,0.0,0.16,0
26,C1,O5,C5,C4,1.0,0.0,--,0
25,C1,O5,C5,C4,2.0,0.0,0.1,0.16
24,C1,O5,C5,C4,3.0,0.0,0.38,0.24


In [34]:
bg2bg2_interresidue = organize_interresidue_dihedrals(interresidue_masks, bg2bg2_prmtop)
bg2bg2_interresidue.drop_duplicates(inplace=True)

In [35]:
interresidue_differences = differing_rows(smirnoff_interresidue, bg2bg2_interresidue)

In [36]:
interresidue_detail_differences = detail_differences(interresidue_differences, smirnoff, bg2bg2)

In [37]:
interresidue_detail_differences.head()

Thus, all of the differences from inter-residue dihedrals arise due to mismatched periodicity and phases.

In [38]:
missing_interresidue = missing(interresidue_differences)

In [39]:
detailed_interresidue = detail_missing(missing_interresidue, smirnoff_interresidue, bg2bg2_interresidue)

In [40]:
detailed_interresidue

Unnamed: 0,atom1,atom2,atom3,atom4,periodicity,phase,height_x,height_y
5,C1,O1,C4,C3,1.0,0.0,--,0.0
4,C1,O1,C4,C3,2.0,0.0,0.1,0.16
3,C1,O1,C4,C3,3.0,0.0,0.38,0.24
8,C1,O1,C4,C5,1.0,0.0,--,0.0
7,C1,O1,C4,C5,2.0,0.0,0.1,0.16
6,C1,O1,C4,C5,3.0,0.0,0.38,0.24
2,C2,C1,O1,C4,1.0,0.0,--,0.0
1,C2,C1,O1,C4,2.0,0.0,0.1,0.16
0,C2,C1,O1,C4,3.0,0.0,0.38,0.24
11,O1,C4,C3,O3,1.0,0.0,--,0.02
