In [376]:
import parmed as pmd
import numpy as np
import os
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.colors import colorConverter
import seaborn as sns

%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [377]:
from adjustText import adjust_text
import plotting

# Dihedral parameter comparison.

Let's iterate through the structure and look at the parameters, one-by-one. This seems to be the only reliable way to compare apples with apples.

First, find the unique set of four atoms, by atom name, in a single alpha-cyclodextrin residue. Then we'll look at the dihedrals applied to these 4 atom sets. Thankfully, when we query ParmEd for a mask of four atoms, ParmEd looks for the atoms in either ordering (e.g., `a-b-c-d` and `d-c-b-a`), which are the same dihedral, but may be listed separately.

In [378]:
# Load the three files whose dihedral terms are compared below
# (paths suggest SMIRNOFF, BGBG and BG2BG2 variants of the a-bam-p system).
smirnoff_prmtop = pmd.load_file("systems/a-bam-p/smirnoff/a000/hg.prmtop")

bgbg_prmtop = pmd.load_file("systems/a-bam-p/bgbg-tip3p/hg.topo")

# NOTE(review): this one loads `vac.topo` while the other two load `hg.*`
# files — confirm the different file is intentional.
bg2bg2_prmtop = pmd.load_file("systems/a-bam-p/bg2bg2-tip3p/vac.topo")


In [379]:
def find_dihedrals(structure, residue=5):
    """Collect the atom names of every dihedral in one residue of ``structure``.

    Parameters
    ----------
    structure
        Object that can be indexed with a residue mask string such as
        ``":5"`` and whose selection exposes a ``dihedrals`` iterable. Each
        dihedral must provide ``atom1`` ... ``atom4``, each with a ``name``.
    residue : int, default 5
        Residue number interpolated into the ``":{residue}"`` selection mask.

    Returns
    -------
    pandas.DataFrame
        One row per dihedral, in iteration order, with the string columns
        ``atom1``, ``atom2``, ``atom3`` and ``atom4``. Duplicate rows are
        kept; the frame is empty when the selection has no dihedrals.
    """
    # Gather every row first and build the frame once. ``DataFrame.append``
    # was removed in pandas 2.0 and copied the whole frame on each call.
    records = [
        {
            "atom1": dihedral.atom1.name,
            "atom2": dihedral.atom2.name,
            "atom3": dihedral.atom3.name,
            "atom4": dihedral.atom4.name,
        }
        for dihedral in structure[f":{residue}"].dihedrals
    ]
    return pd.DataFrame(records)

In [380]:
# Atom-name quadruples for every dihedral in the default residue of each file.
bgbg_prmtop_dihedrals, bg2bg2_prmtop_dihedrals, smirnoff_prmtop_dihedrals = (
    find_dihedrals(prmtop)
    for prmtop in (bgbg_prmtop, bg2bg2_prmtop, smirnoff_prmtop)
)

These are just labeled by *atom name* so it is safe to drop duplicates by only keeping the unique rows.

In [381]:
# Keep a single row per distinct atom-name quadruple.
bgbg_prmtop_dihedrals = bgbg_prmtop_dihedrals.drop_duplicates()
bg2bg2_prmtop_dihedrals = bg2bg2_prmtop_dihedrals.drop_duplicates()
smirnoff_prmtop_dihedrals = smirnoff_prmtop_dihedrals.drop_duplicates()

In [382]:
# All three files must expose the same number of unique atom-name quadruples.
assert (
    len(smirnoff_prmtop_dihedrals)
    == len(bgbg_prmtop_dihedrals)
    == len(bg2bg2_prmtop_dihedrals)
)

In [436]:
# Report the number of unique atom-name quadruples (counted on the BGBG frame;
# the assertions in the previous cell require the other two to match).
print(f"There are {len(bgbg_prmtop_dihedrals)} sets of four atoms in the structure.")

There are 54 sets of four atoms in the structure.


In [437]:
def parse_parmed_output(printDihedrals_string):
    """Parse the fixed-width text of a ParmEd ``printDihedrals`` report.

    The first line (header) and the last line (the empty string left after
    the final newline) are skipped. Every remaining line is sliced at fixed
    character offsets.

    Parameters
    ----------
    printDihedrals_string : str
        Text of the report, one dihedral term per line.

    Returns
    -------
    pandas.DataFrame
        One row per parsed line with columns ``atom1`` ... ``atom4`` (raw
        5-character slices, whitespace is NOT stripped) and the float
        columns ``height``, ``periodicity`` and ``phase``. The frame is empty
        when there are no data lines.

    Raises
    ------
    ValueError
        If a numeric slice cannot be converted with ``float``.
    """
    # Build the frame once from a list of records. ``DataFrame.append`` was
    # removed in pandas 2.0 and made the original loop quadratic.
    records = [
        {
            "atom1": line[10:15],
            "atom2": line[30:35],
            "atom3": line[52:57],
            "atom4": line[72:77],
            "height": float(line[89:95]),
            "periodicity": float(line[100:105]),
            "phase": float(line[111:116]),
        }
        for line in printDihedrals_string.split("\n")[1:-1]
    ]
    return pd.DataFrame(records)

In [438]:
# Unique atom-name quadruples for the SMIRNOFF file (same recipe as the
# `smirnoff_prmtop_dihedrals` frame built earlier).
smirnoff_dihedrals = find_dihedrals(smirnoff_prmtop).drop_duplicates()

In [439]:
# Unique atom-name quadruples for the BGBG file.
bgbg_dihedrals = find_dihedrals(bgbg_prmtop).drop_duplicates()

In [440]:
# Unique atom-name quadruples for the BG2BG2 file.
bg2bg2_dihedrals = find_dihedrals(bg2bg2_prmtop).drop_duplicates()

In [441]:
def organize_dihedrals(dihedrals, prmtop, residue=4):
    """Look up the dihedral terms ParmEd reports for each atom-name quadruple.

    For every row of ``dihedrals`` a four-selector mask
    ``":{residue}@a1 :{residue}@a2 :{residue}@a3 :{residue}@a4"`` is passed
    to ``pmd.tools.actions.printDihedrals``. The textual report is then
    parsed with ``parse_parmed_output``.

    Parameters
    ----------
    dihedrals : pandas.DataFrame
        Frame with the columns ``atom1`` ... ``atom4`` (atom names).
    prmtop
        Loaded structure handed unchanged to ``printDihedrals``.
    residue : int, default 4
        Residue number used in every selector. The default keeps the value
        that was previously hard-coded.
        NOTE(review): ``find_dihedrals`` defaults to residue 5; this only
        gives comparable results if atom names repeat across residues.

    Returns
    -------
    pandas.DataFrame
        All parsed terms concatenated in row order with a fresh integer
        index, rounded to two decimals. The frame is empty if nothing was
        parsed.
    """
    atom_columns = ("atom1", "atom2", "atom3", "atom4")
    frames = []
    for _, row in dihedrals.iterrows():
        mask = " ".join(f":{residue}@{row[column]}" for column in atom_columns)
        parmed_output = str(pmd.tools.actions.printDihedrals(prmtop, mask))
        terms = parse_parmed_output(parmed_output)
        # Skip empty reports so they cannot influence the dtypes of the result.
        if not terms.empty:
            frames.append(terms)

    if not frames:
        return pd.DataFrame()

    # One concat and one round at the end replace the per-iteration
    # ``DataFrame.append`` (removed in pandas 2.0) and repeated rounding.
    return pd.concat(frames, ignore_index=True).round(decimals=2)

In [442]:
# Tabulate the dihedral terms (height, periodicity, phase) that each file
# assigns to its unique atom-name quadruples.
smirnoff = organize_dihedrals(smirnoff_dihedrals, smirnoff_prmtop)
bgbg = organize_dihedrals(bgbg_dihedrals, bgbg_prmtop)
bg2bg2 = organize_dihedrals(bg2bg2_dihedrals, bg2bg2_prmtop)

In [443]:
def differing_rows(df_one, df_two):
    """Pair up the rows that occur in only one of the two frames.

    The frames are outer-merged on every column they share, with an
    indicator column marking rows found on just one side. Those one-sided
    rows are then outer-merged on atom names, periodicity and phase, so that
    terms differing only in their remaining columns land on the same row.
    The remaining shared columns get the suffixes ``_one`` and ``_two``.

    Returns
    -------
    pandas.DataFrame
        The paired one-sided rows. A term with no counterpart on the other
        side has nulls in that side's suffixed columns.
    """
    tagged = df_one.merge(df_two, how="outer", indicator=True)

    only_in_one = tagged.loc[tagged["_merge"] == "left_only", df_one.columns]
    only_in_two = tagged.loc[tagged["_merge"] == "right_only", df_two.columns]

    key_columns = ["atom1", "atom2", "atom3", "atom4", "periodicity", "phase"]
    return only_in_one.merge(
        only_in_two,
        on=key_columns,
        how="outer",
        suffixes=("_one", "_two"),
    )

In [444]:
# Terms that differ between SMIRNOFF (suffix `_one`) and BGBG (suffix `_two`).
differences = differing_rows(smirnoff, bgbg)
differences.head()

Unnamed: 0,atom1,atom2,atom3,atom4,height_one,periodicity,phase,height_two
0,H1,C1,C2,O2,0.0,3.0,0.0,0.16
1,H1,C1,C2,O2,0.25,1.0,0.0,
2,H1,C1,C2,H2,0.15,3.0,0.0,0.16
3,H2,C2,C3,H3,0.15,3.0,0.0,0.16
4,H3,C3,C4,H4,0.15,3.0,0.0,0.16


Now let's print things that aren't *just* differences in height.

In [445]:
def missing(df):
    """Return only the rows of ``df`` that hold a null in at least one column."""
    has_null = df.isna().any(axis="columns")
    return df.loc[has_null]

In [446]:
# Rows of `differences` with a null on one side, i.e. a term present in only
# one of the two frames.
missing_dihedrals = missing(differences)
missing_dihedrals.head()

Unnamed: 0,atom1,atom2,atom3,atom4,height_one,periodicity,phase,height_two
1,H1,C1,C2,O2,0.25,1.0,0.0,


But to tell the complete story, for each of these dihedrals, we should print what each force field has for those atoms (i.e., not just the missing terms, but all the terms for these atoms).

In [447]:
def _terms_for_atoms(df, atom_names):
    """Rows of ``df`` whose ``atom1`` ... ``atom4`` equal ``atom_names`` in order."""
    name1, name2, name3, name4 = atom_names
    return df[
        (df["atom1"] == name1)
        & (df["atom2"] == name2)
        & (df["atom3"] == name3)
        & (df["atom4"] == name4)
    ]


def detail_missing(missing, df_one, df_two):
    """List every term both frames hold for the dihedrals named in ``missing``.

    For each distinct ``atom1`` ... ``atom4`` quadruple in ``missing``, the
    matching rows of ``df_one`` and ``df_two`` are outer-merged on atom
    names, periodicity and phase. This shows the complete set of terms, not
    only the one that is absent on one side.

    Parameters
    ----------
    missing : pandas.DataFrame
        Rows identifying the dihedrals of interest; only the four atom-name
        columns are read.
    df_one, df_two : pandas.DataFrame
        Term tables with columns ``atom1`` ... ``atom4``, ``periodicity``,
        ``phase`` and ``height``.

    Returns
    -------
    pandas.DataFrame
        Columns ``atom1`` ... ``atom4``, ``periodicity``, ``phase``,
        ``height_x`` (from ``df_one``) and ``height_y`` (from ``df_two``),
        sorted by atom names and periodicity. Nulls are shown as ``"--"``.
        When ``missing`` has no rows, an empty frame with those columns is
        returned.
    """
    atom_columns = ["atom1", "atom2", "atom3", "atom4"]
    key_columns = atom_columns + ["periodicity", "phase"]
    output_columns = key_columns + ["height_x", "height_y"]

    # Nothing to detail: previously this fell through to ``sort_values`` on a
    # column-less frame and raised ``KeyError``.
    if missing.empty:
        return pd.DataFrame(columns=output_columns)

    # A dihedral that is missing several terms shows up on several rows of
    # ``missing``. Visit each quadruple once so its terms are not repeated.
    unique_atoms = missing[atom_columns].drop_duplicates()

    per_dihedral = [
        _terms_for_atoms(df_one, atom_names).merge(
            _terms_for_atoms(df_two, atom_names),
            how="outer",
            on=key_columns,
        )
        for atom_names in unique_atoms.itertuples(index=False, name=None)
    ]

    # Single concat instead of ``DataFrame.append`` (removed in pandas 2.0).
    comprehensive_missing = pd.concat(per_dihedral, ignore_index=True)
    comprehensive_missing = comprehensive_missing.replace(np.nan, "--")
    comprehensive_missing = comprehensive_missing.sort_values(
        by=atom_columns + ["periodicity"]
    )
    return comprehensive_missing[output_columns]

In [448]:
# Full term listing for the dihedrals flagged above:
# `height_x` comes from SMIRNOFF, `height_y` from BGBG.
detail_missing(missing_dihedrals, smirnoff, bgbg)

Unnamed: 0,atom1,atom2,atom3,atom4,periodicity,phase,height_x,height_y
1,H1,C1,C2,O2,1.0,0.0,0.25,--
0,H1,C1,C2,O2,3.0,0.0,0.0,0.16


## Work specifically on the interresidue dihedrals

In [455]:
# Each entry is four space-separated `:residue@atom` selectors naming one
# dihedral. Every entry contains both `:5@O1` and `:6@C4`, so each dihedral
# spans residues 5 and 6.
interresidue_masks = [
":5@C2 :5@C1 :5@O1 :6@C4",
":5@O5 :5@C1 :5@O1 :6@C4",
":5@H1 :5@C1 :5@O1 :6@C4",
":5@C1 :5@O1 :6@C4 :6@H4",
":5@C1 :5@O1 :6@C4 :6@C3",
":5@C1 :5@O1 :6@C4 :6@C5",
":5@O1 :6@C4 :6@C3 :6@H3",
":5@O1 :6@C4 :6@C3 :6@O3",
":5@O1 :6@C4 :6@C3 :6@C2",
":5@O1 :6@C4 :6@C5 :6@H5",
":5@O1 :6@C4 :6@C5 :6@O5",
":5@O1 :6@C4 :6@C5 :6@C6",
]

In [458]:
def organize_interresidue_dihedrals(masks, prmtop):
    """Look up the dihedral terms ParmEd reports for explicit four-atom masks.

    Parameters
    ----------
    masks : iterable of str
        Each item is a complete mask of four space-separated
        ``:residue@atom`` selectors, e.g. ``":5@C2 :5@C1 :5@O1 :6@C4"``.
    prmtop
        Loaded structure handed unchanged to
        ``pmd.tools.actions.printDihedrals``.

    Returns
    -------
    pandas.DataFrame
        All parsed terms concatenated in mask order with a fresh integer
        index, rounded to two decimals. The frame is empty if nothing was
        parsed.
    """
    frames = []
    for mask in masks:
        # The masks already carry their own residue numbers. The previous
        # code prefixed every selector with ":4@", producing malformed
        # selectors such as ":4@:5@C2" — the residue-4 prefix cannot be part
        # of a selection meant to span residues 5 and 6. Pass each mask
        # through unchanged.
        # NOTE(review): results are presumed unchanged for the masks used in
        # this notebook — confirm by re-running.
        parmed_output = str(pmd.tools.actions.printDihedrals(prmtop, mask))
        terms = parse_parmed_output(parmed_output)
        # Skip empty reports so they cannot influence the dtypes of the result.
        if not terms.empty:
            frames.append(terms)

    if not frames:
        return pd.DataFrame()

    # One concat and one round at the end replace the per-iteration
    # ``DataFrame.append`` (removed in pandas 2.0) and repeated rounding.
    return pd.concat(frames, ignore_index=True).round(decimals=2)

In [462]:
# Unique inter-residue dihedral terms reported for the SMIRNOFF file.
smirnoff_interresidue = organize_interresidue_dihedrals(
    interresidue_masks, smirnoff_prmtop
).drop_duplicates()

# Unique inter-residue dihedral terms reported for the BGBG file.
bgbg_interresidue = organize_interresidue_dihedrals(
    interresidue_masks, bgbg_prmtop
).drop_duplicates()

In [463]:
# Inter-residue terms that differ between SMIRNOFF and BGBG.
interresidue_differences = differing_rows(smirnoff_interresidue, bgbg_interresidue)

In [464]:
# The recorded output below has a header but no rows: no differing
# inter-residue terms were found for SMIRNOFF vs. BGBG.
interresidue_differences.head()

Unnamed: 0,height_one,atom1,atom2,atom3,atom4,height_two,periodicity,phase


# Now, test on BG2BG2

In [449]:
# Repeat the intra-residue comparison for SMIRNOFF vs. BG2BG2.
# NOTE(review): this rebinds `differences` and `missing_dihedrals`, replacing
# the SMIRNOFF-vs-BGBG results computed earlier in the notebook.
differences = differing_rows(smirnoff, bg2bg2)
missing_dihedrals = missing(differences)
detailed = detail_missing(missing_dihedrals, smirnoff, bg2bg2)

In [450]:
# Copy the table to the system clipboard for pasting into a spreadsheet.
# NOTE(review): presumably requires a clipboard backend on the host, so this
# cell may fail in a headless "Restart & Run All" — confirm.
detailed.to_clipboard(excel=True, index=False)

In [453]:
# Save the SMIRNOFF-vs-BG2BG2 intra-residue table next to the notebook.
detailed.to_csv("10b.csv", index=False)

In [465]:
# Unique inter-residue dihedral terms reported for the BG2BG2 file.
bg2bg2_interresidue = organize_interresidue_dihedrals(
    interresidue_masks, bg2bg2_prmtop
).drop_duplicates()

In [466]:
# Inter-residue terms that differ between SMIRNOFF and BG2BG2.
# NOTE(review): rebinds `interresidue_differences`, replacing the
# SMIRNOFF-vs-BGBG result computed earlier.
interresidue_differences = differing_rows(smirnoff_interresidue, bg2bg2_interresidue)

In [468]:
# Keep only the inter-residue terms that exist on one side but not the other.
missing_interresidue = missing(interresidue_differences)

In [469]:
# Full term listing for those dihedrals:
# `height_x` comes from SMIRNOFF, `height_y` from BG2BG2.
detailed_interresidue = detail_missing(missing_interresidue, smirnoff_interresidue, bg2bg2_interresidue)

In [470]:
# Display the table; "--" marks a term that one of the two frames lacks.
detailed_interresidue

Unnamed: 0,atom1,atom2,atom3,atom4,periodicity,phase,height_x,height_y
5,C1,O1,C4,C3,1.0,0.0,--,0.0
4,C1,O1,C4,C3,2.0,0.0,0.1,0.16
3,C1,O1,C4,C3,3.0,0.0,0.38,0.24
8,C1,O1,C4,C5,1.0,0.0,--,0.0
7,C1,O1,C4,C5,2.0,0.0,0.1,0.16
6,C1,O1,C4,C5,3.0,0.0,0.38,0.24
2,C2,C1,O1,C4,1.0,0.0,--,0.0
1,C2,C1,O1,C4,2.0,0.0,0.1,0.16
0,C2,C1,O1,C4,3.0,0.0,0.38,0.24
11,O1,C4,C3,O3,1.0,0.0,--,0.02


In [473]:
# Save the SMIRNOFF-vs-BG2BG2 inter-residue table.
# NOTE(review): the file name says "bg2bg" while the data come from the
# `bg2bg2` frames — confirm the name is intended.
detailed_interresidue.to_csv("smirnoff_bg2bg_interresidue_dihedral_differences.csv", index=False)