In [14]:
import pandas as pd
import scipy as sc
import numpy as np
import os


import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.colors import colorConverter
import seaborn as sns

%load_ext autoreload
%autoreload 2
%load_ext blackcellmagic
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The blackcellmagic extension is already loaded. To reload it, use:
  %reload_ext blackcellmagic


Project-level imports.

In [15]:
from constants import guest_types
from constants import colors
from bootstrap import bootstrap
from bootstrap import thermodynamic_bootstrap
import plotting

# Load in results

## Experimental data from Rekharsky et al.

In [16]:
from constants import experimental_deltaG

In [17]:
experimental_list = experimental_deltaG.split("\n")
experimental = pd.DataFrame([i.split("\t") for i in experimental_list],
            columns=["System", "Delta G", "SEM"])
experimental["Delta G"] = pd.to_numeric(experimental["Delta G"])
experimental["SEM"] = pd.to_numeric(experimental["SEM"])

## Load in the BGBG-TIP3P and BGBG-TIP3P data

In [18]:
from constants import systems

In [19]:
bgbg_tip3p = pd.DataFrame()

for system in systems:

    # BGBG-TIP3P (GAFF v1.7)

    prefix = os.path.join("systems", system, "bgbg-tip3p")
    bgbg_tip3p_attach = np.genfromtxt(os.path.join(prefix, "ti-a.dat"))
    bgbg_tip3p_pull = np.genfromtxt(os.path.join(prefix, "ti-u.dat"))

    if system[0] == "a":
        bgbg_tip3p_release = np.genfromtxt(
            os.path.join("systems", "a-release", "bgbg-tip3p", "ti-r.dat")
        )
    else:
        bgbg_tip3p_release = np.genfromtxt(
            os.path.join("systems", "b-release", "bgbg-tip3p", "ti-r.dat")
        )
    bgbg_tip3p_analytic = 7.14

    delta_g = -1 * (
        bgbg_tip3p_attach[-1, 1]
        + bgbg_tip3p_pull[-1, 1]
        - bgbg_tip3p_release[-1, 1]
        - bgbg_tip3p_analytic
    )
    delta_g_sem = np.sqrt(
        bgbg_tip3p_attach[-1, 2] ** 2
        + bgbg_tip3p_pull[-1, 2] ** 2
        + bgbg_tip3p_release[-1, 2] ** 2
    )

    bgbg_tip3p = bgbg_tip3p.append(
        {"System": system, "Delta G": delta_g, "SEM": delta_g_sem, "Type": guest_types[system[0:-2]]}, ignore_index=True
    )


In [28]:
bgbg_tip3p.head()

Unnamed: 0,Delta G,SEM,System,Type
0,-0.016103,0.234765,a-bam-p,aliphatic_ammoniums
1,-0.616992,0.270263,a-bam-s,aliphatic_ammoniums
2,-0.335027,0.312059,a-but-p,aliphatic_carboxylates
3,-0.846324,0.255411,a-but-s,aliphatic_carboxylates
4,-0.2649,0.265886,a-cbu-p,cyclic_alcohols


In [21]:
bg2bg2_tip3p = pd.DataFrame()

for system in systems:

    # BG2BG2-TIP3P (GAFF v2.1)

    prefix = os.path.join("systems", system, "bg2bg2-tip3p")
    bg2bg2_tip3p_attach = np.genfromtxt(os.path.join(prefix, "ti-a.dat"))
    bg2bg2_tip3p_pull = np.genfromtxt(os.path.join(prefix, "ti-u.dat"))

    if system[0] == "a":
        bg2bg2_tip3p_release = np.genfromtxt(
            os.path.join("systems", "a-release", "bg2bg2-tip3p", "ti-r.dat")
        )
    else:
        bg2bg2_tip3p_release = np.genfromtxt(
            os.path.join("systems", "b-release", "bg2bg2-tip3p", "ti-r.dat")
        )
    bg2bg2_tip3p_analytic = 7.14

    delta_g = -1 * (
        bg2bg2_tip3p_attach[-1, 1]
        + bg2bg2_tip3p_pull[-1, 1]
        - bg2bg2_tip3p_release[-1, 1]
        - bg2bg2_tip3p_analytic
    )
    delta_g_sem = np.sqrt(
        bg2bg2_tip3p_attach[-1, 2] ** 2
        + bg2bg2_tip3p_pull[-1, 2] ** 2
        + bg2bg2_tip3p_release[-1, 2] ** 2
    )

    bg2bg2_tip3p = bg2bg2_tip3p.append(
        {"System": system, "Delta G": delta_g, "SEM": delta_g_sem, "Type": guest_types[system[0:-2]]}, ignore_index=True
    )


In [27]:
bg2bg2_tip3p.head()

Unnamed: 0,Delta G,SEM,System,Type
0,0.150013,0.229452,a-bam-p,aliphatic_ammoniums
1,-2.930536,0.235066,a-bam-s,aliphatic_ammoniums
2,-1.369206,0.2348,a-but-p,aliphatic_carboxylates
3,-3.103348,0.231945,a-but-s,aliphatic_carboxylates
4,-2.064314,0.24957,a-cbu-p,cyclic_alcohols


## Load in SMIRNOFF99Frosst data

In [23]:
from paprika.restraints_json import json_numpy_obj_hook
import json

In [24]:
def load_results(filepath="results.json"):
    with open(os.path.join(filepath), "r") as f:
        json_data = f.read()
    loaded = json.loads(json_data, object_hook=json_numpy_obj_hook)
    return loaded


In [25]:
smirnoff = pd.DataFrame()

for system in systems:
    results = load_results(f"results/{system}-results.json")
    results_release = load_results(f"results/{system[0]}-release.json")
    
    smirnoff_attach = results["attach"]["ti-block"]["fe"]
    smirnoff_pull = results["pull"]["ti-block"]["fe"]
    smirnoff_release = results_release["release"]["ti-block"]["fe"]
    
    smirnoff_attach_sem = results["attach"]["ti-block"]["sem"]
    smirnoff_pull_sem = results["pull"]["ti-block"]["sem"]
    smirnoff_release_sem = results_release["release"]["ti-block"]["sem"]
        
    smirnoff_analytic = 7.14

    delta_g = (
        -1 * (smirnoff_attach
        + smirnoff_pull
        - smirnoff_release
        - smirnoff_analytic)
    )
    delta_g_sem = np.sqrt(
        smirnoff_attach_sem ** 2
        + smirnoff_pull_sem ** 2
        + smirnoff_release_sem ** 2
    )

    smirnoff = smirnoff.append(
        {"System": system, "Delta G": delta_g, "SEM": delta_g_sem, "Type": guest_types[system[0:-2]]}, ignore_index=True
    )


In [37]:
smirnoff.head()

Unnamed: 0,Delta G,SEM,System,Type
0,-3.236993,0.451365,a-bam-p,aliphatic_ammoniums
1,-0.544564,0.380773,a-bam-s,aliphatic_ammoniums
2,0.202175,0.298335,a-but-p,aliphatic_carboxylates
3,-1.440784,0.287627,a-but-s,aliphatic_carboxylates
4,-0.789488,0.289186,a-cbu-p,cyclic_alcohols


In [38]:
def combine_data(df):

    combined = pd.DataFrame()
    df["Short"] = [i[0:-2] for i in df["System"].values]

    for hg in df["Short"].unique():
        tmp = df[df["Short"] == hg]
        for _, row in tmp.iterrows():
            for p in row["System"].split("-")[2]:
                primary_fe = row["Delta G"]
                primary_sem = row["SEM"]
            else:
                secondary_fe = row["Delta G"]
                secondary_sem = row["SEM"]
        results = thermodynamic_bootstrap(
            primary_fe, primary_sem, secondary_fe, secondary_sem
        )

        print(f"{hg} P: {primary_fe:+0.4f} ± {primary_sem:+0.2f} S: {secondary_fe:+0.2f} ± {secondary_sem:+0.2f} → Boot: {results['mean']:+0.2f} ± {results['sem']:+0.2f}")
        combined = combined.append(
            {
                "System": hg,
                "Delta G": results["mean"],
                "SEM": results["sem"],
                "Type": guest_types[hg],
            },
            ignore_index=True,
        )
    return combined


In [39]:
smirnoff_combined = combine_data(smirnoff)

a-bam P: -0.5446 ± +0.38 S: -0.54 ± +0.38 → Boot: -1.02 ± +0.28
a-but P: -1.4408 ± +0.29 S: -1.44 ± +0.29 → Boot: -1.88 ± +0.20
a-cbu P: -0.9796 ± +0.24 S: -0.98 ± +0.24 → Boot: -1.42 ± +0.17
a-chp P: -2.3202 ± +0.31 S: -2.32 ± +0.31 → Boot: -2.77 ± +0.22
a-cpe P: -1.5650 ± +0.26 S: -1.56 ± +0.26 → Boot: -2.00 ± +0.20
a-coc P: -1.7633 ± +0.30 S: -1.76 ± +0.30 → Boot: -2.21 ± +0.22
a-hep P: -3.7776 ± +0.25 S: -3.78 ± +0.25 → Boot: -4.21 ± +0.18
a-ham P: -3.4800 ± +0.53 S: -3.48 ± +0.53 → Boot: -4.00 ± +0.41
a-hp6 P: -3.0652 ± +0.30 S: -3.07 ± +0.30 → Boot: -3.52 ± +0.21
a-hex P: -2.5262 ± +0.26 S: -2.53 ± +0.26 → Boot: -2.98 ± +0.18
a-hx2 P: -2.2245 ± +0.24 S: -2.22 ± +0.24 → Boot: -2.67 ± +0.17
a-hpa P: -3.9136 ± +0.85 S: -3.91 ± +0.85 → Boot: -4.55 ± +0.67
a-mba P: -1.1912 ± +0.32 S: -1.19 ± +0.32 → Boot: -1.65 ± +0.22
a-hx3 P: -0.9247 ± +0.31 S: -0.92 ± +0.31 → Boot: -1.38 ± +0.22
a-mhp P: -3.8299 ± +0.35 S: -3.83 ± +0.35 → Boot: -4.30 ± +0.27
a-mha P: -3.5475 ± +0.31 S: -3.55 ± +0.3

# Investiate a few outliers

In [42]:
df = smirnoff_combined.merge(experimental, on=["System"], suffixes=("_smirnoff",
                                                                "_experiment"))


In [44]:
df[abs(df["Delta G_smirnoff"] - df["Delta G_experiment"]) > 1.5]

Unnamed: 0,Delta G_smirnoff,SEM_smirnoff,System,Type,Delta G_experiment,SEM_experiment
13,-1.376249,0.224058,a-hx3,aliphatic_carboxylates,-3.01,0.01
21,-0.753809,0.322994,a-pam,aliphatic_ammoniums,-2.72,0.0
35,-4.622982,0.32013,b-mo4,aliphatic_carboxylates,-2.51,0.01


In [43]:
df.head()

Unnamed: 0,Delta G_smirnoff,SEM_smirnoff,System,Type,Delta G_experiment,SEM_experiment
0,-1.02095,0.281391,a-bam,aliphatic_ammoniums,-1.58,0.02
1,-1.882162,0.204196,a-but,aliphatic_carboxylates,-1.51,0.04
2,-1.420308,0.170185,a-cbu,cyclic_alcohols,-2.02,0.02
3,-2.771116,0.21902,a-chp,cyclic_alcohols,-2.51,0.06
4,-2.004806,0.195619,a-cpe,cyclic_alcohols,-2.13,0.02
