In [1]:
import pandas as pd
import scipy as sc
import numpy as np
import os


import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.colors import colorConverter
import seaborn as sns

%load_ext autoreload
%autoreload 2
%load_ext blackcellmagic
%matplotlib inline

In [2]:
from constants import guest_types
from constants import colors
from bootstrap import bootstrap
from bootstrap import dG_bootstrap
from bootstrap import dH_bootstrap
import plotting

In [3]:
from constants import experimental_deltaG
from constants import experimental_deltaH
from constants import systems

In [4]:
from summarize_statistics import write_statistics

# Calculate or load in the summary results

In [8]:
# Each force-field result set is read as a pair of tables:
# one split by orientation and one with orientations combined.

# "bgbg" tables (labelled "GAFF v1.7" in `names` below).
bgbg, bgbg_combined = (
    pd.read_csv("results/bgbg_tip3p_by_orientation.csv"),
    pd.read_csv("results/bgbg_tip3p_combined.csv"),
)

# "bg2bg2" tables (labelled "GAFF v2.1" in `names` below).
bg2bg2, bg2bg2_combined = (
    pd.read_csv("results/bg2bg2_tip3p_by_orientation.csv"),
    pd.read_csv("results/bg2bg2_tip3p_combined.csv"),
)

# "smirnoff" tables (labelled "SMIRNOFF99Frosst" in `names` below).
smirnoff, smirnoff_combined = (
    pd.read_csv("results/smirnoff_by_orientation.csv"),
    pd.read_csv("results/smirnoff_combined.csv"),
)

# Experimental reference values; a single table with no orientation split.
experimental = pd.read_csv("results/experimental.csv")

In [9]:
# Combined tables and their display labels; the two lists are index-aligned.
data_sets = [
    bgbg_combined,
    bg2bg2_combined,
    smirnoff_combined,
    experimental,
]
names = [
    "GAFF v1.7",
    "GAFF v2.1",
    "SMIRNOFF99Frosst",
    "Experimental",
]

# Binding free energy

(Create statistics files for re-use)

In [8]:
from multiprocessing import Pool

In [18]:
def return_bootstrapped_dictionary(
    df_one, df_two, thermodynamic_quantity, cycles=100000,
    name="smirnoff_experimental"
):
    """Bootstrap comparison statistics between two result tables.

    The two tables are merged so that each row pairs the value from
    ``df_one`` (suffix ``_i``) with the value from ``df_two`` (suffix ``_j``)
    for the same system, and the paired values and their SEMs are handed to
    ``bootstrap``.

    Parameters
    ----------
    df_one, df_two : pandas.DataFrame
        Tables with a ``System`` column, a value column and a
        ``{thermodynamic_quantity}_SEM`` column. ``df_one`` supplies ``x``
        and ``df_two`` supplies ``y`` in the ``bootstrap`` call.
    thermodynamic_quantity : str
        ``"G"`` or ``"H"`` read the ``"Delta G"`` / ``"Delta H"`` column;
        ``"-TdS"`` reads the ``"-TdS"`` column, which carries no ``"Delta "``
        prefix.
    cycles : int
        Forwarded unchanged to ``bootstrap``.
    name : str
        Key under which the bootstrap result is returned.

    Returns
    -------
    dict
        ``{name: <return value of bootstrap>}``.

    Raises
    ------
    KeyError
        If ``System`` or the required value/SEM columns are missing.
    """
    try:
        df = df_one.merge(df_two, on=["System", "Type"], suffixes=("_i", "_j"))
    except KeyError:
        # Experimental doesn't have a Type column, so won't merge cleanly.
        df = df_one.merge(df_two, on=["System"], suffixes=("_i", "_j"))

    # The entropy column is stored as "-TdS" rather than "Delta -TdS";
    # without this branch an entropy request fails with a KeyError.
    if thermodynamic_quantity == "-TdS":
        column = f"{thermodynamic_quantity}"
    else:
        column = f"Delta {thermodynamic_quantity}"

    results = bootstrap(
        x=df[f"{column}_i"],
        x_sem=df[f"{thermodynamic_quantity}_SEM_i"],
        y=df[f"{column}_j"],
        y_sem=df[f"{thermodynamic_quantity}_SEM_j"],
        cycles=cycles,
    )
    return {name: results}

In [32]:
# Bootstrap the binding free energy of each simulated data set against
# experiment; every comparison is stored under its own key.
results = {
    label: return_bootstrapped_dictionary(experimental, simulated, "G", name=label)[label]
    for label, simulated in (
        ("smirnoff_experimental", smirnoff_combined),
        ("bgbg_experimental", bgbg_combined),
        ("bg2bg2_experimental", bg2bg2_combined),
    )
}

In [33]:
import json

In [34]:
# Serialise the bootstrapped statistics so they can be reloaded later.
with open("results/dG-overall.json", "w") as handle:
    handle.write(json.dumps(results))

In [35]:
# Run write_statistics for the free energy ("G") over every pair of tables,
# spread across 12 worker processes. Each entry is
# (first table, second table, label); the label shows up in the output file
# names printed by the workers.
with Pool(12) as pool:
    pool.starmap(
        write_statistics,
        [
            (first, second, "G", label)
            for first, second, label in (
                (experimental, smirnoff_combined, "experimental_smirnoff"),
                (experimental, bgbg_combined, "experimental_bgbg"),
                (experimental, bg2bg2_combined, "experimental_bg2bg2"),
                (bgbg_combined, smirnoff_combined, "bgbg_smirnoff"),
                (bg2bg2_combined, smirnoff_combined, "bg2bg2_smirnoff"),
                (bgbg_combined, bg2bg2_combined, "bgbg_bg2bg2"),
                (bgbg, bg2bg2, "bgbg_bg2bg2_by_orientation"),
                (bgbg, smirnoff, "bgbg_smirnoff_by_orientation"),
                (bg2bg2, smirnoff, "bg2bg2_smirnoff_by_orientation"),
            )
        ],
    )


Bootstrapping results/experimental_bgbg_dG_statistics_overall.csv
Bootstrapping results/experimental_smirnoff_dG_statistics_overall.csv
Bootstrapping results/bgbg_smirnoff_dG_statistics_overall.csv
Bootstrapping results/experimental_bg2bg2_dG_statistics_overall.csv
Bootstrapping results/bg2bg2_smirnoff_by_orientation_dG_statistics_overall.csv
Bootstrapping results/bg2bg2_smirnoff_dG_statistics_overall.csv
Bootstrapping results/bgbg_bg2bg2_by_orientation_dG_statistics_overall.csv
Bootstrapping results/bgbg_smirnoff_by_orientation_dG_statistics_overall.csv
Bootstrapping results/bgbg_bg2bg2_dG_statistics_overall.csv
Bootstrapping results/bgbg_smirnoff_dG_statistics_aliphatic_ammoniums.csv
Bootstrapping results/experimental_smirnoff_dG_statistics_aliphatic_ammoniums.csv
Bootstrapping results/bgbg_bg2bg2_dG_statistics_aliphatic_ammoniums.csv
Bootstrapping results/bg2bg2_smirnoff_dG_statistics_aliphatic_ammoniums.csv
Bootstrapping results/bgbg_smirnoff_dG_statistics_cyclic_alcohols.csv
Boots

# Enthalpy

In [9]:
# Run write_statistics for the enthalpy ("H") over every pair of tables,
# spread across 12 worker processes. Each entry is
# (first table, second table, label); the label shows up in the output file
# names printed by the workers.
with Pool(12) as pool:
    pool.starmap(
        write_statistics,
        [
            (first, second, "H", label)
            for first, second, label in (
                (experimental, smirnoff_combined, "experimental_smirnoff"),
                (experimental, bgbg_combined, "experimental_bgbg"),
                (experimental, bg2bg2_combined, "experimental_bg2bg2"),
                (bgbg_combined, smirnoff_combined, "bgbg_smirnoff"),
                (bg2bg2_combined, smirnoff_combined, "bg2bg2_smirnoff"),
                (bgbg_combined, bg2bg2_combined, "bgbg_bg2bg2"),
                (bgbg, bg2bg2, "bgbg_bg2bg2_by_orientation"),
                (bgbg, smirnoff, "bgbg_smirnoff_by_orientation"),
                (bg2bg2, smirnoff, "bg2bg2_smirnoff_by_orientation"),
            )
        ],
    )


Bootstrapping results/bgbg_smirnoff_dH_statistics_overall.csv
Bootstrapping results/experimental_smirnoff_dH_statistics_overall.csv
Bootstrapping results/experimental_bgbg_dH_statistics_overall.csv
Bootstrapping results/experimental_bg2bg2_dH_statistics_overall.csv
Bootstrapping results/bgbg_bg2bg2_by_orientation_dH_statistics_overall.csv
Bootstrapping results/bg2bg2_smirnoff_dH_statistics_overall.csv
Bootstrapping results/bg2bg2_smirnoff_by_orientation_dH_statistics_overall.csv
Bootstrapping results/bgbg_smirnoff_by_orientation_dH_statistics_overall.csv
Bootstrapping results/bgbg_bg2bg2_dH_statistics_overall.csv
Bootstrapping results/bgbg_bg2bg2_dH_statistics_aliphatic_ammoniums.csv
Bootstrapping results/bg2bg2_smirnoff_dH_statistics_aliphatic_ammoniums.csv
Bootstrapping results/experimental_bgbg_dH_statistics_aliphatic_ammoniums.csv
Bootstrapping results/bgbg_bg2bg2_dH_statistics_cyclic_alcohols.csv
Bootstrapping results/bgbg_smirnoff_dH_statistics_aliphatic_ammoniums.csv
Bootstrappi

# Entropy

In [10]:
# Derive the entropic term for every table, in place:
#   -TdS     = Delta G - Delta H
#   -TdS_SEM = sqrt(G_SEM**2 + H_SEM**2)   (the two SEMs combined in quadrature)
for table in (
    bgbg,
    bg2bg2,
    smirnoff,
    smirnoff_combined,
    bgbg_combined,
    bg2bg2_combined,
    experimental,
):
    table["-TdS"] = table["Delta G"] - table["Delta H"]
    table["-TdS_SEM"] = np.sqrt(table["G_SEM"] ** 2 + table["H_SEM"] ** 2)

In [11]:
from summarize_statistics import write_entropy_statistics

In [12]:
# Run write_entropy_statistics for "-TdS" over every pair of tables,
# spread across 12 worker processes. Each entry is
# (first table, second table, label); the label shows up in the output file
# names printed by the workers.
with Pool(12) as pool:
    pool.starmap(
        write_entropy_statistics,
        [
            (first, second, "-TdS", label)
            for first, second, label in (
                (experimental, smirnoff_combined, "experimental_smirnoff"),
                (experimental, bgbg_combined, "experimental_bgbg"),
                (experimental, bg2bg2_combined, "experimental_bg2bg2"),
                (bgbg_combined, smirnoff_combined, "bgbg_smirnoff"),
                (bg2bg2_combined, smirnoff_combined, "bg2bg2_smirnoff"),
                (bgbg_combined, bg2bg2_combined, "bgbg_bg2bg2"),
                (bgbg, bg2bg2, "bgbg_bg2bg2_by_orientation"),
                (bgbg, smirnoff, "bgbg_smirnoff_by_orientation"),
                (bg2bg2, smirnoff, "bg2bg2_smirnoff_by_orientation"),
            )
        ],
    )


Bootstrapping results/experimental_smirnoff_-TdS_statistics_overall.csv
Bootstrapping results/bgbg_smirnoff_-TdS_statistics_overall.csv
Bootstrapping results/experimental_bgbg_-TdS_statistics_overall.csv
Bootstrapping results/bg2bg2_smirnoff_-TdS_statistics_overall.csv
Bootstrapping results/experimental_bg2bg2_-TdS_statistics_overall.csv
Bootstrapping results/bgbg_bg2bg2_-TdS_statistics_overall.csv
Bootstrapping results/bg2bg2_smirnoff_by_orientation_-TdS_statistics_overall.csv
Bootstrapping results/bgbg_smirnoff_by_orientation_-TdS_statistics_overall.csv
Bootstrapping results/bgbg_bg2bg2_by_orientation_-TdS_statistics_overall.csv
Bootstrapping results/experimental_smirnoff_-TdS_statistics_aliphatic_ammoniums.csv
Bootstrapping results/bg2bg2_smirnoff_-TdS_statistics_aliphatic_ammoniums.csv
Bootstrapping results/bgbg_smirnoff_-TdS_statistics_aliphatic_ammoniums.csv
Bootstrapping results/experimental_smirnoff_-TdS_statistics_cyclic_alcohols.csv
Bootstrapping results/bgbg_bg2bg2_-TdS_stat

# Bootstrap the MBAR results

In [6]:
# Re-read every table from disk. The names are rebound to fresh DataFrames,
# so any columns added to the previous objects (e.g. "-TdS") are not present
# on these. The SMIRNOFF tables come from the "smirnoff_mbar_*" files here.

bgbg, bgbg_combined = (
    pd.read_csv("results/bgbg_tip3p_by_orientation.csv"),
    pd.read_csv("results/bgbg_tip3p_combined.csv"),
)

bg2bg2, bg2bg2_combined = (
    pd.read_csv("results/bg2bg2_tip3p_by_orientation.csv"),
    pd.read_csv("results/bg2bg2_tip3p_combined.csv"),
)

smirnoff, smirnoff_combined = (
    pd.read_csv("results/smirnoff_mbar_by_orientation.csv"),
    pd.read_csv("results/smirnoff_mbar_combined.csv"),
)

experimental = pd.read_csv("results/experimental.csv")

In [7]:
# Combined tables and their display labels; the two lists are index-aligned.
data_sets = [
    bgbg_combined,
    bg2bg2_combined,
    smirnoff_combined,
    experimental,
]
names = [
    "GAFF v1.7",
    "GAFF v2.1",
    "SMIRNOFF99Frosst",
    "Experimental",
]

In [18]:
def return_bootstrapped_dictionary(
    df_one, df_two, thermodynamic_quantity, cycles=100000,
    name="smirnoff_experimental"
):
    """Pair two result tables by system and bootstrap their agreement.

    ``df_one`` provides the ``x`` values (merge suffix ``_i``) and ``df_two``
    the ``y`` values (merge suffix ``_j``). For ``"-TdS"`` the value column is
    ``"-TdS"``; for any other quantity it is ``"Delta <quantity>"``. The SEM
    column is always ``"<quantity>_SEM"``. ``cycles`` is passed straight to
    ``bootstrap``.

    Returns a one-entry dict mapping ``name`` to whatever ``bootstrap``
    returns.
    """
    pair_suffixes = ("_i", "_j")
    try:
        merged = df_one.merge(df_two, on=["System", "Type"], suffixes=pair_suffixes)
    except KeyError:
        # A table without a "Type" column (the experimental one) can only be
        # matched on "System".
        merged = df_one.merge(df_two, on=["System"], suffixes=pair_suffixes)

    # Entropy values are stored without the "Delta " prefix.
    if thermodynamic_quantity == "-TdS":
        value_column = f"{thermodynamic_quantity}"
    else:
        value_column = f"Delta {thermodynamic_quantity}"
    sem_column = f"{thermodynamic_quantity}_SEM"

    statistics = bootstrap(
        x=merged[f"{value_column}_i"],
        x_sem=merged[f"{sem_column}_i"],
        y=merged[f"{value_column}_j"],
        y_sem=merged[f"{sem_column}_j"],
        cycles=cycles,
    )
    return {name: statistics}

In [9]:
# Start a fresh results mapping holding the free-energy comparison of the
# SMIRNOFF table against experiment.
results = dict(
    return_bootstrapped_dictionary(
        experimental, smirnoff_combined, "G", name="smirnoff_experimental"
    )
)

In [10]:
# Display the bootstrapped free-energy statistics (single key: "smirnoff_experimental").
results

{'smirnoff_experimental': {'mean': {'slope': 0.52890785365297732,
   'intercept': -1.4504055739032329,
   'R': 0.66664089973143947,
   'R**2': 0.45170010487586837,
   'RMSE': 0.79500744385756994,
   'MSE': -0.041187470092817847,
   'MUE': 0.61970635509652461,
   'Tau': 0.48198688141325097},
  'sem': {'slope': 0.099080330007001394,
   'intercept': 0.32294934116307938,
   'R': 0.085381588654260501,
   'R**2': 0.10909640376193572,
   'RMSE': 0.097979118495851114,
   'MSE': 0.12155954058333204,
   'MUE': 0.077501117449154236,
   'Tau': 0.077257182105958291},
  'ci_low': {'slope': 0.33371837950933958,
   'intercept': -2.067423361833427,
   'R': 0.47178970990966135,
   'R**2': 0.22258553037664242,
   'RMSE': 0.61636330203158973,
   'MSE': -0.28096921830942029,
   'MUE': 0.47469023578776465,
   'Tau': 0.31946799630706646},
  'ci_high': {'slope': 0.72558645075794681,
   'intercept': -0.79060243458596524,
   'R': 0.80482448057030798,
   'R**2': 0.64774244452526608,
   'RMSE': 1.004053497705802,

In [11]:
# Enthalpy comparison of the SMIRNOFF table against experiment.
# NOTE: the key "smirnoff_experimental" is already present in `results`
# (free-energy statistics), so this update replaces that entry.
results.update(
    return_bootstrapped_dictionary(
        df_one=experimental,
        df_two=smirnoff_combined,
        thermodynamic_quantity="H",
        name="smirnoff_experimental",
    )
)

In [13]:
# Display `results`. The preceding update reused the key "smirnoff_experimental",
# so the entry shown here holds the enthalpy statistics, not the free-energy ones.
results

{'smirnoff_experimental': {'mean': {'slope': 0.84231801079445534,
   'intercept': 0.35653983554889396,
   'R': 0.6558646763011966,
   'R**2': 0.43840427163839296,
   'RMSE': 1.8265121256361585,
   'MSE': 0.72621281855725062,
   'MUE': 1.3685756059035254,
   'Tau': 0.52715849390919156},
  'sem': {'slope': 0.1605492515181357,
   'intercept': 0.50772018563238919,
   'R': 0.090806376531162969,
   'R**2': 0.11554588137353196,
   'RMSE': 0.23267974168898475,
   'MSE': 0.25810152384152857,
   'MUE': 0.18836203970522608,
   'Tau': 0.088495609580905785},
  'ci_low': {'slope': 0.54290789194262012,
   'intercept': -0.5378322982735011,
   'R': 0.45760501215033217,
   'R**2': 0.20940234714510564,
   'RMSE': 1.3697914953202168,
   'MSE': 0.2259657238010159,
   'MUE': 1.0162021895586144,
   'Tau': 0.33776301218161681},
  'ci_high': {'slope': 1.1767477446865602,
   'intercept': 1.4638295162323893,
   'R': 0.8116227422075768,
   'R**2': 0.65873147566854662,
   'RMSE': 2.279240094628971,
   'MSE': 1.239

In [22]:
# Derive the entropic term, in place, for the tables used below:
#   -TdS     = Delta G - Delta H
#   -TdS_SEM = sqrt(G_SEM**2 + H_SEM**2)   (the two SEMs combined in quadrature)
for table in (smirnoff, smirnoff_combined, experimental):
    table["-TdS"] = table["Delta G"] - table["Delta H"]
    table["-TdS_SEM"] = np.sqrt(table["G_SEM"] ** 2 + table["H_SEM"] ** 2)

In [23]:
# Entropy comparison of the SMIRNOFF table against experiment.
# NOTE: this reuses the key "smirnoff_experimental", so whatever `results`
# held under that key before is replaced.
results.update(
    return_bootstrapped_dictionary(
        df_one=experimental,
        df_two=smirnoff_combined,
        thermodynamic_quantity="-TdS",
        name="smirnoff_experimental",
    )
)

In [24]:
# Display `results`. The preceding update reused the key "smirnoff_experimental",
# so the entry shown here holds the -TdS statistics.
results

{'smirnoff_experimental': {'mean': {'slope': 0.87258353361334451,
   'intercept': -0.83673525720323538,
   'R': 0.62586341930702738,
   'R**2': 0.40274773176955997,
   'RMSE': 1.8422417321127627,
   'MSE': -0.76654987557790899,
   'MUE': 1.4303475128408021,
   'Tau': 0.32192006644518273},
  'sem': {'slope': 0.18678458299447881,
   'intercept': 0.24621775456538025,
   'R': 0.10508430968929544,
   'R**2': 0.12490490868843854,
   'RMSE': 0.20406663275917974,
   'MSE': 0.25695459799092457,
   'MUE': 0.17910344262530811,
   'Tau': 0.096916873911185641},
  'ci_low': {'slope': 0.49945447463088966,
   'intercept': -1.3252091794895102,
   'R': 0.38678651210032333,
   'R**2': 0.14960380594273356,
   'RMSE': 1.4470347695776069,
   'MSE': -1.2625501563085686,
   'MUE': 1.0902224262204983,
   'Tau': 0.1140642303433001},
  'ci_high': {'slope': 1.2392097482678666,
   'intercept': -0.36136842727597651,
   'R': 0.79266327079152998,
   'R**2': 0.62831506086192634,
   'RMSE': 2.2490213989621708,
   'MSE'