In [2]:
import pandas as pd
import scipy as sc
import numpy as np
import os


import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.colors import colorConverter
import seaborn as sns

%load_ext autoreload
%autoreload 2
%load_ext blackcellmagic
%matplotlib inline

In [3]:
from constants import guest_types
from constants import colors
from bootstrap import bootstrap
from bootstrap import dG_bootstrap
from bootstrap import dH_bootstrap
import plotting

In [4]:
from constants import experimental_deltaG
from constants import experimental_deltaH
from constants import systems

In [5]:
from summarize_statistics import write_statistics

# Calculate or load in the summary results

In [6]:
# Load the pre-computed summary tables from results/.
# bgbg / bg2bg2 / smirnoff appear to correspond to the "GAFF v1.7", "GAFF v2.1"
# and "SMIRNOFF99Frosst" labels in `names` below. Each simulation set has a
# by-orientation file and a combined file (presumably one row per system in the
# combined file — TODO confirm against the CSVs).
bgbg = pd.read_csv("results/bgbg_tip3p_by_orientation.csv")
bgbg_combined = pd.read_csv("results/bgbg_tip3p_combined.csv")

bg2bg2 = pd.read_csv("results/bg2bg2_tip3p_by_orientation.csv")
bg2bg2_combined = pd.read_csv("results/bg2bg2_tip3p_combined.csv")

smirnoff = pd.read_csv("results/smirnoff_by_orientation.csv")
smirnoff_combined = pd.read_csv("results/smirnoff_combined.csv")

# Experimental reference values (a single file; no by-orientation variant is loaded).
experimental = pd.read_csv("results/experimental.csv")

In [7]:
# Parallel lists: names[i] appears to be the display label for data_sets[i]
# (same length, same order) — confirm wherever they are consumed.
data_sets = [bgbg_combined, bg2bg2_combined, smirnoff_combined, experimental]
names = ["GAFF v1.7", "GAFF v2.1", "SMIRNOFF99Frosst", "Experimental"]

# Binding free energy

(Create statistics files for re-use)

In [8]:
from multiprocessing import Pool

In [31]:
def return_bootstrapped_dictionary(
    df_one, df_two, thermodynamic_quantity, cycles=100000,
    name="smirnoff_experimental"
):
    """Bootstrap one thermodynamic quantity across two result tables.

    The two frames are merged on ``System`` and ``Type``; if that merge raises
    ``KeyError`` they are merged on ``System`` alone. The paired values and
    their SEMs are then passed to ``bootstrap``.

    Parameters
    ----------
    df_one, df_two : pandas.DataFrame
        Tables providing ``Delta <quantity>`` and ``<quantity>_SEM`` columns.
        After the merge, ``df_one`` supplies ``x``/``x_sem`` (``_i`` columns)
        and ``df_two`` supplies ``y``/``y_sem`` (``_j`` columns).
    thermodynamic_quantity : str
        Column stem used to build the column names, e.g. ``"G"``.
    cycles : int
        Forwarded unchanged to ``bootstrap``.
    name : str
        Key under which the bootstrap result is returned.

    Returns
    -------
    dict
        A one-entry dictionary ``{name: <value returned by bootstrap>}``.
    """
    join_suffixes = ("_i", "_j")
    try:
        paired = df_one.merge(df_two, on=["System", "Type"], suffixes=join_suffixes)
    except KeyError:
        # The experimental table has no "Type" column, so match on "System" only.
        paired = df_one.merge(df_two, on=["System"], suffixes=join_suffixes)

    bootstrap_inputs = {
        "x": paired[f"Delta {thermodynamic_quantity}_i"],
        "x_sem": paired[f"{thermodynamic_quantity}_SEM_i"],
        "y": paired[f"Delta {thermodynamic_quantity}_j"],
        "y_sem": paired[f"{thermodynamic_quantity}_SEM_j"],
    }
    return {name: bootstrap(**bootstrap_inputs, cycles=cycles)}

In [32]:
# Bootstrap the experiment-vs-simulation dG comparison for each force field.
# Every call returns a one-entry dict keyed by its `name`; they are merged here
# in the same order as before.
results = {
    **return_bootstrapped_dictionary(
        experimental, smirnoff_combined, "G", name="smirnoff_experimental"
    ),
    **return_bootstrapped_dictionary(
        experimental, bgbg_combined, "G", name="bgbg_experimental"
    ),
    **return_bootstrapped_dictionary(
        experimental, bg2bg2_combined, "G", name="bg2bg2_experimental"
    ),
}

In [33]:
import json

In [34]:
# Persist the bootstrapped dG comparisons so they can be reloaded later
# instead of being recomputed.
with open("results/dG-overall.json", "w") as f:
    json.dump(results, f)

In [35]:
# One tuple per comparison: (first data set, second data set, thermodynamic
# quantity, label). `write_statistics` is called once per tuple; judging by the
# log it prints, the label becomes the prefix of the CSV files written to results/.
dG_comparisons = [
    (experimental, smirnoff_combined, "G", "experimental_smirnoff"),
    (experimental, bgbg_combined, "G", "experimental_bgbg"),
    (experimental, bg2bg2_combined, "G", "experimental_bg2bg2"),
    (bgbg_combined, smirnoff_combined, "G", "bgbg_smirnoff"),
    (bg2bg2_combined, smirnoff_combined, "G", "bg2bg2_smirnoff"),
    (bgbg_combined, bg2bg2_combined, "G", "bgbg_bg2bg2"),
    (bgbg, bg2bg2, "G", "bgbg_bg2bg2_by_orientation"),
    (bgbg, smirnoff, "G", "bgbg_smirnoff_by_orientation"),
    (bg2bg2, smirnoff, "G", "bg2bg2_smirnoff_by_orientation"),
]

# Size the pool to the work available: never more workers than comparisons,
# and never more than the machine has cores (os.cpu_count() may return None).
with Pool(min(len(dG_comparisons), os.cpu_count() or 1)) as p:
    p.starmap(write_statistics, dG_comparisons)


Bootstrapping results/experimental_bgbg_dG_statistics_overall.csv
Bootstrapping results/experimental_smirnoff_dG_statistics_overall.csv
Bootstrapping results/bgbg_smirnoff_dG_statistics_overall.csv
Bootstrapping results/experimental_bg2bg2_dG_statistics_overall.csv
Bootstrapping results/bg2bg2_smirnoff_by_orientation_dG_statistics_overall.csv
Bootstrapping results/bg2bg2_smirnoff_dG_statistics_overall.csv
Bootstrapping results/bgbg_bg2bg2_by_orientation_dG_statistics_overall.csv
Bootstrapping results/bgbg_smirnoff_by_orientation_dG_statistics_overall.csv
Bootstrapping results/bgbg_bg2bg2_dG_statistics_overall.csv
Bootstrapping results/bgbg_smirnoff_dG_statistics_aliphatic_ammoniums.csv
Bootstrapping results/experimental_smirnoff_dG_statistics_aliphatic_ammoniums.csv
Bootstrapping results/bgbg_bg2bg2_dG_statistics_aliphatic_ammoniums.csv
Bootstrapping results/bg2bg2_smirnoff_dG_statistics_aliphatic_ammoniums.csv
Bootstrapping results/bgbg_smirnoff_dG_statistics_cyclic_alcohols.csv
Boots

# Enthalpy

In [9]:
# One tuple per comparison: (first data set, second data set, thermodynamic
# quantity, label). `write_statistics` is called once per tuple; judging by the
# log it prints, the label becomes the prefix of the CSV files written to results/.
dH_comparisons = [
    (experimental, smirnoff_combined, "H", "experimental_smirnoff"),
    (experimental, bgbg_combined, "H", "experimental_bgbg"),
    (experimental, bg2bg2_combined, "H", "experimental_bg2bg2"),
    (bgbg_combined, smirnoff_combined, "H", "bgbg_smirnoff"),
    (bg2bg2_combined, smirnoff_combined, "H", "bg2bg2_smirnoff"),
    (bgbg_combined, bg2bg2_combined, "H", "bgbg_bg2bg2"),
    (bgbg, bg2bg2, "H", "bgbg_bg2bg2_by_orientation"),
    (bgbg, smirnoff, "H", "bgbg_smirnoff_by_orientation"),
    (bg2bg2, smirnoff, "H", "bg2bg2_smirnoff_by_orientation"),
]

# Size the pool to the work available: never more workers than comparisons,
# and never more than the machine has cores (os.cpu_count() may return None).
with Pool(min(len(dH_comparisons), os.cpu_count() or 1)) as p:
    p.starmap(write_statistics, dH_comparisons)


Bootstrapping results/bgbg_smirnoff_dH_statistics_overall.csv
Bootstrapping results/experimental_smirnoff_dH_statistics_overall.csv
Bootstrapping results/experimental_bgbg_dH_statistics_overall.csv
Bootstrapping results/experimental_bg2bg2_dH_statistics_overall.csv
Bootstrapping results/bgbg_bg2bg2_by_orientation_dH_statistics_overall.csv
Bootstrapping results/bg2bg2_smirnoff_dH_statistics_overall.csv
Bootstrapping results/bg2bg2_smirnoff_by_orientation_dH_statistics_overall.csv
Bootstrapping results/bgbg_smirnoff_by_orientation_dH_statistics_overall.csv
Bootstrapping results/bgbg_bg2bg2_dH_statistics_overall.csv
Bootstrapping results/bgbg_bg2bg2_dH_statistics_aliphatic_ammoniums.csv
Bootstrapping results/bg2bg2_smirnoff_dH_statistics_aliphatic_ammoniums.csv
Bootstrapping results/experimental_bgbg_dH_statistics_aliphatic_ammoniums.csv
Bootstrapping results/bgbg_bg2bg2_dH_statistics_cyclic_alcohols.csv
Bootstrapping results/bgbg_smirnoff_dH_statistics_aliphatic_ammoniums.csv
Bootstrappi

# Entropy

In [10]:
# Derive the entropic term for every table in place:
#   -TdS     = Delta G - Delta H
#   -TdS_SEM = sqrt(G_SEM^2 + H_SEM^2)   (the two SEMs added in quadrature)
for _frame in (
    bgbg,
    bg2bg2,
    smirnoff,
    smirnoff_combined,
    bgbg_combined,
    bg2bg2_combined,
    experimental,
):
    _frame["-TdS"] = _frame["Delta G"] - _frame["Delta H"]
    _frame["-TdS_SEM"] = np.sqrt(_frame["G_SEM"]**2 + _frame["H_SEM"]**2)

In [11]:
from summarize_statistics import write_entropy_statistics

In [12]:
# One tuple per comparison: (first data set, second data set, thermodynamic
# quantity, label). `write_entropy_statistics` is called once per tuple; judging
# by the log it prints, the label becomes the prefix of the CSV files written
# to results/.
TdS_comparisons = [
    (experimental, smirnoff_combined, "-TdS", "experimental_smirnoff"),
    (experimental, bgbg_combined, "-TdS", "experimental_bgbg"),
    (experimental, bg2bg2_combined, "-TdS", "experimental_bg2bg2"),
    (bgbg_combined, smirnoff_combined, "-TdS", "bgbg_smirnoff"),
    (bg2bg2_combined, smirnoff_combined, "-TdS", "bg2bg2_smirnoff"),
    (bgbg_combined, bg2bg2_combined, "-TdS", "bgbg_bg2bg2"),
    (bgbg, bg2bg2, "-TdS", "bgbg_bg2bg2_by_orientation"),
    (bgbg, smirnoff, "-TdS", "bgbg_smirnoff_by_orientation"),
    (bg2bg2, smirnoff, "-TdS", "bg2bg2_smirnoff_by_orientation"),
]

# Size the pool to the work available: never more workers than comparisons,
# and never more than the machine has cores (os.cpu_count() may return None).
with Pool(min(len(TdS_comparisons), os.cpu_count() or 1)) as p:
    p.starmap(write_entropy_statistics, TdS_comparisons)


Bootstrapping results/experimental_smirnoff_-TdS_statistics_overall.csv
Bootstrapping results/bgbg_smirnoff_-TdS_statistics_overall.csv
Bootstrapping results/experimental_bgbg_-TdS_statistics_overall.csv
Bootstrapping results/bg2bg2_smirnoff_-TdS_statistics_overall.csv
Bootstrapping results/experimental_bg2bg2_-TdS_statistics_overall.csv
Bootstrapping results/bgbg_bg2bg2_-TdS_statistics_overall.csv
Bootstrapping results/bg2bg2_smirnoff_by_orientation_-TdS_statistics_overall.csv
Bootstrapping results/bgbg_smirnoff_by_orientation_-TdS_statistics_overall.csv
Bootstrapping results/bgbg_bg2bg2_by_orientation_-TdS_statistics_overall.csv
Bootstrapping results/experimental_smirnoff_-TdS_statistics_aliphatic_ammoniums.csv
Bootstrapping results/bg2bg2_smirnoff_-TdS_statistics_aliphatic_ammoniums.csv
Bootstrapping results/bgbg_smirnoff_-TdS_statistics_aliphatic_ammoniums.csv
Bootstrapping results/experimental_smirnoff_-TdS_statistics_cyclic_alcohols.csv
Bootstrapping results/bgbg_bg2bg2_-TdS_stat

In [27]:
from make_statistics_table import table

In [28]:
# NOTE(review): in the saved output below, this call printed the
# SMIRNOFF99Frosst and GAFF v1.7 rows and then raised KeyError: 'ci' —
# presumably while building the next row; confirm in make_statistics_table.table.
table(thermodynamic_quantity="G")

| ΔG | SMIRNOFF99Frosst | 0.91 | [0.71, 1.13] | -0.01 | [-0.29, 0.26] | 0.34 | [0.12, 0.56] | 0.49 | [0.26, 0.72] | -1.55 | [-0.80, -2.29] | 
| ΔG | GAFF v1.7 | 0.88 | [0.71, 1.08] | 0.46 | [0.23, 0.69] | 0.54 | [0.33, 0.71] | 0.69 | [0.47, 0.91] | -0.47 | [0.22, -1.16] | 


KeyError: 'ci'

In [29]:
# NOTE(review): in the saved output below, this call printed the
# SMIRNOFF99Frosst and GAFF v1.7 rows and then raised KeyError: 'ci' —
# presumably while building the next row; confirm in make_statistics_table.table.
table(thermodynamic_quantity="H")

| ΔH | SMIRNOFF99Frosst | 1.85 | [1.40, 2.30] | 0.77 | [0.26, 1.28] | 0.44 | [0.21, 0.66] | 0.85 | [0.54, 1.19] | 0.41 | [1.55, -0.50] | 
| ΔH | GAFF v1.7 | 2.54 | [2.08, 2.99] | 1.84 | [1.31, 2.37] | 0.39 | [0.17, 0.62] | 0.80 | [0.47, 1.18] | 1.36 | [2.66, 0.31] | 


KeyError: 'ci'

In [None]:
# NOTE(review): `entropy_table` is not imported or defined in any cell shown
# here (only `table` is imported from make_statistics_table). Unless it is
# defined elsewhere, this raises NameError on a fresh kernel — presumably it
# should be imported from make_statistics_table; confirm.
entropy_table(thermodynamic_quantity="-TdS")

In [None]:
# Side-by-side table keyed on "System": experimental vs. SMIRNOFF99Frosst, then
# GAFF v1.7 (bgbg) appended.
# `suffixes` previously had no value, which is a SyntaxError; columns present in
# both of the first two tables are now tagged with their source.
# NOTE(review): bgbg columns are only suffixed (pandas defaults "_x"/"_y") when
# they collide with a column already in the merged frame, so most of them keep
# their bare names. This must run before the tables are renamed with add_suffix.
df = pd.merge(
    experimental,
    smirnoff_combined,
    on=["System"],
    suffixes=("_experimental", "_smirnoff"),
).merge(bgbg_combined, on=["System"])


In [None]:
# Put every combined table into the same System-sorted row order with a fresh
# 0..n-1 index, then tag each column name with its source.
# NOTE(review): the original names are rebound, so this cell cannot be run
# twice (there is no "System" column once the suffix is applied), and earlier
# cells that expect un-suffixed columns will fail if re-run afterwards.
experimental = (
    experimental.sort_values(by="System")
    .reset_index(drop=True)
    .add_suffix('_experimental')
)
smirnoff_combined = (
    smirnoff_combined.sort_values(by="System")
    .reset_index(drop=True)
    .add_suffix('_smirnoff')
)
bgbg_combined = (
    bgbg_combined.sort_values(by="System")
    .reset_index(drop=True)
    .add_suffix('_bgbg')
)
bg2bg2_combined = (
    bg2bg2_combined.sort_values(by="System")
    .reset_index(drop=True)
    .add_suffix('_bg2bg2')
)


In [None]:
# Spot-check the last rows (and the suffixed column names) of the experimental table.
experimental.tail()

In [None]:
# Spot-check the last rows (and the suffixed column names) of the bg2bg2 table.
bg2bg2_combined.tail()

In [None]:
data = [experimental, smirnoff_combined, bgbg_combined, bg2bg2_combined]

# Line the tables up by system name rather than by row position. A positional
# index join silently pairs the wrong rows if the tables do not contain exactly
# the same systems. A left merge keeps one row per experimental system, in the
# same order, and leaves NaN where a force field has no entry for it.
df = (
    data[0]
    .merge(data[1], how="left", left_on="System_experimental", right_on="System_smirnoff")
    .merge(data[2], how="left", left_on="System_experimental", right_on="System_bgbg")
    .merge(data[3], how="left", left_on="System_experimental", right_on="System_bg2bg2")
)

In [None]:
# Free-energy view: experimental Delta G and its SEM next to each force field's.
df.loc[
    :,
    [
        "System_experimental",
        "Delta G_experimental",
        "G_SEM_experimental",
        "Delta G_smirnoff",
        "G_SEM_smirnoff",
        "Delta G_bgbg",
        "G_SEM_bgbg",
        "Delta G_bg2bg2",
        "G_SEM_bg2bg2",
    ],
]

In [None]:
# Enthalpy view: experimental Delta H and its SEM next to each force field's.
df.loc[
    :,
    [
        "System_experimental",
        "Delta H_experimental",
        "H_SEM_experimental",
        "Delta H_smirnoff",
        "H_SEM_smirnoff",
        "Delta H_bgbg",
        "H_SEM_bgbg",
        "Delta H_bg2bg2",
        "H_SEM_bg2bg2",
    ],
]

In [None]:
# Entropy view: experimental -TdS and its SEM next to each force field's.
df.loc[
    :,
    [
        "System_experimental",
        "-TdS_experimental",
        "-TdS_SEM_experimental",
        "-TdS_smirnoff",
        "-TdS_SEM_smirnoff",
        "-TdS_bgbg",
        "-TdS_SEM_bgbg",
        "-TdS_bg2bg2",
        "-TdS_SEM_bg2bg2",
    ],
]