# Pre-Processing of Non-Adaptive Run Data

The cells below are examples of the slow analyses (run in tmux sessions using IPython) that were performed to generate the data for the final analyses.

## Define Paths

In [None]:
# Map each short run label ("<system>_<run length in ns>") to the name of the
# directory holding that calculation.
calc_dirs = {
    f"{label}_{length}": directory
    for label, directories in (
        ("t4l", ("t4l_200ps", "t4l_5000ps", "t4l_30000ps")),
        ("mif", ("mif_180_anti_200ps", "mif_180_anti_5000ps", "mif_180_anti_30000ps")),
        ("mdm2_nut", ("mdm2_nutlin_notprot_200ps", "mdm2_nutlin_notprot_5000ps", "mdm2_nutlin_notprot_30000ps")),
        ("mdm2_pip", ("mdm2_pip2_short_200ps", "mdm2_pip2_short_5000ps", "mdm2_pip2_short_30000ps")),
        # NOTE(review): the 30 ns PDE2A entry has no "ps" suffix, unlike the others — confirm
        ("pde", ("pde2a_p10_200ps", "pde2a_p10_5000ps", "pde2a_p10_30000")),
    )
    for length, directory in zip(("0.2", "6", "30"), directories)
}

# For each system, map the run-length label to the matching key of calc_dirs.
ligs = {
    name: {f"{length} ns": f"{label}_{length}" for length in ("0.2", "6", "30")}
    for name, label in (
        ("T4L", "t4l"),
        ("MIF", "mif"),
        ("MDM2-Nutlin", "mdm2_nut"),
        ("MDM2-PIP2", "mdm2_pip"),
        ("PDE2A", "pde"),
    )
}

# Systems in the order they should be plotted (differs from the order of ligs).
systems = "T4L MIF MDM2-PIP2 PDE2A MDM2-Nutlin".split()

## Initial Slow Analyses to be Run in TMUX Sessions

In [None]:
import a3fe as a3
import pickle

# Map each short run label ("<system>_<run length in ns>") to the name of the
# directory holding that calculation.
calc_dirs = {
    f"{label}_{length}": directory
    for label, directories in (
        ("t4l", ("t4l_200ps", "t4l_5000ps", "t4l_30000ps")),
        ("mif", ("mif_180_anti_200ps", "mif_180_anti_5000ps", "mif_180_anti_30000ps")),
        ("mdm2_nut", ("mdm2_nutlin_notprot_200ps", "mdm2_nutlin_notprot_5000ps", "mdm2_nutlin_notprot_30000ps")),
        ("mdm2_pip", ("mdm2_pip2_short_200ps", "mdm2_pip2_short_5000ps", "mdm2_pip2_short_30000ps")),
        # NOTE(review): the 30 ns PDE2A entry has no "ps" suffix, unlike the others — confirm
        ("pde", ("pde2a_p10_200ps", "pde2a_p10_5000ps", "pde2a_p10_30000")),
    )
    for length, directory in zip(("0.2", "6", "30"), directories)
}

# For each system, map the run-length label to the matching key of calc_dirs.
ligs = {
    name: {f"{length} ns": f"{label}_{length}" for length in ("0.2", "6", "30")}
    for name, label in (
        ("T4L", "t4l"),
        ("MIF", "mif"),
        ("MDM2-Nutlin", "mdm2_nut"),
        ("MDM2-PIP2", "mdm2_pip"),
        ("PDE2A", "pde"),
    )
}

# Get a dictionary of the final free energy changes for every system and every
# run length, broken down by calculation, leg and stage. Layout:
#   final_dGs_all[system][time]["dgs"]                  -> calc._delta_g
#   final_dGs_all[system][time][leg_type]["dg"]         -> leg._delta_g
#   final_dGs_all[system][time][leg_type][stage_type]["dg"] -> stage._delta_g
final_dGs_all = {}
for system in ligs:
    final_dGs_all[system] = {}
    # Iterate over this system's own run lengths rather than hard-coding the T4L keys
    for time in ligs[system]:
        print(f"Analysing {system} {time}")
        calc = a3.Calculation(base_dir=calc_dirs[ligs[system][time]])
        calc.recursively_set_attr("relative_simulation_cost", 1, force=True)
        # Only run the analysis when the calculation has no stored free energy yet
        if calc._delta_g is None:
            calc.analyse()

        calc_results = {"dgs": calc._delta_g}
        for leg in calc.legs:
            leg_results = {"dg": leg._delta_g}
            for stage in leg.stages:
                leg_results[str(stage.stage_type)] = {"dg": stage._delta_g}
            calc_results[str(leg.leg_type)] = leg_results
        final_dGs_all[system][time] = calc_results

        calc._dump()
        calc._close_logging_handlers()
        del calc

        # Re-write the pickle after every calculation so that partial results
        # survive if a later (slow) analysis fails
        with open("final_analysis/final_dGs_all.pkl", "wb") as f:
            pickle.dump(final_dGs_all, f)

In [None]:
import a3fe as a3
import pickle

# Map each short run label ("<system>_<run length in ns>") to the name of the
# directory holding that calculation.
calc_dirs = {
    f"{label}_{length}": directory
    for label, directories in (
        ("t4l", ("t4l_200ps", "t4l_5000ps", "t4l_30000ps")),
        ("mif", ("mif_180_anti_200ps", "mif_180_anti_5000ps", "mif_180_anti_30000ps")),
        ("mdm2_nut", ("mdm2_nutlin_notprot_200ps", "mdm2_nutlin_notprot_5000ps", "mdm2_nutlin_notprot_30000ps")),
        ("mdm2_pip", ("mdm2_pip2_short_200ps", "mdm2_pip2_short_5000ps", "mdm2_pip2_short_30000ps")),
        # NOTE(review): the 30 ns PDE2A entry has no "ps" suffix, unlike the others — confirm
        ("pde", ("pde2a_p10_200ps", "pde2a_p10_5000ps", "pde2a_p10_30000")),
    )
    for length, directory in zip(("0.2", "6", "30"), directories)
}

# For each system, map the run-length label to the matching key of calc_dirs.
ligs = {
    name: {f"{length} ns": f"{label}_{length}" for length in ("0.2", "6", "30")}
    for name, label in (
        ("T4L", "t4l"),
        ("MIF", "mif"),
        ("MDM2-Nutlin", "mdm2_nut"),
        ("MDM2-PIP2", "mdm2_pip"),
        ("PDE2A", "pde"),
    )
}
# Collect the gradient data objects as grad_data[lig][time][leg][stage]
grad_data = {}

for lig in ligs:
    grad_data[lig] = {}
    for time in ligs[lig]:
        grad_data[lig][time] = {}
        print(f"Loading gradient data for {lig} {time}")
        calc = a3.Calculation(base_dir=calc_dirs[ligs[lig][time]])
        calc.recursively_set_attr("relative_simulation_cost", 1, force=True)
        # legs[0] is stored under "bound" and legs[1] under "free"; within each
        # leg the stage names are listed in the order of leg.stages
        grad_data[lig][time].update(
            {
                leg_name: {
                    stage_name: a3.analyse.GradientData(
                        calc.legs[leg_idx].stages[stage_idx].lam_windows,
                        equilibrated=True,
                    )
                    for stage_idx, stage_name in enumerate(stage_names)
                }
                for leg_idx, (leg_name, stage_names) in enumerate(
                    (
                        ("bound", ("restrain", "discharge", "vanish")),
                        ("free", ("discharge", "vanish")),
                    )
                )
            }
        )
        calc._close_logging_handlers()
        del calc

    # Re-save the pickle each time a ligand has been fully loaded
    print(f"Saving gradient data for {lig}")
    with open("final_analysis/grad_data.pkl", "wb") as f:
        pickle.dump(grad_data, f)

# Finally, let's get the restraint parameters for each system, taken from the
# first restraint of the first leg of the 0.2 ns calculation
systems = ["T4L", "MIF", "MDM2-PIP2", "PDE2A", "MDM2-Nutlin"]
restraint_dicts = {}
names_to_calc_dirs = {
    "T4L": "t4l_0.2",
    "MIF": "mif_0.2",
    "MDM2-PIP2": "mdm2_pip_0.2",
    "PDE2A": "pde_0.2",
    "MDM2-Nutlin": "mdm2_nut_0.2",
}
for system in systems:
    print(f"Getting restraint parameters for {system}")
    calc = a3.Calculation(base_dir=calc_dirs[names_to_calc_dirs[system]])
    restr = calc.legs[0].restraints[0]
    # Presumably the SOMD string has the form "<label> = <dict literal>"; split
    # only on the first "=" so an "=" inside the payload cannot truncate it.
    restr_str = restr.toString("SOMD").split("=", 1)[1]
    # NOTE(review): eval executes arbitrary code. This is acceptable only because
    # the string comes from our own calculation objects; consider
    # ast.literal_eval if the payload is confirmed to be a plain literal.
    restr_dict = eval(restr_str)
    restraint_dicts[system] = restr_dict
    calc._close_logging_handlers()
    del calc

with open("final_analysis/restraint_dicts.pkl", "wb") as f:
    pickle.dump(restraint_dicts, f)


In [None]:
import a3fe as a3
import pickle
import logging

# Map each short run label ("<system>_<run length in ns>") to the name of the
# directory holding that calculation.
calc_dirs = {
    f"{label}_{length}": directory
    for label, directories in (
        ("t4l", ("t4l_200ps", "t4l_5000ps", "t4l_30000ps")),
        ("mif", ("mif_180_anti_200ps", "mif_180_anti_5000ps", "mif_180_anti_30000ps")),
        ("mdm2_nut", ("mdm2_nutlin_notprot_200ps", "mdm2_nutlin_notprot_5000ps", "mdm2_nutlin_notprot_30000ps")),
        ("mdm2_pip", ("mdm2_pip2_short_200ps", "mdm2_pip2_short_5000ps", "mdm2_pip2_short_30000ps")),
        # NOTE(review): the 30 ns PDE2A entry has no "ps" suffix, unlike the others — confirm
        ("pde", ("pde2a_p10_200ps", "pde2a_p10_5000ps", "pde2a_p10_30000")),
    )
    for length, directory in zip(("0.2", "6", "30"), directories)
}

# For each system, map the run-length label to the matching key of calc_dirs.
ligs = {
    name: {f"{length} ns": f"{label}_{length}" for length in ("0.2", "6", "30")}
    for name, label in (
        ("T4L", "t4l"),
        ("MIF", "mif"),
        ("MDM2-Nutlin", "mdm2_nut"),
        ("MDM2-PIP2", "mdm2_pip"),
        ("PDE2A", "pde"),
    )
}

# Generate a dictionary of block-wise free energy changes for each stage of each
# leg, for each run length and each system. This is done twice: once on the
# non-equilibrated data and once on the equilibrated data, each written to its
# own pickle.
for equil in [False, True]:
    dgs_conv = {}
    savename = "final_analysis/dgs_conv_equil.pkl" if equil else "final_analysis/dgs_conv_nonequil.pkl"
    for lig in ligs:
        dgs_conv[lig] = {}
        for time in ligs[lig]:
            dgs_conv[lig][time] = {}
            print(f"Analysing {lig} {time}")
            calc = a3.Calculation(base_dir=calc_dirs[ligs[lig][time]], stream_log_level=logging.CRITICAL)
            for leg in calc.legs:
                leg_results = {}
                for stage in leg.stages:
                    # Pass the loop's flag through so that the "equil" and
                    # "nonequil" files really contain different analyses
                    fracts, dgs = stage.analyse_convergence(mode="block", equilibrated=equil)
                    leg_results[str(stage.stage_type)] = {"fracts": fracts, "dgs": dgs}
                dgs_conv[lig][time][str(leg.leg_type)] = leg_results
            calc._close_logging_handlers()
            del calc

            # Write the most recent version of the dictionary to a pickle so that
            # partial results are kept if a later calculation fails
            with open(savename, "wb") as f:
                pickle.dump(dgs_conv, f)

In [None]:
import a3fe as a3
import pickle
import logging
import numpy as np

# Map each short run label ("<system>_<run length in ns>") to the name of the
# directory holding that calculation.
calc_dirs = {
    f"{label}_{length}": directory
    for label, directories in (
        ("t4l", ("t4l_200ps", "t4l_5000ps", "t4l_30000ps")),
        ("mif", ("mif_180_anti_200ps", "mif_180_anti_5000ps", "mif_180_anti_30000ps")),
        ("mdm2_nut", ("mdm2_nutlin_notprot_200ps", "mdm2_nutlin_notprot_5000ps", "mdm2_nutlin_notprot_30000ps")),
        ("mdm2_pip", ("mdm2_pip2_short_200ps", "mdm2_pip2_short_5000ps", "mdm2_pip2_short_30000ps")),
        # NOTE(review): the 30 ns PDE2A entry has no "ps" suffix, unlike the others — confirm
        ("pde", ("pde2a_p10_200ps", "pde2a_p10_5000ps", "pde2a_p10_30000")),
    )
    for length, directory in zip(("0.2", "6", "30"), directories)
}

# For each system, map the run-length label to the matching key of calc_dirs.
ligs = {
    name: {f"{length} ns": f"{label}_{length}" for length in ("0.2", "6", "30")}
    for name, label in (
        ("T4L", "t4l"),
        ("MIF", "mif"),
        ("MDM2-Nutlin", "mdm2_nut"),
        ("MDM2-PIP2", "mdm2_pip"),
        ("PDE2A", "pde"),
    )
}

# Convert the convergence fractions to simulation times and GPU hours. Each
# fraction is first mapped to a simulation time between the start time (0 for
# the non-equilibrated data) and the stage's total simulation time; this time is
# then multiplied by the relative simulation cost of the leg and by the
# reference cost of 0.21 GPU hours per ns.
REF_COST = 0.21  # GPU hours per ns
# Relative simulation cost of each leg, per system
costs = {
    "T4L": {"bound": 0.8970899470899469, "free": 0.09157509157509156},
    "MIF": {"bound": 0.9984126984126981, "free": 0.18416768416768411},
    "MDM2-Nutlin": {"bound": 0.5854497354497354, "free": 0.16025641025641021},
    "MDM2-PIP2": {"bound": 0.548148148148148, "free": 0.13838013838013835},
    "PDE2A": {"bound": 2.486772486772486, "free": 0.17348392348392347},
}

with open("final_analysis/dgs_conv_nonequil.pkl", "rb") as f:
    dgs_conv_nonequil = pickle.load(f)

# To also convert the equilibrated data, load dgs_conv_equil and use
# dg_dicts = {"equil": dgs_conv_equil, "nonequil": dgs_conv_nonequil}
# Only bother with nonequilibrated
dg_dicts = {"nonequil": dgs_conv_nonequil}
for dg_type, dg_dict in dg_dicts.items():
    for lig in dg_dict:
        print(f"Converting {lig} {dg_type}")
        for time in dg_dict[lig]:
            calc = a3.Calculation(base_dir=calc_dirs[ligs[lig][time]])
            for leg in calc.legs:
                # The part of str(leg.leg_type) after the "." is lower-cased and
                # used as the key into costs (presumably "bound" or "free")
                cost = REF_COST * costs[lig][str(leg.leg_type).split(".")[1].lower()]
                for stage in leg.stages:
                    stage_results = dg_dict[lig][time][str(leg.leg_type)][str(stage.stage_type)]
                    start_time = 0 if dg_type == "nonequil" else stage.equil_time * stage.ensemble_size
                    end_time = stage.tot_simtime
                    fracts = np.array(stage_results["fracts"])
                    times = ((end_time - start_time) * fracts) + start_time
                    gpu_times = times * cost
                    stage_results["sim_times"] = times
                    stage_results["gpu_times"] = gpu_times

            calc._close_logging_handlers()
            del calc

for dg_type, dg_dict in dg_dicts.items():
    with open(f"final_analysis/dgs_conv_{dg_type}_times.pkl", "wb") as f:
        pickle.dump(dg_dict, f)

In [None]:
import a3fe as a3
import pickle
import logging
import numpy as np
from scipy.stats import sem, t

def get_95_ci(data: np.ndarray):
    """Return the half-width of the 95 % confidence interval of the mean of ``data``.

    The interval is taken from a Student's t distribution with
    ``len(data) - 1`` degrees of freedom, centred on the sample mean and scaled
    by the standard error of the mean (``scipy.stats.sem``). The returned value
    is the distance from the mean to the upper bound of that interval.
    """
    centre = np.mean(data)
    _, upper_bound = t.interval(0.95, len(data) - 1, centre, scale=sem(data))
    return upper_bound - centre

# Map each short run label ("<system>_<run length in ns>") to the name of the
# directory holding that calculation.
calc_dirs = {
    f"{label}_{length}": directory
    for label, directories in (
        ("t4l", ("t4l_200ps", "t4l_5000ps", "t4l_30000ps")),
        ("mif", ("mif_180_anti_200ps", "mif_180_anti_5000ps", "mif_180_anti_30000ps")),
        ("mdm2_nut", ("mdm2_nutlin_notprot_200ps", "mdm2_nutlin_notprot_5000ps", "mdm2_nutlin_notprot_30000ps")),
        ("mdm2_pip", ("mdm2_pip2_short_200ps", "mdm2_pip2_short_5000ps", "mdm2_pip2_short_30000ps")),
        # NOTE(review): the 30 ns PDE2A entry has no "ps" suffix, unlike the others — confirm
        ("pde", ("pde2a_p10_200ps", "pde2a_p10_5000ps", "pde2a_p10_30000")),
    )
    for length, directory in zip(("0.2", "6", "30"), directories)
}

# For each system, map the run-length label to the matching key of calc_dirs.
ligs = {
    name: {f"{length} ns": f"{label}_{length}" for length in ("0.2", "6", "30")}
    for name, label in (
        ("T4L", "t4l"),
        ("MIF", "mif"),
        ("MDM2-Nutlin", "mdm2_nut"),
        ("MDM2-PIP2", "mdm2_pip"),
        ("PDE2A", "pde"),
    )
}

# Get a dictionary of the final free energy changes for the 30 ns runs. Every
# entry is a (mean, 95 % C.I. half-width) tuple computed from the stored
# free energies of the calculation, of each leg and of each stage.
final_dGs = {}
# Iterate over the systems defined in ligs (the original loop variable source,
# atom_nums, is not defined anywhere in this cell)
for system in ligs:
    calc = a3.Calculation(base_dir=calc_dirs[ligs[system]["30 ns"]])
    system_results = {"dg": (calc._delta_g.mean(), get_95_ci(calc._delta_g))}
    for leg in calc.legs:
        leg_results = {"dg": (leg._delta_g.mean(), get_95_ci(leg._delta_g))}
        for stage in leg.stages:
            leg_results[str(stage.stage_type)] = (stage._delta_g.mean(), get_95_ci(stage._delta_g))
        system_results[str(leg.leg_type)] = leg_results
    final_dGs[system] = system_results
    calc._close_logging_handlers()
    del calc

with open("final_analysis/final_dGs_30.pkl", "wb") as f:
    pickle.dump(final_dGs, f)