# Analysis Template: Michaelis-Menten Kinetics
Updated 11/19/2024 JSZ

In [None]:
# enables autoreloading of modules
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from scipy.stats import linregress
from matplotlib import pyplot as plt 
from seaborn import color_palette, stripplot
from scipy.optimize import minimize, root_scalar

from kinetics.functions import *
from kinetics.interactive import plot_data, plot_data_and_fits
from kinetixsimulator.chemicalkinetics import KineticModel, MMReaction, ReversibleReaction
from kinetixsimulator.guis import ProgressCurveGUI

# Global plot styling: hide the top and right axes spines and use Helvetica.
plt.rcParams.update({
    'axes.spines.top': False,
    'axes.spines.right': False,
    'font.family': 'Helvetica',
})

# Suppress every warning for the rest of the session.
import warnings
warnings.filterwarnings("ignore")

## Load expression data

In [None]:
# Load the button quantification table.
expression_df = pd.read_csv('./_data/button_quant.csv')
# Scale the background-subtracted button intensity into an enzyme concentration.
# NOTE(review): 91900.03 is presumably the intensity-per-nM eGFP calibration factor
# (the y-axis below is labelled in nM) — confirm against the calibration curve.
expression_df['enzyme_conc'] = expression_df['summed_button_BGsub'] / 91900.03

fig, ax = plt.subplots(figsize=(6,4))
# Strip plot of enzyme concentration, one x-axis category per id (rows sorted by id).
_ = stripplot(data=expression_df.sort_values('id'), x='id', y='enzyme_conc', palette='husl', ax=ax)

ax.set_ylabel('[eGFP] (nM)')
ax.set_xlabel('')
# Re-apply the existing tick labels rotated by 75 degrees.
_ = ax.set_xticklabels(ax.get_xticklabels(), rotation=75)

## Load binding data

In [None]:
# Path to the csv file for the bait signal generated in processing (fill in).
bait_file = ''
bait_df = pd.read_csv(bait_file)
# Only the post-wash bait intensity is used here.
postwash_mask = bait_df['series_index'] == 'post_wash_bait'

# Path to the csv file for the prey signal (fill in).
prey_file = ''
prey_df = pd.read_csv(prey_file)

# r = prey intensity divided by post-wash bait intensity, element by element.
# NOTE(review): the division is element-wise on raw arrays, so the post-wash
# bait rows must broadcast against the prey rows — confirm the two tables
# line up row for row.
prey_df['r'] = (
    prey_df['summed_button_BGsub'].to_numpy()
    / bait_df.loc[postwash_mask, 'summed_button_BGsub'].to_numpy()
)

## Remove chambers with low expression

In [None]:
def transform_mask(mask, n_conc=7):
    """Expand a per-chamber mask so that each entry is repeated ``n_conc`` times.

    Every element of ``mask`` is repeated ``n_conc`` times consecutively, e.g.
    ``[a, b]`` with ``n_conc=3`` becomes ``[a, a, a, b, b, b]``.

    Parameters
    ----------
    mask : array-like
        One value per chamber (a pandas Series, NumPy array or list).
    n_conc : int, default 7
        Number of consecutive rows each chamber occupies in the target table.
        NOTE(review): this presumes the target table stores the ``n_conc``
        rows of one chamber contiguously and in the same chamber order as
        ``mask`` — confirm against the processing output.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``len(mask) * n_conc``.
    """
    # np.repeat always yields a 1-D array, including for a single-element mask.
    return np.repeat(np.asarray(mask), n_conc)
    

# Per-chamber mask: True where the enzyme concentration is greater than 1
# (low-expressing chambers are False).
expression_filter = expression_df['enzyme_conc'].gt(1)

# prey_df has several rows per chamber, so repeat each mask entry 7 times
# to match its length.
transformed_expression_filter = transform_mask(expression_filter, n_conc=7)

# Keep only the prey rows that belong to chambers passing the filter.
prey_df = prey_df.loc[transformed_expression_filter]

## Fit a Kd to each chamber

In [None]:
def calculate_z_scores(data):
    """Return the absolute z-score of every element of ``data``.

    The mean and (population) standard deviation are computed with NaN
    entries ignored; NaN entries stay NaN in the result.

    Parameters
    ----------
    data : numpy.ndarray
        Values to score.

    Returns
    -------
    numpy.ndarray
        ``|data - mean| / std``. When the standard deviation is zero (all
        non-NaN values identical) every non-NaN entry scores 0 instead of
        the NaN that a 0/0 division would produce.
    """
    mean, std = np.nanmean(data), np.nanstd(data)
    deviation = np.abs(data - mean)
    if not std > 0:
        # Zero spread (or no usable data): dividing would give NaN/inf, and a
        # NaN z-score fails any "z < threshold" filter. Multiplying by zero
        # keeps NaN entries as NaN and maps everything else to 0.
        return deviation * 0.0
    return deviation / std


def binding_model(l: np.ndarray, rmax: float, kd: float):
    """Evaluate the saturation binding curve ``rmax * l / (kd + l)``.

    ``l`` is the independent variable (scalar or array), ``rmax`` the plateau
    value and ``kd`` the value of ``l`` at which the curve reaches half of
    ``rmax``.
    """
    scaled_ligand = rmax * l
    denominator = kd + l
    return scaled_ligand / denominator


def fit_kd_main(fixed_rmax=None, variants_to_fit=None):
    """Fit the binding model to every chamber in the notebook-level ``prey_df``.

    Rows of ``prey_df`` are grouped by chamber position (``x``, ``y``). Each
    chamber is fit on its own (no parameters are shared between chambers) by
    minimizing the sum of squared residuals between ``binding_model`` and the
    chamber's ``r`` values with ``scipy.optimize.minimize``.

    Parameters
    ----------
    fixed_rmax : float, optional
        When given, ``rmax`` is held at this value and only ``kd`` is fit
        (starting from ``kd = 1``). When ``None`` (default) both ``rmax`` and
        ``kd`` are fit, starting from ``rmax = 0.5`` and ``kd = 1``.
    variants_to_fit : collection, optional
        Values of the ``id`` column to restrict the fit to. ``None`` or an
        empty collection (default) fits every chamber in ``prey_df``.

    Returns
    -------
    pandas.DataFrame
        One row per chamber, ordered by (``x``, ``y``), with columns ``rmax``,
        ``rmax_std_err``, ``kd``, ``kd_std_err``, ``id``, ``x`` and ``y``.
    """

    def std_err_from_hess_inv(hess_inv):
        # NOTE(review): this is the square root of the diagonal of the
        # optimizer's inverse-Hessian estimate for the sum-of-squares
        # objective. It is not scaled by the residual variance — confirm
        # before reporting it as a standard error.
        return np.sqrt(np.diag(hess_inv))

    def get_objective(ligand, response):
        """Build the per-chamber (non-global) sum-of-squares objective."""

        def objective(params):
            rmax, kd = params
            residuals = binding_model(ligand, rmax, kd) - response
            return (residuals ** 2).sum()

        return objective

    if variants_to_fit is not None and len(variants_to_fit) > 0:
        subset = prey_df[prey_df['id'].isin(variants_to_fit)]
    else:
        subset = prey_df

    columns = ['rmax', 'rmax_std_err', 'kd', 'kd_std_err', 'id', 'x', 'y']
    records = []

    # Iterating the groups (rather than groupby.apply) keeps the grouping
    # columns available on each chamber's frame.
    for _, chamber in subset.groupby(by=['x', 'y']):
        # NOTE(review): the independent variable is read from the 'time_s'
        # column — presumably the processing step stores the ligand
        # concentration series there; confirm.
        ligand = chamber['time_s'].to_numpy()
        response = chamber['r'].to_numpy()
        objective = get_objective(ligand, response)

        if fixed_rmax is not None:
            # rmax is not estimated in this mode; 1 is a placeholder value.
            # NOTE(review): consider NaN here instead.
            rmax, rmax_std_err = fixed_rmax, 1
            fit = minimize(
                lambda kd_only, objective=objective: objective([fixed_rmax, kd_only[0]]),
                np.array([1.0]),
            )
            kd = fit.x[0]
            # Take the scalar so this column holds numbers in both modes.
            kd_std_err = std_err_from_hess_inv(fit.hess_inv)[0]
        else:
            fit = minimize(objective, np.array([0.5, 1.0]))
            rmax, kd = fit.x
            rmax_std_err, kd_std_err = std_err_from_hess_inv(fit.hess_inv)

        records.append({
            'rmax': rmax,
            'rmax_std_err': rmax_std_err,
            'kd': kd,
            'kd_std_err': kd_std_err,
            'id': chamber['id'].iloc[0],
            'x': chamber['x'].iloc[0],
            'y': chamber['y'].iloc[0],
        })

    # Explicit columns keep the schema stable even when no chamber was fit.
    return pd.DataFrame(records, columns=columns)


def group_chamber_fit_df(chamber_fit_df, z_threshold=3):
    """Aggregate per-chamber fits into one row per variant ``id``.

    For a variant with more than one chamber, chambers whose ``kd`` z-score
    (from ``calculate_z_scores``) is at or above ``z_threshold`` are dropped,
    as are chambers with a NaN ``kd``. The remaining ``kd`` and ``rmax``
    values are summarized as a mean and a ``mean ± 1.96 * standard error``
    interval. A variant with a single chamber reports that chamber's values
    and a NaN interval.

    Parameters
    ----------
    chamber_fit_df : pandas.DataFrame
        Per-chamber fits with at least the columns ``id``, ``rmax``, ``kd``.
    z_threshold : float, default 3
        Chambers with a ``kd`` z-score at or above this value are excluded.

    Returns
    -------
    pandas.DataFrame
        One row per ``id`` (sorted by ``id``) with columns ``id``, ``rmax``,
        ``rmax_ci``, ``kd``, ``kd_ci``; each ``*_ci`` cell is a length-2
        NumPy array ``[lower, upper]``.
    """
    nan_interval = np.array([np.nan, np.nan])

    def summarize(values):
        """Return (mean, interval) of the non-NaN entries of ``values``."""
        n_valid = np.count_nonzero(~np.isnan(values))
        if n_valid == 0:
            return np.nan, nan_interval.copy()
        mean = np.nanmean(values)
        # Population std (ddof=0) divided by sqrt of the number of values used.
        half_width = 1.96 * np.nanstd(values) / (n_valid ** 0.5)
        return mean, np.array([mean - half_width, mean + half_width])

    records = []
    # Iterating the groups (rather than groupby.apply) keeps the 'id' column
    # available on each variant's frame.
    for _, group in chamber_fit_df.groupby(by='id'):
        rmax = group['rmax'].to_numpy(dtype=float)
        kd = group['kd'].to_numpy(dtype=float)

        if len(kd) > 1:
            z_kd = calculate_z_scores(kd)
            # Written as "not >=" so that a NaN z-score caused by zero spread
            # does not discard every chamber; NaN kd values are still dropped.
            keep = ~np.isnan(kd) & ~(z_kd >= z_threshold)
            kd_mean, kd_ci = summarize(kd[keep])
            rmax_mean, rmax_ci = summarize(rmax[keep])
        else:
            kd_mean, rmax_mean = kd[0], rmax[0]
            kd_ci, rmax_ci = nan_interval.copy(), nan_interval.copy()

        records.append({
            'id': group['id'].iloc[0],
            'rmax': rmax_mean,
            'rmax_ci': rmax_ci,
            'kd': kd_mean,
            'kd_ci': kd_ci,
        })

    return pd.DataFrame(records, columns=['id', 'rmax', 'rmax_ci', 'kd', 'kd_ci'])

In [None]:
# fit a Kd value to each chamber (no fixed_rmax is passed, so rmax and Kd are both fit)
# returns a dataframe containing fits for each chamber
chamber_fit_df = fit_kd_main()

# aggregate fits for each mutant: mean and a mean +/- 1.96 * standard-error interval
# for both Kd and rmax
# a z-score filter (on Kd) is implemented within this function
mutant_fit_df = group_chamber_fit_df(chamber_fit_df)
mutant_fit_df

## Fit a Kd to each chamber using a fixed Rmax value (optional)

Applicable if your experiment includes variants that you were unable to saturate and whose Kd values therefore cannot be determined confidently.

In [None]:
tight_binders = ['H141R', 'Y157A'] # a list containing IDs for tight-binding variants
# fit only the chambers of these variants; rmax and Kd are both fit,
# and the fitted rmax values are used in the next cell
tight_binder_fit_df = fit_kd_main(variants_to_fit=tight_binders)

In [None]:
# z-score filtering of rmax: keep chambers whose rmax z-score is below the
# threshold, then take the mean and standard deviation of what remains
threshold = 3
tight_binder_fit_df = pd.DataFrame(tight_binder_fit_df)
tight_binder_fit_df['z'] = calculate_z_scores(tight_binder_fit_df['rmax'].to_numpy())
tight_binder_fit_df = tight_binder_fit_df.loc[tight_binder_fit_df['z'].lt(threshold)]
rmax = tight_binder_fit_df['rmax'].mean()
rmax_std = tight_binder_fit_df['rmax'].std()

In [None]:
# fit all chambers using value for rmax calculated above
# (rmax is held at that value, so only Kd is fit for each chamber)
chamber_fit_df_individual = fit_kd_main(fixed_rmax=rmax)
# aggregate the per-chamber fits into one row per mutant
mutant_fit_df_individual = group_chamber_fit_df(chamber_fit_df_individual)
# last expression of the cell, so the notebook shows the per-mutant table
mutant_fit_df_individual