In [1]:
import warnings

import os
import tqdm

import numpy as np
import pandas as pd
import scipy.optimize
import scipy.stats as st

import holoviews as hv
hv.extension('matplotlib')
import colorcet as cc

import lateral_signaling as lsig

<hr>

In [22]:
# Input growth-curve measurements and the directory that outputs are written to
data_file = "C://Users/Pranav/git/evomorph/data/growth_curves.csv"

save_dir  = "C://Users/Pranav/git/evomorph/data"

# Toggles for writing results to disk
save_data = False
save_figs = False

# Figure export settings, read by the figure-saving cell when save_figs is True.
# These names were previously used without being defined anywhere in the
# notebook; the values below are placeholders -- adjust as needed.
fig_fmt = "png"
dpi = 300

In [20]:
# Fail early with a clear message if the data file is missing
assert os.path.exists(data_file), "File does not exist"

df = pd.read_csv(data_file)

# Store initial densities as integers
df["initial cell density (mm^-2)"] = df["initial cell density (mm^-2)"].values.astype(int)
# Convert cell counts to densities; the divisor 320 is presumably the
# culture area in mm^2 -- TODO confirm
df["density (mm^-2)"] = df.cell_count / 320

# Preview the first and last five rows. DataFrame.append was removed in
# pandas 2.0, so the two previews are stacked with pd.concat instead.
pd.concat([df.head(), df.tail()])

Unnamed: 0,days_integer,condition,initial cell density (mm^-2),replicate,date_time,time_of_sample,time (days),cell_count,density (mm^-2)
0,0,untreated,1250,a,,,0.0,400000,1250.0
1,0,untreated,1250,b,,,0.0,400000,1250.0
2,0,untreated,1250,c,,,0.0,400000,1250.0
3,1,untreated,1250,a,06.27.2020 06:29:47 AM,06:29:47 AM,1.0,322564,1008.0125
4,1,untreated,1250,b,06.27.2020 06:46:26 AM,06:46:26 AM,1.011562,328429,1026.340625
202,6,FGF2,5000,a,07.02.2020 05:32:01 AM,05:32:01 AM,5.949549,2345918,7330.99375
203,6,FGF2,5000,b,07.02.2020 05:39:56 AM,05:39:56 AM,5.955046,2222758,6946.11875
204,6,FGF2,5000,c,07.02.2020 05:47:44 AM,05:47:44 AM,5.960463,2216893,6927.790625
205,7,FGF2,5000,a,07.03.2020 05:14:06 AM,05:14:06 AM,6.937106,1677332,5241.6625
206,7,FGF2,5000,b,07.03.2020 05:21:56 AM,05:21:56 AM,6.942546,1841546,5754.83125


In [3]:
# Keep only the growth curves seeded at rho_0 (100% confluence)
rho_0 = 1250   # cells / mm^2
data = df.loc[df["initial cell density (mm^-2)"].eq(rho_0)]

In [11]:
# Unpack conditions and density data
conds = []
rho_rows = []

# Group by the scalar key (not a one-element list) so that each group key is
# the condition label itself; with a list key, recent pandas versions yield
# 1-tuples, which would end up in `conds`.
for cond, cond_data in data.groupby("condition"):

    # Get condition
    conds.append(cond)

    # Get density data in chronological order
    d = cond_data.sort_values("time (days)")
    rho_rows.append(d["density (mm^-2)"].values)

# One row per condition, one column per sample. Building the array from the
# collected rows avoids hard-coding the number of conditions and samples;
# rows of unequal length raise here instead of failing on assignment.
rhos = np.array(rho_rows, dtype=float)

In [12]:
# Scatter of measured density against integer day, one overlaid series per condition
density_scatter = hv.Scatter(
    data=data,
    kdims=["days_integer"],
    vdims=["density (mm^-2)", "condition"],
)
p1 = density_scatter.groupby("condition").overlay("condition")
p1 = p1.opts(aspect=1.6, fig_size=160)

hv.output(p1, dpi=90)

In [13]:
# Integer-day time-points: each day repeated three times, final entry dropped
tmax_days = df["days_integer"].max()
t = np.arange(tmax_days + 1).repeat(3)[:-1]

In [14]:
def resid(params, t, rho, rho_0):
    """Return observed minus model density for the logistic growth model.

    `params` unpacks as `(g, rho_max)`; both are forwarded, together with
    `t` and `rho_0`, to `lsig.logistic` to obtain the model density.
    """
    growth_rate, rho_limit = params
    predicted = lsig.logistic(t, growth_rate, rho_0, rho_limit)
    return rho - predicted

def logistic_mle_lstq(data):
    """Fit the logistic growth model by bounded least squares.

    `data` is a sequence `(t, rho, rho_0)`. Returns the tuple
    `(g, rho_max, sigma)`, where `sigma` is the root-mean-squared residual
    at the fitted parameters.
    """
    t, rho, rho_0 = data

    # Starting point and box constraints for (g, rho_max)
    initial_guess = np.array([1, 5000])
    lower_bounds = [0, rho_0]
    upper_bounds = [8, 2e5]

    fit = scipy.optimize.least_squares(
        resid,
        initial_guess,
        args=(t, rho, rho_0),
        bounds=(lower_bounds, upper_bounds),
    )

    # Residual sum of squares at the optimum
    residuals = resid(fit.x, t, rho, rho_0)
    rss_mle = np.sum(residuals**2)

    # Root-mean-squared deviation (the MLE for the standard deviation)
    sigma_mle = np.sqrt(rss_mle / len(t))

    return (*fit.x, sigma_mle)

In [15]:
# Fit every row of `rhos`; each result row is (g, rho_max, sigma)
mle_params = np.array(
    [logistic_mle_lstq([t, rho_i, rho_0]) for rho_i in rhos]
)

mle_params

array([[1.46209717e+00, 5.67945094e+03, 1.05525685e+03],
       [3.13327247e-01, 2.73033529e+03, 4.96578882e+02],
       [7.28398184e-01, 7.03800303e+03, 7.87495152e+02]])

In [32]:
# Tabulate fitted parameters and derived quantities, one row per condition.
# g_ratio is taken relative to row 2 of mle_params.
mle_params_df = pd.DataFrame(
    data={
        "condition": conds,
        "g_inv_days": mle_params[:, 0],
        "rho_max_inv_mm2": mle_params[:, 1],
        "sigma": mle_params[:, 2],
        "g_ratio": mle_params[:, 0] / mle_params[2, 0],
        "rho_max_ratio": mle_params[:, 1]/rho_0,
        "doubling_time_days": np.log(2) / mle_params[:, 0],
        "doubling_time_hours": 24 * np.log(2) / mle_params[:, 0],
    }
)

In [33]:
# Display the table of fitted parameters and derived quantities
mle_params_df

Unnamed: 0,condition,g_inv_days,rho_max_inv_mm2,sigma,g_ratio,rho_max_ratio,doubling_time_days,doubling_time_hours
0,FGF2,1.462097,5679.450941,1055.256851,2.007277,4.543561,0.474077,11.377857
1,RI,0.313327,2730.335293,496.578882,0.430159,2.184268,2.212215,53.093156
2,untreated,0.728398,7038.003027,787.495152,1.0,5.630402,0.951605,22.838514


In [34]:
# Write the fitted-parameter table to `save_dir` when saving is enabled
if save_data:
    mle_params_fname = "growth_parameters_MLE.csv"
    save_path = os.path.realpath(
        os.path.join(save_dir, mle_params_fname)
    )
    mle_params_df.to_csv(path_or_buf=save_path)

<hr>

In [28]:
# Evaluate each fitted logistic curve on a dense time grid
nt_ = 101
t_ = np.linspace(0, 7, nt_)
rho_s = [
    lsig.logistic(t_, g_fit, rho_0, rho_max_fit)
    for g_fit, rho_max_fit in mle_params[:3, :2]
]

# Long-format columns: the time grid repeated per curve, the curves laid end
# to end, and the matching condition label for every point
theor_curves = {}
theor_curves["time (days)"] = np.tile(t_, 3)
theor_curves["density (mm^-2)"] = [rho for curve in rho_s for rho in curve]
theor_curves["condition"] = np.repeat(conds, nt_)

fit_curves = hv.Curve(
    data=theor_curves,
    kdims=["time (days)"],
    vdims=["density (mm^-2)", "condition"],
)
p2 = fit_curves.groupby("condition").overlay("condition")
p2 = p2.opts(
    xlabel="time (days)",
    ylabel=r"density (cells / $mm^2$)",
    yticks=(0, 1250, 2500, 3750, 5000, 6250, 7500),
    aspect=1.6,
    fig_size=160,
)

# Measured points with the fitted curves drawn on top
hv.output(p1 * p2, dpi=90)

In [37]:
# Fitted curve from row 2 of mle_params, evaluated on a dense time grid
nt_ = 101
t_ = np.linspace(0, 7, nt_)
rho_s = lsig.logistic(t_, mle_params[2][0], rho_0, mle_params[2][1])

theor_curves = {}
theor_curves["time (days)"] = np.tile(t_, 1)
theor_curves["density (mm^-2)"] = rho_s
theor_curves["condition"] = np.repeat(conds[2:3], nt_)

# Measured densities for the untreated condition
untreated_points = hv.Scatter(
    data=data.loc[data["condition"].eq("untreated")],
    kdims=["days_integer"],
    vdims=["density (mm^-2)"],
)
untreated_points = untreated_points.opts(
    fontscale=1.3,
    aspect=1.2,
    c=cc.gray[150],
    marker="o",
    s=20,
    # fig_size=160,
)

# Fitted curve, carrying the axis labels and ticks for the overlay
untreated_curve = hv.Curve(
    data=theor_curves,
    kdims=["time (days)"],
    vdims=["density (mm^-2)"],
)
untreated_curve = untreated_curve.opts(
    xlabel="time (days)",
    ylabel=r"density (cells / $mm^2$)",
    xticks=np.arange(8),
    yticks=(0, 1250, 2500, 3750, 5000, 6250, 7500),
    fontscale=1.3,
    aspect=1.2,
    c=lsig.col_black,
    # fig_size=160,
)

density_overlay_untreated = untreated_points * untreated_curve

hv.output(density_overlay_untreated, dpi=90)

In [39]:
# Export the untreated density overlay to `save_dir` when figure saving is enabled
if save_figs:
    plot_fname = "experiment_dens_curvefit_untreated_7days"
    plot_path = os.path.join(save_dir, ".".join([plot_fname, fig_fmt]))
    hv.save(density_overlay_untreated, plot_path, dpi=dpi, fmt=fig_fmt)

<hr>

In [None]:
import bebi103

import bokeh.io
bokeh.io.output_notebook()
# bebi103.hv.set_defaults()

In [3]:
# Seeded random number generator for the bootstrap
rg = np.random.default_rng(seed=2021)

In [20]:
def gen_logistic_data(params, t, rho_0, size, rg):
    """Draw a synthetic density data set from the logistic growth model.

    The mean is `lsig.logistic(t, params[0], rho_0, params[1])` and the
    noise is normal with standard deviation `params[-1]`, drawn from `rg`.
    Negative draws are clipped to zero. `size` is accepted but not used.

    Returns `[t, gen_rho, rho_0]`.
    """
    g, rho_max = params[0], params[1]
    noise_sd = params[-1]

    mean_rho = lsig.logistic(t, g, rho_0, rho_max)
    noisy_rho = rg.normal(mean_rho, noise_sd)

    # Densities cannot be negative
    return [t, np.maximum(noisy_rho, 0), rho_0]

In [21]:
# Parametric bootstrap of the least-squares MLE for each condition.
# all_cond_bs_reps collects the replicate arrays; all_cond_conf_int[i] holds
# the 2.5th / 97.5th percentiles (axis 1) of each parameter (axis 2).
all_cond_bs_reps = []
all_cond_conf_int = np.empty((3, 2, 3))

for i in range(3):
    
    # Bootstrap replicates of maximum likelihood estimation.
    # Pass the seeded generator explicitly: without `rg=rg` the seeded `rg`
    # defined earlier is never used and the replicates change on every run.
    bs_reps = bebi103.draw_bs_reps_mle(
        logistic_mle_lstq,
        gen_logistic_data,
        data = [t, rhos[i], rho_0],
        mle_args=(),
        gen_args=(t, rho_0),
        size=5000,
        n_jobs=1,
        progress_bar=True,
        rg=rg,
    )
    all_cond_bs_reps.append(bs_reps)

    # Compute confidence intervals
    conf_ints = np.percentile(bs_reps, [2.5, 97.5], axis=0)
    
    all_cond_conf_int[i] = conf_ints

100%|██████████| 5000/5000 [00:22<00:00, 226.60it/s]
100%|██████████| 5000/5000 [00:43<00:00, 115.26it/s]
100%|██████████| 5000/5000 [00:17<00:00, 287.05it/s]


In [22]:
# Corner plot of the bootstrap replicates in all_cond_bs_reps[0]
df_res = pd.DataFrame(all_cond_bs_reps[0], columns=["g", "ρ_max", "σ"])

# Suppress any warnings raised while the plot is built
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    p1 = bebi103.viz.corner(
        samples=df_res,
        pars=list(df_res.columns),
        show_contours=True,
        levels=[0.95],
    )

bokeh.io.show(p1)

In [23]:
# Corner plot of the bootstrap replicates in all_cond_bs_reps[1]
df_res = pd.DataFrame(all_cond_bs_reps[1], columns=["g", "ρ_max", "σ"])

# Suppress any warnings raised while the plot is built
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    p2 = bebi103.viz.corner(
        samples=df_res,
        pars=list(df_res.columns),
        show_contours=True,
        levels=[0.95],
    )

bokeh.io.show(p2)

In [24]:
# Corner plot of the bootstrap replicates in all_cond_bs_reps[2]
df_res = pd.DataFrame(all_cond_bs_reps[2], columns=["g", "ρ_max", "σ"])

# Suppress any warnings raised while the plot is built
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    p3 = bebi103.viz.corner(
        samples=df_res,
        pars=list(df_res.columns),
        show_contours=True,
        levels=[0.95],
    )

bokeh.io.show(p3)

In [1]:
# # Package replicates in data frame for plotting
# df_res = pd.DataFrame(data=all_cond_bs_reps[0], columns=["g", "ρ_max", "σ"])

# with warnings.catch_warnings():
#     warnings.simplefilter("ignore")
#     p1 = bebi103.viz.corner(
#         samples=df_res,
#         pars=["g", "ρ_max", "σ"],
#         show_contours=True,
#         levels = [0.95],
# #         background_fill_color = None,
# #         border_fill_color = None,
#     )

# # bokeh.io.show(p1)
# # p1.background_fill_color = None
# # p1.border_fill_color = None

# bokeh.io.export_png(p1, filename="plots/cornerplot1.png")
# bokeh.io.export_svg(p1, filename="plots/cornerplot1.svg")

<hr>

In [18]:
def log_likelihood(params, t, rho, rho_0):
    """Normal log likelihood of densities `rho` about the logistic growth curve.

    `params` unpacks as `(g, rho_max, sigma)`. Returns `-np.inf` if any of
    the three is non-positive; otherwise returns the summed normal log-density
    of `rho` with mean `lsig.logistic(t, g, rho_0, rho_max)` and standard
    deviation `sigma`.
    """
    g, rho_max, sigma = params

    # Non-positive parameters are outside the model's support
    if any(p <= 0 for p in (g, rho_max, sigma)):
        return -np.inf

    model_rho = lsig.logistic(t, g, rho_0, rho_max)
    point_logpdf = st.norm.logpdf(rho, model_rho, sigma)
    return np.sum(point_logpdf)

In [19]:
def logistic_mle(t, rho, rho_0):
    """Maximize the logistic-growth log likelihood with Powell's method.

    Returns the optimizer's parameter vector `(g, rho_max, sigma)`.
    Raises RuntimeError if the optimizer reports that it did not succeed.
    """
    def neg_log_like(params, t, rho, rho_0):
        # Minimizing the negative log likelihood maximizes the likelihood
        return -log_likelihood(params, t, rho, rho_0)

    # Warnings raised during the optimization are suppressed
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        res = scipy.optimize.minimize(
            fun=neg_log_like,
            x0=np.array([1, 5000, 500]),
            args=(t, rho, rho_0),
            method='Powell',
        )

    if not res.success:
        raise RuntimeError('Convergence failed with message', res.message)

    return res.x

In [21]:
# Powell-based MLE for every condition, with a progress bar; one result row per condition
cond_mle_params = np.array(
    [logistic_mle(t, rhos[i], rho_0) for i in tqdm.tqdm(range(len(conds)))]
)

100%|██████████| 3/3 [00:00<00:00,  4.87it/s]


In [22]:
# Display the fitted (g, rho_max, sigma) rows returned by logistic_mle
cond_mle_params

array([[1.46364517e+00, 5.67885493e+03, 1.05525815e+03],
       [1.61942562e-01, 5.01051259e+03, 5.02591944e+02],
       [7.30798944e-01, 7.03432523e+03, 7.84915716e+02]])