In [1]:
import warnings

import os
import tqdm

import numpy as np
import pandas as pd
import scipy.optimize
import scipy.stats as st

import holoviews as hv
hv.extension('matplotlib')
import colorcet as cc

import lateral_signaling as lsig

In [2]:
import bebi103

import bokeh.io
bokeh.io.output_notebook()
# bebi103.hv.set_defaults()

  register_cmap("cet_" + name, cmap=cmap)
  register_cmap("cet_" + name, cmap=cmap)
Features requiring DataShader will not work and you will get exceptions.


In [4]:
# Set RNG seed
rg = np.random.default_rng(2021)

<hr>

In [5]:
data_dir = os.path.abspath("../data")
save_dir = os.path.abspath("../plots")

save_data = False
save_figs = False
fmt = "png"
dpi = 300

In [6]:
df = pd.read_csv(os.path.join(data_dir, "growth_curves.csv"))

In [11]:
df.head().append(df.tail())

Unnamed: 0,days_integer,condition,initial cell density (mm^-2),replicate,date_time,time_of_sample,time (days),cell_count,cell density (mm^-2)
0,0,untreated,1250,a,,,0.0,400000,1250.0
1,0,untreated,1250,b,,,0.0,400000,1250.0
2,0,untreated,1250,c,,,0.0,400000,1250.0
3,1,untreated,1250,a,06.27.2020 06:29:47 AM,6:29:47 AM,1.0,322564,1008.0125
4,1,untreated,1250,b,06.27.2020 06:46:26 AM,6:46:26 AM,1.011562,328429,1026.340625
202,6,FGF2,5000,a,07.02.2020 05:32:01 AM,5:32:01 AM,5.949549,2345918,7330.99375
203,6,FGF2,5000,b,07.02.2020 05:39:56 AM,5:39:56 AM,5.955046,2222758,6946.11875
204,6,FGF2,5000,c,07.02.2020 05:47:44 AM,5:47:44 AM,5.960463,2216893,6927.790625
205,7,FGF2,5000,a,07.03.2020 05:14:06 AM,5:14:06 AM,6.937106,1677332,5241.6625
206,7,FGF2,5000,b,07.03.2020 05:21:56 AM,5:21:56 AM,6.942546,1841546,5754.83125


In [13]:
# Select growth data starting from 100% confluence 
rho_0 = 1250   # cells / mm^2
data = df.loc[df["initial cell density (mm^-2)"] == rho_0]

In [14]:
# Unpack conditions and density data
conds = []
rhos = np.zeros((3, 23))

for i, tup in enumerate(data.groupby(["condition"])):
    
    # Get condition
    conds.append(tup[0])
    
    # Get density data in chronological order
    d = tup[1].sort_values("time (days)")
    rhos[i] = d["cell density (mm^-2)"].values

In [46]:
colors = [lsig.purple, lsig.greens[2], lsig.cols_red[1]]

In [47]:
scatter = hv.Scatter(
    data=data,
    kdims=["days_integer"],
    vdims=["cell density (mm^-2)", "condition"],
).groupby(
    "condition"
)

curve = hv.Curve(
    data=data.groupby(["condition", "days_integer"]).agg(np.mean),
    kdims=["days_integer"],
    vdims=["cell density (mm^-2)", "condition"],
).groupby(
    "condition"
)

cellcounts_overlay = (
    scatter * curve
).overlay(
    "condition",
).opts(
    hv.opts.Scatter(
        color=hv.Cycle(colors)
    ), 
    hv.opts.Curve(
        color=hv.Cycle(colors)
    ),
).opts(
    aspect=1.6,
    legend_position="right",
)

In [49]:
hv.output(cellcounts_overlay, dpi=90)

In [50]:
# Time-points for each condition
tmax_days = df.days_integer.max()
t = np.repeat(np.arange(tmax_days + 1), 3)[:-1]
nt = t.size

In [51]:
# Concatenate time-points and data for all conditions
t_cat = np.tile(t, 3)
rho_cat = np.concatenate(rhos)

<hr>

# Using the same `rho_max` for all conditions

In [11]:
def multilogistic_resid(params, t, rho_cat, rho_0):
    """Residual for a logistic growth model consisting of 3 curves with the same 
    carrying capacity, given the initial population."""
    *gs, rho_max = params
    mean = np.concatenate([lsig.logistic(t, gs[i], rho_0, rho_max) for i in range(3)])
    return rho_cat - mean

def resid(params, t, rho, rho_0):
    """Residual for a logistic growth model with given initial population."""
    g, rho_max = params
    return rho - lsig.logistic(t, g, rho_0, rho_max)

def multilogistic_mle_lstq(data, method="trf"):
    """Compute MLE for parameters in logistic growth model."""
    t, rho_cat, rho_0 = data
    
    # Get the maximum likelihood estimate (MLE) parameters
    res = scipy.optimize.least_squares(
        multilogistic_resid, 
        np.array([2, 0.3, 1, 5000]), 
        args=(t, rho_cat, rho_0), 
        method="trf",
        bounds=(
            [ 0,  0,  0, rho_0], 
            [10, 10, 10,   2e5],
        ),
    )
    
    sigma_mles = []
    for i in range(3):
        
        # Compute residual sum of squares from MLE params
        rss_mle = np.sum(resid([res.x[i], res.x[3]], t, rhos[i], rho_0)**2)
        
        # Compute the root-mean-squared deviation (the MLE for standard deviation)
        sigma_mle = np.sqrt(rss_mle / len(t))
        sigma_mles.append(sigma_mle)
    
    return tuple([x for x in res.x] + sigma_mles)

In [13]:
mle_results = multilogistic_mle_lstq_equalmax([t, rho_cat, rho_0])

mle_params  = np.array([
    mle_results[:3],
    np.repeat(mle_results[3], 3),
    mle_results[4:],
]).T

NameError: name 'multilogistic_mle_lstq_equalmax' is not defined

In [None]:
print("                                 FGF2", "RI", "untreated", sep="\t")
print("Growth rates (days^-1)         :", "\t".join([f"{mle_params[i, 0]:.2f}" for i in range(3)]))
print("Carrying capacity (cells/mm^2) :", "\t".join([f"{mle_params[i, 1]:.0f}" for i in range(3)]))
print("RMS deviation                  :", "\t".join([f"{mle_params[i, 2]:.0f}" for i in range(3)]))

In [None]:
nt_ = 101
t_ = np.linspace(0, tmax_days, nt_)
rho_s = [lsig.logistic(t_, mle_params[i, 0], rho_0, mle_params[i, 1]) for i in range(3)]

In [None]:
theor_curves_data = {
    "time (days)": np.tile(t_, 3),
    "cell density (mm^-2)": [*rho_s[0], *rho_s[1], *rho_s[2]],
    "condition": np.repeat(conds, nt_),
}

theor_curves = hv.Curve(
    data=theor_curves_data,
    kdims=["time (days)"],
    vdims=["cell density (mm^-2)", "condition"],
).groupby(
    "condition"
).overlay(
    "condition"
).opts(
    xlabel="time (days)",
    ylabel=r"cell density (cells / $mm^2$)",
    yticks=(0, 1250, 2500, 3750, 5000, 6250, 7500),
    aspect=1.6,
    fig_size=160,
)

hv.output(scatter * theor_curves, dpi=90)

In [None]:
mle_params_df = pd.DataFrame(dict(
    condition=conds,
    g_inv_days=mle_params[:, 0],
    rho_max_inv_mm2=mle_params[:, 1],
    sigma=mle_params[:, 2],
    g_ratio=mle_params[:, 0] / mle_params[2, 0],
    rho_max_ratio=mle_params[:, 1]/rho_0,
    doubling_time_days=np.log(2) / mle_params[:, 0],
    doubling_time_hours=24 * np.log(2) / mle_params[:, 0],
))

In [None]:
mle_params_df

In [None]:
if save_data:
    
    mle_params_fname = "growth_parameters_MLE_equalrhomax.csv"
    save_path = os.path.realpath(os.path.join(save_dir, mle_params_fname))
    mle_params_df.to_csv(save_path)

<hr>

In [150]:
import bebi103

import bokeh.io
bokeh.io.output_notebook()
# bebi103.hv.set_defaults()

  "Could not import ArviZ. Perhaps it is not installed."
Features requiring DataShader will not work and you will get exceptions.
  Features requiring DataShader will not work and you will get exceptions."""
  "Could not import `stan` submodule. Perhaps pystan or cmdstanpy is not properly installed."
  "Could not import `stan` submodule. Perhaps ArviZ or PyStan or CmdStanPy is/are"


In [151]:
# Set RNG seed
rg = np.random.default_rng(2021)

In [173]:
def gen_multilogistic_data(params, t, rho_0, size, rg):
    """Generate a new logistic growth data set."""
    mus     = np.concatenate([
        lsig.logistic(t, params[0], rho_0, params[3]),
        lsig.logistic(t, params[1], rho_0, params[3]),
        lsig.logistic(t, params[2], rho_0, params[3]),
    ])
    sigmas  = np.repeat(params[4:], nt)
    gen_rho = np.maximum(rg.normal(mus, sigmas), 0)

    return [t, gen_rho, rho_0]

In [176]:
# Bootstrap replicates of maximum likelihood estimation
bs_reps = bebi103.bootstrap.draw_bs_reps_mle(
    multilogistic_mle_lstq,
    gen_multilogistic_data,
    data = [t, rho_cat, rho_0],
    mle_args=(),
    gen_args=(t, rho_0),
    size=5000,
    n_jobs=1,
    progress_bar=False,
)

In [181]:
# Compute confidence intervals
conf_ints = np.percentile(bs_reps, [2.5, 97.5], axis=0)

In [180]:
conds

['FGF2', 'RI', 'untreated']

In [188]:
# Package replicates in data frame for plotting
df_res = pd.DataFrame(data=bs_reps[:, :4], columns=["g_FGF2", "g_RI", "g_untreated", "ρ_max"])

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    p1 = bebi103.viz.corner(
        samples=df_res,
        parameters=["g_FGF2", "g_RI", "g_untreated", "ρ_max"],
        show_contours=True,
        levels = [0.95],
    )

bokeh.io.show(p1)

<hr>

# Fix `rho_max` to untreated value

In [14]:
def resid(params, t, rho, rho_0):
    """Residual for logistic growth model."""
    g, rho_max = params
    return rho - lsig.logistic(t, g, rho_0, rho_max)

def logistic_mle_lstq(data):
    """Compute MLE for parameters in logistic growth model."""
    t, rho, rho_0 = data
    
    # Get the maximum likelihood estimate (MLE) parameters
    res = scipy.optimize.least_squares(
        resid, np.array([1, 5000]), args=(t, rho, rho_0), bounds=([0, rho_0], [8, 2e5])
    )

    # Compute residual sum of squares from MLE params
    rss_mle = np.sum(resid(res.x, t, rho, rho_0)**2)

    # Compute the root-mean-squared deviation (the MLE for standard deviation)
    sigma_mle = np.sqrt(rss_mle / len(t))

    return tuple([x for x in res.x] + [sigma_mle])

In [15]:
untreated_mle_results = logistic_mle_lstq([t, rhos[2], rho_0])
g, rho_max, sigma = untreated_mle_results

print(f"""
Proliferation rate : {g:.3f} days^-1
Carrying capacity  : {rho_max:.2e} cells/mm^2
Standard deviation : {sigma:.2e} cells/mm^2
""")


Proliferation rate : 0.728 days^-1
Carrying capacity  : 7.04e+03 cells/mm^2
Standard deviation : 7.87e+02 cells/mm^2



In [16]:
def fixrhomax_resid(g, t, rho, rho_0, rho_max):
    """Residual for a logistic growth model with given initial population."""
    return rho - lsig.logistic(t, g, rho_0, rho_max)

def logistic_fixrhomax_mle_lstq(data,):
    """Compute MLE for parameters in logistic growth model."""
    t, rho, rho_0, rho_max = data
    
    # Get the maximum likelihood estimate (MLE) parameters
    res = scipy.optimize.least_squares(
        fixrhomax_resid, 
        np.array([1.]), 
        args=(t, rho, rho_0, rho_max), 
        bounds=(
            [ 0,], 
            [10,],
        ),
    )

    # Compute residual sum of squares from MLE params
    rss_mle = np.sum(fixrhomax_resid(res.x[0], t, rho, rho_0, rho_max)**2)

    # Compute the root-mean-squared deviation (the MLE for standard deviation)
    sigma_mle = np.sqrt(rss_mle / len(t))
    
    return tuple([x for x in res.x] + [sigma_mle])

In [17]:
drug_mle_results = [
    logistic_fixrhomax_mle_lstq([t, rhos[i], rho_0, rho_max])
    for i in range(2)
]

In [18]:
mle_params = np.array([
    [drug_mle_results[0][0], untreated_mle_results[1], drug_mle_results[0][1]],
    [drug_mle_results[1][0], untreated_mle_results[1], drug_mle_results[1][1]],
    untreated_mle_results,
])

In [19]:
rho_max = mle_params[2, 1]

In [20]:
print("                                 FGF2", "RI", "untreated", sep="\t")
print("Growth rates (days^-1)         :", "\t".join([f"{mle_params[i, 0]:.2f}" for i in range(3)]))
print("Carrying capacity (cells/mm^2) :", "\t".join([f"{mle_params[i, 1]:.0f}" for i in range(3)]))
print("RMS deviation                  :", "\t".join([f"{mle_params[i, 2]:.0f}" for i in range(3)]))

                                 FGF2	RI	untreated
Growth rates (days^-1)         : 1.09	0.14	0.73
Carrying capacity (cells/mm^2) : 7038	7038	7038
RMS deviation                  : 1435	505	787


In [21]:
nt_ = 101
t_ = np.linspace(0, tmax_days, nt_)
rho_s = [lsig.logistic(t_, mle_params[i, 0], rho_0, mle_params[i, 1]) for i in range(3)]

theor_curves_data = {
    "time (days)": np.tile(t_, 3),
    "cell density (mm^-2)": [*rho_s[0], *rho_s[1], *rho_s[2]],
    "condition": np.repeat(conds, nt_),
}

theor_curves = hv.Curve(
    data=theor_curves_data,
    kdims=["time (days)"],
    vdims=["cell density (mm^-2)", "condition"],
).groupby(
    "condition"
).overlay(
    "condition"
).opts(
    xlabel="time (days)",
    ylabel=r"cell density (cells / $mm^2$)",
    yticks=(0, 1250, 2500, 3750, 5000, 6250, 7500),
    aspect=1.6,
    fig_size=160,
)

hv.output(scatter * theor_curves, dpi=90)

In [22]:
def gen_logistic_data(params, t, rho_0, size, rg):
    """Generate a new logistic growth data set."""
    g, rho_max, sigma = params
    mu = lsig.logistic(t, g, rho_0, rho_max)
    gen_rho = np.maximum(rg.normal(mu, sigma), 0)

    return [t, gen_rho, rho_0]

def gen_logistic_fixedrhomax_data(params, t, rho_0, rho_max, size, rg):
    """Generate a new logistic growth data set."""
    g, sigma = params
    mu = lsig.logistic(t, g, rho_0, rho_max)
    gen_rho = np.maximum(rg.normal(mu, sigma), 0)

    return [t, gen_rho, rho_0, rho_max]

In [23]:
# Bootstrap replicates of maximum likelihood estimation
bs_reps_untreated = bebi103.bootstrap.draw_bs_reps_mle(
    logistic_mle_lstq,
    gen_logistic_data,
    data = [t, rhos[2], rho_0],
    mle_args=(),
    gen_args=(t, rho_0),
    size=5000,
    n_jobs=1,
    progress_bar=True,
)

# Bootstrap replicates of maximum likelihood estimation
bs_reps_FGF2 = bebi103.bootstrap.draw_bs_reps_mle(
    logistic_fixrhomax_mle_lstq,
    gen_logistic_fixedrhomax_data,
    data = [t, rhos[0], rho_0, rho_max],
    mle_args=(),
    gen_args=(t, rho_0, rho_max),
    size=5000,
    n_jobs=1,
    progress_bar=True,
)

# Bootstrap replicates of maximum likelihood estimation
bs_reps_RI = bebi103.bootstrap.draw_bs_reps_mle(
    logistic_fixrhomax_mle_lstq,
    gen_logistic_fixedrhomax_data,
    data = [t, rhos[1], rho_0, rho_max],
    mle_args=(),
    gen_args=(t, rho_0, rho_max),
    size=5000,
    n_jobs=1,
    progress_bar=True,
)

100%|██████████| 5000/5000 [00:30<00:00, 164.53it/s]
100%|██████████| 5000/5000 [00:25<00:00, 197.86it/s]
100%|██████████| 5000/5000 [00:29<00:00, 168.69it/s]


In [26]:
# Compute confidence intervals
conf_ints_untreated = np.percentile(bs_reps_untreated, [5, 95], axis=0)
conf_ints_FGF2      = np.percentile(     bs_reps_FGF2, [5, 95], axis=0)
conf_ints_RI        = np.percentile(       bs_reps_RI, [5, 95], axis=0)
rho_max_conf_int = conf_ints_untreated[:, 1]
g_conf_ints = np.concatenate([
    [conf_ints_FGF2[:, 0]], [conf_ints_RI[:, 0]], [conf_ints_untreated[:, 0]]
], axis=0).T

# Print results to standard output
print("""
MLE parameters in the untreated condition
-----------------------------------------
          Estimate  90% CI: lower - higher
g       :    {0:.3f}          {3:.3f}    {6:.3f}  
rho_max :     {1:.0f}           {4:.0f}     {7:.0f}  
sigma   :      {2:.0f}            {5:.0f}      {8:.0f}  
""".format(*mle_params[2], *conf_ints_untreated[0], *conf_ints_untreated[1]))

print("""
MLE parameters with FGF2 treatment
-----------------------------------------
          Estimate  90% CI: lower - higher
g       :    {0:.3f}          {3:.3f}    {5:.3f}  
sigma   :     {2:.0f}            {4:.0f}     {6:.0f}  
""".format(*mle_params[0], *conf_ints_FGF2[0], *conf_ints_FGF2[1]))

print("""
MLE parameters with ROCK-i treatment
-----------------------------------------
          Estimate  90% CI: lower - higher
g       :    {0:.3f}          {3:.3f}    {5:.3f}  
sigma   :      {2:.0f}            {4:.0f}      {6:.0f}  
""".format(*mle_params[1], *conf_ints_RI[0], *conf_ints_RI[1]))


MLE parameters in the untreated condition
-----------------------------------------
          Estimate  90% CI: lower - higher
g       :    0.728          0.605    0.884  
rho_max :     7038           6393     7939  
sigma   :      787            561      932  


MLE parameters with FGF2 treatment
-----------------------------------------
          Estimate  90% CI: lower - higher
g       :    1.088          0.847    1.449  
sigma   :     1435            1024     1706  


MLE parameters with ROCK-i treatment
-----------------------------------------
          Estimate  90% CI: lower - higher
g       :    0.141          0.111    0.169  
sigma   :      505            371      613  



## Save data

In [29]:
mle_params_df = pd.DataFrame(dict(
    condition=conds,
    g_inv_days=mle_params[:, 0],
    g_inv_days_90CI_lo=g_conf_ints[0],
    g_inv_days_90CI_hi=g_conf_ints[1],
    rho_max_inv_mm2=mle_params[:, 1],
    rho_max_inv_mm2_90CI_lo=rho_max_conf_int[0],
    rho_max_inv_mm2_90CI_hi=rho_max_conf_int[1],
    sigma=mle_params[:, 2],
    g_ratio=mle_params[:, 0] / mle_params[2, 0],
    rho_max_ratio=mle_params[:, 1]/rho_0,
    doubling_time_days=np.log(2) / mle_params[:, 0],
    doubling_time_hours=24 * np.log(2) / mle_params[:, 0],
))

In [30]:
mle_params_df

Unnamed: 0,condition,g_inv_days,g_inv_days_90CI_lo,g_inv_days_90CI_hi,rho_max_inv_mm2,rho_max_inv_mm2_90CI_lo,rho_max_inv_mm2_90CI_hi,sigma,g_ratio,rho_max_ratio,doubling_time_days,doubling_time_hours
0,FGF2,1.088313,0.847411,1.449318,7038.003027,6392.692794,7938.776514,1435.041229,1.494119,5.630402,0.6369,15.285609
1,RI,0.141042,0.111284,0.169047,7038.003027,6392.692794,7938.776514,504.595599,0.193633,5.630402,4.914488,117.947718
2,untreated,0.728398,0.604513,0.883794,7038.003027,6392.692794,7938.776514,787.495152,1.0,5.630402,0.951605,22.838514


In [31]:
if save_data:
    
    mle_params_fname = "growth_parameters_MLE.csv"
    save_path = os.path.realpath(os.path.join(save_dir, mle_params_fname))
    mle_params_df.to_csv(save_path)

<hr>

## Corner plots

In [46]:
# Package replicates in data frame for plotting
df_res_untreated = pd.DataFrame(data=bs_reps_untreated, columns=["g", "ρ_max", "σ"])

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    corner_untreated = bebi103.viz.corner(
        samples=df_res_untreated,
        parameters=["g", "ρ_max", "σ"],
        show_contours=True,
        levels = [0.95],
    )

bokeh.io.show(corner_untreated)

In [270]:
# Package replicates in data frame for plotting
df_res_FGF2 = pd.DataFrame(data=bs_reps_FGF2, columns=["g", "σ"])

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    corner_FGF2 = bebi103.viz.corner(
        samples=df_res_FGF2,
        parameters=["g", "σ"],
        show_contours=True,
        levels = [0.95],
    )

bokeh.io.show(corner_FGF2)

In [273]:
# Package replicates in data frame for plotting
df_res_RI = pd.DataFrame(data=bs_reps_RI, columns=["g", "σ"])

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    corner_RI = bebi103.viz.corner(
        samples=df_res_RI,
        parameters=["g", "σ"],
        show_contours=True,
        levels = [0.95],
    )

bokeh.io.show(corner_RI)

<hr>