In [2]:
from pathlib import Path
import pandas as pd
import numpy as np
import scipy.stats as ss
from scipy.interpolate import interp1d
from scipy.special import expit
from fractions import Fraction
from empiricaldist import Pmf, Cdf
import matplotlib.pyplot as plt
from collections import Counter
import statsmodels.formula.api as smfa
import pymc3 as pm
import arviz as az

In [3]:
def calc_volume(diameter):
    """Return the volume of a sphere with the given diameter.

    Uses V = (4 * pi / 3) * r**3 with r = diameter / 2.

    diameter: number or array-like supporting division and `**`

    returns: volume, same shape as `diameter`
    """
    radius = diameter / 2.0
    sphere_coeff = 4 * np.pi / 3
    return sphere_coeff * radius**3

In [8]:
def calc_diameter(volume):
    """Return the diameter of a sphere with the given volume.

    Inverts V = (4 * pi / 3) * r**3, i.e. r = (3 * V / (4 * pi))**(1/3),
    and returns 2 * r.

    volume: number or array-like supporting multiplication and `**`

    returns: diameter, same shape as `volume`
    """
    inverse_coeff = 3 / np.pi / 4
    radius = (inverse_coeff * volume) ** (1 / 3)
    return 2 * radius

In [4]:
# Starting diameter and the volume of the corresponding sphere.
d1 = 1
v1 = calc_volume(d1)
v1

0.5235987755982988

In [5]:
# Turn the median doubling time into a rate: 365 divided by the doubling time.
# presumably the doubling time is in days, making `rdt` doublings per year — TODO confirm
median_doubling_time = 811
rdt = 365 / median_doubling_time

In [6]:
# Number of doublings accumulated over `interval` at the rate `rdt`.
# presumably `interval` is in years, matching rdt = 365 / doubling time — TODO confirm
# NOTE: `interval` is rebound to 245 / 365 in a later cell.
interval = 9.0
doublings = interval * rdt
doublings

4.05055487053021

In [7]:
# Volume after growth: the starting volume multiplied by 2 once per doubling.
v2 = v1 * 2**doublings
v2

8.676351488087187

In [9]:
# Convert the grown volume back into a diameter.
d2 = calc_diameter(v2)
d2

2.5494480788327483

In [11]:
# Hard-coded sample of rate values (53 entries, a few of them negative),
# wrapped in a NumPy array for the KDE below.
# presumably these are in the same units as `rdt` (365 / doubling time) — TODO confirm source
rdts = [5.089,  3.572,  3.242,  2.642,  1.982,  1.847,  1.908,  1.798,
        1.798,  1.761,  2.703, -0.416,  0.024,  0.869,  0.746,  0.257,
        0.269,  0.086,  0.086,  1.321,  1.052,  1.076,  0.758,  0.587,
        0.367,  0.416,  0.073,  0.538,  0.281,  0.122, -0.869, -1.431,
        0.012,  0.037, -0.135,  0.122,  0.208,  0.245,  0.404,  0.648,
        0.673,  0.673,  0.563,  0.391,  0.049,  0.538,  0.514,  0.404,
        0.404,  0.33,  -0.061,  0.538,  0.306]
rdt_sample = np.array(rdts)

In [12]:
def kde_from_sample(sample, qs):
    """Build a normalized Pmf from a Gaussian KDE of a sample.

    sample: sequence of values passed to scipy's gaussian_kde
    qs: quantities at which the estimated density is evaluated

    returns: Pmf indexed by `qs`, normalized in place before being returned
    """
    densities = ss.gaussian_kde(sample)(qs)
    estimated = Pmf(densities, qs)
    # normalize() mutates the Pmf; its return value is not needed here.
    estimated.normalize()
    return estimated

In [13]:
# Evaluate the KDE of the rate sample on 201 evenly spaced points from -2 to 6.
qs = np.linspace(-2, 6, 201)
pmf_rdt = kde_from_sample(rdt_sample, qs)

In [14]:
# Simulation settings. `interval` is read as a global by simulate_growth; the
# two diameters are converted to volumes in the next cell.
# NOTE: this rebinds `interval`, which was set to 9.0 in an earlier cell.
# presumably 245 days expressed as a fraction of a year — TODO confirm
interval = 245 / 365
min_diameter = 0.3
max_diameter = 20

In [15]:
# Volumes for the smallest and largest diameters. simulate_growth starts each
# trajectory at v0 and stops once the volume exceeds vmax.
v0 = calc_volume(min_diameter)
vmax = calc_volume(max_diameter)

In [16]:
def simulate_growth(pmf_rdt):
    """Simulate one growth trajectory in steps of `interval`.

    Starts at age 0 with volume `v0`. At each step a rate is drawn with
    `pmf_rdt.choice()`, the age advances by `interval`, and the volume is
    multiplied by 2**(rate * interval). The trajectory ends with the first
    recorded volume that exceeds `vmax`.

    Reads the module-level names `v0`, `vmax`, `interval` and `calc_diameter`.

    pmf_rdt: object with a `choice()` method returning one rate per call

    returns: DataFrame with columns 'age', 'volume' and 'diameter'
    """
    elapsed, size = 0, v0
    history = [(elapsed, size)]

    # `not (size > vmax)` rather than `size <= vmax`, so a NaN volume keeps
    # the loop running as the break-based form does.
    while not (size > vmax):
        growth_rate = pmf_rdt.choice()
        elapsed += interval
        size *= 2 ** (growth_rate * interval)
        history.append((elapsed, size))

    trajectory = pd.DataFrame(history, columns=['age', 'volume'])
    trajectory['diameter'] = calc_diameter(trajectory['volume'])
    return trajectory

In [17]:
# Run a single simulated trajectory and preview its first rows.
# NOTE(review): no random seed is set in the visible cells, so presumably these
# values change from run to run — confirm whether a seed should be fixed.
sim = simulate_growth(pmf_rdt)
sim.head()

Unnamed: 0,age,volume,diameter
0,0.0,0.014137,0.3
1,0.671233,0.014403,0.301867
2,1.342466,0.017349,0.321186
3,2.013699,0.022935,0.352508
4,2.684932,0.03585,0.409099


In [18]:
# Simulate 101 trajectories from the same rate distribution.
sims = [simulate_growth(pmf_rdt) for _ in range(101)]

In [19]:
def interpolate_ages(sims, diameter):
    """Interpolate, for each simulation, the age at a given diameter.

    sims: iterable of objects indexable by 'diameter' and 'age'
          (e.g. the DataFrames returned by simulate_growth)
    diameter: diameter at which each interpolant is evaluated

    returns: list of floats, one interpolated age per simulation
    """
    def age_at(sim):
        # Interpolant mapping diameter -> age for this one simulation.
        age_of = interp1d(sim['diameter'], sim['age'])
        return float(age_of(diameter))

    return [age_at(sim) for sim in sims]

In [24]:
# Interpolated age at diameter 15 for every simulation, summarized by the
# median and a 0.9 credible interval of the resulting Cdf.
ages = interpolate_ages(sims, 15)
cdf = Cdf.from_seq(ages)
cdf.median(), cdf.credible_interval(0.9)

(array(22.88014493), array([14.05986447, 33.6866603 ]))

In [28]:
# Inputs to the Binomial likelihood defined further down: the ratio
# squares_counted / total_squares is its success probability and
# yeast_counted is the observed count.
total_squares = 25
squares_counted = 5
yeast_counted = 49

In [29]:
# Scale factor so concentrations can be written (and later reported) in billions.
billion = 1e9

# Create the model and its first priors: the yeast concentration is
# Normal(2e9, 0.4e9) and each of the three shaker volumes is Normal(9.0, 0.05).
with pm.Model() as model:
    yeast_conc = pm.Normal("yeast conc", mu=2 * billion, sd=0.4 * billion)
    shaker1_vol = pm.Normal("shaker1 vol", mu=9.0, sd=0.05)
    shaker2_vol = pm.Normal("shaker2 vol", mu=9.0, sd=0.05)
    shaker3_vol = pm.Normal("shaker3 vol", mu=9.0, sd=0.05)

In [30]:
# Priors for the three transferred volumes, each Normal(1.0, 0.01).
with model:
    yeast_slurry_vol = pm.Normal("yeast slurry vol", mu=1.0, sd=0.01)
    shaker1_to_shaker2_vol = pm.Normal("shaker1 to shaker2", mu=1.0, sd=0.01)
    shaker2_to_shaker3_vol = pm.Normal("shaker2 to shaker3", mu=1.0, sd=0.01)

In [31]:
# Dilution at each stage is transferred volume / (transferred volume + shaker
# volume); the overall dilution is the product of the three stages.
with model:
    dilution_shaker1 = (yeast_slurry_vol / (yeast_slurry_vol + shaker1_vol))
    dilution_shaker2 = (shaker1_to_shaker2_vol / (shaker1_to_shaker2_vol + shaker2_vol))
    dilution_shaker3 = (shaker2_to_shaker3_vol / (shaker2_to_shaker3_vol + shaker3_vol))
    
    final_dilution = (dilution_shaker1 * dilution_shaker2 * dilution_shaker3)

In [32]:
# Gamma prior on the chamber volume with mean 0.0001 and a standard deviation
# of one twentieth of the mean.
with model:
    chamber_vol = pm.Gamma("chamber_vol", mu=0.0001, sd=0.0001 / 20)

In [33]:
# Number of cells in the chamber: Poisson with mean
# concentration * overall dilution * chamber volume.
with model:
    yeast_in_chamber = pm.Poisson("yeast in chamber", mu=yeast_conc * final_dilution * chamber_vol)

In [34]:
# Observed count: Binomial over the cells in the chamber, with success
# probability squares_counted / total_squares and observed value yeast_counted.
# NOTE: the name `count` is rebound to a NumPy result in a later cell.
with model:
    count = pm.Binomial("count", n=yeast_in_chamber, p=squares_counted/total_squares, observed=yeast_counted)

In [35]:
# Draw 10000 samples from the prior predictive distribution of the model.
# NOTE(review): no random_seed is passed, so presumably the sampled values (and
# the summaries computed from them below) differ between runs — confirm.
with model:
    prior_sample = pm.sample_prior_predictive(10000)

In [37]:
# Simulated counts from the prior predictive sample and their mean.
# NOTE: this rebinds `count`, which previously named the Binomial variable
# created inside the model.
count = prior_sample['count']
count.mean()

40.1121

In [38]:
# Flag the prior-predictive draws whose simulated count equals the observed
# count, then report how many matched. The comparison uses `yeast_counted`
# (the same value passed as `observed=` to the model) rather than repeating
# the literal, so the mask stays consistent if the observation changes.
mask = (count == yeast_counted)
mask.sum()

242

In [42]:
# Keep the concentration draws selected by `mask`, express them in billions,
# and summarize them with the mean and a 0.9 credible interval.
posterior_sample2 = prior_sample['yeast conc'][mask] / billion
cdf_abc = Cdf.from_seq(posterior_sample2)
cdf_abc.mean(), cdf_abc.credible_interval(0.9)

(2.2771882337314566, array([1.82221807, 2.68039535]))

In [44]:
# Binomial parameters for the likelihood computed in the next cell: sampled
# chamber counts as `n`, and the fraction of squares counted as `p`.
n = prior_sample['yeast in chamber']
p = squares_counted / total_squares

In [45]:
# Binomial probability of the observed count for every sampled `n`, flattened to 1-D.
likelihood = ss.binom(n, p).pmf(yeast_counted).flatten()

In [48]:
# Weight each sampled concentration (in billions) by its likelihood, sort by
# concentration, and normalize. The value returned by normalize() is the
# cell's displayed output.
# NOTE: this rebinds `qs`, which earlier held the np.linspace grid for the KDE.
qs = prior_sample['yeast conc'] / billion
ps = likelihood
posterior_pmf = Pmf(ps, qs)
posterior_pmf.sort_index(inplace=True)
posterior_pmf.normalize()

245.4198966346221

In [49]:
# Mean and 0.9 credible interval of the likelihood-weighted concentrations.
posterior_pmf.mean(), posterior_pmf.credible_interval(0.9)

(2.2754446607345518, array([1.85709347, 2.70692541]))