In [2]:
from pathlib import Path
import pandas as pd
import numpy as np
import scipy.stats as ss
from scipy.interpolate import interp1d
from scipy.special import expit
from fractions import Fraction
from empiricaldist import Pmf, Cdf
import matplotlib.pyplot as plt
from collections import Counter
import statsmodels.formula.api as smfa
import pymc3 as pm
import arviz as az

WARN: Could not locate executable g77
WARN: Could not locate executable f77
WARN: Could not locate executable ifort
WARN: Could not locate executable ifl
WARN: Could not locate executable f90
WARN: Could not locate executable DF
WARN: Could not locate executable efl
WARN: Could not locate executable gfortran
WARN: Could not locate executable f95
WARN: Could not locate executable g95
WARN: Could not locate executable efort
WARN: Could not locate executable efc
WARN: Could not locate executable flang
WARN: don't know how to compile Fortran code on platform 'nt'




In [3]:
def calc_volume(diameter):
    """Return the volume of a sphere with the given diameter.

    Applies V = (4/3) * pi * r**3 with r = diameter / 2. Only arithmetic
    operators are used, so any operand supporting them (scalar, NumPy
    array, pandas Series) is handled elementwise.
    """
    radius = diameter / 2.0
    sphere_coeff = 4 * np.pi / 3
    return sphere_coeff * radius**3

In [4]:
def calc_diameter(volume):
    """Return the diameter of a sphere with the given volume.

    Inverse of the sphere-volume formula: r**3 = 3 * V / (4 * pi), and the
    diameter is twice the cube root of that quantity.
    """
    radius_cubed = (3 / np.pi / 4) * volume
    return 2 * radius_cubed ** (1/3)

In [5]:
# Worked example: volume of a sphere whose diameter is 1.
# NOTE(review): the length unit is not stated in the code -- confirm.
d1 = 1
v1 = calc_volume(d1)
v1

0.5235987755982988

In [6]:
# Turn a doubling time into a reciprocal doubling time (rdt).
# NOTE(review): dividing 365 by the doubling time suggests it is measured in
# days and that rdt is in doublings per year -- confirm against the source.
median_doubling_time = 811
rdt = 365 / median_doubling_time

In [7]:
# Number of doublings accumulated over `interval` at the rate `rdt`.
# NOTE(review): `interval` is presumably in the same time unit as 1/rdt -- confirm.
# NOTE(review): the name `interval` is rebound to 245 / 365 in a later cell, so
# this value only applies to the worked example in the next few cells.
interval = 9.0
doublings = interval * rdt
doublings

4.05055487053021

In [8]:
# Volume after growth: each doubling multiplies the starting volume v1 by 2.
v2 = v1 * 2**doublings
v2

8.676351488087187

In [9]:
# Convert the grown volume back into a diameter.
d2 = calc_diameter(v2)
d2

2.5494480788327483

In [10]:
# Hard-coded sample of reciprocal doubling times (53 values), kept both as a
# list (`rdts`) and as a NumPy array (`rdt_sample`).
# Negative entries correspond to shrinkage, since growth is applied as
# 2**(rdt * interval).
# NOTE(review): the source and units of these values are not shown here -- confirm.
rdts = [5.089,  3.572,  3.242,  2.642,  1.982,  1.847,  1.908,  1.798,
        1.798,  1.761,  2.703, -0.416,  0.024,  0.869,  0.746,  0.257,
        0.269,  0.086,  0.086,  1.321,  1.052,  1.076,  0.758,  0.587,
        0.367,  0.416,  0.073,  0.538,  0.281,  0.122, -0.869, -1.431,
        0.012,  0.037, -0.135,  0.122,  0.208,  0.245,  0.404,  0.648,
        0.673,  0.673,  0.563,  0.391,  0.049,  0.538,  0.514,  0.404,
        0.404,  0.33,  -0.061,  0.538,  0.306]
rdt_sample = np.array(rdts)

In [11]:
def kde_from_sample(sample, qs):
    """Build a normalized Pmf from a Gaussian KDE of `sample`.

    sample: sequence of observations used to fit the kernel density estimate
    qs: quantities at which the estimated density is evaluated

    returns: Pmf indexed by `qs`, normalized in place before being returned
    """
    densities = ss.gaussian_kde(sample)(qs)
    pmf = Pmf(densities, qs)
    pmf.normalize()
    return pmf

In [12]:
# Evaluate the KDE of the RDT sample on 201 evenly spaced points from -2 to 6
# and keep the result as a normalized Pmf.
qs = np.linspace(-2, 6, 201)
pmf_rdt = kde_from_sample(rdt_sample, qs)

In [13]:
# Simulation settings. `interval` is read as a global by simulate_growth;
# the two diameters are converted to the start/stop volumes v0 and vmax.
# NOTE(review): this rebinds `interval`, which an earlier cell set to 9.0, so
# results depend on the cells being run in order.
# NOTE(review): 245 / 365 looks like 245 days expressed in years -- confirm.
interval = 245 / 365
min_diameter = 0.3
max_diameter = 20

In [14]:
# Starting volume and stopping threshold for simulate_growth, derived from
# the diameter bounds above.
v0 = calc_volume(min_diameter)
vmax = calc_volume(max_diameter)

In [15]:
def simulate_growth(pmf_rdt):
    """Simulate one growth trajectory from volume `v0` until it exceeds `vmax`.

    pmf_rdt: object whose `choice()` method returns one growth rate per call

    Reads the module-level names `v0`, `vmax` and `interval`. At every step
    the age advances by `interval` and the volume is multiplied by
    2**(rate * interval). The first state whose volume exceeds `vmax` is
    recorded and ends the simulation.

    returns: DataFrame with columns 'age', 'volume' and 'diameter'
    """
    age, volume = 0, v0
    history = [(age, volume)]

    # `not volume > vmax` (rather than `volume <= vmax`) keeps the exact
    # stopping condition of the break-based loop this replaces.
    while not volume > vmax:
        growth_rate = pmf_rdt.choice()
        age += interval
        volume *= 2 ** (growth_rate * interval)
        history.append((age, volume))

    sim = pd.DataFrame(history, columns=['age', 'volume'])
    sim['diameter'] = calc_diameter(sim['volume'])
    return sim

In [16]:
# Run a single simulated trajectory and preview its first rows.
sim = simulate_growth(pmf_rdt)
sim.head()

Unnamed: 0,age,volume,diameter
0,0.0,0.014137,0.3
1,0.671233,0.02169,0.346008
2,1.342466,0.02129,0.343869
3,2.013699,0.020897,0.341742
4,2.684932,0.014949,0.305635


In [17]:
# Run 101 simulated trajectories.
# NOTE(review): no random seed is set in the visible cells, so the downstream
# numbers will presumably vary from run to run -- confirm.
sims = [simulate_growth(pmf_rdt) for _ in range(101)]

In [18]:
def interpolate_ages(sims, diameter):
    """Estimate, for each simulation, the age at which it has `diameter`.

    sims: iterable of DataFrames with 'diameter' and 'age' columns
    diameter: diameter at which to read off the age

    Each simulation gets its own linear interpolant of age as a function of
    diameter (scipy's interp1d).

    returns: list of floats, one interpolated age per simulation
    """
    return [
        float(interp1d(sim['diameter'], sim['age'])(diameter))
        for sim in sims
    ]

In [19]:
# Age at which each simulated trajectory has diameter 15, summarized by the
# median and a 90% credible interval of the resulting distribution.
ages = interpolate_ages(sims, 15)
cdf = Cdf.from_seq(ages)
cdf.median(), cdf.credible_interval(0.9)

(array(21.51572521), array([14.3011755, 31.751359 ]))

In [20]:
# Observed counting data used by the model below: squares_counted / total_squares
# is the Binomial probability and yeast_counted is the observed count.
# NOTE(review): presumably a counting-chamber grid of 25 squares, of which 5
# were inspected and 49 cells seen -- confirm.
total_squares = 25
squares_counted = 5
yeast_counted = 49

In [21]:
billion = 1e9

with pm.Model() as model:
    # Normal priors: yeast concentration centred on 2 billion (sd 0.4 billion)
    # and three shaker volumes, each centred on 9.0 (sd 0.05).
    # NOTE(review): units are not stated in the code -- confirm.
    yeast_conc = pm.Normal("yeast conc", mu=2 * billion, sd=0.4 * billion)
    shaker1_vol = pm.Normal("shaker1 vol", mu=9.0, sd=0.05)
    shaker2_vol = pm.Normal("shaker2 vol", mu=9.0, sd=0.05)
    shaker3_vol = pm.Normal("shaker3 vol", mu=9.0, sd=0.05)

In [22]:
with model:
    # Normal priors on the three transferred volumes, each centred on 1.0
    # with sd 0.01.
    yeast_slurry_vol = pm.Normal("yeast slurry vol", mu=1.0, sd=0.01)
    shaker1_to_shaker2_vol = pm.Normal("shaker1 to shaker2", mu=1.0, sd=0.01)
    shaker2_to_shaker3_vol = pm.Normal("shaker2 to shaker3", mu=1.0, sd=0.01)

In [23]:
with model:
    # Each dilution factor is transferred volume / (transferred + receiving
    # volume). These are plain expressions on the random variables, not named
    # model variables.
    dilution_shaker1 = (yeast_slurry_vol / (yeast_slurry_vol + shaker1_vol))
    dilution_shaker2 = (shaker1_to_shaker2_vol / (shaker1_to_shaker2_vol + shaker2_vol))
    dilution_shaker3 = (shaker2_to_shaker3_vol / (shaker2_to_shaker3_vol + shaker3_vol))
    
    # Overall dilution is the product of the three successive steps.
    final_dilution = (dilution_shaker1 * dilution_shaker2 * dilution_shaker3)

In [24]:
with model:
    # Gamma prior on the chamber volume: mean 0.0001, sd equal to 1/20 of the mean.
    chamber_vol = pm.Gamma("chamber_vol", mu=0.0001, sd=0.0001 / 20)

In [25]:
with model:
    # Number of cells in the chamber: Poisson with expected value
    # concentration * overall dilution * chamber volume.
    yeast_in_chamber = pm.Poisson("yeast in chamber", mu=yeast_conc * final_dilution * chamber_vol)

In [26]:
with model:
    # Observed count: Binomial with n = cells in the chamber and
    # p = squares_counted / total_squares, observed at yeast_counted.
    # NOTE(review): the Python name `count` is rebound to a sample array in a
    # later cell, after which it no longer refers to this model variable.
    count = pm.Binomial("count", n=yeast_in_chamber, p=squares_counted/total_squares, observed=yeast_counted)

In [27]:
with model:
    # Draw 10000 samples from the prior predictive distribution.
    # NOTE(review): no random seed is passed, so the numbers shown in the
    # following cells are presumably not reproducible -- confirm.
    prior_sample = pm.sample_prior_predictive(10000)

In [28]:
# Simulated counts from the prior predictive sample and their mean.
# NOTE(review): this rebinds `count`, which previously held the Binomial
# model variable.
count = prior_sample['count']
count.mean()

39.8725

In [29]:
# Select the prior-predictive draws whose simulated count equals the observed
# count. Compare against `yeast_counted` (the value passed as `observed=` to
# the model) rather than repeating the literal, so the filter stays in sync
# if the observation is changed.
mask = (count == yeast_counted)
mask.sum()

234

In [30]:
# Rejection-style posterior: yeast concentrations (in billions) of the draws
# selected by `mask`, summarized by mean and 90% credible interval.
posterior_sample2 = prior_sample['yeast conc'][mask] / billion
cdf_abc = Cdf.from_seq(posterior_sample2)
cdf_abc.mean(), cdf_abc.credible_interval(0.9)

(2.2885260431588, array([1.88947404, 2.75036723]))

In [31]:
# Binomial parameters for the likelihood below: n is the sampled number of
# cells in the chamber for every draw, p the fraction of squares counted.
n = prior_sample['yeast in chamber']
p = squares_counted / total_squares

In [32]:
# Probability of the observed count under each draw's Binomial(n, p),
# flattened to a 1-D array.
likelihood = ss.binom(n, p).pmf(yeast_counted).flatten()

In [33]:
# Weight every prior draw of the concentration (in billions) by its
# likelihood, sort by concentration, and normalize into a posterior Pmf.
# NOTE(review): this rebinds `qs`, which earlier held the KDE evaluation grid.
qs = prior_sample['yeast conc'] / billion
ps = likelihood
posterior_pmf = Pmf(ps, qs)
posterior_pmf.sort_index(inplace=True)
posterior_pmf.normalize()

242.28640128581975

In [34]:
# Posterior mean and 90% credible interval of the concentration (in billions).
posterior_pmf.mean(), posterior_pmf.credible_interval(0.9)

(2.267209232786857, array([1.85900174, 2.69062647]))

In [35]:
# Display the raw prior predictive sample (a dict of arrays keyed by variable name).
# NOTE(review): this prints every array; a summary of keys/shapes may be
# easier to read -- consider trimming before sharing.
prior_sample

{'yeast in chamber': array([221, 161, 169, ..., 241, 153, 147]),
 'shaker1 vol': array([8.99438166, 8.96662917, 9.0720907 , ..., 9.14213097, 9.05720707,
        9.00583556]),
 'yeast slurry vol': array([1.0057147 , 1.00005174, 0.99338239, ..., 1.00939671, 0.99026742,
        1.00274361]),
 'shaker1 to shaker2': array([1.00740733, 1.01574795, 0.99115679, ..., 0.98894902, 1.01051415,
        0.98857345]),
 'count': array([44, 20, 32, ..., 58, 34, 32]),
 'shaker2 to shaker3': array([1.00074472, 0.99664343, 0.9940783 , ..., 1.00796353, 1.00627634,
        1.00923257]),
 'yeast conc': array([2.13897596e+09, 1.62268702e+09, 1.73366334e+09, ...,
        2.21650001e+09, 1.75208768e+09, 1.52904473e+09]),
 'shaker2 vol': array([9.05772329, 9.07057254, 9.10396983, ..., 9.06071078, 9.06744021,
        8.99084411]),
 'chamber_vol': array([1.02950575e-04, 1.01346588e-04, 1.03716112e-04, ...,
        1.03090336e-04, 9.77650116e-05, 9.97045081e-05]),
 'shaker3 vol': array([9.0254441 , 9.06962329, 9.10