# Using Julia and Stan to infer Growth Rates

To read the results from CmdStan, you need the `cmdstan_utils` package from https://github.com/tomroesch/cmdstan_utils.

In [22]:
using DataFrames, Plots, CSV, Dates, LaTeXStrings, CmdStan, cmdstan_utils, Statistics, LinearAlgebra

## Extracting file

In [2]:
# Path (relative to the working directory) of the CSV file holding the growth data.
file = "data/20200617_r1_O1_T_tetracycline_growth.csv"

"data/20200617_r1_O1_T_tetracycline_growth.csv"

In [23]:
# Load the project-local helper scripts.
# NOTE(review): `read_OD` and `default_plotlyjs!` are presumably defined in these
# files — confirm.
for script in ("src/data_handling.jl", "src/viz.jl")
    include(script)
end

# Apply the plotting defaults.
default_plotlyjs!()

# Read the growth data set into a data frame (output suppressed).
df = read_OD(file);


Plot one growth curve.

In [4]:
# Start an empty figure, add one OD-vs-time trace per selected well, then show it.
plot()
for col in 15:15
    # Row mask for the current well; computed once and reused for both columns.
    in_well = df.Well .== Symbol("Column$col")
    plot!(
        df[in_well, Symbol("time_[s]")],
        df[in_well, :OD];
        xlabel="time [s]",
        ylabel=L"OD_{600}",
        label=:none,
    )
end
plot!()

Extract the time points and OD values from the dataframe.

In [5]:
# Pull the time points and OD readings of well "Column15" using a single row mask.
t, N = let in_well = df.Well .== Symbol("Column15")
    # The time column is multiplied by the Float64 literal 1.0, as in `*1.`.
    (df[in_well, Symbol("time_[s]")] * 1.0, df[in_well, :OD])
end;

## Maximum Likelihood Gaussian Process

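First we try a maximum-likelihood-style approach: the model below puts no explicit priors on the kernel hyperparameters. Note that the following cells still draw samples with NUTS rather than running an optimizer, and the posterior means of those samples are used as point estimates.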

In [24]:
# Stan program (kept as a Julia string) for a zero-mean Gaussian process:
#   data       : N observations `y` at inputs `x`
#   parameters : `rho`, `alpha`, `sigma`, each constrained to be non-negative
#   model      : covariance = cov_exp_quad(x, alpha, rho) + sigma^2 on the diagonal,
#                likelihood evaluated through its Cholesky factor
# No explicit priors are placed on the three parameters.
mle_stan_file = "
data {
  int<lower=1> N;
  real x[N];
  vector[N] y;
}

parameters {
  real<lower=0> rho;
  real<lower=0> alpha;
  real<lower=0> sigma;
}

model {
  matrix[N, N] cov =   cov_exp_quad(x, alpha, rho)
                     + diag_matrix(rep_vector(square(sigma), N));
  matrix[N, N] L_cov = cholesky_decompose(cov);

  y ~ multi_normal_cholesky(rep_vector(0, N), L_cov);
}"

"\ndata {\n  int<lower=1> N;\n  real x[N];\n  vector[N] y;\n}\n\nparameters {\n  real<lower=0> rho;\n  real<lower=0> alpha;\n  real<lower=0> sigma;\n}\n\nmodel {\n  matrix[N, N] cov =   cov_exp_quad(x, alpha, rho)\n                     + diag_matrix(rep_vector(square(sigma), N));\n  matrix[N, N] L_cov = cholesky_decompose(cov);\n\n  y ~ multi_normal_cholesky(rep_vector(0, N), L_cov);\n}"

In [25]:
# Inputs for the Stan `data` block: number of points, inputs and observed values.
mle_data = Dict{String,Any}(
    "N" => length(t),
    "x" => t,
    "y" => N,
)

# Four chains with 1000 warm-up and 1000 sampling iterations each, thinned by 10.
mle_model = Stanmodel(;
    name="mle_test",
    model=mle_stan_file,
    nchains=4,
    num_warmup=1000,
    num_samples=1000,
    thin=10,
    printsummary=false,
)


File /Users/tomroschinger/git/evo_mwc_julia/tmp/mle_test.stan will be updated.



  name =                    "mle_test"
  nchains =                 4
  num_samples =             1000
  num_warmup =                1000
  thin =                    10
  monitors =                String[]
  model_file =              "mle_test.stan"
  data_file =               ""
  output =                  Output()
    file =                    ""
    diagnostics_file =        ""
    refresh =                 100
  pdir =                   "/Users/tomroschinger/git/evo_mwc_julia"
  tmpdir =                 "/Users/tomroschinger/git/evo_mwc_julia/tmp"
  output_format =           :array
  method =                  Sample()
    num_samples =             1000
    num_warmup =              1000
    save_warmup =             false
    thin =                    10
    algorithm =               HMC()
      engine =                  NUTS()
        max_depth =               10
      metric =                  CmdStan.diag_e
      stepsize =                1.0
      stepsize_jitter =         1.0
 

In [26]:
# Run CmdStan on the model. The returned triple holds a status value (0 in the
# recorded output), the array of draws and the list of column names.
a, mle_chains, mle_b = stan(mle_model, mle_data; summary=false)

Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:
Exception: gp_exp_quad_cov: sigma is 0, but must be > 0! (in '/Users/tomroschinger/git/evo_mwc_julia/tmp/mle_test.stan', line 14, column 2 to line 15, column 65)

Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:
Exception: cholesky_decompose: A is not symmetric. A[1,2] = inf, but A[2,1] = inf (in '/Users/tomroschinger/git/evo_mwc_julia/tmp/mle_test.stan', line 16, column 2 to column 47)

Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:
Exception: gp_exp_quad_cov: sigma is 0, but must be > 0! (in '/Users/tomroschinger/git/evo_mwc_julia/tmp/mle_test.stan', line 14, column 2 to line 15, column 65)



(0, [296.213 0.972505 … 0.388299 0.00257986; 296.911 0.863752 … 0.387444 0.00274786; … ; 295.028 0.972017 … 0.437114 0.00308422; 296.667 0.886411 … 0.447181 0.00233784]

[296.611 0.973094 … 0.326748 0.00299693; 297.493 0.928429 … 0.420731 0.00293512; … ; 296.861 1.0 … 0.44771 0.00275189; 296.81 0.981865 … 0.368398 0.00342291]

[297.511 0.966209 … 0.371694 0.00273518; 295.953 0.933128 … 0.559885 0.00305995; … ; 296.133 0.97247 … 0.344046 0.00238871; 296.437 0.975446 … 0.48761 0.00272597]

[296.625 0.864662 … 0.526668 0.0026499; 295.899 0.854891 … 0.478445 0.0036129; … ; 297.09 0.774576 … 0.410101 0.00328782; 296.685 0.990592 … 0.541949 0.00307518], ["lp__", "accept_stat__", "stepsize__", "treedepth__", "n_leapfrog__", "divergent__", "energy__", "rho", "alpha", "sigma"])

In [27]:
# Collect the draws into a Dict keyed by column name.
# NOTE(review): `mle_chains[:,:,1]` passes only the first slice along the third
# dimension. The model is configured with nchains=4, so this presumably keeps chain 1
# and ignores the other three — confirm that this is intended.
d = collect_params_from_chain(mle_b, mle_chains[:,:,1])

Dict{Any,Any} with 10 entries:
  "treedepth__"   => [3.0; 4.0; … ; 2.0; 4.0]
  "n_leapfrog__"  => [7.0; 15.0; … ; 3.0; 15.0]
  "sigma"         => [0.00257986; 0.00274786; … ; 0.00308422; 0.00233784]
  "energy__"      => [-295.099; -296.008; … ; -294.134; -295.095]
  "lp__"          => [296.213; 296.911; … ; 295.028; 296.667]
  "alpha"         => [0.388299; 0.387444; … ; 0.437114; 0.447181]
  "accept_stat__" => [0.972505; 0.863752; … ; 0.972017; 0.886411]
  "divergent__"   => [0.0; 0.0; … ; 0.0; 0.0]
  "stepsize__"    => [0.281688; 0.281688; … ; 0.281688; 0.281688]
  "rho"           => [7746.06; 7801.3; … ; 8755.84; 6433.55]

In [28]:
# Print the sample mean of each kernel hyperparameter, one value per line.
for name in ("rho", "alpha", "sigma")
    println(mean(d[name]))
end


7361.8209
0.43963281000000004
0.0028379727


## Simulation from Gaussian process

In [11]:
# Stan program (kept as a Julia string) that simulates from a Gaussian process with
# FIXED hyperparameters `rho`, `alpha`, `sigma` supplied as data:
#   f_predict : draw of the latent GP at `x_predict`, conditioned on (x, y)
#   y_predict : f_predict plus independent normal noise with scale `sigma`
# There are no parameters, so the program is meant for the fixed-parameter sampler.
#
# The former `transformed data` block was removed: it built `cov`/`L_cov`, which no
# other block read, at the cost of an extra N x N Cholesky factorisation with only
# 1e-10 jitter. The generated quantities are unaffected.
pred_gauss_file = "
functions {
  // Draw the latent GP at the points x2, conditioned on the observations (x1, y1).
  // delta is added to the diagonal of the predictive covariance as jitter.
  vector gp_pred_rng(real[] x2,
                     vector y1, real[] x1,
                     real alpha, real rho, real sigma, real delta) {
    int N1 = rows(y1);
    int N2 = size(x2);
    vector[N2] f2;
    {
      matrix[N1, N1] K =   cov_exp_quad(x1, alpha, rho)
                         + diag_matrix(rep_vector(square(sigma), N1));
      matrix[N1, N1] L_K = cholesky_decompose(K);

      vector[N1] L_K_div_y1 = mdivide_left_tri_low(L_K, y1);
      vector[N1] K_div_y1 = mdivide_right_tri_low(L_K_div_y1', L_K)';
      matrix[N1, N2] k_x1_x2 = cov_exp_quad(x1, x2, alpha, rho);
      vector[N2] f2_mu = (k_x1_x2' * K_div_y1);
      matrix[N1, N2] v_pred = mdivide_left_tri_low(L_K, k_x1_x2);
      matrix[N2, N2] cov_f2 =   cov_exp_quad(x2, alpha, rho) - v_pred' * v_pred
                              + diag_matrix(rep_vector(delta, N2));
      f2 = multi_normal_rng(f2_mu, cov_f2);
    }
    return f2;
  }
}

data {
  int<lower=1> N;
  real x[N];
  vector[N] y;

  int<lower=1> N_predict;
  real x_predict[N_predict];

  real<lower=0> rho;
  real<lower=0> alpha;
  real<lower=0> sigma;
}

parameters {}
model {}

generated quantities {
  vector[N_predict] f_predict = gp_pred_rng(x_predict, y, x, alpha, rho, sigma, 1e-10);
  vector[N_predict] y_predict;

  for (n in 1:N_predict)
    y_predict[n] = normal_rng(f_predict[n], sigma);
}"

"\nfunctions {\n  vector gp_pred_rng(real[] x2,\n                     vector y1, real[] x1,\n                     real alpha, real rho, real sigma, real delta) {\n    int N1 = rows(y1);\n    int N2 = size(x2);\n    vector[N2] f2;\n    {\n      matrix[N1, N1] K =   cov_exp_quad(x1, alpha, rho)\n                         + diag_matrix(rep_vector(square(sigma), N1));\n      matrix[N1, N1] L_K = cholesky_decompose(K);\n\n      vector[N1] L_K_div_y1 = mdivide_left_tri_low(L_K, y1);\n      vector[N1] K_div_y1 = mdivide_right_tri_low(L_K_div_y1', L_K)';\n      matrix[N1, N2] k_x1_x2 = cov_exp_quad(x1, x2, alpha, rho);\n      vector[N2] f2_mu = (k_x1_x2' * K_div_y1);\n      matrix[N1, N2] v_pred = mdivide_left_tri_low(L_K, k_x1_x2);\n      matrix[N2, N2] cov_f2 =   cov_exp_quad(x2, alpha, rho) - v_pred' * v_pred\n                              + diag_matrix(rep_vector(delta, N2));\n      f2 = multi_normal_rng(f2_mu, cov_f2);\n    }\n    return f2;\n  }\n}\n\ndata {\n  int<lower=1> N;\n  real x[N

In [12]:
# The program has no parameters, so use the fixed-parameter algorithm on a single
# chain without warm-up.
pred_gauss_model = Stanmodel(
    Sample(algorithm=CmdStan.Fixed_param());
    name="pred_gauss",
    model=pred_gauss_file,
    nchains=1,
    num_warmup=0,
    num_samples=1000,
    printsummary=false,
)

# Observations, 500 equally spaced prediction points over the observed time span, and
# the hyperparameters fixed to the means of their draws.
# NOTE(review): `d` must still hold the "rho"/"alpha"/"sigma" draws here; later cells
# reassign `d`, so this cell depends on execution order.
pred_gauss_data = Dict{String,Any}(
    "N" => length(t),
    "x" => t,
    "y" => N,
    "N_predict" => 500,
    "x_predict" => range(minimum(t); stop=maximum(t), length=500),
    "rho" => mean(d["rho"]),
    "alpha" => mean(d["alpha"]),
    "sigma" => mean(d["sigma"]),
)

_, pred_gauss_chains, pred_gauss_names = stan(
    pred_gauss_model, pred_gauss_data; summary=false
);


File /Users/tomroschinger/git/evo_mwc_julia/tmp/pred_gauss.stan will be updated.



In [13]:
# Collect the simulated quantities into a Dict keyed by column name.
# This reassigns `d`, replacing whatever it held before this cell was run.
d = collect_params_from_chain(pred_gauss_names, pred_gauss_chains[:,:,1])

Dict{Any,Any} with 4 entries:
  "f_predict"     => [0.0372475 0.0378605 … 0.828137 0.829118; 0.036091 0.03682…
  "lp__"          => [0.0; 0.0; … ; 0.0; 0.0]
  "accept_stat__" => [0.0; 0.0; … ; 0.0; 0.0]
  "y_predict"     => [0.0376049 0.0393351 … 0.826494 0.832463; 0.0366217 0.0316…

In [14]:
# One vector per row of the `y_predict` matrix. The row range is taken from the
# matrix itself instead of a hard-coded 1:1000, so it stays correct when the number
# of saved draws changes.
y = [d["y_predict"][i, :] for i in axes(d["y_predict"], 1)];

In [15]:
# Plot the simulated curves over the prediction grid, together with the raw data.
predictive_regression(
    y,
    range(minimum(t); stop=maximum(t), length=500);
    data=[t, N],
)

## Full inference and predicting growth rates

In [29]:
# Stan program (kept as a Julia string) for full inference of the GP hyperparameters
# plus predictions:
#   f_predict : draw of the latent GP at `x_predict`, conditioned on (x, y)
#   g_predict : draw of the derivative of the latent GP with respect to x at `x_predict`
#   y_predict : f_predict plus independent normal noise with scale `sigma`
# `f_predict` and `g_predict` come from two separate multi_normal_rng calls, so within
# one iteration they share the hyperparameters but are otherwise drawn independently.
#
# The comments next to the priors used to read "P[rho < 2.0] = 0.01" and
# "P[rho > 10] = 0.01", which contradicted the actual prior rho ~ normal(8000, 500).
# They now describe the priors as written. No Stan statement was changed.
bw_file="
functions {
    // Draw the latent GP at the points x2, conditioned on the observations (x1, y1).
    vector gp_pred_rng(real[] x2,
                         vector y1, real[] x1,
                         real alpha, real rho, real sigma, real delta) {
        int N1 = rows(y1);
        int N2 = size(x2);
        vector[N2] f2;
        {
          matrix[N1, N1] K =   cov_exp_quad(x1, alpha, rho)
                             + diag_matrix(rep_vector(square(sigma), N1));
          matrix[N1, N1] L_K = cholesky_decompose(K);

          vector[N1] L_K_div_y1 = mdivide_left_tri_low(L_K, y1);
          vector[N1] K_div_y1 = mdivide_right_tri_low(L_K_div_y1', L_K)';
          matrix[N1, N2] k_x1_x2 = cov_exp_quad(x1, x2, alpha, rho);
          vector[N2] f2_mu = (k_x1_x2' * K_div_y1);
          matrix[N1, N2] v_pred = mdivide_left_tri_low(L_K, k_x1_x2);
          matrix[N2, N2] cov_f2 =   cov_exp_quad(x2, alpha, rho) - v_pred' * v_pred
                                  + diag_matrix(rep_vector(delta, N2));
          f2 = multi_normal_rng(f2_mu, cov_f2);
        }
        return f2;
      }

  // Draw the derivative of the latent GP at the points x2, conditioned on (x1, y1).
  vector gp_pred_der_rng(real[] x2,
                     vector y1, real[] x1,
                     real alpha, real rho, real sigma, real delta) {
    int N1 = rows(y1);
    int N2 = size(x2);
    vector[N2] g2;
    {
      matrix[N1, N1] K =   cov_exp_quad(x1, alpha, rho)
                             + diag_matrix(rep_vector(square(sigma), N1));

      matrix[N1, N1] L_K = cholesky_decompose(K);

      vector[N1] L_K_div_y1 = mdivide_left_tri_low(L_K, y1);
      vector[N1] K_div_y1 = mdivide_right_tri_low(L_K_div_y1', L_K)';
      matrix[N1, N2] k_x1_x2 = cov_exp_quad(x1, x2, alpha, rho);

      // K_1: kernel differentiated once with respect to the prediction point.
      matrix[N1, N2] K_1;
      for (i in 1:N1){
        for (j in 1:N2){
          K_1[i, j] = (x1[i] - x2[j])/rho^2 * k_x1_x2[i,j];
        }
      }
      matrix[N2, N2] k_x2_x2 = cov_exp_quad(x2, alpha, rho);
      // K_2: kernel differentiated once with respect to each of its two arguments.
      matrix[N2, N2] K_2;
      for (i in 1:N2){
        for (j in 1:N2){
          K_2[i, j] = (1/rho^2 - (x2[i] - x2[j])^2 /rho^4) * k_x2_x2[i, j];
        }
      }
      matrix[N1, N2] v_pred = mdivide_left_tri_low(L_K, K_1);
      vector[N2] g2_mu = (K_1' * K_div_y1);
      matrix[N2, N2] cov_g2 =   K_2 - v_pred' * v_pred
                              + diag_matrix(rep_vector(delta, N2));

      g2 = multi_normal_rng(g2_mu, cov_g2);
    }
    return g2;
  }
}

data {
  int<lower=1> N;
  real x[N];
  vector[N] y;

  int<lower=1> N_predict;
  real x_predict[N_predict];
}

parameters {
  real<lower=0> rho;
  real<lower=0> alpha;
  real<lower=0> sigma;
}

model {
  matrix[N, N] cov =   cov_exp_quad(x, alpha, rho)
                     + diag_matrix(rep_vector(square(sigma), N));
  matrix[N, N] L_cov = cholesky_decompose(cov);

  // Priors; the <lower=0> constraints truncate each normal at zero.
  // rho is centred at 8000 with scale 500, in the units of x.
  rho ~ normal(8000, 500);
  alpha ~ normal(0, 2);
  sigma ~ normal(0, 1);

  y ~ multi_normal_cholesky(rep_vector(0, N), L_cov);
}

generated quantities {
  vector[N_predict] f_predict = gp_pred_rng(x_predict, y, x, alpha, rho, sigma, 1e-10);
  vector[N_predict] g_predict = gp_pred_der_rng(x_predict, y, x, alpha, rho, sigma, 1e-10);
  vector[N_predict] y_predict;
  for (n in 1:N_predict){
    y_predict[n] = normal_rng(f_predict[n], sigma);
}
}

";



In [17]:
# Four chains with 5000 warm-up and 5000 sampling iterations each, thinned by 50.
bw_model = Stanmodel(;
    name="bw",
    model=bw_file,
    nchains=4,
    num_warmup=5000,
    num_samples=5000,
    thin=50,
    printsummary=false,
)

# Observations plus 500 equally spaced prediction points over the observed time span.
bw_data = Dict{String,Any}(
    "N" => length(t),
    "x" => t,
    "y" => N,
    "N_predict" => 500,
    "x_predict" => range(minimum(t); stop=maximum(t), length=500),
)

_, bw_chains, bw_names = stan(bw_model, bw_data; summary=false);


File /Users/tomroschinger/git/evo_mwc_julia/tmp/bw.stan will be updated.



Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:
Exception: cholesky_decompose: Matrix A is not positive definite (in '/Users/tomroschinger/git/evo_mwc_julia/tmp/bw.stan', line 83, column 2 to column 47)

Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:
Exception: gp_exp_quad_cov: sigma is 0, but must be > 0! (in '/Users/tomroschinger/git/evo_mwc_julia/tmp/bw.stan', line 81, column 2 to line 82, column 65)

Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:
Exception: cholesky_decompose: Matrix A is not positive definite (in '/Users/tomroschinger/git/evo_mwc_julia/tmp/bw.stan', line 83, column 2 to column 47)



In [18]:
# Collect the draws into a Dict keyed by column name (this reassigns `d`).
# NOTE(review): `bw_chains[:,:,1]` passes only the first slice along the third
# dimension. The model is configured with nchains=4, so this presumably keeps chain 1
# and ignores the other three — confirm that this is intended.
d = collect_params_from_chain(bw_names, bw_chains[:,:,1])

Dict{Any,Any} with 13 entries:
  "accept_stat__" => [0.959555; 0.989979; … ; 1.0; 0.939895]
  "sigma"         => [0.0031668; 0.00311258; … ; 0.00298316; 0.00279455]
  "divergent__"   => [0.0; 0.0; … ; 0.0; 0.0]
  "energy__"      => [-295.836; -297.099; … ; -295.902; -291.486]
  "alpha"         => [0.513843; 0.465292; … ; 0.409774; 0.354699]
  "g_predict"     => [3.33237e-6 3.13449e-6 … 1.29642e-5 -1.16899e-7; 1.86846e-…
  "rho"           => [8252.15; 8109.87; … ; 7725.64; 7301.45]
  "treedepth__"   => [2.0; 2.0; … ; 3.0; 2.0]
  "n_leapfrog__"  => [7.0; 7.0; … ; 7.0; 7.0]
  "f_predict"     => [0.0398022 0.03916 … 0.822772 0.82184; 0.0376108 0.0374675…
  "lp__"          => [296.595; 297.142; … ; 297.387; 296.295]
  "stepsize__"    => [0.598104; 0.598104; … ; 0.598104; 0.598104]
  "y_predict"     => [0.0390286 0.0416105 … 0.824775 0.82301; 0.0383023 0.04106…

In [19]:
# One vector per row of the `y_predict` / `g_predict` matrices. The row ranges are
# taken from the matrices themselves instead of a hard-coded 1:100, so they stay
# correct when `num_samples` or `thin` change.
sample_Stack = [d["y_predict"][i, :] for i in axes(d["y_predict"], 1)];
g_sample_Stack = [d["g_predict"][i, :] for i in axes(d["g_predict"], 1)];

In [20]:
# Plot the predicted observations over the prediction grid, together with the raw data.
predictive_regression(
    sample_Stack,
    range(minimum(t); stop=maximum(t), length=500);
    data=[t, N],
    kwargs=Dict{Any,Any}(:xlabel => "time[s]", :ylabel => "OD"),
)

In [21]:
# Plot the `g_predict` draws over the prediction grid (no raw data overlay).
predictive_regression(
    g_sample_Stack,
    range(minimum(t); stop=maximum(t), length=500);
    kwargs=Dict{Any,Any}(:xlabel => "time[s]", :ylabel => "Growth Rate[1/s]"),
)