In [None]:
import pandas as pd
import pymc3 as pm
import numpy as np
import seaborn as sns
import arviz as az
import theano.tensor as tt
import matplotlib.pyplot as plt
import spc_os
from spc_vis import my_plot_ppc
RANDOM_SEED = 28101990
from pymc3 import DensityDist
from pymc3.math import switch,exp
import pickle

import theano.tensor as T
import scipy.stats as st

In [None]:
raw_data_dir = '../data/raw/'
interim_data_dir = '../data/interim/'
processed_data_dir = '../data/processed/'
external_data_dir = '../data/external/'
models_dir = '../models/'

In [None]:
model_path = models_dir + f"kinetic_modelling_double_exp_mem_patch_sep.nc"

In [None]:
infer=True

# Load data

In [None]:
df = pd.read_csv(f'{processed_data_dir}data.csv').drop('Unnamed: 0', axis=1)

In [None]:
df.head()

In [None]:
df.Replica=df.Replica.astype('category')

In [None]:
df['Replica_enc'] = df.Replica.cat.codes

In [None]:
# Map each integer code stored in `Replica_enc` back to its replica label.
# The mapping is taken from the categorical's own category list, i.e. the same
# source that `df.Replica.cat.codes` is derived from, so code -> label can never
# drift apart from the encoding (as it could with an independent np.unique sort).
category_dic = dict(enumerate(df['Replica'].cat.categories))

In [None]:
category_dic

## Visualize Data

In [None]:
df['tpore'].groupby(df['Replica']).describe()

In [None]:
min_val = df['tpore'].groupby(df['Replica']).min()

In [None]:
df['tpore'].hist(by=df['Replica'],sharex=True)

In [None]:
ordered_days = df.Replica.value_counts().index
g = sns.FacetGrid(df, row="Replica", row_order=ordered_days,
                  height=1.7, aspect=4,)
g.map(sns.kdeplot, "tpore")

In [None]:
df['tpore'].hist()

# Hierarchical Model

Define the model distribution for the data

In [None]:
def logp_exp_lag(tau_0, tau, tau_0_p, tau_p, c):
    """Build the log-density of a two-component lagged-exponential mixture.

    Each component is an exponential density that is zero before its own
    onset: the first (onset ``tau_0``, scale ``tau``) is weighted by ``c`` and
    the second (onset ``tau_0_p``, scale ``tau_p``) by ``1 - c``.

    Returns
    -------
    callable
        ``logp_(t)``, which builds the symbolic log of the summed component
        densities evaluated at ``t``.
    """
    def _component(t, onset, scale, weight):
        # Zero before the onset, weighted exponential decay from the onset on.
        decay = weight * T.exp( - (t - onset) / scale ) / scale
        return T.switch(T.lt(t, onset), 0. , decay)

    def logp_(t):
        first = _component(t, tau_0, tau, c)
        second = _component(t, tau_0_p, tau_p, 1 - c)
        return T.log(first + second)

    return logp_

In [None]:
def exp_lag_cdf(t, tau_0, tau, tau_0_p, tau_p, c):
    """Cumulative distribution function of the lagged-exponential mixture.

    The mixture has two exponential components, each starting at its own
    onset: ``c`` weights the component with onset ``tau_0`` and scale ``tau``;
    ``1 - c`` weights the component with onset ``tau_0_p`` and scale ``tau_p``.

    Parameters
    ----------
    t : float or numpy.ndarray
        Point(s) at which to evaluate the CDF.
    tau_0, tau_0_p : float or numpy.ndarray
        Onset of the first / second component.
    tau, tau_p : float or numpy.ndarray
        Scale of the first / second component.
    c : float or numpy.ndarray
        Weight of the first component.

    Returns
    -------
    float or numpy.ndarray
        ``c * F1(t) + (1 - c) * F2(t)``, broadcast over the inputs. NaN inputs
        yield NaN instead of raising.
    """
    # Clamping the elapsed time at zero makes each term exactly 0 before its
    # onset (1 - exp(0) == 0). This replaces the scalar-only if/elif branches,
    # which also left cdf0/cdf1 unassigned when `t` was NaN.
    elapsed = np.maximum(np.subtract(t, tau_0), 0.0)
    elapsed_p = np.maximum(np.subtract(t, tau_0_p), 0.0)
    cdf0 = c * (1 - np.exp(-elapsed / tau))
    cdf1 = (1 - c) * (1 - np.exp(-elapsed_p / tau_p))
    return cdf0 + cdf1

In [None]:
exp_lag_cdf_vec = np.vectorize(exp_lag_cdf)

In [None]:
# scipy continuous distribution for the two-component lagged-exponential
# mixture. Only the CDF is supplied here; every other method (pdf, ppf, rvs, ...)
# is left to whatever generic fallback rv_continuous provides.
# NOTE(review): documented with `#` comments rather than a class docstring
# because rv_continuous appears to post-process a subclass's __doc__ at
# construction time — confirm before adding one.
class my_pdf(st.rv_continuous):
    
    def _cdf(self,t,tau_0, tau, tau_0_p, tau_p, c):
        """Evaluate the mixture CDF at ``t`` via the vectorized ``exp_lag_cdf``.

        NOTE(review): no ``shapes`` argument is passed to the constructor, so
        scipy presumably infers the shape parameters (and the keyword names
        accepted by ``my_cv.rvs``) from this signature — keep the parameter
        names and order unchanged.
        """
        return exp_lag_cdf_vec(t, tau_0, tau, tau_0_p, tau_p, c)
# Shared instance used for sampling; `a=0` is handed to rv_continuous, where it
# is the lower bound of the distribution's support.
my_cv = my_pdf(a=0, name='my_pdf')

In [None]:
from pymc3.distributions import draw_values, generate_samples
def random(point=None, size=None):
    """Sampling function handed to the ``DensityDist`` likelihood.

    Draws numerical values for the model variables ``tau_0``, ``tau``,
    ``tau_0_p``, ``tau_p`` and ``c`` (looked up as notebook globals when the
    function is called) and uses them to sample from ``my_cv``.

    Parameters
    ----------
    point : optional
        Forwarded to ``draw_values``.
    size : optional
        Number of draws; ``None`` is treated as 1.
    """
    if size is None:
        size = 1

    # Numerical values for the parameters, in the order they are listed.
    param_vars = [tau_0, tau, tau_0_p, tau_p, c]
    tau_0_, tau_, tau_0_p_, tau_p_, c_ = draw_values(param_vars, point=point, size=size)

    return generate_samples(
        my_cv.rvs,
        tau=tau_, tau_0=tau_0_, tau_0_p=tau_0_p_, tau_p=tau_p_, c=c_,
        size=size,
    )

Generate some data to compare to the theoretical PDF

In [None]:
d = DensityDist.dist(logp_exp_lag).logp(0.5,1/3, 1, 1/2, 0.25)

In [None]:
# Draw all 1000 variates in one vectorized call instead of 1000 separate ones,
# and seed the draw so the comparison against the analytical PDF is reproducible.
# Kept as a list so `l` has the same type as before.
l = list(my_cv.rvs(tau_0=0.5, tau=1/3, tau_0_p=1, tau_p=1/2, c=0.25,
                   size=1000, random_state=RANDOM_SEED))

In [None]:
# Overlay the analytical density (exp of the symbolic log-density, evaluated on
# a grid) on a normalized histogram of the simulated draws.
t_grid = np.linspace(0,10,100)
density = np.exp(d(t_grid).eval())
plt.plot(t_grid, density)
_ = plt.hist(np.array(l), density=True)

## Visualize Priors

These are the shapes of the priors used.

In [None]:
scale = 5
d = st.halfnorm(scale=scale)
x = np.linspace(0,10,100)
plt.plot(x, d.pdf(x), 'k-', lw=2)

In [None]:
# Prior on the mixture weight `c` as used in the model:
# Normal(mu=0.5, sigma=0.25) truncated to [0, 1].
# (Previously this cell plotted Normal(loc=1, scale=1), which matches none of
# the model's priors.)
loc, scale = 0.5, 0.25
lower, upper = 0., 1.
# scipy's truncnorm expects its bounds in units of `scale`, relative to `loc`.
d = st.truncnorm(a=(lower - loc) / scale, b=(upper - loc) / scale, loc=loc, scale=scale)
x = np.linspace(lower, upper, 100)
plt.plot(x, d.pdf(x), 'k-', lw=2)

## Run model

In [None]:
# One parameter per distinct replica code.
shape = df['Replica_enc'].unique().shape
# Per-observation replica code, used to pick each row's entry from the
# per-replica parameter vectors.
replica_idx = df['Replica_enc']

with pm.Model() as my_model:
    # Scale of the first component.
    tau = pm.HalfNormal('tau', sigma=5., shape=shape)
    # Onset of the first component: centred on, and capped at, the per-replica
    # minimum of `tpore` (`min_val`).
    tau_0 = pm.TruncatedNormal('tau_0', mu=min_val, sigma=3, lower=0, upper=min_val, shape=shape)
    # Scale of the second component.
    tau_p = pm.HalfNormal('tau_p', sigma=5., shape=shape)
    # Onset of the second component: never earlier than the first onset.
    tau_0_p = pm.TruncatedNormal('tau_0_p', mu=tau_0, sigma=3, lower=tau_0, shape=shape)
    # Weight of the first component, restricted to [0, 1].
    c = pm.TruncatedNormal('c', mu=0.5, sigma=0.25, lower=0, upper=1, shape=shape)

    mixture_logp = logp_exp_lag(
        tau_0[replica_idx],
        tau[replica_idx],
        tau_0_p[replica_idx],
        tau_p[replica_idx],
        c[replica_idx],
    )
    like = DensityDist('like', mixture_logp, observed=df['tpore'], random=random)

In [None]:
if not infer:
    # Reuse the inference result stored by an earlier run.
    my_model_trace=az.from_netcdf(model_path)
else:
    with my_model:
        raw_trace = pm.sample(4000, tune=3000,  random_seed=RANDOM_SEED)
        ppc = pm.sample_posterior_predictive(raw_trace,  random_seed=RANDOM_SEED, samples=40, size=1000)
        my_model_trace = az.from_pymc3(trace=raw_trace, posterior_predictive=ppc)
        print('Done infering.')

        # Replace the integer replica codes on every parameter dimension with
        # the replica labels from `category_dic`.
        for dim_name in ('tau_0_dim_0', 'tau_dim_0', 'tau_0_p_dim_0', 'tau_p_dim_0', 'c_dim_0'):
            labels = [category_dic[i] for i in my_model_trace.posterior[dim_name].to_index()]
            my_model_trace = my_model_trace.assign_coords(**{dim_name: labels})

        # NOTE(review): `spc_os` is a project-local helper; presumably this
        # clears a previous result file before writing — confirm its behavior
        # when the file does not exist.
        spc_os.remove(model_path)
        my_model_trace.to_netcdf(model_path)

In [None]:
# Replica labels in *category order*, i.e. the order that defines `Replica_enc`
# (`cat.codes`) and therefore the position of each replica inside the
# per-replica parameter vectors. `Series.unique()` returns labels in order of
# first appearance, which mislabels the replicas whenever the rows are not
# already sorted by category.
replica_labels = df['Replica'].astype('category').cat.categories.to_numpy()

# Observed data: one label per observation (row order of `df`).
my_model_trace = my_model_trace.assign_coords(like_dim_0=df['Replica'].to_numpy(),
                                              groups='observed_data')
# Posterior predictive: one label per replica.
my_model_trace = my_model_trace.assign_coords(like_dim_1=replica_labels,
                                              groups='posterior_predictive')
# Posterior: the replica dimension of every per-replica parameter.
for _dim in ('tau_dim_0', 'tau_0_dim_0', 'tau_p_dim_0', 'tau_0_p_dim_0', 'c_dim_0'):
    my_model_trace = my_model_trace.assign_coords(groups='posterior',
                                                  **{_dim: replica_labels})

my_model_trace.posterior_predictive = my_model_trace.posterior_predictive.rename({'like_dim_1':'Replica_enc'})
my_model_trace.observed_data = my_model_trace.observed_data.rename({'like_dim_0':'Replica_enc'})

## Analyze

### Point Summaries

Check that `r_hat` is approximately 1 and that the effective sample size (`ess_mean`) is close to the total number of posterior draws.

In [None]:
df_summary = az.summary(my_model_trace)
with pd.option_context('display.max_rows', None):
    print(df_summary)

### Plot Traces (Posteriors)

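#### Trace plot (currently disabled)

To draw the trace plot, run the following in a code cell:

```python
with az.rc_context(rc={'plot.max_subplots': None}):
    az.plot_trace(my_model_trace)
```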

### Autocorrelations

In [None]:
with az.rc_context(rc={'plot.max_subplots': None}):
    az.plot_autocorr(my_model_trace, combined=True)

### Posterior Predictive Test

In [None]:
az.plot_ppc(my_model_trace,  flatten=['draw', 'chain', 'like_dim_0'], kind='kde') # flatten all dimension except the one we are interested in

In [None]:
az.plot_ppc(my_model_trace,  flatten=['draw', 'chain', 'like_dim_0'], kind='cumulative') # flatten all dimension except the one we are interested in

### Compare parameters

In [None]:
# One forest plot per model parameter, comparing the replicas (chains combined).
# Replaces five copy-pasted cells that differed only in the variable name.
for variable in ('tau_0', 'tau', 'tau_0_p', 'tau_p', 'c'):
    az.plot_forest(my_model_trace,
                   var_names=variable,
                   combined=True)