In [None]:
import pandas as pd
import pymc3 as pm
import numpy as np
import seaborn as sns
import arviz as az
import theano.tensor as tt
import matplotlib.pyplot as plt
import spc_os
from spc_vis import my_plot_ppc
RANDOM_SEED = 28101990
from pymc3 import DensityDist
from pymc3.math import switch,exp
import pickle

import theano.tensor as T
import scipy.stats as st

In [None]:
# Project directory layout, relative to the notebook. Every path keeps its
# trailing slash because later cells build file names by plain concatenation.
data_root = '../data/'
raw_data_dir = data_root + 'raw/'
interim_data_dir = data_root + 'interim/'
processed_data_dir = data_root + 'processed/'
external_data_dir = data_root + 'external/'
models_dir = '../models/'

In [None]:
# File the fitted InferenceData is written to / read from (NetCDF).
# The literal has no placeholders, so it is a plain string rather than an f-string.
model_path = models_dir + "kinetic_modelling_mem_patch_sep.nc"

In [None]:
# Switch for the sampling cell further down: True runs the sampler and saves the
# result to `model_path`; False loads the previously saved result instead.
infer=True

# Load data

In [None]:
# Read the processed table and discard the 'Unnamed: 0' column carried by the CSV.
df = (
    pd.read_csv(f'{processed_data_dir}data.csv')
    .drop(columns='Unnamed: 0')
)

Drop rows that contain NaN values.

In [None]:
# Keep only rows without any missing value; `df` is rebound to the filtered frame.
df = df.dropna()

In [None]:
# Store the membrane label as a pandas categorical so integer codes can be derived from it.
df['membrane'] = df['membrane'].astype('category')

In [None]:
# Integer code of each row's membrane category; used to index per-membrane parameters.
df['membrane_enc'] = df['membrane'].cat.codes

In [None]:
# Map each integer category code to its membrane label.
# The mapping is read from the categorical's own `categories` index, which is the
# table `cat.codes` refers to, so code -> label agreement holds by construction
# instead of relying on an independent sort of the column values.
category_dic = dict(enumerate(df['membrane'].cat.categories))

In [None]:
# Display the code -> membrane label mapping.
category_dic

PU, MU, PS; ZWITT,NCHG;

## Visualize Data

In [None]:
# Summary statistics of `tpore` for each membrane.
df.groupby('membrane')['tpore'].describe()

In [None]:
# Smallest observed `tpore` per membrane; the model cell uses it as both the
# location and the upper bound of the `tau_0` prior.
min_val = df.groupby('membrane')['tpore'].min()

In [None]:
# One histogram of `tpore` per membrane, all panels sharing the x axis.
df['tpore'].hist(by=df['membrane'],sharex=True)

In [None]:
# Histogram of `tpore` pooled over all membranes.
df['tpore'].hist()

# Non-Hierarchical Model

In [None]:
def logp_exp_lag(tau_0, tau):
    """Build the log-density of an exponential distribution shifted by ``tau_0``.

    The returned callable evaluates, element-wise on a Theano expression ``t``,

        log p(t) = -(t - tau_0) / tau - log(tau)    for t >= tau_0

    and the most negative finite float64 for ``t < tau_0`` (a finite stand-in
    for ``log(0)``).

    Parameters
    ----------
    tau_0 : lag before which the density is zero.
    tau : scale (mean waiting time after the lag).

    Returns
    -------
    callable
        ``logp_(t)`` producing a Theano expression.
    """
    # Finite substitute for log(0), used for observations earlier than the lag.
    log_zero = -np.finfo(np.float64).max

    def logp_(t):
        before_lag = T.lt(t, tau_0)
        log_density = -(t - tau_0) / tau - T.log(tau)
        return T.switch(before_lag, log_zero, log_density)

    return logp_

In [None]:
def exp_lag_cdf(t, tau_0, tau):
    """Cumulative distribution function of the lagged exponential, for scalars.

    Parameters
    ----------
    t : float
        Point at which the CDF is evaluated.
    tau_0 : float
        Lag; the CDF is 0 for ``t < tau_0``.
    tau : float
        Scale of the exponential part.

    Returns
    -------
    float
        ``0.0`` when ``t < tau_0``, otherwise ``1 - exp(-(t - tau_0) / tau)``.
        A NaN ``t`` yields NaN instead of raising.
    """
    if t < tau_0:
        return 0.0
    # Fall-through branch: also reached when `t` is NaN (both comparisons are
    # False then), so NaN propagates rather than leaving the result unbound.
    # -expm1(-x) equals 1 - exp(-x) but keeps precision for small x.
    return -np.expm1(-(t - tau_0) / tau)

In [None]:
# Element-wise (broadcasting) version of the scalar CDF. Declaring the output
# type up front lets the wrapper handle empty inputs, which np.vectorize
# otherwise rejects because it cannot probe the return type.
exp_lag_cdf_vec = np.vectorize(exp_lag_cdf, otypes=[float])

In [None]:
class my_pdf(st.rv_continuous):
    """SciPy distribution for the lagged exponential with shapes ``tau_0`` and ``tau``.

    Density: ``exp(-(t - tau_0) / tau) / tau`` for ``t >= tau_0`` and 0 before.

    Closed forms are given for the pdf, cdf and quantile function, so sampling
    with ``rvs`` uses the analytic inverse CDF instead of numerical root-finding
    on the CDF.
    """

    def _argcheck(self, tau_0, tau):
        # The lag may be exactly zero (the model's prior has lower bound 0);
        # the scale must be strictly positive.
        return np.logical_and(np.asarray(tau_0) >= 0, np.asarray(tau) > 0)

    def _pdf(self, t, tau_0, tau):
        elapsed = t - tau_0
        # Clamp before exponentiating so the discarded branch cannot overflow.
        density = np.exp(-np.maximum(elapsed, 0.0) / tau) / tau
        return np.where(elapsed >= 0, density, 0.0)

    def _cdf(self, t, tau_0, tau):
        # Clamping the elapsed time at 0 yields exactly 0 before the lag.
        return -np.expm1(-np.maximum(t - tau_0, 0.0) / tau)

    def _ppf(self, q, tau_0, tau):
        # Inverse of the CDF: t = tau_0 - tau * log(1 - q).
        return tau_0 - tau * np.log1p(-q)


# Frozen-shape-free instance; support starts at 0, shapes are passed per call.
my_cv = my_pdf(a=0, name='my_pdf')

In [None]:
from pymc3.distributions import draw_values, generate_samples
def random(point=None, size=None):
    """Draw samples from the lagged-exponential likelihood for PyMC3.

    Passed as the ``random`` callback of the ``DensityDist`` in the model cell.
    ``tau_0`` and ``tau`` are looked up as module-level names when the function
    runs, so the model cell must have been executed first.

    Parameters
    ----------
    point : optional
        Forwarded to ``draw_values``.
    size : optional
        Number of draws; ``None`` is treated as 1.

    Returns
    -------
    Whatever ``generate_samples`` returns for ``my_cv.rvs`` with the drawn
    parameter values.
    """
    if size is None:
        size = 1

    # Numerical values of the two model parameters.
    tau_0_draw, tau_draw = draw_values([tau_0, tau], point=point, size=size)

    return generate_samples(my_cv.rvs, tau=tau_draw, tau_0=tau_0_draw, size=size)

In [None]:
# Log-density callable for tau_0 = 0.5 and tau = 1/3, obtained through
# DensityDist; the plotting cell below calls `d` on a time grid and `.eval()`s it.
# NOTE(review): presumably identical to calling logp_exp_lag(0.5, 1/3) directly - confirm.
d = DensityDist.dist(logp_exp_lag).logp(0.5,1/3)

In [None]:
# 1000 draws from the lagged exponential with tau_0 = 0.5, tau = 1/3.
# A single vectorised call replaces 1000 separate rvs() calls, and the draw is
# seeded with the notebook's RANDOM_SEED so the comparison plot is reproducible.
# Kept as a list, as before.
l = list(my_cv.rvs(tau_0=0.5, tau=1/3, size=1000, random_state=RANDOM_SEED))

In [None]:
# Overlay the density implied by the log-density `d` on the histogram of the
# scipy draws in `l`.
t_grid = np.linspace(0, 10, 100)
density_on_grid = np.exp(d(t_grid).eval())
plt.plot(t_grid, density_on_grid)
_ = plt.hist(np.array(l), density=True)

## Visualize Priors

In [None]:
# Half-normal density with scale 5 on [0, 10] (same scale as the `tau` prior
# in the model cell).
scale = 5
x = np.linspace(0, 10, 100)
d = st.halfnorm(scale=scale)
plt.plot(x, d.pdf(x), color='k', linestyle='-', linewidth=2)

In [None]:
# Normal density with loc 1 and scale 1, drawn on [0, 1].
# NOTE(review): presumably illustrates the truncated `tau_0` prior for a
# minimum observed value of 1 - confirm.
scale = 1
x = np.linspace(0, 1, 100)
d = st.norm(loc=1, scale=scale)
plt.plot(x, d.pdf(x), color='k', linestyle='-', linewidth=2)

## Run model

In [None]:
# 1-tuple holding the number of distinct membrane codes.
# The model cell below recomputes this same value on its first line.
shape = df['membrane_enc'].unique().shape

In [None]:
# One parameter per distinct membrane code.
shape = df['membrane_enc'].unique().shape
# Per-observation membrane code, used to pick each row's parameters.
membrane_idx = df['membrane_enc']

with pm.Model() as my_model:
    # Scale of the exponential part.
    tau = pm.HalfNormal('tau', sigma=5., shape=shape)
    # Lag: centred on, and bounded above by, the smallest observed tpore of
    # each membrane; bounded below by 0.
    tau_0 = pm.TruncatedNormal(
        'tau_0',
        mu=min_val,
        sigma=1,
        lower=0,
        upper=min_val,
        shape=shape,
    )
    # Lagged-exponential likelihood of the observed tpore values.
    like = DensityDist(
        'like',
        logp_exp_lag(tau_0[membrane_idx], tau[membrane_idx]),
        observed=df['tpore'],
        random=random,
    )

In [None]:
# Either sample the model and store the result, or load a stored result.
if infer:
    with my_model:
        # 4000 draws after 3000 tuning steps, seeded for reproducibility.
        my_model_trace = pm.sample(4000, tune=3000,  random_seed=RANDOM_SEED)
        # Posterior predictive draws via the model's `random` callback.
        ppc = pm.sample_posterior_predictive(my_model_trace,  random_seed=RANDOM_SEED, samples=40, size=1000)
        # Bundle trace and posterior predictive into an ArviZ InferenceData.
        my_model_trace = az.from_pymc3(trace=my_model_trace, posterior_predictive=ppc)
        print('Done infering.')

        # Replace the integer parameter coordinates by membrane labels (code -> label).
        my_model_trace = my_model_trace.assign_coords(tau_0_dim_0=[category_dic[i] for i in my_model_trace.posterior.tau_0_dim_0.to_index()])
        my_model_trace = my_model_trace.assign_coords(tau_dim_0=[category_dic[i] for i in my_model_trace.posterior.tau_dim_0.to_index()])
        
        # NOTE(review): spc_os.remove is a project helper; presumably it clears any
        # previous file at model_path before the new one is written - confirm.
        spc_os.remove(model_path)
        my_model_trace.to_netcdf(model_path)
else:
    # Reuse the result stored by an earlier run.
    my_model_trace=az.from_netcdf(model_path)

In [None]:
# Membrane labels in category-code order. The per-membrane parameters (and the
# posterior-predictive columns) are indexed by `membrane_enc`, i.e. by category
# code, so their coordinates must follow the categories' order. `Series.unique()`
# returns labels in order of first appearance in the data, which only coincides
# with code order when the rows happen to be sorted by membrane.
membrane_labels = df['membrane'].cat.categories.to_numpy()

# Observed data: one label per observation, in row order.
my_model_trace = my_model_trace.assign_coords(like_dim_0=df['membrane'].to_numpy(),
                                              groups='observed_data')
my_model_trace = my_model_trace.assign_coords(like_dim_1=membrane_labels,
                                              groups='posterior_predictive')
my_model_trace = my_model_trace.assign_coords(tau_dim_0=membrane_labels,
                                              groups='posterior')
my_model_trace = my_model_trace.assign_coords(tau_0_dim_0=membrane_labels,
                                              groups='posterior')
# Give the membrane dimension the same name in both groups.
my_model_trace.posterior_predictive = my_model_trace.posterior_predictive.rename({'like_dim_1':'membrane_enc'})
my_model_trace.observed_data = my_model_trace.observed_data.rename({'like_dim_0':'membrane_enc'})

## Analyze

### Point Summaries

Check that `r_hat` is approximately 1 and that `ess_mean` is close to the total number of draws.

In [None]:
# Summary table of the fitted model as a DataFrame.
df_summary = az.summary(my_model_trace)
# Lift pandas' row limit so every row of the summary is printed.
with pd.option_context('display.max_rows', None):
    print(df_summary)

### Plot Traces (Posteriors)

In [None]:
# Trace plots for all variables, with ArviZ's subplot cap disabled for this call.
with az.rc_context(rc={'plot.max_subplots': None}):
    az.plot_trace(my_model_trace)

### Autocorrelations

In [None]:
# Autocorrelation plots with chains combined; ArviZ's subplot cap is disabled for this call.
with az.rc_context(rc={'plot.max_subplots': None}):
    az.plot_autocorr(my_model_trace, combined=True)

### Posterior Predictive Test

In [None]:
az.plot_ppc(my_model_trace,  flatten=['draw', 'chain', 'like_dim_0']) # flatten all dimensions except the one we are interested in

### Compare parameter

In [None]:
# Forest plot comparing the lag parameter `tau_0` across membranes, chains combined.
variable = 'tau_0'
az.plot_forest(my_model_trace, var_names=variable, combined=True)

In [None]:
# Forest plot comparing the scale parameter `tau` across membranes, chains combined.
variable = 'tau'
az.plot_forest(my_model_trace, var_names=variable, combined=True)