In [None]:
import pandas as pd
import pymc3 as pm
import numpy as np
import seaborn as sns
import arviz as az
import theano.tensor as tt
import matplotlib.pyplot as plt
import spc_os
from spc_vis import my_plot_ppc

RANDOM_SEED = 28101990
from pymc3 import DensityDist
from pymc3.math import switch, exp
from pymc3.distributions import draw_values, generate_samples
import pickle

import theano.tensor as T
import scipy.stats as st

In [None]:
# Relative locations of the project's data and model folders.
# Every path ends with "/" so a file name can be appended directly.
raw_data_dir = "../data/raw/"
interim_data_dir = "../data/interim/"
processed_data_dir = "../data/processed/"
external_data_dir = "../data/external/"
models_dir = "../models/"

In [None]:
# Switch for the sampling cells below: they only run inside `if infer:`.
# NOTE(review): with infer = False nothing in this notebook defines `my_model_trace`,
# so the analysis cells would fail - confirm how a saved trace is meant to be loaded.
infer = True

+ Send to src
+ Fix plotting with new function

# Load data

In [None]:
# Read the processed table and discard the "Unnamed: 0" column
# (presumably an index that was written into the CSV - verify).
data_path = f"{processed_data_dir}data.csv"
df = pd.read_csv(data_path).drop(columns="Unnamed: 0")

In [None]:
# Preview the first rows of the loaded table.
df.head()

In [None]:
# Use the membrane label as the grouping key stored in the "Replica" column.
# Bracket assignment is required here: attribute-style assignment
# (df.Replica = ...) only writes a column when that column already exists;
# otherwise pandas just sets a Python attribute and df['Replica'] is never created.
df["Replica"] = df["membrane"]

In [None]:
# Convert the grouping labels to a pandas categorical so integer codes can be taken from them.
# NOTE(review): attribute-style assignment only updates the table if "Replica"
# already exists as a column - confirm that it does at this point.
df.Replica = df.Replica.astype("category")

In [None]:
# Integer code of each row's category, stored next to the label.
df["Replica_enc"] = df.Replica.cat.codes

In [None]:
# Lookup from integer position to group label, built from the sorted unique labels.
category_dic = dict(enumerate(np.unique(df['Replica'])))

In [None]:
# Show the position -> label mapping.
category_dic

# Custom Likelihood

In [None]:
class my_pdf(st.rv_continuous):
    """Lagged double-exponential distribution, specified through its CDF only.

    The shape parameters are ``tau_0``, ``lam`` and ``c``; they are forwarded
    unchanged to the module-level ``exp_lag_cdf_vec``. No other distribution
    method is overridden here.
    """

    def _cdf(self, t, tau_0, lam, c):
        # Delegate to the element-wise CDF defined at module level.
        cdf_values = exp_lag_cdf_vec(t, tau_0, lam, c)
        return cdf_values


# Shared instance used for sampling; ``a=0`` sets the lower bound of the support to zero.
my_cv = my_pdf(name='my_pdf', a=0)

In [None]:
def logp_exp_lag(tau_0, lam, c):
    """Build the log-density of the lagged double-exponential likelihood.

    The density is the sum of two exponential terms that share the rate
    ``lam``: one starting at ``t = 0`` weighted by ``c`` and one starting at
    ``t = tau_0`` weighted by ``1 - c``. For ``t < tau_0`` the second term is
    replaced by the constant ``1.e-8``.

    Parameters
    ----------
    tau_0 : lag after which the second component starts.
    lam : rate shared by both exponential components.
    c : weight of the component that starts at ``t = 0``.

    Returns
    -------
    callable
        ``logp_(t)``, which returns the log of the summed terms built with
        theano operations.
    """
    def logp_(t):
        before_lag = T.lt(t, tau_0)
        immediate = c * T.exp( - (t ) * lam ) * lam
        delayed = (1 - c) * T.exp( - (t - tau_0) * lam) * lam
        return T.log(immediate + T.switch(before_lag, 1.e-8, delayed))
    return logp_

In [None]:
def exp_lag_cdf(t, tau_0, lam, c):
    """Cumulative distribution function of the lagged double-exponential model.

    F(t) = c * (1 - exp(-lam * t)) + (1 - c) * (1 - exp(-lam * max(t - tau_0, 0)))

    Parameters
    ----------
    t : time at which the CDF is evaluated (scalar or numpy array).
    tau_0 : lag after which the second component starts contributing.
    lam : rate shared by both exponential components.
    c : weight of the component that starts at ``t = 0``.

    Returns
    -------
    The CDF value(s). The result is exactly 0 at ``t = 0`` and never decreases
    with ``t``, which inverse-CDF sampling relies on.
    """
    # Component that starts at t = 0, weighted by c.
    cdf0 = c * (1 - np.exp(-t * lam))
    # Component that starts at tau_0, weighted by (1 - c). Clamping the elapsed
    # time at zero makes it contribute exactly 0 before the lag (no constant
    # offset that would break F(0) = 0 and monotonicity) and lets the function
    # accept arrays without a Python-level branch.
    elapsed = np.maximum(t - tau_0, 0.0)
    cdf1 = (1 - c) * (1 - np.exp(-elapsed * lam))
    return cdf0 + cdf1

In [None]:
# Element-wise wrapper so the CDF can be called with array arguments (used by my_pdf._cdf).
exp_lag_cdf_vec = np.vectorize(exp_lag_cdf)

In [None]:
def random(point=None, size=None):
    """Draw samples from the custom likelihood (passed as ``random=`` to DensityDist).

    Reads the module-level ``tau_0``, ``lam`` and ``c`` at call time, so those
    names must refer to the model's random variables when this is invoked.

    Parameters
    ----------
    point : forwarded to ``draw_values``.
    size : number of draws; ``None`` is treated as 1.

    Returns
    -------
    The output of ``generate_samples`` applied to ``my_cv.rvs``.
    """
    if size is None:
        size = 1
    # Obtain numerical values for the three parameters.
    tau_0_draw, lam_draw, c_draw = draw_values([tau_0, lam, c], point=point, size=size)
    return generate_samples(
        my_cv.rvs, lam=lam_draw, tau_0=tau_0_draw, c=c_draw, size=size
    )

Generate some data to compare to the theoretical PDF

In [None]:
# Theoretical log-density used as the reference curve in the comparison plot.
# Keep these parameters identical to the ones passed to my_cv.rvs when the
# synthetic samples are drawn (tau_0=2.5, lam=1.5, c=0.25); with lam and tau_0
# swapped the curve and the histogram describe two different distributions.
d = DensityDist.dist(logp_exp_lag).logp(lam=1.5,tau_0=2.5, c=0.25)

In [None]:
# Draw 1000 synthetic lag times, one rvs call per draw.
l = [my_cv.rvs(tau_0=2.5, lam=1.5, c=0.25) for _ in range(1000)]

In [None]:
# Overlay the theoretical density (exp of the log-density `d`) on the sample histogram.
t_grid = np.linspace(0,20,100)
theoretical_pdf = np.exp(d(t_grid).eval())
plt.plot(t_grid, theoretical_pdf)
_ = plt.hist(np.array(l), density=True)

## Visualize Data

In [None]:
# Summary statistics of tpore for each Replica group.
df.groupby('Replica')['tpore'].describe()

In [None]:
# One tpore histogram per Replica group (20 bins each), sharing the x axis.
_ = df['tpore'].hist(by=df['Replica'],sharex=True, bins=20)

In [None]:
# Histogram of all tpore values pooled together (50 bins).
_ = df['tpore'].hist(bins=50)

## Visualize Priors

These are the shapes of the priors used.

In [None]:
# Normal density with mean 0 and standard deviation 0.25, drawn over [0, 1].
# NOTE(review): these values equal the mu/sigma given to the prior on `c` in the
# model cell, although the curve is stored as `tau_0_pdf` - confirm which prior is meant.
loc = 0.0
scale = 0.25
d = st.norm(loc=loc, scale=scale)
x = np.linspace(0,1,100)
tau_0_pdf = d.pdf(x)
plt.plot(x,  tau_0_pdf, 'k-', lw=2)

In [None]:
# Normal density with mean 2.5 and standard deviation 0.75, drawn over [0, 20].
# The standard deviation equals the sigma given to the tau_0 prior in the model cell;
# the model centres that prior on `maxes` rather than on 2.5.
loc = 2.5
scale = 0.75
d = st.norm(loc=loc, scale=scale)
x = np.linspace(0,20,100)
tau_0_pdf = d.pdf(x)
plt.plot(x,  tau_0_pdf, 'k-', lw=2)

In [None]:
# Exponential prior on the rate `lam`, drawn for the rate used in the model cell (0.05).
# The rate is stored in `lam_rate` rather than `lam` so this cell does not rebind
# the global name `lam`, which `random()` reads when drawing predictive samples.
lam_rate = 0.05
loc = 0
d = st.expon(scale=1/lam_rate, loc=loc)
x = np.linspace(0,100,100)
tau_pdf = d.pdf(x)
plt.plot(x,  tau_pdf, 'k-', lw=2)

## Run model

In [None]:
# Build a wide table: one column of tpore values per Replica group,
# each column re-indexed from 0 (shorter groups are padded with NaN by concat).
grouper = df.groupby('Replica')
tpore_columns = []
for group_name, group_rows in grouper:
    tpore_columns.append(pd.Series(group_rows['tpore'].tolist(), name=group_name))
df_data = pd.concat(tpore_columns, axis=1)

In [None]:
# Smallest value of each column; subtracted from the data in the next cell and
# added back to tau_0 in the model (tau_0_c).
min_val = df_data.min()

In [None]:
# Shift every column by its minimum.
# NOTE: not idempotent - re-running this cell alone subtracts `min_val` a second time.
df_data = df_data.sub(min_val, axis="columns")

In [None]:
# Rough mode estimate per column: left edge of the fullest histogram bin
# (np.histogram's default binning). Used as the prior mean of tau_0 in the model cell.
maxes = []
for col in df_data:
    # Columns are NaN-padded when groups have different sizes, and np.histogram
    # cannot derive a bin range from NaN values, so drop them first.
    y, x = np.histogram(df_data[col].dropna())
    maxes.append(x[y.argmax()])
maxes = np.array(maxes)

In [None]:
# Per-column data histograms with the synthetic samples overlaid. Green lines mark the
# estimated mode and the mode -/+ 0.75*2 (0.75 is the sigma given to the tau_0 prior).
ax = df_data.hist(density=True, bins=15).flatten()
for panel, mode in zip(ax, maxes):
    panel.hist(np.array(l), density=True)
    for offset in (0, -0.75*2, 0.75*2):
        panel.axvline(mode + offset, color="green")

In [None]:
# Name the data axes; the same labels are handed to the model as coordinates.
coords ={ "Replica":df_data.columns, "sim_number":df_data.index}
df_data.index.name = "sim_number"
df_data.columns.name = "Replica"
with pm.Model(coords=coords) as my_model:
    # NOTE: `lam`, `tau_0` and `c` are also the module-level names that `random` reads.
    # Rate of the exponential components, one per Replica, Exponential(0.05) prior.
    lam = pm.Exponential('lam', lam=0.05, dims="Replica")
    # Lag per Replica, centred on the histogram-based estimate `maxes` and kept >= 0.
    tau_0 = pm.TruncatedNormal('tau_0', mu=maxes, sigma=0.75, lower=0, dims="Replica")
    # Weight of the component that starts at t = 0, restricted to [0, 1].
    c = pm.TruncatedNormal('c', mu=0., sigma=0.25, lower=0, upper=1, dims="Replica")
    # Derived quantities: tau = 1/lam, and tau_0 with the per-column minimum added back.
    tau = pm.Deterministic("tau", 1/lam)
    tau_0_c = pm.Deterministic("tau_0_c", tau_0+min_val)
    # Custom likelihood evaluated on the shifted data; `random` supplies predictive draws.
    like = DensityDist('like', logp_exp_lag(tau_0,lam, c), observed=df_data, random=random)

In [None]:
# Render the model's dependency graph.
pm.model_to_graphviz(my_model)

In [None]:
%%time
# Posterior sampling: 4000 tuning steps followed by 4000 draws, seeded, on 4 cores.
# The result is returned as an ArviZ InferenceData object (return_inferencedata=True).
if infer:
    with my_model:
        my_model_trace = pm.sample(4000, tune=4000, random_seed=RANDOM_SEED, return_inferencedata=True, cores=4)

In [None]:
%%time
# Draw 100 prior predictive samples and merge them into `my_model_trace`.
# The conversion runs inside the model context.
if infer:
    with my_model:
        prior = pm.sample_prior_predictive(random_seed=RANDOM_SEED, samples=100)
        my_model_trace.extend(az.from_pymc3(prior=prior))

In [None]:
%%time
# Draw 100 posterior predictive samples and attach them to `my_model_trace`.
if infer:
    with my_model:
        ppc = pm.sample_posterior_predictive(my_model_trace,  random_seed=RANDOM_SEED, samples=100)
        print('Done infering.')
    # NOTE(review): unlike the prior-predictive cell, this conversion runs outside the
    # `with my_model:` block and uses az.concat instead of extend - confirm this is intended.
    my_model_trace = az.concat(my_model_trace, az.from_pymc3(posterior_predictive=ppc))

In [None]:
# Attach the data column labels to `like_dim_1` in each data-shaped group,
# then rename that dimension to 'Replica_enc'.
replica_labels = df_data.columns.to_numpy()
for group_name in ('observed_data', 'posterior_predictive', 'prior_predictive'):
    my_model_trace = my_model_trace.assign_coords(like_dim_1=replica_labels,
                                                  groups=group_name)
for group_name in ('posterior_predictive', 'prior_predictive', 'observed_data'):
    renamed_group = getattr(my_model_trace, group_name).rename({'like_dim_1':'Replica_enc'})
    setattr(my_model_trace, group_name, renamed_group)

## Analyze

### Point Summaries

Check that `r_hat` is approximately 1 and that `ess_mean` is close to the number of sampling steps.

In [None]:
# Tabulate the summary statistics and print the table without pandas' row limit.
df_summary = az.summary(my_model_trace)
no_row_limit = pd.option_context('display.max_rows', None)
with no_row_limit:
    print(df_summary)

### Prior Predictive Test

In [None]:
# Prior predictive check; the x axis of every panel is limited to [0, 15].
ax = az.plot_ppc(my_model_trace,group="prior",flatten=['draw', 'chain', 'like_dim_0'])
# plot_ppc may hand back a single Axes or a 1-D/2-D array of them;
# np.ravel makes the loop visit each Axes in every case.
for a in np.ravel(ax):
    a.set_xlim([0,15])

In [None]:
# Prior predictive check in cumulative form; the x axis of every panel is limited to [0, 15].
ax = az.plot_ppc(my_model_trace,group="prior",flatten=['draw', 'chain', 'like_dim_0'], kind="cumulative")
# plot_ppc may hand back a single Axes or a 1-D/2-D array of them;
# np.ravel makes the loop visit each Axes in every case.
for a in np.ravel(ax):
    a.set_xlim([0,15])

### Posterior Predictive Test

In [None]:
# Posterior predictive check (default plot kind).
az.plot_ppc(my_model_trace,group="posterior",flatten=['draw', 'chain', 'like_dim_0'])

In [None]:
# Posterior predictive check in cumulative form.
az.plot_ppc(my_model_trace,group="posterior",flatten=['draw', 'chain', 'like_dim_0'], kind="cumulative")

### Plot Traces (Posteriors)

In [None]:
# Trace plots for the whole InferenceData, with ArviZ's subplot limit set to None.
subplot_limit_off = {'plot.max_subplots': None}
with az.rc_context(rc=subplot_limit_off):
    az.plot_trace(my_model_trace)

### Compare parameter

In [None]:
# Forest plot of `tau_0`, chains combined.
variable = 'tau_0'
az.plot_forest(my_model_trace, var_names=variable, combined=True)

In [None]:
# Forest plot of `tau_0_c` (tau_0 with the per-column minimum added back), chains combined.
variable = 'tau_0_c'
az.plot_forest(my_model_trace, var_names=variable, combined=True)

In [None]:
# Forest plot of `tau` (= 1/lam), chains combined.
variable = 'tau'
az.plot_forest(my_model_trace, var_names=variable, combined=True)

In [None]:
# Forest plot of the rate `lam`, chains combined.
variable = 'lam'
az.plot_forest(my_model_trace, var_names=variable, combined=True)

In [None]:
# Forest plot of the mixture weight `c`, chains combined.
variable = 'c'
az.plot_forest(my_model_trace, var_names=variable, combined=True)

## Autocorrelations

In [None]:
# Autocorrelation plots with chains combined, with ArviZ's subplot limit set to None.
subplot_limit_off = {'plot.max_subplots': None}
with az.rc_context(rc=subplot_limit_off):
    az.plot_autocorr(my_model_trace, combined=True)

## Save model trace

In [None]:
# Destination file for the fitted trace (plain literal: the name has no placeholders).
model_path = models_dir + "tpore_double_expon_tau_tau0_same_membrane.nc"

In [None]:
# NOTE(review): spc_os.remove is a project helper - presumably it deletes a previous
# file at model_path (and tolerates a missing one); confirm before relying on it.
spc_os.remove(model_path)
# Write the InferenceData to a NetCDF file at model_path.
my_model_trace.to_netcdf(model_path)