In [None]:
# autoreload mode 2: re-import edited modules before each cell runs.
%load_ext autoreload
%autoreload 2
# Loads the lab_black extension (presumably formats code cells with black).
%load_ext lab_black
# NOTE(review): leftover hint about "too much logging"; the statement that
# followed it is no longer here — restore the logging tweak or drop this note.
#

In [None]:
import pandas as pd
import pymc3 as pm
import numpy as np
import seaborn as sns
import arviz as az
import theano.tensor as tt
import matplotlib.pyplot as plt
import spc_os
from spc_vis import my_plot_ppc

RANDOM_SEED = 28101990
from pymc3 import DensityDist
from pymc3.math import switch, exp
from pymc3.distributions import draw_values, generate_samples
import pickle

import theano.tensor as T
import scipy.stats as st

In [None]:
# Project directories, given relative to the notebook's working directory.
# Every value ends with "/" so file names can be appended directly.
# In the cells visible here only processed_data_dir and models_dir are read.
raw_data_dir = "../data/raw/"
interim_data_dir = "../data/interim/"
processed_data_dir = "../data/processed/"
external_data_dir = "../data/external/"
models_dir = "../models/"

In [None]:
# Gates the three sampling cells (posterior, prior predictive, posterior
# predictive). NOTE: my_model_trace is only created when this is True, and
# every analysis cell further down needs it.
infer = True

+ TODO: send to src
+ TODO: fix plotting with the new function

# Load data

In [None]:
# Read the processed table and discard its "Unnamed: 0" column.
data_csv_path = f"{processed_data_dir}data.csv"
df = pd.read_csv(data_csv_path).drop(columns="Unnamed: 0")

In [None]:
# Quick look at the first rows of the loaded table.
df.head()

In [None]:
# Group by membrane: (re)define the "Replica" column from "membrane".
# Item assignment is used on purpose: attribute assignment (df.Replica = ...)
# only writes a column when "Replica" already exists; otherwise pandas sets a
# plain Python attribute and df["Replica"] later raises KeyError.
df["Replica"] = df["membrane"]

In [None]:
# Make Replica categorical; the next cell derives integer codes from it via .cat.codes.
df.Replica = df.Replica.astype("category")

In [None]:
# Integer code of each row's Replica category.
df["Replica_enc"] = df.Replica.cat.codes

In [None]:
# Map each integer code in Replica_enc back to its label. The mapping is taken
# from the categorical's own category list, which is exactly what .cat.codes
# indexes into, so both stay in sync even when values are missing.
category_dic = dict(enumerate(df["Replica"].cat.categories))

In [None]:
# Show the code -> Replica label mapping.
category_dic

## Visualize Data

In [None]:
# Summary statistics of tpore for each Replica.
df.groupby("Replica")["tpore"].describe()

In [None]:
# Overlay a reference curve on the per-Replica tpore histograms: the density
# lam(t) * exp(-lam(t) * t) with a sigmoidal rate
# lam(t) = lam0 / (1 + exp(-k * (t - x0))).
fig, ax = plt.subplots(2, 2, figsize=(10, 10), sharex=True, sharey=True)
x = np.linspace(0, 20, 20)
k = 1
x0 = 3
lam0 = 3
lam = lam0 / (1 + np.exp(-k * (x - x0)))
y = np.exp(-lam * x) * lam
# The histograms are drawn with density=True (unit area), so the curve is
# scaled to unit area too (Riemann sum on the uniform grid), not to unit sum.
dx = x[1] - x[0]
y = y / (np.sum(y) * dx)
df["tpore"].hist(by=df["Replica"], bins=20, density=True, ax=ax)
for a in ax.flatten():
    a.plot(x, y)

In [None]:
# tpore histogram per Replica on a shared x axis; "_ =" hides the returned axes repr.
_ = df["tpore"].hist(by=df["Replica"], sharex=True, bins=20)

In [None]:
# tpore histogram over all rows; "_ =" hides the returned axes repr.
_ = df["tpore"].hist(bins=50)

## Visualize Priors

These are the shapes of the priors used.

In [None]:
# Gamma densities for several (alpha, beta) pairs; beta is a rate, so scipy
# receives scale = 1 / beta.
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later removed the old names, so fall back to the new name when needed.
style_name = "seaborn-darkgrid"
if style_name not in plt.style.available:
    style_name = "seaborn-v0_8-darkgrid"
plt.style.use(style_name)
x = np.linspace(0, 20, 200)
alphas = [1.0, 2.0, 2.0, 3.0, 7.5]
betas = [0.5, 0.5, 1.0, 1.0, 1.0]
for a, b in zip(alphas, betas):
    pdf = st.gamma.pdf(x, a, scale=1.0 / b)
    plt.plot(x, pdf, label=r"$\alpha$ = {}, $\beta$ = {}".format(a, b))
plt.xlabel("x", fontsize=12)
plt.ylabel("f(x)", fontsize=12)
plt.legend(loc=1)

In [None]:
# Density of a Gamma(alpha=5, beta=5) distribution, labelled "k".
alpha = 5
beta = 5
x = np.linspace(0, 10, 100)
d = st.gamma(a=alpha, scale=1 / beta)
tau_0_pdf = d.pdf(x)
plt.plot(x, tau_0_pdf, "k-", lw=2)
plt.xlabel("k")

In [None]:
# Prior on t_half exactly as declared in the model cell:
# TruncatedNormal(mu=6.0, sigma=0.1, lower=0).
# (This cell previously drew Normal(3, 1), which is not the prior the model uses.)
loc = 6.0
scale = 0.1
lower = 0.0
# scipy's truncnorm expects its bounds in units of `scale` relative to `loc`.
d = st.truncnorm(a=(lower - loc) / scale, b=np.inf, loc=loc, scale=scale)
# Plot +/- 5 standard deviations around the mode, clipped at the lower bound.
x = np.linspace(max(lower, loc - 5 * scale), loc + 5 * scale, 200)
tau_0_pdf = d.pdf(x)
plt.plot(x, tau_0_pdf, "k-", lw=2)
plt.xlabel("t-half")

In [None]:
# Density of an Exponential distribution with rate lam = 1, labelled "lam0".
lam = 1.0
loc = 0
x = np.linspace(0, 5, 100)
d = st.expon(loc=loc, scale=1 / lam)
tau_pdf = d.pdf(x)
plt.plot(x, tau_pdf, "k-", lw=2)
plt.xlabel("lam0")

## Run model

In [None]:
# Reshape tpore into a wide table: one column per Replica, rows re-numbered
# from 0 within each group (values are passed as plain lists).
grouper = df.groupby("Replica")
tpore_columns = []
for replica_label, replica_rows in grouper:
    tpore_columns.append(pd.Series(replica_rows["tpore"].tolist(), name=replica_label))
df_data = pd.concat(tpore_columns, axis=1)

In [None]:
# Shift every Replica column so that its smallest value becomes zero.
df_data = df_data - df_data.min()

In [None]:
# Name both axes of the observation table and expose them as model coords.
df_data.index.name = "sim_number"
df_data.columns.name = "Replica"
coords = {"Replica": df_data.columns, "sim_number": df_data.index}

with pm.Model(coords=coords) as my_model:
    # One (k, t_half, lam0) triple per Replica.
    k = pm.Gamma("k", alpha=5.0, beta=5.0, dims="Replica")
    t_half = pm.TruncatedNormal("t_half", mu=6.0, sigma=0.1, lower=0, dims="Replica")
    lam0 = pm.Exponential("lam0", lam=1, dims="Replica")

    # Rate evaluated at every observed time: a sigmoid in time centred at
    # t_half with steepness k that saturates at lam0.
    sigmoid_denominator = 1 + T.exp(-k * (df_data.to_numpy() - t_half))
    lam = pm.Deterministic("lam", lam0 / sigmoid_denominator)

    # Observed times, likelihood: Exponential with the rate computed above.
    data = pm.Data("data", df_data, dims=("sim_number", "Replica"))
    like = pm.Exponential("like", lam=lam, observed=data)

In [None]:
# Render the model's dependency graph.
pm.model_to_graphviz(my_model)

In [None]:
%%time
if infer:
    with my_model:
        my_model_trace = pm.sample(1000, tune=1000, random_seed=RANDOM_SEED, return_inferencedata=True, cores=8)

In [None]:
%%time
if infer:
    with my_model:
        prior = pm.sample_prior_predictive(random_seed=RANDOM_SEED, samples=100)
        my_model_trace.extend(az.from_pymc3(prior=prior))

In [None]:
%%time
if infer:
    with my_model:
        ppc = pm.sample_posterior_predictive(my_model_trace,  random_seed=RANDOM_SEED, samples=100)
        print('Done infering.')
    my_model_trace = az.concat(my_model_trace, az.from_pymc3(posterior_predictive=ppc))

In [None]:
# Label the second axis of `like` with the Replica labels and rename that
# dimension to "Replica_enc" in the observed and both predictive groups.
# The guard keeps the cell re-runnable: once a group has been relabelled,
# "like_dim_1" no longer exists there and a second rename would fail.
replica_labels = df_data.columns.to_numpy()
for group_name in ("observed_data", "posterior_predictive", "prior_predictive"):
    group_ds = getattr(my_model_trace, group_name)
    if "like_dim_1" not in group_ds.dims:
        continue
    setattr(
        my_model_trace,
        group_name,
        group_ds.assign_coords(like_dim_1=replica_labels).rename(
            {"like_dim_1": "Replica_enc"}
        ),
    )

In [None]:
# Display the InferenceData object to inspect its groups after relabelling.
my_model_trace

## Analyze

### Point Summaries

Check that `r_hat` is approximately 1 and that `ess_mean` is close to the number of sampling steps.

In [None]:
# Summary table for my_model_trace, shown in full (no row truncation).
# display() keeps the rich HTML table rendering that print() would discard.
df_summary = az.summary(my_model_trace)
with pd.option_context("display.max_rows", None):
    display(df_summary)

In [None]:
# NOTE(review): same display as the cell just before the "Analyze" section; one of the two is redundant.
my_model_trace

In [None]:
# First Replica column of the "data" variable (dims: sim_number x Replica) stored in constant_data.
my_model_trace.constant_data.data[:, 0]

In [None]:
# Mean of lam over the first two axes of the posterior group, plotted
# against the observed values, for the first Replica column.
observed_first = my_model_trace.constant_data.data[:, 0].values
lam_mean_first = my_model_trace.posterior.lam.mean(axis=(0, 1))[:, 0].values
plt.plot(observed_first, lam_mean_first, marker="o", ls="")

In [None]:
# Mean of lam over the first two axes of the prior group, plotted against
# the observed values, for the first Replica column.
observed_first = my_model_trace.constant_data.data[:, 0].values
lam_mean_first = my_model_trace.prior.lam.mean(axis=(0, 1))[:, 0].values
plt.plot(observed_first, lam_mean_first, marker="o", ls="")

In [None]:
# Prior samples of t_half, pooled over every dimension. Labelled so the
# figure can be told apart from the other parameter histograms.
plt.hist(my_model_trace.prior.t_half.values.flatten())
plt.xlabel("t_half")
plt.ylabel("count")
plt.title("Prior: t_half");

In [None]:
# Prior samples of k, pooled over every dimension. Labelled so the figure
# can be told apart from the other parameter histograms.
plt.hist(my_model_trace.prior.k.values.flatten())
plt.xlabel("k")
plt.ylabel("count")
plt.title("Prior: k");

In [None]:
# Prior samples of lam0, pooled over every dimension. Labelled so the figure
# can be told apart from the other parameter histograms.
plt.hist(my_model_trace.prior.lam0.values.flatten())
plt.xlabel("lam0")
plt.ylabel("count")
plt.title("Prior: lam0");

In [None]:
# NOTE(review): plots the same prior t_half samples as the histogram three
# cells earlier, and there is no posterior lam0 histogram below — possibly
# that was the intent here. Confirm, then fix or delete.
plt.hist(my_model_trace.prior.t_half.values.flatten())

In [None]:
# Posterior samples of t_half, pooled over every dimension. Labelled so the
# figure can be told apart from the prior histogram of the same parameter.
plt.hist(my_model_trace.posterior.t_half.values.flatten())
plt.xlabel("t_half")
plt.ylabel("count")
plt.title("Posterior: t_half");

In [None]:
# Posterior samples of k, pooled over every dimension. Labelled so the
# figure can be told apart from the prior histogram of the same parameter.
plt.hist(my_model_trace.posterior.k.values.flatten())
plt.xlabel("k")
plt.ylabel("count")
plt.title("Posterior: k");

### Prior Predictive Test

In [None]:
# Prior predictive check; "draw", "chain" and "like_dim_0" are flattened, so
# the remaining dimension gets one panel per entry.
ax = az.plot_ppc(my_model_trace, group="prior", flatten=["draw", "chain", "like_dim_0"])
# np.ravel copes with a single Axes as well as 1-D or 2-D grids of Axes;
# iterating a 2-D grid directly would yield rows, not Axes.
for a in np.ravel(ax):
    a.set_xlim([0, 15])

In [None]:
# Prior predictive check, cumulative view; "draw", "chain" and "like_dim_0"
# are flattened, so the remaining dimension gets one panel per entry.
ax = az.plot_ppc(
    my_model_trace,
    group="prior",
    flatten=["draw", "chain", "like_dim_0"],
    kind="cumulative",
)
# np.ravel copes with a single Axes as well as 1-D or 2-D grids of Axes;
# iterating a 2-D grid directly would yield rows, not Axes.
for a in np.ravel(ax):
    a.set_xlim([0, 15])

### Posterior Predictive Test

In [None]:
# Posterior predictive check; "draw", "chain" and "like_dim_0" are flattened,
# so the remaining dimension gets one panel per entry.
ax = az.plot_ppc(
    my_model_trace, group="posterior", flatten=["draw", "chain", "like_dim_0"]
)
# np.ravel copes with a single Axes as well as 1-D or 2-D grids of Axes;
# iterating a 2-D grid directly would yield rows, not Axes.
for a in np.ravel(ax):
    a.set_xlim([0, 15])

In [None]:
# Posterior predictive check, cumulative view; "draw", "chain" and
# "like_dim_0" are flattened, so the remaining dimension gets one panel per entry.
ax = az.plot_ppc(
    my_model_trace,
    group="posterior",
    flatten=["draw", "chain", "like_dim_0"],
    kind="cumulative",
)
# np.ravel copes with a single Axes as well as 1-D or 2-D grids of Axes;
# iterating a 2-D grid directly would yield rows, not Axes.
for a in np.ravel(ax):
    a.set_xlim([0, 15])

### Compare parameter

In [None]:
# Forest plot of lam0 (one entry per Replica), chains combined.
variable = "lam0"
az.plot_forest(my_model_trace, combined=True, var_names=[variable])

In [None]:
# Forest plot of k (one entry per Replica), chains combined.
variable = "k"
az.plot_forest(my_model_trace, combined=True, var_names=[variable])

In [None]:
# Forest plot of t_half (one entry per Replica), chains combined.
variable = "t_half"
az.plot_forest(my_model_trace, combined=True, var_names=[variable])

### Plot Traces (Posteriors)

In [None]:
# Trace plots for every variable; the subplot cap is lifted for this call only.
no_subplot_cap = {"plot.max_subplots": None}
with az.rc_context(rc=no_subplot_cap):
    az.plot_trace(my_model_trace)

## Autocorrelations

In [None]:
# Autocorrelation plots with chains combined; the subplot cap is lifted for this call only.
no_subplot_cap = {"plot.max_subplots": None}
with az.rc_context(rc=no_subplot_cap):
    az.plot_autocorr(my_model_trace, combined=True)

## Save model

In [None]:
# Destination of the saved trace. A plain string literal is used because the
# name has no placeholders to interpolate.
# NOTE(review): the file name mentions "double_expon_tau_tau0", which does not
# obviously describe the model built above — confirm it is the intended name.
model_path = models_dir + "tpore_double_expon_tau_tau0_same_membrane.nc"

In [None]:
# NOTE(review): spc_os.remove is a project helper not shown here; presumably it
# deletes any previous file at model_path — confirm it tolerates a missing file.
spc_os.remove(model_path)
# Write my_model_trace to model_path in NetCDF format.
my_model_trace.to_netcdf(model_path)