In [None]:
%load_ext autoreload
%autoreload 2
%load_ext lab_black
# NOTE(review): the note left here ("Sometimes if too much logging") is unfinished —
# presumably a command to silence verbose logging was meant to follow; TODO confirm.
#

In [None]:
import pandas as pd
import pymc3 as pm
import numpy as np
import seaborn as sns
import arviz as az
import matplotlib.pyplot as plt
import spc_os
from spc_vis import my_plot_ppc

# Seed handed to the `random_seed` argument of the PyMC3 sampling calls below.
RANDOM_SEED = 28101990
import pickle

import theano.tensor as T
import scipy.stats as st

In [None]:
# Project directories, written relative to the notebook's working directory.
raw_data_dir = "../data/raw/"
interim_data_dir = "../data/interim/"
# Location of the data.csv file loaded below.
processed_data_dir = "../data/processed/"
external_data_dir = "../data/external/"
# Location where the sampled trace is written at the end of the notebook.
models_dir = "../models/"

In [None]:
# Gate for the sampling cells: they only run when this flag is True.
infer = True

+ TODO: move the reusable code to `src`
+ TODO: fix the plotting with the new plotting function

# Load data

In [None]:
# Read the processed dataset; the "Unnamed: 0" column is dropped right after loading
# (presumably a leftover index column from an earlier export — TODO confirm).
csv_path = f"{processed_data_dir}data.csv"
df = pd.read_csv(csv_path).drop(columns="Unnamed: 0")

In [None]:
# Show the first rows of the loaded table.
df.head()

In [None]:
# Use the membrane identifier as the grouping label for the rest of the notebook.
# Bracket assignment is used on purpose: attribute-style assignment
# (`df.Replica = ...`) only updates a column that already exists; otherwise it sets a
# plain Python attribute and no "Replica" column is created.
df["Replica"] = df["membrane"]

In [None]:
# Store the replica label as a categorical so that integer codes can be derived from it.
# Bracket access/assignment guarantees the "Replica" column itself is replaced
# (attribute assignment would silently do nothing useful if the column were missing).
df["Replica"] = df["Replica"].astype("category")

In [None]:
# Integer code of each row's replica category.
df["Replica_enc"] = df["Replica"].cat.codes

In [None]:
# Map position -> replica label, using the sorted unique labels returned by np.unique.
category_dic = dict(enumerate(np.unique(df["Replica"])))

In [None]:
# Display the position -> replica label mapping.
category_dic

In [None]:
# Number of distinct replicas; used as the first axis of the `lam0` shape in the model.
n_categories = len(category_dic)

## Visualize Data

In [None]:
# Summary statistics of tpore for each replica.
df.groupby("Replica")["tpore"].describe()

In [None]:
# One tpore histogram per replica, all sharing the same x axis; the return value is
# bound to `_` so the axes array is not echoed as cell output.
_ = df["tpore"].hist(by=df["Replica"], sharex=True, bins=10)

In [None]:
# Histogram of tpore pooled over all replicas.
_ = df["tpore"].hist(bins=50)

## Visualize Priors

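These plots show the shapes of candidate prior distributions. Note that the model below places a Gamma(α = 0.1, β = 0.1) prior on `lam0`, which is not among the curves plotted here.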

In [None]:
# Overlay the densities of several Gamma(alpha, beta) distributions on [0, 20].
plt.style.use("seaborn-darkgrid")
x = np.linspace(0, 20, 200)
alphas = [1.0, 2.0, 2.0, 3.0, 7.5]
betas = [0.5, 0.5, 1.0, 1.0, 1.0]
legend_template = r"$\alpha$ = {}, $\beta$ = {}"
for shape_param, rate_param in zip(alphas, betas):
    # scipy parameterises the Gamma by scale, i.e. the inverse of the rate beta.
    density = st.gamma(shape_param, scale=1.0 / rate_param).pdf(x)
    plt.plot(x, density, label=legend_template.format(shape_param, rate_param))
plt.xlabel("x", fontsize=12)
plt.ylabel("f(x)", fontsize=12)
plt.legend(loc=1)

In [None]:
# Density of a Gamma(alpha=5, beta=5) distribution on [0, 10]; the x axis is labelled "k".
alpha = 5
beta = 5
x = np.linspace(0, 10, 100)
# scipy takes the scale, which is the inverse of the rate beta.
d = st.gamma(a=alpha, scale=1 / beta)
tau_0_pdf = d.pdf(x)
plt.plot(x, tau_0_pdf, color="k", linestyle="-", linewidth=2)
plt.xlabel("k")

In [None]:
# Density of a Normal(loc=3, scale=1) distribution on [0, 20]; the x axis is labelled "t-half".
loc = 3
scale = 1.0
x = np.linspace(0, 20, 100)
d = st.norm(loc=loc, scale=scale)
tau_0_pdf = d.pdf(x)
plt.plot(x, tau_0_pdf, color="k", linestyle="-", linewidth=2)
plt.xlabel("t-half")

In [None]:
# Density of an Exponential distribution with rate `lam` (shifted by `loc`) on [0, 5];
# the x axis is labelled "lam0".
lam = 1.0
loc = 0
# scipy takes the scale (inverse rate); `loc` is passed through so that changing the
# variable above actually changes the plotted distribution.
d = st.expon(scale=1 / lam, loc=loc)
x = np.linspace(0, 5, 100)
tau_pdf = d.pdf(x)
plt.plot(x, tau_pdf, "k-", lw=2)
plt.xlabel("lam0")

## Prepare data

In [None]:
# Build a 2-D array with one column of tpore values per replica.
# NOTE(review): pd.concat pads shorter columns with NaN, so the later integer binning
# presumably relies on every replica having the same number of rows — TODO confirm.
grouper = df.groupby("Replica")
tpore_columns = [
    pd.Series(group["tpore"].tolist(), name=replica) for replica, group in grouper
]
data = pd.concat(tpore_columns, axis=1).to_numpy()

In [None]:
# Smallest value of each column (one column per replica).
min_vals = np.min(data, axis=0)

In [None]:
# Shift every column so that its minimum becomes 0.
# `data` is rebound here: re-running this cell without first re-running the cell that
# computes `min_vals` subtracts the old minima a second time.
data = data - min_vals

In [None]:
# Number of bins per column.
n_bins = 10
# Per-column bin width. The small 0.001 offset makes the width slightly larger than
# max / n_bins, so floor-dividing the column maximum by it stays below n_bins.
bin_size = (data.max(axis=0) + 0.001) / n_bins

In [None]:
# Zero-initialised indicator array: the axes of `data` followed by one axis of n_bins.
porations = np.zeros(data.shape + (n_bins,))

In [None]:
# Mark, for every entry of `data`, the bin its value falls into (one 1 per entry).
for row, col in np.ndindex(*porations.shape[:2]):
    # Floor-divide by the bin width of this column to get the bin index.
    porations[row, col, int(data[row, col] // bin_size[col])] = 1

In [None]:
# Poisson model for the 0/1 indicator array `porations`, with one Gamma-distributed
# rate per (category, bin).
with pm.Model() as my_model:
    # One rate per category and bin, all with the same Gamma(alpha=0.1, beta=0.1) prior.
    lam0 = pm.Gamma("lam0", alpha=0.1, beta=0.1, shape=(n_categories, n_bins))
    # `porations` holds only 0s and 1s, so mu equals lam0 where an event was marked
    # and 0 elsewhere. Assumes the second axis of `porations` has n_categories entries
    # so that lam0 broadcasts over the first axis — TODO confirm.
    mu = pm.Deterministic("mu", porations * lam0)
    like = pm.Poisson(
        "like",
        mu=mu,
        observed=porations,
    )

In [None]:
# Render the model structure as a graph.
pm.model_to_graphviz(my_model)

In [None]:
%%time
if infer:
    with my_model:
        my_model_trace = pm.sample(1000, tune=1000, random_seed=RANDOM_SEED, return_inferencedata=True, cores=8)

In [None]:
%%time
if infer:
    with my_model:
        prior = pm.sample_prior_predictive(random_seed=RANDOM_SEED, samples=1000)
        my_model_trace.extend(az.from_pymc3(prior=prior))

In [None]:
%%time
if infer:
    with my_model:
        ppc = pm.sample_posterior_predictive(my_model_trace,  random_seed=RANDOM_SEED, samples=1000)
        print('Done infering.')
    my_model_trace = az.concat(my_model_trace, az.from_pymc3(posterior_predictive=ppc))

In [None]:
# Display the groups collected in the InferenceData so far.
my_model_trace

In [None]:
# Attach the replica labels to the `like_dim_1` axis of every group that holds the
# "like" variable, then rename that dimension to "Replica_enc".
# The labels are the group keys of `grouper`, i.e. the names of the columns that
# `data` was assembled from, in the same order.
replica_labels = pd.Index([replica for replica, _ in grouper]).to_numpy()
like_groups = ("observed_data", "posterior_predictive", "prior_predictive")
for group_name in like_groups:
    my_model_trace = my_model_trace.assign_coords(
        like_dim_1=replica_labels, groups=group_name
    )
for group_name in like_groups:
    renamed_group = getattr(my_model_trace, group_name).rename(
        {"like_dim_1": "Replica_enc"}
    )
    setattr(my_model_trace, group_name, renamed_group)

In [None]:
# Posterior-predictive check for the first replica (column 0): compare the observed
# distribution of event bins with the one implied by each posterior-predictive draw.
replica_idx = 0
# One unit-width histogram bin per model bin, so bin index k falls into [k, k + 1).
bins = np.arange(n_bins + 1)
# The histogram axis is in bin-index units, so the centres sit half an index (0.5)
# after each left edge — not half of `bin_size`, which is in the units of the data.
bin_centers = bins[:-1] + 0.5

# Bin index of every observed event of this replica.
observed_bins = np.where(porations[:, replica_idx, :] == 1)[1]

pp_like = my_model_trace.posterior_predictive.like.values
n_draws = pp_like.shape[1]
# Only the first chain (index 0) of the posterior predictive is used; any non-zero
# simulated count marks its bin once.
draw_freqs = np.array(
    [
        np.histogram(
            np.where(pp_like[0, draw, :, replica_idx, :])[1],
            bins=bins,
            density=True,
        )[0]
        for draw in range(n_draws)
    ]
)
mean_freq = draw_freqs.mean(axis=0)

plt.plot(bin_centers, mean_freq)
plt.errorbar(bin_centers, mean_freq, yerr=draw_freqs.std(axis=0))
plt.xlabel("bin index")
plt.ylabel("density")
_ = plt.hist(observed_bins, bins=bins, density=True)

In [None]:
# Display the InferenceData again after the coordinate/dimension changes above.
my_model_trace

## Analyze

### Point Summaries

Check that `r_hat` is approximately 1 and that `ess_mean` is close to the total number of posterior draws.

In [None]:
# Summary table for the sampled variables.
df_summary = az.summary(my_model_trace)
# Lift the row limit only while rendering, and use the notebook's rich display
# (an IPython built-in) instead of print so the table is shown as a formatted frame.
with pd.option_context("display.max_rows", None):
    display(df_summary)

In [None]:
# Display the InferenceData once more before plotting.
my_model_trace

### Compare parameter

In [None]:
# Forest plot of the per-(category, bin) rates, with all chains combined.
variable = "lam0"
az.plot_forest(my_model_trace, var_names=[variable], combined=True)

In [None]:
# Forest plot of the deterministic `mu`, with all chains combined.
variable = "mu"
az.plot_forest(my_model_trace, var_names=[variable], combined=True)

### Plot Traces (Posteriors)

In [None]:
# Trace plots for every variable; the subplot cap is lifted for this call only.
unlimited_subplots = {"plot.max_subplots": None}
with az.rc_context(rc=unlimited_subplots):
    az.plot_trace(my_model_trace)

## Autocorrelations

In [None]:
# Autocorrelation plots with chains combined; the subplot cap is lifted for this call only.
unlimited_subplots = {"plot.max_subplots": None}
with az.rc_context(rc=unlimited_subplots):
    az.plot_autocorr(my_model_trace, combined=True)

## Save model

In [None]:
# Destination file for the sampled trace.
# NOTE(review): the file name mentions "double_expon_tau_tau0" although this notebook
# fits a binned Poisson model — confirm it does not overwrite another model's trace.
model_path = f"{models_dir}tpore_double_expon_tau_tau0_same_membrane.nc"

In [None]:
# Clear any previous file at model_path before writing (spc_os.remove is a project
# helper — presumably it tolerates a missing file; TODO confirm), then store the
# InferenceData as NetCDF.
spc_os.remove(model_path)
my_model_trace.to_netcdf(model_path)