In [1]:
import arviz as az
import pandas as pd
import numpy as np
import pymc as pm
import pymc.sampling_jax
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve, precision_score, recall_score, roc_auc_score, accuracy_score, auc
from sklearn.calibration import CalibrationDisplay
import pickle

import matplotlib.pyplot as plt
import seaborn as sns

# Ask the inline backend to render figures in the high-resolution 'retina' format.
%config InlineBackend.figure_format = 'retina'
# Use the "arviz-darkgrid" style sheet for the plots drawn below.
az.style.use("arviz-darkgrid")

In [5]:
# NOTE(review): `os` is imported in this cell rather than with the other imports at
# the top of the notebook.
import os
# The return value is discarded here (it is not the last expression of the cell).
os.getcwd()
# NOTE(review): hard-coded absolute path on drive D: -- this only works on the
# original author's machine. The recorded output of this cell is a SyntaxError and
# the working directory shown by the next cell differs from this path, so this
# call presumably never took effect; confirm the intended directory before relying
# on the relative data path used when loading the spreadsheet.
os.chdir("D:/Manal/phd_ppr_manal/Implementation/project1/Transmission_probability/method1_Binomial/v_PyMC/")

SyntaxError: EOL while scanning string literal (1374626222.py, line 2)

In [3]:
# Display the current working directory (relies on `os` having been imported by
# another cell).
os.getcwd()

'd:\\Manal\\phd_ppr_manal\\Implementation\\Transmission_probability\\method1_Binomial\\v_PyMC\\Scripts'

In [None]:
# Read the serology spreadsheet (path is relative to the current working directory)
# and extract the three model inputs as NumPy arrays:
#   y -> "Positive" column (observed counts passed to the Binomial likelihood)
#   n -> "n" column        (number of trials of the Binomial likelihood)
#   t -> "Time" column     (exponent in the cumulative-probability formula)
sero = pd.read_excel("../Input/sero_data_binomiale.xlsx")
y, n, t = (sero[column].to_numpy() for column in ("Positive", "n", "Time"))

In [None]:
%%time
with pm.Model() as model:

    # Prior
    p = pm.Uniform("p", lower=.0001, upper=.01)
    
    p_tot = 1 - (1 - p)**t
    
    y = pm.Binomial("y", n=n, p=p_tot, observed=y) 

In [None]:
%%time
with model:
    trace = pm.sampling.jax.sample_blackjax_nuts(draws=10000, tune=1000, chains=10, target_accept=0.95)

In [None]:
# Posterior plot for the variables in `trace`, annotated with a 95% HDI.
# The trailing semicolon suppresses the textual repr of the returned object.
az.plot_posterior(trace, hdi_prob=.95);

# Posterior analysis

In [None]:
# Summary table for `trace`, rounded to 5 decimals and using a 95% HDI.
az.summary(trace, round_to=5, hdi_prob=.95)

# Model log-likelihood

In [None]:
# Compute the model log-likelihood for the draws in `trace`.
# The return value is not assigned, so the LOO / WAIC cells below presumably rely
# on the result being attached to `trace` in place -- TODO confirm against the
# installed PyMC version.
with model:
    pm.compute_log_likelihood(trace)

In [None]:
# Leave-one-out cross-validation estimate computed by ArviZ from `trace`.
model_loo = az.loo(trace)

# Bare expression so the notebook displays the result.
model_loo

In [None]:
# WAIC estimate computed by ArviZ from `trace`.
model_waic = az.waic(trace)

# Bare expression so the notebook displays the result.
model_waic

In [None]:
# Draw posterior-predictive samples and add them to `trace`
# (extend_inferencedata=True); the seed is fixed for reproducibility.
with model:
    pm.sample_posterior_predictive(trace, extend_inferencedata=True, random_seed=42)

In [None]:
# Posterior-predictive check: KDE view using 100 posterior-predictive samples,
# with a rug for the observed data.
az.plot_ppc(trace, num_pp_samples=100, kind ='kde', observed_rug=True);

In [None]:
# Autocorrelation plot of the draws of `p`.
az.plot_autocorr(trace, var_names="p");

In [None]:
# Gelman-Rubin convergence diagnostic (R-hat) for `p`, as computed by ArviZ.
r_hat = az.rhat(trace, var_names=["p"])
# Printed as plain text rather than shown through the notebook's rich display.
print(r_hat)

In [None]:
# Effective sample size of `p`; kept in `ess` because a later cell reads ess["p"].
ess = az.ess(trace, var_names=["p"])
ess

In [None]:
# ESS plot for `p` using ArviZ's "evolution" kind.
az.plot_ess(trace, var_names=["p"], kind="evolution");

In [None]:
# Sum the ESS values stored for `p` and report the total with two decimals.
# NOTE(review): `p` is defined as a single scalar variable in the model, so this
# sum presumably has only one term -- confirm.
total_ess = ess["p"].sum().values
print(f"Total effective sample size: {float(total_ess):.2f}")

In [None]:
# Flatten the posterior draws of `p` over (chain, draw) into one 1-D array.
p_posterior = trace.posterior["p"].stack(sample=("chain", "draw")).values

# Keep a random subsample of 1000 draws (sampled with replacement, as before) for
# the simulation below. A dedicated, seeded generator replaces the global
# `np.random` state so the subsample is identical on every run.
subsample_rng = np.random.default_rng(42)
p_sample = subsample_rng.choice(p_posterior, size=1000)

In [None]:
# Size of the simulation grid: one row per record of `sero`, one column per
# retained posterior draw of `p`.
N, M = len(sero), len(p_sample)

# Re-extract the "Time" and "n" columns as NumPy arrays (rebinding `t` and `n`);
# "n" is cast to 32-bit integers.
t = sero["Time"].to_numpy()
n = sero["n"].to_numpy().astype(np.int32)

In [None]:
# Simulate binomial counts for every (row of `sero`, retained draw of `p`) pair.
# A dedicated, seeded generator replaces the global `np.random` state so the
# simulated table is identical on every run.
sim_rng = np.random.default_rng(43)

# Broadcast to an (N, M) grid instead of looping over draws: rows follow `t` / `n`,
# columns follow `p_sample`.
p_tot = 1 - (1 - p_sample[np.newaxis, :]) ** np.asarray(t)[:, np.newaxis]

# One binomial draw per grid cell; stored as float, matching the float array the
# loop-based version filled.
resultats_simules = sim_rng.binomial(np.asarray(n)[:, np.newaxis], p_tot).astype(float)
assert resultats_simules.shape == (N, M), "simulation grid has an unexpected shape"

# Same values as a DataFrame with one column per simulation: Sim_1 ... Sim_M.
df = pd.DataFrame(resultats_simules,
                  columns=[f"Sim_{i}" for i in range(1, M+1)])

In [None]:
# Observed data used for comparison with the simulations.
# `.copy()` makes `datacal` an independent frame: renaming its columns and adding
# columns to it later must neither touch `sero` nor raise SettingWithCopyWarning.
datacal = sero[["Event", "n", "Positive"]].copy()
datacal.columns = ["Exp", "N", "obs"]

# Per-row summary statistics across the simulated columns (axis=1).
means = resultats_simules.mean(axis=1)
print(means)

quantiles = np.quantile(resultats_simules,
                        q=[0.025, 0.25, 0.5, 0.75, 0.975],
                        axis=1)
print(quantiles)

# Assemble the summary table in one step, sharing `datacal`'s index so that the
# "Exp" labels, quantiles and means line up row by row regardless of how that
# index is labelled. `quantiles` has one row per quantile, hence the transpose.
# All numeric columns are already float64, so no dtype conversion is needed.
res = pd.DataFrame(
    quantiles.T,
    columns=["CI2.5", "CI25", "Median", "CI75", "CI97.5"],
    index=datacal.index,
)
res.insert(0, "Exp", datacal["Exp"])
res["Mean"] = means

print(res)

In [None]:
# Standard deviation of the observed count under a binomial model with the
# plug-in estimate p_hat = obs / N:
#     sd = sqrt(N * p_hat * (1 - p_hat)) = sqrt(obs * (N - obs) / N)
# The previous expression omitted the division by N, which inflates the error bars
# by a factor sqrt(N).
# `assign` returns a new frame, so this cell is re-runnable and does not write into
# a slice of `sero` (no SettingWithCopyWarning).
datacal = datacal.assign(
    sd=np.sqrt(datacal["obs"] * (datacal["N"] - datacal["obs"]) / datacal["N"])
)

In [None]:
# Display the observed-data table, including the `sd` column added above.
datacal

In [None]:
# Trace plot for `p`, using ArviZ's "rank_vlines" display.
az.plot_trace(trace, var_names="p", kind="rank_vlines");


In [None]:
# Same trace plot for `p`, using ArviZ's "rank_bars" display.
az.plot_trace(trace, var_names="p", kind="rank_bars");


In [None]:
# Monte Carlo standard error plot for `p`, with the rug and extra-methods options
# enabled.
az.plot_mcse(trace, var_names=["p"], rug=True, extra_methods=True)

In [None]:
# Recompute LOO, this time keeping the pointwise results, which the k-hat plot
# below takes as input.
loo = az.loo(trace, pointwise=True)

# Plot the k-hat diagnostic values from the pointwise LOO result, with bin
# annotations shown.
az.plot_khat(loo, show_bins=True);


In [None]:
# LOO-PIT check for the observed variable named "y" in the model, drawn in ECDF
# form (ecdf=True).
az.plot_loo_pit(trace, y="y", ecdf=True)

In [None]:
import seaborn as sns

In [None]:
# Simulated summary versus observed counts, one x position per experiment.
fig, ax = plt.subplots(figsize=(10, 8))

# Simulated mean: red line with a marker at each experiment.
sns.lineplot(data=res, x="Exp", y="Mean", color="red", ax=ax)
sns.scatterplot(data=res, x="Exp", y="Mean", color="red", ax=ax)

# Shaded ribbons from the simulated quantiles: the wide 2.5-97.5% band is drawn
# faintly, the 25-75% band more strongly.
for lower, upper, opacity in (("CI2.5", "CI97.5", 0.2), ("CI25", "CI75", 0.4)):
    ax.fill_between(res["Exp"], res[lower], res[upper], color="red", alpha=opacity)

# Observed counts from `datacal`, with +/- sd error bars and no connecting line.
sns.scatterplot(data=datacal, x="Exp", y="obs", ax=ax)
ax.errorbar(datacal["Exp"], datacal["obs"], yerr=datacal["sd"], fmt="none", ecolor="black")

# Title and axis labels.
ax.set_title("New Infections by Experiment")
ax.set_xlabel("Exp")
ax.set_ylabel("New Infections")

# Fixed y-range of 0-10 with integer ticks.
ax.set_ylim(0, 10)
ax.set_yticks(np.arange(0, 11, 1))

# Vertical x tick labels.
plt.setp(ax.get_xticklabels(), rotation=90)

# Empty, frameless legend, i.e. the legend is hidden.
ax.legend([], [], frameon=False)

plt.show()


In [None]:
import pickle

In [None]:
# Serialize `trace` with pickle to 'trace_binomiale.pkl'; the path is relative, so
# the file lands in the current working directory.
# NOTE(review): pickle files should only be loaded back from trusted sources.
with open('trace_binomiale.pkl', 'wb') as f:
    pickle.dump(trace, f)