### Example E.5.3. BAYESIAN STUDENT-T REGRESSION from https://openreview.net/pdf?id=HltJfwwfhX

In [1]:
import pystan
import pandas as pd
import numpy as np
import requests

from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import rpy2py
import rpy2.robjects as ro
import json


from amis_algorithms import alpha_AMIS_fixed_dof, AMIS_student_fixed_dof
import multiprocessing
multiprocessing.set_start_method("fork")

import bridgestan

from scipy.stats import multivariate_t
from scipy.special import logsumexp

# Load and prepare the dataset
url = "https://github.com/faosorios/heavy/blob/master/data/creatinine.rda?raw=true"
with requests.get(url) as resp:
    with open("creatinine.rda", "wb") as f:
        f.write(resp.content)

# Load RDA file into Python
ro.r['load']("creatinine.rda")
df = pandas2ri.rpy2py_dataframe(ro.r['creatinine'])

data_df = pd.DataFrame(columns=['log_SC', 'log_WT', 'log_140_minus_A', 'log_CR'])

# Apply transformations following https://openreview.net/pdf?id=HltJfwwfhX
data_df['log_SC'] = np.log(df['SC'])
data_df['log_WT'] = np.log(df['WT'])
data_df['log_CR'] = np.log(df['CR'])
data_df['log_140_minus_A'] = np.log(140 - df['Age'])
data_df = data_df.dropna() # remove any rows with NaN values after transformation

# Define your Stan model
stan_model_code = """
data {
    int<lower=0> N;  // number of data points
    vector[N] x1;    // first covariate
    vector[N] x2;    // second covariate
    vector[N] x3;    // third covariate
    vector[N] y;     // response variable
}
parameters {
    real alpha;       // intercept
    real beta1;       // coefficient for x1
    real beta2;       // coefficient for x2
    real beta3;       // coefficient for x3
}
model {
    // Priors
    alpha ~ cauchy(0, 1);
    beta1 ~ cauchy(0, 1);
    beta2 ~ cauchy(0, 1);
    beta3 ~ cauchy(0, 1);

    // Likelihood with student-t errors, 5 degrees of freedom
    y ~ student_t(5, alpha + beta1 * x1 + beta2 * x2 + beta3 * x3, 1);
}
"""

# Compile the Stan model
sm = pystan.StanModel(model_code=stan_model_code)

# Prepare data for Stan model
data_for_stan = {
    'N': len(data_df),
    'log_SC': data_df['log_SC'].values.tolist(),
    'log_WT': data_df['log_WT'].values.tolist(),
    'log_140_minus_A': data_df['log_140_minus_A'].values.tolist(),
    'log_CR': data_df['log_CR'].values.tolist()  # response variable
}

# Save the data dictionary to a JSON file
with open("student_regression_data.json", "w") as f:
    json.dump(data_for_stan, f, indent=4)


# Fit the model and sample from the posterior using NUTS (NUTS paper: https://arxiv.org/abs/1111.4246)
fit = sm.sampling(data=data_for_stan, iter=1000, warmup=200, chains=3)

INFO:rpy2.situation:cffi mode is CFFI_MODE.ANY
INFO:rpy2.situation:R home found: /Users/nicolabranchini/miniforge3/lib/R
INFO:rpy2.situation:R library path: 
INFO:rpy2.situation:LD_LIBRARY_PATH: 
INFO:rpy2.rinterface_lib.embedded:Default options to initialize R: rpy2, --quiet, --no-save
INFO:rpy2.rinterface_lib.embedded:R is already initialized. No need to initialize.
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_f4ef895b15a51e0a4b2ba279ece8485e NOW.
In file included from /var/folders/j5/9grhtgg17d5cnz9j926s7tyw0000gn/T/pystan_2d4sdkyg/stanfit4anon_model_f4ef895b15a51e0a4b2ba279ece8485e_1230288213261222356.cpp:840:
In file included from /Users/nicolabranchini/PycharmProjects/is_estimators_testbed/venv/lib/python3.9/site-packages/numpy/core/include/numpy/arrayobject.h:5:
In file included from /Users/nicolabranchini/PycharmProjects/is_estimators_testbed/venv/lib/python3.9/site-packages/numpy/core/include/numpy/ndarrayobject.h:12:
In file included from /Users/nicolabranchini/Pyc

RuntimeError: Exception: variable does not exist; processing stage=data initialization; variable name=x1; base type=vector_d  (in 'unknown file name' at line 4)


In [None]:
stan = "./student_reg_model.stan"
data = "./student_regression_data.json"
bridgestan_model = bridgestan.StanModel.from_stan_file(stan, data)


In [None]:
fit.summary()

In [None]:
true_log_pdf = fit.log_prob

In [None]:
# Step 3: Find the MAP solution
map_sol = sm.optimizing(data=data_for_stan)
map_sol = list(map_sol.values())
print(sm.log_prob(map_sol))

In [None]:
log_dens_at_map, _, hessian_at_map = bridgestan_model.log_density_hessian(theta_unc=map_sol)
print(log_dens_at_map)

In [None]:
dim = 4
dof_proposal = 5
mu_initial_proposal = np.ones(dim)
shape_initial_proposal = (dof_proposal - 2) / (dof_proposal) * np.eye(dim)
num_samples = 1000
alpha = 1 + 2 / (dof_proposal + dim)

In [None]:
alpha_AMIS_fixed_dof(mu_initial=mu_initial_proposal,shape_initial=shape_initial_proposal, n_iterations=50, log_pi_tilde=true_log_pdf, dof_proposal=dof_proposal, M=num_samples, D=dim)

In [None]:
# compare results with MCMC via the KSD, also plot Renyi divergence