In [2]:
# Set a seed for reproducibility, so you get the same random sample every time
set.seed(123)

# Define the parameters
n_draws <- 2000
alpha_param <- 3
beta_param <- 3

# Draw a sample of size 400 from the gamma(3, 3) distribution
gamma_sample <- rgamma(n = n_draws, shape = alpha_param, rate = beta_param)

# Calculate the empirical mean of the sample
empirical_mean <- mean(gamma_sample)

# The analytical mean
analytical_mean <- alpha_param / beta_param

# Print the results to compare them
print(paste("Analytical Mean:", analytical_mean))
print(paste("Empirical Mean from", n_draws, "draws:", empirical_mean))

# The output will show a value close to, but not exactly, 1.
# For example, you might get:
# [1] "Analytical Mean: 1"
# [1] "Empirical Mean from 400 draws: 0.9855"



# Set a seed for reproducibility of the random numbers
set.seed(123) 

# Define the number of samples to simulate
NUMBER_OF_SAMPLES <- 2000

# create a vector to store the sample means
sample_means <- rep(NA, times = NUMBER_OF_SAMPLES)

# loop over the number of samples
for (i in 1:NUMBER_OF_SAMPLES) {
  # generate a sample from the gamma distribution of size 400
  sample <- rgamma(n = 400, shape = 3, rate = 3)
  #sample <- rcauchy(n = 400, location = 0, scale = 1)
  # calculate the sample mean and save it to the vector
  sample_means[i] <- mean(sample)
}

# calculate the standard deviation of the sample means
mcse <- sd(sample_means)

# Print the result
print(mcse)

set.seed(123)

# Draw 1e6 samples from Cauchy(0, 1)
samples <- rcauchy(n = 400, location = 0, scale = 1)

# Try to compute the mean
mean(samples)


# --- Function to calculate MCSE ---
calculate_mcse <- function(distribution_func, n_samples, sample_size, ...) {
  # '...' allows passing extra arguments like shape/rate or location/scale
  
  sample_means <- rep(NA, times = n_samples)
  
  for (i in 1:n_samples) {
    sample <- distribution_func(n = sample_size, ...)
    sample_means[i] <- mean(sample)
  }
  
  mcse <- sd(sample_means)
  return(mcse)
}

# --- Simulation Parameters ---
NUMBER_OF_SAMPLES <- 2000
SAMPLE_SIZE <- 400

# --- Gamma Distribution Simulation (run a few times) ---
print("MCSE for Gamma(3, 3):")
# Run 1
set.seed(1) # Set seed for reproducibility of this specific run
print(calculate_mcse(rgamma, NUMBER_OF_SAMPLES, SAMPLE_SIZE, shape = 3, rate = 3))

# Run 2
set.seed(2)
print(calculate_mcse(rgamma, NUMBER_OF_SAMPLES, SAMPLE_SIZE, shape = 3, rate = 3))

# Run 3
set.seed(3)
print(calculate_mcse(rgamma, NUMBER_OF_SAMPLES, SAMPLE_SIZE, shape = 3, rate = 3))


# --- Cauchy Distribution Simulation (run a few times) ---
print("MCSE for Cauchy(0, 1):")
# Run 1
set.seed(10) # Use a different series of seeds
print(calculate_mcse(rcauchy, NUMBER_OF_SAMPLES, SAMPLE_SIZE, location = 0, scale = 1))

# Run 2
set.seed(20)
print(calculate_mcse(rcauchy, NUMBER_OF_SAMPLES, SAMPLE_SIZE, location = 0, scale = 1))

# Run 3
set.seed(30)
print(calculate_mcse(rcauchy, NUMBER_OF_SAMPLES, SAMPLE_SIZE, location = 0, scale = 1))




[1] "Analytical Mean: 1"
[1] "Empirical Mean from 2000 draws: 0.981122467369818"
[1] 0.02864011


[1] "MCSE for Gamma(3, 3):"
[1] 0.02847158
[1] 0.02888921
[1] 0.02859779
[1] "MCSE for Cauchy(0, 1):"
[1] 21.45548
[1] 274.9728
[1] 5460.374


In [3]:

print("Pareto's distribution")
set.seed(123)

# Gamma(3,3) sample
gamma_draws <- rgamma(n = 400, shape = 3, rate = 3)
gamma_k <-  posterior::pareto_khat(gamma_draws)
gamma_k

# Cauchy(0,1) sample
cauchy_draws <- rcauchy(n = 400, location = 0, scale = 1)
cauchy_k <- pareto_khat(cauchy_draws)
cauchy_k

[1] "Pareto's distribution"


In [4]:
library(MASS)  # for mvrnorm

mu <- c(0, 10)              # mean vector
sigma_alpha <- 2
sigma_beta <- 10
rho <- 0.6

# covariance matrix
Sigma <- matrix(c(
  sigma_alpha^2, rho*sigma_alpha*sigma_beta,
  rho*sigma_alpha*sigma_beta, sigma_beta^2
), nrow = 2, byrow = TRUE)

# draw 1 sample from the bivariate normal
set.seed(123)
sample <- mvrnorm(n = 1, mu = mu, Sigma = Sigma)
sample


In [11]:
# Load packages
library(MASS)      # for mvrnorm

set.seed(123)

# Prior parameters
mu <- c(0, 10)
sigma_alpha <- 2
sigma_beta <- 10
rho <- 0.6

# Covariance matrix
Sigma <- matrix(c(
  sigma_alpha^2, rho*sigma_alpha*sigma_beta,
  rho*sigma_alpha*sigma_beta, sigma_beta^2
), nrow = 2, byrow = TRUE)

# Draw 4000 samples from the prior
S <- 4000
theta_samples <- mvrnorm(n = S, mu = mu, Sigma = Sigma)

# Calculate unnormalized log posterior (log-likelihood) for each draw
# Assuming bioassaylp returns log posterior under uniform prior
log_lik <- apply(theta_samples, 1, function(theta) bioassaylp(theta))

# Unnormalized importance weights
w <- exp(log_lik)

# Self-normalized weights
w_tilde <- w / sum(w)

# Importance sampling estimate of posterior mean
posterior_mean <- colSums(theta_samples * w_tilde)

posterior_mean


ERROR: Error in checkmate::assertNumeric(beta): argument "beta" is missing, with no default


In [10]:
# Load packages
library(aaltobda)
library(MASS)  # for mvrnorm if needed

set.seed(123)

# Load bioassay data
data("bioassay")

# Prior parameters (from Q2)
mu <- c(0, 10)
sigma_alpha <- 2
sigma_beta <- 10
rho <- 0.6

# Covariance matrix
Sigma <- matrix(c(
  sigma_alpha^2, rho*sigma_alpha*sigma_beta,
  rho*sigma_alpha*sigma_beta, sigma_beta^2
), nrow = 2, byrow = TRUE)

# Function for unnormalized log importance weights
log_importance_weights <- function(alpha, beta) {
  theta <- cbind(alpha, beta)
  apply(theta, 1, function(th) bioassaylp(th))
}

# Function for normalized importance weights
normalized_importance_weights <- function(alpha, beta) {
  log_w <- log_importance_weights(alpha, beta)
  w <- exp(log_w - max(log_w))  # subtract max for numerical stability
  w_tilde <- w / sum(w)
  return(w_tilde)
}

# Function to compute posterior mean using normalized weights
posterior_mean <- function(alpha, beta) {
  w_tilde <- normalized_importance_weights(alpha, beta)
  colSums(cbind(alpha, beta) * w_tilde)
}

# Function to compute effective sample size
S_eff <- function(alpha, beta) {
  w_tilde <- normalized_importance_weights(alpha, beta)
  1 / sum(w_tilde^2)
}

# Function to compute MCSE of posterior mean
posterior_mean_MCSE <- function(alpha, beta) {
  w_tilde <- normalized_importance_weights(alpha, beta)
  theta <- cbind(alpha, beta)
  sqrt(colSums(w_tilde * (theta - matrix(colSums(theta * w_tilde), nrow = nrow(theta), ncol = 2, byrow = TRUE))^2))
}

# Sample 4000 draws from the prior
S <- 4000
prior_draws <- rmvnorm(S, mu = mu, Sigma = Sigma)

alpha_draws <- prior_draws[,1]
beta_draws <- prior_draws[,2]

# Compute normalized importance weights
w_tilde <- normalized_importance_weights(alpha_draws, beta_draws)

# Posterior mean estimate
post_mean <- posterior_mean(alpha_draws, beta_draws)
post_mean
# [1] 0.503 8.275  # example from test data

# Effective sample size
effective_sample_size <- S_eff(alpha_draws, beta_draws)
effective_sample_size
# [1] 1.354  # test data example

# Posterior mean MCSE
mcse <- posterior_mean_MCSE(alpha_draws, beta_draws)
mcse
# [1] 0.3031766 4.4794358  # test data example

# Histogram of normalized importance weights (optional)
hist(w_tilde, breaks = 50, main = "Normalized Importance Weights", xlab = "w_tilde")


ERROR: Error in rmvnorm(S, mu = mu, Sigma = Sigma): unused arguments (mu = mu, Sigma = Sigma)


In [9]:
if(!require(aaltobda)){
    install.packages("remotes")
    remotes::install_github("avehtari/BDA_course_Aalto", subdir = "rpackage", upgrade="never")
    library(aaltobda)
}

Loading required package: aaltobda

“there is no package called ‘aaltobda’”
Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

Downloading GitHub repo avehtari/BDA_course_Aalto@HEAD



[36m──[39m [36mR CMD build[39m [36m─────────────────────────────────────────────────────────────────[39m
* checking for file ‘/tmp/RtmpIajLoC/remotes14877c15170/avehtari-BDA_course_Aalto-4dad812/rpackage/DESCRIPTION’ ... OK
* preparing ‘aaltobda’:
* checking DESCRIPTION meta-information ... OK
* checking for LF line-endings in source and make files and shell scripts
* checking for empty or unneeded directories
* building ‘aaltobda_0.3.2.tar.gz’



Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)


Attaching package: ‘aaltobda’


The following object is masked from ‘package:posterior’:

    mcse_quantile




In [1]:
# Load necessary libraries
# install.packages("mvtnorm")
# install.packages("posterior")
#library(mvtnorm)
#library(posterior)
library(aaltobda)
# Set a seed for reproducibility
set.seed(42)

# --- 1. Define the Prior Distribution ---
prior_mean <- c(0, 10)
sd_alpha <- 2
sd_beta <- 10
correlation <- 0.6
cov_ab <- correlation * sd_alpha * sd_beta
prior_cov_matrix <- matrix(c(sd_alpha^2, cov_ab, cov_ab, sd_beta^2), nrow = 2)


# --- 2. Define the Data and Log-Likelihood Function ---
x_dose <- c(-0.86, -0.30, -0.05, 0.73)
n_animals <- c(5, 5, 5, 5)
y_deaths <- c(0, 1, 3, 5)

log_likelihood <- function(params, x, n, y) {
  alpha <- params[1]
  beta <- params[2]
  probabilities <- 1 / (1 + exp(-(alpha + beta * x)))
  log_lik <- sum(dbinom(y, size = n, prob = probabilities, log = TRUE))
  return(log_lik)
}


# --- 3. Perform Importance Sampling ---
n_draws <- 4000
prior_draws <- rmvnorm(n = n_draws, mean = prior_mean, sigma = prior_cov_matrix)
colnames(prior_draws) <- c("alpha", "beta")

# Calculate unnormalized log importance ratios (log-likelihoods)
log_ratios <- apply(prior_draws, 1, log_likelihood, x = x_dose, n = n_animals, y = y_deaths)

# --- 4. Calculate Estimates ---

# 3.8: Estimate of the posterior mean
# Self-normalize the weights using the log-sum-exp trick for stability
max_log_ratio <- max(log_ratios)
weights <- exp(log_ratios - max_log_ratio)
normalized_weights <- weights / sum(weights)

# Calculate the self-normalized importance sampling estimate of the posterior mean
est_posterior_mean_alpha <- sum(normalized_weights * prior_draws[, "alpha"])
est_posterior_mean_beta <- sum(normalized_weights * prior_draws[, "beta"])

# 3.10: Importance sampling ESS
ess <- 1 / sum(normalized_weights^2)

# 3.11: MCSE of the estimates
# Weighted variance
weighted_var_alpha <- sum(normalized_weights * (prior_draws[, "alpha"] - est_posterior_mean_alpha)^2)
weighted_var_beta <- sum(normalized_weights * (prior_draws[, "beta"] - est_posterior_mean_beta)^2)

# MCSE using ESS
mcse_alpha <- sqrt(weighted_var_alpha / ess)
mcse_beta <- sqrt(weighted_var_beta / ess)

# 3.12: Pareto-k diagnostic
k_hat <- pareto_khat(log_ratios)

# --- 5. Print All Results ---
print(paste("3.8: Est. Posterior Mean Alpha:", round(est_posterior_mean_alpha, 2)))
print(paste("3.8: Est. Posterior Mean Beta:", round(est_posterior_mean_beta, 2)))
print(paste("3.10: Effective Sample Size (ESS):", round(ess)))
print(paste("3.11: MCSE for Alpha:", round(mcse_alpha, 2)))
print(paste("3.11: MCSE for Beta:", round(mcse_beta, 2)))
print(paste("3.12: Pareto-k diagnostic:", round(k_hat, 2)))

ERROR: Error: object 'log_likelihood' not found


In [16]:
# Load required libraries
 # For rmvnorm
library(aaltobda)  # For bioassay example functions
      # For Pareto-k diagnostics

# Load the bioassay data
data("bioassay")   # assuming the package provides this

# Step 1: Draw 4000 samples from the prior (bivariate normal)
set.seed(123)
S <- 4000
prior_mean <- c(0, 0)
prior_cov <- diag(2)
theta_samples <- rmvnorm(S, mean = prior_mean, sigma = prior_cov)

# Step 2: Calculate log-likelihood for each sample
log_likelihood <- apply(theta_samples, 1, function(theta) {
  sum(dbinom(
    bioassay$y, bioassay$n, 
    plogis(bioassay$x * theta[1] + theta[2]), 
    log = TRUE
  ))
})

# Step 3: Compute normalized importance weights
log_weights <- log_likelihood - max(log_likelihood)  # stabilize
weights <- exp(log_weights)
weights <- weights / sum(weights)                    # normalize

# Step 4: Importance sampling estimate of posterior mean
posterior_mean <- colSums(theta_samples * weights)
alpha_est <- posterior_mean[1]
beta_est <- posterior_mean[2]

cat("Posterior mean estimates:\n")
cat("alpha =", alpha_est, "\n")
cat("beta  =", beta_est, "\n")

# Step 5: Compute Effective Sample Size (ESS)
ESS <- 1 / sum(weights^2)
cat("Effective Sample Size (ESS) =", ESS, "\n")

# Step 6: Monte Carlo Standard Error (MCSE)
mcse_alpha <- sqrt(sum(weights * (theta_samples[,1] - alpha_est)^2)) / sqrt(ESS)
mcse_beta  <- sqrt(sum(weights * (theta_samples[,2] - beta_est)^2)) / sqrt(ESS)
cat("MCSE:\n")
cat("alpha =", mcse_alpha, "\n")
cat("beta  =", mcse_beta, "\n")

# Step 7: Pareto-k diagnostic
# Use 'weights' as log ratios (loo package expects log weights)
pareto_k <- pareto_k(weights = log_weights)
cat("Pareto-k diagnostic =", round(pareto_k, 1), "\n")

# Step 8: Trustworthiness based on Pareto-k
if (pareto_k < 0.7) {
  cat("Importance sampling estimate is likely trustworthy.\n")
} else {
  cat("Importance sampling estimate may not be trustworthy.\n")
}


Posterior mean estimates:
alpha = 1.777175 
beta  = -0.09415046 
Effective Sample Size (ESS) = 257.6649 
MCSE:
alpha = 0.04027104 
beta  = 0.02888834 


ERROR: Error in pareto_k(weights = log_weights): could not find function "pareto_k"


In [2]:
# Load necessary library
library(aaltobda)

# Set seed for reproducibility
set.seed(42)

# --- 1. Define the Prior Distribution ---
prior_mean <- c(0, 10)
sd_alpha <- 2
sd_beta <- 10
correlation <- 0.6
cov_ab <- correlation * sd_alpha * sd_beta
prior_cov_matrix <- matrix(c(sd_alpha^2, cov_ab, cov_ab, sd_beta^2), nrow = 2)

# --- 2. Draw Samples from the Prior ---
n_draws <- 4000
prior_draws <- rmvnorm(n = n_draws, mean = prior_mean, sigma = prior_cov_matrix)
colnames(prior_draws) <- c("alpha", "beta")

# --- 3. Compute Log-Likelihoods Using bioassaylp ---
# bioassaylp takes alpha and beta as input, returns log-likelihood
log_ratios <- apply(prior_draws, 1, function(params) {
  bioassaylp(alpha = params[1], beta = params[2])
})

# --- 4. Importance Sampling Weights ---
# Normalize weights using log-sum-exp trick
max_log_ratio <- max(log_ratios)
weights <- exp(log_ratios - max_log_ratio)
normalized_weights <- weights / sum(weights)

# --- 5. Posterior Mean Estimates (Importance Sampling) ---
est_posterior_mean_alpha <- sum(normalized_weights * prior_draws[, "alpha"])
est_posterior_mean_beta  <- sum(normalized_weights * prior_draws[, "beta"])

# --- 6. Effective Sample Size (ESS) ---
ess <- 1 / sum(normalized_weights^2)

# --- 7. Monte Carlo Standard Error (MCSE) ---
weighted_var_alpha <- sum(normalized_weights * (prior_draws[, "alpha"] - est_posterior_mean_alpha)^2)
weighted_var_beta  <- sum(normalized_weights * (prior_draws[, "beta"] - est_posterior_mean_beta)^2)
mcse_alpha <- sqrt(weighted_var_alpha / ess)
mcse_beta  <- sqrt(weighted_var_beta / ess)

# --- 8. Pareto-k Diagnostic ---
k_hat <- pareto_khat(log_ratios)

# --- 9. Print Results ---
cat("3.8: Posterior Mean Estimates:\n")
cat("Alpha =", round(est_posterior_mean_alpha, 2), "\n")
cat("Beta  =", round(est_posterior_mean_beta, 2), "\n\n")

cat("3.10: Effective Sample Size (ESS) =", round(ess), "\n\n")

cat("3.11: MCSE:\n")
cat("Alpha =", round(mcse_alpha, 2), "\n")
cat("Beta  =", round(mcse_beta, 2), "\n\n")

cat("3.12: Pareto-k Diagnostic =", round(k_hat, 2), "\n")

# --- 3.13: Trustworthiness ---
if (k_hat < 0.7) {
  cat("Importance sampling estimate is likely trustworthy.\n")
} else {
  cat("Importance sampling estimate may not be trustworthy.\n")
}


ERROR: Error in checkmate::assertNumeric(x, len = 4): argument "x" is missing, with no default


In [4]:
# Install aaltobda package if you don't have it
# You might need the 'remotes' package to install from r-universe
# install.packages('remotes')
# remotes::install_github('avehtari/aaltobda')

# Load the necessary package
library(aaltobda)

# Load the bioassay data
data("bioassay")

#################################################################
# 2. Bioassay model: Prior
#################################################################

# 2.1: The mean of the prior distribution
mu <- c(0, 10)
# Mean of alpha is 0, Mean of beta is 10

# 2.2: The covariance of the prior distribution
# Marginal distributions: alpha ~ N(0, 2^2), beta ~ N(10, 10^2)
# Correlation: corr(alpha, beta) = 0.6
s1 <- 2  # sd of alpha
s2 <- 10 # sd of beta
corr <- 0.6
# Covariance is corr * s1 * s2
cv <- corr * s1 * s2
# Covariance matrix
Sigma <- matrix(c(s1^2, cv, cv, s2^2), nrow = 2)
# Print the covariance matrix
print("2.2: Covariance Matrix")
print(Sigma)


#################################################################
# 3. Bioassay model: Importance sampling
#################################################################

# Set a seed for reproducibility
set.seed(42)

# Number of draws from the prior
S <- 4000

# Use the rmvnorm function to draw from the prior
# CORRECTED LINE: The argument is `sigma`, not `Sigma`.
theta <- rmvnorm(n = S, mean = mu, sigma = Sigma)
# theta contains two columns, one for alpha and one for beta
alpha <- theta[, 1]
beta <- theta[, 2]

# 3.4 & 3.6: Calculate unnormalized log importance ratios
# The unnormalized importance ratio is the likelihood p(y|theta).
# The unnormalized log-ratio is the log-likelihood.
# The bioassaylp function calculates the log-likelihood (unnormalized log posterior with uniform prior)
log_lik <- bioassaylp(alpha, beta, bioassay)

# 3.10 & 3.12: Use Pareto Smoothed Importance Sampling (PSIS)
# This is a more stable and recommended method for importance sampling.
# It computes normalized weights, ESS, and the Pareto-k diagnostic.
psis_result <- psislw(log_lik)

# Extract the Pareto-k diagnostic
k <- psis_result$khat
print(paste("3.12: Pareto-k diagnostic:", round(k, 1)))

# Extract the effective sample size (ESS)
ess <- psis_result$ess
print(paste("3.10: Importance sampling ESS:", round(ess)))

# Get the self-normalized importance weights
w_norm <- psis_result$w_normalized

# 3.8: Calculate the importance sampling estimate of the posterior mean
# E[theta|y] ≈ Σ(w_norm * theta)
is_mean_alpha <- sum(w_norm * alpha)
is_mean_beta <- sum(w_norm * beta)
print(paste("3.8: IS estimate for alpha mean:", round(is_mean_alpha, 2)))
print(paste("3.8: IS estimate for beta mean:", round(is_mean_beta, 2)))

# 3.11: Calculate the Monte Carlo Standard Error (MCSE) of the estimates.
# MCSE = sd(theta_weighted) / sqrt(ESS)

# Calculate the weighted standard deviation for alpha and beta
# First, calculate weighted variance: Var_w(x) = Σ w * (x - E_w[x])^2
var_alpha_is <- sum(w_norm * (alpha - is_mean_alpha)^2)
var_beta_is <- sum(w_norm * (beta - is_mean_beta)^2)

sd_alpha_is <- sqrt(var_alpha_is)
sd_beta_is <- sqrt(var_beta_is)

# Now calculate MCSE
mcse_alpha <- sd_alpha_is / sqrt(ess)
mcse_beta <- sd_beta_is / sqrt(ess)

print(paste("3.11: MCSE for alpha:", round(mcse_alpha, 2)))
print(paste("3.11: MCSE for beta:", round(mcse_beta, 2)))

[1] "2.2: Covariance Matrix"
     [,1] [,2]
[1,]    4   12
[2,]   12  100


ERROR: Error in bioassaylp(alpha, beta, bioassay): Assertion on 'x' failed: Must be of type 'numeric', not 'data.frame'.


In [11]:
# Load necessary libraries
# install.packages("aaltobda") # May need to install from the specified universe
# install.packages("posterior")


# --- Load the Data ---
# This dataset contains 4000 independent draws from the posterior
data(bioassay_posterior, package = "aaltobda")

# Extract alpha and beta draws
alpha_draws <- bioassay_posterior$alpha
beta_draws <- bioassay_posterior$beta
n_draws <- length(alpha_draws)

# --- 4.1: Posterior Summaries ---
mean_alpha <- mean(alpha_draws)
quantiles_alpha <- quantile(alpha_draws, probs = c(0.05, 0.95))

mean_beta <- mean(beta_draws)
quantiles_beta <- quantile(beta_draws, probs = c(0.05, 0.95))

# --- 4.2: MCSE for Summaries ---
mcse_mean_alpha <- mcse_mean(alpha_draws)
mcse_q5_alpha <- mcse_quantile(alpha_draws, prob = 0.05)
mcse_q95_alpha <- mcse_quantile(alpha_draws, prob = 0.95)

mcse_mean_beta <- mcse_mean(beta_draws)
mcse_q5_beta <- mcse_quantile(beta_draws, prob = 0.05)
mcse_q95_beta <- mcse_quantile(beta_draws, prob = 0.95)

# --- 4.4: LD50 Calculation ---
# Calculate LD50 for each draw
ld50_draws <- exp(-alpha_draws / beta_draws)

# Calculate the probability Pr(LD50 < 0.85)
# This is the mean of an indicator variable
indicator <- ld50_draws < 0.85
prob_ld50 <- mean(indicator)

# Calculate the MCSE for this probability
mcse_prob_ld50 <- mcse_mean(indicator)

# --- Print All Results ---
# 4.1
print(paste("Mean of alpha:", round(mean_alpha, 2)))
print(paste("5% quantile of alpha:", round(quantiles_alpha[1], 2)))
print(paste("95% quantile of alpha:", round(quantiles_alpha[2], 2)))
print("---")
print(paste("Mean of beta:", round(mean_beta, 2)))
print(paste("5% quantile of beta:", round(quantiles_beta[1], 2)))
print(paste("95% quantile of beta:", round(quantiles_beta[2], 2)))
print("---")
# 4.2
print(paste("MCSE for mean of alpha:", round(mcse_mean_alpha, 4)))
print(paste("MCSE for 5% quantile of alpha:", round(mcse_q5_alpha, 4)))
print(paste("MCSE for 95% quantile of alpha:", round(mcse_q95_alpha, 4)))
print("---")
print(paste("MCSE for mean of beta:", round(mcse_mean_beta, 4)))
print(paste("MCSE for 5% quantile of beta:", round(mcse_q5_beta, 4)))
print(paste("MCSE for 95% quantile of beta:", round(mcse_q95_beta, 4)))
print("---")
# 4.4
print(paste("Pr(LD50 < 0.85 ml/g):", round(prob_ld50, 2)))
print(paste("MCSE for Pr(LD50 < 0.85):", round(mcse_prob_ld50, 4)))

[1] "Mean of alpha: 0.99"
[1] "5% quantile of alpha: -0.47"
[1] "95% quantile of alpha: 2.61"
[1] "---"
[1] "Mean of beta: 10.6"
[1] "5% quantile of beta: 3.99"
[1] "95% quantile of beta: 19.34"
[1] "---"
[1] "MCSE for mean of alpha: 0.0146"
[1] "MCSE for 5% quantile of alpha: 0.0269"
[1] "MCSE for 95% quantile of alpha: 0.0421"
[1] "---"
[1] "MCSE for mean of beta: 0.0749"
[1] "MCSE for 5% quantile of beta: 0.0707"
[1] "MCSE for 95% quantile of beta: 0.2412"
[1] "---"
[1] "Pr(LD50 < 0.85 ml/g): 0.18"
[1] "MCSE for Pr(LD50 < 0.85): 0.0061"


In [13]:
# -------------------------
# Q4: Posterior summaries + MCSEs + comparison with importance sampling
# -------------------------

# Dependencies
# install.packages("aaltobda")   # if not installed
# install.packages("mvtnorm")    # if you plan to re-run importance sampling here
   # for rmvnorm (used if re-running importance sampling)
set.seed(42)

# --- 1. Load posterior draws (4000 independent draws) ---
data("bioassay_posterior", package = "aaltobda")
draws <- as.data.frame(bioassay_posterior)

if (!all(c("alpha", "beta") %in% colnames(draws))) {
  stop("bioassay_posterior must contain columns named 'alpha' and 'beta'.")
}

alpha_draws <- draws$alpha
beta_draws  <- draws$beta
n_draws <- length(alpha_draws)

# --- 2. Point summaries and quantiles ---
alpha_mean <- mean(alpha_draws)
alpha_q05  <- quantile(alpha_draws, probs = 0.05, names = FALSE)
alpha_q95  <- quantile(alpha_draws, probs = 0.95, names = FALSE)

beta_mean  <- mean(beta_draws)
beta_q05   <- quantile(beta_draws, probs = 0.05, names = FALSE)
beta_q95   <- quantile(beta_draws, probs = 0.95, names = FALSE)

# --- 3. MCSE for the mean (iid draws) ---
mcse_alpha_mean <- sd(alpha_draws) / sqrt(n_draws)
mcse_beta_mean  <- sd(beta_draws)  / sqrt(n_draws)

# --- 4. MCSE for quantiles via nonparametric bootstrap ---
B <- 2000
set.seed(123)
boot_q_alpha_5  <- numeric(B)
boot_q_alpha_95 <- numeric(B)
boot_q_beta_5   <- numeric(B)
boot_q_beta_95  <- numeric(B)

for (b in seq_len(B)) {
  idx <- sample.int(n_draws, size = n_draws, replace = TRUE)
  boot_q_alpha_5[b]  <- quantile(alpha_draws[idx], probs = 0.05, names = FALSE)
  boot_q_alpha_95[b] <- quantile(alpha_draws[idx], probs = 0.95, names = FALSE)
  boot_q_beta_5[b]   <- quantile(beta_draws[idx], probs = 0.05, names = FALSE)
  boot_q_beta_95[b]  <- quantile(beta_draws[idx], probs = 0.95, names = FALSE)
}

mcse_alpha_q05  <- sd(boot_q_alpha_5)
mcse_alpha_q95  <- sd(boot_q_alpha_95)
mcse_beta_q05   <- sd(boot_q_beta_5)
mcse_beta_q95   <- sd(boot_q_beta_95)

# --- 5. Print results for 4.1 and 4.2 (escaped percent signs in sprintf) ---
cat("---- 4.1 Posterior summaries (mean, 5%, 95%) ----\n")
cat(sprintf("alpha: mean = %.4f, 5%% = %.4f, 95%% = %.4f\n",
            alpha_mean, alpha_q05, alpha_q95))
cat(sprintf("beta:  mean = %.4f, 5%% = %.4f, 95%% = %.4f\n\n",
            beta_mean, beta_q05, beta_q95))

cat("---- 4.2 MCSEs (mean and quantiles) ----\n")
cat(sprintf("alpha: MCSE(mean) = %.4f, MCSE(5%%) = %.4f, MCSE(95%%) = %.4f\n",
            mcse_alpha_mean, mcse_alpha_q05, mcse_alpha_q95))
cat(sprintf("beta:  MCSE(mean) = %.4f, MCSE(5%%) = %.4f, MCSE(95%%) = %.4f\n\n",
            mcse_beta_mean, mcse_beta_q05, mcse_beta_q95))

# --- 6. Recompute importance sampling posterior mean (to compare) ---
prior_mean <- c(0, 10)
sd_alpha <- 2
sd_beta  <- 10
correlation <- 0.6
cov_ab <- correlation * sd_alpha * sd_beta
prior_cov_matrix <- matrix(c(sd_alpha^2, cov_ab, cov_ab, sd_beta^2), nrow = 2)

S <- 4000
set.seed(42)
prior_draws <- mvtnorm::rmvnorm(S, mean = prior_mean, sigma = prior_cov_matrix)
colnames(prior_draws) <- c("alpha", "beta")

log_lik_prior <- apply(prior_draws, 1, function(p) aaltobda::bioassaylp(alpha = p[1], beta = p[2]))
max_log <- max(log_lik_prior)
w_unnorm <- exp(log_lik_prior - max_log)
w_norm <- w_unnorm / sum(w_unnorm)

is_alpha_mean <- sum(w_norm * prior_draws[, "alpha"])
is_beta_mean  <- sum(w_norm * prior_draws[, "beta"])

ess_is <- 1 / sum(w_norm^2)
is_var_alpha_hat <- sum(w_norm * (prior_draws[, "alpha"] - is_alpha_mean)^2)
is_var_beta_hat  <- sum(w_norm * (prior_draws[, "beta"] - is_beta_mean)^2)
is_mcse_alpha <- sqrt(is_var_alpha_hat / ess_is)
is_mcse_beta  <- sqrt(is_var_beta_hat  / ess_is)

cat("---- Importance sampling estimates (recomputed) ----\n")
cat(sprintf("IS alpha mean = %.4f (MCSE = %.4f), ESS = %.1f\n",
            is_alpha_mean, is_mcse_alpha, ess_is))
cat(sprintf("IS beta  mean = %.4f (MCSE = %.4f)\n\n",
            is_beta_mean, is_mcse_beta))

# --- 7. 4.3 Compare IS mean to independent-draws mean ---
diff_alpha <- abs(is_alpha_mean - alpha_mean)
if (diff_alpha < 2 * mcse_alpha_mean) {
  comparison_decision <- "they are quite similar, indicating that the importance sampling estimate is quite accurate"
} else {
  comparison_decision <- "they are very different, indicating that the importance sampling estimate is not accurate"
}
cat("---- 4.3 Comparison result ----\n")
cat(sprintf("Difference (alpha) = %.5f; threshold (2*MCSE_posterior_mean) = %.5f\n",
            diff_alpha, 2 * mcse_alpha_mean))
cat("Decision (auto):", comparison_decision, "\n\n")

# --- 8. LD50 calculations and probability (4.4) ---
ld50_draws <- exp(- alpha_draws / beta_draws)
prob_ld50_lt_085 <- mean(ld50_draws < 0.85)
mcse_prob <- sqrt(prob_ld50_lt_085 * (1 - prob_ld50_lt_085) / n_draws)

cat("---- 4.4 LD50 probability ----\n")
cat(sprintf("Pr(LD50 < 0.85) = %.4f, MCSE = %.4f\n",
            prob_ld50_lt_085, mcse_prob))

# --- 9. Answers object (for copy-paste) ---
answers <- list(
  alpha_mean = alpha_mean,
  alpha_q05  = alpha_q05,
  alpha_q95  = alpha_q95,
  beta_mean  = beta_mean,
  beta_q05   = beta_q05,
  beta_q95   = beta_q95,
  mcse_alpha_mean = mcse_alpha_mean,
  mcse_alpha_q05  = mcse_alpha_q05,
  mcse_alpha_q95  = mcse_alpha_q95,
  mcse_beta_mean  = mcse_beta_mean,
  mcse_beta_q05   = mcse_beta_q05,
  mcse_beta_q95   = mcse_beta_q95,
  is_alpha_mean = is_alpha_mean,
  is_beta_mean  = is_beta_mean,
  is_mcse_alpha = is_mcse_alpha,
  is_mcse_beta  = is_mcse_beta,
  comparison_decision = comparison_decision,
  pr_ld50_lt_085 = prob_ld50_lt_085,
  mcse_pr_ld50 = mcse_prob
)

print(answers)


---- 4.1 Posterior summaries (mean, 5%, 95%) ----
alpha: mean = 0.9852, 5% = -0.4676, 95% = 2.6102
beta:  mean = 10.5965, 5% = 3.9914, 95% = 19.3404

---- 4.2 MCSEs (mean and quantiles) ----
alpha: MCSE(mean) = 0.0148, MCSE(5%) = 0.0239, MCSE(95%) = 0.0387
beta:  MCSE(mean) = 0.0756, MCSE(5%) = 0.0668, MCSE(95%) = 0.2285



ERROR: Error in checkmate::assertNumeric(x, len = 4): argument "x" is missing, with no default


In [14]:
# -------------------------------------------------------------------------
# Full R script for:
# - Importance sampling with prior as proposal and posterior as target
# - Using aaltobda::bioassaylp() for log-likelihood (unnormalized log posterior for uniform prior)
# - Computing IS posterior means, ESS, MCSE (with ESS), Pareto-k (via loo::psis if available)
# - Loading bioassay_posterior (4000 independent draws) and computing summaries & MCSEs
# -------------------------------------------------------------------------

# Install if needed (uncomment to install)
# install.packages("mvtnorm")
# install.packages("loo")
# install.packages("aaltobda")   # should already be installed per your environment

set.seed(42)

# -------------------------
# 1) Importance sampling setup (use the prior you specified)
# -------------------------
prior_mean <- c(0, 10)
sd_alpha <- 2
sd_beta  <- 10
correlation <- 0.6
cov_ab <- correlation * sd_alpha * sd_beta
prior_cov_matrix <- matrix(c(sd_alpha^2, cov_ab, cov_ab, sd_beta^2), nrow = 2)

S <- 4000
prior_draws <- mvtnorm::rmvnorm(S, mean = prior_mean, sigma = prior_cov_matrix)
colnames(prior_draws) <- c("alpha", "beta")

# Compute unnormalized log importance ratios using bioassaylp()
# NOTE: aaltobda::bioassaylp returns log-likelihood (or log posterior with uniform prior)
log_unorm_weights <- apply(prior_draws, 1, function(p) aaltobda::bioassaylp(alpha = p[1], beta = p[2]))

# Numerical stabilization (log-sum-exp)
max_logw <- max(log_unorm_weights)
w_unnorm <- exp(log_unorm_weights - max_logw)   # unnormalized weights scaled for stability
w_norm <- w_unnorm / sum(w_unnorm)              # self-normalized importance weights (tilde w)

# Importance-sampling posterior mean (self-normalized)
is_alpha_mean <- sum(w_norm * prior_draws[, "alpha"])
is_beta_mean  <- sum(w_norm * prior_draws[, "beta"])

# ESS (self-normalized weights)
ess_is <- 1 / sum(w_norm^2)

# Weighted variance estimate (for MCSE)
is_var_alpha_hat <- sum(w_norm * (prior_draws[, "alpha"] - is_alpha_mean)^2)
is_var_beta_hat  <- sum(w_norm * (prior_draws[, "beta"]  - is_beta_mean)^2)

# MCSE using ESS
is_mcse_alpha <- sqrt(is_var_alpha_hat / ess_is)
is_mcse_beta  <- sqrt(is_var_beta_hat  / ess_is)

# Pareto-k diagnostic: try using loo::psis if available, otherwise try to use any available pareto_khat()
pareto_k_value <- NA
if (has_loo) {
  # loo::psis expects vector of log weights (relative log weights). We pass log_unorm_weights.
  # Catch errors defensively and try to extract pareto_k from possible result structures.
  psis_res <- tryCatch(loo::psis(log_unorm_weights), error = function(e) e)
  if (!inherits(psis_res, "error")) {
    # try multiple possible extraction points
    if (!is.null(psis_res$diagnostics) && !is.null(psis_res$diagnostics$pareto_k)) {
      pareto_k_value <- as.numeric(psis_res$diagnostics$pareto_k)
    } else if (!is.null(psis_res$pareto_k)) {
      pareto_k_value <- as.numeric(psis_res$pareto_k)
    } else if (!is.null(attr(psis_res, "pareto_k"))) {
      pareto_k_value <- as.numeric(attr(psis_res, "pareto_k"))
    } else {
      # fallback: try to coerce the printed object
      pareto_k_value <- NA
    }
  } else {
    pareto_k_value <- NA
  }
} else {
  # If 'loo' not available, check for a known function in aaltobda or global env
  if (exists("pareto_khat")) {
    pareto_k_value <- tryCatch(pareto_khat(log_unorm_weights), error = function(e) NA)
  } else {
    pareto_k_value <- NA
  }
}

# If still NA, warn the user, but continue
if (is.na(pareto_k_value)) {
  message("Pareto-k diagnostic unavailable: install 'loo' package for automatic Pareto-k (e.g. install.packages('loo')).")
}

# Print IS results
cat("=== Importance sampling results ===\n")
cat(sprintf("IS estimate of posterior mean: alpha = %.4f, beta = %.4f\n", is_alpha_mean, is_beta_mean))
cat(sprintf("IS ESS = %.1f\n", ess_is))
cat(sprintf("IS MCSE (alpha) = %.4f, (beta) = %.4f\n", is_mcse_alpha, is_mcse_beta))
if (!is.na(pareto_k_value)) {
  cat(sprintf("Pareto-k diagnostic = %.3f\n", pareto_k_value))
} else {
  cat("Pareto-k diagnostic = NA (see message above).\n")
}
cat("\n")

# -------------------------
# 2) Load the 4000 independent posterior draws and compute summaries (Q4)
# -------------------------
data("bioassay_posterior", package = "aaltobda")
draws <- as.data.frame(bioassay_posterior)

if (!all(c("alpha", "beta") %in% colnames(draws))) {
  stop("bioassay_posterior must contain columns named 'alpha' and 'beta'.")
}

alpha_draws <- draws$alpha
beta_draws  <- draws$beta
n_draws <- length(alpha_draws)   # should be 4000

# 4.1 Posterior summaries
alpha_mean_post <- mean(alpha_draws)
alpha_q05_post  <- quantile(alpha_draws, probs = 0.05, names = FALSE)
alpha_q95_post  <- quantile(alpha_draws, probs = 0.95, names = FALSE)

beta_mean_post  <- mean(beta_draws)
beta_q05_post   <- quantile(beta_draws, probs = 0.05, names = FALSE)
beta_q95_post   <- quantile(beta_draws, probs = 0.95, names = FALSE)

# 4.2 MCSE for mean (iid draws)
mcse_alpha_mean <- sd(alpha_draws) / sqrt(n_draws)
mcse_beta_mean  <- sd(beta_draws)  / sqrt(n_draws)

# MCSE for quantiles via nonparametric bootstrap
B <- 2000
set.seed(123)
boot_alpha_q05 <- numeric(B)
boot_alpha_q95 <- numeric(B)
boot_beta_q05  <- numeric(B)
boot_beta_q95  <- numeric(B)

for (b in seq_len(B)) {
  idx <- sample.int(n_draws, size = n_draws, replace = TRUE)
  boot_alpha_q05[b] <- quantile(alpha_draws[idx], probs = 0.05, names = FALSE)
  boot_alpha_q95[b] <- quantile(alpha_draws[idx], probs = 0.95, names = FALSE)
  boot_beta_q05[b]  <- quantile(beta_draws[idx], probs = 0.05, names = FALSE)
  boot_beta_q95[b]  <- quantile(beta_draws[idx], probs = 0.95, names = FALSE)
}

mcse_alpha_q05 <- sd(boot_alpha_q05)
mcse_alpha_q95 <- sd(boot_alpha_q95)
mcse_beta_q05  <- sd(boot_beta_q05)
mcse_beta_q95  <- sd(boot_beta_q95)

# Print posterior summary results
cat("=== Posterior draws summary (independent 4000 draws) ===\n")
cat(sprintf("alpha: mean = %.4f, 5%% = %.4f, 95%% = %.4f\n", alpha_mean_post, alpha_q05_post, alpha_q95_post))
cat(sprintf("beta:  mean = %.4f, 5%% = %.4f, 95%% = %.4f\n", beta_mean_post, beta_q05_post, beta_q95_post))
cat("\n")
cat("MCSEs (independent draws):\n")
cat(sprintf("alpha: MCSE(mean)=%.5f, MCSE(5%%)=%.5f, MCSE(95%%)=%.5f\n",
            mcse_alpha_mean, mcse_alpha_q05, mcse_alpha_q95))
cat(sprintf("beta:  MCSE(mean)=%.5f, MCSE(5%%)=%.5f, MCSE(95%%)=%.5f\n\n",
            mcse_beta_mean, mcse_beta_q05, mcse_beta_q95))

# -------------------------
# 3) Compare IS posterior mean to independent posterior mean (4.3)
# -------------------------
cat("=== Comparison (IS mean vs independent posterior mean) ===\n")
cat(sprintf("Alpha: independent mean = %.4f ; IS mean = %.4f ; diff = %.5f\n",
            alpha_mean_post, is_alpha_mean, abs(is_alpha_mean - alpha_mean_post)))
cat(sprintf("Beta:  independent mean = %.4f ; IS mean = %.4f ; diff = %.5f\n\n",
            beta_mean_post, is_beta_mean, abs(is_beta_mean - beta_mean_post)))

# Simple decision rule: if difference < 2 * MCSE(independent mean), treat as "quite similar"
alpha_diff <- abs(is_alpha_mean - alpha_mean_post)
alpha_threshold <- 2 * mcse_alpha_mean
if (alpha_diff < alpha_threshold) {
  comp_decision <- "they are quite similar, indicating that the importance sampling estimate is quite accurate"
} else {
  comp_decision <- "they are very different, indicating that the importance sampling estimate is not accurate"
}
cat("Decision (4.3):", comp_decision, "\n\n")

# -------------------------
# 4) LD50 calculation and its probability (4.4)
# LD50 = exp(-alpha / beta)
# -------------------------
ld50_draws <- exp(- alpha_draws / beta_draws)
prob_ld50_lt_085 <- mean(ld50_draws < 0.85)
# MCSE for probability (iid draws): sqrt(p*(1-p)/n)
mcse_ld50_prob <- sqrt(prob_ld50_lt_085 * (1 - prob_ld50_lt_085) / n_draws)

cat("=== LD50 probability ===\n")
cat(sprintf("Pr(LD50 < 0.85) = %.5f ; MCSE = %.5f\n\n", prob_ld50_lt_085, mcse_ld50_prob))

# -------------------------
# 5) Answers list (unrounded) to copy into your form
# -------------------------
answers <- list(
  # Q3 (importance sampling) reported values
  is_alpha_mean = is_alpha_mean,
  is_beta_mean  = is_beta_mean,
  is_ess        = ess_is,
  is_mcse_alpha = is_mcse_alpha,
  is_mcse_beta  = is_mcse_beta,
  pareto_k      = pareto_k_value,
  # Q4 (posterior draws)
  alpha_mean_post = alpha_mean_post,
  alpha_q05_post = alpha_q05_post,
  alpha_q95_post = alpha_q95_post,
  beta_mean_post = beta_mean_post,
  beta_q05_post = beta_q05_post,
  beta_q95_post = beta_q95_post,
  mcse_alpha_mean = mcse_alpha_mean,
  mcse_alpha_q05  = mcse_alpha_q05,
  mcse_alpha_q95  = mcse_alpha_q95,
  mcse_beta_mean  = mcse_beta_mean,
  mcse_beta_q05   = mcse_beta_q05,
  mcse_beta_q95   = mcse_beta_q95,
  comparison_decision = comp_decision,
  pr_ld50_lt_085 = prob_ld50_lt_085,
  mcse_pr_ld50 = mcse_ld50_prob
)

print("Copy these 'answers' values into your quiz form (rounded as required):")
print(answers)

# End of script


ERROR: Error in checkmate::assertNumeric(x, len = 4): argument "x" is missing, with no default


In [17]:
# --- Step 0: Load Required Libraries ---
library(mvtnorm)
library(posterior)
library(aaltobda)

# Set a random seed for reproducibility
set.seed(42)


# --- Step 1: Define the Prior Distribution ---
# (This section is correct and remains unchanged)
prior_mean <- c(0, 10)
sd_alpha <- 2
sd_beta <- 10
correlation <- 0.6
cov_ab <- correlation * sd_alpha * sd_beta
prior_cov_matrix <- matrix(c(sd_alpha^2, cov_ab, 
                             cov_ab,     sd_beta^2), 
                           nrow = 2)


# --- Step 2: Perform the Importance Sampling Steps ---
n_draws <- 4000
prior_draws <- rmvnorm(n = n_draws, mean = prior_mean, sigma = prior_cov_matrix)
colnames(prior_draws) <- c("alpha", "beta")


# --- The Key Correction ---
# OLD (Incorrect) CODE:
# log_ratios <- apply(prior_draws, 1, bioassaylp)

# NEW (Corrected) CODE:
# We create an anonymous function that takes a single vector 'params' (the row)
# and then calls bioassaylp with the first element as alpha and the second as beta.
log_ratios <- apply(prior_draws, 1, function(params) {
  bioassaylp(alpha = params[1], beta = params[2])
})
# --- End of Correction ---


# --- Step 3: Calculate All Required Estimates ---
# (This section is correct and remains unchanged)
max_log_ratio <- max(log_ratios)
weights <- exp(log_ratios - max_log_ratio)
normalized_weights <- weights / sum(weights)
est_posterior_mean_alpha <- sum(normalized_weights * prior_draws[, "alpha"])
est_posterior_mean_beta <- sum(normalized_weights * prior_draws[, "beta"])

ess <- 1 / sum(normalized_weights^2)

weighted_var_alpha <- sum(normalized_weights * (prior_draws[, "alpha"] - est_posterior_mean_alpha)^2)
weighted_var_beta <- sum(normalized_weights * (prior_draws[, "beta"] - est_posterior_mean_beta)^2)
mcse_alpha <- sqrt(weighted_var_alpha / ess)
mcse_beta <- sqrt(weighted_var_beta / ess)

k_hat <- pareto_khat(log_ratios)


# --- Step 4: Print All Results Clearly ---
# (This section is correct and remains unchanged)
cat("--- Results for Importance Sampling Section (using bioassaylp) ---\n\n")

cat("3.8 The importance sampling estimate of the posterior mean is:\n")
cat("alpha:", round(est_posterior_mean_alpha, 2), "\n")
cat("beta:", round(est_posterior_mean_beta, 2), "\n\n")

cat("3.10 Importance sampling ESS:\n")
cat(round(ess), "\n\n")

cat("3.11 What is the MCSE of the estimates?\n")
cat("MCSE for alpha:", round(mcse_alpha, 2), "\n")
cat("MCse for beta:", round(mcse_beta, 2), "\n\n")

cat("3.12 What is the Pareto-k diagnostic of the importance ratios?\n")
cat(round(k_hat, 1), "\n\n")

cat("3.13 Based on the value, should you trust the estimate?\n")
if (k_hat > 0.7) {
  cat("No. The Pareto-k value is > 0.7, indicating the estimate is unreliable.\n")
} else {
  cat("Yes. The Pareto-k value is <= 0.7.\n")
}

ERROR: Error in checkmate::assertNumeric(x, len = 4): argument "x" is missing, with no default
