In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import os
import sys

# Make the project's `src` directory importable.
# The project root is taken to be the parent of the current working directory.
parent_dir = os.path.join(os.getcwd(), "..")
project_root = os.path.abspath(parent_dir)
src_path = os.path.join(project_root, "src")
if sys.path.count(src_path) == 0:
    sys.path.append(src_path)

# Import pokie from pokie.py
from pokie import pokie, get_device

# Section 3.1: Linear Regression

In [2]:
# -------------------------------
# 0. CONFIGURATION
# -------------------------------
# Every tunable of the experiment lives in this cell. The global RNGs are
# seeded here so that a fresh "Restart & Run All" reproduces the same draws.
SEED          = 0                 # Seed for the global NumPy and torch RNGs
num_gt        = 5000              # Number of ground-truth parameter draws
num_samples   = 5000              # Posterior samples per GT
n             = 100               # Observations per GT
true_sigma    = 1.0               # Observation noise σ
prior_mu      = np.zeros(2)       # Prior mean vector [m0, b0]
prior_Sigma   = np.eye(2) * 1.0   # Prior covariance (τ² I)
# Mis-calibration levels: the sampling cell shifts the posterior mean by
# sqrt(level) in both parameters.
noise_levels  = [0.001, 0.01, 0.1, 0.15, 0.2, 0.25]
num_noise     = len(noise_levels)
curr_num_runs = 100               # Passed to pokie() as num_runs

# The sampling cells draw from NumPy's global RNG.
np.random.seed(SEED)
# Seeded as well in case pokie() draws from torch's global RNG — TODO confirm.
torch.manual_seed(SEED)

In [3]:
# -------------------------------
# 1. Storage
# -------------------------------
# Ground-truth parameters: slope m* ~ N(0, 0.5²) and intercept b* ~ N(0, 2.0²),
# paired column-wise into an array of shape (num_gt, 2) holding [m*, b*].
# NOTE(review): these standard deviations (0.5 and 2.0) differ from the
# identity prior_Sigma that the posterior computation uses; if the Pokie score
# is meant to test calibration against the prior, confirm this mismatch is
# intentional.
m_stars = np.random.normal(0.0, 0.5, num_gt)
b_stars = np.random.normal(0.0, 2.0, num_gt)
ground_truths = np.column_stack((m_stars, b_stars))

# Posterior draws, indexed as [noise level, ground truth, sample, parameter].
posteriors = np.zeros(shape=(num_noise, num_gt, num_samples, 2))

In [4]:
# -------------------------------
# 2. Posterior Sampling Loop
# -------------------------------
# Conjugate Gaussian linear regression with known noise level σ = true_sigma.
# For design matrix A = [x, 1] and prior N(prior_mu, prior_Sigma):
#   Precision_post = prior_Sigma⁻¹ + AᵀA / σ²
#   mu_post        = Sigma_post (Aᵀy / σ² + prior_Sigma⁻¹ prior_mu)
# The noise covariance is σ² I, so it enters only as a scalar factor; there is
# no need to materialise an n×n matrix. Everything that does not depend on the
# simulated data is computed once, outside the loop.
prior_precision    = np.linalg.inv(prior_Sigma)
prior_natural_mean = prior_precision @ prior_mu
noise_precision    = 1.0 / (true_sigma**2)

for gt_idx in tqdm(range(num_gt), desc="Sampling Linear-Regression Posteriors"):
    m_star, b_star = ground_truths[gt_idx]
    # Simulate n noisy observations of the line y = m* x + b*
    x = np.random.uniform(-1, 1, size=n)
    y = m_star * x + b_star \
        + np.random.normal(0, true_sigma, size=n)
    # Design matrix with columns [x, 1], shape (n, 2)
    A = np.stack([x, np.ones(n)], axis=1)

    # Exact Gaussian posterior over [m, b]
    Precision_post = prior_precision + noise_precision * (A.T @ A)
    Sigma_post     = np.linalg.inv(Precision_post)
    mu_post        = Sigma_post @ (noise_precision * (A.T @ y) + prior_natural_mean)

    # One set of draws per mis-calibration level: the covariance is left as is,
    # only the mean is shifted by sqrt(scale) in both m and b.
    for nl_idx, scale in enumerate(noise_levels):
        delta = np.sqrt(scale) * np.array([1.0, 1.0])
        biased_mean = mu_post + delta
        # sample from N(biased_mean, Sigma_post)
        posteriors[nl_idx, gt_idx] = np.random.multivariate_normal(
            biased_mean, Sigma_post, size=num_samples
        )

Sampling Linear-Regression Posteriors: 100%|██████████| 5000/5000 [00:08<00:00, 586.28it/s]


In [5]:
# -------------------------------
# 3. Normalization for Pokie
# -------------------------------
# Min/max rescaling, done separately for every ground truth and every
# parameter. The range is taken over that ground truth's posterior samples
# from all noise levels together with the ground-truth value itself, so the
# rescaled values lie in [0, 1] on comparable scales for Pokie.

# Per-(ground truth, parameter) extremes, shape (num_gt, 2): reduce over the
# noise-level and sample axes, then fold in the ground-truth value.
lo = np.minimum(posteriors.min(axis=(0, 2)), ground_truths)
hi = np.maximum(posteriors.max(axis=(0, 2)), ground_truths)
# Floor the range so a degenerate (zero-width) span cannot divide by zero
width = np.maximum(hi - lo, 1e-8)

gt_norm = (ground_truths - lo) / width

# Broadcast the per-GT offsets/ranges over the noise-level and sample axes;
# the division is done in place to avoid a second full-size temporary.
posterior_norm = posteriors - lo[np.newaxis, :, np.newaxis, :]
posterior_norm /= width[np.newaxis, :, np.newaxis, :]

In [6]:
# -------------------------------
# 4. Pokie Evaluation
# -------------------------------
# pokie(gt_norm, posterior_norm, num_runs=...) is used here as returning a
# single tensor of scores, which is printed next to noise_levels.
device = get_device()
print("Using device:", device)

# Convert to float32 tensors on the chosen device. torch.as_tensor keeps this
# cell re-runnable: on a second execution the names already hold tensors of
# the requested dtype/device and are returned as-is, whereas torch.tensor
# would copy them again and emit a copy-construct UserWarning.
posterior_norm = torch.as_tensor(posterior_norm, dtype=torch.float32, device=device)
gt_norm        = torch.as_tensor(gt_norm, dtype=torch.float32, device=device)

pokie_score = pokie(
    gt_norm, posterior_norm, num_runs=curr_num_runs
)

# Bring the scores back to host memory as a NumPy array for printing
pokie_score = pokie_score.cpu().numpy()

print("Noise Levels:", noise_levels)
print("Pokie Score:", pokie_score)

Using device: mps


Pokie MC runs: 100%|██████████| 100/100 [00:07<00:00, 14.06it/s]


Noise Levels: [0.001, 0.01, 0.1, 0.15, 0.2, 0.25]
Pokie Score: [0.66340035 0.64130616 0.5665319  0.55781114 0.55263966 0.5496138 ]
