In [5]:
import numpy as np
import arviz as az
import pymc as pm
import pandas as pd

In [6]:
# a)

# Read the data set and extract the regression variables as NumPy arrays.
df = pd.read_csv('Prices.csv')
y, x1 = df['Price'].values, df['Speed'].values  # response and first predictor
x2 = np.log(df['HardDrive'].values)             # second predictor enters on the log scale
N = y.shape[0]                                  # number of observations

def log_prior(theta):
    """Unnormalised log prior density of the regression parameters.

    Parameters
    ----------
    theta : sequence of 4 floats
        ``(alpha, beta1, beta2, sigma)``.

    Returns
    -------
    float
        Sum of zero-centred Gaussian log-kernels with scales 1000 (alpha),
        100 (beta1), 500 (beta2) and 500 (sigma), or ``-np.inf`` when
        ``sigma <= 0``. Because sigma is restricted to positive values its
        prior is effectively a half-normal.
    """
    alpha, beta1, beta2, sigma = theta
    if sigma <= 0:
        # A scale parameter must be strictly positive: zero prior mass here.
        return -np.inf

    # Squared standardised distance of each parameter from zero.
    z2_alpha = (alpha / 1000)**2
    z2_beta1 = (beta1 / 100)**2
    z2_beta2 = (beta2 / 500)**2
    z2_sigma = (sigma / 500)**2
    return -0.5 * z2_alpha - 0.5 * z2_beta1 - 0.5 * z2_beta2 - 0.5 * z2_sigma

def log_likelihood(theta, x1, x2, y):
    """Gaussian log-likelihood of a two-predictor linear regression.

    The model is ``y ~ Normal(alpha + beta1 * x1 + beta2 * x2, sigma)``.
    The additive constant ``-(n / 2) * log(2 * pi)`` is omitted; it does not
    depend on ``theta`` and therefore cancels in Metropolis acceptance ratios.

    Parameters
    ----------
    theta : sequence of 4 floats
        ``(alpha, beta1, beta2, sigma)``.
    x1, x2 : array-like
        Predictor values, broadcastable against ``y``.
    y : array-like
        Observed responses.

    Returns
    -------
    float
        The log-likelihood up to an additive constant, or ``-np.inf`` when
        ``sigma <= 0``.
    """
    alpha, beta1, beta2, sigma = theta
    if sigma <= 0:
        return -np.inf

    x1 = np.asarray(x1)
    x2 = np.asarray(x2)
    y = np.asarray(y)

    mu = alpha + beta1 * x1 + beta2 * x2
    # Take the observation count from the data actually passed in rather than
    # from a module-level global, so the function is correct for any data set.
    n_obs = y.size
    ll = -n_obs * np.log(sigma) - np.sum((y - mu)**2) / (2 * sigma**2)
    return ll

def log_posterior(theta, x1, x2, y):
    """Unnormalised log posterior: log prior plus log-likelihood.

    Returns ``-np.inf`` without evaluating the likelihood whenever the prior
    term is not finite (for example when ``sigma <= 0``).
    """
    prior_term = log_prior(theta)
    if np.isfinite(prior_term):
        return prior_term + log_likelihood(theta, x1, x2, y)
    return -np.inf

# Random-walk Metropolis sampler for theta = (alpha, beta1, beta2, sigma).
# n_samples is the total number of iterations; the first burn_in draws are
# discarded before summarising.
n_samples = 20000
burn_in = 5000

# Starting point of the chain, ordered (alpha, beta1, beta2, sigma).
current_theta = np.array([-45.0, 4.0, 350.0, 500.0]) 
current_log_post = log_posterior(current_theta, x1, x2, y)

# One row per iteration, one column per parameter.
chain = np.zeros((n_samples, 4))
proposal_scales = np.array([50.0, 0.5, 10.0, 10.0]) # Per-parameter std. dev. of the Gaussian proposal step
accepted = 0  # number of accepted proposals (not reported in this cell)

# Fixed seed so the chain is reproducible. The order of the two random draws
# per iteration (proposal, then uniform) determines the exact chain.
np.random.seed(42)
for i in range(n_samples):
    # Symmetric Gaussian perturbation of all four parameters at once.
    proposal = current_theta + np.random.normal(0, 1, 4) * proposal_scales
    proposed_log_post = log_posterior(proposal, x1, x2, y)
    
    # Metropolis rule on the log scale: accept when log(u) < log posterior ratio.
    # A proposal with sigma <= 0 has log posterior -inf and is never accepted.
    log_ratio = proposed_log_post - current_log_post
    if np.log(np.random.rand()) < log_ratio:
        current_theta = proposal
        current_log_post = proposed_log_post
        accepted += 1
    # Store the current state, whether or not the proposal was accepted.
    chain[i] = current_theta

# Results: posterior means and equal-tailed 95% intervals (2.5th / 97.5th
# percentiles) computed from the post-burn-in draws.
posterior_samples = chain[burn_in:]
means = np.mean(posterior_samples, axis=0)
ci_lower = np.percentile(posterior_samples, 2.5, axis=0)
ci_upper = np.percentile(posterior_samples, 97.5, axis=0)

results = pd.DataFrame({
    'Parameter': ['alpha', 'beta1', 'beta2', 'sigma'],
    'Mean': means,
    '95% CI Lower': ci_lower,
    '95% CI Upper': ci_upper
})
print(results)

  Parameter        Mean  95% CI Lower  95% CI Upper
0     alpha  -51.863845   -492.206919    344.731762
1     beta1    4.222246      2.018331      6.573542
2     beta2  353.883350    280.137470    434.291148
3     sigma  503.518293    470.895480    538.999366


In [7]:
# b)

def compute_hdi(samples, credible_mass=0.95):
    """Return the highest-density interval (HDI) of a set of samples.

    The HDI is taken to be the narrowest interval between two order
    statistics that spans ``floor(credible_mass * n)`` index steps, where
    ``n`` is the number of samples.

    Parameters
    ----------
    samples : array-like
        Draws from the distribution. Multi-dimensional input is flattened
        and pooled.
    credible_mass : float, default 0.95
        Fraction of the samples the interval should cover; must lie in
        ``(0, 1]``. ``1.0`` yields ``(min, max)``.

    Returns
    -------
    tuple
        ``(hdi_min, hdi_max)`` taken from the sorted samples.

    Raises
    ------
    ValueError
        If ``samples`` is empty or ``credible_mass`` is outside ``(0, 1]``.
    """
    if not 0 < credible_mass <= 1:
        raise ValueError(f"credible_mass must be in (0, 1], got {credible_mass}")

    # axis=None flattens before sorting, so the element count and the sort
    # always refer to the same one-dimensional sequence.
    sorted_samples = np.sort(np.asarray(samples), axis=None)
    n_total = sorted_samples.size
    if n_total == 0:
        raise ValueError("compute_hdi requires at least one sample")

    # Index distance between the two end points of every candidate interval.
    # Clamp to n_total - 1 so credible_mass == 1.0 selects the full range
    # instead of producing an empty candidate list.
    span = min(int(np.floor(credible_mass * n_total)), n_total - 1)
    n_candidates = n_total - span

    # Width of each candidate; the first narrowest one is the HDI.
    widths = sorted_samples[span:] - sorted_samples[:n_candidates]
    start = np.argmin(widths)
    hdi_min = sorted_samples[start]
    hdi_max = sorted_samples[start + span]
    return hdi_min, hdi_max

# The slope draws are columns 1 and 2 of the post-burn-in chain.
beta1_samples, beta2_samples = posterior_samples[:, 1], posterior_samples[:, 2]

# 95% highest-density interval for each slope.
hdi_beta1, hdi_beta2 = compute_hdi(beta1_samples), compute_hdi(beta2_samples)

print(f"95% HDI for Beta1: {hdi_beta1}")
print(f"95% HDI for Beta2: {hdi_beta2}")

95% HDI for Beta1: (np.float64(2.0131959212706088), np.float64(6.545007604187195))
95% HDI for Beta2: (np.float64(278.01208117382487), np.float64(431.32374448437326))


In [8]:
# c)
