<a href="https://colab.research.google.com/github/tkorsi/Machine-Learning-Seminars/blob/main/Fish%20dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import numpy as np
import pandas as pd
import pymc as pm
import arviz as az
import matplotlib.pyplot as plt

# 1) Read the data and drop row 40
url = "https://stepik.org/media/attachments/lesson/832665/Fish.csv"
data = pd.read_csv(url)
data = data.drop(40, axis=0)  # row with Weight=0

# Log-transform the relevant columns
data["log_Weight"]   = np.log(data["Weight"])
data["log_Length1"]  = np.log(data["Length1"])
data["log_Height"]   = np.log(data["Height"])
data["log_Width"]    = np.log(data["Width"])

# 2) Build the PyMC model
with pm.Model() as model:
    # Priors
    beta0 = pm.Normal("beta0", mu=0, sigma=100)
    beta1 = pm.Normal("beta1", mu=0, sigma=100)
    beta2 = pm.Normal("beta2", mu=0, sigma=100)
    beta3 = pm.Normal("beta3", mu=0, sigma=100)
    sigma = pm.HalfNormal("sigma", sigma=100)

    # Linear predictor
    mu = (beta0
          + beta1 * data["log_Length1"].values
          + beta2 * data["log_Height"].values
          + beta3 * data["log_Width"].values)

    # Likelihood
    pm.Normal("obs", mu=mu, sigma=sigma, observed=data["log_Weight"].values)

    # 3) Sample from the posterior (10,000 draws, 1 chain)
    trace = pm.sample(
        10000,
        chains=1,
        tune=2000,       # you can adjust tuning steps if needed
        target_accept=0.9,  # can help with acceptance in some cases
        random_seed=42
    )

Output()

In [15]:

# 4) Summarize the trace
print(az.summary(trace, var_names=["beta0", "beta1", "beta2", "beta3", "sigma"], hdi_prob=0.95))


Shape validation failed: input_shape: (1, 10000), minimum_shape: (chains=2, draws=4)


        mean     sd  hdi_2.5%  hdi_97.5%  mcse_mean  mcse_sd  ess_bulk  \
beta0 -1.768  0.117    -2.005     -1.540      0.002    0.001    3604.0   
beta1  1.428  0.053     1.325      1.533      0.001    0.001    3590.0   
beta2  0.628  0.033     0.566      0.693      0.001    0.000    4220.0   
beta3  0.952  0.062     0.828      1.072      0.001    0.001    3482.0   
sigma  0.100  0.006     0.089      0.112      0.000    0.000    5447.0   

       ess_tail  r_hat  
beta0    4248.0    NaN  
beta1    4355.0    NaN  
beta2    4907.0    NaN  
beta3    4544.0    NaN  
sigma    4812.0    NaN  


In [16]:

# 5) Compute the posterior of (beta1 + beta2 + beta3)
beta1_vals = trace.posterior["beta1"].values.flatten()
beta2_vals = trace.posterior["beta2"].values.flatten()
beta3_vals = trace.posterior["beta3"].values.flatten()

sum_betas = beta1_vals + beta2_vals + beta3_vals
hdi_sum = az.hdi(sum_betas, hdi_prob=0.95)
lower_95, upper_95 = hdi_sum[0], hdi_sum[1]

print(f"\n95% Credible Interval for (beta1 + beta2 + beta3): [{lower_95:.3f}, {upper_95:.3f}]")





95% Credible Interval for (beta1 + beta2 + beta3): [2.968, 3.047]


In [None]:
# 6) Plot the traces (optional)
az.plot_trace(trace, var_names=["beta0", "beta1", "beta2", "beta3", "sigma"])
plt.tight_layout()
plt.show()