In [None]:
import pymc as pm
import numpy as np
import pandas as pd
import arviz as az
import matplotlib.pyplot as plt

In [None]:

# Load the data set: `exercise_hours` is the predictor, `cholesterol` the response.
data = pd.read_csv('date_colesterol.csv')
t = data['exercise_hours'].values
y = data['cholesterol'].values

# Column view of the predictor, shape (N, 1), so it broadcasts against the
# per-component coefficient vectors of shape (K,) to give (N, K) means.
t_col = t[:, None]

# Candidate numbers of mixture components to fit and compare.
clusters_list = [3, 4, 5]
# Fitted results keyed by str(K): the InferenceData and the PyMC model.
idatas = {}
models = {}

for K in clusters_list:
    with pm.Model() as model:
        # Prior for mixture weights (symmetric Dirichlet over K components).
        w = pm.Dirichlet('w', a=np.ones(K))

        # Per-component quadratic regression coefficients:
        # mean_k(t) = alpha_k + beta_k * t + gamma_k * t**2
        alpha = pm.Normal('alpha', mu=y.mean(), sigma=10, shape=K)
        beta = pm.Normal('beta', mu=0, sigma=10, shape=K)
        gamma = pm.Normal('gamma', mu=0, sigma=10, shape=K)

        # Per-component observation noise.
        sigma = pm.HalfNormal('sigma', sigma=10, shape=K)

        # Component means with shape (N, K). The mixture axis must be the LAST
        # axis so that it lines up with `w` and `sigma` (both shape (K,)); the
        # previous (K, N) layout put the components on the first axis, which
        # does not broadcast against `sigma`/`w` unless N happens to equal K.
        mu = alpha + beta * t_col + gamma * t_col**2

        # pm.NormalMixture marginalizes over the latent component assignment.
        # NOTE(review): there is no ordering constraint on the components, so
        # chains may permute component labels; check r_hat before interpreting
        # per-component summaries.
        y_obs = pm.NormalMixture('y_obs', w=w, mu=mu, sigma=sigma, observed=y)

        print(f"Sampling for K={K}...")
        trace = pm.sample(1000, tune=1000, target_accept=0.9, random_seed=123, return_inferencedata=True)
        # Add the pointwise log-likelihood group to `trace`; az.compare needs it
        # to compute WAIC.
        pm.compute_log_likelihood(trace)

        idatas[str(K)] = trace
        models[str(K)] = model

# Posterior summary table for the 3-component fit (weights and regression
# coefficients only).
print("\nSummary for K=3:")
summary_vars = ['w', 'alpha', 'beta', 'gamma']
summary_k3 = az.summary(idatas['3'], var_names=summary_vars)
print(summary_k3)

# Rank the fitted models by WAIC, reported on the deviance scale.
comp = az.compare(idatas, ic="waic", scale="deviance")
print("\nModel Comparison Table:")
print(comp)

# Plot the comparison table and display the figure.
az.plot_compare(comp)
plt.title("Model Comparison (WAIC)")
plt.show()