In [None]:
import seaborn as sns
from experiments import perform_experiment

In [None]:
# perform_experiment(100) returns a pair. The first element is the summary table
# that the bar-chart cell indexes by "Method", "MSE", "TPR" and "FPR"; the second
# is the table the boxplot cell passes to seaborn as `data`.
# NOTE(review): the meaning of the argument 100 is not visible here - confirm
# against experiments.perform_experiment.
#
# `_` doubles as IPython's "last output" variable, so using it as storage is
# fragile. The second result therefore gets a descriptive name; `_` is kept as an
# alias only because a later cell still reads `data=_`.
results_mean, results_runs = perform_experiment(100)
_ = results_runs
print(results_mean)

In [None]:
import matplotlib.pyplot as plt

# Metrics to chart, one stacked panel each, and the method labels shared by
# every panel's x-axis.
metrics = ["MSE", "TPR", "FPR"]
methods = results_mean["Method"]
tick_positions = range(len(methods))

fig, axes = plt.subplots(len(metrics), 1, figsize=(10, 15))

# Walk the panels and the metric names together instead of indexing by position.
for panel, metric in zip(axes, metrics):
    panel.bar(methods, results_mean[metric])
    panel.set_title(f"{metric} Comparison")
    panel.set_xlabel("Method")
    panel.set_ylabel(metric)
    # Fix the tick locations before assigning the rotated labels.
    panel.set_xticks(tick_positions)
    panel.set_xticklabels(methods, rotation=15)

plt.tight_layout()
plt.show()

In [None]:
# Distribution of MSE per method. `_` here is the second value returned by
# perform_experiment in the earlier cell (it was unpacked into `_` there).
box_fig, box_ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=_, x="Method", y="MSE", ax=box_ax)
box_ax.set_title("MSE Boxplot by Method")
box_ax.set_xlabel("Method")
box_ax.set_ylabel("Mean Squared Error")
# The new figure is the current one, so the pyplot helpers below act on it.
plt.xticks(rotation=15)
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import pandas as pd
from numpy.linalg import norm
from data_generation import generate_data
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

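# NOTE: no mock of generate_data is defined in this cell; the real generate_data imported from data_generation above is what gets called below.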


def group_lasso(X, y, groups, lambda_, max_iter=100, tol=1e-6):
    """Fit group-penalised coefficients by cycling over the groups.

    Each sweep visits every group, forms the residual with that group's own
    contribution added back, and replaces the group's coefficients with a
    shrunken copy of ``X_g.T @ residual`` (or zeros when that vector's norm
    does not exceed ``lambda_ * sqrt(group size)``).

    NOTE(review): the update applies no scaling by ``X_g.T @ X_g``; this
    presumably relies on each group's columns being orthonormal - confirm
    against the data generator.

    Parameters
    ----------
    X : 2-D array, indexed as ``X[:, g]`` for each group ``g``.
    y : response vector compatible with ``X @ beta``.
    groups : iterable of column-index collections (each must support ``len``).
    lambda_ : penalty strength.
    max_iter : maximum number of full sweeps over the groups.
    tol : stop once the coefficient vector moves by less than this norm
        between consecutive sweeps.

    Returns
    -------
    (beta, fitted) : the coefficient vector and ``X @ beta``.
    """
    coef = np.zeros(X.shape[1])
    for _ in range(max_iter):
        previous = coef.copy()
        for idx in groups:
            X_g = X[:, idx]
            # Residual with the current group's own fit added back in.
            partial_residual = y - X @ coef + X_g @ coef[idx]
            score = X_g.T @ partial_residual
            score_norm = norm(score)
            cutoff = lambda_ * np.sqrt(len(idx))
            # A NaN norm is treated like a sub-threshold group: zero it out.
            drop_group = np.isnan(score_norm) or score_norm <= cutoff
            coef[idx] = 0 if drop_group else (1 - cutoff / score_norm) * score
        if norm(coef - previous) < tol:
            break
    return coef, X @ coef

def compute_cp(y_true, y_pred, df, sigma2):
    """Return the Cp score: residual sum of squares plus ``2 * sigma2 * df``.

    Parameters
    ----------
    y_true, y_pred : arrays that can be subtracted element-wise.
    df : degrees-of-freedom term multiplying the penalty.
    sigma2 : noise-variance estimate used in the penalty.
    """
    residuals = y_true - y_pred
    penalty = 2 * sigma2 * df
    return np.sum(residuals ** 2) + penalty

def select_lambda_group_lasso_with_cp(X, y, groups, lambdas=None):
    """Choose the group-lasso penalty with the lowest Cp score over a grid.

    For every candidate penalty the model is fitted with ``group_lasso`` and
    scored with ``compute_cp``, using the number of groups whose coefficient
    norm exceeds 1e-6 as the degrees of freedom.

    Parameters
    ----------
    X, y : design matrix and response, forwarded unchanged to ``group_lasso``.
    groups : re-iterable collection of column-index collections, one per group.
    lambdas : iterable of penalties, optional
        Candidates to evaluate; defaults to ``np.logspace(0.5, 2, 100)``.

    Returns
    -------
    best_beta : coefficients of the lowest-Cp fit (first one wins on ties), or
        ``None`` when no candidate scored strictly below infinity (for example
        an empty grid or all-NaN scores).
    best_lambda : the penalty that produced ``best_beta``, or ``None``.
    all_lambdas : list of the penalties evaluated, in evaluation order.
    cp_values : list of Cp scores aligned with ``all_lambdas``.
    """
    if lambdas is None:
        lambdas = np.logspace(0.5, 2, 100)

    # Noise-variance estimate: mean squared residual of the pseudo-inverse
    # least-squares fit on all columns.
    beta_full = np.linalg.pinv(X.T @ X) @ X.T @ y
    sigma2 = np.mean((y - X @ beta_full) ** 2)

    cp_values = []
    all_lambdas = []

    best_cp = float('inf')
    best_beta = None
    best_lambda = None

    for lam in lambdas:
        beta, y_pred = group_lasso(X, y, groups, lambda_=lam)
        # Degrees of freedom = number of groups left with a non-negligible norm.
        df = sum(1 for g in groups if norm(beta[g]) > 1e-6)
        cp = compute_cp(y, y_pred, df, sigma2)

        cp_values.append(cp)
        all_lambdas.append(lam)

        # Strict comparison: a NaN score is never selected and ties keep the
        # earliest candidate. Only the winning coefficients are retained; the
        # per-candidate vectors were never returned, so they are not stored.
        if cp < best_cp:
            best_cp = cp
            best_beta = beta
            best_lambda = lam

    return best_beta, best_lambda, all_lambdas, cp_values

# Fit once on the seed-0 dataset and keep the full lambda/Cp path for plotting.
X, Y, group_indices = generate_data(seed=0)
beta_lasso, best_lambda, lambdas_cp, cp_vals = select_lambda_group_lasso_with_cp(X, Y, group_indices)

# Tabulate the path ordered by lambda. sort_values returns a new frame, so the
# cell gives the same result on every re-run without mutating anything in place.
df_cp = pd.DataFrame({"Lambda": lambdas_cp, "Cp": cp_vals}).sort_values("Lambda")

# Plot from the sorted frame so the line is always drawn in increasing-lambda
# order, whatever order the candidates were evaluated in.
plt.figure(figsize=(8, 5))
plt.plot(np.log10(df_cp["Lambda"]), df_cp["Cp"], marker='o')
plt.xlabel(r'$\log_{10}(\lambda)$')
plt.ylabel(r'$C_p$')
plt.title('Cp Criterion vs. Lambda for Group Lasso')
plt.grid(True)
plt.tight_layout()
plt.show()