In [91]:
from core.models.regression import ChoquisticRegression


In [92]:
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from utils.visualization.plotting import (
	plot_coefficients,
	plot_interaction_matrix_2add_shapely,
	plot_horizontal_bar,
	plot_model_performance_comparison,
	plot_noise_robustness,
	plot_k_additivity_results,
	ensure_folder
)

from sklearn.metrics import accuracy_score, roc_auc_score



data_path = Path("../../data/diabetes.csv")
plot_folder = Path("test_plots")
ensure_folder(plot_folder)

# Load data
df = pd.read_csv(data_path)

X = df.drop(columns="Outcome")
y = df["Outcome"].to_numpy()
feature_names = X.columns.tolist()

# split data into train and test sets to evaluate generalization
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
    stratify=y,
)

# ==========================================================
# --- STEP 3: Train ChoquisticRegression models ---
# ==========================================================
models = {
    # "game": ChoquisticRegression(representation="game", k_add=2, scale_data=True) #,
    "mobius": ChoquisticRegression(representation="mobius", k_add=2, scale_data=True),
    "shapley": ChoquisticRegression(representation="shapley", k_add=2, scale_data=True),
}

results = {}

for name, model in models.items():
    print(f"\nTraining ChoquisticRegression ({name} representation)...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    y_proba = model.predict_proba(X_test)[:,1]
    auc = roc_auc_score(y_test, y_proba)
    results[name] = {
        "accuracy": acc,
        "roc_auc": auc,
        "model": model
    }
    print(f"→ Accuracy ({name}): {acc:.4f}")
    print(f"→ AUC ({name}): {auc:.4f}")



Training ChoquisticRegression (mobius representation)...
→ Accuracy (mobius): 0.7532
→ AUC (mobius): 0.8170

Training ChoquisticRegression (shapley representation)...
→ Accuracy (shapley): 0.7597
→ AUC (shapley): 0.8202


In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report
from core.models.regression import ChoquisticRegression
from itertools import combinations

from utils.visualization.plotting import plot_coefficients, plot_interaction_matrix_2add_shapley

# ==========================================================
# --- STEP 2: Split data ---
# ==========================================================
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# ==========================================================
# --- STEP 3: Train ChoquisticRegression models ---
# ==========================================================
models = {
    "mobius": ChoquisticRegression(representation="mobius", k_add=2, scale_data=True),
    "shapley": ChoquisticRegression(representation="shapley", k_add=2, scale_data=True),
}

results = {}

for name, model in models.items():
    print(f"\nTraining ChoquisticRegression ({name} representation)...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    y_proba = model.predict_proba(X_test)[:,1]
    auc = roc_auc_score(y_test, y_proba)
    results[name] = {"accuracy": acc, "roc_auc": auc, "model": model}
    print(f"→ Accuracy ({name}): {acc:.4f}")
    print(f"→ AUC ({name}): {auc:.4f}")

# ==========================================================
# --- STEP 4: Compute interactions for Shapley ---
# ==========================================================
model_shapley = results["shapley"]["model"]
if hasattr(model_shapley, "compute_interactions"):
    model_shapley.compute_interactions()

# ==========================================================
# --- STEP 5: Create output folder ---
# ==========================================================
output_folder = "results/tomas"
os.makedirs(output_folder, exist_ok=True)
print(f"\nPlots will be saved to '{output_folder}' folder.")

# ==========================================================
# --- STEP 6: Plot coefficients ---
# ==========================================================
print(f"coefs: {model_shapley.model_.coef_[0]}")

expanded_feature_names = feature_names.copy()
print("\nGenerating coefficients plot...")
for i, j in combinations(range(len(feature_names)), 2):
    expanded_feature_names.append(f"{feature_names[i]} × {feature_names[j]}")
plot_coefficients(
    feature_names=expanded_feature_names,
    all_coefficients=[model_shapley.model_.coef_[0]],
    plot_folder=output_folder,
    k_add=2
)
# Rename saved file
old_file = os.path.join(output_folder, "coefficients.png")
new_file = os.path.join(output_folder, "shapley_coefficients.png")
if os.path.exists(old_file):
    os.rename(old_file, new_file)
print(f"Coefficients plot saved as '{new_file}'.")

# ==========================================================
# --- STEP 7: Plot interaction matrix ---
# ==========================================================
print("\nGenerating interaction matrix plot...")

plot_interaction_matrix_2add_shapley(
    feature_names=feature_names,
    coefs=model_shapley.model_.coef_[0],
    plot_folder=output_folder
)
# Rename saved file
old_file = os.path.join(output_folder, "interaction_matrix_2add.png")
new_file = os.path.join(output_folder, "shapley_interaction_matrix.png")
if os.path.exists(old_file):
    os.rename(old_file, new_file)
print(f"Interaction matrix plot saved as '{new_file}'.")

# ==========================================================
# --- STEP 8: List saved files ---
# ==========================================================
print("\nSaved files in folder:")
print(os.listdir(output_folder))


ImportError: cannot import name 'plot_interaction_matrix_2add_shapley' from 'utils.visualization.plotting' (C:\Users\Tomas\OneDrive - Universidade de Lisboa\3ºano_LEFT\PIC-I\utils\visualization\plotting.py)

In [None]:
# ==========================================================
# --- SPLIT DATA ---
# ==========================================================
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# ==========================================================
# --- TRAIN MODELS ---
# ==========================================================
models = {
    "mobius": ChoquisticRegression(representation="mobius", k_add=2, scale_data=True),
    "shapley": ChoquisticRegression(representation="shapley", k_add=2, scale_data=True),
}

results = {}
for name, model in models.items():
    print(f"\nTraining {name} model...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]
    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)
    cls_report = classification_report(y_test, y_pred)
    results[name] = {
        "accuracy": acc,
        "roc_auc": auc,
        "classification_report": cls_report,
        "model": model
    }
    print(f"Accuracy ({name}): {acc:.4f}, AUC ({name}): {auc:.4f}")
    print(f"Classification report ({name}):\n{cls_report}")

# ==========================================================
# --- COMPUTE SHAPLEY INTERACTIONS (if possible) ---
# ==========================================================
model_shapley = results["shapley"]["model"]
if hasattr(model_shapley, "compute_interactions"):
    model_shapley.compute_interactions(X_train, y_train)

print("Shapley coef shape:", getattr(model_shapley, "coef_", None))
print("Shapley interaction matrix shape:", getattr(model_shapley, "interaction_matrix_", None))

# ==========================================================
# --- CREATE OUTPUT FOLDER ---
# ==========================================================
output_folder = "results/plots"
os.makedirs(output_folder, exist_ok=True)
print(f"\nPlots will be saved to '{output_folder}' folder.")

# ==========================================================
# --- PLOT MOBIUS (ONLY, since Shapley is None) ---
# ==========================================================
model_mobius = results["mobius"]["model"]
feature_names = X.columns.tolist()

# Coefficients
if getattr(model_mobius, "coef_", None) is not None:
    plot_coefficients(
        feature_names=feature_names,
        all_coefficients=[model_mobius.coef_],
        plot_folder=output_folder,
        k_add=2
    )
    print("Mobius coefficients plot saved.")

# Interaction matrix
if getattr(model_mobius, "interaction_matrix_", None) is not None:
    plot_interaction_matrix_2add(
        feature_names=feature_names,
        all_interaction_matrices=[model_mobius.interaction_matrix_],
        plot_folder=output_folder
    )
    print("Mobius interaction matrix plot saved.")

# ==========================================================
# --- LIST SAVED FILES ---
# ==========================================================
print("\nSaved files in folder:")
print(os.listdir(output_folder))


Training mobius model...
Accuracy (mobius): 0.7489, AUC (mobius): 0.8363
Classification report (mobius):
              precision    recall  f1-score   support

           0       0.77      0.87      0.82       150
           1       0.69      0.52      0.59        81

    accuracy                           0.75       231
   macro avg       0.73      0.70      0.71       231
weighted avg       0.74      0.75      0.74       231


Training shapley model...
Accuracy (shapley): 0.7619, AUC (shapley): 0.8406
Classification report (shapley):
              precision    recall  f1-score   support

           0       0.77      0.91      0.83       150
           1       0.74      0.49      0.59        81

    accuracy                           0.76       231
   macro avg       0.75      0.70      0.71       231
weighted avg       0.76      0.76      0.75       231

Shapley coef shape: None
Shapley interaction matrix shape: None

Plots will be saved to 'results/plots' folder.

Saved files in fo