In [1]:
import os
import numpy as np
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, r2_score, silhouette_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder, StandardScaler, OrdinalEncoder
from XPER.compute.Performance import ModelPerformance
from sklearn_extra.cluster import KMedoids
from loguru import logger
from load_data import load_datasets
from utils import evaluate_model, initiate_model, identify_problem_type
from config import BASE_DIR, SAMPLE_SIZE, N_FEATURES


# Create the root results directory (BASE_DIR comes from config) before any
# dataset is processed; exist_ok=True makes re-running this cell harmless.
os.makedirs(BASE_DIR, exist_ok=True)


def preprocess_data(df: pd.DataFrame, target_col: str, dataset_name: str, sample_size: int = 500, n_features: int = 6):
    """Sample, persist, split and encode a dataset, then determine its problem type.

    Steps, in order:
      1. Down-sample ``df`` to ``sample_size`` rows (fixed seed 42) when it is larger.
      2. Write the (possibly down-sampled) frame to ``full_dataset.csv`` inside the
         module-level ``data_dir``, so ``create_directories()`` must have run first.
      3. Keep the first ``n_features`` non-target columns as features.
      4. Split 80/20 into train and test with ``random_state=3``.
      5. Ordinal-encode object/category feature columns, fitting on the train split only.
      6. Ask ``identify_problem_type`` for the model type and class information.

    Args:
        df: Full dataset including the target column.
        target_col: Name of the target column in ``df``.
        dataset_name: Human-readable dataset name, forwarded to ``identify_problem_type``.
        sample_size: Maximum number of rows kept.
        n_features: Number of leading feature columns kept.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, model_type, num_classes, classification)``
        with all four data objects re-indexed from 0.
    """
    label_encoder = LabelEncoder()

    # Limit dataset size
    if df.shape[0] > sample_size:
        df = df.sample(n=sample_size, random_state=42).reset_index(drop=True)

    # Save the (possibly down-sampled) dataset.
    # NOTE(review): dataset_dir is created but the CSV is written to the module-level
    # data_dir; the original behaviour is kept here -- confirm which one is intended.
    dataset_dir = os.path.join(BASE_DIR, dataset_name.replace(" ", "_"))
    os.makedirs(dataset_dir, exist_ok=True)
    df.to_csv(os.path.join(data_dir, "full_dataset.csv"), index=False)

    # Select features and target
    X = df.drop(columns=[target_col]).iloc[:, :n_features]
    y = df[target_col]

    # Split data. The explicit copies detach the splits from the frame they were
    # sliced from, so the in-place column assignments and index resets below never
    # trigger pandas' SettingWithCopy machinery.
    X_train, X_test, y_train, y_test = (
        part.copy() for part in train_test_split(X, y, test_size=0.2, random_state=3)
    )

    # Encode categorical features (fit on train only to avoid leaking test information).
    categorical_cols = X.select_dtypes(include=["object", "category"]).columns.tolist()
    if categorical_cols:
        # A category that only occurs in the test split is mapped to -1 instead of
        # raising, which would otherwise abort the whole run for small samples.
        encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
        X_train[categorical_cols] = encoder.fit_transform(X_train[categorical_cols])
        X_test[categorical_cols] = encoder.transform(X_test[categorical_cols])

    # Determine problem type.
    # NOTE(review): the helper also receives y_train/y_test and a LabelEncoder, so it
    # presumably encodes the targets in place -- confirm against utils.identify_problem_type.
    model_type, classification, num_classes = identify_problem_type(dataset_name, y_train, y_test, target_col, label_encoder)

    # Reset indexes (loop variable deliberately not named `df`, to avoid shadowing the parameter)
    for part in (X_train, X_test, y_train, y_test):
        part.reset_index(drop=True, inplace=True)

    return X_train, X_test, y_train, y_test, model_type, num_classes, classification

def compute_xper(X: pd.DataFrame, y: pd.DataFrame, model, classification: bool):
    """Compute XPER values for ``model`` on ``X``/``y``, persist them, and cluster them.

    The same ``X``/``y`` arrays are passed to ``ModelPerformance`` in both of its
    data slots, and the metric is "AUC" for classification, "R2" otherwise.

    Returns:
        Whatever ``apply_kmedoids_clustering`` returns for the per-instance XPER values.
    """
    if classification:
        metric = "AUC"
    else:
        metric = "R2"

    performance = ModelPerformance(
        X.values,
        y.values,
        X.values,
        y.values,
        model,
        sample_size=X.shape[0],
    )
    phi, phi_i_j = performance.calculate_XPER_values([metric])

    # Persist first, then cluster on the per-instance values.
    save_xper_results(X, phi, phi_i_j)
    clustering_outcome = apply_kmedoids_clustering(X, phi_i_j)
    return clustering_outcome


def save_xper_results(X: pd.DataFrame, phi, phi_i_j):
    """Write global and per-instance XPER values for the training data to ``xper_dir``.

    ``phi`` goes to ``train_global_xper.csv`` under a single "Global XPER" column;
    ``phi_i_j`` goes to ``train_per_instance_xper.csv`` with a leading "Benchmark"
    column followed by one column per feature of ``X``.
    """
    global_frame = pd.DataFrame(phi, columns=["Global XPER"])
    global_path = os.path.join(xper_dir, "train_global_xper.csv")
    global_frame.to_csv(global_path, index=False)

    instance_columns = ["Benchmark", *X.columns]
    instance_path = os.path.join(xper_dir, "train_per_instance_xper.csv")
    pd.DataFrame(phi_i_j, columns=instance_columns).to_csv(instance_path, index=False)


def apply_kmedoids_clustering(X: pd.DataFrame, phi_i_j):
    """Cluster standardised per-instance XPER values with K-Medoids and persist the winner.

    Tries 2 to 5 clusters, keeps the fit with the highest silhouette score, dumps that
    model to ``model_dir`` and writes the training cluster assignment to ``xper_dir``.

    Returns:
        Tuple ``(labels, n_clusters, silhouette, scaler)`` for the selected clustering,
        where ``scaler`` is the fitted ``StandardScaler``.
    """
    scaler = StandardScaler()
    # Column 0 is the benchmark column; only the feature contributions are clustered.
    XPER_scaled = scaler.fit_transform(phi_i_j[:, 1:])

    best_score = -1
    best_n_clusters = 2
    best_kmedoid = None
    for candidate_k in range(2, 6):
        candidate = KMedoids(n_clusters=candidate_k, random_state=3)
        candidate.fit(XPER_scaled)
        candidate_score = silhouette_score(XPER_scaled, candidate.labels_)

        if candidate_score > best_score:
            best_score = candidate_score
            best_n_clusters = candidate_k
            best_kmedoid = candidate

    model_path = os.path.join(model_dir, "best_xper_kmedoid.pkl")
    joblib.dump(best_kmedoid, model_path)

    clusters_path = os.path.join(xper_dir, "train_xper_clusters.csv")
    assignment = pd.DataFrame({"Index": X.index, "Cluster": best_kmedoid.labels_})
    assignment.to_csv(clusters_path, index=False)

    logger.info(f"✅ XPER clustering saved in {xper_dir}/train_xper_clusters.csv")

    return best_kmedoid.labels_, best_n_clusters, best_score, scaler

def cluster_feature_based(X: pd.DataFrame):
    """Cluster the standardised feature matrix with K-Medoids and persist the winner.

    Tries 2 to 5 clusters, keeps the fit with the highest silhouette score, dumps that
    model to ``model_dir`` as ``best_feature_kmedoid.pkl`` and writes the training
    cluster assignment to ``train_feature_clusters.csv`` (stored in ``xper_dir``,
    next to the XPER cluster files).

    Args:
        X: Numeric training features (categoricals already encoded).

    Returns:
        Tuple ``(labels, n_clusters, silhouette, scaler)`` for the selected clustering,
        where ``scaler`` is the fitted ``StandardScaler`` needed to assign test rows.
    """
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    best_score, best_n_clusters, best_kmedoid = -1, 2, None
    for n_clusters in range(2, 6):
        kmedoid = KMedoids(n_clusters=n_clusters, random_state=3).fit(X_scaled)
        labels = kmedoid.labels_
        score = silhouette_score(X_scaled, labels)

        # Strict '>' keeps the smallest cluster count on ties.
        if score > best_score:
            best_score, best_n_clusters, best_kmedoid = score, n_clusters, kmedoid

    joblib.dump(best_kmedoid, os.path.join(model_dir, "best_feature_kmedoid.pkl"))

    feature_cluster_df = pd.DataFrame({"Index": X.index, "Cluster": best_kmedoid.labels_})
    feature_cluster_df.to_csv(os.path.join(xper_dir, "train_feature_clusters.csv"), index=False)

    # This is the feature-based clustering; the message previously claimed "XPER".
    logger.info(f"✅ Feature clustering saved in {xper_dir}/train_feature_clusters.csv")

    return best_kmedoid.labels_, best_n_clusters, best_score, scaler

def train_and_evaluate_models(X_train: pd.DataFrame, y_train: pd.DataFrame, target_col: str, cluster_labels: np.ndarray, model_type: str, model_prefix: str = None):
    """Train one model per cluster and score it on that cluster's training rows.

    For every distinct value in ``cluster_labels``:
      * a classification cluster containing a single class is recorded as "pure"
        (no model is trained, the class is stored in the module-level
        ``pure_clusters[model_prefix]``) and gets a perfect score;
      * otherwise a model from ``initiate_model`` is fitted, dumped to ``model_dir``
        as ``{model_prefix}_cluster_{cluster}.pkl``, the cluster's rows are written to
        ``data_dir``, and AUC/accuracy (classification) or R² (regression) is computed.

    Args:
        X_train: Training features; not modified (a copy receives the "Cluster" column).
        y_train: Training targets, index-aligned with ``X_train`` (Series or one-column frame).
        target_col: Name of the target column.
        cluster_labels: Cluster id per training row, in row order.
        model_type: "binary", "multiclass", or anything else for regression.
        model_prefix: Prefix for artifact names and key into ``pure_clusters``.

    Returns:
        Tuple ``(cluster_results, label_encoders, pure_clusters)``: per-cluster metric
        dicts, the per-cluster ``LabelEncoder`` objects (classification only), and the
        module-level ``pure_clusters`` mapping.
    """
    is_classification = model_type in ["binary", "multiclass"]

    # Work on a copy so the caller's frame does not silently gain a "Cluster" column.
    X_train = X_train.copy()
    X_train["Cluster"] = cluster_labels
    cluster_results, label_encoders = {}, {}

    for cluster in np.unique(cluster_labels):
        cluster_indices = X_train[X_train["Cluster"] == cluster].index
        X_train_cluster = X_train.loc[cluster_indices].drop(columns=["Cluster"])
        y_train_cluster = y_train.loc[cluster_indices]

        if isinstance(y_train_cluster, pd.Series):
            # Name the column explicitly so this works whatever the Series was called.
            y_train_cluster = y_train_cluster.to_frame(name=target_col)

        unique_classes = np.sort(y_train_cluster[target_col].unique())

        if is_classification and len(unique_classes) == 1:
            logger.info(f"{model_prefix} cluster {cluster} is purely class {unique_classes[0]}. Assigning perfect score.")
            cluster_results[cluster] = {
                # Same key as trained clusters, so consumers can read every entry uniformly.
                "AUC/R² Score": 1.0,
                # Legacy key, kept for backward compatibility.
                "Score": 1.0,
                "Accuracy": 1.0,
                "Cluster Size": len(cluster_indices),
                "Train Time (s)": "Pure Cluster",
            }
            # setdefault avoids a KeyError for prefixes other than the preset ones.
            pure_clusters.setdefault(model_prefix, {})[str(cluster)] = unique_classes[0]
            continue

        if is_classification:
            # Re-encode per cluster: a cluster may contain only a subset of the classes,
            # and the model expects contiguous labels starting at 0.
            temp_encoder = LabelEncoder()
            temp_encoder.fit(unique_classes)
            y_train_cluster_encoded = temp_encoder.transform(y_train_cluster[target_col])
            label_encoders[cluster] = temp_encoder
        else:
            temp_encoder = None
            y_train_cluster_encoded = y_train_cluster[target_col].values

        start_time = time.time()
        cluster_model = initiate_model(model_type, len(unique_classes) if model_type != "regression" else None)
        cluster_model.fit(X_train_cluster, y_train_cluster_encoded)
        train_time = round(time.time() - start_time, 2)

        joblib.dump(cluster_model, os.path.join(model_dir, f"{model_prefix}_cluster_{cluster}.pkl"))
        pd.concat([X_train_cluster, y_train_cluster], axis=1).to_csv(os.path.join(data_dir, f"{model_prefix}_cluster_{cluster}.csv"), index=False)

        y_pred_temp = cluster_model.predict(X_train_cluster)
        y_pred = temp_encoder.inverse_transform(y_pred_temp) if temp_encoder else y_pred_temp

        # Compute scores
        if is_classification:
            probabilities = cluster_model.predict_proba(X_train_cluster)
            if model_type == "multiclass" and np.unique(y_train_cluster_encoded).shape[0] > 2:
                score = roc_auc_score(y_train_cluster_encoded, probabilities, multi_class="ovr")
            else:
                # Two classes: AUC is computed from the probability of encoded class 1.
                score = roc_auc_score(y_train_cluster_encoded, probabilities[:, 1])
            # y_pred is in the original label space, so compare against the original
            # targets rather than the per-cluster encoded ones.
            accuracy = np.mean(y_train_cluster[target_col].values == y_pred)
        else:
            accuracy = None
            score = r2_score(y_train_cluster_encoded, y_pred)

        cluster_results[cluster] = {"AUC/R² Score": score, "Accuracy": accuracy, "Cluster Size": len(cluster_indices), "Train Time (s)": train_time}

    return cluster_results, label_encoders, pure_clusters

def _load_saved_models():
    """Load every ``.pkl`` artifact in ``model_dir``, keyed by file name without extension."""
    # NOTE: joblib.load unpickles arbitrary objects; model_dir must only contain
    # artifacts written by this pipeline.
    return {
        fname.split('.')[0]: joblib.load(os.path.join(model_dir, fname))
        for fname in os.listdir(model_dir)
        if fname.endswith(".pkl")
    }


def _is_positive_class(label) -> bool:
    """Return True when ``label`` denotes class 1, whether stored as int, float or string."""
    try:
        return float(label) == 1.0
    except (TypeError, ValueError):
        return False


def _evaluate_test_clusters(X_test, y_test, predicted_labels, models, label_encoder, pure_clusters, prefix):
    """Score each predicted test cluster with its cluster model (or its pure-class fallback).

    ``X_test`` must already carry a "Cluster" column holding ``predicted_labels``.
    ``prefix`` ("xper" or "feature") selects both the model artifacts
    (``{prefix}_cluster_{id}``) and the entry of ``pure_clusters`` to consult.
    """
    cluster_results = {}
    for cluster in np.unique(predicted_labels):
        cluster_indices = X_test[X_test["Cluster"] == cluster].index
        X_test_cluster = X_test.loc[cluster_indices].drop(columns=["Cluster"])
        y_test_cluster = y_test.loc[cluster_indices]
        # Flatten so a one-column frame cannot broadcast against the 1-D predictions.
        y_true = np.ravel(y_test_cluster.values)

        model_key = f'{prefix}_cluster_{cluster}'
        # Explicit membership checks instead of a broad `except KeyError`, so that a
        # KeyError raised inside predict()/predict_proba() is no longer mistaken for
        # "this cluster was pure". Requiring the encoder as well ignores stale model
        # files left in model_dir by an earlier run.
        if model_key in models and cluster in label_encoder:
            cluster_model = models[model_key]
            y_pred_temp = cluster_model.predict(X_test_cluster)
            y_pred = label_encoder[cluster].inverse_transform(y_pred_temp)
            # NOTE: column 1 is the positive class; this only works for binary classification.
            y_pred_proba = cluster_model.predict_proba(X_test_cluster)[:, 1]
        else:
            known_pure = pure_clusters.get(prefix, {})
            if str(cluster) not in known_pure:
                raise KeyError(f"No trained model and no pure-cluster class recorded for {prefix} cluster {cluster}")
            pure_class = known_pure[str(cluster)]
            logger.debug(f"No trained model for {prefix} cluster {cluster}; using pure-cluster fallback")
            logger.info(f"Pure cluster: {pure_class}")
            y_pred = np.full(X_test_cluster.shape[0], pure_class)
            # The stored class is usually numeric, so compare by value rather than
            # against the string "1" (which never matched an integer label).
            auc_prob_perfect = 0.999 if _is_positive_class(pure_class) else 0.001
            y_pred_proba = np.full(X_test_cluster.shape[0], auc_prob_perfect)

        n_test_classes = len(np.unique(y_true))
        logger.info(n_test_classes)  # NOTE: If y_test_cluster only has one class auc fails.
        if n_test_classes != 2:
            logger.info(f"For cluster {cluster} the test set predictions were purly class {y_test_cluster.values[0]}, with the following probabilities: {y_pred_proba}")
            auc_score = 0.5
        else:
            auc_score = roc_auc_score(y_true, y_pred_proba)
        accuracy = np.mean(y_true == y_pred)

        cluster_results[cluster] = {"AUC Score": auc_score, "Accuracy": accuracy, "Cluster Size": len(cluster_indices)}

    return cluster_results


def test_eval_xper(X_test: pd.DataFrame, y_test: pd.DataFrame, pure_clusters: dict, classification: bool, scaler: StandardScaler, label_encoder: dict):
    """Evaluate the test split with the per-cluster models selected by XPER clustering.

    Computes XPER values of the saved baseline model on the test data, writes them to
    ``xper_dir``, assigns each test row to a cluster with the saved ``best_xper_kmedoid``
    model, and scores every cluster.

    Args:
        X_test: Test features (copied; the caller's frame is not modified).
        y_test: Test targets, index-aligned with ``X_test``.
        pure_clusters: Mapping ``{"xper": {cluster_id_str: class}, ...}`` of single-class clusters.
        classification: Selects the XPER metric ("AUC" if True, otherwise "R2").
        scaler: ``StandardScaler`` fitted on the training XPER values.
        label_encoder: Mapping of cluster id to the ``LabelEncoder`` fitted for that cluster.

    Returns:
        Dict of cluster id to ``{"AUC Score", "Accuracy", "Cluster Size"}``.
        The per-cluster scoring reads ``predict_proba`` column 1, so it only
        supports binary classification.
    """
    X_test = X_test.copy()

    models = _load_saved_models()

    XPER_ = ModelPerformance(X_test.values, y_test.values, X_test.values, y_test.values, models['baseline_model'])
    phi, phi_i_j = XPER_.calculate_XPER_values(["AUC"] if classification else ["R2"])

    pd.DataFrame(phi, columns=["Global XPER"]).to_csv(os.path.join(xper_dir, "test_global_xper.csv"), index=False)
    pd.DataFrame(phi_i_j, columns=["Benchmark"] + list(X_test.columns)).to_csv(os.path.join(xper_dir, "test_per_instance_xper.csv"), index=False)

    # Column 0 is the benchmark column; cluster on the feature contributions only.
    XPER_scaled = scaler.transform(phi_i_j[:, 1:])
    predicted_labels = models['best_xper_kmedoid'].predict(XPER_scaled)
    X_test["Cluster"] = predicted_labels
    pd.DataFrame({"Index": X_test.index, "Cluster": predicted_labels}).to_csv(os.path.join(xper_dir, "test_xper_clusters.csv"), index=False)

    logger.info(f"✅ XPER clustering saved in {xper_dir}/test_xper_clusters.csv")

    return _evaluate_test_clusters(X_test, y_test, predicted_labels, models, label_encoder, pure_clusters, "xper")


def test_eval_feature(X_test: pd.DataFrame, y_test: pd.DataFrame, pure_clusters: dict, classification: bool, scaler: StandardScaler, label_encoder: dict):
    """Evaluate the test split with the per-cluster models selected by feature clustering.

    Scales the test features with ``scaler``, assigns each row to a cluster with the
    saved ``best_feature_kmedoid`` model, writes the assignment to ``xper_dir`` and
    scores every cluster.

    Args:
        X_test: Test features (copied; the caller's frame is not modified).
        y_test: Test targets, index-aligned with ``X_test``.
        pure_clusters: Mapping ``{"feature": {cluster_id_str: class}, ...}`` of single-class clusters.
        classification: Unused here; kept so both evaluators share one signature.
        scaler: ``StandardScaler`` fitted on the training features.
        label_encoder: Mapping of cluster id to the ``LabelEncoder`` fitted for that cluster.

    Returns:
        Dict of cluster id to ``{"AUC Score", "Accuracy", "Cluster Size"}``.
        The per-cluster scoring reads ``predict_proba`` column 1, so it only
        supports binary classification.
    """
    X_test = X_test.copy()

    models = _load_saved_models()

    X_scaled = scaler.transform(X_test)
    predicted_labels = models['best_feature_kmedoid'].predict(X_scaled)
    X_test["Cluster"] = predicted_labels
    pd.DataFrame({"Index": X_test.index, "Cluster": predicted_labels}).to_csv(os.path.join(xper_dir, "test_feature_clusters.csv"), index=False)

    logger.info(f"✅ Feature clustering saved in {xper_dir}/test_feature_clusters.csv")

    return _evaluate_test_clusters(X_test, y_test, predicted_labels, models, label_encoder, pure_clusters, "feature")


def main(dataset_name: str, data: tuple):
    """Run the full experiment for one dataset and return its result record.

    Pipeline: preprocess, train and save the baseline model, compute XPER values and
    cluster them, cluster the raw features, train per-cluster models for both
    clusterings, then evaluate both on the test split. Artifacts are written to the
    module-level ``model_dir``/``data_dir``/``xper_dir``, so ``create_directories()``
    must have been called for this dataset first.

    Args:
        dataset_name: Human-readable dataset name.
        data: Tuple ``(df, target_col)``.

    Returns:
        Dict with dataset metadata, baseline scores, clustering quality, per-cluster
        train/test results and the total computation time in seconds.
    """
    df, target_col = data
    logger.info(f"Processing Dataset: {dataset_name}")
    start_time = time.time()

    # Preprocess Data
    X_train, X_test, y_train, y_test, model_type, num_classes, classification = preprocess_data(df, target_col, dataset_name, sample_size=SAMPLE_SIZE, n_features=N_FEATURES)

    # Record the training shape now: the per-cluster training step further down may
    # attach a helper "Cluster" column to the frame it receives, which would inflate
    # the reported feature count by one if the shape were read at the end.
    n_train_samples, n_train_features = X_train.shape

    # Train Baseline Model
    baseline_model = initiate_model(model_type, num_classes)
    baseline_model.fit(X_train, y_train)
    joblib.dump(baseline_model, os.path.join(model_dir, "baseline_model.pkl"))

    # Evaluate Baseline Model
    baseline_score_train, baseline_score_test = evaluate_model(baseline_model, X_train, X_test, y_train, y_test, model_type)
    logger.info(f"Baseline test roc_auc_score is: {baseline_score_test}")

    # Compute XPER-Based Clustering
    xper_cluster_labels, xper_best_n_clusters, xper_best_score, xper_scaler = compute_xper(X_train, y_train, baseline_model, classification)
    logger.info("XPER done")

    # Run Feature-Based KMedoids Clustering
    feature_cluster_labels, feature_best_n_clusters, feature_best_score, feature_scaler = cluster_feature_based(X_train)

    # Train Models for Each Cluster
    xper_cluster_results, label_encoder_xper, pure_clusters_xper = train_and_evaluate_models(X_train, y_train, target_col, xper_cluster_labels, model_type, "xper")
    logger.info("XPER Cluster done")
    feature_cluster_results, label_encoder_feature, pure_clusters_feature = train_and_evaluate_models(X_train, y_train, target_col, feature_cluster_labels, model_type, "feature")
    logger.info("Feature Cluster done")

    logger.info(f"Pure Clusters: {pure_clusters}")

    # Test Model on Clusters
    test_xper_cluster_results = test_eval_xper(X_test, y_test, pure_clusters_xper, classification, xper_scaler, label_encoder_xper)
    test_feature_cluster_results = test_eval_feature(X_test, y_test, pure_clusters_feature, classification, feature_scaler, label_encoder_feature)

    # Store Results
    result = {
        "Dataset": dataset_name,
        "Model Type": model_type,
        "Sample Count": n_train_samples,
        "Feature Count": n_train_features,
        "Baseline Model AUC/R² Train": baseline_score_train,
        "Baseline Model AUC/R² Test": baseline_score_test,
        "XPER-Based Cluster Count": xper_best_n_clusters,
        "XPER-Based Silhouette Score": xper_best_score,
        "XPER-Based Per-Cluster Scores": xper_cluster_results,
        "Feature-Based Cluster Count": feature_best_n_clusters,
        "Feature-Based Silhouette Score": feature_best_score,
        "Feature-Based Per-Cluster Scores": feature_cluster_results,
        "Test XPER Cluster Results": test_xper_cluster_results,
        "Test Feature Cluster Results": test_feature_cluster_results,
        "Computation Time (s)": round(time.time() - start_time, 2),
    }

    return result

def create_directories(folder_name: str):
    """Point the module-level output paths at ``folder_name`` and make sure they exist.

    Rebinds the globals ``model_dir``, ``data_dir`` and ``xper_dir`` to the "models",
    "data" and "xper_values" sub-folders of ``BASE_DIR/folder_name``, and resets
    ``pure_clusters`` to empty "xper" and "feature" entries.
    """
    global model_dir, data_dir, xper_dir, pure_clusters

    dataset_root = os.path.join(BASE_DIR, folder_name)
    pure_clusters = {"xper": {}, "feature": {}}

    model_dir, data_dir, xper_dir = (
        os.path.join(dataset_root, sub_folder)
        for sub_folder in ("models", "data", "xper_values")
    )

    for target in (model_dir, data_dir, xper_dir):
        os.makedirs(target, exist_ok=True)


def process_datasets(selected_datasets=("Loan Status",)):
    """Load all datasets, run the experiment on the selected ones, and save the results.

    Args:
        selected_datasets: Names of the datasets to process. A single name may be
            given as a plain string; ``None`` processes every loaded dataset. The
            default keeps the previous behaviour of running "Loan Status" only.

    Returns:
        DataFrame with one row per processed dataset; it is also written to
        ``overall_results.csv`` in ``BASE_DIR``.
    """
    if isinstance(selected_datasets, str):
        # Guard against `"Loan" in "Loan Status"`-style substring matches.
        selected_datasets = (selected_datasets,)

    datasets = load_datasets()
    results = []

    for dataset_name, (df, target_col) in datasets.items():
        folder_name = dataset_name.lower().replace(" ", "_")

        # Create Directories (done for every loaded dataset, as before, including
        # those that are skipped below)
        create_directories(folder_name)
        logger.info(dataset_name)

        if selected_datasets is not None and dataset_name not in selected_datasets:
            continue

        result = main(dataset_name, (df, target_col))
        pd.DataFrame(result).to_csv(os.path.join(BASE_DIR, folder_name, "final_results.csv"))
        results.append(result)

    # Save Overall Results
    results_df = pd.DataFrame(results)
    results_df.to_csv(os.path.join(BASE_DIR, "overall_results.csv"))

    return results_df
    
# Execute dataset processing when this cell runs. The returned frame is bound to
# a module-level name so the cells below can inspect individual result columns.
results_df = process_datasets()

"""
ToDo: 

1. Potential other clusters
2. Compare kernel XPER
3. (Compare to benchmark study)
4. Run with dataset we got from them during class
5. Preprocessing of data and model selection 
6. (Different Models beyond XGBoost)
7. Alter XPER to return also coalititon values to cluster with those values also 
8. Use feature cluster also for test set

"""

[32m2025-01-30 18:50:34.103[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_datasets[0m:[36m358[0m - [1mIris[0m
[32m2025-01-30 18:50:34.104[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_datasets[0m:[36m358[0m - [1mLoan Status[0m
[32m2025-01-30 18:50:34.105[0m | [1mINFO    [0m | [36m__main__[0m:[36mmain[0m:[36m282[0m - [1mProcessing Dataset: Loan Status[0m
[32m2025-01-30 18:50:34.295[0m | [1mINFO    [0m | [36m__main__[0m:[36mmain[0m:[36m295[0m - [1mBaseline test roc_auc_score is: 0.7761278195488723[0m
Performing Computation:   0%|          | 0/1 [00:00<?, ?it/s]

254


Performing Computation: 100%|██████████| 1/1 [20:06<00:00, 1206.59s/it]
[32m2025-01-30 19:10:41.114[0m | [1mINFO    [0m | [36m__main__[0m:[36mapply_kmedoids_clustering[0m:[36m96[0m - [1m✅ XPER clustering saved in experiment_results_loan5000/loan_status/xper_values/train_xper_clusters.csv[0m
[32m2025-01-30 19:10:41.118[0m | [1mINFO    [0m | [36m__main__[0m:[36mmain[0m:[36m299[0m - [1mXPER done[0m
[32m2025-01-30 19:10:41.221[0m | [1mINFO    [0m | [36m__main__[0m:[36mcluster_feature_based[0m:[36m119[0m - [1m✅ XPER clustering saved in experiment_results_loan5000/loan_status/xper_values/train_feature_clusters.csv[0m
[32m2025-01-30 19:10:41.770[0m | [1mINFO    [0m | [36m__main__[0m:[36mmain[0m:[36m306[0m - [1mXPER Cluster done[0m
[32m2025-01-30 19:10:42.508[0m | [1mINFO    [0m | [36m__main__[0m:[36mmain[0m:[36m308[0m - [1mFeature Cluster done[0m
[32m2025-01-30 19:10:42.508[0m | [1mINFO    [0m | [36m__main__[0m:[36mmain[0m:[

254


Performing Computation: 100%|██████████| 1/1 [01:28<00:00, 88.57s/it]
[32m2025-01-30 19:12:11.249[0m | [1mINFO    [0m | [36m__main__[0m:[36mtest_eval_xper[0m:[36m202[0m - [1m✅ XPER clustering saved in experiment_results_loan5000/loan_status/xper_values/test_xper_clusters.csv[0m
[32m2025-01-30 19:12:11.273[0m | [1mINFO    [0m | [36m__main__[0m:[36mtest_eval_xper[0m:[36m222[0m - [1m2[0m
[32m2025-01-30 19:12:11.300[0m | [1mINFO    [0m | [36m__main__[0m:[36mtest_eval_xper[0m:[36m222[0m - [1m2[0m
[32m2025-01-30 19:12:11.333[0m | [1mINFO    [0m | [36m__main__[0m:[36mtest_eval_feature[0m:[36m246[0m - [1m✅ Feature clustering saved in experiment_results_loan5000/loan_status/xper_values/test_feature_clusters.csv[0m
[32m2025-01-30 19:12:11.343[0m | [1mINFO    [0m | [36m__main__[0m:[36mtest_eval_feature[0m:[36m266[0m - [1m1[0m
[32m2025-01-30 19:12:11.344[0m | [1mINFO    [0m | [36m__main__[0m:[36mtest_eval_feature[0m:[36m268[0m -

'\nToDo: \n\n1. Potential other clusters\n2. Compare kernel XPER\n3. (Compare to benchmark study)\n4. Run with dataset we got from them during class\n5. Preprocessing of data and model selection \n6. (Different Models beyond XGBoost)\n7. Alter XPER to return also coalititon values to cluster with those values also \n8. Use feature cluster also for test set\n\n'

In [2]:
results_df.columns

Index(['Dataset', 'Model Type', 'Sample Count', 'Feature Count',
       'Baseline Model AUC/R² Train', 'Baseline Model AUC/R² Test',
       'XPER-Based Cluster Count', 'XPER-Based Silhouette Score',
       'XPER-Based Per-Cluster Scores', 'Feature-Based Cluster Count',
       'Feature-Based Silhouette Score', 'Feature-Based Per-Cluster Scores',
       'Test XPER Cluster Results', 'Test Feature Cluster Results',
       'Computation Time (s)'],
      dtype='object')

In [3]:
results_df["Test XPER Cluster Results"].values[0]

{0: {'AUC Score': 0.9399038461538461, 'Accuracy': 0.9, 'Cluster Size': 60},
 1: {'AUC Score': 0.9604651162790698,
  'Accuracy': 0.9206349206349206,
  'Cluster Size': 63}}

In [4]:
results_df['Feature-Based Per-Cluster Scores'].values[0]

{0: {'AUC/R² Score': 1.0,
  'Accuracy': 0.9848484848484849,
  'Cluster Size': 66,
  'Train Time (s)': 0.12},
 1: {'AUC/R² Score': 1.0,
  'Accuracy': 1.0,
  'Cluster Size': 165,
  'Train Time (s)': 0.16},
 2: {'AUC/R² Score': 1.0,
  'Accuracy': 1.0,
  'Cluster Size': 79,
  'Train Time (s)': 0.14},
 3: {'AUC/R² Score': 1.0,
  'Accuracy': 1.0,
  'Cluster Size': 32,
  'Train Time (s)': 0.09},
 4: {'AUC/R² Score': 1.0,
  'Accuracy': 1.0,
  'Cluster Size': 149,
  'Train Time (s)': 0.19}}

In [5]:
results_df["Test Feature Cluster Results"].values[0]

{0: {'AUC Score': 0.5, 'Accuracy': 0.9, 'Cluster Size': 10},
 1: {'AUC Score': 0.6764705882352942,
  'Accuracy': 0.717948717948718,
  'Cluster Size': 39},
 2: {'AUC Score': 0.8611111111111112,
  'Accuracy': 0.9545454545454546,
  'Cluster Size': 22},
 3: {'AUC Score': 0.0, 'Accuracy': 0.2857142857142857, 'Cluster Size': 7},
 4: {'AUC Score': 0.7844827586206897,
  'Accuracy': 0.7111111111111111,
  'Cluster Size': 45}}