In [2]:
# Load the Mohs test set (with complexity scores) and pull out the predictor
# columns and the three target columns used further down.
mohs_test = pd.read_csv("mohs_test_with_complexity_scores.csv")

# Predictors are taken by position: columns 3 through 12 of the CSV.
features = mohs_test.iloc[:, 3:13]

# Targets are taken by column label.
target_visit_duration = mohs_test.loc[:, 'Duration of Visit (min)']
target_stages = mohs_test.loc[:, 'Number of stages']
target_aneshetic = mohs_test.loc[:, 'Anesthetic Amount (ml)']

In [7]:
import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


# Evaluate the saved regressors (models/xgb_regressor_*.pkl) on a reproducible
# 60/40 split of the Mohs test set and collect train/test RMSE, MAE and R²
# for each target into `metrics_df`.

mohs_test = pd.read_csv("mohs_test_with_complexity_scores.csv")

# Explicit copy: the columns are re-assigned below, and doing that on a bare
# `.iloc` slice can raise SettingWithCopyWarning and write to a temporary.
all_features = mohs_test.iloc[:, 3:13].copy()

# Convert text columns to pandas categoricals once, on the full frame, so the
# train and test splits share the same category sets.
# NOTE(review): presumably the models were fitted on categorical inputs — confirm.
for col in all_features.columns:
    if all_features[col].dtype == 'object':
        all_features[col] = all_features[col].astype('category')

target_visit_duration = mohs_test['Duration of Visit (min)']
target_stages = mohs_test['Number of stages']
target_aneshetic = mohs_test['Anesthetic Amount (ml)']

# Display name -> target column.
targets = {
    "Number of Stages": target_stages,
    "Duration of Visit": target_visit_duration,
    "Anesthetic Amount": target_aneshetic
}

model_dir = "models"

# Display name -> serialized model file inside `model_dir`.
model_files = {
    "Number of Stages": "xgb_regressor_number_of_stages.pkl",
    "Duration of Visit": "xgb_regressor_visit_duration.pkl",
    "Anesthetic Amount": "xgb_regressor_anesthetic_amount.pkl"
}

# Column labels (and order) handed to a model that expects all ten features.
full_feature_list = [
    'Lesion  Size (cm)',
    'Recurrent Tumor (Y/N)',
    'Aggressive Histology (Y/N)',
    'Wound Management (H/M/L)',
    'Location (H/M/L)',
    'Treatment Delay (days)',
    'Age (years)',
    'Immunosuppressed (Y/N)',
    'Bleeding Risk (Y/N)',
    'Greater Average Time (Y/N)'
]


results = []

for name, filename in model_files.items():
    model_path = f"{model_dir}/{filename}"
    # joblib files are pickles: loading one executes code stored in the file,
    # so only load model files from a trusted source.
    model = joblib.load(model_path)
    target = targets[name]

    # Fixed seed keeps the split (and therefore the metrics) reproducible.
    # NOTE(review): the "Train" rows are only true training metrics if the
    # models were fitted on this exact split — confirm against the training notebook.
    X_train, X_test, y_train, y_test = train_test_split(
        all_features, target, test_size=0.4, random_state=42
    )

    # Pick the columns the model expects. A model fitted on all ten features
    # gets `full_feature_list`; otherwise fall back to the feature names the
    # estimator recorded at fit time, rather than relying on a list that is
    # not defined anywhere in this notebook.
    expected_n_features = model.n_features_in_
    if expected_n_features == len(full_feature_list):
        feature_columns = full_feature_list
    else:
        recorded_names = getattr(model, "feature_names_in_", None)
        if recorded_names is None or len(recorded_names) != expected_n_features:
            raise ValueError(f"Unexpected feature count for model {name} ({expected_n_features}).")
        feature_columns = list(recorded_names)
        missing_columns = [c for c in feature_columns if c not in all_features.columns]
        if missing_columns:
            raise ValueError(
                f"Model {name} expects columns missing from the data: {missing_columns}"
            )

    X_train_aligned = X_train[feature_columns]
    X_test_aligned = X_test[feature_columns]

    # Predictions
    y_pred_train = model.predict(X_train_aligned)
    y_pred_test = model.predict(X_test_aligned)

    # Metrics for training data.
    # RMSE is computed as sqrt(MSE): the `squared=False` keyword was deprecated
    # in scikit-learn 1.4 and removed in 1.6.
    train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
    train_mae = mean_absolute_error(y_train, y_pred_train)
    train_r2 = r2_score(y_train, y_pred_train)

    # Metrics for test data
    test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
    test_mae = mean_absolute_error(y_test, y_pred_test)
    test_r2 = r2_score(y_test, y_pred_test)

    results.append({
        "Model": name,
        "Train RMSE": round(train_rmse, 2),
        "Train MAE": round(train_mae, 2),
        "Train R²": round(train_r2, 2),
        "Test RMSE": round(test_rmse, 2),
        "Test MAE": round(test_mae, 2),
        "Test R²": round(test_r2, 2)
    })



# One row per model, built once from the collected records.
metrics_df = pd.DataFrame(results)
print(metrics_df)

               Model  Train RMSE  Train MAE  Train R²  Test RMSE  Test MAE  \
0   Number of Stages        0.44       0.34      0.72       0.44      0.34   
1  Duration of Visit        9.77       7.60      0.97       9.83      7.64   
2  Anesthetic Amount        1.46       1.13      0.94       1.46      1.14   

   Test R²  
0     0.71  
1     0.97  
2     0.94  




In [6]:
# Show the per-model metrics table as the cell's rich output.
# `metrics_df` is created by the evaluation cell, so run that cell first.
metrics_df

Unnamed: 0,Model,Train RMSE,Train MAE,Train R²,Test RMSE,Test MAE,Test R²
0,Number of Stages,0.44,0.34,0.72,0.44,0.34,0.71
1,Duration of Visit,9.77,7.6,0.97,9.83,7.64,0.97
2,Anesthetic Amount,1.46,1.13,0.94,1.46,1.14,0.94
