In [14]:
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score,
    accuracy_score,
    precision_score,
    recall_score
)
from sklearn.model_selection import train_test_split

# Directory that holds the serialized models and preprocessing artifacts.
model_dir = "models"


def _load_artifact(filename):
    """Return the joblib-deserialized object stored at ``model_dir/filename``."""
    artifact_path = os.path.join(model_dir, filename)
    return joblib.load(artifact_path)


# Two regressors, one classifier, the label encoder for the classifier's
# target, and the list of feature columns used to align the design matrix.
duration_model = _load_artifact("xgb_regressor_visit_duration.pkl")
anesthetic_model = _load_artifact("xgb_regressor_anesthetic_amount.pkl")
stages_model = _load_artifact("xgb_classifier_number_of_stages.pkl")
label_encoder = _load_artifact("label_encoder_number_of_stages.pkl")
feature_columns = _load_artifact("model1_features.pkl")

df = pd.read_csv("mohs_test_with_complexity_scores.csv")

# Features: everything except the three prediction targets and the two
# KMeans complexity columns.
X = df.drop(
    columns=[
        "Duration of Visit (min)",
        "Number of stages",
        "Anesthetic Amount (ml)",
        "Visit Complexity KMeans",
        "Normalized Visit Complexity Score KMeans",
    ]
)

# One-hot encode, then force the column set and order of `feature_columns`:
# columns missing from this file are added as all-zero, columns not in the
# list are discarded.
# NOTE(review): presumably `feature_columns` is the training-time column
# list -- confirm against the training notebook.
X_encoded = pd.get_dummies(X).reindex(columns=feature_columns, fill_value=0)

# Targets. The stage count is mapped through the saved label encoder so it
# can be compared directly with the classifier's raw predictions.
y_duration = df["Duration of Visit (min)"]
y_anesthetic = df["Anesthetic Amount (ml)"]
y_stages_raw = df["Number of stages"]
y_stages_encoded = label_encoder.transform(y_stages_raw)

# 80/20 split with a fixed seed; features and all three targets are split
# together so their rows stay aligned.
# NOTE(review): the CSV name suggests this is already a held-out test file,
# and the "Train" portion here only corresponds to the data the models were
# fitted on if training used the same rows, ordering and seed -- confirm.
(
    X_train,
    X_test,
    y_d_train,
    y_d_test,
    y_s_train,
    y_s_test,
    y_a_train,
    y_a_test,
) = train_test_split(
    X_encoded,
    y_duration,
    y_stages_encoded,
    y_anesthetic,
    test_size=0.2,
    random_state=42,
)

def print_metrics(y_true, y_pred, label, set_name):
    """Print MAE, RMSE and R^2 for one set of regression predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, aligned with ``y_true``.
        label: Name of the target, shown in the header line.
        set_name: Name of the data split, shown in parentheses in the header.

    Returns:
        None. The metrics are written to stdout only.
    """
    print(f"\n--- {label} ({set_name}) ---")

    mae = mean_absolute_error(y_true, y_pred)
    # RMSE is taken as the square root of the MSE.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)

    print(f"MAE:  {mae:.2f}")
    print(f"RMSE: {rmse:.2f}")
    print(f"R^2:  {r2:.4f}")

# Report each regressor on the train split first, then on the test split.
_regression_targets = (
    ("Visit Duration", duration_model, y_d_train, y_d_test),
    ("Anesthetic Amount", anesthetic_model, y_a_train, y_a_test),
)
for _label, _model, _y_train, _y_test in _regression_targets:
    print_metrics(_y_train, _model.predict(X_train), _label, "Train")
    print_metrics(_y_test, _model.predict(X_test), _label, "Test")

def print_classification_metrics(y_true, y_pred, set_name):
    """Print accuracy, weighted precision and weighted recall for stage predictions.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
        set_name: Name of the data split, shown in parentheses in the header.

    Returns:
        None. The metrics are written to stdout only.
    """
    # Precision and recall share the same options: weighted averaging, and a
    # score of 0 in place of a warning when a division by zero occurs.
    averaging = {"average": "weighted", "zero_division": 0}

    print(f"\n--- Number of Stages ({set_name}) ---")

    accuracy = accuracy_score(y_true, y_pred)
    print(f"Accuracy:  {accuracy:.4f}")

    precision = precision_score(y_true, y_pred, **averaging)
    print(f"Precision: {precision:.4f}")

    recall = recall_score(y_true, y_pred, **averaging)
    print(f"Recall:    {recall:.4f}")

# Classifier predictions for both splits, in the label-encoded space.
train_preds = stages_model.predict(X_train)
test_preds = stages_model.predict(X_test)

# Decoded (original-label) versions of the predictions and the ground truth.
# They are not used by the metric calls below.
# NOTE(review): presumably kept for inspection in later cells -- confirm
# before removing.
(
    decoded_train_preds,
    decoded_test_preds,
    decoded_y_s_train,
    decoded_y_s_test,
) = map(
    label_encoder.inverse_transform,
    (train_preds, test_preds, y_s_train, y_s_test),
)

# Metrics are computed on the encoded labels, train split first.
for _set_name, _y_true, _y_pred in (
    ("Train", y_s_train, train_preds),
    ("Test", y_s_test, test_preds),
):
    print_classification_metrics(_y_true, _y_pred, _set_name)


--- Visit Duration (Train) ---
MAE:  5.66
RMSE: 7.37
R^2:  0.9846

--- Visit Duration (Test) ---
MAE:  10.80
RMSE: 14.01
R^2:  0.9435

--- Anesthetic Amount (Train) ---
MAE:  0.73
RMSE: 0.98
R^2:  0.9724

--- Anesthetic Amount (Test) ---
MAE:  1.41
RMSE: 1.84
R^2:  0.9023

--- Number of Stages (Train) ---
Accuracy:  0.9732
Precision: 0.9733
Recall:    0.9732

--- Number of Stages (Test) ---
Accuracy:  0.6960
Precision: 0.6846
Recall:    0.6960
