In [None]:
# -------------------------------------------
# Empathy score evaluation (1–5 scale)
# -------------------------------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# ==== FILE & COLUMN NAMES (edit if yours differ) ====
FILE_PATH = "records_informations.xlsx"
SHEET = "Sheet1"
LABEL_COL = "Using compassionate and empathetic language"
PRED_COL  = "gpt_empathy score"


def compute_rmse(y_true, y_pred):
    """Return the root-mean-squared error between two equal-shape sequences.

    Computed directly with NumPy instead of
    ``mean_squared_error(..., squared=False)``: the ``squared`` keyword was
    deprecated in scikit-learn 1.4 and removed in 1.6, so that call fails
    with ``TypeError`` on current releases.

    Args:
        y_true: Reference values (array-like of numbers).
        y_pred: Predicted values, same shape as ``y_true``.

    Returns:
        The RMSE as a Python ``float``.

    Raises:
        ValueError: If the inputs are empty or their shapes differ.
    """
    truth = np.asarray(y_true, dtype=float)
    guess = np.asarray(y_pred, dtype=float)
    if truth.shape != guess.shape:
        # Without this check NumPy would silently broadcast mismatched inputs.
        raise ValueError(
            f"Shape mismatch: y_true {truth.shape} vs y_pred {guess.shape}"
        )
    if truth.size == 0:
        raise ValueError("RMSE is undefined for empty input.")
    return float(np.sqrt(np.mean(np.square(truth - guess))))


def clean_scores(frame, label_col, pred_col):
    """Return the two score columns restricted to rows where both are numeric.

    Values that cannot be parsed as numbers are coerced to NaN and the
    corresponding rows are dropped.

    Args:
        frame: A DataFrame (or anything ``pd.DataFrame`` accepts, e.g. a dict
            of columns) containing both score columns.
        label_col: Name of the human-label column.
        pred_col: Name of the model-prediction column.

    Returns:
        A new two-column DataFrame ``[label_col, pred_col]`` of numeric rows.

    Raises:
        KeyError: If either column is absent.
        ValueError: If no row has a numeric value in both columns.
    """
    frame = pd.DataFrame(frame)
    missing = [col for col in (label_col, pred_col) if col not in frame.columns]
    if missing:
        raise KeyError(f"Column(s) not found in data: {missing}")

    numeric = frame[[label_col, pred_col]].apply(pd.to_numeric, errors="coerce")
    numeric = numeric.dropna()
    if numeric.empty:
        # Fail here with a clear message rather than later with an opaque
        # error from the metric computations or .mode().iloc[0].
        raise ValueError(
            "No rows with numeric values in both "
            f"{label_col!r} and {pred_col!r}."
        )
    return numeric


# The guard keeps the file importable without touching the spreadsheet or
# opening plot windows; run as a script or notebook cell, everything below
# executes exactly as before and all names remain module-level globals.
if __name__ == "__main__":
    # ==== LOAD DATA ====
    # keep only rows with numeric label & prediction
    df = clean_scores(
        pd.read_excel(FILE_PATH, sheet_name=SHEET), LABEL_COL, PRED_COL
    )

    y_true = df[LABEL_COL].astype(float).to_numpy()
    y_pred = df[PRED_COL].astype(float).to_numpy()

    # ==== ERRORS & METRICS ====
    abs_err = np.abs(y_true - y_pred)
    mae = abs_err.mean()
    std_err = abs_err.std(ddof=0)  # population std of the absolute errors
    rmse = compute_rmse(y_true, y_pred)

    # Random baseline (uniform between 1 and 5 inclusive); the upper bound
    # of rng.integers is exclusive, hence 6.
    rng = np.random.default_rng(42)     # reproducible
    y_rand = rng.integers(1, 6, size=len(y_true))
    rmse_rand = compute_rmse(y_true, y_rand)

    # Optional constant baselines: always predict the label mean / mode.
    y_const_mean = np.full_like(y_true, fill_value=y_true.mean())
    rmse_const_mean = compute_rmse(y_true, y_const_mean)

    # .mode() returns values sorted ascending, so ties pick the smallest one.
    mode_val = pd.Series(y_true).mode().iloc[0]
    y_const_mode = np.full_like(y_true, fill_value=mode_val)
    rmse_const_mode = compute_rmse(y_true, y_const_mode)

    print(f"Samples: {len(y_true)}")
    print(f"MAE:  {mae:.3f}")
    print(f"STD(error): {std_err:.3f}")
    print(f"RMSE (GPT): {rmse:.3f}")
    print(f"RMSE (Random 1–5): {rmse_rand:.3f}")
    print(f"RMSE (Const=mean={y_true.mean():.2f}): {rmse_const_mean:.3f}")
    print(f"RMSE (Const=mode={mode_val:.0f}): {rmse_const_mode:.3f}")

    # ==== PLOTS ====

    # 1) Histogram of absolute errors
    # Unit-wide bins [k, k+1); align="left" centres each bar on its tick k.
    plt.figure(figsize=(6,4))
    plt.hist(abs_err, bins=range(0,6), align="left", rwidth=0.8, edgecolor="black")
    plt.xticks(range(0,6))
    plt.xlabel("Absolute Error (scale 1–5)")
    plt.ylabel("Count")
    plt.title("Distribution of Prediction Errors")
    plt.tight_layout()
    plt.show()

    # 2) Bar with mean ± 1 std of error
    plt.figure(figsize=(6,4))
    plt.bar(["Error"], [mae], yerr=[std_err], capsize=10, edgecolor="black")
    plt.ylabel("Error (scale 1–5)")
    plt.title("Mean Absolute Error with Standard Deviation")
    plt.tight_layout()
    plt.show()

    # 3) RMSE comparison: GPT vs Random (and optional constants)
    labels = ["GPT", "Random", "Const-Mean", "Const-Mode"]
    vals = [rmse, rmse_rand, rmse_const_mean, rmse_const_mode]

    plt.figure(figsize=(7,4))
    bars = plt.bar(labels, vals, edgecolor="black")
    # Annotate each bar with its value just above the bar top.
    for i, v in enumerate(vals):
        plt.text(i, v + 0.03, f"{v:.2f}", ha="center", va="bottom", fontsize=10)
    plt.ylabel("RMSE (scale 1–5)")
    plt.title("RMSE Comparison")
    plt.tight_layout()
    plt.show()