# Customer Support Ticket Classifier – Evaluation

Use this notebook to inspect the dataset, load trained models, and review prediction quality. Update the configuration or artefact path cells as needed when running online.

In [None]:
%matplotlib inline

from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

from customer_support_classifier.config import load_config
from customer_support_classifier.data import load_ticket_data

In [None]:
# Read the project configuration and keep its "data" section, which drives
# both the dataset loading here and the train/test split further down.
CONFIG_PATH = Path("config/default.yaml")
config = load_config(CONFIG_PATH)
data_cfg = config["data"]

# X is what the model later predicts on; y holds the matching category labels.
X, y = load_ticket_data(data_cfg)

n_records = len(X)
n_categories = len(set(y))
print(f"Loaded {n_records} ticket records across {n_categories} categories.")

In [None]:
# Tickets per category as a two-column frame ("category", "count").
# value_counts() orders the rows from most to least frequent.
label_counts = (
    pd.Series(y)
    .value_counts()
    .rename_axis("category")
    .reset_index(name="count")
)
label_counts.head()

In [None]:
# Horizontal bar chart of the ten most frequent ticket categories
# (label_counts is already ordered by descending count).
top_categories = label_counts.head(10)

fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(
    data=top_categories,
    x="count",
    y="category",
    # seaborn >= 0.13 deprecates `palette` without `hue`; mapping the
    # categorical axis to `hue` keeps the one-colour-per-bar look.
    hue="category",
    palette="viridis",
    dodge=False,  # keep full-width bars on seaborn versions that dodge by hue
    ax=ax,
)

# The hue legend would only repeat the y-axis labels, so drop it if one was drawn.
legend = ax.get_legend()
if legend is not None:
    legend.remove()

ax.set_title("Top 10 Ticket Categories")
ax.set_xlabel("Tickets")
ax.set_ylabel("Category")
fig.tight_layout()
plt.show()

In [None]:
# Stratified hold-out split. Size and seed come from the data config and
# fall back to 0.2 / 42 when the keys are absent.
test_fraction = data_cfg.get("test_size", 0.2)
split_seed = data_cfg.get("random_state", 42)
split_kwargs = dict(test_size=test_fraction, random_state=split_seed, stratify=y)

X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

n_train, n_test = len(X_train), len(X_test)
print(f"Training samples: {n_train} | Test samples: {n_test}")

In [None]:
# Locate the trained model artefacts produced by the training script.
artefact_dir = Path("artifacts")
model_candidates = sorted(artefact_dir.glob("*.joblib"))
if not model_candidates:
    raise FileNotFoundError("No trained model artefacts found in 'artifacts/'. Run the training script first.")

# Evaluate the most recently written artefact rather than whichever file name
# happens to sort first. Ties on modification time resolve to the
# alphabetically first candidate, so the choice stays deterministic.
# Override MODEL_PATH by hand to evaluate a specific model.
MODEL_PATH = max(model_candidates, key=lambda path: path.stat().st_mtime)

# NOTE: joblib.load unpickles the file, which can execute arbitrary code.
# Only load artefacts that come from a trusted training run.
model = joblib.load(MODEL_PATH)
print(f"Loaded model: {MODEL_PATH.name}")

In [None]:
# Score the held-out tickets and print the text report that
# classification_report builds from the true and predicted labels.
y_pred = model.predict(X_test)
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

In [None]:
# Label order for the matrix axes: use the model's own class list when it
# exposes one, otherwise the sorted labels seen in the training split.
fallback_labels = sorted(pd.Series(y_train).unique())
labels = list(getattr(model, "classes_", fallback_labels))
cm = confusion_matrix(y_test, y_pred, labels=labels)

# Rows are the actual categories, columns the predicted ones.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    cm,
    annot=False,
    cmap="Blues",
    xticklabels=labels,
    yticklabels=labels,
    ax=ax,
)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
fig.tight_layout()
plt.show()

In [None]:
# Spot-check: run a few hand-written tickets through the model and show
# each text next to its predicted category.
sample_tickets = [
    "My credit card was charged twice at the store even though the teller reversed it.",
    "Online banking keeps timing out when I try to reset my password.",
    "There are unfamiliar withdrawals listed on my checking account statement.",
]
sample_predictions = model.predict(sample_tickets)

sample_results = pd.DataFrame(
    {
        "ticket_text": sample_tickets,
        "prediction": sample_predictions,
    }
)
sample_results