# IT Ticket Triage - Training & Evaluation (Dept + Urgency + Tags/Summary)

This notebook trains and evaluates the NLP triage pipeline in phases.

- Trains: department routing classifier and urgency/priority classifier
- Prototypes: tag extraction and summary generation
- Produces: metrics, saved model artifacts, and inference-ready mappings

Target inference JSON schema:

```json
{
  "ticket_id": "...",
  "department": {"label": "...", "confidence": 0.92},
  "urgency": {"label": "...", "confidence": 0.81},
  "tags": ["vpn", "login", "timeout"],
  "summary": "User cannot connect to VPN after password reset."
}
```

In [26]:
# Cell 2: Environment sanity + GPU check + deterministic seeds
import os
import random
import sys

import numpy as np
import torch

print(f"Python: {sys.version.split()[0]}")
print(f"Torch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

SEED = 42

def set_seed(seed: int = 42) -> None:
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(SEED)
print(f"Seed set to {SEED}")

Python: 3.12.4
Torch: 2.10.0
CUDA available: False
Seed set to 42


In [27]:
# Cell 3: Install dependencies (single requirements cell)
import subprocess
import sys

if "google.colab" in sys.modules:
    subprocess.check_call([
        sys.executable,
        "-m",
        "pip",
        "install",
        "-q",
        "-U",
        "numpy==1.26.4",
        "pandas==2.2.2",
        "scikit-learn>=1.4,<2",
        "transformers<5",
        "datasets<3",
        "evaluate<1",
        "accelerate<1",
        "yake<1",
        "sentencepiece<1",
    ])
    print("Colab dependencies installed.")
else:
    print("Local environment detected. Skipping pip install in notebook; using active .venv packages.")

Local environment detected. Skipping pip install in notebook; using active .venv packages.


In [28]:
import sys, numpy, pandas
print(sys.executable)
print("numpy:", numpy.__version__)
print("pandas:", pandas.__version__)

/Users/rawadyared/NLP-IT-Ticket_Triage/.venv/bin/python
numpy: 2.1.3
pandas: 2.2.3


In [29]:
# Cell 4: Imports
import json
import re
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

import datasets
import evaluate
from transformers import AutoModelForSequenceClassification, AutoTokenizer, EarlyStoppingCallback, Trainer, TrainingArguments

warnings.filterwarnings("ignore")
pd.set_option("display.max_colwidth", 160)

In [30]:
# Cell 5: Repo paths + config (single source of truth)
import sys
from pathlib import Path

if "SEED" not in globals():
    SEED = 42

IN_COLAB = "google.colab" in sys.modules

def detect_base_dir() -> Path:
    if IN_COLAB:
        preferred = Path("/content/NLP-IT-Ticket_Triage")
        if preferred.exists():
            return preferred
        return Path("/content")

    cwd = Path.cwd().resolve()
    candidate_processed = Path("data") / "processed" / "IT Support Ticket Data.stratified_3000.csv"
    candidate_raw = Path("data") / "raw" / "IT Support Ticket Data.csv"

    search_roots = [cwd, *cwd.parents]
    for root in search_roots:
        if (root / candidate_processed).exists() or (root / candidate_raw).exists():
            return root
    return cwd

BASE_DIR = detect_base_dir()
print(f"Resolved BASE_DIR: {BASE_DIR}")

DATASET_CANDIDATES = [
    BASE_DIR / "data" / "processed" / "IT Support Ticket Data.stratified_3000.csv",
    BASE_DIR / "data" / "processed" / "IT Support Ticket Data.stratified_1000.csv",
    BASE_DIR / "data" / "raw" / "IT Support Ticket Data.csv",
    Path("/content") / "IT Support Ticket Data.stratified_3000.csv",
    Path("/content") / "IT Support Ticket Data.stratified_1000.csv",
    Path("/content") / "IT Support Ticket Data.csv",
]


def resolve_dataset_path(candidates):
    for path in candidates:
        if path.exists():
            return path
    candidate_list = "\n".join([str(p) for p in candidates])
    raise FileNotFoundError(f"Dataset not found. Checked:\n{candidate_list}")

OUTPUT_ROOT = Path("/content") if IN_COLAB else BASE_DIR
RESULTS_DIR = OUTPUT_ROOT / "results"
MODELS_DIR = OUTPUT_ROOT / "models"
MAPPINGS_DIR = RESULTS_DIR / "mappings"

for out_dir in [RESULTS_DIR, MODELS_DIR, MAPPINGS_DIR]:
    out_dir.mkdir(parents=True, exist_ok=True)

CONFIG = {
    "dataset_path": str(resolve_dataset_path(DATASET_CANDIDATES)),
    "id_column": "Unnamed: 0",
    "text_columns": ["Body"],
    "label_columns": {
        "department": "Department",
        "urgency": "Priority",
    },
    "model_names": {
        "department": "distilroberta-base",
        "urgency": "distilroberta-base",
        "summary": "t5-small",
    },
    "candidate_models": {
        "department": ["distilroberta-base"],
        "urgency": ["distilroberta-base"],
    },
    "train": {
        "max_length": 256,
        "batch_size": 8,
        "learning_rate": 2e-5,
        "learning_rates": [1e-5, 2e-5, 3e-5],
        "epochs": 3,
        "weight_decay": 0.01,
        "warmup_ratio": 0.1,
        "early_stopping_patience": 1,
    },
    "split": {
        "train_size": 0.8,
        "val_size": 0.1,
        "test_size": 0.1,
    },
    "preprocess": {
        "lowercase": False,
        "remove_boilerplate": True,
        "remove_urls": True,
        "remove_emails": True
    },
    "label_standardization": {
        "department_aliases": {
            "tech support": "Technical Support",
            "technical support": "Technical Support",
            "it support": "IT Support",
            "billing & payments": "Billing and Payments"
        },
        "urgency_aliases": {
            "urgent": "high",
            "high priority": "high",
            "med": "medium",
            "normal": "medium",
            "low priority": "low"
        }
    },
    "paths": {
        "results_dir": str(RESULTS_DIR),
        "models_dir": str(MODELS_DIR),
        "mappings_dir": str(MAPPINGS_DIR),
    },
    "experiment": {
        "candidate_epochs": 3,
        "run_candidate_search": True,
        "tune_learning_rate": True
    },
    "seed": SEED,
}

print(json.dumps(CONFIG, indent=2))

Resolved BASE_DIR: /Users/rawadyared/NLP-IT-Ticket_Triage
{
  "dataset_path": "/Users/rawadyared/NLP-IT-Ticket_Triage/data/processed/IT Support Ticket Data.stratified_3000.csv",
  "id_column": "Unnamed: 0",
  "text_columns": [
    "Body"
  ],
  "label_columns": {
    "department": "Department",
    "urgency": "Priority"
  },
  "model_names": {
    "department": "distilroberta-base",
    "urgency": "distilroberta-base",
    "summary": "t5-small"
  },
  "candidate_models": {
    "department": [
      "distilroberta-base"
    ],
    "urgency": [
      "distilroberta-base"
    ]
  },
  "train": {
    "max_length": 256,
    "batch_size": 8,
    "learning_rate": 2e-05,
    "learning_rates": [
      1e-05,
      2e-05,
      3e-05
    ],
    "epochs": 3,
    "weight_decay": 0.01,
    "warmup_ratio": 0.1,
    "early_stopping_patience": 1
  },
  "split": {
    "train_size": 0.8,
    "val_size": 0.1,
    "test_size": 0.1
  },
  "preprocess": {
    "lowercase": false,
    "remove_boilerplate": tr

In [31]:
# Cell 6: Load dataset
df_raw = pd.read_csv(CONFIG["dataset_path"])
print(f"Dataset shape: {df_raw.shape}")
print(f"Columns: {list(df_raw.columns)}")
display(df_raw.head(3))

Dataset shape: (3000, 5)
Columns: ['Unnamed: 0', 'Body', 'Department', 'Priority', 'Tags']


Unnamed: 0.1,Unnamed: 0,Body,Department,Priority,Tags
0,27907,"Hello Customer Support, I have contacted you to seek information about the integrations supported by your project management software as a service. I am par...",Technical Support,low,"['Product', 'Integration', 'Documentation', 'API', 'Guidance']"
1,2966,"Our marketing agency faced several hardware and software failures, which affected our digital campaigns. The possible root cause was compatibility issues. T...",Technical Support,high,"['Hardware', 'Software', 'Performance', 'Disruption', 'Outage']"
2,16253,In need of comprehensive documentation for integrating ESET NOD32 Antivirus 14 with the SaaS project management platform. Requesting detailed steps for the ...,Technical Support,high,"['Documentation', 'Feature', 'Security', 'IT', 'Tech Support']"


In [32]:
# Cell 7: Build ticket_text
configured_cols = CONFIG["text_columns"]
available_text_cols = [col for col in configured_cols if col in df_raw.columns]

if not available_text_cols:
    fallback_cols = [col for col in ["short_description", "description", "Body"] if col in df_raw.columns]
    if not fallback_cols:
        raise ValueError("No usable text columns found. Update CONFIG['text_columns'].")
    available_text_cols = fallback_cols

print(f"Using text columns: {available_text_cols}")

HEADER_LINE_RE = re.compile(r"(?i)^(from|sent|to|subject|cc|bcc):")
SEPARATOR_LINE_RE = re.compile(r"^[-_]{2,}$")
GREETING_LINE_RE = re.compile(r"(?i)^(dear|hi|hello)\b")
SIGNOFF_MARKER_RE = re.compile(r"(?i)\b(best regards|kind regards|regards|thanks(?: and regards)?|thank you|sincerely)\b")
DISCLAIMER_MARKER_RE = re.compile(r"(?i)\b(this email and any attachments are confidential|do not reply to this email)\b")

def _is_boilerplate_line(line: str) -> bool:
    line = line.strip()
    if not line:
        return False
    if HEADER_LINE_RE.match(line):
        return True
    if SEPARATOR_LINE_RE.match(line):
        return True
    if GREETING_LINE_RE.match(line):
        # Keep natural ticket text that starts with greeting + issue details.
        words = line.rstrip(',:').split()
        if len(words) <= 6 and len(line) <= 45:
            return True
    return False

def _trim_trailing_boilerplate(text: str, marker_re: re.Pattern, min_ratio: float = 0.60) -> str:
    matches = list(marker_re.finditer(text))
    if not matches:
        return text
    cutoff = matches[-1].start()
    if cutoff >= int(len(text) * min_ratio):
        return text[:cutoff]
    return text

def normalize_ticket_text(text: str) -> str:
    text = "" if pd.isna(text) else str(text)

    # Normalize obvious formatting noise first.
    text = re.sub(r"<[^>]+>", " ", text)

    if CONFIG.get("preprocess", {}).get("remove_urls", True):
        text = re.sub(r"https?://\S+|www\.\S+", " ", text)
    if CONFIG.get("preprocess", {}).get("remove_emails", True):
        text = re.sub(r"\b[\w\.-]+@[\w\.-]+\.\w+\b", " ", text)

    if CONFIG.get("preprocess", {}).get("remove_boilerplate", True):
        lines = [line.strip() for line in re.split(r"[\r\n]+", text) if line.strip()]
        lines = [line for line in lines if not _is_boilerplate_line(line)]
        text = " ".join(lines)

        # Trim disclaimers/signatures only when they appear near the tail.
        text = _trim_trailing_boilerplate(text, DISCLAIMER_MARKER_RE, min_ratio=0.55)
        text = _trim_trailing_boilerplate(text, SIGNOFF_MARKER_RE, min_ratio=0.60)

    text = re.sub(r"[^\w\s\.,:;!?\-/]", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    if CONFIG.get("preprocess", {}).get("lowercase", False):
        text = text.lower()
    return text

df_raw["ticket_text"] = (
    df_raw[available_text_cols]
    .fillna("")
    .astype(str)
    .agg(" ".join, axis=1)
    .map(normalize_ticket_text)
)

df_raw = df_raw[df_raw["ticket_text"].str.len() > 0].copy()
print(f"Rows after text cleaning: {len(df_raw)}")
display(df_raw[[CONFIG["id_column"], "ticket_text"]].head(3))


Using text columns: ['Body']
Rows after text cleaning: 3000


Unnamed: 0.1,Unnamed: 0,ticket_text
0,27907,"Hello Customer Support, I have contacted you to seek information about the integrations supported by your project management software as a service. I am par..."
1,2966,"Our marketing agency faced several hardware and software failures, which affected our digital campaigns. The possible root cause was compatibility issues. T..."
2,16253,In need of comprehensive documentation for integrating ESET NOD32 Antivirus 14 with the SaaS project management platform. Requesting detailed steps for the ...


In [33]:
# Cell 8: Label cleaning + encoding
def _standardize_label(label: str, task_name: str) -> str:
    cleaned = re.sub(r"\s+", " ", str(label)).strip()
    if cleaned == "":
        return cleaned

    if task_name == "urgency":
        alias_map = {
            k.casefold(): v
            for k, v in CONFIG.get("label_standardization", {}).get("urgency_aliases", {}).items()
        }
        normalized = cleaned.casefold()
        return alias_map.get(normalized, normalized)

    alias_map = {
        k.casefold(): v
        for k, v in CONFIG.get("label_standardization", {}).get("department_aliases", {}).items()
    }
    normalized = cleaned.casefold()
    return alias_map.get(normalized, cleaned)

def clean_and_encode_labels(df: pd.DataFrame, label_col: str, task_name: str, mappings_dir: Path):
    if label_col not in df.columns:
        raise KeyError(f"Label column '{label_col}' not found for task '{task_name}'.")

    task_df = df.copy()
    task_df = task_df[task_df[label_col].notna()].copy()
    task_df[label_col] = task_df[label_col].astype(str).map(lambda x: _standardize_label(x, task_name))
    task_df = task_df[task_df[label_col] != ""].copy()

    label_values = sorted(task_df[label_col].unique().tolist())
    label2id = {label: idx for idx, label in enumerate(label_values)}
    id2label = {idx: label for label, idx in label2id.items()}

    task_df["label_text"] = task_df[label_col]
    task_df["label"] = task_df["label_text"].map(label2id).astype(int)

    mapping_payload = {
        "task": task_name,
        "label_column": label_col,
        "label2id": label2id,
        "id2label": {str(k): v for k, v in id2label.items()},
    }
    mapping_path = mappings_dir / f"{task_name}_label_mapping.json"
    with open(mapping_path, "w", encoding="utf-8") as f:
        json.dump(mapping_payload, f, ensure_ascii=True, indent=2)

    print(f"[{task_name}] rows after label cleaning: {len(task_df)}")
    print(f"[{task_name}] num classes: {len(label2id)}")
    print(f"[{task_name}] mapping saved: {mapping_path}")
    return task_df, label2id, id2label

dept_df, dept_label2id, dept_id2label = clean_and_encode_labels(
    df=df_raw,
    label_col=CONFIG["label_columns"]["department"],
    task_name="department",
    mappings_dir=MAPPINGS_DIR,
)

urgency_df, urgency_label2id, urgency_id2label = clean_and_encode_labels(
    df=df_raw,
    label_col=CONFIG["label_columns"]["urgency"],
    task_name="urgency",
    mappings_dir=MAPPINGS_DIR,
)

TASK_DATA = {
    "department": {
        "df": dept_df,
        "label2id": dept_label2id,
        "id2label": dept_id2label,
    },
    "urgency": {
        "df": urgency_df,
        "label2id": urgency_label2id,
        "id2label": urgency_id2label,
    },
}

[department] rows after label cleaning: 3000
[department] num classes: 10
[department] mapping saved: /Users/rawadyared/NLP-IT-Ticket_Triage/results/mappings/department_label_mapping.json
[urgency] rows after label cleaning: 3000
[urgency] num classes: 3
[urgency] mapping saved: /Users/rawadyared/NLP-IT-Ticket_Triage/results/mappings/urgency_label_mapping.json


In [34]:
# Cell 9: Train/Val/Test split (stratified) + leakage checks
def stratified_split_with_leakage_guard(
    task_df: pd.DataFrame,
    id_col: str,
    seed: int,
    train_size: float,
    val_size: float,
    test_size: float,
):
    if not np.isclose(train_size + val_size + test_size, 1.0):
        raise ValueError("Split sizes must sum to 1.0")

    working_df = task_df.copy()
    if id_col not in working_df.columns:
        working_df[id_col] = np.arange(len(working_df))

    id_label = working_df[[id_col, "label"]].drop_duplicates()
    label_per_id = id_label.groupby(id_col)["label"].nunique()
    if (label_per_id > 1).any():
        raise ValueError("A ticket ID maps to multiple labels; cannot guarantee leakage-free split.")

    id_frame = id_label.drop_duplicates(subset=[id_col]).copy()
    y = id_frame["label"]
    stratify_1 = y if y.value_counts().min() >= 2 else None

    train_ids, temp_ids = train_test_split(
        id_frame[id_col],
        test_size=(1.0 - train_size),
        random_state=seed,
        stratify=stratify_1,
    )

    temp_frame = id_frame[id_frame[id_col].isin(temp_ids)].copy()
    y_temp = temp_frame["label"]
    stratify_2 = y_temp if y_temp.value_counts().min() >= 2 else None
    rel_test_size = test_size / (val_size + test_size)

    val_ids, test_ids = train_test_split(
        temp_frame[id_col],
        test_size=rel_test_size,
        random_state=seed,
        stratify=stratify_2,
    )

    splits = {
        "train": working_df[working_df[id_col].isin(set(train_ids))].reset_index(drop=True),
        "val": working_df[working_df[id_col].isin(set(val_ids))].reset_index(drop=True),
        "test": working_df[working_df[id_col].isin(set(test_ids))].reset_index(drop=True),
    }

    train_set = set(splits["train"][id_col].tolist())
    val_set = set(splits["val"][id_col].tolist())
    test_set = set(splits["test"][id_col].tolist())
    assert train_set.isdisjoint(val_set)
    assert train_set.isdisjoint(test_set)
    assert val_set.isdisjoint(test_set)

    return splits

def print_split_distribution(task_name: str, splits: dict):
    for split_name, split_df in splits.items():
        print(f"\n[{task_name}] {split_name}: n={len(split_df)}")
        dist = split_df["label_text"].value_counts(normalize=True).mul(100).round(2)
        print(dist.to_string())

SPLITS = {}
for task_name, task_info in TASK_DATA.items():
    splits = stratified_split_with_leakage_guard(
        task_df=task_info["df"],
        id_col=CONFIG["id_column"],
        seed=CONFIG["seed"],
        train_size=CONFIG["split"]["train_size"],
        val_size=CONFIG["split"]["val_size"],
        test_size=CONFIG["split"]["test_size"],
    )
    SPLITS[task_name] = splits
    print_split_distribution(task_name, splits)

print("\nSplit generation complete for tasks:", list(SPLITS.keys()))


[department] train: n=2400
label_text
Technical Support                  29.04
Product Support                    18.67
Customer Service                   15.12
IT Support                         11.79
Billing and Payments               10.21
Returns and Exchanges               4.92
Service Outages and Maintenance     3.92
Sales and Pre-Sales                 3.00
Human Resources                     1.92
General Inquiry                     1.42

[department] val: n=300
label_text
Technical Support                  29.00
Product Support                    18.67
Customer Service                   15.33
IT Support                         11.67
Billing and Payments               10.33
Returns and Exchanges               5.00
Service Outages and Maintenance     4.00
Sales and Pre-Sales                 3.00
Human Resources                     1.67
General Inquiry                     1.33

[department] test: n=300
label_text
Technical Support                  29.00
Product Support            

## Baseline Plan (Justification)

Before transformer fine-tuning, we establish classical NLP baselines using **TF-IDF + Logistic Regression**.

- Provides a transparent performance floor for both targets.
- Validates that improvements from transformers are meaningful.
- Keeps evaluation consistent via accuracy, macro F1, and per-class metrics.

In [35]:
# Cell 11: Baseline (TF-IDF + Logistic Regression)
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from sklearn.pipeline import Pipeline

BASELINE_DIR = RESULTS_DIR / "baselines"
BASELINE_DIR.mkdir(parents=True, exist_ok=True)

def run_tfidf_logreg_baseline(task_name: str, splits: dict, max_features: int = 80000):
    train_df = splits["train"]
    val_df = splits["val"]
    test_df = splits["test"]

    pipeline = Pipeline(
        steps=[
            (
                "tfidf",
                TfidfVectorizer(
                    lowercase=True,
                    ngram_range=(1, 2),
                    max_features=max_features,
                    min_df=2,
                ),
            ),
            (
                "clf",
                LogisticRegression(
                    max_iter=1000,
                    class_weight="balanced",
                    random_state=CONFIG["seed"],
                    n_jobs=None,
                ),
            ),
        ]
    )

    pipeline.fit(train_df["ticket_text"], train_df["label"])

    metrics_by_split = {}
    for split_name, split_df in [("val", val_df), ("test", test_df)]:
        y_true = split_df["label"].values
        y_pred = pipeline.predict(split_df["ticket_text"])

        split_metrics = {
            "accuracy": float(accuracy_score(y_true, y_pred)),
            "macro_f1": float(f1_score(y_true, y_pred, average="macro", zero_division=0)),
            "weighted_f1": float(f1_score(y_true, y_pred, average="weighted", zero_division=0)),
            "classification_report": classification_report(y_true, y_pred, output_dict=True, zero_division=0),
            "confusion_matrix": confusion_matrix(y_true, y_pred).tolist(),
        }
        metrics_by_split[split_name] = split_metrics

        print(
            f"[{task_name}][{split_name}] "
            f"accuracy={split_metrics['accuracy']:.4f} "
            f"macro_f1={split_metrics['macro_f1']:.4f} "
            f"weighted_f1={split_metrics['weighted_f1']:.4f}"
        )

    payload = {
        "task": task_name,
        "model": "tfidf_logistic_regression",
        "config": {
            "max_features": max_features,
            "ngram_range": [1, 2],
            "min_df": 2,
            "class_weight": "balanced",
            "seed": CONFIG["seed"],
        },
        "metrics": metrics_by_split,
    }

    out_path = BASELINE_DIR / f"{task_name}_baseline_metrics.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=True, indent=2)
    print(f"Saved baseline metrics: {out_path}")

    return pipeline, payload

if "SPLITS" not in globals() or not isinstance(SPLITS, dict) or len(SPLITS) == 0:
    raise RuntimeError("SPLITS not found. Run notebook cells in order through Cell 9 (Train/Val/Test split), then run this baseline cell.")

BASELINE_MODELS = {}
BASELINE_RESULTS = {}

for task_name in ["department", "urgency"]:
    model, result = run_tfidf_logreg_baseline(task_name=task_name, splits=SPLITS[task_name])
    BASELINE_MODELS[task_name] = model
    BASELINE_RESULTS[task_name] = result

[department][val] accuracy=0.3900 macro_f1=0.3225 weighted_f1=0.3798
[department][test] accuracy=0.3433 macro_f1=0.2462 weighted_f1=0.3465
Saved baseline metrics: /Users/rawadyared/NLP-IT-Ticket_Triage/results/baselines/department_baseline_metrics.json
[urgency][val] accuracy=0.5133 macro_f1=0.4815 weighted_f1=0.5103
[urgency][test] accuracy=0.4467 macro_f1=0.4039 weighted_f1=0.4448
Saved baseline metrics: /Users/rawadyared/NLP-IT-Ticket_Triage/results/baselines/urgency_baseline_metrics.json


## Transformer Plan (Multiple Approaches)

We compare multiple transformer backbones for **department routing** and select the best on validation macro F1.

- Candidate A: `distilroberta-base` (faster)
- Candidate B: `bert-base-uncased` (strong baseline)
- Selection criterion: best validation macro F1, then test on held-out set

Note: Hugging Face model/tokenizer files are downloaded on first use if not cached.

In [36]:
# Cell 13: Convert to HuggingFace Dataset (department)
from datasets import Dataset, DatasetDict

DEPT_SPLITS = SPLITS["department"]
DEPT_LABEL2ID = TASK_DATA["department"]["label2id"]
DEPT_ID2LABEL = TASK_DATA["department"]["id2label"]

def to_hf_dataset(split_df: pd.DataFrame, id_col: str) -> Dataset:
    keep_cols = [col for col in [id_col, "ticket_text", "label", "label_text"] if col in split_df.columns]
    export_df = split_df[keep_cols].copy()
    return Dataset.from_pandas(export_df, preserve_index=False)

dept_hf_raw = DatasetDict(
    {
        "train": to_hf_dataset(DEPT_SPLITS["train"], CONFIG["id_column"]),
        "val": to_hf_dataset(DEPT_SPLITS["val"], CONFIG["id_column"]),
        "test": to_hf_dataset(DEPT_SPLITS["test"], CONFIG["id_column"]),
    }
)

print(dept_hf_raw)

DatasetDict({
    train: Dataset({
        features: ['Unnamed: 0', 'ticket_text', 'label', 'label_text'],
        num_rows: 2400
    })
    val: Dataset({
        features: ['Unnamed: 0', 'ticket_text', 'label', 'label_text'],
        num_rows: 300
    })
    test: Dataset({
        features: ['Unnamed: 0', 'ticket_text', 'label', 'label_text'],
        num_rows: 300
    })
})


In [37]:
# Cell 14: Tokenizer + tokenize function
from transformers import DataCollatorWithPadding

def tokenize_department_splits(tokenizer):
    text_col = "ticket_text"
    max_len = CONFIG["train"]["max_length"]

    def _tokenize(batch):
        return tokenizer(batch[text_col], truncation=True, padding="max_length", max_length=max_len)

    remove_cols = [c for c in dept_hf_raw["train"].column_names if c != "label"]
    tokenized = dept_hf_raw.map(_tokenize, batched=True, remove_columns=remove_cols)
    tokenized.set_format(type="torch")
    return tokenized

print("Tokenizer function ready. First tokenizer load will download files if not cached.")

Tokenizer function ready. First tokenizer load will download files if not cached.


In [38]:
# Cell 15: Metrics function
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return {
        "accuracy": float(accuracy_score(labels, preds)),
        "macro_f1": float(f1_score(labels, preds, average="macro", zero_division=0)),
        "weighted_f1": float(f1_score(labels, preds, average="weighted", zero_division=0)),
    }

In [39]:
# Cell 16: Model init
def init_model(model_name: str):
    print(f"Loading model: {model_name}")
    print("If this is the first run, Hugging Face weights will be downloaded.")
    return AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=len(DEPT_LABEL2ID),
        id2label=DEPT_ID2LABEL,
        label2id=DEPT_LABEL2ID,
    )

In [40]:
# Cell 17: Trainer setup
def build_trainer(model_name: str, tokenized_ds: DatasetDict, tokenizer, run_dir: Path, num_train_epochs: int, learning_rate: float) -> Trainer:
    model = init_model(model_name)
    data_collator = DataCollatorWithPadding(
        tokenizer=tokenizer,
        pad_to_multiple_of=8 if torch.cuda.is_available() else None,
    )

    training_args_kwargs = {
        "output_dir": str(run_dir),
        "learning_rate": learning_rate,
        "per_device_train_batch_size": CONFIG["train"]["batch_size"],
        "per_device_eval_batch_size": CONFIG["train"]["batch_size"],
        "num_train_epochs": num_train_epochs,
        "weight_decay": CONFIG["train"]["weight_decay"],
        "warmup_ratio": CONFIG["train"]["warmup_ratio"],
        "save_strategy": "epoch",
        "load_best_model_at_end": True,
        "metric_for_best_model": "macro_f1",
        "greater_is_better": True,
        "save_total_limit": 2,
        "fp16": torch.cuda.is_available(),
        "logging_steps": 50,
        "report_to": [],
        "seed": CONFIG["seed"],
    }
    strategy_key = "eval_strategy" if "eval_strategy" in TrainingArguments.__init__.__code__.co_varnames else "evaluation_strategy"
    training_args_kwargs[strategy_key] = "epoch"
    args = TrainingArguments(**training_args_kwargs)

    trainer_kwargs = {
        "model": model,
        "args": args,
        "train_dataset": tokenized_ds["train"],
        "eval_dataset": tokenized_ds["val"],
        "data_collator": data_collator,
        "compute_metrics": compute_metrics,
        "callbacks": [EarlyStoppingCallback(early_stopping_patience=CONFIG["train"]["early_stopping_patience"])],
    }
    processing_key = "processing_class" if "processing_class" in Trainer.__init__.__code__.co_varnames else "tokenizer"
    trainer_kwargs[processing_key] = tokenizer
    trainer = Trainer(**trainer_kwargs)
    return trainer

In [41]:
# Cell 18: Train department classifier (candidate comparison + best model save)
DEPT_MODELS_DIR = MODELS_DIR / "department_model"
DEPT_MODELS_DIR.mkdir(parents=True, exist_ok=True)

candidate_models = (
    CONFIG["candidate_models"]["department"]
    if CONFIG["experiment"]["run_candidate_search"]
    else [CONFIG["model_names"]["department"]]
)

learning_rates = (
    CONFIG["train"].get("learning_rates", [CONFIG["train"]["learning_rate"]])
    if CONFIG["experiment"].get("tune_learning_rate", True)
    else [CONFIG["train"]["learning_rate"]]
)

candidate_epochs = CONFIG["experiment"]["candidate_epochs"]
full_epochs = CONFIG["train"]["epochs"]

DEPT_EXPERIMENTS = []
DEPT_BEST = None
DEPT_BEST_TRAINER = None
DEPT_BEST_TOKENIZER = None
DEPT_BEST_TOKENIZED_DS = None

run_idx = 0
for model_name in candidate_models:
    for lr in learning_rates:
        run_idx += 1
        safe_name = model_name.replace("/", "_")
        lr_tag = str(lr).replace(".", "p")
        run_dir = DEPT_MODELS_DIR / f"candidate_{run_idx}_{safe_name}_lr{lr_tag}"
        run_dir.mkdir(parents=True, exist_ok=True)

        print(
            f"\n[Department] Trial {run_idx}/{len(candidate_models) * len(learning_rates)}: "
            f"{model_name} | lr={lr}"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
        tokenized_ds = tokenize_department_splits(tokenizer)

        trainer = build_trainer(
            model_name=model_name,
            tokenized_ds=tokenized_ds,
            tokenizer=tokenizer,
            run_dir=run_dir,
            num_train_epochs=candidate_epochs,
            learning_rate=lr,
        )

        trainer.train()
        val_metrics = trainer.evaluate(tokenized_ds["val"])
        val_macro_f1 = float(val_metrics.get("eval_macro_f1", -1.0))

        experiment_row = {
            "model_name": model_name,
            "learning_rate": lr,
            "candidate_epochs": candidate_epochs,
            "val_accuracy": float(val_metrics.get("eval_accuracy", 0.0)),
            "val_macro_f1": val_macro_f1,
            "val_weighted_f1": float(val_metrics.get("eval_weighted_f1", 0.0)),
        }
        DEPT_EXPERIMENTS.append(experiment_row)
        print(f"Validation macro F1: {val_macro_f1:.4f}")

        if (DEPT_BEST is None) or (val_macro_f1 > DEPT_BEST["val_macro_f1"]):
            DEPT_BEST = experiment_row
            DEPT_BEST_TRAINER = trainer
            DEPT_BEST_TOKENIZER = tokenizer
            DEPT_BEST_TOKENIZED_DS = tokenized_ds

print("\nCandidate comparison complete.")
print(pd.DataFrame(DEPT_EXPERIMENTS).sort_values("val_macro_f1", ascending=False).to_string(index=False))

best_model_name = DEPT_BEST["model_name"]
best_learning_rate = DEPT_BEST["learning_rate"]
best_model_dir = DEPT_MODELS_DIR / "best"
best_model_dir.mkdir(parents=True, exist_ok=True)

# Optional full retrain on best candidate with configured epochs.
if full_epochs > candidate_epochs:
    print(
        f"\nRetraining best candidate ({best_model_name}, lr={best_learning_rate}) "
        f"for full epochs: {full_epochs}"
    )
    tokenizer = AutoTokenizer.from_pretrained(best_model_name, use_fast=True)
    tokenized_ds = tokenize_department_splits(tokenizer)
    trainer = build_trainer(
        model_name=best_model_name,
        tokenized_ds=tokenized_ds,
        tokenizer=tokenizer,
        run_dir=best_model_dir,
        num_train_epochs=full_epochs,
        learning_rate=best_learning_rate,
    )
    trainer.train()
    DEPT_BEST_TRAINER = trainer
    DEPT_BEST_TOKENIZER = tokenizer
    DEPT_BEST_TOKENIZED_DS = tokenized_ds

DEPT_BEST_TRAINER.save_model(str(best_model_dir))
DEPT_BEST_TOKENIZER.save_pretrained(str(best_model_dir))
print(f"Saved best department model to: {best_model_dir}")

dept_experiment_path = RESULTS_DIR / "department_model_selection.json"
with open(dept_experiment_path, "w", encoding="utf-8") as f:
    json.dump(
        {
            "selected_model": best_model_name,
            "selected_learning_rate": best_learning_rate,
            "candidate_results": DEPT_EXPERIMENTS,
            "learning_rates": learning_rates,
            "candidate_epochs": candidate_epochs,
            "full_epochs": full_epochs,
        },
        f,
        ensure_ascii=True,
        indent=2,
    )
print(f"Saved model-selection report: {dept_experiment_path}")


[Department] Trial 1/3: distilroberta-base | lr=1e-05


Map:   0%|          | 0/2400 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading model: distilroberta-base
If this is the first run, Hugging Face weights will be downloaded.


Epoch,Training Loss,Validation Loss,Accuracy,Macro F1,Weighted F1
1,1.8511,1.751816,0.383333,0.160861,0.283588
2,1.7562,1.651964,0.38,0.180795,0.302085
3,1.633,1.622141,0.413333,0.247082,0.339543


Validation macro F1: 0.2471

[Department] Trial 2/3: distilroberta-base | lr=2e-05


Map:   0%|          | 0/2400 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading model: distilroberta-base
If this is the first run, Hugging Face weights will be downloaded.


Epoch,Training Loss,Validation Loss,Accuracy,Macro F1,Weighted F1
1,1.7932,1.648134,0.42,0.185063,0.327233
2,1.7022,1.593771,0.376667,0.226634,0.317421
3,1.5306,1.565067,0.393333,0.236976,0.336591


Validation macro F1: 0.2370

[Department] Trial 3/3: distilroberta-base | lr=3e-05


Map:   0%|          | 0/2400 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading model: distilroberta-base
If this is the first run, Hugging Face weights will be downloaded.


Epoch,Training Loss,Validation Loss,Accuracy,Macro F1,Weighted F1
1,1.7978,1.647383,0.39,0.160228,0.28043
2,1.6949,1.578655,0.393333,0.234348,0.325253
3,1.4928,1.541799,0.39,0.235523,0.330018


Validation macro F1: 0.2355

Candidate comparison complete.
        model_name  learning_rate  candidate_epochs  val_accuracy  val_macro_f1  val_weighted_f1
distilroberta-base        0.00001                 3      0.413333      0.247082         0.339543
distilroberta-base        0.00002                 3      0.393333      0.236976         0.336591
distilroberta-base        0.00003                 3      0.390000      0.235523         0.330018
Saved best department model to: /Users/rawadyared/NLP-IT-Ticket_Triage/models/department_model/best
Saved model-selection report: /Users/rawadyared/NLP-IT-Ticket_Triage/results/department_model_selection.json


In [42]:
# Cell 19: Evaluate department classifier (test)
dept_test_output = DEPT_BEST_TRAINER.predict(DEPT_BEST_TOKENIZED_DS["test"])
dept_test_logits = dept_test_output.predictions
dept_test_probs = torch.softmax(torch.tensor(dept_test_logits), dim=-1).cpu().numpy()

y_true = DEPT_SPLITS["test"]["label"].to_numpy()
y_pred = dept_test_probs.argmax(axis=1)
conf = dept_test_probs.max(axis=1)

dept_test_metrics = {
    "accuracy": float(accuracy_score(y_true, y_pred)),
    "macro_f1": float(f1_score(y_true, y_pred, average="macro", zero_division=0)),
    "weighted_f1": float(f1_score(y_true, y_pred, average="weighted", zero_division=0)),
    "classification_report": classification_report(y_true, y_pred, output_dict=True, zero_division=0),
    "confusion_matrix": confusion_matrix(y_true, y_pred).tolist(),
}

print(
    f"Department transformer test -> "
    f"accuracy={dept_test_metrics['accuracy']:.4f}, "
    f"macro_f1={dept_test_metrics['macro_f1']:.4f}, "
    f"weighted_f1={dept_test_metrics['weighted_f1']:.4f}"
)

metrics_payload = {
    "task": "department",
    "selected_model": DEPT_BEST["model_name"],
    "selected_learning_rate": DEPT_BEST.get("learning_rate", CONFIG["train"]["learning_rate"]),
    "test_metrics": dept_test_metrics,
}
dept_metrics_path = RESULTS_DIR / "department_transformer_metrics.json"
with open(dept_metrics_path, "w", encoding="utf-8") as f:
    json.dump(metrics_payload, f, ensure_ascii=True, indent=2)
print(f"Saved test metrics: {dept_metrics_path}")

id_col = CONFIG["id_column"]
pred_df = DEPT_SPLITS["test"][[id_col, "ticket_text", "label", "label_text"]].copy()
pred_df["pred_label_id"] = y_pred
pred_df["pred_label"] = pred_df["pred_label_id"].map(DEPT_ID2LABEL)
pred_df["confidence"] = conf

pred_out_path = RESULTS_DIR / "department_test_predictions.csv"
pred_df.to_csv(pred_out_path, index=False)
print(f"Saved test predictions: {pred_out_path}")
display(pred_df.head(5))

Department transformer test -> accuracy=0.3867, macro_f1=0.2001, weighted_f1=0.3219
Saved test metrics: /Users/rawadyared/NLP-IT-Ticket_Triage/results/department_transformer_metrics.json
Saved test predictions: /Users/rawadyared/NLP-IT-Ticket_Triage/results/department_test_predictions.csv


Unnamed: 0.1,Unnamed: 0,ticket_text,label,label_text,pred_label_id,pred_label,confidence
0,2966,"Our marketing agency faced several hardware and software failures, which affected our digital campaigns. The possible root cause was compatibility issues. T...",9,Technical Support,9,Technical Support,0.458194
1,29612,"Greetings Customer Support,I am having difficulties in updating Norton Antivirus Plus, and I need assistance. I have attempted various troubleshooting metho...",9,Technical Support,9,Technical Support,0.393538
2,29395,Problems encountered during Kaspersky installation. Require help.,4,IT Support,9,Technical Support,0.44236
3,19002,"Customer Support, I am seeking detailed guidance on integrating Airtable project management software into my workflow. Could you provide comprehensive instr...",1,Customer Service,9,Technical Support,0.279409
4,24976,"We have noticed a decline in engagement for our digital campaigns. We have reviewed user feedback and adjusted our targeting, but we need assistance in pinp...",5,Product Support,5,Product Support,0.316021


## Urgency Transformer (Multiple Approaches)

We now apply the same multi-approach process to urgency/priority:

- Candidate A: `distilroberta-base`
- Candidate B: `bert-base-uncased`
- Model selection: best validation macro F1

Note: Hugging Face tokenizer/model files are downloaded on first run if not cached.

In [43]:
# Cell 20: Repeat transformer pipeline for urgency/priority (multi-approach)
URGENCY_SPLITS = SPLITS["urgency"]
URGENCY_LABEL2ID = TASK_DATA["urgency"]["label2id"]
URGENCY_ID2LABEL = TASK_DATA["urgency"]["id2label"]

URGENCY_MODELS_DIR = MODELS_DIR / "urgency_model"
URGENCY_MODELS_DIR.mkdir(parents=True, exist_ok=True)

def to_hf_dataset_urgency(split_df: pd.DataFrame, id_col: str) -> Dataset:
    keep_cols = [col for col in [id_col, "ticket_text", "label", "label_text"] if col in split_df.columns]
    return Dataset.from_pandas(split_df[keep_cols].copy(), preserve_index=False)

urgency_hf_raw = DatasetDict(
    {
        "train": to_hf_dataset_urgency(URGENCY_SPLITS["train"], CONFIG["id_column"]),
        "val": to_hf_dataset_urgency(URGENCY_SPLITS["val"], CONFIG["id_column"]),
        "test": to_hf_dataset_urgency(URGENCY_SPLITS["test"], CONFIG["id_column"]),
    }
)

def tokenize_urgency_splits(tokenizer):
    def _tokenize(batch):
        return tokenizer(
            batch["ticket_text"],
            truncation=True,
            padding="max_length",
            max_length=CONFIG["train"]["max_length"],
        )

    remove_cols = [c for c in urgency_hf_raw["train"].column_names if c != "label"]
    tokenized = urgency_hf_raw.map(_tokenize, batched=True, remove_columns=remove_cols)
    tokenized.set_format(type="torch")
    return tokenized

def init_urgency_model(model_name: str):
    print(f"Loading urgency model: {model_name}")
    print("If first run, Hugging Face downloads tokenizer and model weights.")
    return AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=len(URGENCY_LABEL2ID),
        id2label=URGENCY_ID2LABEL,
        label2id=URGENCY_LABEL2ID,
    )

def build_urgency_trainer(model_name: str, tokenized_ds: DatasetDict, tokenizer, run_dir: Path, num_train_epochs: int, learning_rate: float) -> Trainer:
    model = init_urgency_model(model_name)
    data_collator = DataCollatorWithPadding(
        tokenizer=tokenizer,
        pad_to_multiple_of=8 if torch.cuda.is_available() else None,
    )

    training_args_kwargs = {
        "output_dir": str(run_dir),
        "learning_rate": learning_rate,
        "per_device_train_batch_size": CONFIG["train"]["batch_size"],
        "per_device_eval_batch_size": CONFIG["train"]["batch_size"],
        "num_train_epochs": num_train_epochs,
        "weight_decay": CONFIG["train"]["weight_decay"],
        "warmup_ratio": CONFIG["train"]["warmup_ratio"],
        "save_strategy": "epoch",
        "load_best_model_at_end": True,
        "metric_for_best_model": "macro_f1",
        "greater_is_better": True,
        "save_total_limit": 2,
        "fp16": torch.cuda.is_available(),
        "logging_steps": 50,
        "report_to": [],
        "seed": CONFIG["seed"],
    }
    strategy_key = "eval_strategy" if "eval_strategy" in TrainingArguments.__init__.__code__.co_varnames else "evaluation_strategy"
    training_args_kwargs[strategy_key] = "epoch"
    args = TrainingArguments(**training_args_kwargs)

    trainer_kwargs = {
        "model": model,
        "args": args,
        "train_dataset": tokenized_ds["train"],
        "eval_dataset": tokenized_ds["val"],
        "data_collator": data_collator,
        "compute_metrics": compute_metrics,
        "callbacks": [EarlyStoppingCallback(early_stopping_patience=CONFIG["train"]["early_stopping_patience"])],
    }
    processing_key = "processing_class" if "processing_class" in Trainer.__init__.__code__.co_varnames else "tokenizer"
    trainer_kwargs[processing_key] = tokenizer
    return Trainer(**trainer_kwargs)

urgency_candidate_models = (
    CONFIG["candidate_models"]["urgency"]
    if CONFIG["experiment"]["run_candidate_search"]
    else [CONFIG["model_names"]["urgency"]]
)

urgency_learning_rates = (
    CONFIG["train"].get("learning_rates", [CONFIG["train"]["learning_rate"]])
    if CONFIG["experiment"].get("tune_learning_rate", True)
    else [CONFIG["train"]["learning_rate"]]
)

urgency_candidate_epochs = CONFIG["experiment"]["candidate_epochs"]
urgency_full_epochs = CONFIG["train"]["epochs"]

URGENCY_EXPERIMENTS = []
URGENCY_BEST = None
URGENCY_BEST_TRAINER = None
URGENCY_BEST_TOKENIZER = None
URGENCY_BEST_TOKENIZED_DS = None

urgency_run_idx = 0
for model_name in urgency_candidate_models:
    for lr in urgency_learning_rates:
        urgency_run_idx += 1
        safe_name = model_name.replace("/", "_")
        lr_tag = str(lr).replace(".", "p")
        run_dir = URGENCY_MODELS_DIR / f"candidate_{urgency_run_idx}_{safe_name}_lr{lr_tag}"
        run_dir.mkdir(parents=True, exist_ok=True)

        print(
            f"\n[Urgency] Trial {urgency_run_idx}/{len(urgency_candidate_models) * len(urgency_learning_rates)}: "
            f"{model_name} | lr={lr}"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
        tokenized_ds = tokenize_urgency_splits(tokenizer)

        trainer = build_urgency_trainer(
            model_name=model_name,
            tokenized_ds=tokenized_ds,
            tokenizer=tokenizer,
            run_dir=run_dir,
            num_train_epochs=urgency_candidate_epochs,
            learning_rate=lr,
        )

        trainer.train()
        val_metrics = trainer.evaluate(tokenized_ds["val"])
        val_macro_f1 = float(val_metrics.get("eval_macro_f1", -1.0))

        experiment_row = {
            "model_name": model_name,
            "learning_rate": lr,
            "candidate_epochs": urgency_candidate_epochs,
            "val_accuracy": float(val_metrics.get("eval_accuracy", 0.0)),
            "val_macro_f1": val_macro_f1,
            "val_weighted_f1": float(val_metrics.get("eval_weighted_f1", 0.0)),
        }
        URGENCY_EXPERIMENTS.append(experiment_row)
        print(f"Validation macro F1: {val_macro_f1:.4f}")

        if (URGENCY_BEST is None) or (val_macro_f1 > URGENCY_BEST["val_macro_f1"]):
            URGENCY_BEST = experiment_row
            URGENCY_BEST_TRAINER = trainer
            URGENCY_BEST_TOKENIZER = tokenizer
            URGENCY_BEST_TOKENIZED_DS = tokenized_ds

print("\nUrgency candidate comparison complete.")
print(pd.DataFrame(URGENCY_EXPERIMENTS).sort_values("val_macro_f1", ascending=False).to_string(index=False))

urgency_best_model_name = URGENCY_BEST["model_name"]
urgency_best_learning_rate = URGENCY_BEST["learning_rate"]
urgency_best_model_dir = URGENCY_MODELS_DIR / "best"
urgency_best_model_dir.mkdir(parents=True, exist_ok=True)

if urgency_full_epochs > urgency_candidate_epochs:
    print(
        f"\nRetraining best urgency candidate ({urgency_best_model_name}, lr={urgency_best_learning_rate}) "
        f"for full epochs: {urgency_full_epochs}"
    )
    tokenizer = AutoTokenizer.from_pretrained(urgency_best_model_name, use_fast=True)
    tokenized_ds = tokenize_urgency_splits(tokenizer)
    trainer = build_urgency_trainer(
        model_name=urgency_best_model_name,
        tokenized_ds=tokenized_ds,
        tokenizer=tokenizer,
        run_dir=urgency_best_model_dir,
        num_train_epochs=urgency_full_epochs,
        learning_rate=urgency_best_learning_rate,
    )
    trainer.train()
    URGENCY_BEST_TRAINER = trainer
    URGENCY_BEST_TOKENIZER = tokenizer
    URGENCY_BEST_TOKENIZED_DS = tokenized_ds

URGENCY_BEST_TRAINER.save_model(str(urgency_best_model_dir))
URGENCY_BEST_TOKENIZER.save_pretrained(str(urgency_best_model_dir))
print(f"Saved best urgency model to: {urgency_best_model_dir}")

urgency_selection_path = RESULTS_DIR / "urgency_model_selection.json"
with open(urgency_selection_path, "w", encoding="utf-8") as f:
    json.dump(
        {
            "selected_model": urgency_best_model_name,
            "selected_learning_rate": urgency_best_learning_rate,
            "candidate_results": URGENCY_EXPERIMENTS,
            "learning_rates": urgency_learning_rates,
            "candidate_epochs": urgency_candidate_epochs,
            "full_epochs": urgency_full_epochs,
        },
        f,
        ensure_ascii=True,
        indent=2,
    )
print(f"Saved urgency model-selection report: {urgency_selection_path}")


[Urgency] Trial 1/3: distilroberta-base | lr=1e-05


Map:   0%|          | 0/2400 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading urgency model: distilroberta-base
If first run, Hugging Face downloads tokenizer and model weights.


Epoch,Training Loss,Validation Loss,Accuracy,Macro F1,Weighted F1
1,1.0615,1.029061,0.433333,0.321703,0.38368
2,1.0035,1.01877,0.473333,0.343722,0.411852
3,0.9784,1.008657,0.48,0.346701,0.415546


Validation macro F1: 0.3467

[Urgency] Trial 2/3: distilroberta-base | lr=2e-05


Map:   0%|          | 0/2400 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading urgency model: distilroberta-base
If first run, Hugging Face downloads tokenizer and model weights.


Epoch,Training Loss,Validation Loss,Accuracy,Macro F1,Weighted F1
1,1.054,1.036475,0.496667,0.349901,0.419886
2,1.0059,1.022274,0.476667,0.344995,0.413392


Validation macro F1: 0.3499

[Urgency] Trial 3/3: distilroberta-base | lr=3e-05


Map:   0%|          | 0/2400 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading urgency model: distilroberta-base
If first run, Hugging Face downloads tokenizer and model weights.


Epoch,Training Loss,Validation Loss,Accuracy,Macro F1,Weighted F1
1,1.0725,1.059424,0.386667,0.185897,0.215641
2,1.06,1.049227,0.446667,0.297829,0.358886
3,1.0251,1.024814,0.46,0.335312,0.401613


Validation macro F1: 0.3353

Urgency candidate comparison complete.
        model_name  learning_rate  candidate_epochs  val_accuracy  val_macro_f1  val_weighted_f1
distilroberta-base        0.00002                 3      0.496667      0.349901         0.419886
distilroberta-base        0.00001                 3      0.480000      0.346701         0.415546
distilroberta-base        0.00003                 3      0.460000      0.335312         0.401613
Saved best urgency model to: /Users/rawadyared/NLP-IT-Ticket_Triage/models/urgency_model/best
Saved urgency model-selection report: /Users/rawadyared/NLP-IT-Ticket_Triage/results/urgency_model_selection.json


In [44]:
# Cell 20 (cont.): Evaluate urgency classifier (test)
urgency_test_output = URGENCY_BEST_TRAINER.predict(URGENCY_BEST_TOKENIZED_DS["test"])
urgency_test_logits = urgency_test_output.predictions
urgency_test_probs = torch.softmax(torch.tensor(urgency_test_logits), dim=-1).cpu().numpy()

y_true = URGENCY_SPLITS["test"]["label"].to_numpy()
y_pred = urgency_test_probs.argmax(axis=1)
conf = urgency_test_probs.max(axis=1)

urgency_test_metrics = {
    "accuracy": float(accuracy_score(y_true, y_pred)),
    "macro_f1": float(f1_score(y_true, y_pred, average="macro", zero_division=0)),
    "weighted_f1": float(f1_score(y_true, y_pred, average="weighted", zero_division=0)),
    "classification_report": classification_report(y_true, y_pred, output_dict=True, zero_division=0),
    "confusion_matrix": confusion_matrix(y_true, y_pred).tolist(),
}

print(
    f"Urgency transformer test -> "
    f"accuracy={urgency_test_metrics['accuracy']:.4f}, "
    f"macro_f1={urgency_test_metrics['macro_f1']:.4f}, "
    f"weighted_f1={urgency_test_metrics['weighted_f1']:.4f}"
)

urgency_metrics_payload = {
    "task": "urgency",
    "selected_model": URGENCY_BEST["model_name"],
    "selected_learning_rate": URGENCY_BEST.get("learning_rate", CONFIG["train"]["learning_rate"]),
    "test_metrics": urgency_test_metrics,
}
urgency_metrics_path = RESULTS_DIR / "urgency_transformer_metrics.json"
with open(urgency_metrics_path, "w", encoding="utf-8") as f:
    json.dump(urgency_metrics_payload, f, ensure_ascii=True, indent=2)
print(f"Saved urgency test metrics: {urgency_metrics_path}")

id_col = CONFIG["id_column"]
urgency_pred_df = URGENCY_SPLITS["test"][[id_col, "ticket_text", "label", "label_text"]].copy()
urgency_pred_df["pred_label_id"] = y_pred
urgency_pred_df["pred_label"] = urgency_pred_df["pred_label_id"].map(URGENCY_ID2LABEL)
urgency_pred_df["confidence"] = conf

urgency_pred_out_path = RESULTS_DIR / "urgency_test_predictions.csv"
urgency_pred_df.to_csv(urgency_pred_out_path, index=False)
print(f"Saved urgency test predictions: {urgency_pred_out_path}")
display(urgency_pred_df.head(5))

Urgency transformer test -> accuracy=0.4467, macro_f1=0.2954, weighted_f1=0.3571
Saved urgency test metrics: /Users/rawadyared/NLP-IT-Ticket_Triage/results/urgency_transformer_metrics.json
Saved urgency test predictions: /Users/rawadyared/NLP-IT-Ticket_Triage/results/urgency_test_predictions.csv


Unnamed: 0.1,Unnamed: 0,ticket_text,label,label_text,pred_label_id,pred_label,confidence
0,27583,"Dear Customer Service Team,I am contacting you to address a problem with the forecast predictions offered. These forecasts appear to be unreliable due to in...",2,medium,2,medium,0.419524
1,6598,There is a risk that the medical records have been exposed due to a data leak. Outdated security measures and old software might have played a role in this.,2,medium,2,medium,0.405263
2,19435,A financial firm is encountering software incompatibility problems following a recent system update. Reinstalling and updating the software may resolve the ...,2,medium,2,medium,0.420988
3,14000,"Dear Customer Support Team, I am writing to request updates to our data security protocols to better safeguard medical data across all applications. Given t...",1,low,2,medium,0.422017
4,5858,The agency s digital marketing efforts did not enhance brand visibility; potential segmentation mistakes or targeting errors may have contributed. Efforts m...,2,medium,2,medium,0.408694


## Tags and Summary (Prototype Inference)

Approach:

- **Tags**: extractive keyphrases with YAKE (lightweight, stable, CPU-friendly)
- **Summary**: abstractive summary with `t5-small`

Download note:

- `t5-small` is downloaded from Hugging Face on first run if not cached.

In [45]:
# Cell 22: Tags extraction module (YAKE)
import yake

YAKE_CONFIG = {
    "lan": "en",
    "n": 3,
    "dedupLim": 0.9,
    "dedupFunc": "seqm",
    "windowsSize": 1,
}

def extract_tags(text: str, top_k: int = 5):
    clean_text = normalize_ticket_text(text)
    if not clean_text:
        return []

    extractor = yake.KeywordExtractor(top=top_k * 2, **YAKE_CONFIG)
    keywords = extractor.extract_keywords(clean_text)

    tags = []
    seen = set()
    for phrase, _score in keywords:
        candidate = normalize_ticket_text(phrase).lower()
        if len(candidate) < 3:
            continue
        if candidate in seen:
            continue
        seen.add(candidate)
        tags.append(candidate)
        if len(tags) >= top_k:
            break

    return tags

print("YAKE tag extractor ready.")

YAKE tag extractor ready.


In [46]:
# Cell 23: Summary module (t5-small)
from transformers import pipeline

SUMMARIZER = None

def get_summarizer():
    global SUMMARIZER
    if SUMMARIZER is None:
        model_name = CONFIG["model_names"]["summary"]
        print(f"Loading summarizer: {model_name}")
        print("If first run, Hugging Face will download summarizer files.")
        device = 0 if torch.cuda.is_available() else -1
        SUMMARIZER = pipeline("summarization", model=model_name, tokenizer=model_name, device=device)
    return SUMMARIZER

def summarize_text(text: str, min_len: int = 12, max_len: int = 60):
    clean_text = normalize_ticket_text(text)
    if not clean_text:
        return ""

    if len(clean_text.split()) < 25:
        return clean_text

    clean_text = clean_text[:3000]
    summarizer = get_summarizer()
    prefixed = f"summarize: {clean_text}"

    output = summarizer(
        prefixed,
        max_length=max_len,
        min_length=min_len,
        do_sample=False,
        truncation=True,
    )
    return normalize_ticket_text(output[0]["summary_text"])

print("Summary module ready.")

Summary module ready.


In [47]:
# Cell 24: Unified inference function -> JSON
from typing import Dict

CLASSIFIER_CACHE = {}

def _get_label_maps(task_name: str):
    return TASK_DATA[task_name]["label2id"], TASK_DATA[task_name]["id2label"]

def _best_model_dir(task_name: str) -> Path:
    return MODELS_DIR / f"{task_name}_model" / "best"

def _get_transformer_runtime(task_name: str):
    cache_key = f"transformer::{task_name}"
    if cache_key in CLASSIFIER_CACHE:
        return CLASSIFIER_CACHE[cache_key]

    trainer_var = "DEPT_BEST_TRAINER" if task_name == "department" else "URGENCY_BEST_TRAINER"
    tokenizer_var = "DEPT_BEST_TOKENIZER" if task_name == "department" else "URGENCY_BEST_TOKENIZER"
    if trainer_var in globals() and tokenizer_var in globals() and globals()[trainer_var] is not None:
        runtime = {
            "mode": "in_memory_transformer",
            "model": globals()[trainer_var].model,
            "tokenizer": globals()[tokenizer_var],
        }
        CLASSIFIER_CACHE[cache_key] = runtime
        return runtime

    model_dir = _best_model_dir(task_name)
    if model_dir.exists():
        runtime = {
            "mode": "disk_transformer",
            "model": AutoModelForSequenceClassification.from_pretrained(str(model_dir)),
            "tokenizer": AutoTokenizer.from_pretrained(str(model_dir), use_fast=True),
        }
        CLASSIFIER_CACHE[cache_key] = runtime
        return runtime

    if "BASELINE_MODELS" in globals() and task_name in BASELINE_MODELS:
        runtime = {
            "mode": "baseline",
            "model": BASELINE_MODELS[task_name],
            "tokenizer": None,
        }
        CLASSIFIER_CACHE[cache_key] = runtime
        return runtime

    raise RuntimeError(
        f"No model available for task '{task_name}'. Run training cells first "
        f"or ensure baseline cell executed."
    )

def _predict_label(task_name: str, text: str):
    _label2id, id2label = _get_label_maps(task_name)
    runtime = _get_transformer_runtime(task_name)

    if runtime["mode"] == "baseline":
        probs = runtime["model"].predict_proba([text])[0]
        pred_id = int(np.argmax(probs))
        conf = float(probs[pred_id])
    else:
        model = runtime["model"]
        tokenizer = runtime["tokenizer"]
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        model.eval()
        enc = tokenizer(
            text,
            truncation=True,
            max_length=CONFIG["train"]["max_length"],
            return_tensors="pt",
        )
        enc = {k: v.to(device) for k, v in enc.items()}
        with torch.no_grad():
            logits = model(**enc).logits
            probs = torch.softmax(logits, dim=-1).squeeze(0).detach().cpu().numpy()
        pred_id = int(np.argmax(probs))
        conf = float(probs[pred_id])

    label = id2label[pred_id]
    return {
        "label": label,
        "confidence": round(conf, 6),
    }

def triage_ticket(ticket_text: str, ticket_id: str = "ad_hoc", top_k_tags: int = 5, include_summary: bool = True) -> Dict:
    clean_text = normalize_ticket_text(ticket_text)
    if not clean_text:
        raise ValueError("ticket_text is empty after preprocessing")

    department_pred = _predict_label("department", clean_text)
    urgency_pred = _predict_label("urgency", clean_text)
    tags = extract_tags(clean_text, top_k=top_k_tags)
    summary = summarize_text(clean_text) if include_summary else ""

    return {
        "ticket_id": str(ticket_id),
        "department": department_pred,
        "urgency": urgency_pred,
        "tags": tags,
        "summary": summary,
    }

print("Unified triage function ready: triage_ticket(ticket_text)")

Unified triage function ready: triage_ticket(ticket_text)


In [48]:
# Cell 25: Run demo on 5-10 sample tickets + save sample predictions
import json

DEMO_SIZE = 8
DEMO_INCLUDE_SUMMARY = True
id_col = CONFIG["id_column"]

working = df_raw[[id_col, "ticket_text"]].copy()
working["word_count"] = working["ticket_text"].str.split().str.len()

long_idx = working.nlargest(2, "word_count").index.tolist()
question_mask = working["ticket_text"].str.contains(r"\?", regex=True, na=False)
question_pool = working[question_mask]
question_idx = question_pool.sample(n=min(2, len(question_pool)), random_state=CONFIG["seed"]).index.tolist() if len(question_pool) > 0 else []

remaining = max(0, DEMO_SIZE - len(long_idx) - len(question_idx))
random_pool = working.drop(index=set(long_idx + question_idx), errors="ignore")
rand_idx = random_pool.sample(n=min(remaining, len(random_pool)), random_state=CONFIG["seed"]).index.tolist()

demo_idx = list(dict.fromkeys(long_idx + question_idx + rand_idx))
demo_df = working.loc[demo_idx].reset_index(drop=True)

demo_outputs = []
demo_summary_mode = "model"
for row in demo_df.itertuples(index=False):
    try:
        result = triage_ticket(
            ticket_text=row.ticket_text,
            ticket_id=row[0],
            top_k_tags=5,
            include_summary=DEMO_INCLUDE_SUMMARY,
        )
    except Exception:
        demo_summary_mode = "fallback_no_summary"
        result = triage_ticket(
            ticket_text=row.ticket_text,
            ticket_id=row[0],
            top_k_tags=5,
            include_summary=False,
        )
    demo_outputs.append(result)

print(f"Demo tickets processed: {len(demo_outputs)}")
print(f"Demo summary mode: {demo_summary_mode}")
for i, payload in enumerate(demo_outputs[:3], start=1):
    print(f"\n--- Demo Output {i} ---")
    print(json.dumps(payload, ensure_ascii=True, indent=2))

sample_jsonl_path = RESULTS_DIR / "sample_predictions.jsonl"
with open(sample_jsonl_path, "w", encoding="utf-8") as f:
    for payload in demo_outputs:
        f.write(json.dumps(payload, ensure_ascii=True) + "\n")
print(f"Saved demo predictions: {sample_jsonl_path}")

demo_view = pd.DataFrame(
    [
        {
            "ticket_id": p["ticket_id"],
            "department": p["department"]["label"],
            "dept_conf": p["department"]["confidence"],
            "urgency": p["urgency"]["label"],
            "urg_conf": p["urgency"]["confidence"],
            "tags": ", ".join(p["tags"]),
            "summary": p["summary"],
        }
        for p in demo_outputs
    ]
)
display(demo_view)

Loading summarizer: t5-small
If first run, Hugging Face will download summarizer files.


Device set to use cpu
Both `max_new_tokens` (=256) and `max_length`(=60) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=60) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=60) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=60) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes

Demo tickets processed: 8
Demo summary mode: model

--- Demo Output 1 ---
{
  "ticket_id": "28906",
  "department": {
    "label": "Billing and Payments",
    "confidence": 0.921739
  },
  "urgency": {
    "label": "medium",
    "confidence": 0.431579
  },
  "tags": [
    "customer support team,i",
    "support team,i hope",
    "customer support",
    "aws management service",
    "support team,i"
  ],
  "summary": "the billing reflects charges that were not discussed or predicted in previous correspondence regarding expected usage rates and associated expenses . my team conducted an internal audit and found differences in the projections versus the billed amounts in the billing documentation provided by you on date."
}

--- Demo Output 2 ---
{
  "ticket_id": "28840",
  "department": {
    "label": "Technical Support",
    "confidence": 0.343191
  },
  "urgency": {
    "label": "high",
    "confidence": 0.437415
  },
  "tags": [
    "customer support team,i",
    "support team,i hope"

Unnamed: 0,ticket_id,department,dept_conf,urgency,urg_conf,tags,summary
0,28906,Billing and Payments,0.921739,medium,0.431579,"customer support team,i, support team,i hope, customer support, aws management service, support team,i",the billing reflects charges that were not discussed or predicted in previous correspondence regarding expected usage rates and associated expenses . my tea...
1,28840,Technical Support,0.343191,high,0.437415,"customer support team,i, support team,i hope, customer support, team,i hope, hope this message",our company relies heavily on the IT Consulting Service provided by your firm . the server disruption has persisted despite our in-house troubleshooting eff...
2,3787,Customer Service,0.267399,medium,0.41763,"scalable saas project, saas project management, project management platform, customer support,i, support,i am reaching",customer support would like to know the steps involved in setting up your SaaS project management platform .
3,24432,Product Support,0.268937,medium,0.422503,"protocols are advised, advised, protocols",Which protocols are advised?
4,12272,Technical Support,0.274438,medium,0.418879,"customer support, project management saas, jenkins project management, management saas, support",customer support would appreciate a detailed guide or documentation set up . could include troubleshooting tips for common issues .
5,22197,Product Support,0.280785,medium,0.427547,"customer support, digital strategy tools, boost brand growth, brand growth integration, hubspot and clickup","the enhancements would allow the team to streamline workflows, track progress, and make data-driven decisions more efficiently . current tools are limited a..."
6,12696,Technical Support,0.405435,high,0.403304,"customer support, customer, support, data has occurred, access",an incident involving unauthorized access to medical data has occurred . access issues persist .
7,6904,Technical Support,0.441482,high,0.443826,"unauthorized access alert, medical data systems, software vulnerability, reset access credentials, unauthorized access","an unauthorized access alert has been detected in our medical data systems . we have applied a patch to the affected software, reset access credentials ."


In [49]:
# Cell 26: Business value analysis + summary-length comparison
import time

ASSUMPTIONS = {
    "reading_speed_wpm": 200,
    "routing_buffer_seconds": 30,
    "model_seconds_per_ticket": 5,
    "hourly_wage_usd": 28.0,
    "monthly_ticket_volume": int(len(df_raw)),
    "summary_eval_sample_size": min(100, int(len(df_raw))),
}

id_col = CONFIG["id_column"]

word_counts = df_raw["ticket_text"].str.split().str.len()
avg_ticket_words = float(word_counts.mean())

manual_read_seconds = (avg_ticket_words / ASSUMPTIONS["reading_speed_wpm"]) * 60.0
manual_total_seconds = manual_read_seconds + ASSUMPTIONS["routing_buffer_seconds"]
model_total_seconds = float(ASSUMPTIONS["model_seconds_per_ticket"])
time_saved_seconds = manual_total_seconds - model_total_seconds

hourly_wage = ASSUMPTIONS["hourly_wage_usd"]
monthly_volume = ASSUMPTIONS["monthly_ticket_volume"]

monthly_hours_saved = (time_saved_seconds * monthly_volume) / 3600.0
monthly_savings_usd = monthly_hours_saved * hourly_wage
annual_savings_usd = monthly_savings_usd * 12.0

# Summary-length analysis on a fixed sample for runtime practicality.
sample_n = ASSUMPTIONS["summary_eval_sample_size"]
summary_sample = df_raw[[id_col, "ticket_text"]].sample(n=sample_n, random_state=CONFIG["seed"]).reset_index(drop=True)

summary_outputs = []
start_ts = time.time()
summary_mode = "model"
for txt in summary_sample["ticket_text"]:
    try:
        summary_outputs.append(summarize_text(txt))
    except Exception:
        summary_mode = "fallback"
        fallback = " ".join(normalize_ticket_text(txt).split()[:40])
        summary_outputs.append(fallback)
elapsed_summary_sec = time.time() - start_ts

summary_sample["summary_text"] = summary_outputs
summary_sample["input_words"] = summary_sample["ticket_text"].str.split().str.len()
summary_sample["summary_words"] = summary_sample["summary_text"].str.split().str.len()

avg_input_words_sample = float(summary_sample["input_words"].mean())
avg_summary_words_sample = float(summary_sample["summary_words"].mean())
compression_ratio = avg_summary_words_sample / max(avg_input_words_sample, 1e-9)

business_value_report = {
    "assumptions": ASSUMPTIONS,
    "dataset_stats": {
        "num_tickets": int(len(df_raw)),
        "avg_ticket_words": round(avg_ticket_words, 4),
    },
    "timing_seconds": {
        "manual_read_seconds": round(manual_read_seconds, 4),
        "manual_total_seconds": round(manual_total_seconds, 4),
        "model_total_seconds": round(model_total_seconds, 4),
        "time_saved_seconds_per_ticket": round(time_saved_seconds, 4),
    },
    "savings_usd": {
        "monthly_hours_saved": round(monthly_hours_saved, 4),
        "monthly_savings": round(monthly_savings_usd, 2),
        "annual_savings": round(annual_savings_usd, 2),
    },
    "summary_length_analysis": {
        "summary_mode": summary_mode,
        "sample_size": int(sample_n),
        "avg_input_words_sample": round(avg_input_words_sample, 4),
        "avg_summary_words_sample": round(avg_summary_words_sample, 4),
        "compression_ratio_summary_over_input": round(compression_ratio, 4),
        "summary_eval_elapsed_seconds": round(elapsed_summary_sec, 4),
    },
}

business_path = RESULTS_DIR / "business_value_analysis.json"
with open(business_path, "w", encoding="utf-8") as f:
    json.dump(business_value_report, f, ensure_ascii=True, indent=2)

print("Business Value Summary")
print(f"- Avg ticket words (full dataset): {avg_ticket_words:.2f}")
print(f"- Manual triage time/ticket: {manual_total_seconds:.2f}s")
print(f"- Model triage time/ticket: {model_total_seconds:.2f}s")
print(f"- Time saved per ticket: {time_saved_seconds:.2f}s")
print(f"- Monthly hours saved: {monthly_hours_saved:.2f}h")
print(f"- Monthly savings (USD): ${monthly_savings_usd:,.2f}")
print(f"- Annual savings (USD): ${annual_savings_usd:,.2f}")
print(f"- Avg input words (summary sample): {avg_input_words_sample:.2f}")
print(f"- Avg summary words (summary sample): {avg_summary_words_sample:.2f}")
print(f"- Summary/Input ratio: {compression_ratio:.3f}")
print(f"Saved business report: {business_path}")

display(summary_sample[[id_col, "input_words", "summary_words"]].head(10))

Both `max_new_tokens` (=256) and `max_length`(=60) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Your max_length is set to 60, but your input_length is only 36. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=18)
Both `max_new_tokens` (=256) and `max_length`(=60) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Your max_length is set to 60, but your input_length is only 58. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=29)
Both `

Business Value Summary
- Avg ticket words (full dataset): 55.79
- Manual triage time/ticket: 46.74s
- Model triage time/ticket: 5.00s
- Time saved per ticket: 41.74s
- Monthly hours saved: 34.78h
- Monthly savings (USD): $973.83
- Annual savings (USD): $11,686.00
- Avg input words (summary sample): 51.76
- Avg summary words (summary sample): 22.14
- Summary/Input ratio: 0.428
Saved business report: /Users/rawadyared/NLP-IT-Ticket_Triage/results/business_value_analysis.json


Unnamed: 0.1,Unnamed: 0,input_words,summary_words
0,26115,76,23
1,10543,15,15
2,17539,24,24
3,18419,11,11
4,7318,10,10
5,17021,6,6
6,11384,27,15
7,26955,19,19
8,10494,7,7
9,23959,43,28
