# Ticket Triage with Topic Modelling using Bertopic

In [1]:
from src.dataset import load_dataset

dataset, queue_labels, id2label, label2id, class_weight_dict = load_dataset()
train_dataset, test_dataset = dataset["train"], dataset["test"]

  from .autonotebook import tqdm as notebook_tqdm
Filter: 100%|██████████| 28587/28587 [00:00<00:00, 81221.16 examples/s]
Map: 100%|██████████| 16338/16338 [00:00<00:00, 18188.59 examples/s]
Map: 100%|██████████| 16338/16338 [00:00<00:00, 31534.89 examples/s]
Map: 100%|██████████| 16338/16338 [00:00<00:00, 32848.41 examples/s]
Stringifying the column: 100%|██████████| 16338/16338 [00:00<00:00, 969738.04 examples/s]
Casting to class labels: 100%|██████████| 16338/16338 [00:00<00:00, 753593.73 examples/s]
[17:03:09] {c:\Users\afons\coding-projects\bertopic-ticket-triage\src\dataset.py:42} INFO -                                                     text  labels
0      Account Disruption Dear Customer Support Team,...       0
1      Query About Smart Home System Integration Feat...       1
2      Inquiry Regarding Invoice Details Dear Custome...       2
3      Question About Marketing Agency Software Compa...       3
4      Feature Query Dear Customer Support,\n\nI hope...       0
...      

In [None]:
from src.model import train_supervised_classifier

docs, y = train_dataset["text"], train_dataset["labels"]
base_model = train_supervised_classifier(docs=docs, y=y, class_weight_dict=class_weight_dict)

[15:55:53] {c:\Users\afons\coding-projects\bertopic-ticket-triage\.venv\Lib\site-packages\sentence_transformers\SentenceTransformer.py:219} INFO - Use pytorch device_name: cpu
[15:55:53] {c:\Users\afons\coding-projects\bertopic-ticket-triage\.venv\Lib\site-packages\sentence_transformers\SentenceTransformer.py:227} INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


In [4]:
test = test_dataset[0]["text"]
test

'Delays in Financial Data Processing The financial organization encountered delays in processing data. The potential reason might be server overload. Efforts to resolve this included rebooting servers and clearing caches, but the problem continues.'

In [None]:
topic, _ = base_model.transform(test)
topic

[np.int64(0)]

In [None]:
base_model.get_topic(topic[0])
pred = id2label[topic[0]]
original = id2label[test_dataset[0]["labels"]]
pred, original

('Technical Support', 'IT Support')

In [None]:
def predict(row):
    topic, _ = base_model.transform(row["text"])
    return topic[0]


test_dataset = test_dataset.map(lambda x: {
    "preds" : predict(x)
})

## Baseline is 29.11%
accuracy = test_dataset.filter(lambda x: x["preds"] == x["labels"]).num_rows * 100 / test_dataset.num_rows
accuracy


Map: 100%|██████████| 4085/4085 [01:00<00:00, 67.08 examples/s]
Filter: 100%|██████████| 4085/4085 [00:00<00:00, 333815.18 examples/s]


29.106487148102815

In [2]:
from src.model import train_zshot_model

zshot_model = train_zshot_model(docs=train_dataset["text"], zeroshot_topic_list=queue_labels)

[17:03:34] {c:\Users\afons\coding-projects\bertopic-ticket-triage\.venv\Lib\site-packages\sentence_transformers\SentenceTransformer.py:219} INFO - Use pytorch device_name: cpu
[17:03:34] {c:\Users\afons\coding-projects\bertopic-ticket-triage\.venv\Lib\site-packages\sentence_transformers\SentenceTransformer.py:227} INFO - Load pretrained SentenceTransformer: ibm-granite/granite-embedding-small-english-r2
Batches: 100%|██████████| 1/1 [00:00<00:00, 24.87it/s]


In [3]:
def predict(row):
    topic, _ = zshot_model.transform(row["text"])
    return topic[0]


test_dataset = test_dataset.map(lambda x: {
    "preds" : predict(x)
})

accuracy = test_dataset.filter(lambda x: x["preds"] == x["labels"]).num_rows * 100 / test_dataset.num_rows
accuracy


Map: 100%|██████████| 4085/4085 [02:47<00:00, 24.35 examples/s]
Filter: 100%|██████████| 4085/4085 [00:00<00:00, 378620.91 examples/s]


4.2594859241126075