Data Preparation:

Prepare a Support Ticket dataset (Ticket ID, User Query, Category).

Normalize text (remove special characters, lowercase).

In [20]:
import torch
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import pandas as pd
import re
device = torch.device('cpu')
data_path = r"C:\Users\Manian VJS\Downloads\support_ticket_data.csv"
df = pd.read_csv(data_path)
def normalize_text(text):
    text = re.sub(r'[^\w\s]', '', text)
    text = text.lower()
    return text
df['User Query'] = df['User Query'].apply(normalize_text)
mlb = MultiLabelBinarizer()
labels = mlb.fit_transform(df['Category'])
X_train, X_test, y_train, y_test = train_test_split(df['User Query'], labels, test_size=0.2, random_state=42)

2.Multi-Label Classification:

Train a BERT model for multi-label classification of support tickets.

Fine-tune the BERT model to classify tickets into multiple tags.

In [21]:
class TicketDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
    def __len__(self):
        return len(self.texts)
    def __getitem__(self, idx):
        text = self.texts[idx]
        inputs = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        label = torch.tensor(self.labels[idx], dtype=torch.float)
        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'labels': label
        }
MAX_LEN = 256
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
train_dataset = TicketDataset(X_train.to_list(), y_train, tokenizer, MAX_LEN)
test_dataset = TicketDataset(X_test.to_list(), y_test, tokenizer, MAX_LEN)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8)

Example

In [28]:
example_batch = next(iter(train_loader))
example_input_ids = example_batch['input_ids'].to(device)
example_attention_mask = example_batch['attention_mask'].to(device)
example_labels = example_batch['labels'].to(device)

with torch.no_grad():
    example_outputs = model(example_input_ids, attention_mask=example_attention_mask)
    example_logits = example_outputs.logits
    example_preds = torch.sigmoid(example_logits).cpu().numpy()
print("Example predictions: ", example_preds)
print("Example true labels: ", example_labels.cpu().numpy())

Example predictions:  [[0.42263746 0.5583495  0.37065837 0.47111332 0.37068075 0.5853716
  0.5816554  0.55073833 0.59850115 0.41836867 0.50048196 0.47200292
  0.56716317 0.45134288 0.522708   0.4105469  0.56048894 0.4069184
  0.5966374  0.33741415 0.51642346 0.5047139  0.40780765 0.5089507 ]
 [0.34196344 0.6061839  0.29007187 0.47739378 0.45914516 0.6233553
  0.61203873 0.56748176 0.553133   0.4419453  0.5616265  0.49003357
  0.53530115 0.42320022 0.51467514 0.43902892 0.48194712 0.380439
  0.61810523 0.3538714  0.46859878 0.46764454 0.43681076 0.55148077]
 [0.44385645 0.5724901  0.36882788 0.47315145 0.42359942 0.52633953
  0.6265597  0.5468251  0.60155976 0.4445151  0.45513654 0.47009614
  0.55933267 0.5509839  0.4803009  0.4394452  0.56790274 0.42013136
  0.49835286 0.39231923 0.45044455 0.5463756  0.43492782 0.5019215 ]
 [0.42089    0.58522326 0.34094793 0.4642755  0.40175596 0.51967734
  0.52808446 0.52476054 0.63571864 0.4242338  0.51904255 0.44998792
  0.5946599  0.45640555 0.48

4.API Development:

Develop an API endpoint /auto-tag-ticket.

Return a list of relevant tags for each support ticket.

In [30]:
def get_ticket_info(ticket_id):
    ticket_row = df.loc[df['Ticket ID'] == ticket_id]
    if ticket_row.empty:
        return "Ticket ID not found."
    user_query = ticket_row['User Query'].values[0]
    actual_category = ticket_row['Category'].values[0]
    predicted_category = get_prediction(user_query)
    return user_query, actual_category, predicted_category
def get_prediction(ticket_query):
    model.eval()
    inputs = tokenizer.encode_plus(
        ticket_query,
        add_special_tokens=True,
        max_length=128,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    logits = outputs.logits
    probs = torch.sigmoid(logits).cpu().numpy()
    tags = mlb.inverse_transform(probs > 0.5)
    return tags

In [31]:
ticket_id = input("Give the ID: ")
user_query, actual_category, predicted_category = get_ticket_info(ticket_id)
print(f"User Query: {user_query}")
print(f"Actual Category: {actual_category}")
print(f"Predicted Category: {predicted_category}")

User Query: payment issue occurred
Actual Category: Delivery Issue
Predicted Category: [('D', 'Q', 'S', 'a', 'c', 'e', 'i', 'l', 'n', 'p', 'r', 't', 'y')]


5.Testing:

Test the API with support ticket samples in Postman.

In [33]:
from pydantic import BaseModel
class Ticket(BaseModel):
    user_query: str

In [None]:
from fastapi import FastAPI,
from pydantic import BaseModel
import json
from typing import List
import uvicorn
app = FastAPI()
class Ticket(BaseModel):
    user_query: str
class Feedback(BaseModel):
    ticket_id: str
    user_query: str
    correct_tags: List[str]
feedback_data = []
@app.post("/auto-tag-ticket")
async def auto_tag_ticket(ticket: Ticket):
    ticket_query = ticket.user_query
    predicted_tags = get_prediction(ticket_query)
    return {"tags": predicted_tags}
@app.post("/feedback")
async def collect_feedback(feedback: Feedback):
    feedback_entry = feedback.dict()
    feedback_data.append(feedback_entry)
    with open('feedback.json', 'w') as f:
        json.dump(feedback_data, f)
    return {"message": "Feedback received. Thank you!"}
def get_prediction(ticket_query):
    model.eval()
    inputs = tokenizer.encode_plus(
        ticket_query,
        add_special_tokens=True,
        max_length=128,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    logits = outputs.logits
    probs = torch.sigmoid(logits).cpu().numpy()
    tags = mlb.inverse_transform(probs > 0.5)
    return tags
config = uvicorn.Config(app, host="0.0.0.0", port=8000)
server = uvicorn.Server(config)
await server.serve()

INFO:     Started server process [2888]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
