In [123]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from sklearn.model_selection import train_test_split
import spacy

### HYPER PARAMETERS

In [125]:
SEED = 12
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
EPOCHS = 20

In [126]:
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True 

### DATA PREPROCESSING

In [91]:
df = pd.read_csv("supportTicketData.csv")
df.head()

Unnamed: 0,TicketID,Ticket detailed description,urgency-Priority
0,123456,connection issues with assigned address hi fac...,P1
1,123457,cannot access hi cannot access fallowing link ...,P2
2,123458,re address shown valid dear colleagues remarke...,P1
3,123459,sent tuesday critical alert following alert oc...,P2
4,123460,code spelling mistake hello should discover fo...,P2


In [92]:
df = df.drop("TicketID" , axis=1)

In [93]:
df = df.rename({"Ticket detailed description":"description" , "urgency-Priority": "priority"} , axis = 1)

priority_mapping = {"P1":0, "P2":1, "P3":2}
df["priority"] = df["priority"].replace(priority_mapping)
df.head()

  df["priority"] = df["priority"].replace(priority_mapping)


Unnamed: 0,description,priority
0,connection issues with assigned address hi fac...,0
1,cannot access hi cannot access fallowing link ...,1
2,re address shown valid dear colleagues remarke...,0
3,sent tuesday critical alert following alert oc...,1
4,code spelling mistake hello should discover fo...,1


In [96]:
nlp = spacy.load('en_core_web_sm')

def preprocess_text(text):

    doc = nlp(text.lower())

    tokens = [token.lemma_ for token in doc if token.is_alpha and not token.is_stop]
    return " ".join(tokens)

print(df.head())
df["description"] = df["description"].apply(preprocess_text)
print(df.head())

                                         description  priority
0  connection issues with assigned address hi fac...         0
1  cannot access hi cannot access fallowing link ...         1
2  re address shown valid dear colleagues remarke...         0
3  sent tuesday critical alert following alert oc...         1
4  code spelling mistake hello should discover fo...         1
                                         description  priority
0  connection issue assign address hi face connec...         0
1  access hi access fallowing link blank proceed ...         1
2  address show valid dear colleague remark write...         0
3  send tuesday critical alert follow alert occur...         1
4  code spelling mistake hello discover code chan...         1


In [97]:
X_train, X_val, y_train, y_val = train_test_split(
    df['description'].values, df['priority'].values, test_size=0.2, random_state=SEED
)

### DATASET PREPARATION

In [98]:
class TicketDataset(Dataset):
    def __init__(self, descriptions, labels, vocab=None, max_length=50):
        self.descriptions = descriptions
        self.labels = labels
        self.max_length = max_length

        if vocab is None:
            all_words = set(word for text in descriptions for word in text.split())
            self.vocab = {word: idx + 1 for idx, word in enumerate(sorted(all_words))}
            self.vocab['<PAD>'] = 0
        else:
            self.vocab = vocab

    def encode_text(self, text):
        token_ids = [self.vocab.get(word, 0) for word in text.split()]
        token_ids = token_ids[:self.max_length] + [0] * (self.max_length - len(token_ids))
        return token_ids

    def __len__(self):
        return len(self.descriptions)

    def __getitem__(self, idx):
        text = self.encode_text(self.descriptions[idx])
        label = self.labels[idx]
        
        return torch.tensor(text, dtype=torch.long), torch.tensor(label, dtype=torch.long)

train_dataset = TicketDataset(X_train, y_train)
val_dataset = TicketDataset(X_val, y_val, vocab=train_dataset.vocab)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

### MODEL

In [127]:
class DenseClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(DenseClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size*2)
        self.fc3 = nn.Linear(hidden_size*2, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        # Flatten input for Dense layers
        x = x.view(x.size(0), -1).float()
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [128]:
class CNNClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, num_classes):
        super(CNNClassifier, self).__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        
        # Convolutional Layers
        self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=128, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(in_channels=256, out_channels=512, kernel_size=3, padding=1)
        self.conv4 = nn.Conv1d(in_channels=512, out_channels=256, kernel_size=3, padding=1)
        
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(kernel_size=2)
        
        # Fully Connected Layer
        self.flatten_size = None

        self.fc = nn.Linear(256 * 6, num_classes)

    def forward(self, x):
        x = self.embedding(x).permute(0, 2, 1)
        
        # Convolutional Blocks
        x = self.relu(self.conv1(x))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.relu(self.conv3(x))
        x = self.pool(self.relu(self.conv4(x)))

        if self.flatten_size is None:
            self.flatten_size = x.shape[1] * x.shape[2]
            self.fc = nn.Linear(self.flatten_size, self.fc.out_features).to(x.device)

        # Flatten and Fully Connected
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


### TRAINING MODEL

In [129]:
def train_model(model , loss_fn , optimiser ,  train_loader , val_loader , epochs = 10):
    for epoch in range(epochs):

        model.train()
        total_loss = 0

        for descs, labels in train_loader:
            descs, labels = descs.to(DEVICE).long(), labels.to(DEVICE).long()
            output = model(descs)
            loss = loss_fn(output , labels)
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            total_loss += loss.item()

            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss}")

        model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for descs , labels in val_loader:
                descs, labels = descs.to(DEVICE).long(), labels.to(DEVICE).long()
                outputs = model(descs)
                _ , predicted = torch.max(outputs.data , 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        print(f"Validation Accuracy: {100 * correct / total:.2f}%")

### MLP

In [None]:
model_dense = DenseClassifier(50,250,3).to(DEVICE)
loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam(model_dense.parameters() , lr = 0.001)
train_model(model_dense , loss_fn , opt ,  train_loader , val_loader , epochs = EPOCHS)

Epoch 1/20, Loss: 289.2503356933594
Epoch 1/20, Loss: 406.1153869628906
Epoch 1/20, Loss: 239.07154846191406
Epoch 1/20, Loss: 411.83026123046875
Epoch 1/20, Loss: 131.36166381835938
Epoch 1/20, Loss: 70.37213897705078
Epoch 1/20, Loss: 206.47315979003906
Epoch 1/20, Loss: 244.43875122070312
Epoch 1/20, Loss: 171.9912567138672
Epoch 1/20, Loss: 144.451904296875
Epoch 1/20, Loss: 152.4844207763672
Epoch 1/20, Loss: 76.97837829589844
Epoch 1/20, Loss: 104.48065948486328
Epoch 1/20, Loss: 78.9263916015625
Epoch 1/20, Loss: 105.91622924804688
Epoch 1/20, Loss: 69.41368103027344
Epoch 1/20, Loss: 84.40409851074219
Epoch 1/20, Loss: 101.0908203125
Epoch 1/20, Loss: 78.88402557373047
Epoch 1/20, Loss: 77.39249420166016
Epoch 1/20, Loss: 46.541133880615234
Epoch 1/20, Loss: 78.33551025390625
Epoch 1/20, Loss: 71.72317504882812
Epoch 1/20, Loss: 83.36485290527344
Epoch 1/20, Loss: 71.45360565185547
Epoch 1/20, Loss: 39.83197021484375
Epoch 1/20, Loss: 72.03931427001953
Epoch 1/20, Loss: 72.6814

### CNN


In [130]:
model_cnn = CNNClassifier(vocab_size=len(train_dataset.vocab), embedding_dim=50, num_classes=3).to(DEVICE)
loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam(model_cnn.parameters() , lr = 0.001)
train_model(model_cnn , loss_fn , opt , train_loader , val_loader , EPOCHS)

Epoch 1/20, Loss: 1.1002883911132812
Epoch 1/20, Loss: 1.096027135848999
Epoch 1/20, Loss: 1.1249616146087646
Epoch 1/20, Loss: 1.0976303815841675
Epoch 1/20, Loss: 1.1210343837738037
Epoch 1/20, Loss: 1.0909082889556885
Epoch 1/20, Loss: 1.070324182510376
Epoch 1/20, Loss: 1.0865741968154907
Epoch 1/20, Loss: 1.070947527885437
Epoch 1/20, Loss: 1.0900920629501343
Epoch 1/20, Loss: 1.1129450798034668
Epoch 1/20, Loss: 1.0380526781082153
Epoch 1/20, Loss: 1.055389404296875
Epoch 1/20, Loss: 1.0192700624465942
Epoch 1/20, Loss: 1.0920403003692627
Epoch 1/20, Loss: 1.192032814025879
Epoch 1/20, Loss: 1.0622649192810059
Epoch 1/20, Loss: 1.0772918462753296
Epoch 1/20, Loss: 1.133927822113037
Epoch 1/20, Loss: 1.0466444492340088
Epoch 1/20, Loss: 1.0506117343902588
Epoch 1/20, Loss: 1.0644551515579224
Epoch 1/20, Loss: 1.0224454402923584
Epoch 1/20, Loss: 1.0357491970062256
Epoch 1/20, Loss: 1.0058307647705078
Epoch 1/20, Loss: 1.0235199928283691
Epoch 1/20, Loss: 1.020552635192871
Epoch 1/