In [123]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from sklearn.model_selection import train_test_split
import spacy

### HYPER PARAMETERS

In [125]:
SEED = 12
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
EPOCHS = 20

In [126]:
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True 

### DATA PREPROCESSING

In [91]:
df = pd.read_csv("supportTicketData.csv")
df.head()

Unnamed: 0,TicketID,Ticket detailed description,urgency-Priority
0,123456,connection issues with assigned address hi fac...,P1
1,123457,cannot access hi cannot access fallowing link ...,P2
2,123458,re address shown valid dear colleagues remarke...,P1
3,123459,sent tuesday critical alert following alert oc...,P2
4,123460,code spelling mistake hello should discover fo...,P2


In [92]:
df = df.drop("TicketID" , axis=1)

In [93]:
df = df.rename({"Ticket detailed description":"description" , "urgency-Priority": "priority"} , axis = 1)

priority_mapping = {"P1":0, "P2":1, "P3":2}
df["priority"] = df["priority"].replace(priority_mapping)
df.head()

  df["priority"] = df["priority"].replace(priority_mapping)


Unnamed: 0,description,priority
0,connection issues with assigned address hi fac...,0
1,cannot access hi cannot access fallowing link ...,1
2,re address shown valid dear colleagues remarke...,0
3,sent tuesday critical alert following alert oc...,1
4,code spelling mistake hello should discover fo...,1


In [96]:
nlp = spacy.load('en_core_web_sm')

def preprocess_text(text):

    doc = nlp(text.lower())

    tokens = [token.lemma_ for token in doc if token.is_alpha and not token.is_stop]
    return " ".join(tokens)

print(df.head())
df["description"] = df["description"].apply(preprocess_text)
print(df.head())

                                         description  priority
0  connection issues with assigned address hi fac...         0
1  cannot access hi cannot access fallowing link ...         1
2  re address shown valid dear colleagues remarke...         0
3  sent tuesday critical alert following alert oc...         1
4  code spelling mistake hello should discover fo...         1
                                         description  priority
0  connection issue assign address hi face connec...         0
1  access hi access fallowing link blank proceed ...         1
2  address show valid dear colleague remark write...         0
3  send tuesday critical alert follow alert occur...         1
4  code spelling mistake hello discover code chan...         1


In [97]:
X_train, X_val, y_train, y_val = train_test_split(
    df['description'].values, df['priority'].values, test_size=0.2, random_state=SEED
)

### DATASET PREPARATION

In [98]:
class TicketDataset(Dataset):
    def __init__(self, descriptions, labels, vocab=None, max_length=50):
        self.descriptions = descriptions
        self.labels = labels
        self.max_length = max_length

        if vocab is None:
            all_words = set(word for text in descriptions for word in text.split())
            self.vocab = {word: idx + 1 for idx, word in enumerate(sorted(all_words))}
            self.vocab['<PAD>'] = 0
        else:
            self.vocab = vocab

    def encode_text(self, text):
        token_ids = [self.vocab.get(word, 0) for word in text.split()]
        token_ids = token_ids[:self.max_length] + [0] * (self.max_length - len(token_ids))
        return token_ids

    def __len__(self):
        return len(self.descriptions)

    def __getitem__(self, idx):
        text = self.encode_text(self.descriptions[idx])
        label = self.labels[idx]
        
        return torch.tensor(text, dtype=torch.long), torch.tensor(label, dtype=torch.long)

train_dataset = TicketDataset(X_train, y_train)
val_dataset = TicketDataset(X_val, y_val, vocab=train_dataset.vocab)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

### MODEL

In [127]:
class DenseClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(DenseClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size*2)
        self.fc3 = nn.Linear(hidden_size*2, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        # Flatten input for Dense layers
        x = x.view(x.size(0), -1).float()
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [128]:
class CNNClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, num_classes):
        super(CNNClassifier, self).__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        
        # Convolutional Layers
        self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=128, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(in_channels=256, out_channels=512, kernel_size=3, padding=1)
        self.conv4 = nn.Conv1d(in_channels=512, out_channels=256, kernel_size=3, padding=1)
        
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(kernel_size=2)
        
        # Fully Connected Layer
        self.flatten_size = None

        self.fc = nn.Linear(256 * 6, num_classes)

    def forward(self, x):
        x = self.embedding(x).permute(0, 2, 1)
        
        # Convolutional Blocks
        x = self.relu(self.conv1(x))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.relu(self.conv3(x))
        x = self.pool(self.relu(self.conv4(x)))

        if self.flatten_size is None:
            self.flatten_size = x.shape[1] * x.shape[2]
            self.fc = nn.Linear(self.flatten_size, self.fc.out_features).to(x.device)

        # Flatten and Fully Connected
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


### TRAINING MODEL

In [131]:
def train_model(model , loss_fn , optimiser ,  train_loader , val_loader , epochs = 10):
    for epoch in range(epochs):

        model.train()
        total_loss = 0

        for descs, labels in train_loader:
            descs, labels = descs.to(DEVICE).long(), labels.to(DEVICE).long()
            output = model(descs)
            loss = loss_fn(output , labels)
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            total_loss += loss.item()

        print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss}")

        model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for descs , labels in val_loader:
                descs, labels = descs.to(DEVICE).long(), labels.to(DEVICE).long()
                outputs = model(descs)
                _ , predicted = torch.max(outputs.data , 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        print(f"Validation Accuracy: {100 * correct / total:.2f}%")

### MLP

In [132]:
model_dense = DenseClassifier(50,250,3).to(DEVICE)
loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam(model_dense.parameters() , lr = 0.001)
train_model(model_dense , loss_fn , opt ,  train_loader , val_loader , epochs = EPOCHS)

Epoch 1/20, Loss: 11684.240381240845
Validation Accuracy: 42.10%
Epoch 2/20, Loss: 1336.1395165920258
Validation Accuracy: 45.15%
Epoch 3/20, Loss: 649.2214574217796
Validation Accuracy: 41.29%
Epoch 4/20, Loss: 532.0528280138969
Validation Accuracy: 46.39%
Epoch 5/20, Loss: 493.1364198923111
Validation Accuracy: 48.61%
Epoch 6/20, Loss: 481.618341088295
Validation Accuracy: 49.07%
Epoch 7/20, Loss: 474.3097249865532
Validation Accuracy: 48.46%
Epoch 8/20, Loss: 472.3670623898506
Validation Accuracy: 48.36%
Epoch 9/20, Loss: 474.6995177268982
Validation Accuracy: 49.29%
Epoch 10/20, Loss: 474.4682620167732
Validation Accuracy: 46.21%
Epoch 11/20, Loss: 480.94161182641983
Validation Accuracy: 48.74%
Epoch 12/20, Loss: 483.60386765003204
Validation Accuracy: 49.22%
Epoch 13/20, Loss: 488.3847697377205
Validation Accuracy: 48.69%
Epoch 14/20, Loss: 490.5734761953354
Validation Accuracy: 42.32%
Epoch 15/20, Loss: 493.37685745954514
Validation Accuracy: 50.78%
Epoch 16/20, Loss: 498.1023304

### CNN


In [133]:
model_cnn = CNNClassifier(vocab_size=len(train_dataset.vocab), embedding_dim=50, num_classes=3).to(DEVICE)
loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam(model_cnn.parameters() , lr = 0.001)
train_model(model_cnn , loss_fn , opt , train_loader , val_loader , EPOCHS)

Epoch 1/20, Loss: 324.30711057782173
Validation Accuracy: 71.39%
Epoch 2/20, Loss: 255.4980856180191
Validation Accuracy: 73.06%
Epoch 3/20, Loss: 229.9713936150074
Validation Accuracy: 73.41%
Epoch 4/20, Loss: 204.12544551491737
Validation Accuracy: 72.12%
Epoch 5/20, Loss: 168.87557138502598
Validation Accuracy: 72.10%
Epoch 6/20, Loss: 126.46058061718941
Validation Accuracy: 71.79%
Epoch 7/20, Loss: 82.29008281044662
Validation Accuracy: 70.78%
Epoch 8/20, Loss: 45.98248714162037
Validation Accuracy: 70.05%
Epoch 9/20, Loss: 33.774221881642006
Validation Accuracy: 70.93%
Epoch 10/20, Loss: 32.7867789005395
Validation Accuracy: 69.85%
Epoch 11/20, Loss: 24.97391589009203
Validation Accuracy: 69.65%
Epoch 12/20, Loss: 19.173140569822863
Validation Accuracy: 71.79%
Epoch 13/20, Loss: 16.098908177751582
Validation Accuracy: 71.19%
Epoch 14/20, Loss: 22.401313378824852
Validation Accuracy: 69.62%
Epoch 15/20, Loss: 19.261824028071715
Validation Accuracy: 71.21%
Epoch 16/20, Loss: 20.4099