In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from sklearn.model_selection import train_test_split
import spacy

### HYPER PARAMETERS

In [2]:
SEED = 12
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
EPOCHS = 20

In [3]:
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True 

### DATA PREPROCESSING

In [4]:
df = pd.read_csv("supportTicketData.csv")
df.head()

Unnamed: 0,TicketID,Ticket detailed description,urgency-Priority
0,123456,connection issues with assigned address hi fac...,P1
1,123457,cannot access hi cannot access fallowing link ...,P2
2,123458,re address shown valid dear colleagues remarke...,P1
3,123459,sent tuesday critical alert following alert oc...,P2
4,123460,code spelling mistake hello should discover fo...,P2


In [5]:
df = df.drop("TicketID" , axis=1)

In [6]:
df = df.rename({"Ticket detailed description":"description" , "urgency-Priority": "priority"} , axis = 1)

priority_mapping = {"P1":0, "P2":1, "P3":2}
df["priority"] = df["priority"].replace(priority_mapping)
df.head()

  df["priority"] = df["priority"].replace(priority_mapping)


Unnamed: 0,description,priority
0,connection issues with assigned address hi fac...,0
1,cannot access hi cannot access fallowing link ...,1
2,re address shown valid dear colleagues remarke...,0
3,sent tuesday critical alert following alert oc...,1
4,code spelling mistake hello should discover fo...,1


In [7]:
nlp = spacy.load('en_core_web_sm')

def preprocess_text(text):

    doc = nlp(text.lower())

    tokens = [token.lemma_ for token in doc if token.is_alpha and not token.is_stop]
    return " ".join(tokens)

print(df.head())
df["description"] = df["description"].apply(preprocess_text)
print(df.head())

                                         description  priority
0  connection issues with assigned address hi fac...         0
1  cannot access hi cannot access fallowing link ...         1
2  re address shown valid dear colleagues remarke...         0
3  sent tuesday critical alert following alert oc...         1
4  code spelling mistake hello should discover fo...         1
                                         description  priority
0  connection issue assign address hi face connec...         0
1  access hi access fallowing link blank proceed ...         1
2  address show valid dear colleague remark write...         0
3  send tuesday critical alert follow alert occur...         1
4  code spelling mistake hello discover code chan...         1


In [8]:
X_train, X_val, y_train, y_val = train_test_split(
    df['description'].values, df['priority'].values, test_size=0.2, random_state=SEED
)

### DATASET PREPARATION

In [9]:
class TicketDataset(Dataset):
    def __init__(self, descriptions, labels, vocab=None, max_length=50):
        self.descriptions = descriptions
        self.labels = labels
        self.max_length = max_length

        if vocab is None:
            all_words = set(word for text in descriptions for word in text.split())
            self.vocab = {word: idx + 1 for idx, word in enumerate(sorted(all_words))}
            self.vocab['<PAD>'] = 0
        else:
            self.vocab = vocab

    def encode_text(self, text):
        token_ids = [self.vocab.get(word, 0) for word in text.split()]
        token_ids = token_ids[:self.max_length] + [0] * (self.max_length - len(token_ids))
        return token_ids

    def __len__(self):
        return len(self.descriptions)

    def __getitem__(self, idx):
        text = self.encode_text(self.descriptions[idx])
        label = self.labels[idx]
        
        return torch.tensor(text, dtype=torch.long), torch.tensor(label, dtype=torch.long)

train_dataset = TicketDataset(X_train, y_train)
val_dataset = TicketDataset(X_val, y_val, vocab=train_dataset.vocab)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

### MODEL

In [10]:
class DenseClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(DenseClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size*2)
        self.fc3 = nn.Linear(hidden_size*2, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        # Flatten input for Dense layers
        x = x.view(x.size(0), -1).float()
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [11]:
class CNNClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, num_classes):
        super(CNNClassifier, self).__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        
        # Convolutional Layers
        self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=128, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(in_channels=256, out_channels=512, kernel_size=3, padding=1)
        self.conv4 = nn.Conv1d(in_channels=512, out_channels=256, kernel_size=3, padding=1)
        
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(kernel_size=2)
        
        # Fully Connected Layer
        self.flatten_size = None

        self.fc = nn.Linear(256 * 6, num_classes)

    def forward(self, x):
        x = self.embedding(x).permute(0, 2, 1)
        
        # Convolutional Blocks
        x = self.relu(self.conv1(x))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.relu(self.conv3(x))
        x = self.pool(self.relu(self.conv4(x)))

        if self.flatten_size is None:
            self.flatten_size = x.shape[1] * x.shape[2]
            self.fc = nn.Linear(self.flatten_size, self.fc.out_features).to(x.device)

        # Flatten and Fully Connected
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


### TRAINING MODEL

In [12]:
def train_model(model , loss_fn , optimiser ,  train_loader , val_loader , epochs = 10):
    for epoch in range(epochs):

        model.train()
        total_loss = 0

        for descs, labels in train_loader:
            descs, labels = descs.to(DEVICE).long(), labels.to(DEVICE).long()
            output = model(descs)
            loss = loss_fn(output , labels)
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            total_loss += loss.item()

        print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss}")

        model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for descs , labels in val_loader:
                descs, labels = descs.to(DEVICE).long(), labels.to(DEVICE).long()
                outputs = model(descs)
                _ , predicted = torch.max(outputs.data , 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        print(f"Validation Accuracy: {100 * correct / total:.2f}%")

### MLP

In [13]:
model_dense = DenseClassifier(50,250,3).to(DEVICE)
loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam(model_dense.parameters() , lr = 0.001)
train_model(model_dense , loss_fn , opt ,  train_loader , val_loader , epochs = EPOCHS)

Epoch 1/20, Loss: 13120.926487207413
Validation Accuracy: 40.23%
Epoch 2/20, Loss: 1496.021424293518
Validation Accuracy: 40.83%
Epoch 3/20, Loss: 688.7522221803665
Validation Accuracy: 46.14%
Epoch 4/20, Loss: 548.8023529052734
Validation Accuracy: 42.02%
Epoch 5/20, Loss: 505.1082227230072
Validation Accuracy: 45.86%
Epoch 6/20, Loss: 481.0352491736412
Validation Accuracy: 46.26%
Epoch 7/20, Loss: 470.7890850305557
Validation Accuracy: 47.65%
Epoch 8/20, Loss: 463.3287853002548
Validation Accuracy: 46.97%
Epoch 9/20, Loss: 462.92726814746857
Validation Accuracy: 48.46%
Epoch 10/20, Loss: 466.11943423748016
Validation Accuracy: 45.91%
Epoch 11/20, Loss: 469.66406202316284
Validation Accuracy: 48.13%
Epoch 12/20, Loss: 473.22965306043625
Validation Accuracy: 44.90%
Epoch 13/20, Loss: 479.7650969028473
Validation Accuracy: 44.82%
Epoch 14/20, Loss: 485.321429669857
Validation Accuracy: 47.90%
Epoch 15/20, Loss: 488.1895571947098
Validation Accuracy: 50.03%
Epoch 16/20, Loss: 490.9657056

### CNN


In [14]:
model_cnn = CNNClassifier(vocab_size=len(train_dataset.vocab), embedding_dim=50, num_classes=3).to(DEVICE)
loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam(model_cnn.parameters() , lr = 0.001)
train_model(model_cnn , loss_fn , opt , train_loader , val_loader , EPOCHS)

Epoch 1/20, Loss: 324.1023592054844
Validation Accuracy: 71.52%
Epoch 2/20, Loss: 255.53652223944664
Validation Accuracy: 72.75%
Epoch 3/20, Loss: 226.852368414402
Validation Accuracy: 72.68%
Epoch 4/20, Loss: 200.93241748213768
Validation Accuracy: 72.53%
Epoch 5/20, Loss: 168.07521799206734
Validation Accuracy: 72.58%
Epoch 6/20, Loss: 122.36835217475891
Validation Accuracy: 71.04%
Epoch 7/20, Loss: 78.32211551629007
Validation Accuracy: 69.29%
Epoch 8/20, Loss: 43.88360792957246
Validation Accuracy: 71.74%
Epoch 9/20, Loss: 33.984614757820964
Validation Accuracy: 71.62%
Epoch 10/20, Loss: 28.437133766477928
Validation Accuracy: 70.93%
Epoch 11/20, Loss: 20.46362146246247
Validation Accuracy: 70.86%
Epoch 12/20, Loss: 20.240121472394094
Validation Accuracy: 71.06%
Epoch 13/20, Loss: 16.65088903978176
Validation Accuracy: 72.42%
Epoch 14/20, Loss: 18.2704384753888
Validation Accuracy: 71.36%
Epoch 15/20, Loss: 21.243385239708005
Validation Accuracy: 71.92%
Epoch 16/20, Loss: 13.996655

### SAVING & LOADING MODEL

In [24]:
torch.save(model_cnn.state_dict() , "cnn_model")
model_cnn.load_state_dict(torch.load("cnn_model", weights_only=True))

<All keys matched successfully>

### MAKING PREDICTIONS

In [21]:
def predict(model, text, vocab):
    model.eval()
    text = preprocess_text(text)
    encoded_text = torch.tensor([vocab.get(word, 0) for word in text.split()])
    padded_text = torch.cat([encoded_text, torch.zeros(50 - len(encoded_text))]) if len(encoded_text) < 50 else encoded_text[:50]
    padded_text = padded_text.unsqueeze(0).long().to(DEVICE)

    output_mapping = {0:"P1" , 1: "P2" , 2:"P3"}

    with torch.no_grad():
        output = model(padded_text)
        _, predicted = torch.max(output.data, 1)
    return output_mapping[predicted.cpu().numpy()[0]]

predict(model_cnn , "cannot access hi cannot access fallowing link get blank cannot proceed can you please help with thanks" , train_dataset.vocab)

'P2'