In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import json
from torch.utils.data import Dataset, DataLoader
from torchmetrics.classification import Accuracy, Precision, Recall
import torch.nn.functional as F

## Define the dataset class

In [9]:
class TicketDataset(Dataset):
    def __init__(self, text_file, label_file, word_file):
        with open(text_file, 'r') as f:
            self.texts = json.load(f)
        self.labels = np.load(label_file)
        with open(word_file, 'r') as f:
            self.word_to_idx = json.load(f)
        self.vocab_size = len(self.word_to_idx)
        self.max_length = max(len(text) for text in self.texts)

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        # Convert words to indices
        text_indices = [self.word_to_idx.get(word, 0) for word in text]
        # Pad or truncate text
        text_indices = text_indices[:self.max_length] + [0] * (self.max_length - len(text_indices))
        return torch.tensor(text_indices, dtype=torch.long), torch.tensor(label, dtype=torch.long)

## Load Dataset

In [10]:
train_dataset = TicketDataset('text.json', 'labels.npy', 'words.json')
train_loader = DataLoader(train_dataset, batch_size=400, shuffle=True)

test_dataset = TicketDataset('text.json', 'labels.npy', 'words.json')  
test_loader = DataLoader(test_dataset, batch_size=400, shuffle=False)

## Define CNN Model

In [11]:
class TicketClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim=64, target_size=5):
        super(TicketClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.conv = nn.Conv1d(embed_dim, embed_dim, kernel_size=3, stride=1, padding=1)
        self.fc = nn.Linear(embed_dim, target_size)

    def forward(self, text):
        embedded = self.embedding(text).permute(0, 2, 1)
        conved = F.relu(self.conv(embedded))
        conved = conved.mean(dim=2) 
        return self.fc(conved)

## Instantiate the model, criterion, and optimizer

In [15]:
vocab_size = train_dataset.vocab_size
target_size = len(np.unique(train_dataset.labels))  
model = TicketClassifier(vocab_size=vocab_size, embed_dim=64, target_size=target_size)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.05)

## Metrics initialization

In [13]:
accuracy_metric = Accuracy(num_classes=target_size, task='multiclass')
precision_metric = Precision(num_classes=target_size, average='none', task='multiclass')
recall_metric = Recall(num_classes=target_size, average='none', task='multiclass')

## Train the model

In [16]:
num_epochs = 3
for epoch in range(num_epochs):
    model.train()
    for texts, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(texts)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/{num_epochs} completed')

Epoch 1/3 completed
Epoch 2/3 completed
Epoch 3/3 completed


## Test the model

In [21]:
model.eval()
predictions = []
true_labels = []
with torch.no_grad():
    for texts, labels in test_loader:
        outputs = model(texts)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())
        
        # Update metrics
        accuracy_metric.update(predicted, labels)
        precision_metric.update(predicted, labels)
        recall_metric.update(predicted, labels)

## Compute metrics

In [22]:
accuracy = accuracy_metric.compute().item()
precision = precision_metric.compute().tolist()
recall = recall_metric.compute().tolist()

In [23]:
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision (per class): {precision}')
print(f'Recall (per class): {recall}')

Accuracy: 0.7070
Precision (per class): [0.5272727012634277, 0.6125186085700989, 0.7132974863052368, 0.6861423254013062, 0.9469122290611267]
Recall (per class): [0.4059999883174896, 0.41100001335144043, 0.9279999732971191, 0.9160000085830688, 0.8740000128746033]


### Save metrics

In [24]:
torch.save({'accuracy': accuracy, 'precision': precision, 'recall': recall}, 'metrics02.pth')