In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the dataset (use your path)
file_path = 'reveiw.csv'
df = pd.read_csv(file_path)

# Preprocess the dataset
df = df[['Review', 'Airline Name']].dropna()

# Encode the target labels
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['Airline Name'])

# Split into train and test sets
train_texts, test_texts, train_labels, test_labels = train_test_split(df['Review'], df['label'], test_size=0.2)

# Simple word tokenization and conversion to index (mocking word2idx)
word2idx = {word: i+1 for i, word in enumerate(set(" ".join(train_texts).split()))}
vocab_size = len(word2idx) + 1

def text_to_tensor(text, max_len):
    tensor = torch.tensor([word2idx.get(word, 0) for word in text.split()])
    # Padding the tensor to the max length
    if len(tensor) < max_len:
        padding = torch.zeros(max_len - len(tensor), dtype=torch.long)
        tensor = torch.cat((tensor, padding), dim=0)
    return tensor[:max_len]  # Truncate if necessary

# Determine the maximum sequence length
max_len = max(len(text.split()) for text in train_texts)

train_data = torch.stack([text_to_tensor(text, max_len) for text in train_texts])
test_data = torch.stack([text_to_tensor(text, max_len) for text in test_texts])
# Ensure the labels are of type Long
train_labels = torch.tensor(train_labels.values, dtype=torch.long)
test_labels = torch.tensor(test_labels.values, dtype=torch.long)

# Create DataLoader instances
batch_size = 400
train_loader = DataLoader(TensorDataset(train_data, train_labels), shuffle=True, batch_size=batch_size)
test_loader = DataLoader(TensorDataset(test_data, test_labels), shuffle=False, batch_size=batch_size)


# Define the classifier class
class TicketClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim, target_size):
        super(TicketClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.conv = nn.Conv1d(embed_dim, embed_dim, kernel_size=3, stride=1, padding=1)
        self.fc = nn.Linear(embed_dim, target_size)

    def forward(self, text):
        embedded = self.embedding(text).permute(0, 2, 1)
        conved = F.relu(self.conv(embedded))
        conved = conved.mean(dim=2) 
        return self.fc(conved)

# Set parameters
embedding_dim = 64
target_size = len(label_encoder.classes_)

# Create an instance of the TicketClassifier class
model = TicketClassifier(vocab_size, embedding_dim, target_size)

lr = 0.05
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Train the model
model.train()
epochs = 3
for i in range(epochs):
    running_loss, num_processed = 0, 0
    for inputs, labels in train_loader:
        model.zero_grad()
        output = model(inputs)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_processed += len(inputs)
    print(f"Epoch: {i+1}, Loss: {running_loss/num_processed}")

# Evaluate model on test set
model.eval()
predicted = []
actual = []

for inputs, labels in test_loader:
    output = model(inputs)
    cat = torch.argmax(output, dim=-1)
    predicted.extend(cat.tolist())
    actual.extend(labels.tolist())

# Compute metrics using sklearn
accuracy = accuracy_score(actual, predicted)
precision = precision_score(actual, predicted, average=None)
recall = recall_score(actual, predicted, average=None)

print('Accuracy:', accuracy)
print('Precision (per class):', precision)
print('Recall (per class):', recall)


Epoch: 1, Loss: 0.015262414707312258
Epoch: 2, Loss: 0.013305138553788964
Epoch: 3, Loss: 0.010755963787520091
Accuracy: 0.1749730312837109
Precision (per class): [0.73333333 0.         0.13846154 0.18181818 0.36842105 0.
 0.         0.         0.         0.4        0.         0.26666667
 0.         0.         0.         0.         0.         0.
 0.         0.35483871 0.         0.         0.5        0.
 0.         0.         0.         0.26666667 0.         0.
 0.         0.         0.         0.         0.66666667 0.
 0.17647059 0.         0.         0.         0.         0.0862069
 0.         0.         0.81818182 0.64285714 0.         0.
 0.         0.25       0.25       0.         0.         0.
 0.         0.07228916 0.17647059 0.90909091 0.         0.33333333
 0.         0.0952381  0.05813953 0.         1.         0.
 0.         0.         0.         0.         0.         0.
 0.47368421 0.08333333 0.         0.         0.         0.07936508
 0.15384615 0.         0.         0.   

  _warn_prf(average, modifier, msg_start, len(result))
