In [14]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import re
import fasttext

In [15]:
# Locations of the labelled training CSV and the saved fastText model binary.
TRAIN_CSV_PATH = "datasets/train.csv"
FASTTEXT_BIN_PATH = '../Embeddings/devanagari_fasttext_cbow.bin'

# Read the dataset, then load the embedding model used later to vectorise words.
df = pd.read_csv(TRAIN_CSV_PATH)
fasttext_model = fasttext.load_model(FASTTEXT_BIN_PATH)

# Bare last expression so the notebook renders a preview of the first rows.
df.head()

Unnamed: 0,index,text,label
0,10000,निर्वाचन आयोगले गति छाड्यो,0
1,10001,जीतपुरसिमराः एमालेबाट चौधरी र अधिकारीले गरे उम...,0
2,10002,दलहरुको घरदैलो कार्यक्रममा ठुलो ठुलो आवाजमा स्...,0
3,10003,"म पनि त्यही सोचेको, अब पनि भुई कटहर लाई भोट न...",0
4,10006,शिक्षकबाट राजीनामा नदिई उपमेयरमा उम्मेदवारी दर्ता,0


In [16]:
# Drop rows that cannot be used for training, i.e. those missing the text or the label.
# Rebinding `df` (rather than inplace=True) avoids hidden in-place mutation, and limiting
# the check to the modelling columns avoids discarding rows over NaNs in other columns.
df = df.dropna(subset=["text", "label"])

In [17]:
# Compiled once so every row reuses the same pattern objects.
_DIGIT_RE = re.compile(r'[०१२३४५६७८९0-9]')
_PUNCT_RE = re.compile(r'[॥।॰،۔؟٪×÷!@#$%^&*()_+={}\[\]:;"\'<>,.?/~`|-]')


def preprocess_text(text):
    """Remove digits and punctuation from ``text`` and trim surrounding whitespace.

    Args:
        text: The raw string to clean.

    Returns:
        The input with Devanagari and ASCII digits removed, the listed
        punctuation/symbol characters removed, and leading/trailing
        whitespace stripped. Interior whitespace is left as-is.
    """
    without_digits = _DIGIT_RE.sub('', text)
    # str.strip() returns a new string; its result must be returned for the
    # trimming to take effect.
    return _PUNCT_RE.sub('', without_digits).strip()

# Clean every row with preprocess_text, then trim surrounding whitespace,
# as a single chained expression.
df['text'] = df['text'].apply(preprocess_text).str.strip()

In [18]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df["text"],
    df["label"],
    test_size=0.2,
    random_state=42,
)

In [19]:
class DevanagariDataset(Dataset):
    """Dataset that turns each text into a fixed-length stack of fastText word vectors.

    Every item is a ``(text_tensor, label_tensor)`` pair, where ``text_tensor``
    has exactly ``max_length`` rows (one per whitespace-separated word,
    truncated or zero-padded at the end) and ``label_tensor`` is a ``long``
    scalar. Word vectors come from the module-level ``fasttext_model``.
    """

    def __init__(self, texts, labels, max_length=50):
        """Store the samples as plain lists.

        Args:
            texts: Object exposing ``.tolist()`` that yields the text strings.
            labels: Object exposing ``.tolist()`` that yields the labels.
            max_length: Number of word vectors every item is cut or padded to.
        """
        self.texts = texts.tolist()
        self.labels = labels.tolist()
        self.max_length = max_length

    def __len__(self):
        """Return the number of stored texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the embedded text and its label for sample ``idx``."""
        vectors = [
            torch.tensor(fasttext_model.get_word_vector(token))
            for token in self.texts[idx].split()
        ]

        missing = self.max_length - len(vectors)
        if missing < 0:
            # Too many words: keep only the first max_length vectors.
            vectors = vectors[:self.max_length]
        else:
            # Too few words (or exactly enough): append zero vectors at the end.
            filler = torch.zeros(fasttext_model.get_dimension())
            vectors.extend([filler] * missing)

        label_tensor = torch.tensor(self.labels[idx], dtype=torch.long)
        return torch.stack(vectors), label_tensor


In [20]:
# Training batches are reshuffled on every pass over the loader.
train_dataset = DevanagariDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Evaluation gains nothing from shuffling. A fixed order keeps the batch
# composition, and therefore the per-batch-averaged test loss, repeatable.
test_dataset = DevanagariDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [21]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [22]:
class Attention(nn.Module):
    """Additive attention that pools a sequence of hidden states into one vector.

    A score is computed per time step, normalised with a softmax over the
    sequence dimension (dim 1), and used to take a weighted sum of the inputs.
    """

    def __init__(self, hidden_size):
        """Create the projection layers.

        Args:
            hidden_size: Size of the last dimension of the tensors passed to
                ``forward``.
        """
        super().__init__()
        self.Wa = nn.Linear(hidden_size, hidden_size)
        self.Ua = nn.Linear(hidden_size, hidden_size)
        self.Va = nn.Linear(hidden_size, 1)

    def forward(self, lstm_output):
        """Pool ``lstm_output`` of shape (batch, seq, hidden).

        Returns:
            A tuple ``(context_vector, attention_weights)`` with shapes
            (batch, hidden) and (batch, seq) respectively.
        """
        projected = self.Wa(lstm_output) + self.Ua(lstm_output)
        scores = self.Va(torch.tanh(projected))           # (batch, seq, 1)
        attention_weights = torch.softmax(scores, dim=1)  # normalise across time steps

        # (batch, 1, seq) @ (batch, seq, hidden) -> (batch, 1, hidden)
        weights_as_row = attention_weights.transpose(1, 2)
        context_vector = torch.bmm(weights_as_row, lstm_output).squeeze(1)
        return context_vector, attention_weights.squeeze(2)

class BidirectionalLSTMModel(nn.Module):
    """Bidirectional LSTM encoder followed by attention pooling and a linear classifier.

    The LSTM output (forward and backward states concatenated, hence the
    ``hidden_size * 2`` widths) is pooled by ``Attention`` into one vector per
    sample, which ``fc`` maps to ``num_classes`` logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        """Build the layers.

        Args:
            input_size: Size of each input feature vector.
            hidden_size: Hidden size of each LSTM direction.
            num_layers: Number of stacked LSTM layers.
            num_classes: Number of output logits.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        self.attention = Attention(hidden_size * 2)
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        """Return class logits for a batch ``x`` laid out batch-first.

        Args:
            x: Input tensor passed straight to the batch-first LSTM.

        Returns:
            Tensor of logits with one row per sample and ``num_classes`` columns.
        """
        # nn.LSTM uses an all-zero initial hidden/cell state when none is
        # passed, created to match the input. Relying on that removes the
        # dependency on a notebook-global `device`, so the model works on
        # whatever device its input and parameters live on.
        out, _ = self.lstm(x)

        # Attention weights are not needed for classification.
        context_vector, _ = self.attention(out)
        return self.fc(context_vector)

In [23]:
# --- Model dimensions ---
input_size = fasttext_model.get_dimension()  # width of one fastText word vector
hidden_size = 256
num_layers = 3
# Number of distinct label values in the data.
# NOTE(review): assumes labels are coded 0..num_classes-1 — confirm against the CSV.
num_classes = df['label'].nunique(dropna=False)

# --- Optimisation settings ---
num_epochs = 10
learning_rate = 1e-3

In [24]:
# Seed PyTorch's global RNG before any weights are created so that parameter
# initialisation (and later DataLoader shuffling) is repeatable across
# Restart & Run All. GPU kernels may still introduce small nondeterminism.
torch.manual_seed(42)

model = BidirectionalLSTMModel(input_size, hidden_size, num_layers, num_classes)
# Last expression: moves the model to the selected device and displays its structure.
model.to(device)

BidirectionalLSTMModel(
  (lstm): LSTM(100, 256, num_layers=3, batch_first=True, bidirectional=True)
  (attention): Attention(
    (Wa): Linear(in_features=512, out_features=512, bias=True)
    (Ua): Linear(in_features=512, out_features=512, bias=True)
    (Va): Linear(in_features=512, out_features=1, bias=True)
  )
  (fc): Linear(in_features=512, out_features=5, bias=True)
)

In [25]:
def _run_epoch(model, loader, criterion, run_device, optimizer=None, desc=""):
    """Make one pass over ``loader``; train when ``optimizer`` is given, else evaluate.

    Returns:
        ``(mean_batch_loss, accuracy)`` where the loss is averaged over batches
        and the accuracy over ``len(loader.dataset)`` samples.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()

    total_loss = 0.0
    correct = 0
    # Gradients are only tracked while training.
    with torch.set_grad_enabled(training):
        for texts, labels in tqdm(loader, desc=desc, leave=False):
            texts, labels = texts.to(run_device), labels.to(run_device)

            if training:
                optimizer.zero_grad()

            outputs = model(texts)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            total_loss += loss.item()
            # argmax over the class dimension gives the predicted label.
            correct += (outputs.argmax(dim=1) == labels).sum().item()

    return total_loss / len(loader), correct / len(loader.dataset)


def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs, evaluating after each one.

    Batches are moved to the device of the model's parameters, so the function
    does not depend on any notebook-level ``device`` variable.

    Args:
        model: Module mapping a batch of inputs to class logits.
        train_loader: Loader yielding ``(inputs, labels)`` batches for training.
        test_loader: Loader yielding ``(inputs, labels)`` batches for evaluation.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the model's parameters.
        num_epochs: Number of epochs to run.

    Returns:
        A list with one dict per epoch containing ``epoch``, ``train_loss``,
        ``train_accuracy``, ``test_loss`` and ``test_accuracy``. The same
        numbers are also printed after every epoch.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    history = []
    for epoch in range(num_epochs):
        train_loss, train_accuracy = _run_epoch(
            model, train_loader, criterion, run_device, optimizer,
            desc=f"Epoch {epoch + 1}/{num_epochs} - Training",
        )
        test_loss, test_accuracy = _run_epoch(
            model, test_loader, criterion, run_device,
            desc=f"Epoch {epoch + 1}/{num_epochs} - Testing",
        )

        print(f"Epoch {epoch + 1}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
        print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

        history.append({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_accuracy": train_accuracy,
            "test_loss": test_loss,
            "test_accuracy": test_accuracy,
        })

    return history


# Classification loss applied to the raw logits produced by the model.
criterion = nn.CrossEntropyLoss()
# Adam over every model parameter, using the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [13]:
# Run the full training schedule, then persist only the learned weights (state dict).
train_model(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)
torch.save(model.state_dict(), "lstm_attention_model.pth")

                                                                          

Epoch 1/10
Train Loss: 0.0464, Train Accuracy: 0.9885
Test Loss: 0.0260, Test Accuracy: 0.9935


                                                                          

Epoch 2/10
Train Loss: 0.0296, Train Accuracy: 0.9931
Test Loss: 0.0218, Test Accuracy: 0.9943


                                                                          

Epoch 3/10
Train Loss: 0.0242, Train Accuracy: 0.9939
Test Loss: 0.0208, Test Accuracy: 0.9948


Epoch 4/10 - Training:  88%|████████▊ | 1156/1311 [02:14<00:14, 10.39it/s]