In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

In [2]:
df = pd.read_csv("datasets/train.csv")
df.head()

Unnamed: 0,index,text,label
0,10000,निर्वाचन आयोगले गति छाड्यो,0
1,10001,जीतपुरसिमराः एमालेबाट चौधरी र अधिकारीले गरे उम...,0
2,10002,दलहरुको घरदैलो कार्यक्रममा ठुलो ठुलो आवाजमा स्...,0
3,10003,"म पनि त्यही सोचेको, अब पनि भुई कटहर लाई भोट न...",0
4,10006,शिक्षकबाट राजीनामा नदिई उपमेयरमा उम्मेदवारी दर्ता,0


In [3]:
df.dropna(inplace = True)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(df["text"], df["label"], test_size=0.2, random_state=42)

In [5]:
class DevanagariDataset(Dataset):
    def __init__(self, texts, labels, max_length=100):
        self.texts = texts.tolist()  
        self.labels = labels.tolist()  
        self.max_length = max_length
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text_tensor = torch.tensor([ord(char) for char in self.texts[idx]], dtype=torch.long)
        
        if len(text_tensor) > self.max_length:
            text_tensor = text_tensor[:self.max_length]  
        else:
            padding = torch.zeros(self.max_length - len(text_tensor), dtype=torch.long)  
            text_tensor = torch.cat((text_tensor, padding)) 

        label_tensor = torch.tensor(self.labels[idx], dtype=torch.long)

        return text_tensor, label_tensor


In [6]:
train_dataset = DevanagariDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = DevanagariDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [8]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)  
        self.fc2 = nn.Linear(128, 64)           
        self.fc3 = nn.Linear(64, num_classes)   
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [9]:
input_size = 100  
num_classes = len(df['label'].unique())  
num_epochs = 10
learning_rate = 0.001

In [10]:
model = SimpleNN(input_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [11]:
model.to(device)

SimpleNN(
  (fc1): Linear(in_features=100, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=5, bias=True)
  (relu): ReLU()
)

In [12]:
def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        correct_train_predictions = 0
        
        for texts, labels in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs} - Training", leave=False):
            texts, labels = texts.to(device), labels.to(device)
            outputs = model(texts.float())  
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            total_train_loss += loss.item()
            
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(outputs.data, 1)
            correct_train_predictions += (predicted == labels).sum().item()
        
        train_accuracy = correct_train_predictions / len(train_loader.dataset)
        train_loss = total_train_loss / len(train_loader)
        
        model.eval()
        total_test_loss = 0
        correct_test_predictions = 0
        
        with torch.no_grad():
            for texts, labels in tqdm(test_loader, desc=f"Epoch {epoch + 1}/{num_epochs} - Testing", leave=False):
                texts, labels = texts.to(device), labels.to(device)

                outputs = model(texts.float())
                loss = criterion(outputs, labels)
                total_test_loss += loss.item()
                
                _, predicted = torch.max(outputs.data, 1)
                correct_test_predictions += (predicted == labels).sum().item()
        
        test_accuracy = correct_test_predictions / len(test_loader.dataset)
        test_loss = total_test_loss / len(test_loader)
        
        print(f"Epoch {epoch + 1}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
        print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs)
torch.save(model.state_dict(), "models/ann.pth")

                                                                           

Epoch 1/10
Train Loss: 4.3271, Train Accuracy: 0.2928
Test Loss: 1.5788, Test Accuracy: 0.2882


                                                                           

Epoch 2/10
Train Loss: 1.5183, Train Accuracy: 0.3451
Test Loss: 1.4531, Test Accuracy: 0.3810


                                                                           

Epoch 3/10
Train Loss: 1.4179, Train Accuracy: 0.3847
Test Loss: 1.5286, Test Accuracy: 0.3713


                                                                           

Epoch 4/10
Train Loss: 1.3566, Train Accuracy: 0.4134
Test Loss: 1.3227, Test Accuracy: 0.4348


                                                                           

Epoch 5/10
Train Loss: 1.3043, Train Accuracy: 0.4380
Test Loss: 1.2894, Test Accuracy: 0.4482


                                                                           

Epoch 6/10
Train Loss: 1.2630, Train Accuracy: 0.4586
Test Loss: 1.2720, Test Accuracy: 0.4607


                                                                           

Epoch 7/10
Train Loss: 1.2394, Train Accuracy: 0.4682
Test Loss: 1.2326, Test Accuracy: 0.4706


                                                                           

Epoch 8/10
Train Loss: 1.2209, Train Accuracy: 0.4751
Test Loss: 1.2971, Test Accuracy: 0.4671


                                                                           

Epoch 9/10
Train Loss: 1.1920, Train Accuracy: 0.4869
Test Loss: 1.2121, Test Accuracy: 0.4907


                                                                            

Epoch 10/10
Train Loss: 1.1752, Train Accuracy: 0.4936
Test Loss: 1.2655, Test Accuracy: 0.4562


