In [41]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import Dataset, DataLoader

In [42]:
# Read the labelled training set (path is relative to the working directory)
# and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="train.csv")
df.head(n=5)

Unnamed: 0,index,text,label
0,10000,निर्वाचन आयोगले गति छाड्यो,0
1,10001,जीतपुरसिमराः एमालेबाट चौधरी र अधिकारीले गरे उम...,0
2,10002,दलहरुको घरदैलो कार्यक्रममा ठुलो ठुलो आवाजमा स्...,0
3,10003,"म पनि त्यही सोचेको, अब पनि भुई कटहर लाई भोट न...",0
4,10006,शिक्षकबाट राजीनामा नदिई उपमेयरमा उम्मेदवारी दर्ता,0


In [43]:
# Keep only rows where both the text and its label are present. Limiting the
# check to the two columns used downstream avoids discarding usable examples
# because of a gap in an unrelated column; rebinding `df` replaces the
# in-place mutation.
df = df.dropna(subset=["text", "label"])

In [44]:
# Partition texts and labels together: 20% is held out for testing, and the
# fixed random_state makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df["text"],
    df["label"],
    test_size=0.2,
    random_state=42,
)

In [45]:
class DevanagariDataset(Dataset):
    """Dataset that encodes each text as a fixed-length vector of code points.

    Every character is replaced by its Unicode code point (``ord``). Sequences
    longer than ``max_length`` lose their tail; shorter ones are right-padded
    with zeros.

    Args:
        texts: Object exposing ``tolist()`` that yields the input strings.
        labels: Object exposing ``tolist()`` that yields one label per text.
        max_length: Length of every returned text tensor.
    """

    def __init__(self, texts, labels, max_length=100):
        self.max_length = max_length
        # Plain Python lists give positional lookup in __getitem__.
        self.texts = texts.tolist()
        self.labels = labels.tolist()

    def __len__(self):
        """Return the number of texts held by the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return ``(text_tensor, label_tensor)`` for sample ``idx``, both ``torch.long``."""
        sample = self.texts[idx]
        # Encode first, then clip to the maximum length in a single slice.
        codes = torch.tensor(list(map(ord, sample)), dtype=torch.long)[: self.max_length]

        # Right-pad with zeros when the encoded text is shorter than max_length.
        shortfall = self.max_length - codes.numel()
        if shortfall > 0:
            codes = torch.cat((codes, codes.new_zeros(shortfall)))

        return codes, torch.tensor(self.labels[idx], dtype=torch.long)


In [46]:
# Wrap each split in a Dataset and serve it in batches of 32.
train_dataset = DevanagariDataset(X_train, y_train)
# Training samples are reshuffled on every pass.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = DevanagariDataset(X_test, y_test)
# Evaluation gains nothing from shuffling; a fixed order keeps batch outputs
# aligned with the row order of X_test / y_test.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [47]:
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

class SimpleNN(nn.Module):
    """Fully connected classifier: input_size -> 128 -> 64 -> num_classes.

    ReLU follows the first two linear layers; the output of the last layer is
    returned without an activation.

    Args:
        input_size: Number of features in each input row.
        num_classes: Number of scores produced for each input row.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        hidden_dims = (128, 64)
        self.fc1 = nn.Linear(input_size, hidden_dims[0])
        self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
        self.fc3 = nn.Linear(hidden_dims[1], num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the class scores computed from ``x``."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [48]:
# Training configuration.
# The first linear layer takes one feature per character position, so its
# width has to equal the dataset's padded sequence length; read it from the
# dataset instead of repeating the literal.
input_size = train_dataset.max_length
# One output score per distinct label value.
num_classes = df['label'].nunique()
num_epochs = 10
learning_rate = 0.001

In [49]:
# Seed PyTorch's global RNG before the layers are created so that weight
# initialisation is repeatable from run to run (same seed as the data split).
torch.manual_seed(42)

model = SimpleNN(input_size, num_classes)
# CrossEntropyLoss takes the network's raw scores and integer class indices.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [50]:
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    """Train ``model`` in place and print the mean loss and accuracy of each epoch.

    Args:
        model: Network to optimise; it is called on each batch cast to float
            and must return one row of class scores per sample.
        train_loader: Iterable yielding ``(texts, labels)`` tensor batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``;
            must return a scalar tensor.
        optimizer: Optimiser that updates the model's parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. One ``"Loss: ..., Accuracy: ..."`` line is printed per epoch.
        Loss is the mean of the per-batch losses; accuracy is the fraction of
        correctly classified samples among those actually iterated. Both are
        reported as ``nan`` when the loader yields no batches.
    """
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0.0
        correct_predictions = 0
        # Count what the loader really yields instead of trusting
        # len(train_loader.dataset): the two differ with drop_last=True or a
        # subset sampler, and the counters also avoid a division by zero when
        # the loader is empty.
        samples_seen = 0
        batches_seen = 0

        for texts, labels in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}", leave=False):
            optimizer.zero_grad()
            # Forward pass
            # NOTE(review): in this notebook the batches are raw code-point
            # integers cast to float; an embedding layer or input scaling may
            # train better -- confirm before changing the model.
            outputs = model(texts.float())  # nn.Linear needs floating-point input
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            # Backward pass
            loss.backward()
            optimizer.step()

            # Accuracy bookkeeping on detached scores (no autograd tracking).
            predicted = outputs.detach().argmax(dim=1)
            correct_predictions += (predicted == labels).sum().item()
            samples_seen += labels.size(0)
            batches_seen += 1

        mean_loss = total_loss / batches_seen if batches_seen else float("nan")
        accuracy = correct_predictions / samples_seen if samples_seen else float("nan")
        print(f"Loss: {mean_loss:.4f}, Accuracy: {accuracy:.4f}")

# Run the optimisation loop with the model, loader and settings defined above.
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)

                                                                

Loss: 4.6924, Accuracy: 0.2752


                                                                

Loss: 1.4866, Accuracy: 0.3051


                                                                

Loss: 1.4795, Accuracy: 0.3099


                                                                

Loss: 1.4781, Accuracy: 0.3107


                                                                

Loss: 1.4782, Accuracy: 0.3102


                                                                

Loss: 1.4830, Accuracy: 0.3081


                                                                

Loss: 1.4795, Accuracy: 0.3088


                                                                

Loss: 1.4940, Accuracy: 0.3031


                                                               

Loss: 1.4792, Accuracy: 0.3083


                                                                

Loss: 1.4783, Accuracy: 0.3091


