In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, TensorDataset


# Define the GlossClassifier
class GlossClassifier(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(GlossClassifier, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, 512),  # Hidden layer
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes)  # Output layer
        )

    def forward(self, x):
        return self.fc(x)


# Function to load data
def load_data(output_folder):
    pooled_features = []
    labels = []

    for idx, filename in enumerate(os.listdir(output_folder)):
        feature_path = os.path.join(output_folder, filename)
        feature = np.load(feature_path)  # Load the pooled feature
        pooled_features.append(feature)
        labels.append(idx)  # Assign a label (modify as per your dataset)

    pooled_features = torch.tensor(pooled_features, dtype=torch.float32)
    labels = torch.tensor(labels, dtype=torch.long)  # Convert labels to tensor

    return pooled_features, labels


# Function to train the model
def train_model(model, dataloader, criterion, optimizer, num_epochs=30):
    model.train()
    for epoch in range(num_epochs):
        for features, target in dataloader:
            features = features.squeeze(1)  # Adjust the dimensions if needed
            optimizer.zero_grad()
            output = model(features)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")


# Function to test the model
def test_model(model, test_feature, target_sentences):
    model.eval()
    test_feature = torch.tensor(test_feature, dtype=torch.float32).unsqueeze(0)  # Add batch dimension
    output = model(test_feature)

    output = output.squeeze(1)  # Now the shape will be [1, num_classes]
    predicted_label = torch.argmax(output, dim=1).item()

    predicted_word = class_to_word[predicted_label]
    print(f"Predicted Word: {predicted_word}")

    # Map the predicted word to the corresponding sentence from target_sentences
    predicted_sentence = get_sentence_from_gloss(predicted_word, target_sentences)
    print(f"Predicted Sentence: {predicted_sentence}")


# Function to map predicted gloss to a sentence
def get_sentence_from_gloss(predicted_word, target_sentences):
    for sentence in target_sentences:
        if predicted_word in sentence.lower():  # Case-insensitive match
            return sentence
    return "Sentence not found for the predicted word!"


# Main function
def main():
    # Path to the folder containing the pooled features
    output_folder = "C:/Users/prath/OneDrive/Desktop/pooled features10"
    
    # Load the data
    pooled_features, labels = load_data(output_folder)

    # Create a dataset and dataloader
    dataset = TensorDataset(pooled_features, labels)
    dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

    # Define the model, loss, and optimizer
    input_dim = 2048  # Adjust according to your feature size
    num_classes = len(set(labels.numpy()))  # Number of unique glosses

    model = GlossClassifier(input_dim, num_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train the model
    train_model(model, dataloader, criterion, optimizer, num_epochs=30)

    # Example: Predict gloss for a new feature (e.g., the 6th feature from the dataset)
    class_to_word = ['goodmorning', 'hearingimpaired', 'communication', 'news', 'meetings', 'primeMinister', 
                     'namaskar', 'indetail', 'watching', 'both', 'activities', 'chaired', 'children', 
                     'development', 'earlier', 'fire', 'fourteen', 'government', 'homeminister', 'india', 
                     'instructed', 'interaction', 'inthis', 'more', 'movingon', 'one', 'reviewed', 'situation', 
                     'spoke', 'technological', 'terrorists', 'thanks', 'thatsit', 'there', 'today', 'tools', 
                     'two', 'under', 'yesterday', 'youare', 'health', 'imprisonment', 'phone', 'training', 
                     'krishna', 'wrong', 'train', 'global', 'men', 'story']

    target_sentences = [
        'Youare reviewed the story',
        'children under government training',
        'The situation today is critical',
        'India meetings chaired by the primeMinister',
        'Thanks for your communication earlier',
        'children watched the communication indetail',     
        'Homeminister instructed meetings yesterday',     
        'Both terrorists and government spoke earlier',   
        'India movingon with technological development',
        'Thanks for the health training today',
        'Krishna interacted with the PrimeMinister today',
        'The phone situation was wrong yesterday',
        'The fire situation was reviewed by the government',
        'Men imprisoned for activities involving terrorists',
        'Fourteen men chaired the government meetings',       
        'The primeMinister spoke about global communication',
        'Yesterday the Homeminister instructed further action',
        'The terrorists reviewed technological tools for training',
        'Movingon from imprisonment the story continues today',
        'Inthis situation more interaction is needed',
        'The development of new tools was discussed in meetings',
        'primeMinister chaired meetings yesterday'
    ]

    test_feature = pooled_features[12]  # Test with the 6th feature (adjust as needed)
    test_model(model, test_feature, target_sentences)


if __name__ == "__main__":
    main()


Epoch 1/30, Loss: 3.916597843170166
Epoch 2/30, Loss: 3.8330509662628174
Epoch 3/30, Loss: 3.6905856132507324
Epoch 4/30, Loss: 3.52860689163208
Epoch 5/30, Loss: 3.6336162090301514
Epoch 6/30, Loss: 3.406665802001953
Epoch 7/30, Loss: 3.388509750366211
Epoch 8/30, Loss: 3.124215841293335
Epoch 9/30, Loss: 3.1875786781311035
Epoch 10/30, Loss: 2.8808376789093018
Epoch 11/30, Loss: 2.5267772674560547
Epoch 12/30, Loss: 2.152298927307129
Epoch 13/30, Loss: 2.6620020866394043
Epoch 14/30, Loss: 2.0018093585968018
Epoch 15/30, Loss: 1.9864614009857178
Epoch 16/30, Loss: 1.67706298828125
Epoch 17/30, Loss: 1.457363486289978
Epoch 18/30, Loss: 0.9757229089736938
Epoch 19/30, Loss: 0.924480676651001
Epoch 20/30, Loss: 0.7676116228103638
Epoch 21/30, Loss: 0.8261665105819702
Epoch 22/30, Loss: 0.30404895544052124
Epoch 23/30, Loss: 0.40318262577056885
Epoch 24/30, Loss: 0.44911935925483704
Epoch 25/30, Loss: 0.3758374750614166
Epoch 26/30, Loss: 0.17474721372127533
Epoch 27/30, Loss: 0.1872794

  test_feature = torch.tensor(test_feature, dtype=torch.float32).unsqueeze(0)  # Add batch dimension
