RESNET_18

In [1]:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.models import resnet18
from torch.utils.data import DataLoader, Dataset
import numpy as np
from PIL import Image


In [2]:
# Custom Dataset
class FrameDataset(Dataset):
    def __init__(self, root_dir, words_list, transform=None):
        self.root_dir = root_dir
        self.classes = sorted(os.listdir(root_dir), key=int)  # Ensure numeric sorting (1 to 50)
        self.transform = transform
        self.data = []
        for class_id, class_name in enumerate(self.classes):
            class_path = os.path.join(root_dir, class_name)
            frames = sorted(os.listdir(class_path), key=lambda x: int(x.split('.')[0]))  # Ensure numeric frame sorting
            for frame in frames:
                self.data.append((os.path.join(class_path, frame), class_id))
        self.words_list = words_list

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        frame_path, class_id = self.data[idx]
        image = Image.open(frame_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, class_id

In [3]:
# Paths and Parameters
root_dir = "C:/Users/prath/OneDrive/Desktop/PAPER 1/50 frames"  # Replace with your folder path
output_features_dir = "C:/Users/prath/OneDrive/Desktop/resnet18_features1"  # Replace with the output directory
os.makedirs(output_features_dir, exist_ok=True)

# List of words (provided by you)
words_list = [
    'goodmorning', 'hearingimpaired', 'communication', 'news', 'meetings',
    'primeMinister', 'namaskar', 'indetail', 'watching', 'both',
    'activities', 'chaired', 'children', 'development', 'earlier',
    'fire', 'fourteen', 'government', 'homeminister', 'india',
    'instructed', 'interaction', 'inthis', 'more', 'movingon',
    'one', 'reviewed', 'situation', 'spoke', 'technological',
    'terrorists', 'thanks', 'thatsit', 'there', 'today',
    'tools', 'two', 'under', 'yesterday', 'youare',
    'health', 'imprisonment', 'phone', 'training', 'krishna',
    'wrong', 'train', 'global', 'men', 'story'  # Replace with your actual words
]

batch_size = 16
num_epochs = 10
learning_rate = 1e-3

# Data Transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Dataset and DataLoader
dataset = FrameDataset(root_dir, words_list, transform)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [4]:
# Define ResNet-18 from Scratch
class CustomResNet18(nn.Module):
    def __init__(self, num_classes=50):
        super(CustomResNet18, self).__init__()
        self.resnet = resnet18(weights=None)  # No pretrained weights
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_classes)  # Custom output layer

    def forward(self, x):
        return self.resnet(x)

model = CustomResNet18(num_classes=50).cuda()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


In [5]:
# Training
model.train()
for epoch in range(num_epochs):
    total_loss = 0
    correct = 0
    total = 0
    for images, labels in dataloader:
        images, labels = images.cuda(), labels.cuda()
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = outputs.max(1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(dataloader):.4f}, Accuracy: {100*correct/total:.2f}%")


Epoch [1/10], Loss: 1.5502, Accuracy: 57.28%
Epoch [2/10], Loss: 0.1821, Accuracy: 95.76%
Epoch [3/10], Loss: 0.0535, Accuracy: 99.20%
Epoch [4/10], Loss: 0.0921, Accuracy: 98.08%
Epoch [5/10], Loss: 0.0796, Accuracy: 98.28%
Epoch [6/10], Loss: 0.0279, Accuracy: 99.28%
Epoch [7/10], Loss: 0.0355, Accuracy: 99.20%
Epoch [8/10], Loss: 0.0031, Accuracy: 99.96%
Epoch [9/10], Loss: 0.0120, Accuracy: 99.72%
Epoch [10/10], Loss: 0.0081, Accuracy: 99.88%


In [6]:
# Feature Extraction
model.eval()
features_extractor = nn.Sequential(*list(model.resnet.children())[:-1]).cuda()  # Remove final layer
with torch.no_grad():
    for class_name in os.listdir(root_dir):
        class_path = os.path.join(root_dir, class_name)
        save_path = os.path.join(output_features_dir, class_name)
        os.makedirs(save_path, exist_ok=True)

        for frame_name in sorted(os.listdir(class_path), key=lambda x: int(x.split('.')[0])):
            frame_path = os.path.join(class_path, frame_name)
            image = Image.open(frame_path).convert("RGB")
            if transform:
                image = transform(image)
            image = image.unsqueeze(0).cuda()
            features = features_extractor(image).squeeze().cpu().numpy()
            np.save(os.path.join(save_path, frame_name.split(".")[0] + ".npy"), features)

print("Feature extraction completed!")


Feature extraction completed!


BI-LSTM

In [17]:


# List of words (provided by you)
words_list = [
    'goodmorning', 'hearingimpaired', 'communication', 'news', 'meetings',
    'primeMinister', 'namaskar', 'indetail', 'watching', 'both',
    'activities', 'chaired', 'children', 'development', 'earlier',
    'fire', 'fourteen', 'government', 'homeminister', 'india',
    'instructed', 'interaction', 'inthis', 'more', 'movingon',
    'one', 'reviewed', 'situation', 'spoke', 'technological',
    'terrorists', 'thanks', 'thatsit', 'there', 'today',
    'tools', 'two', 'under', 'yesterday', 'youare',
    'health', 'imprisonment', 'phone', 'training', 'krishna',
    'wrong', 'train', 'global', 'men', 'story'  # Replace with your actual words
]


# Create word-to-index mapping
word_to_index = {word: i for i, word in enumerate(words_list)}
index_to_word = {i: word for i, word in enumerate(words_list)}


In [18]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

class FeatureDataset(Dataset):
    def __init__(self, features_dir, words_list, sequence_length=50):
        self.features_dir = features_dir
        self.words_list = words_list
        self.word_to_index = {word: i for i, word in enumerate(words_list)}
        self.data = []

        # Loop through each class (1 to 50)
        for class_id in range(1, len(words_list) + 1):
            class_path = os.path.join(features_dir, str(class_id))

            # Load all 50 features for this class
            feature_files = sorted(os.listdir(class_path), key=lambda x: int(x.split('.')[0]))
            features = [np.load(os.path.join(class_path, f)) for f in feature_files]

            # Stack into (sequence_length, input_dim) = (50, 512)
            features = np.stack(features)  # Shape: (50, 512)

            # Get the word index as the label
            word_label = self.word_to_index[words_list[class_id - 1]]  # Convert word to index

            self.data.append((features, word_label))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sequence, word_label = self.data[idx]
        return torch.tensor(sequence, dtype=torch.float32), torch.tensor(word_label, dtype=torch.long)


In [19]:
import torch.nn as nn

class BiLSTMModel(nn.Module):
    def __init__(self, input_dim=512, hidden_dim=512, num_layers=2, num_classes=50):
        super(BiLSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, num_classes)  # Output size = 50 (number of words)

    def forward(self, x):
        out, _ = self.lstm(x)  # Shape: (batch_size, 50, 1024)
        out = out[:, -1, :]  # Take the last timestep → Shape: (batch_size, 1024)
        out = self.fc(out)  # Shape: (batch_size, 50) → Class logits
        return out  # Predicts a word class


In [20]:
# Parameters
input_dim = 512  # ResNet feature size
hidden_dim = 512
num_layers = 2
num_classes = 50  # 50 words as labels

# Load dataset
features_dir = "C:/Users/prath/OneDrive/Desktop/resnet18_features"
dataset = FeatureDataset(features_dir, words_list)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

# Define BiLSTM Model
model = BiLSTMModel(input_dim, hidden_dim, num_layers, num_classes).cuda()

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Training Loop
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    total_loss = 0
    correct = 0
    total = 0
    for features, labels in dataloader:
        features, labels = features.cuda(), labels.cuda()
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(features)  # Shape: (batch_size, 50) → Word logits
        
        # Calculate loss
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = outputs.max(1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(dataloader):.4f}, Accuracy: {100*correct/total:.2f}%")

print("Training complete!")


Epoch [1/10], Loss: 4.0950, Accuracy: 0.00%
Epoch [2/10], Loss: 3.6409, Accuracy: 14.00%
Epoch [3/10], Loss: 2.7574, Accuracy: 66.00%
Epoch [4/10], Loss: 1.4370, Accuracy: 88.00%
Epoch [5/10], Loss: 0.8231, Accuracy: 96.00%
Epoch [6/10], Loss: 0.5595, Accuracy: 94.00%
Epoch [7/10], Loss: 0.4514, Accuracy: 98.00%
Epoch [8/10], Loss: 0.2631, Accuracy: 98.00%
Epoch [9/10], Loss: 0.1527, Accuracy: 100.00%
Epoch [10/10], Loss: 0.0751, Accuracy: 100.00%
Training complete!


In [21]:
# Output folder for BiLSTM features
output_features_dir = "C:/Users/prath/OneDrive/Desktop/bilstm_features_10"
os.makedirs(output_features_dir, exist_ok=True)

model.eval()
with torch.no_grad():
    for class_id in range(1, 51):  # Loop through 50 classes
        class_path = os.path.join(features_dir, str(class_id))

        feature_files = sorted(os.listdir(class_path), key=lambda x: int(x.split('.')[0]))
        features = []

        for f in feature_files:
            feature_path = os.path.join(class_path, f)
            feature = np.load(feature_path)  # Shape: (512,)
            features.append(feature)

        if len(features) == 50:  # Ensure we have exactly 50 frames
            features = np.stack(features)  # Shape: (50, 512)
            features = torch.tensor(features, dtype=torch.float32).unsqueeze(0).cuda()  # Shape: (1, 50, 512)
            
            output = model.lstm(features)[0][:, -1, :].cpu().numpy()  # Extract last hidden state → Shape: (1, 1024)
            np.save(os.path.join(output_features_dir, f"{class_id}.npy"), output.squeeze(0))  # Shape: (1024,)

print("BiLSTM feature extraction completed!")


BiLSTM feature extraction completed!
