# Relation Network Implementation for Indian Sign Language Classification

### Ayush Muralidharan: PES1UG22AM912
### Tejas V Bhat: PES1UG22AM909
### Atharv Revankar: PES1UG22AM920
### Prarthana Kini: PES1UG22AM119

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import random
import os
from PIL import Image


In [None]:
# Choose the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)


Using device: cpu


### Embedding Network
A pre-trained ResNet-18 is used as the feature extractor.
The final classification layer is replaced with an identity layer, outputting raw feature embeddings.

In [None]:
class EmbeddingNet(nn.Module):
    """ResNet-18 backbone with its classification head removed.

    The torchvision ResNet-18 is loaded with pre-trained weights and its final
    ``fc`` layer is replaced by ``nn.Identity``, so ``forward`` returns the
    features that would have been fed to that layer instead of class logits.

    Attributes:
        model: The underlying torchvision ResNet-18.
        embedding_dim: ``in_features`` of the removed ``fc`` layer, i.e. the
            length of each embedding vector produced by ``forward``.
    """

    def __init__(self):
        super().__init__()
        # `pretrained=True` is deprecated in recent torchvision releases in
        # favour of the `weights=` enum. Use the enum when it exists and fall
        # back to the legacy flag on older installations.
        weights_enum = getattr(models, "ResNet18_Weights", None)
        if weights_enum is not None:
            self.model = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
        else:
            self.model = models.resnet18(pretrained=True)

        # Record the feature width before the head is discarded so downstream
        # code can size its layers without hard-coding it.
        self.embedding_dim = self.model.fc.in_features
        self.model.fc = nn.Identity()  # Remove the classification layer

    def forward(self, x):
        """Return the backbone features for the image batch ``x``."""
        return self.model(x)

# Instantiate the feature extractor and move its parameters to `device`.
embedding_net = EmbeddingNet().to(device)




### Relation Network
A fully connected network that computes relation scores between support and query features.
- Input: the support embedding concatenated with the query embedding, so the input size is the sum of the two feature dimensions.
- Output: a single relation (similarity) score, squashed into the range 0–1 by a sigmoid activation.





In [None]:
class RelationNetwork(nn.Module):
    """Two-layer perceptron that turns a feature vector into a relation score.

    Args:
        input_size: Length of the last dimension of the input.
        hidden_size: Width of the single hidden layer.

    ``forward`` applies ``fc1`` -> ReLU -> ``fc2`` -> sigmoid and therefore
    returns one value per input row.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        hidden = torch.relu(self.fc1(x))
        logit = self.fc2(hidden)
        # Sigmoid keeps the score bounded so it can be regressed onto 0/1 targets.
        return logit.sigmoid()

# input_size must equal the length of the concatenated (support, query)
# feature vector passed to the network; presumably 2 x 512 for ResNet-18
# embeddings — TODO confirm against the embedding size.
relation_net = RelationNetwork(input_size=1024, hidden_size=256).to(device)


In [None]:

relation_net

RelationNetwork(
  (fc1): Linear(in_features=1024, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=1, bias=True)
)

In [None]:
# Image pipeline: fixed-size resize, random augmentation (flips, rotation,
# colour jitter), conversion to a tensor, then per-channel normalisation.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomRotation(degrees=30),
        transforms.ColorJitter(
            brightness=0.4,
            contrast=0.4,
            saturation=0.4,
            hue=0.2,
        ),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

### CustomDataset Class:
Organizes data from class folders and applies data augmentation (e.g., resizing, flipping, rotation, color jitter, and normalization).

In [None]:
class CustomDataset:
    """Image-folder dataset with one sub-directory of ``root_dir`` per class.

    Args:
        root_dir: Directory whose immediate sub-directories are the classes.
        transform: Optional callable applied to each loaded PIL image.

    Attributes:
        classes: Sorted list of class (sub-directory) names.
        data: List of ``(image_path, class_name)`` pairs.
        class_to_idx: Mapping from class name to its position in ``classes``.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform

        # Only real, non-hidden sub-directories are classes. Stray entries such
        # as ".DS_Store" or ".ipynb_checkpoints" would otherwise be treated as
        # classes and break the listing below. Sorting makes the class ->
        # index mapping independent of filesystem enumeration order.
        self.classes = sorted(
            entry
            for entry in os.listdir(root_dir)
            if not entry.startswith(".")
            and os.path.isdir(os.path.join(root_dir, entry))
        )

        self.data = []
        for class_name in self.classes:
            class_dir = os.path.join(root_dir, class_name)
            for img_file in sorted(os.listdir(class_dir)):
                img_path = os.path.join(class_dir, img_file)
                # Skip hidden files and nested directories; only regular files
                # are candidate images.
                if img_file.startswith(".") or not os.path.isfile(img_path):
                    continue
                self.data.append((img_path, class_name))

        self.class_to_idx = {
            class_name: idx for idx, class_name in enumerate(self.classes)
        }

    def __getitem__(self, idx):
        """Return ``(image, class_index)`` for the sample at position ``idx``."""
        img_path, label = self.data[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image, self.class_to_idx[label]

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.data)

In [None]:
# Index the training images, using the augmentation pipeline as the transform.
# NOTE(review): the path is a machine-specific absolute path — adjust it for
# the environment this notebook runs in.
train_dataset = CustomDataset("/Users/ayushmuralidharan/Desktop/AOML/AOML PROJECT/MAML-Pytorch-master/Experiment 2/Dataset_INDIAN_FULL/train", transform=transform)

### Few-Shot Sampling
- Episodes are generated on the fly: each one randomly picks N_WAY classes, then K_SHOT support images and Q_QUERY query images per class.
- The support and query sets of a class never share an image.

In [None]:
def sample_episode_n_shot(dataset, n_way, k_shot, q_query):
    """Sample one few-shot episode of image paths from ``dataset``.

    Args:
        dataset: Object exposing ``classes`` (list of class names) and
            ``data`` (iterable of ``(image_path, class_name)`` pairs).
        n_way: Number of classes drawn for the episode.
        k_shot: Number of support images drawn per class.
        q_query: Number of query images drawn per class.

    Returns:
        ``(support_images, support_labels, query_images, query_labels)``.
        Labels are episode-local integers ``0 .. n_way - 1`` assigned in the
        order the classes were drawn. Within a class the support and query
        images are disjoint.

    Raises:
        ValueError: If the dataset has fewer than ``n_way`` classes, or a
            drawn class has fewer than ``k_shot + q_query`` images.
    """
    if n_way > len(dataset.classes):
        raise ValueError(
            f"n_way={n_way} exceeds the {len(dataset.classes)} available classes"
        )

    episode_classes = random.sample(dataset.classes, n_way)

    # Group paths for the chosen classes in a single pass over the dataset
    # instead of rescanning it once per class.
    paths_by_class = {class_name: [] for class_name in episode_classes}
    for img_path, class_name in dataset.data:
        if class_name in paths_by_class:
            paths_by_class[class_name].append(img_path)

    per_class = k_shot + q_query
    support_images, support_labels, query_images, query_labels = [], [], [], []

    for label, class_name in enumerate(episode_classes):
        candidates = paths_by_class[class_name]
        if len(candidates) < per_class:
            raise ValueError(
                f"class {class_name!r} has {len(candidates)} images; "
                f"need k_shot + q_query = {per_class}"
            )

        # One draw without replacement, split in two, guarantees that the
        # support and query sets do not overlap.
        drawn = random.sample(candidates, per_class)
        support_images.extend(drawn[:k_shot])
        support_labels.extend([label] * k_shot)
        query_images.extend(drawn[k_shot:])
        query_labels.extend([label] * q_query)

    return support_images, support_labels, query_images, query_labels

def prepare_episode(support_images, query_images, transform, device):
    """Load the episode's images as lists of single-image batches.

    Args:
        support_images: Paths of the support images.
        query_images: Paths of the query images.
        transform: Callable applied to each RGB PIL image; its result must
            support ``unsqueeze`` and ``to``.
        device: Target device for every resulting tensor.

    Returns:
        ``(support_tensors, query_tensors)``: two lists with one tensor per
        path, each carrying a leading dimension of size 1.
    """

    def to_single_batch(path):
        rgb_image = Image.open(path).convert("RGB")
        tensor = transform(rgb_image)
        return tensor.unsqueeze(0).to(device)

    # Support images are loaded first, then queries, each in path order.
    support_tensors = list(map(to_single_batch, support_images))
    query_tensors = list(map(to_single_batch, query_images))
    return support_tensors, query_tensors


### Training Loop
Each episode calculates relation scores between query and support embeddings.
The MSE loss between predicted relation scores and target labels is minimized.
Hyperparameters:
- N_WAY: Number of classes in an episode.
- K_SHOT: Number of support images per class.
- Q_QUERY: Number of query images per class.

In [None]:
num_episodes = 500
n_way = 4
k_shot = 5
q_query = 5

# Loss and optimizer: both networks are trained jointly.
criterion = nn.MSELoss()
optimizer = optim.Adam(
    list(embedding_net.parameters()) + list(relation_net.parameters()), lr=1e-4
)

# Ensure both networks are in training mode, e.g. when this cell is re-run
# after an evaluation that switched them to eval mode.
embedding_net.train()
relation_net.train()

# Training loop
for episode in range(num_episodes):
    # Sample episode
    support_images, support_labels, query_images, query_labels = sample_episode_n_shot(
        train_dataset, n_way, k_shot, q_query
    )

    # Prepare tensors (lists of single-image batches)
    support_tensors, query_tensors = prepare_episode(
        support_images, query_images, transform, device
    )

    # Embed each set in one forward pass instead of image by image.
    support_features = embedding_net(torch.cat(support_tensors, dim=0))
    query_features = embedding_net(torch.cat(query_tensors, dim=0))

    # One prototype per episode class: the mean of its support embeddings.
    # Scoring against n_way prototypes yields exactly n_way scores per query,
    # matching the one-hot target. Scoring every support image produced
    # n_way * k_shot scores, which MSELoss silently broadcast against the
    # n_way-long target and trained the network towards a constant output.
    support_label_tensor = torch.tensor(support_labels, device=device)
    prototypes = torch.stack(
        [
            support_features[support_label_tensor == class_idx].mean(dim=0)
            for class_idx in range(n_way)
        ]
    )  # [n_way, feature_dim]

    # Build every (prototype, query) pair; the prototype comes first in the
    # concatenation, the same order the support feature had.
    n_queries = query_features.size(0)
    pairs = torch.cat(
        (
            prototypes.unsqueeze(0).expand(n_queries, -1, -1),
            query_features.unsqueeze(1).expand(-1, n_way, -1),
        ),
        dim=-1,
    )  # [n_queries, n_way, 2 * feature_dim]
    predicted_scores = relation_net(
        pairs.reshape(n_queries * n_way, -1)
    ).view(n_queries, n_way)

    # One-hot targets: 1 for the query's own class, 0 elsewhere.
    target = F.one_hot(
        torch.tensor(query_labels, device=device), num_classes=n_way
    ).to(predicted_scores.dtype)

    # MSELoss averages over all elements; scaling by the number of queries
    # keeps the "sum of per-query losses" scale of the reported value.
    loss = criterion(predicted_scores, target) * n_queries

    # Backpropagate and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print progress
    if (episode + 1) % 10 == 0:
        print(f"Episode [{episode + 1}/{num_episodes}], Loss: {loss.item():.4f}")

  return F.mse_loss(input, target, reduction=self.reduction)


Episode [10/500], Loss: 3.7513
Episode [20/500], Loss: 3.7519
Episode [30/500], Loss: 3.7514
Episode [40/500], Loss: 3.7506
Episode [50/500], Loss: 3.7501
Episode [60/500], Loss: 3.7501
Episode [70/500], Loss: 3.7501
Episode [80/500], Loss: 3.7501
Episode [90/500], Loss: 3.7501
Episode [100/500], Loss: 3.7501
Episode [110/500], Loss: 3.7501
Episode [120/500], Loss: 3.7501
Episode [130/500], Loss: 3.7500
Episode [140/500], Loss: 3.7501
Episode [150/500], Loss: 3.7501
Episode [160/500], Loss: 3.7500
Episode [170/500], Loss: 3.7501
Episode [180/500], Loss: 3.7501
Episode [190/500], Loss: 3.7501
Episode [200/500], Loss: 3.7500
Episode [210/500], Loss: 3.7500
Episode [220/500], Loss: 3.7500
Episode [230/500], Loss: 3.7500
Episode [240/500], Loss: 3.7501
Episode [250/500], Loss: 3.7500
Episode [260/500], Loss: 3.7500
Episode [270/500], Loss: 3.7500
Episode [280/500], Loss: 3.7500
Episode [290/500], Loss: 3.7500
Episode [300/500], Loss: 3.7501
Episode [310/500], Loss: 3.7500
Episode [320/500]

In [None]:
def prepare_episode_for_accuracy(support_images, query_images, image_transform=None):
    """Load an evaluation episode as two stacked image batches.

    Args:
        support_images: Paths of the support images.
        query_images: Paths of the query images.
        image_transform: Callable mapping an RGB PIL image to a tensor. When
            omitted, a deterministic resize / to-tensor / normalise pipeline
            is used so that evaluation is not affected by random
            augmentation and gives repeatable results.

    Returns:
        ``(support_tensors, query_tensors)``: each a single tensor produced by
        ``torch.stack`` over the transformed images, placed on ``device``.
    """
    if image_transform is None:
        # Same size and normalisation constants as the training pipeline,
        # but without the random flips, rotation and colour jitter.
        image_transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def load_and_stack(image_paths):
        # Load images, apply the transform, and stack them into a batch
        tensors = [
            image_transform(Image.open(img).convert("RGB")).to(device)
            for img in image_paths
        ]
        return torch.stack(tensors)  # Shape: [batch_size, 3, 224, 224] with the default transform

    # Process support and query sets
    support_tensors = load_and_stack(support_images)
    query_tensors = load_and_stack(query_images)

    return support_tensors, query_tensors


### Evaluation
- Class prototypes (mean embeddings for each class) are computed from the support set.
- Query embeddings are compared to prototypes using the Relation Network.
- Accuracy: the percentage of query images whose predicted class matches the true label.



In [None]:
def calculate_accuracy(relation_net, embedding_net, support_tensors, support_labels, query_tensors, query_labels):
    """
    Calculate the accuracy of the Relation Network on the query set.

    Each query is scored against one prototype per class (the mean support
    embedding of that class) and assigned the class with the highest score.
    Both models are switched to evaluation mode and left in that mode.

    Args:
        relation_net: The Relation Network model; called with rows of
            concatenated ``(prototype, query)`` features and expected to
            return one score per row.
        embedding_net: The Embedding Network model; expected to map a batch
            of images to a 2-D ``[batch, feature_dim]`` tensor.
        support_tensors: Batched tensor for support set images.
        support_labels: Integer labels corresponding to the support set. They
            do not need to be contiguous or start at zero.
        query_tensors: Batched tensor for query set images.
        query_labels: Labels corresponding to the query set.

    Returns:
        Accuracy (in percentage); ``0.0`` when there are no query labels.
    """
    # Set models to evaluation mode
    relation_net.eval()
    embedding_net.eval()

    correct = 0
    total = len(query_labels)

    if total > 0:
        support_label_list = [int(label) for label in support_labels]
        # Distinct classes in ascending order; column j of the score matrix
        # corresponds to class_ids[j].
        class_ids = sorted(set(support_label_list))
        n_way = len(class_ids)

        # No gradients are needed for evaluation; skipping the autograd graph
        # saves memory and time.
        with torch.no_grad():
            support_features = embedding_net(support_tensors)
            label_tensor = torch.tensor(
                support_label_list, device=support_features.device
            )

            # Class prototypes: mean feature vector of each class's supports.
            class_prototypes = torch.stack(
                [
                    support_features[label_tensor == class_id].mean(dim=0)
                    for class_id in class_ids
                ]
            )  # Shape: [n_way, feature_dim]

            query_features = embedding_net(query_tensors)
            n_queries = query_features.size(0)

            # All (prototype, query) pairs, prototype first.
            pairs = torch.cat(
                (
                    class_prototypes.unsqueeze(0).expand(n_queries, -1, -1),
                    query_features.unsqueeze(1).expand(-1, n_way, -1),
                ),
                dim=-1,
            )
            scores = relation_net(
                pairs.reshape(n_queries * n_way, -1)
            ).view(n_queries, n_way)
            best_columns = scores.argmax(dim=1).tolist()

        for i, column in enumerate(best_columns):
            predicted_class = class_ids[column]
            print(f"Predicted:{predicted_class} Actual:{query_labels[i]}")
            # Check if the prediction is correct
            if predicted_class == query_labels[i]:
                correct += 1

    # Calculate accuracy (guarding against an empty query set)
    accuracy = (correct / total) * 100 if total else 0.0
    print(total)
    print(correct)
    print(f"Accuracy: {accuracy:.2f}%")
    return accuracy


In [None]:
# Index the held-out test images.
# NOTE(review): machine-specific absolute path — adjust for the environment
# this notebook runs in.
test_dataset = CustomDataset("/Users/ayushmuralidharan/Desktop/AOML/AOML PROJECT/MAML-Pytorch-master/Experiment 2/Dataset_INDIAN_FULL/test", transform=transform)

# Sample an episode from the test dataset
# (eval_n_way is 3 here, while the training cell uses n_way = 4).
eval_n_way = 3
eval_k_shot = 5
eval_q_query = 5

# Generate support and query sets (lists of image paths plus episode-local labels)
support_images, support_labels, query_images, query_labels = sample_episode_n_shot(
    test_dataset, eval_n_way, eval_k_shot, eval_q_query
)

# Load tensors for support and query sets
support_tensors, query_tensors = prepare_episode_for_accuracy(support_images, query_images)

In [None]:
# Prepare support and query tensors
# (re-loads the same sampled image paths; this repeats the loading step that
# closes the previous cell)
support_tensors, query_tensors = prepare_episode_for_accuracy(support_images, query_images)

# Calculate accuracy on this single sampled episode
accuracy = calculate_accuracy(
    relation_net=relation_net,
    embedding_net=embedding_net,
    support_tensors=support_tensors,
    support_labels=support_labels,
    query_tensors=query_tensors,
    query_labels=query_labels,
)

print(f"Final Evaluation Accuracy: {accuracy:.2f}%")


Predicted:0 Actual:0
Predicted:0 Actual:0
Predicted:1 Actual:0
Predicted:2 Actual:0
Predicted:2 Actual:0
Predicted:1 Actual:1
Predicted:1 Actual:1
Predicted:2 Actual:1
Predicted:2 Actual:1
Predicted:2 Actual:1
Predicted:2 Actual:2
Predicted:2 Actual:2
Predicted:1 Actual:2
Predicted:1 Actual:2
Predicted:1 Actual:2
15
6
Accuracy: 40.00%
Final Evaluation Accuracy: 40.00%
