# ASL data training

In [104]:
import os
import cv2
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPooling2D, SpatialDropout2D, Flatten, Dropout
from typing import Tuple

In [105]:
import torch
from torch import nn, Tensor
from torchvision import models
from torchvision.transforms import Compose
from torchvision.transforms import functional as F
from torchvision import transforms as T
from torchmetrics.detection.mean_ap import MeanAveragePrecision
import random
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

In [106]:
import os
import json
import logging
import random
from tqdm import tqdm
from collections import defaultdict
from typing import Tuple
from glob import glob
import pandas as pd


In [134]:
# Define the class names
class_names = [
    "call", "dislike", "fist", "four", "like", "mute", "ok", "one", "palm", 
    "peace", "peace_inverted", "rock", "stop", "stop_inverted", "three", "three2", 
    "two_up", "two_up_inverted"
]
FORMATS = (".jpeg", ".jpg", ".jp2", ".png", ".tiff", ".jfif", ".bmp", ".webp", ".heic")

In [135]:
# Set the input directory path
data_dir = r"C:\Users\moham\Documents\Sem-III\DAB 322\subsample"

In [136]:
# Set the image dimensions
img_height, img_width = 48, 48

In [137]:
# Create empty lists for images and labels
images = []
labels = []

# Iterate through the folders inside the data directory
for class_name in class_names:
    folder_path = os.path.join(data_dir, class_name)
    
    # Iterate through the image files in each folder
    for filename in os.listdir(folder_path):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            image_path = os.path.join(folder_path, filename)
            
            # Load and preprocess the image
            img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            img_resized = cv2.resize(img, (img_width, img_height))
            
            
            # Append the preprocessed image and its corresponding label to the lists
            images.append(img_rgb)
            labels.append(class_names.index(class_name))

# Convert the lists to numpy arrays
images = np.array(images)
labels = np.array(labels, dtype=np.int64)


In [140]:
# Split the data into training, testing, and validation sets
train_images, test_images, train_labels, test_labels = train_test_split(
    images, labels, test_size=0.2, random_state=42)

train_images, val_images, train_labels, val_labels = train_test_split(
    train_images, train_labels, test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets
print("Training data shape:", train_images.shape, train_labels.shape)
print("Validation data shape:", val_images.shape, val_labels.shape)
print("Testing data shape:", test_images.shape, test_labels.shape)

Training data shape: (1152, 48, 48, 3) (1152,)
Validation data shape: (288, 48, 48, 3) (288,)
Testing data shape: (360, 48, 48, 3) (360,)


In [141]:
# Encode the labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

In [142]:
# Perform one-hot encoding on the labels
labels_one_hot = to_categorical(labels_encoded)

In [143]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import mobilenet_v2

# Define your custom dataset
class CustomDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

In [144]:
# Define the transformations to be applied to the images
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [145]:
# Create the datasets and data loaders
train_dataset = CustomDataset(train_images, train_labels, transform=transform)
val_dataset = CustomDataset(val_images, val_labels, transform=transform)
test_dataset = CustomDataset(test_images, test_labels, transform=transform)

In [146]:
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32)
test_dataloader = DataLoader(test_dataset, batch_size=32)

In [152]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

# Load the MobileNet model
mobilenet = models.mobilenet_v2(pretrained=True)
num_ftrs = mobilenet.classifier[1].in_features
mobilenet.classifier[1] = nn.Linear(num_ftrs, len(class_names))  # Modify the last layer to match the number of classes

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mobilenet = mobilenet.to(device)

# Define your custom CNN architecture
class CustomCNN(nn.Module):
    def __init__(self):
        super(CustomCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * 24 * 24, 128)  # Update the input size
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 20)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Create an instance of your custom CNN
custom_cnn = CustomCNN()

In [153]:
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(custom_cnn.parameters(), lr=0.001)  

In [154]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
custom_cnn = custom_cnn.to(device)

In [155]:
num_epochs = 10
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    custom_cnn.train()
    train_loss = 0.0
    
    for images, labels in train_dataloader:
        images = images.to(device)
        labels = labels.to(device, dtype=torch.long) 

        optimizer.zero_grad()
        
        outputs = custom_cnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * images.size(0)
    
    train_loss /= len(train_dataset)
    print(f"Train loss: {train_loss}")
    
    # Evaluate on the validation set
    custom_cnn.eval()
    val_loss = 0.0
    correct = 0
    
    with torch.no_grad():
        for images, labels in val_dataloader:
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = custom_cnn(images)
            loss = criterion(outputs, labels)
            
            val_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
        
        val_loss /= len(val_dataset)
        accuracy = correct / len(val_dataset)
        print(f"Val loss: {val_loss}, Accuracy: {accuracy}")


Epoch 1/10
Train loss: 3.4809063143200345
Val loss: 2.992123391893175, Accuracy: 0.06944444444444445
Epoch 2/10
Train loss: 3.002975026766459
Val loss: 2.988594320085314, Accuracy: 0.06944444444444445
Epoch 3/10
Train loss: 2.99394592973921
Val loss: 2.9851805104149713, Accuracy: 0.06944444444444445
Epoch 4/10
Train loss: 2.989819096194373
Val loss: 2.982100327809652, Accuracy: 0.06944444444444445
Epoch 5/10
Train loss: 2.9858883950445385
Val loss: 2.9792018201616077, Accuracy: 0.06944444444444445
Epoch 6/10
Train loss: 2.9822345044877796
Val loss: 2.9764779143863254, Accuracy: 0.06944444444444445
Epoch 7/10
Train loss: 2.978741811381446
Val loss: 2.9739557372199164, Accuracy: 0.06944444444444445
Epoch 8/10
Train loss: 2.9754662182595997
Val loss: 2.9715692732069225, Accuracy: 0.06944444444444445
Epoch 9/10
Train loss: 2.9723162055015564
Val loss: 2.9693779945373535, Accuracy: 0.06944444444444445
Epoch 10/10
Train loss: 2.969391405582428
Val loss: 2.9672125710381403, Accuracy: 0.069444

In [156]:
# Evaluate the model on the test set
custom_cnn.eval()
test_loss = 0.0
correct = 0

with torch.no_grad():
    for images, labels in test_dataloader:
        images = images.to(device)
        labels = labels.to(device).long()
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        test_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
    
    test_loss /= len(test_dataset)
    accuracy = correct / len(test_dataset)
    print(f"Test loss: {test_loss}, Accuracy: {accuracy}")

Test loss: 6.548065047793918, Accuracy: 0.0


In [93]:
model.eval()
test_loss = 0.0
correct = 0

with torch.no_grad():
    for images, labels in test_dataloader:
        images = images.to(device)
        labels = labels.to(device).long()  # Convert labels to Long
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        test_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
    
    test_loss /= len(test_dataset)
    accuracy = correct / len(test_dataset)
    print(f"Test loss: {test_loss}, Accuracy: {accuracy}")


Test loss: 5.236646334330241, Accuracy: 0.06111111111111111


In [102]:
# Load and preprocess the image
image_path = "C:\\Users\\moham\\Documents\\Sem-III\\DAB 322\\dataset\\subsample\\dislike\\03edcf00-2b0d-4576-bab8-da2bb5da14e2.jpg"
image = cv2.imread(image_path)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert image to RGB
image_resized = cv2.resize(image_rgb, (img_width, img_height))
image_normalized = image_resized / 255.0  # Normalize the image

# Convert the preprocessed image into a PyTorch tensor
image_tensor = torch.tensor(image_normalized, dtype=torch.float32)
image_tensor = image_tensor.permute(2, 0, 1)  # Adjust the tensor dimensions
image_tensor = image_tensor.unsqueeze(0)  # Add an extra dimension for the batch

# Pass the image through the model
model.eval()
image_tensor = image_tensor.to(device)
output = model(image_tensor)

# Interpret the model's predictions
output = output.detach().cpu().numpy()
predicted_class_index = np.argmax(output)
predicted_class_label = class_names[predicted_class_index]

print("Predicted class:", predicted_class_label)


Predicted class: one
