In [1]:
import pandas as pd
import cv2
import matplotlib
import mediapipe as mp
from matplotlib import pyplot as plt
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import sys
import os
import shutil
import warnings
from torch.optim import Adam
import torchmetrics
import time
from sklearn.metrics import accuracy_score

  warn(


In [2]:
warnings.filterwarnings("ignore", category=UserWarning, module='google.protobuf')

In [3]:
IMG_WIDTH = IMG_HEIGHT = SIZE = 100
BATCH_SIZE = 64

In [4]:
transform = transforms.Compose([
    transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [5]:
transform = transforms.Compose([
    transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
    transforms.ToTensor()
])

In [6]:
dataset = ImageFolder('data/processed_images_hands_init/', transform=transform)

In [7]:
train_size = int(0.8 * len(dataset))
valid_size = len(dataset) - train_size

train_dataset, valid_dataset = random_split(dataset, (train_size, valid_size))

In [8]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [9]:
def prepare_model(model, num_classes=29, lr=0.001, device=None):

    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, num_classes)
    
    for param in model.parameters():
        param.requires_grad = False

    for param in model.fc.parameters():
        param.requires_grad = True

    optimizer = Adam(model.fc.parameters(), lr=lr)
    
    criterion = nn.CrossEntropyLoss()

    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    accuracy = torchmetrics.Accuracy(task='MULTICLASS', num_classes=29)
    accuracy.to(device)
    
    return model, criterion, optimizer, accuracy, device

In [10]:
class EarlyStopping:
    def __init__(self, patience=5, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = float('inf')

    def __call__(self, val_loss, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decreases.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), 'models/vgg16_best_model.pth')
        self.val_loss_min = val_loss


In [11]:
def train_model(model, num_epochs, train_loader, val_loader, optimizer, loss_fun, accuracy, device, early_stopping=False):
    model.to(device)
    print(model.__class__.__name__)
    
    if early_stopping:
        early_stopping = EarlyStopping(patience=40, verbose=True)
        
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0
        
        accuracy.reset()  # Reset metric at the beginning of each epoch
        
        for images, labels in tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{num_epochs}", total=len(train_loader)):
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_fun(outputs, labels)
            loss.backward()
            optimizer.step()
            
            accuracy.update(outputs, labels)
            running_loss += loss.item()
            num_batches += 1
        
        average_loss = running_loss / num_batches
        epoch_accuracy = accuracy.compute()
        
        model.eval()
        val_loss = 0.0
        val_batches = 0
        val_accuracy = 0.0
        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc=f"Validation Epoch {epoch+1}/{num_epochs}", total=len(val_loader)):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = loss_fun(outputs, labels)
                val_loss += loss.item()
                val_batches += 1
                val_accuracy += torch.sum(torch.argmax(outputs, dim=1) == labels).item()
        
        average_val_loss = val_loss / val_batches
        val_accuracy = val_accuracy / len(val_loader.dataset)
        
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}, Accuracy: {epoch_accuracy:.4f}, Val Loss: {average_val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")

        if early_stopping:
            early_stopping(average_val_loss, model)
            if early_stopping.early_stop:
                print("Early stopping")
                break


In [40]:
resnet50 = models.resnet50(pretrained=True)



In [41]:
model, criterion, optimizer, accuracy, device = prepare_model(resnet50)

In [51]:
num_epochs = 20

In [52]:
resnet50_trained = train_model(model, num_epochs, train_loader, valid_loader, optimizer, criterion, accuracy, device)

Training Epoch 1/20: 100%|███████████████████████████████████████████████████████████| 902/902 [06:24<00:00,  2.34it/s]
  return F.conv2d(input, weight, bias, self.stride,
Validation Epoch 1/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:35<00:00,  2.38it/s]


Epoch [1/20], Loss: 0.5252, Accuracy: 0.8715, Val Loss: 0.2479, Val Accuracy: 0.9299


Training Epoch 2/20: 100%|███████████████████████████████████████████████████████████| 902/902 [06:28<00:00,  2.32it/s]
Validation Epoch 2/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:33<00:00,  2.42it/s]


Epoch [2/20], Loss: 0.2072, Accuracy: 0.9435, Val Loss: 0.1836, Val Accuracy: 0.9435


Training Epoch 3/20: 100%|███████████████████████████████████████████████████████████| 902/902 [06:07<00:00,  2.45it/s]
Validation Epoch 3/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:32<00:00,  2.46it/s]


Epoch [3/20], Loss: 0.1561, Accuracy: 0.9560, Val Loss: 0.1271, Val Accuracy: 0.9636


Training Epoch 4/20: 100%|███████████████████████████████████████████████████████████| 902/902 [06:07<00:00,  2.45it/s]
Validation Epoch 4/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:32<00:00,  2.45it/s]


Epoch [4/20], Loss: 0.1255, Accuracy: 0.9647, Val Loss: 0.1214, Val Accuracy: 0.9657


Training Epoch 5/20: 100%|███████████████████████████████████████████████████████████| 902/902 [05:57<00:00,  2.52it/s]
Validation Epoch 5/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:33<00:00,  2.43it/s]


Epoch [5/20], Loss: 0.1125, Accuracy: 0.9678, Val Loss: 0.1223, Val Accuracy: 0.9632


Training Epoch 6/20: 100%|███████████████████████████████████████████████████████████| 902/902 [05:46<00:00,  2.61it/s]
Validation Epoch 6/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:31<00:00,  2.47it/s]


Epoch [6/20], Loss: 0.1004, Accuracy: 0.9702, Val Loss: 0.1000, Val Accuracy: 0.9702


Training Epoch 7/20: 100%|███████████████████████████████████████████████████████████| 902/902 [05:44<00:00,  2.62it/s]
Validation Epoch 7/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:38<00:00,  2.29it/s]


Epoch [7/20], Loss: 0.0898, Accuracy: 0.9733, Val Loss: 0.0917, Val Accuracy: 0.9725


Training Epoch 8/20: 100%|███████████████████████████████████████████████████████████| 902/902 [06:35<00:00,  2.28it/s]
Validation Epoch 8/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:41<00:00,  2.22it/s]


Epoch [8/20], Loss: 0.0842, Accuracy: 0.9759, Val Loss: 0.0799, Val Accuracy: 0.9762


Training Epoch 9/20: 100%|███████████████████████████████████████████████████████████| 902/902 [06:31<00:00,  2.30it/s]
Validation Epoch 9/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:35<00:00,  2.38it/s]


Epoch [9/20], Loss: 0.0744, Accuracy: 0.9782, Val Loss: 0.0829, Val Accuracy: 0.9748


Training Epoch 10/20: 100%|██████████████████████████████████████████████████████████| 902/902 [06:22<00:00,  2.36it/s]
Validation Epoch 10/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:38<00:00,  2.29it/s]


Epoch [10/20], Loss: 0.0657, Accuracy: 0.9805, Val Loss: 0.0742, Val Accuracy: 0.9768


Training Epoch 11/20: 100%|██████████████████████████████████████████████████████████| 902/902 [06:17<00:00,  2.39it/s]
Validation Epoch 11/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:34<00:00,  2.39it/s]


Epoch [11/20], Loss: 0.0582, Accuracy: 0.9833, Val Loss: 0.0740, Val Accuracy: 0.9784


Training Epoch 12/20: 100%|██████████████████████████████████████████████████████████| 902/902 [06:11<00:00,  2.43it/s]
Validation Epoch 12/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:32<00:00,  2.45it/s]


Epoch [12/20], Loss: 0.0578, Accuracy: 0.9835, Val Loss: 0.0859, Val Accuracy: 0.9731


Training Epoch 13/20: 100%|██████████████████████████████████████████████████████████| 902/902 [06:06<00:00,  2.46it/s]
Validation Epoch 13/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:37<00:00,  2.32it/s]


Epoch [13/20], Loss: 0.0527, Accuracy: 0.9846, Val Loss: 0.0733, Val Accuracy: 0.9766


Training Epoch 14/20: 100%|██████████████████████████████████████████████████████████| 902/902 [06:01<00:00,  2.49it/s]
Validation Epoch 14/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:33<00:00,  2.43it/s]


Epoch [14/20], Loss: 0.0551, Accuracy: 0.9842, Val Loss: 0.0799, Val Accuracy: 0.9746


Training Epoch 15/20: 100%|██████████████████████████████████████████████████████████| 902/902 [06:05<00:00,  2.47it/s]
Validation Epoch 15/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:35<00:00,  2.37it/s]


Epoch [15/20], Loss: 0.0527, Accuracy: 0.9849, Val Loss: 0.0799, Val Accuracy: 0.9752


Training Epoch 16/20: 100%|██████████████████████████████████████████████████████████| 902/902 [06:26<00:00,  2.33it/s]
Validation Epoch 16/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:33<00:00,  2.43it/s]


Epoch [16/20], Loss: 0.0528, Accuracy: 0.9854, Val Loss: 0.0718, Val Accuracy: 0.9776


Training Epoch 17/20: 100%|██████████████████████████████████████████████████████████| 902/902 [06:02<00:00,  2.49it/s]
Validation Epoch 17/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:35<00:00,  2.36it/s]


Epoch [17/20], Loss: 0.0446, Accuracy: 0.9870, Val Loss: 0.0602, Val Accuracy: 0.9825


Training Epoch 18/20: 100%|██████████████████████████████████████████████████████████| 902/902 [05:57<00:00,  2.52it/s]
Validation Epoch 18/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:38<00:00,  2.28it/s]


Epoch [18/20], Loss: 0.0430, Accuracy: 0.9875, Val Loss: 0.0778, Val Accuracy: 0.9752


Training Epoch 19/20: 100%|██████████████████████████████████████████████████████████| 902/902 [05:59<00:00,  2.51it/s]
Validation Epoch 19/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:32<00:00,  2.44it/s]


Epoch [19/20], Loss: 0.0410, Accuracy: 0.9884, Val Loss: 0.0629, Val Accuracy: 0.9799


Training Epoch 20/20: 100%|██████████████████████████████████████████████████████████| 902/902 [05:49<00:00,  2.58it/s]
Validation Epoch 20/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:31<00:00,  2.46it/s]

Epoch [20/20], Loss: 0.0385, Accuracy: 0.9892, Val Loss: 0.0626, Val Accuracy: 0.9806





In [92]:
torch.save(model.state_dict(), 'models/MobileNet_20.pth')

In [54]:
def prepare_vgg16(model, num_classes=29, lr=0.001, device=None):
    num_features = model.classifier[-1].in_features
    model.classifier[-1] = nn.Linear(num_features, num_classes)

    for param in model.parameters():
        param.requires_grad = False

    for param in model.classifier[-1].parameters():
        param.requires_grad = True

    optimizer = Adam(model.classifier[-1].parameters(), lr=lr)
    
    criterion = nn.CrossEntropyLoss()
    
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    accuracy = torchmetrics.Accuracy(task='MULTICLASS', num_classes=29)
    accuracy.to(device)
    
    return model, criterion, optimizer, accuracy, device

In [27]:
vgg16 = models.vgg16(pretrained=True)
model, criterion, optimizer, accuracy, device = prepare_vgg16(vgg16)

In [82]:
train_model(model, num_epochs, train_loader, valid_loader, optimizer, criterion, accuracy, device, True)

VGG


Training Epoch 1/20: 100%|███████████████████████████████████████████████████████████| 902/902 [08:56<00:00,  1.68it/s]
Validation Epoch 1/20: 100%|█████████████████████████████████████████████████████████| 226/226 [02:13<00:00,  1.69it/s]


Epoch [1/20], Loss: 0.4886, Accuracy: 0.8539, Val Loss: 0.1681, Val Accuracy: 0.9591
Validation loss decreased (inf --> 0.168092).  Saving model ...


Training Epoch 2/20: 100%|███████████████████████████████████████████████████████████| 902/902 [08:46<00:00,  1.71it/s]
Validation Epoch 2/20: 100%|█████████████████████████████████████████████████████████| 226/226 [02:12<00:00,  1.71it/s]


Epoch [2/20], Loss: 0.2668, Accuracy: 0.9125, Val Loss: 0.1150, Val Accuracy: 0.9719
Validation loss decreased (0.168092 --> 0.114986).  Saving model ...


Training Epoch 3/20: 100%|███████████████████████████████████████████████████████████| 902/902 [08:47<00:00,  1.71it/s]
Validation Epoch 3/20: 100%|█████████████████████████████████████████████████████████| 226/226 [02:10<00:00,  1.74it/s]


Epoch [3/20], Loss: 0.2425, Accuracy: 0.9189, Val Loss: 0.0865, Val Accuracy: 0.9779
Validation loss decreased (0.114986 --> 0.086510).  Saving model ...


Training Epoch 4/20: 100%|███████████████████████████████████████████████████████████| 902/902 [08:38<00:00,  1.74it/s]
Validation Epoch 4/20: 100%|█████████████████████████████████████████████████████████| 226/226 [02:12<00:00,  1.71it/s]


Epoch [4/20], Loss: 0.2238, Accuracy: 0.9254, Val Loss: 0.0815, Val Accuracy: 0.9774
Validation loss decreased (0.086510 --> 0.081471).  Saving model ...


Training Epoch 5/20: 100%|███████████████████████████████████████████████████████████| 902/902 [08:37<00:00,  1.74it/s]
Validation Epoch 5/20: 100%|█████████████████████████████████████████████████████████| 226/226 [02:09<00:00,  1.75it/s]


Epoch [5/20], Loss: 0.2217, Accuracy: 0.9268, Val Loss: 0.0726, Val Accuracy: 0.9797
Validation loss decreased (0.081471 --> 0.072644).  Saving model ...


Training Epoch 6/20: 100%|███████████████████████████████████████████████████████████| 902/902 [08:36<00:00,  1.75it/s]
Validation Epoch 6/20: 100%|█████████████████████████████████████████████████████████| 226/226 [02:11<00:00,  1.72it/s]


Epoch [6/20], Loss: 0.2119, Accuracy: 0.9295, Val Loss: 0.0639, Val Accuracy: 0.9822
Validation loss decreased (0.072644 --> 0.063908).  Saving model ...


Training Epoch 7/20: 100%|███████████████████████████████████████████████████████████| 902/902 [08:38<00:00,  1.74it/s]
Validation Epoch 7/20: 100%|█████████████████████████████████████████████████████████| 226/226 [02:10<00:00,  1.73it/s]


Epoch [7/20], Loss: 0.2092, Accuracy: 0.9307, Val Loss: 0.0619, Val Accuracy: 0.9833
Validation loss decreased (0.063908 --> 0.061851).  Saving model ...


Training Epoch 8/20: 100%|███████████████████████████████████████████████████████████| 902/902 [08:27<00:00,  1.78it/s]
Validation Epoch 8/20: 100%|█████████████████████████████████████████████████████████| 226/226 [02:11<00:00,  1.72it/s]


Epoch [8/20], Loss: 0.2086, Accuracy: 0.9324, Val Loss: 0.0633, Val Accuracy: 0.9822
EarlyStopping counter: 1 out of 40


Training Epoch 9/20: 100%|███████████████████████████████████████████████████████████| 902/902 [08:14<00:00,  1.83it/s]
Validation Epoch 9/20: 100%|█████████████████████████████████████████████████████████| 226/226 [02:08<00:00,  1.76it/s]


Epoch [9/20], Loss: 0.2044, Accuracy: 0.9330, Val Loss: 0.0578, Val Accuracy: 0.9830
Validation loss decreased (0.061851 --> 0.057838).  Saving model ...


Training Epoch 10/20: 100%|██████████████████████████████████████████████████████████| 902/902 [08:47<00:00,  1.71it/s]
Validation Epoch 10/20: 100%|████████████████████████████████████████████████████████| 226/226 [02:10<00:00,  1.74it/s]


Epoch [10/20], Loss: 0.2015, Accuracy: 0.9349, Val Loss: 0.0575, Val Accuracy: 0.9830
Validation loss decreased (0.057838 --> 0.057547).  Saving model ...


Training Epoch 11/20: 100%|██████████████████████████████████████████████████████████| 902/902 [08:30<00:00,  1.77it/s]
Validation Epoch 11/20: 100%|████████████████████████████████████████████████████████| 226/226 [02:09<00:00,  1.74it/s]


Epoch [11/20], Loss: 0.1963, Accuracy: 0.9375, Val Loss: 0.0512, Val Accuracy: 0.9859
Validation loss decreased (0.057547 --> 0.051178).  Saving model ...


Training Epoch 12/20: 100%|██████████████████████████████████████████████████████████| 902/902 [08:33<00:00,  1.76it/s]
Validation Epoch 12/20: 100%|████████████████████████████████████████████████████████| 226/226 [02:10<00:00,  1.73it/s]


Epoch [12/20], Loss: 0.1989, Accuracy: 0.9373, Val Loss: 0.0535, Val Accuracy: 0.9845
EarlyStopping counter: 1 out of 40


Training Epoch 13/20: 100%|██████████████████████████████████████████████████████████| 902/902 [08:17<00:00,  1.81it/s]
Validation Epoch 13/20: 100%|████████████████████████████████████████████████████████| 226/226 [02:11<00:00,  1.72it/s]


Epoch [13/20], Loss: 0.1915, Accuracy: 0.9401, Val Loss: 0.0588, Val Accuracy: 0.9815
EarlyStopping counter: 2 out of 40


Training Epoch 14/20: 100%|██████████████████████████████████████████████████████████| 902/902 [08:14<00:00,  1.83it/s]
Validation Epoch 14/20: 100%|████████████████████████████████████████████████████████| 226/226 [02:10<00:00,  1.73it/s]


Epoch [14/20], Loss: 0.1982, Accuracy: 0.9386, Val Loss: 0.0512, Val Accuracy: 0.9843
EarlyStopping counter: 3 out of 40


Training Epoch 15/20: 100%|██████████████████████████████████████████████████████████| 902/902 [08:16<00:00,  1.82it/s]
Validation Epoch 15/20: 100%|████████████████████████████████████████████████████████| 226/226 [02:12<00:00,  1.70it/s]


Epoch [15/20], Loss: 0.1903, Accuracy: 0.9407, Val Loss: 0.0469, Val Accuracy: 0.9858
Validation loss decreased (0.051178 --> 0.046871).  Saving model ...


Training Epoch 16/20: 100%|██████████████████████████████████████████████████████████| 902/902 [09:10<00:00,  1.64it/s]
Validation Epoch 16/20: 100%|████████████████████████████████████████████████████████| 226/226 [02:16<00:00,  1.66it/s]


Epoch [16/20], Loss: 0.1973, Accuracy: 0.9392, Val Loss: 0.0453, Val Accuracy: 0.9873
Validation loss decreased (0.046871 --> 0.045271).  Saving model ...


Training Epoch 17/20: 100%|██████████████████████████████████████████████████████████| 902/902 [09:06<00:00,  1.65it/s]
Validation Epoch 17/20: 100%|████████████████████████████████████████████████████████| 226/226 [02:15<00:00,  1.67it/s]


Epoch [17/20], Loss: 0.1997, Accuracy: 0.9381, Val Loss: 0.0463, Val Accuracy: 0.9879
EarlyStopping counter: 1 out of 40


Training Epoch 18/20: 100%|██████████████████████████████████████████████████████████| 902/902 [09:12<00:00,  1.63it/s]
Validation Epoch 18/20: 100%|████████████████████████████████████████████████████████| 226/226 [02:17<00:00,  1.65it/s]


Epoch [18/20], Loss: 0.1927, Accuracy: 0.9412, Val Loss: 0.0396, Val Accuracy: 0.9890
Validation loss decreased (0.045271 --> 0.039566).  Saving model ...


Training Epoch 19/20: 100%|██████████████████████████████████████████████████████████| 902/902 [09:29<00:00,  1.58it/s]
Validation Epoch 19/20: 100%|████████████████████████████████████████████████████████| 226/226 [02:16<00:00,  1.66it/s]


Epoch [19/20], Loss: 0.1899, Accuracy: 0.9412, Val Loss: 0.0501, Val Accuracy: 0.9847
EarlyStopping counter: 1 out of 40


Training Epoch 20/20: 100%|██████████████████████████████████████████████████████████| 902/902 [08:52<00:00,  1.69it/s]
Validation Epoch 20/20: 100%|████████████████████████████████████████████████████████| 226/226 [02:14<00:00,  1.69it/s]

Epoch [20/20], Loss: 0.1895, Accuracy: 0.9415, Val Loss: 0.0490, Val Accuracy: 0.9877
EarlyStopping counter: 2 out of 40





In [55]:
def prepare_mobilenetv2(model, num_classes=29, lr=0.001, device=None):
    num_features = model.classifier[1].in_features
    model.classifier[1] = nn.Linear(num_features, num_classes)

    for param in model.features.parameters():
        param.requires_grad = False

    optimizer = Adam(model.classifier.parameters(), lr=lr)

    criterion = nn.CrossEntropyLoss()
    
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    accuracy = torchmetrics.Accuracy(task='MULTICLASS', num_classes=29)
    accuracy.to(device)
    
    return model, criterion, optimizer, accuracy, device

In [44]:
mobilenet = models.mobilenet_v2(pretrained=True)
model, criterion, optimizer, accuracy, device = prepare_mobilenetv2(mobilenet)

In [91]:
train_model(model, num_epochs, train_loader, valid_loader, optimizer, criterion, accuracy, device, True)

MobileNetV2


Training Epoch 1/20: 100%|███████████████████████████████████████████████████████████| 902/902 [05:39<00:00,  2.66it/s]
Validation Epoch 1/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:28<00:00,  2.57it/s]


Epoch [1/20], Loss: 0.4978, Accuracy: 0.8787, Val Loss: 0.1808, Val Accuracy: 0.9587
Validation loss decreased (inf --> 0.180827).  Saving model ...


Training Epoch 2/20: 100%|███████████████████████████████████████████████████████████| 902/902 [05:29<00:00,  2.73it/s]
Validation Epoch 2/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:30<00:00,  2.48it/s]


Epoch [2/20], Loss: 0.1989, Accuracy: 0.9442, Val Loss: 0.1392, Val Accuracy: 0.9633
Validation loss decreased (0.180827 --> 0.139204).  Saving model ...


Training Epoch 3/20: 100%|███████████████████████████████████████████████████████████| 902/902 [05:37<00:00,  2.67it/s]
Validation Epoch 3/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:23<00:00,  2.69it/s]


Epoch [3/20], Loss: 0.1629, Accuracy: 0.9514, Val Loss: 0.1030, Val Accuracy: 0.9722
Validation loss decreased (0.139204 --> 0.102962).  Saving model ...


Training Epoch 4/20: 100%|███████████████████████████████████████████████████████████| 902/902 [05:42<00:00,  2.63it/s]
Validation Epoch 4/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:25<00:00,  2.65it/s]


Epoch [4/20], Loss: 0.1411, Accuracy: 0.9566, Val Loss: 0.0929, Val Accuracy: 0.9756
Validation loss decreased (0.102962 --> 0.092850).  Saving model ...


Training Epoch 5/20: 100%|███████████████████████████████████████████████████████████| 902/902 [05:36<00:00,  2.68it/s]
Validation Epoch 5/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:31<00:00,  2.48it/s]


Epoch [5/20], Loss: 0.1306, Accuracy: 0.9586, Val Loss: 0.0829, Val Accuracy: 0.9758
Validation loss decreased (0.092850 --> 0.082890).  Saving model ...


Training Epoch 6/20: 100%|███████████████████████████████████████████████████████████| 902/902 [05:48<00:00,  2.59it/s]
Validation Epoch 6/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:25<00:00,  2.63it/s]


Epoch [6/20], Loss: 0.1252, Accuracy: 0.9594, Val Loss: 0.0805, Val Accuracy: 0.9754
Validation loss decreased (0.082890 --> 0.080547).  Saving model ...


Training Epoch 7/20: 100%|███████████████████████████████████████████████████████████| 902/902 [05:18<00:00,  2.83it/s]
Validation Epoch 7/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:24<00:00,  2.69it/s]


Epoch [7/20], Loss: 0.1212, Accuracy: 0.9609, Val Loss: 0.0847, Val Accuracy: 0.9746
EarlyStopping counter: 1 out of 40


Training Epoch 8/20: 100%|███████████████████████████████████████████████████████████| 902/902 [05:45<00:00,  2.61it/s]
Validation Epoch 8/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:23<00:00,  2.71it/s]


Epoch [8/20], Loss: 0.1159, Accuracy: 0.9621, Val Loss: 0.0775, Val Accuracy: 0.9778
Validation loss decreased (0.080547 --> 0.077483).  Saving model ...


Training Epoch 9/20: 100%|███████████████████████████████████████████████████████████| 902/902 [05:18<00:00,  2.84it/s]
Validation Epoch 9/20: 100%|█████████████████████████████████████████████████████████| 226/226 [01:21<00:00,  2.76it/s]


Epoch [9/20], Loss: 0.1090, Accuracy: 0.9630, Val Loss: 0.0744, Val Accuracy: 0.9777
Validation loss decreased (0.077483 --> 0.074443).  Saving model ...


Training Epoch 10/20: 100%|██████████████████████████████████████████████████████████| 902/902 [05:15<00:00,  2.86it/s]
Validation Epoch 10/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:28<00:00,  2.56it/s]


Epoch [10/20], Loss: 0.1117, Accuracy: 0.9631, Val Loss: 0.0762, Val Accuracy: 0.9773
EarlyStopping counter: 1 out of 40


Training Epoch 11/20: 100%|██████████████████████████████████████████████████████████| 902/902 [05:22<00:00,  2.79it/s]
Validation Epoch 11/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:20<00:00,  2.81it/s]


Epoch [11/20], Loss: 0.1124, Accuracy: 0.9643, Val Loss: 0.0669, Val Accuracy: 0.9799
Validation loss decreased (0.074443 --> 0.066865).  Saving model ...


Training Epoch 12/20: 100%|██████████████████████████████████████████████████████████| 902/902 [05:24<00:00,  2.78it/s]
Validation Epoch 12/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:24<00:00,  2.69it/s]


Epoch [12/20], Loss: 0.1118, Accuracy: 0.9639, Val Loss: 0.0803, Val Accuracy: 0.9767
EarlyStopping counter: 1 out of 40


Training Epoch 13/20: 100%|██████████████████████████████████████████████████████████| 902/902 [05:36<00:00,  2.68it/s]
Validation Epoch 13/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:22<00:00,  2.73it/s]


Epoch [13/20], Loss: 0.1077, Accuracy: 0.9650, Val Loss: 0.0673, Val Accuracy: 0.9819
EarlyStopping counter: 2 out of 40


Training Epoch 14/20: 100%|██████████████████████████████████████████████████████████| 902/902 [05:31<00:00,  2.72it/s]
Validation Epoch 14/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:25<00:00,  2.65it/s]


Epoch [14/20], Loss: 0.1082, Accuracy: 0.9662, Val Loss: 0.0644, Val Accuracy: 0.9819
Validation loss decreased (0.066865 --> 0.064393).  Saving model ...


Training Epoch 15/20: 100%|██████████████████████████████████████████████████████████| 902/902 [05:31<00:00,  2.72it/s]
Validation Epoch 15/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:20<00:00,  2.81it/s]


Epoch [15/20], Loss: 0.1064, Accuracy: 0.9646, Val Loss: 0.0671, Val Accuracy: 0.9813
EarlyStopping counter: 1 out of 40


Training Epoch 16/20: 100%|██████████████████████████████████████████████████████████| 902/902 [05:20<00:00,  2.81it/s]
Validation Epoch 16/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:23<00:00,  2.69it/s]


Epoch [16/20], Loss: 0.1019, Accuracy: 0.9660, Val Loss: 0.0690, Val Accuracy: 0.9799
EarlyStopping counter: 2 out of 40


Training Epoch 17/20: 100%|██████████████████████████████████████████████████████████| 902/902 [05:38<00:00,  2.67it/s]
Validation Epoch 17/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:22<00:00,  2.74it/s]


Epoch [17/20], Loss: 0.1049, Accuracy: 0.9665, Val Loss: 0.0625, Val Accuracy: 0.9818
Validation loss decreased (0.064393 --> 0.062538).  Saving model ...


Training Epoch 18/20: 100%|██████████████████████████████████████████████████████████| 902/902 [05:39<00:00,  2.66it/s]
Validation Epoch 18/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:18<00:00,  2.89it/s]


Epoch [18/20], Loss: 0.1041, Accuracy: 0.9657, Val Loss: 0.0710, Val Accuracy: 0.9800
EarlyStopping counter: 1 out of 40


Training Epoch 19/20: 100%|██████████████████████████████████████████████████████████| 902/902 [05:25<00:00,  2.77it/s]
Validation Epoch 19/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:23<00:00,  2.72it/s]


Epoch [19/20], Loss: 0.1048, Accuracy: 0.9651, Val Loss: 0.0757, Val Accuracy: 0.9807
EarlyStopping counter: 2 out of 40


Training Epoch 20/20: 100%|██████████████████████████████████████████████████████████| 902/902 [05:35<00:00,  2.69it/s]
Validation Epoch 20/20: 100%|████████████████████████████████████████████████████████| 226/226 [01:20<00:00,  2.80it/s]

Epoch [20/20], Loss: 0.1026, Accuracy: 0.9669, Val Loss: 0.0619, Val Accuracy: 0.9830
Validation loss decreased (0.062538 --> 0.061878).  Saving model ...





In [12]:
from PIL import Image
import numpy as np

def predict_image_class(model, image, transform, class_names):
    model.eval()
    image = transform(image).unsqueeze(0)
    image = image.to(device)
    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output, 1)
    return class_names[predicted.item()]


In [13]:
def detect_hand(image, hands, padding=20, target_size=(300, 300), bg_color=(128, 128, 128)):
    rgb_image = image
    rgb_image.flags.writeable = False

    results = hands.process(rgb_image)

    rgb_image.flags.writeable = True

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            h, w, _ = image.shape
            x_min, y_min = w, h
            x_max, y_max = 0, 0

            landmarks = []

            for landmark in hand_landmarks.landmark:
                x, y = int(landmark.x * w), int(landmark.y * h)
                x_min = min(x_min, x)
                y_min = min(y_min, y)
                x_max = max(x_max, x)
                y_max = max(y_max, y)
                landmarks.append((x, y, landmark.z))

            x_min = max(0, x_min - padding)
            y_min = max(0, y_min - padding)
            x_max = min(w, x_max + padding)
            y_max = min(h, y_max + padding)

            hand_image = image[y_min:y_max, x_min:x_max]

            hand_h, hand_w, _ = hand_image.shape
            scale = min(target_size[0] / hand_w, target_size[1] / hand_h)
            new_w = int(hand_w * scale)
            new_h = int(hand_h * scale)
            resized_hand_image = cv2.resize(hand_image, (new_w, new_h))

            result_image = np.full((target_size[1], target_size[0], 3), bg_color, dtype=np.uint8)
            x_offset = (target_size[0] - new_w) // 2
            y_offset = (target_size[1] - new_h) // 2
            result_image[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized_hand_image

            return result_image, results, (x_min, y_min, scale, x_offset, y_offset), landmarks
    return None, None, None, None


In [14]:
cap = cv2.VideoCapture(0)
hands = mp.solutions.hands.Hands(max_num_hands=1)
draw = mp.solutions.drawing_utils

class_names = dataset.classes

In [23]:
def start_camera(model, hands):
    total_images = 0
    total_time = 0
    while True:
        success, image = cap.read()
        image = cv2.flip(image, 1)
        imageRGB = image

        hand_pattern, results, bbox_params, original_landmarks = detect_hand(image, hands)
        if results:
            for handLms in results.multi_hand_landmarks:
                draw.draw_landmarks(image, handLms, mp.solutions.hands.HAND_CONNECTIONS)

                x = int(handLms.landmark[0].x * image.shape[1])
                y = int(handLms.landmark[0].y * image.shape[0])
        
        if hand_pattern is not None:
            imageRGB = cv2.resize(hand_pattern, (150, 150))
        else:
            imageRGB = cv2.resize(imageRGB, (150, 150))
    
        if results:
            if results.multi_hand_landmarks:
                for handLms in results.multi_hand_landmarks:
                    draw.draw_landmarks(image, handLms, mp.solutions.hands.HAND_CONNECTIONS)
                    pil_image = Image.fromarray(imageRGB)
                    time_s = time.time()
                    predicted_class = predict_image_class(model, pil_image, transform, class_names)
                    total_time += time.time() - time_s
                    total_images += 1
                    cv2.putText(image, predicted_class, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (224, 255, 255), 2, cv2.LINE_AA)
                    dynamic_console_output(f"Predicted class: {predicted_class}")

        image_out = image

        height, width, _ = imageRGB.shape
        height_out, width_out, _ = image_out.shape
    
        x_offset = width_out - width
        y_offset = 0
        
        image_out[y_offset:y_offset+height, x_offset:x_offset+width] = imageRGB

        cv2.imshow('Hand', image_out)
        
        if cv2.waitKey(33) != -1:
            cv2.destroyAllWindows()
            # break
            return total_time / total_images

In [16]:
def dynamic_console_output(log):
    terminal_width = shutil.get_terminal_size().columns
    log = log.ljust(terminal_width)
    sys.stdout.write("\r\033[K" + log)
    sys.stdout.flush()

In [51]:
device = 'cpu'

In [46]:
resnet50_t = resnet50
resnet50_t.load_state_dict(torch.load('models/resnet50_20.pth'))

<All keys matched successfully>

In [21]:
vgg16_t = vgg16
vgg16_t.load_state_dict(torch.load('models/vgg16_20.pth'))

<All keys matched successfully>

In [45]:
mobilenet_t = mobilenet
mobilenet_t.load_state_dict(torch.load('models/mobilenet_20.pth'))

<All keys matched successfully>

In [132]:
start_camera(resnet50_t, hands)

[KPredicted class: S                                                                                                      

0.027346482331102543

In [38]:
start_camera(vgg16_t, hands)

[KPredicted class: M                                                                                                      

0.06815091768900554

In [134]:
start_camera(mobilenet_t, hands)

[KPredicted class: E                                                                                                      

0.013859311944430637

In [53]:
cap.release()

In [48]:
def predict_images_in_folder(folder_path, model):
    predictions = []
    true_labels = []
    
    subfolders = [f.path for f in os.scandir(folder_path) if f.is_dir()]
    
    for subfolder in subfolders:
        class_name = os.path.basename(subfolder)
        print(f'Processing images in class: {class_name}')
     
        files = [f for f in os.listdir(subfolder) if f.endswith('.jpg') or f.endswith('.png')]
        
        for file in files:
            image_path = os.path.join(subfolder, file)
            image = cv2.imread(image_path)

            hand_pattern, results_pattern, bbox_params, original_landmarks = detect_hand(image, hands)
            if hand_pattern is not None:
                imageRGB = cv2.resize(hand_pattern, (SIZE, SIZE))
            else:
                imageRGB = cv2.resize(image, (SIZE, SIZE))
            if results_pattern:
                if results_pattern.multi_hand_landmarks:
                    pil_image = Image.fromarray(imageRGB)
                    predicted_class = predict_image_class(model, pil_image, transform, class_names)
                else:
                    predicted_class = 'nothing'
            else:
                predicted_class = 'nothing'
            predictions.append(predicted_class)
            true_labels.append(class_name)
            
    accuracy = accuracy_score(true_labels, predictions)
    print(f'Accuracy: {accuracy}')

In [52]:
from sklearn.metrics import accuracy_score
path = 'data/test_images'
predict_images_in_folder(path, mobilenet_t)

Processing images in class: A
Processing images in class: B
Processing images in class: C
Processing images in class: D
Processing images in class: del
Processing images in class: E
Processing images in class: F
Processing images in class: G
Processing images in class: H
Processing images in class: I
Processing images in class: J
Processing images in class: K
Processing images in class: L
Processing images in class: M
Processing images in class: N
Processing images in class: nothing
Processing images in class: O
Processing images in class: P
Processing images in class: Q
Processing images in class: R
Processing images in class: S
Processing images in class: space
Processing images in class: T
Processing images in class: U
Processing images in class: V
Processing images in class: W
Processing images in class: X
Processing images in class: Y
Processing images in class: Z
Accuracy: 0.2824137931034483
