<a href="https://colab.research.google.com/github/lehuyphuong/Light_Control_By_Hand_Gestures/blob/feature%2Fstep_1_train_model/Step_1_train_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install mediapipe==0.10.18

Collecting mediapipe==0.10.18
  Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe==0.10.18)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.1/36.1 MB[0m [31m32.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.18 sounddevice-0.5.1


In [None]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.6.0-py3-none-any.whl.metadata (20 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.9-py3-none-any.whl.metadata (5.2 kB)
Downloading torchmetrics-1.6.0-py3-none-any.whl (926 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m926.4/926.4 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.11.9-py3-none-any.whl (28 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.11.9 torchmetrics-1.6.0


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchmetrics import Accuracy
from torch.utils.data import Dataset, DataLoader
import yaml
import torch.optim as optim
import cv2
import mediapipe as mp
import datetime as datetime
import os

In [None]:
def label_dict_from_config_file(relative_path):
    """Load the gesture label table from a YAML configuration file.

    Args:
        relative_path: Path of the YAML file. Its top-level mapping must
            contain a ``"gestures"`` key.

    Returns:
        Whatever object is stored under ``"gestures"``. The callers in this
        notebook only take ``len()`` of it to obtain the number of classes.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the document has no ``"gestures"`` entry.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the
    # platform's default locale encoding.
    with open(relative_path, "r", encoding="utf-8") as f:
        # NOTE(review): full_load can build more than plain YAML types. If the
        # config only holds scalars/lists/dicts, yaml.safe_load is the safer
        # choice -- confirm and switch.
        config = yaml.full_load(f)
    return config["gestures"]

In [None]:
class NeuralNetwork(nn.Module):
  """Fully connected classifier mapping a flattened landmark vector to gesture logits.

  Layout: 63 -> 128 -> 128 -> 128 -> 128 -> num_classes, with ReLU after every
  hidden layer, BatchNorm after the first one and Dropout (0.4, 0.4, 0.6)
  after the remaining three. The 63 inputs are presumably 21 hand landmarks
  times (x, y, z) -- TODO confirm against the CSV producer.

  The attribute names ``flatten`` and ``linear_relu_stack`` and the layer
  order are part of the saved ``state_dict`` layout; do not rename them.
  """

  # Gesture config consulted when the number of classes is not given explicitly.
  DEFAULT_CONFIG_PATH = "/content/drive/MyDrive/AIO_2024/Module_5/Week_5/hand_gesture.yaml"

  def __init__(self, num_classes=None, config_path=None):
    """Build the network.

    Args:
      num_classes: Size of the output layer. When ``None`` (default) it is
        derived from the length of the gesture table in ``config_path``.
      config_path: YAML file read via ``label_dict_from_config_file`` when
        ``num_classes`` is ``None``. Defaults to ``DEFAULT_CONFIG_PATH``.
    """
    super().__init__()
    if num_classes is None:
      if config_path is None:
        config_path = self.DEFAULT_CONFIG_PATH
      num_classes = len(label_dict_from_config_file(config_path))

    self.flatten = nn.Flatten()
    self.linear_relu_stack = nn.Sequential(
        nn.Linear(63, 128),
        nn.ReLU(),
        nn.BatchNorm1d(128),
        nn.Linear(128, 128),
        nn.ReLU(),
        nn.Dropout(p=0.4),
        nn.Linear(128, 128),
        nn.ReLU(),
        nn.Dropout(p=0.4),
        nn.Linear(128, 128),
        nn.ReLU(),
        nn.Dropout(p=0.6),
        nn.Linear(128, num_classes)
    )

  def forward(self, x):
    """Return raw logits of shape (batch, num_classes) for input ``x``."""
    x = self.flatten(x)
    x = self.linear_relu_stack(x)
    return x

  def predict(self, x, threshold=0.8):
    """Predict a class per sample, or -1 when the model is not confident.

    Args:
      x: Batch of inputs accepted by ``forward``.
      threshold: Minimum softmax probability the winning class must exceed.

    Returns:
      Integer tensor of shape (batch,): the argmax class where its softmax
      probability is greater than ``threshold``, otherwise -1.
    """
    logits = self(x)
    softmax_prob = nn.Softmax(dim=1)(logits)
    chosen_ind = torch.argmax(softmax_prob, dim=1)
    # Look up each row's own winning probability. Indexing with
    # softmax_prob[0, chosen_ind] would compare every sample against the
    # probabilities of the first sample in the batch.
    chosen_prob = softmax_prob.gather(1, chosen_ind.unsqueeze(1)).squeeze(1)
    return torch.where(chosen_prob > threshold, chosen_ind, torch.full_like(chosen_ind, -1))

  def predict_with_known_class(self, x):
    """Return the argmax class per sample, with no confidence threshold."""
    logits = self(x)
    softmax_prob = nn.Softmax(dim=1)(logits)
    return torch.argmax(softmax_prob, dim=1)

  def score(self, logits):
    """Return the negated maximum logit of each row of ``logits``."""
    return -torch.amax(logits, dim=1)

In [None]:
class HandLandmarksDetector():
    """Thin wrapper around MediaPipe Hands that returns flat landmark vectors."""

    def __init__(self, max_num_hands=1, min_detection_confidence=0.5) -> None:
        """Create the MediaPipe hand detector.

        Args:
            max_num_hands: Forwarded to ``mp.solutions.hands.Hands``.
            min_detection_confidence: Forwarded to ``mp.solutions.hands.Hands``.
        """
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles
        self.mp_hands = mp.solutions.hands
        self.detector = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=max_num_hands,
            min_detection_confidence=min_detection_confidence)

    def detectHand(self, frame):
        """Detect hands in ``frame`` and draw their landmarks.

        The frame is mirrored horizontally before detection, so both the
        returned coordinates and the annotated image refer to the mirrored
        view. The frame is converted with ``COLOR_BGR2RGB`` before being
        handed to MediaPipe, i.e. it is expected in BGR channel order.

        Args:
            frame: Image array accepted by ``cv2.flip`` / ``cv2.cvtColor``.

        Returns:
            A tuple ``(hands, annotated_image)``: ``hands`` is a list with one
            flat ``[x0, y0, z0, x1, y1, z1, ...]`` list per detected hand
            (empty when nothing is detected); ``annotated_image`` is a copy of
            the mirrored frame with the landmarks drawn on it.
        """
        hands = []
        frame = cv2.flip(frame, 1)
        annotated_image = frame.copy()
        results = self.detector.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        if results.multi_hand_landmarks is not None:
            for hand_landmarks in results.multi_hand_landmarks:
                self.mp_drawing.draw_landmarks(
                    annotated_image,
                    hand_landmarks,
                    self.mp_hands.HAND_CONNECTIONS,
                    self.mp_drawing_styles.get_default_hand_landmarks_style(),
                    self.mp_drawing_styles.get_default_hand_connections_style()
                )

                hand = []
                for landmark in hand_landmarks.landmark:
                    hand.extend([landmark.x, landmark.y, landmark.z])

                # Append inside the loop so every detected hand is kept,
                # not just the last one.
                hands.append(hand)

        return hands, annotated_image

In [None]:
class CustomImageDataset(Dataset):
  """Dataset backed by a CSV file: first column is the label, the rest are features.

  Attributes:
    data: The raw ``pandas.DataFrame`` read from the CSV.
    labels: Tensor built from the first column (dtype follows the CSV column).
    features: ``float32`` tensor built once from all remaining columns.
  """

  def __init__(self, data_file):
    """Read ``data_file`` and convert labels and features to tensors once."""
    self.data = pd.read_csv(data_file)
    self.labels = torch.from_numpy(self.data.iloc[:, 0].to_numpy())
    # Convert the whole feature matrix up front; slicing a DataFrame row and
    # converting it on every __getitem__ call dominates data-loading time.
    feature_matrix = self.data.iloc[:, 1:].to_numpy(dtype=np.float32)
    self.features = torch.from_numpy(np.ascontiguousarray(feature_matrix))

  def __len__(self):
    """Return the number of rows in the CSV."""
    return len(self.data)

  def __getitem__(self, idx):
    """Return ``(features, label)`` for row ``idx``.

    The label is the raw value from the first column, not a one-hot vector.
    The returned feature tensor is a view into ``self.features``; clone it
    before modifying it in place.
    """
    return self.features[idx], self.labels[idx]

In [None]:
class EarlyStopper:
  """Signals when a monitored value has stopped improving.

  Attributes:
    patience: Number of counted "worse" observations that triggers a stop.
    min_delta: Margin above the best value a new value must exceed to be
      counted as worse.
    counter: Number of worse observations since the last improvement.
    watched_metrics: Lowest value observed so far (starts at +infinity).
  """

  def __init__(self, patience=1, min_delta=0):
    self.counter = 0
    self.watched_metrics = float("inf")
    self.min_delta = min_delta
    self.patience = patience

  def early_stop(self, current_value):
    """Record ``current_value`` and report whether training should stop.

    A value strictly below the best one seen becomes the new best and resets
    the counter. A value strictly above ``best + min_delta`` increments the
    counter. Anything in between leaves the state untouched.

    Returns:
      ``True`` once the counter has reached ``patience``, else ``False``.
    """
    # New best: remember it and start counting from scratch.
    if current_value < self.watched_metrics:
      self.watched_metrics, self.counter = current_value, 0
      return False

    # Within the tolerated margin of the best value: nothing changes.
    tolerated = self.watched_metrics + self.min_delta
    if not current_value > tolerated:
      return False

    self.counter += 1
    return True if self.counter >= self.patience else False

In [None]:
def train(trainloader, val_loader, model, loss_function, early_stopper, optimizer, num_epochs=300):
  """Train ``model``, checkpointing the best and the last weights.

  Relies on two notebook-level globals: ``LIST_LABEL`` (its length is the
  number of classes for the accuracy metric) and ``save_path`` (directory the
  checkpoints are written to).

  Args:
    trainloader: Iterable of ``(inputs, labels)`` training batches.
    val_loader: Iterable of ``(inputs, labels)`` validation batches.
    model: Network to optimise; called as ``model(inputs)`` to get logits.
    loss_function: Callable ``(logits, labels) -> loss tensor``.
    early_stopper: Object whose ``early_stop(value)`` returns True to stop;
      it is fed the average validation loss of each epoch.
    optimizer: Optimizer already bound to ``model``'s parameters.
    num_epochs: Maximum number of epochs to run.

  Returns:
    ``(model, best_model_path)``. ``best_model_path`` is the file holding the
    weights with the lowest average validation loss, or ``None`` if no epoch
    ever produced a finite improvement.
  """
  best_vloss = float("inf")
  best_model_path = None
  acc_val = None
  num_classes = len(LIST_LABEL)

  for epoch in range(num_epochs):
    # training
    model.train(True)
    running_loss = 0.0
    acc_train = Accuracy(num_classes=num_classes, task='multiclass')
    for inputs, labels in trainloader:
      # reset gradient
      optimizer.zero_grad()

      # predict class scores for the given inputs
      preds = model(inputs)

      # calculate loss, perform backward and update parameters
      loss = loss_function(preds, labels)
      loss.backward()
      optimizer.step()

      # Reuse the logits already computed instead of running a second forward
      # pass in train mode (which would also update BatchNorm statistics again).
      acc_train.update(preds.detach().argmax(dim=1), labels)
      running_loss += loss.item()

    avg_loss = running_loss / len(trainloader)

    # validating step
    model.train(False)
    running_vloss = 0.0
    acc_val = Accuracy(num_classes=num_classes, task='multiclass')

    with torch.no_grad():  # no gradients are needed for evaluation
      for vinputs, vlabels in val_loader:
        vpreds = model(vinputs)
        # Accumulate with +=; a bare `+` would discard the sum and leave the
        # validation loss stuck at 0.
        running_vloss += loss_function(vpreds, vlabels).item()
        acc_val.update(vpreds.argmax(dim=1), vlabels)

    avg_vloss = running_vloss / len(val_loader)
    train_acc = acc_train.compute().item()
    val_acc = acc_val.compute().item()

    # log the per-batch averages for this epoch
    print(f"Epoch {epoch}: ")
    print(f"Accuracy train: {train_acc}, val:{val_acc}")
    print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))
    print('Training vs Validation loss',
          {'Training' : avg_loss, 'Validation' : avg_vloss}, epoch + 1)
    print('Training vs Validation accuracy',
          {'Training' : train_acc, 'Validation' : val_acc}, epoch + 1)

    # track performance
    if avg_vloss < best_vloss:
      best_vloss = avg_vloss
      best_model_path = f'{save_path}/model_{model.__class__.__name__}_best'
      torch.save(model.state_dict(), best_model_path)

    # Stop on the validation loss: it is the signal that reveals overfitting.
    if early_stopper.early_stop(avg_vloss):
      print(f" stopping at epoch {epoch} , minimum : {early_stopper.watched_metrics}")
      break

  model_path = f'{save_path}/model_{model.__class__.__name__}_last'
  torch.save(model.state_dict(), model_path)
  if acc_val is not None:
    print(acc_val.compute())
  return model, best_model_path


In [None]:
# All artefacts live under one Google Drive folder; Drive must already be
# mounted at /content/drive for these paths to resolve.
BASE_DIR = "/content/drive/MyDrive/AIO_2024/Module_5/Week_5"
DATA_FOLDER_PATH = os.path.join(BASE_DIR, "data")
LIST_LABEL = label_dict_from_config_file(os.path.join(BASE_DIR, "hand_gesture.yaml"))
train_path = os.path.join(DATA_FOLDER_PATH, "landmark_train.csv")
val_path = os.path.join(DATA_FOLDER_PATH, "landmark_val.csv")
save_path = os.path.join(BASE_DIR, "models")
os.makedirs(save_path, exist_ok=True)

# Seed torch so weight initialisation, dropout and batch shuffling repeat
# from run to run.
SEED = 42
torch.manual_seed(SEED)

trainset = CustomImageDataset(train_path)
trainloader = DataLoader(trainset, batch_size=40, shuffle=True)

valset = CustomImageDataset(val_path)
val_loader = DataLoader(valset, batch_size=50, shuffle=False)

model = NeuralNetwork()
loss_function = nn.CrossEntropyLoss()
early_stopper = EarlyStopper(patience=30, min_delta=0.01)

optimizer = optim.Adam(model.parameters(), lr=0.0001)

model, best_model_path = train(trainloader, val_loader, model, loss_function, early_stopper, optimizer)

Epoch 0: 
Accuracy train: 0.6592458486557007, val:0.9913240075111389
LOSS train 1.047553009967335 valid 0.0
Training vs Validation loss {'Training': 1.047553009967335, 'Validation': 1.047553009967335} 1
Training vs validation loss {'Training': 0.6592458486557007, 'Validation': 0.9913240075111389} 1
Epoch 1: 
Accuracy train: 0.9894027709960938, val:0.9922670722007751
LOSS train 0.09647761847934548 valid 0.0
Training vs Validation loss {'Training': 0.09647761847934548, 'Validation': 0.09647761847934548} 2
Training vs validation loss {'Training': 0.9894027709960938, 'Validation': 0.9922670722007751} 2
Epoch 2: 
Accuracy train: 0.9917029738426208, val:0.9924556612968445
LOSS train 0.04698825086940263 valid 0.0
Training vs Validation loss {'Training': 0.04698825086940263, 'Validation': 0.04698825086940263} 3
Training vs validation loss {'Training': 0.9917029738426208, 'Validation': 0.9924556612968445} 3
Epoch 3: 
Accuracy train: 0.9921137094497681, val:0.9920784831047058
LOSS train 0.040202

In [None]:
# Evaluate the best checkpoint written by the training cell on the held-out test split.
list_label = label_dict_from_config_file("/content/drive/MyDrive/AIO_2024/Module_5/Week_5/hand_gesture.yaml")
DATA_FOLDER_PATH="/content/drive/MyDrive/AIO_2024/Module_5/Week_5/data"

testset = CustomImageDataset(os.path.join(DATA_FOLDER_PATH,"landmark_test.csv"))
test_loader = DataLoader(testset, batch_size=20, shuffle=False)

network = NeuralNetwork()
# The checkpoint is a plain state_dict (tensors only), so the restricted
# unpickler is sufficient and avoids executing arbitrary pickled code.
network.load_state_dict(torch.load(best_model_path, weights_only=True))

network.eval()
acc_test = Accuracy(num_classes=len(list_label), task='multiclass')
with torch.no_grad():  # inference only; skip building the autograd graph
    for test_input, test_label in test_loader:
        preds = network(test_input)
        # The logits are passed straight to the metric, as before.
        acc_test.update(preds, test_label)

print(network.__class__.__name__)
print(f"Accuracy of model:{acc_test.compute().item()}")
print("========================================================================")
print("========================================================================")

NeuralNetwork
Accuracy of model:0.9901162981987
