In [34]:
import os
import cv2
import yaml
import torch
import numpy as np
import pandas as pd
from  torch import nn
import mediapipe as mp
from torch import optim
from datetime import datetime
from torchmetrics import Accuracy
from torch.utils.data import Dataset, DataLoader

In [35]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        list_label = label_dict_from_config_file("hand_gesture.yaml")

        self.linear_relu_stack = nn.Sequential(
            ################## Your Code Here ################## Q1
            # '''Hoàn thành đoạn code để xây dựng một model gồm có 4 hidden layer,
            # lần lượt input và output là (63, 128), (128, 128), (128, 128), (128, 128).
            # Layer đầu tiên được theo sau bổi một Relu và Batchnorm1d.
            # Layer thứ 2, 3, và 4 được theo sau bỏi Relu và Dropout với rate lần lượt là 0.4, 0.4, 0.6.
            # Output layer có nhịêm vụ phân loại với input là 128 và output là số lượng class cử chỉ. '''
            nn.Linear(63,128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Linear(128,128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128,128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128,128),
            nn.ReLU(),
            nn.Dropout(0.6),
            nn.Linear(128,len(list_label))
            ####################################################
        )
    def forward(self, x):
        ################## Your Code Here ################## Q2
        ''' Hoàn thành code để thực hiện forward dự đoán cử chỉ với input x.
        Thực hiện flatten x
        Pass x vừa flatten vào linear_relu_stack
        Return  logits (outputs từ layer cuối cùng)
        '''
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
        ####################################################

    def predict(self,x,threshold=0.8):
        logits = self(x)
        softmax_prob = nn.Softmax(dim=1)(logits)
        chosen_ind = torch.argmax(softmax_prob,dim=1)
        return torch.where(softmax_prob[0,chosen_ind]>threshold,chosen_ind,-1)

    def predict_with_known_class(self,x):
        logits = self(x)
        softmax_prob = nn.Softmax(dim=1)(logits)
        return torch.argmax(softmax_prob,dim=1)

    def score(self,logits):
        return -torch.amax(logits,dim=1)

In [36]:
def label_dict_from_config_file(relative_path):
    with open(relative_path,"r") as f:
       label_tag = yaml.full_load(f)["gestures"]
    return label_tag

In [37]:
class HandLandmarksDetector():
    def __init__(self) -> None:
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles
        self.mp_hands = mp.solutions.hands
        self.detector = self.mp_hands.Hands(False,max_num_hands=1,min_detection_confidence=0.5)

    def detectHand(self,frame):
        hands = []
        frame = cv2.flip(frame, 1)
        annotated_image = frame.copy()
        results = self.detector.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if results.multi_hand_landmarks is not None:
            for hand_landmarks in results.multi_hand_landmarks:
                hand = []
                self.mp_drawing.draw_landmarks(
                    annotated_image,
                    hand_landmarks,
                    self.mp_hands.HAND_CONNECTIONS,
                    self.mp_drawing_styles.get_default_hand_landmarks_style(),
                    self.mp_drawing_styles.get_default_hand_connections_style())
                for landmark in hand_landmarks.landmark:
                    x,y,z = landmark.x,landmark.y,landmark.z
                    hand.extend([x,y,z])
            hands.append(hand)
        return hands,annotated_image

In [38]:
class CustomImageDataset(Dataset):
    def __init__(self, data_file):
        self.data = pd.read_csv(data_file)
        self.labels = torch.from_numpy(self.data.iloc[:,0].to_numpy())

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        one_hot_label = self.labels[idx]
        torch_data = torch.from_numpy(self.data.iloc[idx,1:].to_numpy(dtype=np.float32))
        return torch_data, one_hot_label

In [39]:
class EarlyStopper:
    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.watched_metrics = np.inf

    def early_stop(self, current_value):
        if current_value < self.watched_metrics:
            self.watched_metrics = current_value
            self.counter = 0
        elif current_value > (self.watched_metrics + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

In [40]:
def train(trainloader, val_loader, model, loss_function, early_stopper, optimizer):
    # add auroc score
    best_vloss = 1_000_000
    timestamp = datetime.now().strftime('%d-%m %H:%M')
    for epoch in range(300):
        #training step
        model.train(True)
        running_loss = 0.0
        acc_train = Accuracy(num_classes=len(LIST_LABEL), task='MULTICLASS')
        for batch_number,data in enumerate(trainloader):
            inputs,labels = data

            ################## Your Code Here ################## Q9
            ''' Hoàn thành code để thực hiện reset gradients và dự đoán class cử
            chỉ của inputs
            '''
            optimizer.zero_grad()
            preds = model(inputs)
            ####################################################

            ################## Your Code Here ################## Q10
            ''' Hoàn thành code để thực hiện tính loss dưa vào kết quả dự đoán
            và labels, sau đó thực hiện backwward và update parameters thông qua
            optimizer
            '''
            loss = loss_function(preds, labels)
            loss.backward()
            optimizer.step()

            ####################################################

            acc_train.update(model.predict_with_known_class(inputs), labels)
            running_loss += loss.item()
        avg_loss = running_loss / len(trainloader)
        # validating step
        model.train(False)
        running_vloss = 0.0
        acc_val = Accuracy(num_classes=len(LIST_LABEL), task='MULTICLASS')
        for i, vdata in enumerate(val_loader):
            vinputs, vlabels = vdata
            preds = model(vinputs)
            vloss = loss_function(preds, vlabels)
            running_vloss += vloss.item()
            acc_val.update(model.predict_with_known_class(vinputs), vlabels)

        # Log the running loss averaged per batch
        # for both training and validation
        print(f"Epoch {epoch}: ")
        print(f"Accuracy train:{acc_train.compute().item()}, val:{acc_val.compute().item()}")
        avg_vloss = running_vloss / len(val_loader)
        print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))
        print('Training vs. Validation Loss',
                        { 'Training' : avg_loss, 'Validation' : avg_vloss },
                        epoch + 1)
        print('Training vs. Validation accuracy',
                        { 'Training' : acc_train.compute().item()
                        , 'Validation' : acc_val.compute().item() },
                        epoch + 1)

        # Track best performance, and save the model's state
        if avg_vloss < best_vloss:
            best_vloss = avg_vloss
            best_model_path = f'./{save_path}/model_{timestamp}_{model.__class__.__name__}_best'
            torch.save(model.state_dict(), best_model_path)

        if early_stopper.early_stop(avg_vloss):
            ################## Your Code Here ################## Q5
            ''' Hoàn thành đoạn code bên dưới để  print ra epoch hiện tại và
            minimum watched metric và thoát loop
            '''
            print (f"stopping at epoch {epoch}, minimum: {early_stopper.watched_metrics}")
            break
            ####################################################



    model_path = f'./{save_path}/model_{timestamp}_{model.__class__.__name__}_last'
    torch.save(model.state_dict(), model_path)

    print(acc_val.compute())
    return model, best_model_path

In [41]:
trainset = CustomImageDataset(train_path)
################## Your Code Here ################## Q3
'''Hoàn thành code để thực hiện khởi tạo DataLoader cho trainset với
batch_size 40 và cho phép xáo trộn.'''
train_loader = DataLoader(trainset, batch_size=50, shuffle=True)
####################################################

valset = CustomImageDataset(val_path)
val_loader = DataLoader(valset, batch_size=50, shuffle=False)

################## Your Code Here ################## Q8
'''Hoàn thành code để thực hiện khởi tạo NeuralNetwork model đã xây dựng ở trên,
khởi tạo hàm loss sử dụng CrossEntropyLoss và khởi tạo early stopper với patience
là 30 và min_delta là 0.01
'''
model = NeuralNetwork()
loss_function = nn.CrossEntropyLoss()
early_stopper = EarlyStopper(patience = 30, min_delta = 0.01)
####################################################

################## Your Code Here ################## Q4
'''Hoàn thành code để thực hiện cấu hình Adam optimizer cho các tham số của
model với tốc độ học là 0.0001
'''
optimizer = optim.Adam(model.parameters(), lr=0.0001)
####################################################

model, best_model_path = train(train_loader, val_loader, model, loss_function, early_stopper, optimizer)

Epoch 0: 
Accuracy train:0.23748773336410522, val:0.276450514793396
LOSS train 1.6008672941298712 valid 1.597583293914795
Training vs. Validation Loss {'Training': 1.6008672941298712, 'Validation': 1.597583293914795} 1
Training vs. Validation accuracy {'Training': 0.23748773336410522, 'Validation': 0.276450514793396} 1
Epoch 1: 
Accuracy train:0.2953876256942749, val:0.34812286496162415
LOSS train 1.5792588903790428 valid 1.578514854113261
Training vs. Validation Loss {'Training': 1.5792588903790428, 'Validation': 1.578514854113261} 2
Training vs. Validation accuracy {'Training': 0.2953876256942749, 'Validation': 0.34812286496162415} 2
Epoch 2: 
Accuracy train:0.3267909586429596, val:0.3242320716381073
LOSS train 1.5616309131894792 valid 1.5573774973551433
Training vs. Validation Loss {'Training': 1.5616309131894792, 'Validation': 1.5573774973551433} 3
Training vs. Validation accuracy {'Training': 0.3267909586429596, 'Validation': 0.3242320716381073} 3
Epoch 3: 
Accuracy train:0.379784

In [42]:
list_label = label_dict_from_config_file("hand_gesture.yaml")
DATA_FOLDER_PATH="./data/"
testset = CustomImageDataset(os.path.join(DATA_FOLDER_PATH,"landmark_test.csv"))

# Test DataLoader instantiation
################## Your Code Here ################## Q6
''' Hoàn thành code bên dưới để  khởi tạo DataLoader cho testset with batch size
20, không cho phép shuffle
'''
test_loader = DataLoader(testset, batch_size=20, shuffle=False)
####################################################

network = NeuralNetwork()
network.load_state_dict(torch.load(best_model_path, weights_only=False))

network.eval()
acc_test = Accuracy(num_classes=len(list_label), task='MULTICLASS')
for i, test_data in enumerate(test_loader):
    test_input, test_label = test_data
    ################## Your Code Here ################## Q7
    '''Hoàn thành code bên dưới để  predict class của cử chỉ và update accuracy
    với kết quả predict và true labels
    '''
    preds = network.predict_with_known_class(test_input)
    acc_test.update(preds, test_label)
    
    ####################################################

print(network.__class__.__name__)
print(f"Accuracy of model:{acc_test.compute().item()}")
print("========================================================================")

NeuralNetwork
Accuracy of model:0.9724409580230713
