# Task 1: Understand body language by gesture recognition with fully connected neural network

## 1. Do literature search on gesture recognition and its application in Human-Robot Interaction. Summarize what you have learned.

## 2. Referring to the previous example about building a neural network based classifier, use what you have learned to read the code for gesture classification below and design your own network architecture using fully connected layers.

## 3. Run the model. Analyse and comment on the performance of your model based on fully connected layers.

In [1]:
import cv2
import numpy as np
import os
import itertools
import torch.utils.data as utils_data

## 1) data preprocessing

Define a function to preprocess the images including resizing and binaryzation

In [2]:
def processSkinImage(filePath, resize_HW=48):
    # step 1
    # read the image
    original = cv2.imread(filename=filePath)

    # step 2
    # resize the image to
    image_resized = cv2.resize(original, (resize_HW, resize_HW))

    # step 3
    # convert the image from rgb to YCbCr
    image_ycbcr = cv2.cvtColor(image_resized, cv2.COLOR_BGR2YCR_CB)

    # step 4
    # get the central color of the image
    # expected the hand to be in the central of the image
    Cb_center_color = image_ycbcr[int(resize_HW/2), int(resize_HW/2), 1]
    Cr_center_color = image_ycbcr[int(resize_HW/2), int(resize_HW/2), 2]
    # set the range
    Cb_Difference = 15
    Cr_Difference = 10

    # step 5
    # detect skin pixels
    Cb = image_ycbcr[:, :, 1]
    Cr = image_ycbcr[:, :, 2]
    index = np.where((Cb >= Cb_center_color-Cb_Difference) & (Cb <= Cb_center_color+Cb_Difference)
                     & (Cr >= Cr_center_color-Cr_Difference) & (Cr <= Cr_center_color+Cr_Difference))

    # Mark detected pixels and output
    image_output = np.zeros((resize_HW, resize_HW))
    image_output[index] = 255

    # show image
    # cv2.imshow("", image_output)
    # cv2.waitKey(0)
    
    return image_output

Deal with all the images using the function defined above.
The processed data is stored in a new folder 'dataset_processed'.

Generate labels for each class. (class 0, 1, ..., num_classes)


In [3]:
path = './dataset/images'
path_processed = './dataset_processed/images'

# -------------------images processing--------------
for mainDir, subDir, fileList in os.walk(path):
    for file in fileList:
        currentPath = os.path.join(mainDir, file)
        processedImage = processSkinImage(currentPath)

        new_mainDir = path_processed + mainDir.split(path)[-1]
        if not os.path.exists(new_mainDir):
            os.makedirs(new_mainDir)
        cv2.imwrite(os.path.join(new_mainDir, file), processedImage)

# -----------------label generation----------------
label_path = './dataset_processed/labels'
if not os.path.exists(label_path):
    os.makedirs(label_path)

files = os.listdir(path)
for i, file in enumerate(files):
    subclass_label_path = os.path.join(label_path, file+'.txt')
    with open(subclass_label_path, 'w') as f:
        f.write('#label\n')
    for _ in range(len(os.listdir(os.path.join(path_processed, file)))):
        with open(subclass_label_path, 'a') as f:
            f.write('{:d}\n'.format(i))
    f.close()

error: OpenCV(4.8.0) /Users/xperience/GHA-OpenCV-Python/_work/opencv-python/opencv-python/opencv/modules/imgproc/src/resize.cpp:4062: error: (-215:Assertion failed) !ssize.empty() in function 'resize'


## 2) load data

In [4]:
Image = []
path_images = './dataset_processed/images'
for mainDir, subDir, fileList in os.walk(path_images):
    for file in fileList:
        currentPath = os.path.join(mainDir, file)
        Image.append(cv2.imread(currentPath)[:, :, 0])
Image = np.array(Image)
dataset_size, H, W = Image.shape
# for FCNN model, the image need to be stretched into one dimension: (b, h, w)->(b, h*w)
Image = Image.reshape(dataset_size, -1)


Label = []
path_labels = './dataset_processed/labels'
for file in os.listdir(path_labels):
    Label.append(np.loadtxt(os.path.join(path_labels, file)))
Label = np.array(list(itertools.chain.from_iterable(Label)))
num_classes = int(np.max(Label))+1

## 3) build your own neural network based on fully connected layers.

Design the neural network architecture

In [5]:
import torch.nn as nn

class FCNNModel(nn.Module):
    def __init__(self, input_layer_size, hidden_layer_size, num_classes):
        super(FCNNModel, self).__init__()
        self.fc1 = nn.Linear(input_layer_size, hidden_layer_size)
        self.fc2 = nn.Linear(hidden_layer_size, num_classes)
        # code by yourself
        self.relu = nn.ReLU()

    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x
        # code by yourself

instantiate your model, set a optimizer and define a loss function.

In [6]:
import torch.optim as optim

model = FCNNModel(input_layer_size=H*W, hidden_layer_size=int(H*W/2), num_classes=num_classes)
# if torch.cuda.is_available():
#     model = model.cuda()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
loss_func = nn.CrossEntropyLoss()

## 4) train and test

Encapsulate data

In [7]:
import torch

dataset = utils_data.TensorDataset(torch.Tensor(Image), torch.LongTensor(Label))
split_ratio = 0.8
train_size = int(split_ratio * dataset_size)
test_size = dataset_size - train_size
train_set, test_set = utils_data.random_split(dataset, [train_size, test_size])
train_loader = utils_data.DataLoader(dataset=train_set, batch_size=8, shuffle=True)
test_loader = utils_data.DataLoader(dataset=test_set, batch_size=8, shuffle=True)
print('Data is ready!')

Data is ready!


The following is the training and testing process in detail.

In [8]:
best_accuracy = 0
for epoch in range(500):
    running_loss = 0.0
    train_acc = 0.0
    for step, (batch_image, batch_label) in enumerate(train_loader):
        model.train()
        # if torch.cuda.is_available():
        #     batch_image, batch_label = batch_image.cuda(), batch_label.cuda()
        batch_output = model(batch_image)
        batch_loss = loss_func(batch_output, batch_label)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

        # train accuracy
        _, train_predicted = torch.max(batch_output.data, 1)
        train_acc += (train_predicted == batch_label).sum().item()

    train_acc /= train_size
    running_loss /= (step+1)

    # ----------test----------
    model.eval()
    test_acc = 0.0
    for test_image, test_label in test_loader:
        test_output = model(test_image)
        _, predicted = torch.max(test_output.data, 1)
        test_acc += (predicted == test_label).sum().item()
    test_acc /= test_size

    print('epoch={:d}\ttrain loss={:.6f}\ttrain accuracy={:.3f}\ttest accuracy={:.3f}'.format(
        epoch, running_loss, train_acc, test_acc))

    if test_acc >= best_accuracy:
        save_path = './trained_models/'
        if not os.path.exists(save_path):
             os.makedirs(save_path)
        torch.save(model.state_dict(), './trained_models/FCNN_model.pkl')
        best_accuracy = test_acc


epoch=0	train loss=378.286875	train accuracy=0.242	test accuracy=0.875
epoch=1	train loss=95.043324	train accuracy=0.710	test accuracy=0.875
epoch=2	train loss=18.894134	train accuracy=0.774	test accuracy=0.875
epoch=3	train loss=32.315509	train accuracy=0.742	test accuracy=0.812
epoch=4	train loss=6.564349	train accuracy=0.935	test accuracy=0.938
epoch=5	train loss=3.658256	train accuracy=0.952	test accuracy=0.875
epoch=6	train loss=2.024286	train accuracy=0.903	test accuracy=0.938
epoch=7	train loss=0.266802	train accuracy=0.984	test accuracy=0.875
epoch=8	train loss=0.450499	train accuracy=0.968	test accuracy=0.938
epoch=9	train loss=0.696644	train accuracy=0.968	test accuracy=0.812
epoch=10	train loss=0.525465	train accuracy=0.984	test accuracy=0.938
epoch=11	train loss=1.399021	train accuracy=0.968	test accuracy=0.812
epoch=12	train loss=0.859764	train accuracy=0.952	test accuracy=0.938
epoch=13	train loss=2.756449	train accuracy=0.952	test accuracy=0.875
epoch=14	train loss=2.806

epoch=119	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=120	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=121	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=122	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=123	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=124	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=125	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=126	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=127	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=128	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=129	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=130	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=131	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=132	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=

epoch=235	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=236	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=237	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=238	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=239	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=240	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=241	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=242	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=243	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=244	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=245	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=246	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=247	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=248	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=

epoch=351	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=352	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=353	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=354	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=355	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=356	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=357	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=358	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=359	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=360	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=361	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=362	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=363	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=364	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=

epoch=467	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=468	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=469	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=470	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=471	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=472	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=473	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=474	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=475	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=476	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=477	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=478	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=479	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=480	train loss=0.000000	train accuracy=1.000	test accuracy=0.938
epoch=