# Hand Gesture Recognition with a Convolutional Neural Network
## Training a convolutional neural network to classify images of hand gestures into "fist", "okay", and "other" based on Deep Learning with Python, 2nd edition, by François Chollet.

The dataset used is the Hand Gesture Recognition Image Dataset (HaGRID), available on Kaggle.

In [14]:
# Import
import tensorflow as tf
from tensorflow import keras
import numpy
import matplotlib.pyplot as plt
from keras.utils import image_dataset_from_directory
from keras import models

In [3]:
# Show every physical device TensorFlow can see, then count the GPUs among them.
devices = tf.config.list_physical_devices()
print("\nDevices: ", devices)

gpu_devices = tf.config.list_physical_devices("GPU")
print("Num GPUs Available: ", len(gpu_devices))


Devices:  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Num GPUs Available:  1


## 1. Data preprocessing
### 1.1 Downloading the dataset and organizing in folders

Pictures are downloaded from https://www.kaggle.com/datasets/kapitanov/hagrid and are in RGB format.

In [7]:
import os, shutil, pathlib, random

# 6K, 2K, 2K TESTING SPLIT

seed = 1337
random.seed(seed)
original_dir = pathlib.Path("data/hagrid_dataset_512")
new_base_dir = pathlib.Path(f"data/hagrid_6k_2k_2k_seed{seed}")

# Extract gesture list: one sub-directory of original_dir per gesture.
# Keeping only non-hidden directories skips stray entries such as macOS's
# ".DS_Store" without raising when no such file exists. Sorting removes the
# dependence on the arbitrary order in which the filesystem lists entries.
gestures = sorted(
    entry.name
    for entry in original_dir.iterdir()
    if entry.is_dir() and not entry.name.startswith(".")
)

# Create gesture lists and randomly shuffle them.
# Each list is sorted before shuffling so that, together with the fixed seed,
# the resulting order is the same on every machine.
files_dict = {
    f"{gesture}_files": sorted(
        file for file in os.listdir(original_dir / gesture) if file.endswith(".jpg")
    )
    for gesture in gestures
}
for gesture_files in files_dict.values():
    random.shuffle(gesture_files)

# Function to make subset
def make_subset(subset_name, start_index, end_index):
    """Copy one slice of every gesture's shuffled file list into a subset folder.

    For each gesture in `gestures`, creates `new_base_dir / subset_name / gesture`
    and copies into it the files at positions [start_index, end_index) of that
    gesture's list in `files_dict`, reading them from `original_dir / gesture`.

    Raises:
        FileExistsError: from os.makedirs, when a target folder already exists.
    """
    for category in gestures:
        source_dir = original_dir / category
        target_dir = new_base_dir / subset_name / category
        os.makedirs(target_dir)
        selected = files_dict[f"{category}_files"][start_index:end_index]
        for fname in selected:
            shutil.copyfile(src=source_dir / fname, dst=target_dir / fname)

In [None]:
# Make training, validation, and testing subsets.
# Each entry is (subset name, first index, one-past-last index) into every
# gesture's shuffled file list; the three ranges do not overlap.
subset_ranges = (
    ("train", 0, 6000),
    ("validation", 6000, 8000),
    ("test", 8000, 10000),
)
for subset_name, first, last in subset_ranges:
    make_subset(subset_name, start_index=first, end_index=last)

In [8]:
# Turn subsets into keras datasets.
# The three datasets are built with identical settings (180x180 images,
# batches of 128) and differ only in the sub-folder they read from.
train_dataset, validation_dataset, test_dataset = (
    image_dataset_from_directory(
        new_base_dir / subset_name,
        image_size=(180, 180),
        batch_size=128)
    for subset_name in ("train", "validation", "test")
)

Found 108000 files belonging to 18 classes.


2024-06-17 16:40:20.209857: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2024-06-17 16:40:20.209917: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-06-17 16:40:20.209927: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-06-17 16:40:20.210141: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-06-17 16:40:20.210159: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Found 36000 files belonging to 18 classes.
Found 36000 files belonging to 18 classes.


### 1.2 Data augmentation

In [9]:
from keras import layers

# Random image transformations, applied in this order: horizontal flip,
# rotation (factor 0.1) and zoom (factor 0.2).
augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.2),
]
data_augmentation = keras.Sequential(augmentation_layers)

## 2. Building the model

The expected model input is an **RGB** image of size **180 x 180**.

In [10]:
# Input: 180x180 images with 3 channels; augmentation is the first model stage.
inputs = keras.Input(shape=(180, 180, 3))
features = data_augmentation(inputs)

# Multiply pixel values by 1/255, then apply an initial 5x5 convolution.
features = layers.Rescaling(1./255)(features)
features = layers.Conv2D(filters=32, kernel_size=5, use_bias=False)(features)

# Five residual blocks of increasing width. Each block applies
# BatchNorm -> ReLU -> SeparableConv twice, downsamples with a stride-2 max
# pool, and adds a shortcut from the block's input.
for n_filters in (32, 64, 128, 256, 512):
    shortcut = features

    for _ in range(2):
        features = layers.BatchNormalization()(features)
        features = layers.Activation("relu")(features)
        features = layers.SeparableConv2D(
            n_filters, 3, padding="same", use_bias=False)(features)

    features = layers.MaxPooling2D(3, strides=2, padding="same")(features)

    # A strided 1x1 convolution gives the shortcut the same spatial size and
    # channel count as the main branch so the two can be added.
    shortcut = layers.Conv2D(
        n_filters, 1, strides=2, padding="same", use_bias=False)(shortcut)
    features = layers.add([features, shortcut])

# Classification head: global average pooling, dropout, and an 18-way softmax.
features = layers.GlobalAveragePooling2D()(features)
features = layers.Dropout(0.5)(features)
outputs = layers.Dense(18, activation="softmax")(features)
model = keras.Model(inputs=inputs, outputs=outputs)

In [11]:
# Print the model's layer-by-layer summary to check the architecture built above.
model.summary()

In [12]:
# Configure training: RMSprop optimizer, sparse categorical cross-entropy loss
# (presumably because the datasets yield integer class labels — confirm),
# and accuracy as the reported metric.
model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [18]:
# Keep on disk only the model with the best validation loss seen so far.
checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath="convnet_H6k2k2k_061724_best.keras",
    monitor="val_loss",
    save_best_only=True)

# Stop after 5 epochs without validation-accuracy improvement and restore the
# best weights seen for that metric.
# NOTE(review): the checkpoint tracks val_loss while early stopping tracks
# val_accuracy, so the saved file and the restored in-memory weights may come
# from different epochs — confirm this is intended.
early_stopping_callback = keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=5,
    restore_best_weights=True,
    verbose=1)

# Write TensorBoard logs (including the graph and images) under ./logs.
tensorboard_callback = keras.callbacks.TensorBoard(
    log_dir="./logs",
    write_graph=True,
    write_images=True)

callbacks = [checkpoint_callback, early_stopping_callback, tensorboard_callback]

In [16]:
# Resume from the best checkpoint of a previous run when one exists. On a
# fresh run the file has not been written yet, so keep the model that was
# built and compiled in the cells above instead of failing here.
checkpoint_path = "convnet_H6k2k2k_061724_best.keras"
if os.path.exists(checkpoint_path):
    model = keras.models.load_model(checkpoint_path)

In [19]:
# Train on the training set and validate after every epoch. epochs=100 is an
# upper bound: the EarlyStopping callback in `callbacks` can end training sooner.
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=100,
    callbacks=callbacks,
)

Epoch 1/100


2024-06-17 16:55:26.056955: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m269/844[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m15:45[0m 2s/step - accuracy: 0.8469 - loss: 0.4948

## 3. Evaluation

In [None]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded by model.fit, keyed by metric name.
recorded = history.history
epochs = range(1, len(recorded["accuracy"]) + 1)

# One figure per metric: training values as blue dots, validation as a blue line.
for position, metric in enumerate(("accuracy", "loss")):
    if position:
        # Start a new figure for every metric after the first one.
        plt.figure()
    plt.plot(epochs, recorded[metric], "bo", label=f"Training {metric}")
    plt.plot(epochs, recorded[f"val_{metric}"], "b", label=f"Validation {metric}")
    plt.title(f"Training and validation {metric}")
    plt.legend()
plt.show()

In [None]:
# Evaluate the best checkpoint written by the ModelCheckpoint callback during
# training. The previous file name ("convent_from_scratch.keras") was
# misspelt and is never saved anywhere in this notebook.
test_model = keras.models.load_model("convnet_H6k2k2k_061724_best.keras")
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")