In [1]:
def download_Project():
  !pip install gdown
  !gdown https://drive.google.com/uc?id=1ehyrYBQ5rbQQe6yL4XbLWe3FMvuVUGiL
  !unzip str(pathlib.Path("Project_data.zip").resolve())

download_Project()

Collecting gdown
  Downloading gdown-5.2.0-py3-none-any.whl (18 kB)
Collecting beautifulsoup4
  Downloading beautifulsoup4-4.12.3-py3-none-any.whl (147 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.9/147.9 KB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting soupsieve>1.2
  Downloading soupsieve-2.6-py3-none-any.whl (36 kB)
Collecting PySocks!=1.5.7,>=1.5.6
  Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)
Installing collected packages: soupsieve, PySocks, beautifulsoup4, gdown
Successfully installed PySocks-1.7.1 beautifulsoup4-4.12.3 gdown-5.2.0 soupsieve-2.6
You should consider upgrading via the '/usr/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mDownloading...
From (original): https://drive.google.com/uc?id=1ehyrYBQ5rbQQe6yL4XbLWe3FMvuVUGiL
From (redirected): https://drive.google.com/uc?id=1ehyrYBQ5rbQQe6yL4XbLWe3FMvuVUGiL&confirm=t&uuid=16c5f201-07fd-480d-88ec-8c80cb8b2313
To: /home/Gesture_Recognition/Project_data

In [1]:
from datetime import datetime
import numpy as np
import pandas as pd

import random as rn
import numpy.random as nrn
import tensorflow as tf
import os
import pathlib

from tensorflow import keras
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Dropout, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import optimizers
from tensorflow.keras.applications import ResNet50, ResNet101

import PIL
from PIL import Image
import cv2

import matplotlib.pyplot as plt

In [2]:
## Training configuration shared by the cells below.
batch_size = 10                   ## videos (folders) per generated batch
num_epochs = 50                   ## passed as `epochs` to model.fit
image_shape = (224, 224, 3)       ## frames are resized using image_shape[0] and image_shape[1]
vid_shape = (15, 224, 224, 3)     ## Conv3D input: 15 frames (the generators keep range(0, 30, 2)) of image_shape

## Seed the Python, NumPy and TensorFlow random generators.
rn.seed(37)
nrn.seed(37)
tf.random.set_seed(37)


def create_dfs(data_folder):
    """Load the train and validation manifests found in ``data_folder``.

    ``train.csv`` and ``val.csv`` are read as header-less, semicolon-separated
    files and their columns are named ``folder``, ``type`` and ``class``.

    Returns a ``(train_df, val_df)`` tuple of DataFrames.
    """
    loaded = []
    for split in ("train", "val"):
        frame = pd.read_csv(f"{data_folder}/{split}.csv", sep=";", header=None)
        frame.columns = ['folder', 'type', 'class']
        loaded.append(frame)

    train_df, val_df = loaded
    return train_df, val_df


def get_folder_list(df_csv):
  """Return every line of the manifest file at ``df_csv``.

  df_csv => path of the text/CSV file to read

  Returns a list of strings, one per line, with line endings preserved.
  """
  ## `with` closes the handle even when reading raises
  with open(df_csv) as fptr:
    return fptr.readlines()


def get_batch_details(folder_list, batch_size):
  """Split ``len(folder_list)`` into full batches plus a remainder.

  Returns ``(num_batches, folders_remain)``: the number of complete batches of
  ``batch_size`` entries and the count of entries left over after them.
  """
  full_batches, leftover = divmod(len(folder_list), batch_size)
  return full_batches, leftover


def create_batch_folders(batch_size, img_ids, image_shape, num_classes=5):
  """Allocate zero-filled image and one-hot label buffers for one batch.

  batch_size  => number of videos in the batch
  img_ids     => number of frames kept per video (callers pass len(img_ids))
  image_shape => tuple whose first two entries size the two frame axes
  num_classes => width of the label buffer; defaults to 5, the value that
                 used to be hard-coded, so existing callers are unaffected

  Returns (batch_images, batch_labels) with shapes
  (batch_size, img_ids, image_shape[0], image_shape[1], 3) and (batch_size, num_classes).
  """
  batch_images = np.zeros(shape=(batch_size, img_ids, image_shape[0], image_shape[1], 3))
  batch_labels = np.zeros(shape=(batch_size, num_classes))
  return batch_images, batch_labels


def process_images(imgs_path, batch_images, folder, image_shape, img_ids, img_crop):
    """Load the selected frames of one video into ``batch_images[folder]``.

    imgs_path    => directory holding the frame images of one video
    batch_images => pre-allocated array, written in place at [folder, idx, :, :, :]
    folder       => position of this video inside the batch
    image_shape  => tuple; [0] sizes the row axis and [1] the column axis of batch_images
    img_ids      => indices into the sorted file listing of the frames to load
    img_crop     => currently unused; kept so existing callers keep working

    Raises FileNotFoundError when a selected frame cannot be decoded.
    """
    ## os.listdir makes no ordering promise; sorting makes img_ids select the same
    ## frames on every run (assumes file names sort in temporal order — TODO confirm)
    imgs = sorted(os.listdir(imgs_path))
    ## cv2.resize expects dsize as (width, height), i.e. (columns, rows)
    target_size = (image_shape[1], image_shape[0])

    for idx, ind in enumerate(img_ids):
        frame_path = f'{imgs_path}/{imgs[ind]}'
        ## read image; cv2.imread signals failure by returning None, not by raising
        image = cv2.imread(frame_path)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {frame_path}')
        ## resize image
        image = cv2.resize(image.astype(np.float32), target_size, interpolation = cv2.INTER_LINEAR)
        ## normalise to [0, 1]; the three channels keep the order OpenCV loads
        ## them in (B, G, R), exactly as the previous per-channel copy did
        batch_images[folder, idx, :, :, :] = image[:, :, :3] / 255.0


## path => folder path as string
## folder_list => list of folders in train/test folder as list
## batch_size => chosen batch size as int
## image_shape => as tuple (image_height, image_width)
def run_generator(path, folder_list, batch_size, image_shape, img_crop):
  """Endlessly yield (batch_images, batch_labels) pairs for model.fit.

  Every pass reshuffles ``folder_list``, yields the full batches and then, when
  the list length is not a multiple of ``batch_size``, one smaller final batch.
  Each entry is a manifest line: field 0 (split on ';') is the video folder
  name and field 2 is the integer class index used for the one-hot label.
  ``img_crop`` is accepted but not used; 0 is always forwarded to process_images.
  """
  num_batches, folders_remain = get_batch_details(folder_list, batch_size)
  ## every second frame out of the first 30 of each video
  img_ids = list(range(0,30,2))
  frames_per_video = len(img_ids)

  while True:
    folder_list = np.random.permutation(folder_list)

    ## full-sized batches
    for batch in range(num_batches):
      first = batch * batch_size
      batch_images, batch_labels = create_batch_folders(batch_size, frames_per_video, image_shape)
      for slot, entry in enumerate(folder_list[first:first + batch_size]):
        process_images(f'{path}/{entry.split(";")[0]}', batch_images, slot, image_shape, img_ids, 0)
        batch_labels[slot, int(entry.strip().split(';')[2])] = 1
      yield batch_images, batch_labels

    ## trailing, smaller batch built from the last entries of the shuffled list
    if folders_remain > 0:
      batch_images, batch_labels = create_batch_folders(folders_remain, frames_per_video, image_shape)
      for slot, entry in enumerate(folder_list[-folders_remain:]):
        process_images(f'{path}/{entry.split(";")[0]}', batch_images, slot, image_shape, img_ids, 0)
        batch_labels[slot, int(entry.strip().split(';')[2])] = 1
      yield batch_images, batch_labels


def model_log_save():
  """Create a timestamped checkpoint directory and return the training callbacks.

  The directory is named ``model_cnn3D_<timestamp>/`` relative to the working
  directory. Returns ``[checkpoint, LR]``: a ModelCheckpoint that writes the
  weights after every epoch and a ReduceLROnPlateau that watches ``val_loss``.
  """
  timestamp = str(datetime.now()).replace(' ','').replace(':','_')
  model_name = f'model_cnn3D_{timestamp}/'

  if not os.path.exists(model_name):
      os.mkdir(model_name)

  ## Keras fills the placeholders in this template from the epoch logs
  filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.weights.h5'

  checkpoint = ModelCheckpoint(
      filepath,
      monitor='val_loss',
      verbose=1,
      save_best_only=False,
      save_weights_only=True,
      mode='auto',
  )
  LR = ReduceLROnPlateau(
      monitor='val_loss',
      factor=0.1,
      patience=4,
      verbose=1,
      min_lr=0.001,
      cooldown=2,
  )

  return [checkpoint, LR]


def folders_list(data_folder):
    """Read a manifest file and return its lines in random order.

    data_folder => path of the manifest file (despite the name, a file, not a directory)

    Returns a NumPy array of the file's lines, shuffled with np.random.permutation.
    """
    ## `with` closes the handle even when reading raises
    with open(data_folder) as fptr:
        data_paths = fptr.readlines()
    return np.random.permutation(data_paths)


def total_epochs(train_folder_list, val_folder_list, batch_size):
    """Compute how many batches make up one pass over each folder list.

    A partially filled final batch counts as a full step.

    Returns ``(steps_per_epoch, validation_steps)`` as integers.
    """
    def _steps(folder_list):
        full, leftover = divmod(len(folder_list), batch_size)
        return full + 1 if leftover else full

    return _steps(train_folder_list), _steps(val_folder_list)


def run_model(gru_opt, process_type):
    """Build a compiled model together with its training/validation generators.

    gru_opt      => truthy selects gru_model (compiled with SGD),
                    falsy selects cnn3D_model (compiled with Adam)
    process_type => accepted but not used by this function

    The shuffled lines of ``Project_data/train.csv`` are split so that the last
    20% become the validation list; both generators read frames from
    ``Project_data/train``.

    Returns (model, train_generator, val_generator, steps_per_epoch,
    validation_steps, num_classes).
    """
    data_folder = str(pathlib.Path("Project_data").resolve())

    train_df, val_df = create_dfs(data_folder)
    num_classes = train_df['class'].unique().size

    ## hold out the tail of the shuffled manifest for validation
    folder_lists = folders_list(f'{data_folder}/train.csv')
    holdout = len(folder_lists)*(20)//100
    split_val = len(folder_lists) - holdout
    train_folder_list, val_folder_list = folder_lists[:split_val], folder_lists[split_val:]

    steps_per_epoch, validation_steps = total_epochs(train_folder_list, val_folder_list, batch_size)

    frames_root = f'{data_folder}/train'
    train_generator = run_generator(frames_root, train_folder_list, batch_size, image_shape, 0)
    val_generator = run_generator(frames_root, val_folder_list, batch_size, image_shape, 0)

    ## pick architecture and optimizer, then compile once
    if gru_opt:
        model = gru_model(num_classes)
        optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
    else:
        model = cnn3D_model(vid_shape, num_classes)
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

    return model, train_generator, val_generator, steps_per_epoch, validation_steps, num_classes


def run_test_model():
    """Create the single-pass evaluation generator over ``Project_data/val``.

    The lines of ``val.csv`` are shuffled by folders_list and handed to
    test_generator together with the global batch_size and image_shape.
    """
    data_root = pathlib.Path("Project_data").resolve()
    shuffled_entries = folders_list(f'{data_root}/val.csv')
    return test_generator(f'{data_root}/val', shuffled_entries, batch_size, image_shape, 0)


In [3]:
## conv 64 + 64 + norm + pool + conv 128 + 128 + pool + conv 256 + norm + pool + conv 512
## + global average pool + dense 1024 + dropout(0.3) + dense 1024 + softmax
def cnn3D_model(vid_shape, num_classes):
    """Assemble the (uncompiled) Conv3D classifier.

    vid_shape   => input shape passed to the Input layer
    num_classes => number of units of the final softmax layer

    The pooling layers use pool_size (1, 2, 2), so only the two frame axes are
    down-sampled. Returns the Sequential model.
    """
    model = Sequential()

    stack = [
        layers.Input(shape=vid_shape),

        layers.Conv3D(filters=64, kernel_size=(3, 3, 3), activation='relu'),
        layers.Conv3D(filters=64, kernel_size=(3, 3, 3), activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling3D(pool_size=(1, 2, 2)),

        layers.Conv3D(filters=128, kernel_size=(3, 3, 3), activation='relu'),
        layers.Conv3D(filters=128, kernel_size=(3, 3, 3), padding = 'same', activation='relu'),
        layers.MaxPooling3D(pool_size=(1, 2, 2)),

        layers.Conv3D(filters=256, kernel_size=(3, 3, 3), padding = 'same', activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling3D(pool_size=(1, 2, 2)),
        layers.Conv3D(filters=512, kernel_size=(3, 3, 3), padding = 'same', activation='relu'),

        ## classification head
        layers.GlobalAveragePooling3D(),
        layers.Dense(1024, activation='relu'),
        layers.Dropout(rate=0.3),
        layers.Dense(1024, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ]
    for layer in stack:
        model.add(layer)

    return model

In [5]:
## 64 + 64 + norm + 128 + 128 + 256 + dropout(0.3) + norm + 512 + 1024_dense(2) + dropout(0.3)
cnn3D_model, train_generator, val_generator, steps_per_epoch, validation_steps, num_classes = run_model(0, 3)
cnn3D_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 13, 222, 222, 64)  5248      
                                                                 
 conv3d_1 (Conv3D)           (None, 11, 220, 220, 64)  110656    
                                                                 
 batch_normalization (BatchN  (None, 11, 220, 220, 64)  256      
 ormalization)                                                   
                                                                 
 max_pooling3d (MaxPooling3D  (None, 11, 110, 110, 64)  0        
 )                                                               
                                                                 
 conv3d_2 (Conv3D)           (None, 9, 108, 108, 128)  221312    
                                                                 
 conv3d_3 (Conv3D)           (None, 9, 108, 108, 128)  4

2025-01-08 11:45:57.962842: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:39] Overriding allow_growth setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
2025-01-08 11:45:57.962896: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14800 MB memory:  -> device: 0, name: Quadro RTX 5000, pci bus id: 0000:1b:00.0, compute capability: 7.5


In [5]:
## 64 + 64 + norm + 128 + 128 + 256 + dropout(0.3) + norm + 512 + 1024_dense(2) + dropout(0.3)
cnn3D_model.fit(
                    train_generator, 
                    steps_per_epoch    = steps_per_epoch,
                    epochs             = num_epochs,
                    verbose            = 1,
                    callbacks          = model_log_save(),
                    validation_data    = val_generator,
                    validation_steps   = validation_steps,
                    class_weight       = None,
                    initial_epoch      = 0
                 )

Epoch 1/50


2025-01-08 09:46:19.451900: I tensorflow/stream_executor/cuda/cuda_dnn.cc:377] Loaded cuDNN version 8302


Epoch 00001: saving model to model_cnn3D_2025-01-0809_46_17.088253/model-00001-21.38396-0.24859-66.01678-0.22727.weights.h5
Epoch 2/50
Epoch 00002: saving model to model_cnn3D_2025-01-0809_46_17.088253/model-00002-1.58806-0.24670-5.81804-0.24242.weights.h5
Epoch 3/50
Epoch 00003: saving model to model_cnn3D_2025-01-0809_46_17.088253/model-00003-1.55489-0.25989-2.16487-0.20455.weights.h5
Epoch 4/50
Epoch 00004: saving model to model_cnn3D_2025-01-0809_46_17.088253/model-00004-1.52726-0.29567-1.55897-0.26515.weights.h5
Epoch 5/50
Epoch 00005: saving model to model_cnn3D_2025-01-0809_46_17.088253/model-00005-1.50288-0.30320-1.84236-0.18182.weights.h5
Epoch 6/50
Epoch 00006: saving model to model_cnn3D_2025-01-0809_46_17.088253/model-00006-1.49684-0.31262-1.59573-0.25000.weights.h5
Epoch 7/50
Epoch 00007: saving model to model_cnn3D_2025-01-0809_46_17.088253/model-00007-1.46876-0.35782-1.57001-0.28030.weights.h5
Epoch 8/50
Epoch 00008: saving model to model_cnn3D_2025-01-0809_46_17.088253/

<keras.callbacks.History at 0x7fb7616a4a30>

In [19]:
## Load selected model
## Rebuild the Conv3D architecture and restore the weights written by the checkpoint callback.
## NOTE(review): `cnn3D_model` must be the builder function (not a model instance) when this
## cell runs, and the checkpoint path below is specific to one training run.
file_path = str(pathlib.Path("model_cnn3D_2025-01-0809_46_17.088253/model-00050-0.41419-0.82674-0.55758-0.79545.weights.h5").resolve())
vid_shape = (15, 224, 224, 3)
## 5 output classes, matching the label width used by create_batch_folders
test_model = cnn3D_model(vid_shape, 5)
test_model.load_weights(file_path)
## compiled with the same loss/metric as in run_model so evaluate() reports loss and categorical accuracy
test_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss='categorical_crossentropy', metrics=['categorical_accuracy'])
test_model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_18 (Conv3D)          (None, 13, 222, 222, 64)  5248      
                                                                 
 conv3d_19 (Conv3D)          (None, 11, 220, 220, 64)  110656    
                                                                 
 batch_normalization_6 (Batc  (None, 11, 220, 220, 64)  256      
 hNormalization)                                                 
                                                                 
 max_pooling3d_9 (MaxPooling  (None, 11, 110, 110, 64)  0        
 3D)                                                             
                                                                 
 conv3d_20 (Conv3D)          (None, 9, 108, 108, 128)  221312    
                                                                 
 conv3d_21 (Conv3D)          (None, 9, 108, 108, 128) 

In [10]:
def test_generator(path, folder_list, batch_size, image_shape, img_crop):
    """Yield (batch_images, batch_labels) pairs for a single pass over ``folder_list``.

    The list is shuffled once, the full batches are yielded and then, when the
    length is not a multiple of ``batch_size``, one smaller final batch. Unlike
    run_generator there is no outer loop, so the generator ends after one pass.
    Each entry is a manifest line: field 0 (split on ';') is the video folder
    name and field 2 the integer class index. ``img_crop`` is accepted but not used.
    """
    num_batches, folders_remain = get_batch_details(folder_list, batch_size)
    ## every second frame out of the first 30 of each video
    img_ids = list(range(0,30,2))
    frames_per_video = len(img_ids)
    folder_list = np.random.permutation(folder_list)

    ## full-sized batches
    for batch in range(num_batches):
        first = batch * batch_size
        batch_images, batch_labels = create_batch_folders(batch_size, frames_per_video, image_shape)
        for slot, entry in enumerate(folder_list[first:first + batch_size]):
            process_images(f'{path}/{entry.split(";")[0]}', batch_images, slot, image_shape, img_ids, 0)
            batch_labels[slot, int(entry.strip().split(';')[2])] = 1
        yield batch_images, batch_labels

    ## trailing, smaller batch built from the last entries of the shuffled list
    if folders_remain > 0:
        batch_images, batch_labels = create_batch_folders(folders_remain, frames_per_video, image_shape)
        for slot, entry in enumerate(folder_list[-folders_remain:]):
            process_images(f'{path}/{entry.split(";")[0]}', batch_images, slot, image_shape, img_ids, 0)
            batch_labels[slot, int(entry.strip().split(';')[2])] = 1
        yield batch_images, batch_labels


In [14]:
## Evaluate the restored model on the folders listed in val.csv.
## test_generator yields each batch once, so a fresh generator is needed for every evaluate() call.
testing_generator = run_test_model()
test_model.evaluate(testing_generator)



[0.6465699672698975, 0.75]

In [None]:
###############################################################################################################################################

In [6]:
## Running the model on cropped images

In [5]:
## NOTE(review): run_model() builds gru_model whenever gru_opt is truthy, so run_model(1, 2)
## returns the ResNet + GRU network, while the variable name and the recorded summary below
## describe a Conv3D network — confirm whether run_model(0, ...) was intended.
## NOTE(review): `process_type` is not read inside run_model and `img_crop` is not read inside
## process_images, so the current code applies no cropping — confirm against the code that
## produced the recorded output.
cnn3D_crop_model, train_generator, val_generator, steps_per_epoch, validation_steps, num_classes = run_model(1, 2)
cnn3D_crop_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 13, 158, 198, 64)  5248      
                                                                 
 conv3d_1 (Conv3D)           (None, 11, 156, 196, 64)  110656    
                                                                 
 batch_normalization (BatchN  (None, 11, 156, 196, 64)  256      
 ormalization)                                                   
                                                                 
 max_pooling3d (MaxPooling3D  (None, 11, 78, 98, 64)   0         
 )                                                               
                                                                 
 conv3d_2 (Conv3D)           (None, 9, 76, 96, 128)    221312    
                                                                 
 conv3d_3 (Conv3D)           (None, 9, 76, 96, 128)    4

2025-01-07 21:12:29.815903: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:39] Overriding allow_growth setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
2025-01-07 21:12:29.815967: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14800 MB memory:  -> device: 0, name: Quadro RTX 5000, pci bus id: 0000:1c:00.0, compute capability: 7.5


In [6]:
## Train the model built in the previous cell.
## No `callbacks` argument is passed here, so this run writes no checkpoints and
## uses no ReduceLROnPlateau schedule (unlike the fit calls that pass model_log_save()).
cnn3D_crop_model.fit(
                    train_generator, 
                    steps_per_epoch    = steps_per_epoch,
                    epochs             = num_epochs,
                    verbose            = 1,
                    validation_data    = val_generator,
                    validation_steps   = validation_steps,
                    class_weight       = None,
                    initial_epoch      = 0
                 )

Epoch 1/50


2025-01-07 21:12:55.046727: I tensorflow/stream_executor/cuda/cuda_dnn.cc:377] Loaded cuDNN version 8302


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f8d7a5da4c0>

In [None]:
                                                #########################################
                                                ## Build a model using ResNet101 + GRU ##
                                                #########################################

In [3]:
def gru_model(num_classes):
    """Assemble the (uncompiled) ResNet101 + GRU video classifier.

    An ImageNet-initialised ResNet101 without its top is applied to each of the
    15 frames via TimeDistributed, the per-frame feature maps are average-pooled,
    and a GRU followed by dropout and a softmax layer classifies the sequence.

    num_classes => number of units of the final softmax layer
    """
    resnet_model = ResNet101(
        include_top=False,
        weights='imagenet',
        pooling=None,
        input_shape=(224, 224, 3)
    )

    ## freeze the first `splitAt` backbone layers, fine-tune the rest
    splitAt = 95
    for position, layer in enumerate(resnet_model.layers):
        layer.trainable = position >= splitAt

    network = Sequential()
    network.add(layers.TimeDistributed(resnet_model, input_shape = (15, 224, 224, 3)))
    network.add(layers.TimeDistributed(layers.GlobalAveragePooling2D()))
    network.add(layers.GRU(units=512, return_sequences=False))
    network.add(layers.Dropout(rate=0.2))
    network.add(layers.Dense(num_classes, activation='softmax'))

    return network

In [4]:
gru_model, train_generator, val_generator, steps_per_epoch, validation_steps, num_classes = run_gru_model()
gru_model.summary()

2025-01-07 16:36:25.561644: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:39] Overriding allow_growth setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
2025-01-07 16:36:25.561720: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14800 MB memory:  -> device: 0, name: Quadro RTX 5000, pci bus id: 0000:1c:00.0, compute capability: 7.5


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed (TimeDistr  (None, 15, 7, 7, 2048)   42658176  
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 15, 2048)         0         
 tributed)                                                       
                                                                 
 gru (GRU)                   (None, 512)               3935232   
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense (Dense)               (None, 5)                 2565      
                                                                 
Total params: 46,595,973
Trainable params: 43,262,725
No

In [5]:
gru_model.fit(
                train_generator, 
                steps_per_epoch    = steps_per_epoch,
                epochs             = num_epochs,
                verbose            = 1,
                callbacks          = model_log_save(),
                validation_data    = val_generator,
                validation_steps   = validation_steps,
                class_weight       = None,
                initial_epoch      = 0
             )

Epoch 1/50


2025-01-07 16:36:44.602329: I tensorflow/stream_executor/cuda/cuda_dnn.cc:377] Loaded cuDNN version 8302


Epoch 00001: saving model to model_cnn3D_2025-01-0716_36_32.105701/model-00001-1.90766-0.29379-1.76808-0.23485.keras


  layer_config = serialize_layer_fn(layer)


Epoch 2/50
Epoch 00002: saving model to model_cnn3D_2025-01-0716_36_32.105701/model-00002-1.53566-0.36723-1.79598-0.25000.keras
Epoch 3/50
Epoch 00003: saving model to model_cnn3D_2025-01-0716_36_32.105701/model-00003-1.33579-0.46893-1.73425-0.32576.keras
Epoch 4/50
Epoch 00004: saving model to model_cnn3D_2025-01-0716_36_32.105701/model-00004-1.09232-0.57815-1.34760-0.54545.keras
Epoch 5/50
Epoch 00005: saving model to model_cnn3D_2025-01-0716_36_32.105701/model-00005-0.92774-0.63465-2.00192-0.27273.keras
Epoch 6/50
Epoch 00006: saving model to model_cnn3D_2025-01-0716_36_32.105701/model-00006-0.77365-0.70433-1.17034-0.56061.keras
Epoch 7/50
Epoch 00007: saving model to model_cnn3D_2025-01-0716_36_32.105701/model-00007-0.58472-0.77778-1.25456-0.53788.keras
Epoch 8/50
Epoch 00008: saving model to model_cnn3D_2025-01-0716_36_32.105701/model-00008-0.53142-0.80414-1.67012-0.48485.keras
Epoch 9/50
Epoch 00009: saving model to model_cnn3D_2025-01-0716_36_32.105701/model-00009-0.39007-0.8512

<keras.callbacks.History at 0x7f339367b100>