In [1]:
%matplotlib inline
import os

import tensorflow as tf

from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd
from sklearn.model_selection import train_test_split # Helps with organizing data for training
from sklearn.metrics import confusion_matrix # Helps present results as a confusion-matrix

print(tf.__version__)

2023-08-18 23:04:13.418181: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.13.0


In [3]:
import imageio
import os
import cv2
import imgaug as ia
import numpy as np
%matplotlib inline
from imgaug import augmenters as iaa
ia.seed(4)

image_folder = "./gesture/09_sideup"
image_extensions = ('.jpg', '.jpeg', '.png', '.gif')

image_files = []
for root, dirs, files in os.walk(image_folder):
    for file in files:
        if file.lower().endswith(image_extensions):
            image_files.append(os.path.join(root, file))

start

In [3]:
import os

# Directory path where your images are stored
image_directory = './gesture'

# List to store image paths
image_paths = []

for root, dirs, files in os.walk(image_directory):
    for name in files:
        path = os.path.join(root, name)
        if path.endswith("png"):  # We want only the images
            image_paths.append(path)

print(len(image_paths)) 


107


In [4]:
def plot_image(path):
  img = cv2.imread(path) # Reads the image into a numpy.array
  img_cvt = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # Converts into the corret colorspace (RGB)
  print(img_cvt.shape) # Prints the shape of the image just to check
  plt.grid(False) # Without grid so we can see better
  plt.imshow(img_cvt) # Shows the image
  plt.xlabel("Width")
  plt.ylabel("Height")
  plt.title("Image " + path)

In [6]:
X = [] # Image data
y = [] # Labels

# Loops through imagepaths to load images and labels into arrays
for path in image_paths:
  img = cv2.imread(path) # Reads image and returns np.array
  img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # Converts into the corret colorspace (GRAY)
  img = cv2.resize(img, (320, 120)) # Reduce image size so training can be faster
  X.append(img)
  
  # Processing label in image path
  category = path.split("/")[3]
  label = int(category.split("_")[0]) # We need to convert 10_down to 00_down, or else it crashes
  y.append(label)

# Turn X and y into np.array to speed up train_test_split
X = np.array(X, dtype="uint8")
X = X.reshape(len(image_paths), 120, 320, 1) # Needed to reshape so CNN knows it's different images
y = np.array(y)

print("Images loaded: ", len(X))
print("Labels loaded: ", len(y))

print(y[0], image_paths[0]) # Debugging

Images loaded:  107
Labels loaded:  107
2 ./gesture/02_backside/02_0009.png


In [7]:
ts = 0.2 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=ts, random_state=42)

In [9]:
# Import of keras model and hidden layers for our convolutional network
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Flatten

In [10]:
# Construction of model
model = Sequential()
model.add(Conv2D(32, (5, 5), activation='relu', input_shape=(120, 320, 1))) 
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu')) 
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(16, activation='softmax'))

2023-08-18 08:17:44.526853: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-08-18 08:17:44.921853: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-08-18 08:17:44.922040: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-08-18 08:17:44.922688: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorF

In [11]:
from keras.utils import plot_model
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [12]:
from tensorflow.keras.optimizers import Adam
# Configures the model for training
learning_rate = 0.001  # Set your desired learning rate here
optimizer = Adam(learning_rate=learning_rate)

model.compile(optimizer=optimizer, # Optimization routine, which tells the computer how to adjust the parameter values to minimize the loss function.
              loss='sparse_categorical_crossentropy', # Loss function, which tells us how bad our predictions are.
              metrics=['accuracy']) # List of metrics to be evaluated by the model during training and testing.

In [13]:
model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=2, validation_data=(X_test, y_test))

Epoch 1/50


2023-08-18 08:17:47.939801: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8101
2023-08-18 08:17:52.145040: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x1cc94f10 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-08-18 08:17:52.145064: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): NVIDIA GeForce GTX 1070, Compute Capability 6.1
2023-08-18 08:17:52.149575: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-08-18 08:17:52.272829: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


6/6 - 7s - loss: 137.9972 - accuracy: 0.2824 - val_loss: 26.0749 - val_accuracy: 0.5000 - 7s/epoch - 1s/step
Epoch 2/50
6/6 - 0s - loss: 14.7943 - accuracy: 0.4706 - val_loss: 8.3560 - val_accuracy: 0.4545 - 115ms/epoch - 19ms/step
Epoch 3/50
6/6 - 0s - loss: 2.8720 - accuracy: 0.6000 - val_loss: 2.1327 - val_accuracy: 0.5909 - 114ms/epoch - 19ms/step
Epoch 4/50
6/6 - 0s - loss: 0.5850 - accuracy: 0.8118 - val_loss: 1.5770 - val_accuracy: 0.5909 - 112ms/epoch - 19ms/step
Epoch 5/50
6/6 - 0s - loss: 0.1147 - accuracy: 0.9529 - val_loss: 2.3219 - val_accuracy: 0.5000 - 113ms/epoch - 19ms/step
Epoch 6/50
6/6 - 0s - loss: 0.1954 - accuracy: 0.9529 - val_loss: 3.0484 - val_accuracy: 0.6364 - 113ms/epoch - 19ms/step
Epoch 7/50
6/6 - 0s - loss: 0.2001 - accuracy: 0.9412 - val_loss: 1.4709 - val_accuracy: 0.6818 - 120ms/epoch - 20ms/step
Epoch 8/50
6/6 - 0s - loss: 0.1479 - accuracy: 0.9765 - val_loss: 2.0682 - val_accuracy: 0.6364 - 120ms/epoch - 20ms/step
Epoch 9/50
6/6 - 0s - loss: 0.2754 -

<keras.callbacks.History at 0x7f97bdbbf0d0>

In [14]:
model.save('handrecognition_model.h5')