<h1>Dataset Preprocessing to Enable Application of Classical ML Models</h1>

In [21]:
import json
from timeit import default_timer

In [3]:
import numpy as np
import tensorflow as tf

In [4]:
from model.model import build_preprocessing

In [5]:
# Load the notebook configuration from config.json in the working directory.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform's locale default (which differs between
# operating systems and would mis-decode non-ASCII values).
with open("config.json", "r", encoding="utf-8") as config_file:
    config = json.load(config_file)

In [6]:
# Set up the list of gestures from the first line of the gesture list file.
# The names are expected on a single line, separated by ", ".
with open(config["Paths"]["Gesture list"], "r") as gesture_list:
    # readline() keeps the line terminator; strip it so the last gesture name
    # does not end up with a trailing newline attached
    first_line = gesture_list.readline().strip()

# Fail with a clear message instead of an IndexError / a list holding one empty name
if not first_line:
    raise ValueError("Gesture list file is empty: expected gesture names separated by ', '")

gestures = first_line.split(", ")

In [7]:
# Image size (in pixels) taken from the general section of the configuration
general_parameters = config["General parameters"]
img_size = general_parameters["Image size"]

In [8]:
# Options for reading the images below the "Data" folder. Labels are integer
# ids inferred from the sub-directories, ordered as given in `gestures`.
dataset_options = {
    "labels": "inferred",
    "label_mode": "int",
    "class_names": gestures,
    "color_mode": "rgb",
    "batch_size": 128,
    "image_size": (img_size, img_size),
    # Shuffle with a fixed seed so that the 80/20 split is repeatable
    "shuffle": True,
    "seed": 42,
    "validation_split": 0.2,
    # "both" returns the training and the validation subset in a single call
    "subset": "both",
}

train_images, test_images = tf.keras.preprocessing.image_dataset_from_directory(
    "Data",
    **dataset_options,
)

Found 31198 files belonging to 49 classes.
Using 24959 files for training.
Using 6239 files for validation.


In [9]:
# Read the default preprocessing pipeline from the configuration ...
preprocessing_layers = config["Model"]["Default preprocessing"]
# ... and replace it by a fixed instruction string.
# NOTE(review): this override is unconditional, so the configured default is
# never actually used in this notebook - confirm that this is intended.
preprocessing_layers = "I,G,R"

# From here on the images are handled at a fixed 64x64 resolution; this rebinds
# `img_size`, which previously held the value from the configuration
img_size = 64
resize = tf.keras.layers.Resizing(img_size, img_size)

# Build the preprocessing pipeline according to the instructions for 3-channel input
pipeline_input_shape = [img_size, img_size, 3]
preprocessing = build_preprocessing(
    inp_shape=pipeline_input_shape,
    instructions=preprocessing_layers,
    name="preprocessing_pipeline",
)

In [10]:
# Apply the same preprocessing steps as for the CNN but flatten the images
channels = 1 if "G" in preprocessing_layers else 3
flat_size = img_size ** 2 * channels


def flatten_preprocessed(images, labels):
    """Resize and preprocess a batch of images, then flatten every image.

    Args:
        images: Batch of images yielded by the dataset.
        labels: Labels of the batch; passed through unchanged.

    Returns:
        A tuple of the preprocessed batch reshaped to ``[-1, flat_size]``
        (one row per image) and the untouched labels.
    """
    return tf.reshape(preprocessing(resize(images)), [-1, flat_size]), labels


# One named function replaces the two identical lambdas: the mapping logic
# lives in a single place and no lambda has to be traced by Dataset.map.
# unbatch() afterwards turns the batches back into individual samples.
train_images = train_images.map(flatten_preprocessed).unbatch()
test_images = test_images.map(flatten_preprocessed).unbatch()

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [11]:
def _to_sample_array(dataset):
    """Collect all (image, label) elements of a tf.data.Dataset in one NumPy array."""
    samples = list(dataset.as_numpy_iterator())
    return np.array(samples, dtype=tuple)


# Materialise both datasets in memory, one (image, label) entry per sample
train_images = _to_sample_array(train_images)
test_images = _to_sample_array(test_images)

In [12]:
# Separate the stored (image, label) pairs into image arrays and flat label vectors
train_X = np.array([image for image, _ in train_images])
train_y = np.ravel(np.vstack([label for _, label in train_images]))

test_X = np.array([image for image, _ in test_images])
test_y = np.ravel(np.vstack([label for _, label in test_images]))

In [13]:
# Report how much memory the training images occupy (bytes converted to 1024 * 1024 units)
bytes_per_mb = 1024 * 1024.0
train_size_mb = train_X.nbytes / bytes_per_mb
print("Training dataset memory consumption: {:.1f}MB".format(train_size_mb))

Training dataset memory consumption: 390.0MB


<h1>k-NN Gesture Classification</h1>

In [14]:
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV

In [15]:
# Hyper-parameters obtained from the grid search:
#   n_neighbors=1 -> classify by the single closest training sample
#   p=1           -> Minkowski distance with power 1 (Manhattan distance)
#   n_jobs=-1     -> use all available processors for the neighbour search
knn_params = {"n_neighbors": 1, "p": 1, "n_jobs": -1}

# Initialize and train the k-NN classifier
knn = KNeighborsClassifier(**knn_params)
knn.fit(train_X, train_y)

In [85]:
# Predict the whole test set with k-NN and measure how long the prediction takes
start = default_timer()
knn_predictions = knn.predict(test_X)
knn_elapsed = default_timer() - start

# Average prediction time per test sample, followed by the overall accuracy
knn_seconds_per_gesture = round(knn_elapsed / test_X.shape[0], 5)
print("Time per gesture:", str(knn_seconds_per_gesture) + " s")
print("Accuracy:", accuracy_score(test_y, knn_predictions))

Time per gesture: 0.0247 s
Accuracy: 0.9889405353422023


In [17]:
# Class ids predicted by k-NN and the number of test samples assigned to each id
knn_classes, knn_counts = np.unique(knn_predictions, return_counts=True)
knn_classes, knn_counts

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]),
 array([130, 124, 136, 136, 136, 134, 141, 138, 124, 134, 140, 136, 137,
        107, 150, 129, 123, 148, 129, 129, 136, 129, 134, 130, 128, 131,
        112, 119, 138, 139, 126, 107, 110, 120, 127, 114, 113, 126, 148,
        130, 114, 139, 141, 166, 113, 117, 131, 140], dtype=int64))

In [18]:
# Ground-truth class ids in the test set and the number of samples per id,
# as a reference for the predicted distribution.
# NOTE(review): the recorded output lists ids 0-47 although the loader reported
# 49 classes - check whether the last class contributes any test samples.
true_classes, true_counts = np.unique(test_y, return_counts=True)
true_classes, true_counts

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]),
 array([128, 125, 138, 137, 135, 134, 142, 138, 123, 134, 141, 137, 136,
        107, 150, 129, 122, 149, 130, 131, 138, 128, 134, 129, 127, 131,
        111, 118, 139, 141, 123, 111, 107, 120, 127, 110, 116, 126, 149,
        129, 112, 138, 140, 159, 116, 122, 133, 139], dtype=int64))

<h1>Random Forest Gesture Classification</h1>

In [19]:
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV

In [50]:
# Initialize and train the Random Forest classifier with parameters obtained from grid search.
# random_state fixes the randomness used while growing the trees, so that
# re-running the notebook rebuilds the same forest and reports the same accuracy
# (42 is the seed already used for the dataset split).
rfc = RandomForestClassifier(n_estimators=100,
                             max_depth=55,
                             min_samples_split=5,
                             n_jobs=-1,
                             random_state=42)
rfc.fit(train_X, train_y)

In [84]:
# Predict the whole test set with the Random Forest and measure how long the prediction takes
start = default_timer()
rfc_predictions = rfc.predict(test_X)
rfc_elapsed = default_timer() - start

# Average prediction time per test sample, followed by the overall accuracy
rfc_seconds_per_gesture = round(rfc_elapsed / test_X.shape[0], 6)
print("Time per gesture:", str(rfc_seconds_per_gesture) + " s")
print("Accuracy:", accuracy_score(test_y, rfc_predictions))

Time per gesture: 4.4e-05 s
Accuracy: 0.9778810706844046


In [52]:
# Class ids predicted by the Random Forest and the number of test samples assigned to each id
rfc_classes, rfc_counts = np.unique(rfc_predictions, return_counts=True)
rfc_classes, rfc_counts

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]),
 array([127, 124, 136, 134, 142, 134, 142, 139, 124, 134, 144, 134, 136,
        107, 148, 131, 123, 148, 130, 127, 137, 130, 134, 131, 133, 131,
        115, 121, 129, 142, 121, 109, 108, 118, 123, 108, 116, 126, 146,
        132, 111, 137, 146, 155, 114, 127, 134, 141], dtype=int64))

In [53]:
# Ground-truth class ids in the test set and the number of samples per id,
# shown as a reference for the Random Forest predictions
true_classes, true_counts = np.unique(test_y, return_counts=True)
true_classes, true_counts

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]),
 array([128, 125, 138, 137, 135, 134, 142, 138, 123, 134, 141, 137, 136,
        107, 150, 129, 122, 149, 130, 131, 138, 128, 134, 129, 127, 131,
        111, 118, 139, 141, 123, 111, 107, 120, 127, 110, 116, 126, 149,
        129, 112, 138, 140, 159, 116, 122, 133, 139], dtype=int64))