In [None]:
import os
import time
import numpy as np
from matplotlib import pyplot as plt
import cv2 as cv
import mediapipe as mp
import seaborn as sns
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras import layers
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Display the installed TensorFlow version so the environment used for this run is recorded.
tf.__version__

In [None]:
# tf.test.is_gpu_available() is deprecated, and its first positional parameter
# is `cuda_only`, so the string 'gpu' was silently interpreted as cuda_only=True.
# Listing the physical GPU devices is the supported replacement; the cell
# displays True when at least one GPU is visible to TensorFlow.
len(tf.config.list_physical_devices('GPU')) > 0

In [None]:
# Gesture class names. The position in this list is the integer label: images
# are loaded and labelled in this order, and Predict() maps a predicted index
# back to a name through this list, so the order must not change between
# training and inference.
input_types = ['palm', 'fist', 'thumbsup', 'gun', 'call']

In [None]:
# Load every .jpg under Inputs/<gesture>/ into one list of images per gesture,
# in the same order as input_types.
path = 'Inputs/'
data = []
for gesture in input_types:
    gesture_dir = os.path.join(path, gesture)
    # os.listdir() returns entries in arbitrary order; sorting keeps the sample
    # indices (and therefore the seeded train/test split) stable across runs
    # and machines.
    images = [
        plt.imread(os.path.join(gesture_dir, file_name))
        for file_name in sorted(os.listdir(gesture_dir))
        if file_name.endswith('.jpg')
    ]
    data.append(images)
# NOTE: np.array only yields a regular (class, sample, height, width, channel)
# array when every class holds the same number of equally sized images.
data = np.array(data)

In [None]:
# Shape of the stacked image array built from the per-class lists.
data.shape

In [None]:
# Loading sanity check: show image 30 of class 0 (input_types[0], 'palm').
plt.imshow(data[0][30])

In [None]:
# Loading sanity check: show image 70 of class 1 (input_types[1], 'fist').
plt.imshow(data[1][70])

In [None]:
# Loading sanity check: show image 700 of class 2 (input_types[2], 'thumbsup').
# Assumes that class holds at least 701 images.
plt.imshow(data[2][700])

In [None]:
# Shape of `data` again, right before it is flattened into X / Y below.
data.shape

In [None]:
# Flatten the per-class image lists into one sample array X and a parallel
# label array Y. A sample's label is the index of its class in `data`, which
# follows the order of input_types. enumerate() replaces the hand-maintained
# counter so no stray loop index is left behind in the notebook namespace.
X = np.array([image for class_images in data for image in class_images])
Y = np.array([label for label, class_images in enumerate(data) for _ in class_images])
print(X.shape)
print(Y.shape)

In [None]:
# Spot-check: print the label and show the image stored at flat index 77.
i = 77
print(Y[i])
plt.imshow(X[i])

In [None]:
# Spot-check: print the label and show the image stored at flat index 1999.
# Assumes the dataset holds at least 2000 samples.
i = 1999
print(Y[i])
plt.imshow(X[i])

In [None]:
# Spot-check: print the label and show the image stored at flat index 2700.
# Assumes the dataset holds at least 2701 samples.
i = 2700
print(Y[i])
plt.imshow(X[i])

In [None]:
# Scale pixel values to [0, 1]. The dtype guard makes the cell safe to re-run:
# once X is floating point it is left alone instead of being divided by 255 a
# second time. JPEGs read with plt.imread arrive as integer arrays, so the
# first run always takes this branch. float32 halves the memory of the default
# float64 result and matches Keras' default float type.
if np.issubdtype(X.dtype, np.integer):
    X = X.astype(np.float32) / 255

In [None]:
# Inspect one sample after scaling; its values should now lie in [0, 1].
X[56]

In [None]:
# Hold out 20% of the samples for testing. `stratify=Y` keeps each gesture's
# share identical in the train and test sets, so no class ends up under-
# represented in the evaluation; the fixed random_state makes the split
# repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=20, shuffle=True, stratify=Y
)

In [None]:
# Gesture classifier: two convolution -> max-pool -> dropout stages followed by
# a fully connected head with one softmax output per entry in input_types.
cnn = models.Sequential()

# Stage 1: 32 3x3 filters; the input shape is taken from one sample of X.
cnn.add(layers.Conv2D(32, (3, 3), strides=(1, 1), padding='same',
                      activation='relu', input_shape=X.shape[1:]))
cnn.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
cnn.add(layers.Dropout(0.2))

# Stage 2: 64 3x3 filters.
cnn.add(layers.Conv2D(64, (3, 3), strides=(1, 1), padding='same', activation='relu'))
cnn.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
cnn.add(layers.Dropout(0.2))

# Classifier head: flatten the feature maps, one hidden dense layer, dropout,
# then class probabilities.
cnn.add(layers.Flatten())
cnn.add(layers.Dense(units=512, activation='relu'))
cnn.add(layers.Dropout(0.2))
cnn.add(layers.Dense(units=len(input_types), activation='softmax'))

In [None]:
# Adam optimizer with sparse categorical cross-entropy: the "sparse" variant
# takes integer class labels, which is how Y was built (one int per sample).
cnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train for 10 epochs and report how long it took. perf_counter() is a
# monotonic clock intended for measuring intervals; time.time() can jump if
# the system clock is adjusted while training runs.
start_time = time.perf_counter()
# Keep the returned History object so per-epoch loss/accuracy can be inspected
# or plotted afterwards.
history = cnn.fit(X_train, Y_train, epochs=10)
end_time = time.perf_counter()
print("total time in seconds", (end_time - start_time))

In [None]:
# Loss and accuracy (the metric chosen at compile time) on the held-out test set.
cnn.evaluate(X_test, Y_test)

In [None]:
# Per-class precision / recall / F1 on the test set.
Y_pred = cnn.predict(X_test)
# Each row of Y_pred holds one probability per class; the predicted class is
# the column with the highest probability.
Y_pred_classes = np.argmax(Y_pred, axis=1)
# The header string previously contained a raw line break, which is a syntax
# error; the newline is now written as an escape sequence.
print("Classification Report: \n", classification_report(Y_test, Y_pred_classes))

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
cnn.summary()

In [None]:
# Show the shape of every weight array rather than the arrays themselves:
# dumping all trained parameters floods the cell output and bloats the saved
# notebook without telling the reader anything useful.
[weights.shape for weights in cnn.get_weights()]

In [None]:
# Persist the trained model, but only when no cnn.h5 exists yet: an existing
# file is left untouched, so a retrained model is not written over it.
if not os.path.isfile("cnn.h5"):
    cnn.save("cnn.h5")

In [None]:
def Predict(img):
    """Return the gesture name the CNN predicts for an image batch.

    Parameters
    ----------
    img : array-like
        Batch of images to pass to ``cnn.predict``. The live loop passes a
        batch holding a single image.

    Returns
    -------
    str
        The entry of ``input_types`` with the highest predicted probability
        for the first image in the batch.
    """
    # verbose=0 stops Keras from printing a progress bar on every call, which
    # would otherwise flood the output when this runs once per video frame.
    probabilities = cnn.predict(img, verbose=0)
    # Take the argmax of the first row only. An argmax over the whole
    # (batch, classes) array is flattened, so it would yield an index outside
    # input_types whenever the batch holds more than one image.
    class_ = int(np.argmax(probabilities[0]))
    return input_types[class_]

In [None]:
# Short aliases for the mediapipe modules used by the webcam loop:
# drawing_utils draws landmarks, holistic provides the detector and the
# HAND_CONNECTIONS constant.
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

In [None]:
# One Holistic detector with default settings, reused for every webcam frame.
# NOTE(review): it is never closed in the visible cells.
holistic = mp_holistic.Holistic()

In [None]:
# Live demo: detect the right hand in each webcam frame, render its landmark
# skeleton on a black canvas, crop/resize it to the network input size and
# overlay the predicted gesture on the mirrored camera view. Press Enter to stop.
R = 25          # padding in pixels added around the landmark bounding box
thickness = 2   # outline width of the drawn box; the crop is inset by this amount
webcam = 0      # device index handed to cv.VideoCapture
ENTER_KEY = 13  # key code reported by cv.waitKey for Enter


def _landmark_bounds(landmarks, width, height):
    """Return (x_min, y_min, x_max, y_max) in pixels for normalized landmarks."""
    xs = [int(lm.x * width) for lm in landmarks]
    ys = [int(lm.y * height) for lm in landmarks]
    return min(xs), min(ys), max(xs), max(ys)


capture = cv.VideoCapture(webcam)
try:
    fps = int(capture.get(cv.CAP_PROP_FPS))
    print("fps is "+str(fps))
    while capture.isOpened():
        if cv.waitKey(1) & 0xFF == ENTER_KEY:
            break
        ok, frame = capture.read()
        if not ok:
            # No frame was delivered (camera unplugged or stream ended); stop
            # instead of passing None to OpenCV.
            break
        height, width = frame.shape[:2]
        frame_rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB)  # mediapipe expects RGB
        result = holistic.process(frame_rgb)

        annotated = frame.copy()  # BGR copy to draw on; the raw frame stays intact
        label = None
        hand = result.right_hand_landmarks  # None when no right hand is detected
        if hand is not None and hand.landmark:
            x_min, y_min, x_max, y_max = _landmark_bounds(hand.landmark, width, height)
            mp_drawing.draw_landmarks(annotated, hand, mp_holistic.HAND_CONNECTIONS)
            cv.rectangle(annotated, (x_min - R, y_min - R), (x_max + R, y_max + R), (0, 255, 0), thickness)

            # Draw only the skeleton on a black canvas; this is the model input.
            black = np.zeros(shape=frame.shape)
            mp_drawing.draw_landmarks(black, hand, mp_holistic.HAND_CONNECTIONS)

            # Clamp the crop to the frame: a negative start index would wrap
            # around in NumPy and produce a wrong or empty crop when the hand
            # is near an edge.
            top = max(y_min - R + thickness, 0)
            bottom = min(y_max + R - thickness, height)
            left = max(x_min - R + thickness, 0)
            right = min(x_max + R - thickness, width)
            if bottom > top and right > left:
                croped = black[top:bottom, left:right]
                # NOTE(review): 96x96 is assumed to match the training image
                # size (X.shape[1:3]) - confirm.
                resized = cv.resize(croped, (96, 96))
                mirror2 = cv.flip(resized, 1)
                # The network was trained on pixels divided by 255; mediapipe
                # draws with 0-255 colour components, so apply the same scaling.
                # NOTE(review): training images were read as RGB by plt.imread;
                # confirm the canvas channel order matches how the dataset was
                # produced.
                model_input = mirror2 / 255.0
                label = Predict(np.array([model_input]))
                # cv.imshow multiplies float images by 255, so the scaled
                # canvas is displayed with its true colours.
                cv.imshow("Frame2", model_input)

        # Mirror for a selfie-style view; text is drawn after flipping so it
        # stays readable.
        mirror1 = cv.flip(annotated, 1)
        if label is not None:
            cv.putText(mirror1, str(label), (100, 100), cv.FONT_HERSHEY_PLAIN, 2, (255,0,0), 1)
        cv.imshow('frame1', mirror1)
finally:
    # Always free the camera and close the windows, including when an error or
    # a kernel interrupt ends the loop.
    capture.release()
    cv.destroyAllWindows()

In [None]:
fps is 30