Collecting Images

In [11]:
import os
import cv2

# Collect webcam images for every gesture class.
#
# For each class index j in range(number_of_classes) the cell:
#   1. shows a live preview until the user presses "c",
#   2. saves `dataset_size` mirrored frames to DATA_DIR/<j>/<n>.jpg.
# Pressing "q" at any point stops the whole collection run.

DATA_DIR = './data'
# exist_ok avoids the separate "exists?" check before creating the directory.
os.makedirs(DATA_DIR, exist_ok=True)

number_of_classes = 20  # one sub-directory per class
dataset_size = 200      # frames saved per class

# Camera indices to probe, in order; the first one that opens is used.
camera_indices = list(range(20))

cap = None
for camera_index in camera_indices:
    candidate = cv2.VideoCapture(camera_index)
    if candidate.isOpened():
        cap = candidate
        break
    # Release handles that failed to open instead of leaving them dangling.
    candidate.release()

if cap is None:
    # Raise instead of calling exit() so the failure surfaces as a normal
    # cell error.
    raise RuntimeError("Error: Could not open any camera.")

try:
    done = False
    for j in range(number_of_classes):
        class_dir = os.path.join(DATA_DIR, str(j))
        os.makedirs(class_dir, exist_ok=True)

        print('Collecting data for class {}'.format(j))

        # Phase 1: preview until the user confirms with "c".
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame from camera.")
                # A dead camera cannot recover for later classes: stop here
                # rather than falling through to the capture phase.
                done = True
                break

            # Mirror the preview so it matches the mirrored frames that are
            # shown and saved during capture. Flip before drawing the text,
            # otherwise the prompt would be mirrored too.
            frame = cv2.flip(frame, 1)
            cv2.putText(frame, 'Ready? Press "C" ! :)', (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3,
                        cv2.LINE_AA)
            cv2.imshow('frame', frame)

            # Mask to the low byte, as the inference cell does.
            key = cv2.waitKey(25) & 0xFF
            if key == ord('c'):
                break
            if key == ord('q'):
                done = True
                break

        if done:
            break

        # Phase 2: capture and save `dataset_size` frames for this class.
        counter = 0
        while counter < dataset_size:
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame from camera.")
                done = True
                break

            # Flip the frame horizontally
            frame = cv2.flip(frame, 1)

            cv2.imshow('frame', frame)
            key = cv2.waitKey(100) & 0xFF
            if key == ord('q'):
                done = True
                break

            cv2.imwrite(os.path.join(class_dir, '{}.jpg'.format(counter)), frame)
            counter += 1

        if done:
            break
finally:
    # Always free the camera and close the preview window, even on errors
    # or an interrupted cell.
    cap.release()
    cv2.destroyAllWindows()


Collecting data for class 0
Collecting data for class 1
Collecting data for class 2


Create Dataset for model

In [15]:
import os
import pickle

import mediapipe as mp
import cv2
import matplotlib.pyplot as plt


# Build the landmark dataset from the collected images.
#
# Every image under DATA_DIR/<label>/ is run through MediaPipe Hands. For each
# image with at least one detected hand, one feature vector is stored:
# (x - min_x, y - min_y) for every landmark of every detected hand. The
# vectors and their labels (the directory names) are pickled to data.pickle.

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

DATA_DIR = './data'

data = []
labels = []

# The context manager closes the Hands solution when processing finishes.
with mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3) as hands:
    # sorted() makes the sample order deterministic across runs/platforms.
    for dir_ in sorted(os.listdir(DATA_DIR)):
        class_dir = os.path.join(DATA_DIR, dir_)
        # Skip files in the directory
        if not os.path.isdir(class_dir):
            continue

        for img_path in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, img_path))
            if img is None:
                # cv2.imread returns None for unreadable / non-image files;
                # skip them instead of crashing in cvtColor.
                continue
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            results = hands.process(img_rgb)
            if not results.multi_hand_landmarks:
                # No hand detected: the image contributes no sample.
                continue

            data_aux = []
            x_ = []
            y_ = []
            for hand_landmarks in results.multi_hand_landmarks:
                for landmark in hand_landmarks.landmark:
                    x_.append(landmark.x)
                    y_.append(landmark.y)

                # x_/y_ accumulate across hands, so for a second hand the
                # minimum is taken over all landmarks seen so far in this
                # image (same as the original per-iteration min() calls,
                # just computed once per hand).
                min_x, min_y = min(x_), min(y_)
                for landmark in hand_landmarks.landmark:
                    data_aux.append(landmark.x - min_x)
                    data_aux.append(landmark.y - min_y)

            data.append(data_aux)
            labels.append(dir_)

# `with` guarantees the file is flushed and closed even if dump() fails.
with open('data.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)


Train model classifier 

In [13]:
import pickle
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Train a random-forest classifier on the pickled landmark vectors and save it.

# NOTE: pickle.load can execute arbitrary code; only load a data.pickle that
# was produced by this notebook.
with open('./data.pickle', 'rb') as f:
    data_dict = pickle.load(f)

data = data_dict['data']
labels = data_dict['labels']

if not data:
    # Fail with a clear message instead of max() raising on an empty sequence.
    raise ValueError("data.pickle contains no samples; run the dataset cell first.")

# Pad sequences to ensure they all have the same length
# (samples have different lengths depending on how many hands were detected).
max_sequence_length = max(len(seq) for seq in data)
data_padded = pad_sequences(data, maxlen=max_sequence_length, padding='post', truncating='post', dtype='float32')

# Fixed seed so the split and the forest are reproducible on re-run.
RANDOM_STATE = 42

x_train, x_test, y_train, y_test = train_test_split(
    data_padded, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=RANDOM_STATE)

model = RandomForestClassifier(random_state=RANDOM_STATE)

model.fit(x_train, y_train)

y_predict = model.predict(x_test)

# accuracy_score expects (y_true, y_pred).
score = accuracy_score(y_test, y_predict)

print('{}% of samples were classified correctly !'.format(score * 100))

# Save the model
with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)


100.0% of samples were classified correctly !


In [14]:
import os
import pickle
import cv2
import mediapipe as mp
import numpy as np
from sklearn.metrics import accuracy_score

# Live gesture recognition: read webcam frames, extract hand landmarks with
# MediaPipe, classify each detected hand with the saved model, and overlay the
# predicted label and its class probability. Press "q" to quit.

# Load the model
# NOTE: pickle.load can execute arbitrary code; only load files that were
# produced by this notebook.
with open('./model.p', 'rb') as f:
    model_dict = pickle.load(f)
model = model_dict['model']

# Load test data and labels
# (this is the full pickled dataset; it is not used further in this cell)
with open('./data.pickle', 'rb') as f:
    data_dict = pickle.load(f)
x_test = data_dict['data']
y_test = data_dict['labels']

# Initialize the camera: use the first index that opens.
camera_indices = [0, 1]
cap = None
for camera_index in camera_indices:
    candidate = cv2.VideoCapture(camera_index)
    if candidate.isOpened():
        cap = candidate
        break
    # Release handles that failed to open instead of leaving them dangling.
    candidate.release()

if cap is None:
    # Raise instead of calling exit() so the failure surfaces as a normal
    # cell error.
    raise RuntimeError("Error: Could not open any camera.")

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

# Class index (the data sub-directory name) -> text shown on screen.
labels_dict = {
    0: 'Hello', 1: 'Victory!', 2: 'Yes', 3: 'Thank you', 4: 'Good',
    5: 'bad', 6: 'I_Love_You', 7: 'What are you doing?', 8: 'Wow ,Expert',
    9: 'Question & Really', 10: 'bang bang!!', 11: 'fuck you!', 12: 'NO',
    13: 'Goodby', 14: 'goodluck', 15: 'heart', 16: 'call me',
    17: 'I feel you', 18: 'Suck', 19: 'what ever',
}

# Initialize variables for accuracy calculation
true_labels = []
predicted_labels = []

# Define the font scale and thickness for text
font_scale = 1
thickness = 2

# Width of the feature vectors the classifier was fitted on. Read it from the
# model so padding always matches training; 84 is the previous hard-coded
# value, kept as a fallback for estimators that lack the attribute.
n_features = getattr(model, 'n_features_in_', 84)

try:
    while True:
        ret, frame = cap.read()

        if not ret:
            print("Error: Failed to read frame from camera.")
            # Stop instead of `continue`: retrying skipped waitKey(), so a
            # dead camera produced an endless loop that "q" could not end.
            break

        # Enlarge the frame size
        frame = cv2.resize(frame, None, fx=1.5, fy=1.5)

        H, W, _ = frame.shape

        # Flip the frame horizontally
        frame = cv2.flip(frame, 1)

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        results = hands.process(frame_rgb)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())

            for hand_landmarks in results.multi_hand_landmarks:
                x_ = [landmark.x for landmark in hand_landmarks.landmark]
                y_ = [landmark.y for landmark in hand_landmarks.landmark]

                # Normalize hand landmarks relative to the hand's top-left
                # corner.
                min_x, min_y = min(x_), min(y_)
                max_x, max_y = max(x_), max(y_)
                data_aux = []
                for x, y in zip(x_, y_):
                    data_aux.append(x - min_x)
                    data_aux.append(y - min_y)

                # Pad data_aux with zeros to match the number of features the
                # model expects.
                if len(data_aux) < n_features:
                    data_aux += [0] * (n_features - len(data_aux))

                # Bounding box with a 10 px margin on every side. The
                # bottom-right corner adds the margin (it was previously
                # subtracted, which shrank the box).
                x1 = int(min_x * W) - 10
                y1 = int(min_y * H) - 10
                x2 = int(max_x * W) + 10
                y2 = int(max_y * H) + 10

                # One predict_proba call gives both the predicted class
                # (arg-max over model.classes_) and its probability, instead
                # of running the forest twice per hand.
                probabilities = model.predict_proba([np.asarray(data_aux)])[0]
                best = int(np.argmax(probabilities))
                predicted_character = labels_dict[int(model.classes_[best])]
                confidence = probabilities[best] * 100

                # Outline the detected hand.
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)

                # Define the position and size of the black rectangle
                rect_width = 920
                rect_height = 120
                rect_x = 20
                rect_y = H - rect_height - 20

                # Draw black rectangle
                cv2.rectangle(frame, (rect_x, rect_y), (rect_x + rect_width, rect_y + rect_height), (0, 0, 0), -1)

                # Write the label and its probability inside the rectangle.
                # The value is the model's class probability for this frame,
                # not an accuracy, so it is labelled "Confidence".
                label_text = f'Text Detected: {predicted_character}'
                confidence_text = f'Confidence: {confidence:.2f}%'
                cv2.putText(frame, label_text, (rect_x + 10, rect_y + 30), cv2.FONT_HERSHEY_SIMPLEX, font_scale,
                            (255, 255, 255), thickness, cv2.LINE_AA)
                cv2.putText(frame, confidence_text, (rect_x + 10, rect_y + 70), cv2.FONT_HERSHEY_SIMPLEX, font_scale,
                            (255, 255, 255), thickness, cv2.LINE_AA)

        # Display the frame
        cv2.imshow('frame', frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Always free the camera, the MediaPipe graph and the window, even when
    # the loop exits through an exception or an interrupted cell.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()
