**IMPORT DEPENDENCIES**

In [None]:
import os
import cv2
import json
import numpy as np
import mediapipe as mp
import multiprocessing
from tqdm import tqdm
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from keras.api.utils import to_categorical
from keras.api.callbacks import TensorBoard
from keras.api.models import Sequential
from keras.api.layers import Dense, Conv1D, Flatten, MaxPooling1D, Dropout
from keras.api.models import load_model

IMAGE_PATH = os.path.join("Images")
DATA_PATH = os.path.join("Data")

**DEFINE KEY FUNCTIONS**

In [None]:
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results):
    for hand_landmarks in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
    
def extract_keypoints(image, results):
    if results.multi_hand_landmarks:
        hand_landmarks = results.multi_hand_landmarks[0]
        keypoints = []
        for landmrk in hand_landmarks.landmark:
            keypoints.append({"x": landmrk.x, "y": landmrk.y, "z": landmrk.z})
        return keypoints

**EXTRACT KEYPOINTS ON KAGGLE ASL IMAGE DATASET**

In [None]:
with mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.7, max_num_hands=1) as hands:
    path = os.path.join(IMAGE_PATH, "normal")
    for dir in os.listdir(path)[:19]:
        dir_path = os.path.join(path, dir)
        for file in tqdm(os.listdir(dir_path)):
            
            image_path = os.path.join(dir_path, file)
            image = cv2.imread(image_path)

            if image is None:
                continue

            image, results = mediapipe_detection(image, hands)
            
            keypoints = extract_keypoints(image, results)
            
            if keypoints:
                if not os.path.exists(os.path.join(DATA_PATH, dir)):
                    os.makedirs(os.path.join(DATA_PATH, dir))
                with open(os.path.join(DATA_PATH, dir, file.replace(".jpg", ".json")), "w") as f:
                    json.dump(keypoints, f)

        print("Processed directory:", dir)

**PREPROCESSING DATA FOR TRAINING**

In [None]:
alphabets = np.array(os.listdir(DATA_PATH))
label_map = {label: num for num, label in enumerate(alphabets)}

data, labels = [], []
  
for label in tqdm(alphabets):
    path = os.path.join(DATA_PATH, label)
    for file in os.listdir(path):
        file_path = os.path.join(path, file)
        try:
            with open(file_path, 'r') as f:
                keypoints = json.load(f)

            flattened_keypoints = np.array([[landmrk["x"], landmrk["y"], landmrk["z"]] for landmrk in keypoints]).flatten()

            data.append(flattened_keypoints)
            labels.append(label_map[label])

        except (ValueError, TypeError, json.JSONDecodeError) as e:
            print(f"Skipping file {file_path} due to error: {e}")

# Convert to arrays and one-hot encode labels
X = np.array(data)
y = to_categorical(np.array(labels), num_classes=len(alphabets))

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

100%|██████████| 23/23 [15:34<00:00, 40.61s/it]


In [7]:
np.save("X_train.npy", X_train)
np.save("X_test.npy", X_test)
np.save("y_train.npy", y_train)
np.save("y_test.npy", y_test)

X_train, X_test, y_train, y_test = np.load("X_train.npy"), np.load("X_test.npy"), np.load("y_train.npy"), np.load("y_test.npy")

**TRAIN MODEL**

In [8]:
log_dir = os.path.join("Logs")
tb_callback = TensorBoard(log_dir=log_dir)

model = Sequential()
# The first convolutional layer
model.add(Conv1D(32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=2))
# The second convolutional layer
model.add(Conv1D(64, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))

# Flatten layer
model.add(Flatten())

# Fully connected layers
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(23, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


**BUILD MODEL**

In [9]:
model.fit(X_train, y_train, epochs=15, callbacks=[tb_callback])

Epoch 1/15
[1m3965/3965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - categorical_accuracy: 0.5333 - loss: 1.5288
Epoch 2/15
[1m3965/3965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - categorical_accuracy: 0.8148 - loss: 0.5808
Epoch 3/15
[1m3965/3965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - categorical_accuracy: 0.8661 - loss: 0.4173
Epoch 4/15
[1m3965/3965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - categorical_accuracy: 0.8954 - loss: 0.3276
Epoch 5/15
[1m3965/3965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - categorical_accuracy: 0.9115 - loss: 0.2756
Epoch 6/15
[1m3965/3965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - categorical_accuracy: 0.9203 - loss: 0.2541
Epoch 7/15
[1m3965/3965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - categorical_accuracy: 0.9284 - loss: 0.2270
Epoch 8/15
[1m3965/3965[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1b2acc948d0>

**SAVE MODEL**

In [10]:
model.save("Models/action.h5")



**LOAD MODEL AND TESTING**

In [13]:
model = load_model("Models/action.h5")
threshold = 0.8

cap = cv2.VideoCapture(0)
with mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        ret, frame = cap.read()

        image, results = mediapipe_detection(frame, hands)
        
        if results.multi_hand_landmarks:
            draw_landmarks(image, results)
        
            keypoints = extract_keypoints(image, results)
            
            if keypoints:
                # Convert keypoints from list of dictionaries to flattened array
                flattened_keypoints = np.array([[landmrk["x"], landmrk["y"], landmrk["z"]] for landmrk in keypoints]).flatten()
                
                prediction = model.predict(np.expand_dims(flattened_keypoints, axis=0), verbose=0)
                class_id = np.argmax(prediction)
                confidence = np.max(prediction)

                if confidence > threshold:
                    label = alphabets[class_id]
                    cv2.putText(image, f'{label} {int(confidence * 100)}%', (10, 70),
                                cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3)

        cv2.imshow('ASL Recognition', image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

