In [4]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import img_to_array
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Flatten, Dense, Input, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam

In [5]:
DATA_PATH = r'C:\Users\shail\Desktop\NSL\data'

In [6]:
categories = sorted(os.listdir(DATA_PATH))
print(f"Categories: {categories}")

Categories: ['1', '2', '3', '4', '5', '6', '7', '8', '9']


In [7]:
def load_images_and_labels(data_path, categories, img_size=(64, 64)):
    images = []
    labels = []
    for category in categories:
        category_path = os.path.join(data_path, category)
        for img_name in os.listdir(category_path):
            img_path = os.path.join(category_path, img_name)
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, img_size)
            img_array = img_to_array(img)
            images.append(img_array)
            labels.append(category)
    return np.array(images, dtype=np.float16), np.array(labels)

# Load images and labels
images, labels = load_images_and_labels(DATA_PATH, categories)

In [8]:
# Encoding labels
label_binarizer = LabelBinarizer()
labels_encoded = label_binarizer.fit_transform(labels)

In [9]:
labels_encoded[0]

array([1, 0, 0, 0, 0, 0, 0, 0, 0])

In [10]:
X_train, X_test, y_labels, y_test_labels = train_test_split(images, labels_encoded, test_size=0.3, random_state=42)

In [11]:
X_train = X_train / 255.0
X_test = X_test / 255.0

In [12]:
datagen = ImageDataGenerator(
    rotation_range=10,          
    zoom_range=0.1,              
    width_shift_range=0.1,       
    height_shift_range=0.1,      
    horizontal_flip=True,       
    fill_mode="nearest"        
)

datagen.fit(X_train)


LOADING PRETRAINED MODEL VGG16

In [57]:
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model

In [38]:
X_train[0].shape

(64, 64, 3)

In [39]:
base_model = VGG16(weights='imagenet', include_top=False, input_shape=X_train[0].shape)

In [40]:
base_model.trainable = False

In [41]:
X_train = preprocess_input(X_train)
X_test = preprocess_input(X_test)

In [42]:
base_model.summary()

CUSTOM OUTPUT LAYERS

In [82]:
x = base_model.output
x = GlobalAveragePooling2D()(x)  
x = Flatten()(x)
x = Dense(128, activation='relu')(x)  
x = Dense(64, activation='relu')(x)  


predictions = Dense(len(categories), activation='softmax')(x)


model = Model(inputs=base_model.input, outputs=predictions)

In [83]:
from tensorflow.keras.callbacks import EarlyStopping

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [84]:
es = EarlyStopping(monitor='val_accuracy', mode='max', patience=5, restore_best_weights = True)

In [85]:
model.fit(X_train, y_labels, epochs=50,  validation_data=(X_test, y_test), batch_size=32, callbacks=[es])

Epoch 1/50
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 1s/step - accuracy: 0.1307 - loss: 2.3510 - val_accuracy: 0.1210 - val_loss: 2.0990
Epoch 2/50
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m213s[0m 1s/step - accuracy: 0.1980 - loss: 2.1042 - val_accuracy: 0.1925 - val_loss: 1.9619
Epoch 3/50
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 1s/step - accuracy: 0.3229 - loss: 1.8810 - val_accuracy: 0.4418 - val_loss: 1.5760
Epoch 4/50
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 1s/step - accuracy: 0.4590 - loss: 1.5346 - val_accuracy: 0.5780 - val_loss: 1.2921
Epoch 5/50
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 1s/step - accuracy: 0.5783 - loss: 1.2583 - val_accuracy: 0.5668 - val_loss: 1.0738
Epoch 6/50
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m194s[0m 1s/step - accuracy: 0.6289 - loss: 1.0581 - val_accuracy: 0.7407 - val_loss: 0.8837
Epoch 7/50
[1m189/189

<keras.src.callbacks.history.History at 0x23cf3ff0ef0>

In [89]:
model.save('_pretrained_vgg16.keras')

In [18]:
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet import preprocess_input
from tensorflow.keras.callbacks import EarlyStopping

In [19]:
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(X_train[0].shape))

base_model.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)  
x = Flatten()(x)  
x = Dense(128, activation='relu')(x) 
x = Dense(64, activation='relu')(x)   


predictions = Dense(len(categories), activation='softmax')(x)


model = Model(inputs=base_model.input, outputs=predictions)


model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


model.summary()


In [20]:
es = EarlyStopping(monitor='val_accuracy', mode='max', patience=5, restore_best_weights = True)

In [None]:
model.fit(X_train, y_labels, validation_data=(X_test, y_test_labels), epochs=10, batch_size=32, callbacks=[es])


Epoch 1/10
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m166s[0m 631ms/step - accuracy: 0.4023 - loss: 1.8104 - val_accuracy: 0.9068 - val_loss: 0.4952
Epoch 2/10
[1m 80/237[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m1:05[0m 419ms/step - accuracy: 0.9515 - loss: 0.4083

In [93]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras import layers, models

# Load the gesture recognizer model (make sure this model accepts 42-dimensional input)
model = models.Sequential([
    layers.InputLayer(input_shape=(42,)),  # Accepts 42-dimensional input (21 landmarks with x and y)
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(10, activation='softmax')  # Assuming 10 classes for numbers 0-9
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Load class names
classNames = [
    '1', '2', '3', '4', '5', '6', '7', '8', '9']  # Replace with your own class names

# Initialize MediaPipe Hands
mpHands = mp.solutions.hands
hands = mpHands.Hands(max_num_hands=1, min_detection_confidence=0.7)
mpDraw = mp.solutions.drawing_utils

# Initialize the webcam for hand gesture recognition
cap = cv2.VideoCapture(0)

while True:
    # Read each frame from the webcam
    ret, frame = cap.read()
    if not ret:
        break
    
    # Flip the frame horizontally for a natural mirror view
    frame = cv2.flip(frame, 1)
    x, y, c = frame.shape
    
    # Convert the BGR frame to RGB
    framergb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    # Process the frame with MediaPipe
    result = hands.process(framergb)
    
    className = ''

    # If hand landmarks are detected
    if result.multi_hand_landmarks:
        for handslms in result.multi_hand_landmarks:
            # Collect landmark points (21 landmarks with x, y coordinates)
            landmarks = []
            for lm in handslms.landmark:
                lmx = int(lm.x * x)
                lmy = int(lm.y * y)
                landmarks.append([lmx, lmy])

            # Flatten the landmarks to a 42-dimensional vector
            flattened_landmarks = np.array(landmarks).flatten()

            # Predict hand sign based on the flattened landmarks
            prediction = model.predict(flattened_landmarks.reshape(1, -1))
            classID = np.argmax(prediction)
            className = classNames[classID]

            # Draw the hand landmarks on the frame
            mpDraw.draw_landmarks(frame, handslms, mpHands.HAND_CONNECTIONS)

            # Display the predicted class (hand sign number) on the frame
            cv2.putText(frame, className, (10, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (0, 0, 255), 2, cv2.LINE_AA)
    
    # Show the output frame
    cv2.imshow("Hand Gesture Recognition", frame)
    
    # Exit the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step


IndexError: list index out of range