# Task-04


### **Develop a hand gesture recognition model that can accurately identify and classify different hand gestures from image or video data, enabling intuitive human-computer interaction and gesture-based control systems.**


Dataset :-  https://www.kaggle.com/gti-upm/leapgestrecog

### This Colab notebook runs **Model-1**. To run **Model-2**, copy its code into a new Jupyter/Colab notebook, then download and unzip the dataset (and upload it to Colab, if you are using Colab) so that the model can load it.

# **Model-1**

In [None]:
# Install Kaggle API
!pip install kaggle

# Import necessary libraries
import os
import zipfile
import pandas as pd
import cv2

# Set up Kaggle API credentials (upload kaggle.json file)
from google.colab import files
files.upload()  # Choose the kaggle.json file you downloaded from Kaggle

# Create a .kaggle directory and move kaggle.json there
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# Change permissions of the file
!chmod 600 ~/.kaggle/kaggle.json

# Download the dataset using Kaggle API
!kaggle datasets download -d gti-upm/leapgestrecog

# Unzip the dataset
!unzip leapgestrecog.zip -d /content/leapGestRecog

# Initialize lists to store data
data = []
labels = []

# Walk the dataset, which is nested two folders deep:
#   <gestures_path>/<outer folder>/<gesture label>/<frame image>
# The name of the inner folder is used as the class label.
# Directory listings are sorted so that the sample order (and therefore any
# seeded shuffle/split performed later) is reproducible across filesystems.
gestures_path = '/content/leapGestRecog/leapGestRecog/'
for gesture_folder in sorted(os.listdir(gestures_path)):
    gesture_folder_path = os.path.join(gestures_path, gesture_folder)
    # Skip hidden entries and stray files (os.listdir would fail on a file)
    if gesture_folder.startswith('.') or not os.path.isdir(gesture_folder_path):
        continue
    for gesture_label in sorted(os.listdir(gesture_folder_path)):
        gesture_label_path = os.path.join(gesture_folder_path, gesture_label)
        if gesture_label.startswith('.') or not os.path.isdir(gesture_label_path):
            continue
        for image_name in sorted(os.listdir(gesture_label_path)):
            if image_name.startswith('.'):
                continue
            image_path = os.path.join(gesture_label_path, image_name)
            # Read image using OpenCV; imread returns None (it does not
            # raise) when the file is missing, corrupt or not an image
            image = cv2.imread(image_path)
            if image is None:
                print(f"Skipping unreadable image: {image_path}")
                continue
            # Convert image to grayscale
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            # Resize every frame to a fixed 128x128 so they stack into one array
            image = cv2.resize(image, (128, 128))
            # Append image data and label to lists
            data.append(image)
            labels.append(gesture_label)

# Fail early with a clear message instead of a confusing shape error later on
if not data:
    raise ValueError(f"No images were loaded from {gestures_path}. "
                     "Check that the dataset was downloaded and unzipped.")

# Convert lists to numpy arrays
import numpy as np
data = np.array(data)
labels = np.array(labels)

# Normalize image data to [0, 1].
# Cast to float32 first: dividing the uint8 image stack by 255.0 directly
# promotes it to float64, which doubles the memory footprint for no benefit.
data = data.astype(np.float32) / 255.0

# Encode labels to integers (le.classes_ keeps the original label names)
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
labels = le.fit_transform(labels)

# Split data into training and testing sets.
# stratify keeps the class proportions identical in both subsets.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42, stratify=labels
)

# Reshape data for model input: add the single grayscale channel axis
X_train = X_train.reshape(-1, 128, 128, 1)
X_test = X_test.reshape(-1, 128, 128, 1)

# Verify shapes
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

# Build a simple CNN model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# One output unit per distinct label seen by the LabelEncoder
n_classes = len(le.classes_)

# Two conv + max-pool stages operating on 128x128 single-channel input
feature_layers = [
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
]

# Dense classifier head with dropout, ending in a softmax over the classes
classifier_layers = [
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(n_classes, activation='softmax'),
]

model = Sequential(feature_layers + classifier_layers)

# Compile the model; the sparse loss matches the integer-encoded labels
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model, reporting metrics on the held-out split after each epoch
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)

# Evaluate the model on the held-out split
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f'Test accuracy: {test_acc}')




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/leapGestRecog/leapgestrecog/leapGestRecog/07/06_index/frame_07_06_0020.png  
  inflating: /content/leapGestRecog/leapgestrecog/leapGestRecog/07/06_index/frame_07_06_0021.png  
  inflating: /content/leapGestRecog/leapgestrecog/leapGestRecog/07/06_index/frame_07_06_0022.png  
  inflating: /content/leapGestRecog/leapgestrecog/leapGestRecog/07/06_index/frame_07_06_0023.png  
  inflating: /content/leapGestRecog/leapgestrecog/leapGestRecog/07/06_index/frame_07_06_0024.png  
  inflating: /content/leapGestRecog/leapgestrecog/leapGestRecog/07/06_index/frame_07_06_0025.png  
  inflating: /content/leapGestRecog/leapgestrecog/leapGestRecog/07/06_index/frame_07_06_0026.png  
  inflating: /content/leapGestRecog/leapgestrecog/leapGestRecog/07/06_index/frame_07_06_0027.png  
  inflating: /content/leapGestRecog/leapgestrecog/leapGestRecog/07/06_index/frame_07_06_0028.png  
  inflating: /content/leapGestRecog/leapgest

Key Differences in both the models.

Loading Data:

Model-1: Uses the Kaggle API to download and extract the dataset. It then loads the images into NumPy arrays, normalizes the data, and encodes the labels.

Model-2: Directly loads images from the local file system, resizes them, and converts them to grayscale. It uses the directory structure to determine labels.

Image Preprocessing:

Model-1: Converts images to grayscale, resizes them to 128x128, and normalizes the pixel values to the range [0, 1]. It uses LabelEncoder for label encoding.

Model-2: Converts images to grayscale, resizes them to 64x64, normalizes the images, and uses to_categorical for one-hot encoding of labels.

Model Architecture:

Model-1: Uses a simple CNN with two convolutional layers followed by max-pooling, flattening, dense layers, and dropout.

Model-2: Uses a more complex CNN with three convolutional layers, each followed by max-pooling and dropout, and ends with dense layers and dropout.

Dataset Handling and Paths:

Model-1: Utilizes a simpler structure to read images and labels, assuming a uniform directory structure and making use of the Kaggle API.

Model-2: Incorporates more detailed error checking, such as verifying the base path and ensuring images are loaded correctly.

Training and Evaluation:

Model-1: Trains for 10 epochs with validation data, using sparse categorical cross-entropy loss.

Model-2: Trains for 10 epochs with batch size 32, using categorical cross-entropy loss. It also includes more detailed plotting of training/validation accuracy and loss.

Which is Better?

Technology Used:

Model-1: Makes use of Kaggle API for data handling, which is a modern and convenient way to handle large datasets hosted on Kaggle.

Model-2: Uses local file system operations, which are more flexible and provide more control over data handling.

Details and Complexity:

Model-2: More detailed in terms of error handling and data verification. It also includes more comprehensive preprocessing steps.

Model-1: Simpler and more straightforward, making it easier for beginners or for quick prototyping.

Model Complexity:

Model-2: Uses a more complex CNN architecture, which might provide better performance on challenging tasks.

Model-1: Uses a simpler architecture, which is easier to understand and faster to train but might be less effective on complex datasets.

Conclusion
For Simplicity and Quick Prototyping: Model-1 is preferable, as it uses the Kaggle API for dataset handling and takes a straightforward approach to data preprocessing and model building.

For Detailed and Robust Implementation: Model-2 is better. It includes comprehensive error handling, more detailed preprocessing, and a more complex CNN architecture, which might yield better performance on complex datasets.

# **Model-2**

In [None]:
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
import matplotlib.pyplot as plt

# Containers filled by the image-loading loop further down
data = []
labels = []
base_path = 'leapgestrecog/leapGestRecog/'

if not os.path.exists(base_path):
    raise ValueError(f"Base path {base_path} does not exist. Make sure the dataset is correctly downloaded and unzipped.")

# Print the directory tree of the dataset for verification.
# Each directory is printed once with its file count rather than one line per
# file, since listing every frame would produce thousands of output lines.
for root, dirs, file_names in os.walk(base_path):
    dirs.sort()  # os.walk honours in-place edits: gives a deterministic order
    # Depth is measured on the path relative to base_path. This is robust to
    # the trailing slash in base_path, which otherwise makes the root print
    # as "/" and puts first-level folders at the same indent as the root.
    rel_root = os.path.relpath(root, base_path)
    level = 0 if rel_root == os.curdir else rel_root.count(os.sep) + 1
    indent = ' ' * 4 * level
    dir_name = os.path.basename(os.path.normpath(root))
    print(f"{indent}{dir_name}/ ({len(file_names)} files)")

# Load images from nested directories.
#
# The dataset is laid out as <base_path>/<outer folder>/<gesture>/<frame>.png,
# e.g. leapGestRecog/07/06_index/frame_07_06_0020.png, so the gesture class is
# the SECOND directory level. Labelling images by the first level would train
# the network to tell the outer folders apart rather than the gestures.
# Listings are sorted so sample order (and the seeded split) is reproducible.
for outer_name in sorted(os.listdir(base_path)):
    outer_path = os.path.join(base_path, outer_name)
    if not os.path.isdir(outer_path):
        continue
    print(f"Processing folder '{outer_name}'")

    for gesture_name in sorted(os.listdir(outer_path)):
        gesture_path = os.path.join(outer_path, gesture_name)
        if not os.path.isdir(gesture_path):
            continue

        for img_name in sorted(os.listdir(gesture_path)):
            img_path = os.path.join(gesture_path, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:  # cv2.imread returns None instead of raising
                print(f"Image {img_path} not loaded correctly")
                continue
            data.append(cv2.resize(img, (64, 64)))
            labels.append(gesture_name)

data = np.array(data)

if data.shape[0] == 0:
    raise ValueError("No images were loaded. Please check the dataset and paths.")

print(f"Loaded {data.shape[0]} images.")

# Encode gesture names as contiguous integer ids 0..K-1.
# np.unique returns the sorted distinct names plus, for every sample, the
# index of its name in that sorted array.
class_names, class_ids = np.unique(np.array(labels), return_inverse=True)

# Derive the class count from the labels actually loaded, not from a raw
# directory listing (which would also count stray files or the wrong level).
num_classes = len(class_names)
print(f"Found {num_classes} gesture classes: {list(class_names)}")

# Normalize the images to [0, 1]; cast to float32 first so the division does
# not promote the uint8 stack to float64 and double its memory footprint
data = data.astype(np.float32) / 255.0

# Reshape the data: add the single grayscale channel axis
data = data.reshape(data.shape[0], 64, 64, 1)

# One-hot encode the labels (required by categorical cross-entropy)
labels = to_categorical(class_ids, num_classes)

# Split the data into training and testing sets.
# stratify uses the integer ids so both subsets keep the class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42, stratify=class_ids
)

print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")

# Build the CNN model: three conv -> max-pool -> dropout stages with a
# growing number of filters, followed by a dense classifier head
network_layers = []
for stage_index, n_filters in enumerate((32, 64, 128)):
    # Only the very first layer declares the 64x64 single-channel input shape
    extra_kwargs = {'input_shape': (64, 64, 1)} if stage_index == 0 else {}
    network_layers.append(Conv2D(n_filters, (3, 3), activation='relu', **extra_kwargs))
    network_layers.append(MaxPooling2D((2, 2)))
    network_layers.append(Dropout(0.25))

network_layers.extend([
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),  # one unit per class
])

model = Sequential(network_layers)

# Compile the model; categorical cross-entropy matches the one-hot labels
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the model summary
model.summary()

# Train the model, reporting metrics on the held-out split after each epoch
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test, y_test),
)

# Evaluate the model on the held-out split
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

# Plot training & validation curves side by side: accuracy on the left,
# loss on the right. Each panel is (history key, panel title, y-axis label).
plt.figure(figsize=(12, 4))

curve_panels = (
    ('accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'Model loss', 'Loss'),
)

for panel_number, (metric_key, panel_title, y_label) in enumerate(curve_panels, start=1):
    plt.subplot(1, 2, panel_number)
    plt.plot(history.history[metric_key])            # training curve
    plt.plot(history.history[f'val_{metric_key}'])   # validation curve
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend(['Train', 'Test'], loc='upper left')

plt.tight_layout()
plt.show()

# Persist the trained network to disk and report the file name used
saved_model_file = 'hand_gesture_recognition_model.h5'
model.save(saved_model_file)
print(f"Model saved as {saved_model_file}")
