In [3]:
import os
import numpy as np
import pandas as pd
import cv2
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Dataset parameters
dataset_path = 'C:/Users/nisht/Project/ISL_Dataset'  # Replace with your dataset path
IMG_SIZE = 64
BATCH_SIZE = 32

# Data loading and preprocessing
def load_data(dataset_path, img_size=IMG_SIZE):
    images = []
    labels = []
    
    for label_dir in os.listdir(dataset_path):
        label_path = os.path.join(dataset_path, label_dir)
        if os.path.isdir(label_path):
            for img_file in os.listdir(label_path):
                img_path = os.path.join(label_path, img_file)
                img = cv2.imread(img_path)
                img = cv2.resize(img, (img_size, img_size))  # Resize image
                images.append(img)
                labels.append(label_dir)
    
    images = np.array(images) / 255.0  # Normalize images
    labels = np.array(pd.factorize(labels)[0])  # Convert labels to integers
    return images, labels

images, labels = load_data(dataset_path)

# Split data
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Data augmentation
train_datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)
train_generator = train_datagen.flow(X_train, y_train, batch_size=BATCH_SIZE)


  labels = np.array(pd.factorize(labels)[0])  # Convert labels to integers


In [5]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout

def build_vgg16():
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
    for layer in base_model.layers:
        layer.trainable = False
    model = Sequential([
        base_model,
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(len(np.unique(labels)), activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Train VGG16 model
vgg16_model = build_vgg16()
vgg16_model.fit(train_generator, epochs=20, validation_data=(X_test, y_test))
y_pred_vgg16 = np.argmax(vgg16_model.predict(X_test), axis=1)
print(classification_report(y_test, y_pred_vgg16))
vgg16_model.save('vgg16_model.h5')


Epoch 1/20


  self._warn_if_super_not_called()


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 193ms/step - accuracy: 0.0477 - loss: 3.4542 - val_accuracy: 0.1064 - val_loss: 3.0752
Epoch 2/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 180ms/step - accuracy: 0.0794 - loss: 3.0823 - val_accuracy: 0.1277 - val_loss: 3.0783
Epoch 3/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 182ms/step - accuracy: 0.1382 - loss: 3.0097 - val_accuracy: 0.1702 - val_loss: 3.0224
Epoch 4/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 192ms/step - accuracy: 0.1566 - loss: 2.9703 - val_accuracy: 0.1489 - val_loss: 3.0072
Epoch 5/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 195ms/step - accuracy: 0.1527 - loss: 2.8701 - val_accuracy: 0.1915 - val_loss: 2.9714
Epoch 6/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 194ms/step - accuracy: 0.1522 - loss: 2.7943 - val_accuracy: 0.1489 - val_loss: 2.9811
Epoch 7/20
[1m18/18[0m [32m━━━━━━━━━



              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.14      0.10      0.12        10
           2       0.22      0.18      0.20        11
           3       0.25      0.33      0.29         6
           4       0.15      0.36      0.21        11
           5       0.00      0.00      0.00         3
           6       0.67      0.29      0.40         7
           7       0.17      0.17      0.17         6
           8       0.00      0.00      0.00         6
           9       0.11      0.12      0.12         8
          10       0.00      0.00      0.00         6
          11       0.00      0.00      0.00         1
          12       0.62      0.56      0.59         9
          13       0.00      0.00      0.00         7
          14       0.00      0.00      0.00         4
          15       1.00      0.25      0.40         4
          16       0.33      0.14      0.20         7
          17       0.00    

In [130]:
# Summarizing model performance
model_reports = {
    "CNN": classification_report(y_test, y_pred_cnn, output_dict=True),
    "VGG16": classification_report(y_test, y_pred_vgg16, output_dict=True),
    "ResNet50": classification_report(y_test, y_pred_resnet50, output_dict=True),
}

best_model_name = max(model_reports, key=lambda x: model_reports[x]['accuracy'])
print(f"The best model is {best_model_name}.")


The best model is VGG16.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
import cv2
import numpy as np

# Use the trained model from the current notebook
# Assuming your best model is stored in the variable `cnn_model` or any other model variable
model = vgg16_model  # Replace `cnn_model` with the variable name of your best model

# Define parameters
IMG_SIZE = 64  # Image size used for training
class_labels =  ['A', 'B', 'C', 'D', 'E','F','G','I','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Z']  # Replace with your actual class labels

# Preprocessing function for live frames
def preprocess_frame(frame):
    # Resize the frame to match the input size of the model
    resized_frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
    # Normalize the frame
    normalized_frame = resized_frame / 255.0
    # Add a batch dimension
    return np.expand_dims(normalized_frame, axis=0)

# Initialize webcam
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Unable to access the webcam.")
    exit()

print("Press 'q' to quit the application.")

# Real-time video processing
while True:
    ret, frame = cap.read()
    if not ret:
        print("Error: Unable to read frame.")
        break

    # Mirror the frame for a more intuitive interface
    frame = cv2.flip(frame, 1)

    # Draw a rectangle for the region of interest (ROI)
    roi_start = (200, 100)
    roi_end = (400, 300)
    cv2.rectangle(frame, roi_start, roi_end, (0, 255, 0), 2)

    # Extract the ROI
    roi = frame[roi_start[1]:roi_end[1], roi_start[0]:roi_end[0]]

    # Preprocess the ROI and make predictions
    preprocessed_roi = preprocess_frame(roi)
    prediction = model.predict(preprocessed_roi)
    predicted_label = class_labels[np.argmax(prediction)]

    # Display the predicted label
    cv2.putText(frame, f"Prediction: {predicted_label}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

    # Show the frame
    cv2.imshow("Sign Language Recognition", frame)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()


Press 'q' to quit the application.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━