In [1]:
import cv2
import os 
import numpy as np 


Dataset Preprocessing

In [2]:
# Root folder of the raw ISL (Indian Sign Language) dataset.
# Replace this with the path to your own copy of the dataset.
data_dir = r"C:\Users\User\OneDrive\isl0"


In [3]:
# Folder that receives the preprocessed copy of the dataset.
# Replace this with the path to your own output directory.
out_dir = r"D:\Users\User\Desktop\isl"

In [4]:
resize_size = (64, 64)  # target size handed to cv2.resize for every image

# Walk the raw dataset, convert every readable image to grayscale, shrink it to
# `resize_size`, and write it under `out_dir` using the same sub-folder layout
# as `data_dir`.
for root, dirs, files in os.walk(data_dir):
    # The destination folder depends only on `root`, so resolve it once per
    # directory instead of once per image.
    relative_path = os.path.relpath(root, data_dir)
    output_subdir = os.path.join(out_dir, relative_path)

    for filename in files:
        img_path = os.path.join(root, filename)
        img = cv2.imread(img_path)

        # cv2.imread signals an unreadable file by returning None, not by raising
        if img is None:
            print(f"failed to load:{img_path}")
            continue

        g_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)   # grayscale conversion
        resize_img = cv2.resize(g_img, resize_size)     # resize the gray image

        # Create the folder only once there is something to store in it
        os.makedirs(output_subdir, exist_ok=True)

        output_path = os.path.join(output_subdir, filename)
        # cv2.imwrite reports a failed write through its boolean return value,
        # so check it instead of silently dropping the image.
        if not cv2.imwrite(output_path, resize_img):
            print(f"failed to save:{output_path}")
        
           
    

KeyboardInterrupt: 

In [5]:

import os       
        

Splitting the Preprocessed Dataset into Training and Testing Sets


In [6]:
import random
import shutil

In [8]:
# Destination folders for the two splits. The drive letter must be followed by
# a backslash: 'D:Users\...' is resolved relative to the current directory of
# drive D: rather than from the drive root.
train_dir = r'D:\Users\User\Desktop\isl\train'
test_dir = r'D:\Users\User\Desktop\isl\test'

# Create the folders the variables actually name, not a bare 'train' / 'test'
# relative to the current working directory.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)


In [11]:
dataset_dir = r"D:\Users\User\Desktop\isl"
split_ratio = 0.8  # probability that a file lands in the training split (~80% train, ~20% test)

# Both splits are written inside the dataset folder, which is the location the
# model-training cell reads them from.
split_names = ('train', 'test')

random.seed(42)  # fixed seed so re-running the cell reproduces the same split

for root, dirs, files in os.walk(dataset_dir):
    # os.walk (top-down) honours in-place edits of `dirs`: drop the split
    # folders so a re-run never re-splits its own output, and sort the rest so
    # the traversal order (and therefore the seeded split) is deterministic.
    if root == dataset_dir:
        dirs[:] = [d for d in dirs if d not in split_names]
    dirs.sort()

    rel_root = os.path.relpath(root, dataset_dir)
    for file in sorted(files):
        src_path = os.path.join(root, file)
        split = 'train' if random.random() < split_ratio else 'test'
        dst_dir = os.path.join(dataset_dir, split, rel_root)
        dst_path = os.path.join(dst_dir, file)
        os.makedirs(dst_dir, exist_ok=True)
        shutil.copy(src_path, dst_path)


In [7]:
import os

# Relative folder names of the two splits
train_dir, test_dir = 'train', 'test'

# Resolve both names against the current working directory
train_path, test_path = (os.path.abspath(folder) for folder in (train_dir, test_dir))

print("Train data path:", train_path)
print("Test data path:", test_path)


Train data path: C:\Users\User\train
Test data path: C:\Users\User\test


Model Creation: Deep Learning (VGG-style CNN)


In [8]:
from keras.preprocessing.image import ImageDataGenerator #import Image DataGenerator for understanding Image Augmentation

In [9]:
train_dir = r'D:\Users\User\Desktop\isl\train'
test_dir = r'D:\Users\User\Desktop\isl\test'

img_size = (64, 64)  # must match the size used when the images were preprocessed
batch_size = 32      # number of images yielded per batch

# Settings shared by the training and the test directory iterators
flow_options = dict(
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',  # multi-class labels, one class per sub-folder
    color_mode='grayscale',    # images are read as single-channel
)

# The only transformation configured here is scaling pixel values by 1/255
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Batches of (images, labels) read from the class sub-folders of each split
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
test_generator = test_datagen.flow_from_directory(test_dir, **flow_options)

Found 33610 images belonging to 35 classes.
Found 8390 images belonging to 35 classes.


In [10]:
from tensorflow import keras
from tensorflow.keras import layers 
from tensorflow.keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D      #importing the required modules , convulation layers and Pooling 
from keras.layers import Dropout,Flatten,Dense
from keras import backend as K

# Shape of one network input: 64x64 pixels (the preprocessing resize size)
# with a single channel, because the images are grayscale.
input_shape = (64, 64, 1)

def create_vggnet(input_shape,num_classes):
    """Build a VGG-style convolutional classifier.

    The network has three convolutional stages with 64, 128 and 256 filters
    (2, 2 and 3 Conv2D layers respectively, all 3x3 with ReLU), each closed by
    a 2x2 max-pooling layer with stride 2. The very first convolution is the
    only one created without ``padding='same'``. The stages are followed by
    Flatten, two 4096-unit ReLU Dense layers and a softmax output layer.

    Args:
        input_shape: shape of a single input image, passed to the first
            Conv2D layer.
        num_classes: number of units in the final softmax layer.

    Returns:
        The assembled, not yet compiled ``keras.Sequential`` model.
    """
    kernel = (3, 3)
    pool = (2, 2)

    stack = [
        # Stage 1: 64 filters
        layers.Conv2D(64, kernel, activation='relu', input_shape=input_shape),
        layers.Conv2D(64, kernel, activation='relu', padding='same'),
        layers.MaxPooling2D(pool, strides=pool),
        # Stage 2: 128 filters
        layers.Conv2D(128, kernel, activation='relu', padding='same'),
        layers.Conv2D(128, kernel, activation='relu', padding='same'),
        layers.MaxPooling2D(pool, strides=pool),
        # Stage 3: 256 filters
        layers.Conv2D(256, kernel, activation='relu', padding='same'),
        layers.Conv2D(256, kernel, activation='relu', padding='same'),
        layers.Conv2D(256, kernel, activation='relu', padding='same'),
        layers.MaxPooling2D(pool, strides=pool),
        # Classifier head
        layers.Flatten(),
        layers.Dense(4096, activation='relu'),
        layers.Dense(4096, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ]

    network = keras.Sequential()
    for layer in stack:
        network.add(layer)
    return network


num_classes = 35  # size of the softmax output layer

# Instantiate the network for the configured input shape and class count
model1 = create_vggnet(input_shape=input_shape, num_classes=num_classes)


In [11]:
from keras.optimizers import Adam #Adam is used to optimize the laerning rate for the parameters and even handling noisy data

In [12]:
# Configure training: categorical cross-entropy loss, Adam optimizer, and
# accuracy reported as the metric.
# NOTE(review): newer Keras optimizers may reject the `decay` argument -
# confirm against the installed Keras/TensorFlow version.
model1.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001, decay=1e-6),
    metrics=['accuracy'],
)

# Take the step counts from the generators themselves: len(generator) is the
# number of batches in one full pass. Hard-coding `samples // 64` while the
# generators yield batches of 32 would show the model only about half of the
# data per epoch.
model1_info = model1.fit(
    train_generator,
    steps_per_epoch=len(train_generator),   # one full pass over the training split
    epochs=5,                               # number of passes over the training data
    validation_data=test_generator,
    validation_steps=len(test_generator),   # one full pass over the test split
)




Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [13]:
# Re-compile with the same loss, metric and a newly constructed Adam optimizer
# as the compile call that preceded training.
optimizer = Adam(learning_rate=0.0001, decay=1e-6)
model1.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)


In [15]:
# Score the trained model on the test generator; the result is unpacked as the
# loss followed by the compiled metric (accuracy).
evaluation = model1.evaluate(test_generator)
test_loss, test_accuracy = evaluation
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')


Test Loss: 5.4203115951168e-07, Test Accuracy: 1.0


In [16]:
# Store only the weights, in the current working directory
weights_file = 'model1.h5'
model1.save_weights(weights_file)

# Store the complete model at the location the prediction section loads from.
# NOTE(review): 'C:Users\...' has no backslash after the drive letter, so
# Windows resolves it relative to the current directory of drive C: - confirm
# this is intended and keep it in sync with the path passed to load_model.
model_file = r'C:Users\User\model1.h5'
model1.save(model_file)

Real-Time Sign Language Prediction

In [14]:
import cv2
import numpy as np
from tensorflow import keras
import tensorflow as tf

# Restore the trained model from disk (path to your saved model).
# NOTE(review): 'C:Users\...' has no backslash after the drive letter, so
# Windows resolves it relative to the current directory of drive C: - confirm
# this is intended and keep it in sync with the path used when saving.
model_path = r'C:Users\User\model1.h5'
model = tf.keras.models.load_model(model_path)

# Index -> label lookup for the 35 ISL classes: digits 1-9 followed by A-Z.
# NOTE(review): presumably this order matches the class indices assigned by
# the training generator - verify against train_generator.class_indices.
classes = [str(digit) for digit in range(1, 10)] + list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

# Open the webcam (device index 0 is the default camera)
cap = cv2.VideoCapture(0)

# Network input size; must equal the size of the preprocessed training images
frame_width = frame_height = 64

# Classify webcam frames until the stream ends or 'q' is pressed.
# The try/finally guarantees the camera and the preview window are released
# even when a frame fails to process.
try:
    while True:
        # Read video frame
        ret, frame = cap.read()
        if not ret:
            break

        # Build the network input separately (grayscale -> 64x64 -> [0, 1] ->
        # add a channel axis) so the captured colour frame stays intact for display.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = cv2.resize(gray, (frame_width, frame_height))
        input_data = np.expand_dims(gray / 255.0, axis=-1)

        # Make prediction on a batch that holds this single frame
        prediction = model.predict(np.array([input_data]), verbose=0)
        predicted_class = int(np.argmax(prediction))
        confidence = float(np.max(prediction))  # highest class probability; printed by the next cell
        predicted_label = classes[predicted_class]

        # Draw the label on the full-size colour frame, where a scale-1 font
        # and a red BGR colour are actually visible, then show it.
        cv2.putText(frame, predicted_label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        cv2.imshow('Sign Language Detection', frame)

        # Exit loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release video capture and close windows
    cap.release()
    cv2.destroyAllWindows()






In [15]:
# Report the last prediction made by the webcam loop.
# The confidence is the largest probability in the model's output for that
# frame; derive it here so this cell does not rely on an undefined name.
confidence = float(np.max(prediction))

print(f"Predicted Class: {predicted_class}")                 # index of the predicted class
print(f"Confidence:{confidence}")                            # probability assigned to that class
print(f"Predicted Class Label: {classes[predicted_class]}")  # label of the predicted class

Predicted Class: 30
Confidence:1.0
Predicted Class Label: V


In [None]:
# Hold each sign steady in front of the webcam for at least 10-15 seconds so the model can predict the right sign
