### Importing Necessary Libraries

In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model

import mediapipe as mp

### Data Preparation And Visualisations

In [1]:
def load_frames_from_directory(directory):
    """Collect image file names and paths, grouped by the folder holding them.

    Walks ``directory`` recursively with ``os.walk``. Every folder that
    directly contains at least one ``.jpg``/``.jpeg``/``.png`` file (extension
    matched case-insensitively) contributes one group; folders without images
    are skipped.

    Args:
        directory: Root folder to scan. A path that does not exist yields two
            empty lists, because ``os.walk`` produces nothing for it.

    Returns:
        A tuple ``(img_id, img_path)`` of parallel lists of lists:
        ``img_id[k]`` holds the file names found in the k-th image folder and
        ``img_path[k]`` the matching ``os.path.join(root, name)`` paths.
        Groups and files keep the order in which ``os.walk`` reports them; no
        sorting is applied here.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    img_id = []
    img_path = []
    for root, _dirs, files in os.walk(directory):
        # Temporary per-folder file names and paths.
        names = []
        paths = []
        # Set-based duplicate guard: constant-time lookups instead of
        # re-scanning the growing list of names for every file.
        seen = set()
        for file in files:
            # Keep image files only.
            if not file.lower().endswith(image_extensions):
                continue
            if file in seen:
                continue
            seen.add(file)
            names.append(file)
            paths.append(os.path.join(root, file))

        # Folders without any image do not produce a group.
        if names:
            img_id.append(names)
            img_path.append(paths)

    return img_id, img_path

# Gather the frame folders of both corpora into one pair of parallel lists:
# sentence-level groups first, word-level groups appended after them.
All_image_id, All_image_path = load_frames_from_directory('Frames_Sentence_Level')

word_image_id, word_image_path = load_frames_from_directory('Frames_Word_Level')

All_image_id.extend(word_image_id)
All_image_path.extend(word_image_path)

# Build the corpus file paths with os.path.join rather than a hard-coded '\\'
# so the notebook also runs on systems whose path separator is '/'.
CORPUS_DIR = 'corpus_csv_files'
df1 = pd.read_csv(os.path.join(CORPUS_DIR, 'ISL Corpus sign glosses.csv'))
df2 = pd.read_excel(os.path.join(CORPUS_DIR, 'ISL_CSLRT_Corpus_frame_details.xlsx'))
df3 = pd.read_excel(os.path.join(CORPUS_DIR, 'ISL_CSLRT_Corpus_word_details.xlsx'))

# Number of frame groups (sentence-level plus word-level).
data_num = len(All_image_path)

# One caption per frame group, wrapped in '<start>' / '<end>' markers.
captions = []

for group in All_image_path:
    # A group is identified in the corpus tables by the path of its first
    # frame, prefixed with the corpus folder name.
    frame_key = 'ISL_CSLRT_Corpus\\' + group[0]

    if frame_key in df2['Frames path'].values:
        # Sentence-level group: map the sentence to its sign gloss, falling
        # back to the upper-cased sentence when no gloss row matches.
        sentence = df2.loc[df2['Frames path'] == frame_key, 'Sentence'].values[0]
        sign_gloss = df1.loc[df1['Sentence'] == sentence, 'SIGN GLOSSES'].values
        if sign_gloss.size <= 0:
            sign_gloss = np.array([sentence.upper()])

    elif frame_key in df3['Frames path'].values:
        # Word-level group: the word itself is the caption.
        sign_gloss = df3.loc[df3['Frames path'] == frame_key, 'Word'].values

    else:
        sign_gloss = np.array(["No caption found"])

    captions.append('<start> ' + sign_gloss[0] + ' <end>')
    

In [2]:
# Bring every frame group to max_length entries.
padded_frames = []
max_length = 30

for group in All_image_path:
    n_frames = len(group)

    if n_frames == max_length:
        # Already the target length: the group list is reused as-is.
        frames = group
    elif n_frames < max_length:
        # Too short: repeat the last frame path to fill the remainder.
        frames = group + [group[-1]] * (max_length - n_frames)
    else:
        # Too long: walk the group at a fractional stride and truncate each
        # position to an integer index.
        interval = n_frames / max_length
        frames = [group[int(j)] for j in np.arange(0, n_frames - 1, interval)]

    padded_frames.append(frames)

In [None]:
# Tokenizer over the caption strings. The filter string lists the punctuation
# to strip; '<' and '>' are not in it, so the '<start>' / '<end>' markers
# keep their brackets.
tokenizer = tf.keras.preprocessing.text.Tokenizer(
    filters='!"#$%&()*+,-./:;=?@[\\]^_`{|}~'
)
tokenizer.fit_on_texts(captions)
# Number of distinct tokens plus one.
max_size = len(tokenizer.word_index) + 1

# Captions as integer token sequences, and the longest sequence length.
train_seqs = tokenizer.texts_to_sequences(captions)
max_len = max(map(len, train_seqs))

# Pad the sequences at the end ('post').
caption_vector = tf.keras.preprocessing.sequence.pad_sequences(train_seqs, padding='post')

# Whole-caption class labels: each caption string is mapped to an integer
# class id, then expanded to a one-hot integer row.
label_encoder = LabelEncoder()
y = to_categorical(label_encoder.fit_transform(captions)).astype(int)

In [None]:
# One-off cache builder, kept commented out: it ran load_image over every path
# in padded_frames and saved the stacked result as 'seq.npy', the file that is
# read back with np.load in a later cell.
# To re-run it, note that `tqdm` is not among the imports at the top of this
# notebook and that load_image is only defined in a later cell, so both must be
# available first. The `labels` list it fills is not written to disk.
#sequence,labels=[],[]
#for i in tqdm(range(len(padded_frames))):
    #sequence.append([load_image(img) for img in padded_frames[i]])
    #labels.append(caption_vector[i])
#sequence=np.array(sequence)
#np.save('seq.npy',sequence)

In [None]:
# Raw caption strings, one entry per frame group.
labels = np.array(captions)
# Keypoint sequences cached on disk by an earlier run.
sequence = np.load('seq.npy')

# The cache is positional: sequence[k] is paired with captions[k] downstream.
# Fail early, with a clear message, if the file was produced from a different
# number of frame groups than the captions built in this session.
if len(sequence) != len(labels):
    raise ValueError(
        f"'seq.npy' holds {len(sequence)} sequences but {len(labels)} captions "
        "were built; regenerate the cache so both stay aligned."
    )

### Extracting Body Coordinates by Mediapipe

In [6]:
# Short aliases for the two MediaPipe solution modules used in this notebook.
mp_holistic = mp.solutions.holistic # Holistic model module (Holistic class, POSE_/HAND_CONNECTIONS)
mp_drawing = mp.solutions.drawing_utils # Drawing helpers (draw_landmarks, DrawingSpec)

def mediapipe_detection(image, model):
    """Run ``model.process`` on a frame and return the frame with the results.

    The frame is converted BGR -> RGB before the model call and converted back
    RGB -> BGR afterwards.

    Args:
        image: Frame accepted by ``cv2.cvtColor`` with ``COLOR_BGR2RGB``.
        model: Object exposing ``process(image)``; the notebook passes a
            MediaPipe Holistic instance.

    Returns:
        Tuple ``(image, results)``: the round-tripped BGR frame and whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Mark the buffer read-only for the duration of the model call.
    rgb_frame.flags.writeable = False
    detections = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, detections

In [7]:
def draw_styled_landmarks(image, results):
    """Draw pose, left-hand and right-hand landmarks onto ``image`` in place.

    Each landmark group gets its own colour pair; line thickness is 2
    throughout, with circle radius 4 for the landmark spec and 2 for the
    connection spec.

    Args:
        image: Frame passed straight to ``mp_drawing.draw_landmarks``.
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``.
    """
    # (attribute on results, connection set on mp_holistic,
    #  landmark colour, connection colour) -- drawn in this order.
    styles = (
        ('pose_landmarks', 'POSE_CONNECTIONS', (80,22,10), (80,44,121)),
        ('left_hand_landmarks', 'HAND_CONNECTIONS', (121,22,76), (121,44,250)),
        ('right_hand_landmarks', 'HAND_CONNECTIONS', (245,117,66), (245,66,230)),
    )
    for landmarks_attr, connections_attr, landmark_color, connection_color in styles:
        mp_drawing.draw_landmarks(
            image,
            getattr(results, landmarks_attr),
            getattr(mp_holistic, connections_attr),
            mp_drawing.DrawingSpec(color=landmark_color, thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=connection_color, thickness=2, circle_radius=2),
        )

In [8]:
# Single Holistic instance shared by every load_image call; it is not closed
# anywhere in this notebook.
# NOTE(review): static_image_mode is left at its default. load_image feeds this
# one instance frames from many unrelated clips, so if the default is the
# video-tracking mode, tracking state may carry over between clips -- confirm
# against the MediaPipe Holistic documentation.
holistic=mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

In [11]:
def extract_keypoints(results):
    """Flatten pose and hand landmarks into one 1-D feature vector.

    Layout of the returned array: pose first (x, y, z, visibility per
    landmark), then left hand, then right hand (x, y, z per landmark). A group
    that is missing (falsy) on ``results`` is replaced by zeros sized for 33
    pose landmarks or 21 hand landmarks, i.e. 33*4 + 21*3 + 21*3 = 258 values
    when every present group has those landmark counts.

    Args:
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``; each is either falsy or has a
            ``landmark`` iterable.

    Returns:
        1-D ``numpy`` array: the concatenation pose + left hand + right hand.
    """
    def _flatten(group, fields, expected_count):
        # Absent group -> zero block of the expected size.
        if not group:
            return np.zeros(expected_count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    pose = _flatten(results.pose_landmarks, xyz + ('visibility',), 33)
    left_hand = _flatten(results.left_hand_landmarks, xyz, 21)
    right_hand = _flatten(results.right_hand_landmarks, xyz, 21)
    return np.concatenate([pose, left_hand, right_hand])

In [87]:
def load_image(image_path):
    """Read one frame from disk and return its keypoint vector.

    The image is resized to 480x480, passed through ``mediapipe_detection``
    with the module-level ``holistic`` model, and the detection results are
    flattened by ``extract_keypoints``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The 1-D array produced by ``extract_keypoints``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    # cv2.imread signals a missing or unreadable file by returning None;
    # report the offending path here instead of failing inside cv2.resize.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path!r}")
    img = cv2.resize(img, (480, 480))
    # Only the detection results are needed; the returned frame is discarded.
    _, results = mediapipe_detection(img, holistic)
    return extract_keypoints(results)

### Training The Data

In [97]:
# Features: the cached keypoint sequences.
X = sequence
# Targets: one-hot class rows. `labels` holds the raw caption strings, which
# the categorical_crossentropy loss used for training cannot consume, so they
# are encoded with the LabelEncoder that was fitted on the same captions.
y = to_categorical(label_encoder.transform(labels)).astype(int)

In [99]:
# Hold out 20% of the samples for testing. The fixed random_state makes the
# split identical on every run, so results can be compared across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [100]:
# Sanity check: target shape first, then feature shape.
for split in (y_train, X_train):
    print(split.shape)

(621, 211)
(621, 30, 258)


In [64]:
#del model

In [105]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense , Dropout, Flatten

# Take the layer sizes from the data instead of hard-coding them, so the
# network keeps matching the inputs and targets when the dataset changes.
num_classes = len(label_encoder.classes_)   # one output unit per distinct caption
input_shape = X_train.shape[1:]             # (frames per sequence, keypoints per frame)

# Define the model
model = Sequential()

# First LSTM returns the full sequence so the second LSTM receives one vector
# per time step; the second LSTM returns only its final output.
model.add(LSTM(units=256, input_shape=input_shape, return_sequences=True))
model.add(LSTM(units=256, return_sequences=False))
model.add(Dropout(0.6))

# Flatten receives an already 2-D (batch, 256) tensor here, so it does not
# change the shape; it is kept to leave the layer stack unchanged.
model.add(Flatten())
model.add(Dense(units=250, activation='relu'))
model.add(Dense(units=250, activation='relu'))
model.add(Dropout(0.6))
model.add(Dense(units=num_classes, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Display the model summary
model.summary()

Model: "sequential_23"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_45 (LSTM)              (None, 30, 256)           527360    
                                                                 
 lstm_46 (LSTM)              (None, 256)               525312    
                                                                 
 dropout_70 (Dropout)        (None, 256)               0         
                                                                 
 flatten_14 (Flatten)        (None, 256)               0         
                                                                 
 dense_56 (Dense)            (None, 250)               64250     
                                                                 
 dense_57 (Dense)            (None, 250)               62750     
                                                                 
 dropout_71 (Dropout)        (None, 250)             

In [111]:
# Stop training once val_loss has gone 50 epochs without improving, and
# restore the weights from the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)

In [112]:
# Train with 10% of X_train held out for validation; epochs=1000 is only an
# upper bound, since the early_stopping callback can end the run sooner.
# NOTE(review): the log recorded under this cell reads "Epoch N/500", so it
# comes from a run with a different `epochs` value than the 1000 set here.
model.fit(X_train, y_train, epochs=1000,validation_split=0.1,callbacks=[early_stopping])

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.callbacks.History at 0x1b73bd05dd0>

In [113]:
# Score the held-out split; with the compile settings used for this model the
# result is [loss, accuracy].
# NOTE(review): the recorded result is loss ~5.22 with accuracy 0.0. A loss
# this close to ln(211) ~= 5.35 suggests predictions no better than guessing
# among 211 classes -- check that features and labels are aligned and that
# the test classes also occur in the training split.
model.evaluate(X_test,y_test)



[5.222554683685303, 0.0]

In [75]:
# Write the current model to 'model01.keras'.
# NOTE(review): this cell's execution count (75) is lower than the training
# cell's (112), so the saved file may predate the training run shown in this
# notebook -- re-run this cell after training.
model.save('model01.keras')

array([[1, 2, 7],
       [0, 9, 8],
       [1, 9, 8]])

In [11]:
# Reload the saved model from disk, replacing the in-memory `model`.
model=load_model('model01.keras')