In [None]:
# Mount Google Drive at /content/drive. The dataset directory read by the
# feature-extraction cell lives under /content/drive/MyDrive, so this cell has
# to run first. `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import numpy as np
import cv2
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

# Load MobileNetV2 pre-trained on ImageNet *without* its classification head
# (include_top=False). With pooling='avg' the network emits one flat embedding
# per 224x224x3 image instead of class probabilities.
feature_extractor = MobileNetV2(weights='imagenet', include_top=False, pooling='avg', input_shape=(224, 224, 3))

# Backward-compatible alias for the original global name. A later cell rebinds
# `model` to the classifier, so the extraction code below deliberately reads
# `feature_extractor` and keeps working no matter which cells ran in between.
model = feature_extractor

# Function to extract features from a single frame
def extract_features(frame):
    """Return the MobileNetV2 embedding of a single frame.

    Args:
        frame: One image matching the extractor's input shape (224, 224, 3),
            with pixel values in the 0-255 range. The ImageNet weights were
            trained on RGB images, so frames decoded by OpenCV (BGR) should be
            converted by the caller before being passed in.

    Returns:
        A NumPy array of shape (1, n_features): the pooled embedding with the
        leading batch axis still present.
    """
    img = image.img_to_array(frame)
    img = np.expand_dims(img, axis=0)  # add the batch axis the network expects
    img = preprocess_input(img)        # MobileNetV2-specific pixel scaling
    # verbose=0: this runs once per video frame, and the default per-call
    # progress bar produced thousands of output lines in the notebook.
    features = feature_extractor.predict(img, verbose=0)
    return features

# Function to extract features from a video
def extract_video_features(video_path):
    """Return one feature vector for a video by averaging per-frame features.

    Every frame is decoded with OpenCV, converted from BGR to RGB, resized to
    224x224 and passed through `extract_features`; the per-frame results are
    then mean-pooled over time.

    NOTE: features extracted before the BGR->RGB fix are not comparable with
    the ones produced here; re-extract the dataset and retrain any classifier
    that was fitted on the old features.

    Args:
        video_path: Path to a video file readable by `cv2.VideoCapture`.

    Returns:
        A 1-D NumPy array: the element-wise mean of all per-frame features.

    Raises:
        ValueError: If the video cannot be opened or contains no decodable
            frames (previously this silently produced NaN).
    """
    cap = cv2.VideoCapture(video_path)
    features_list = []
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {video_path}")
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # OpenCV decodes frames as BGR, while the ImageNet-pretrained
            # network expects RGB channel order.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, (224, 224))  # Ensure the frame size matches the model's input size
            features_list.append(extract_features(frame))
    finally:
        # Release the decoder even if feature extraction fails midway.
        cap.release()

    if not features_list:
        raise ValueError(f"No frames could be read from video: {video_path}")

    # Average pooling over all frames and squeeze to remove extra dimensions
    return np.mean(features_list, axis=0).squeeze()

# Directory containing preprocessed videos: one sub-folder per class, each
# holding that class's .mp4 files.
preprocessed_video_dir = '/content/drive/MyDrive/processed_sign_language_dataset'

# Lists to store extracted features and labels (kept index-aligned), plus the
# videos that had to be left out.
video_features_list = []
labels = []
skipped_videos = []

# Extract features for each video in each class.
# os.listdir() returns entries in arbitrary order, so both levels are sorted:
# the sample order - and therefore the seeded train/val/test split made from
# it - is then the same on every run.
for class_folder in sorted(os.listdir(preprocessed_video_dir)):
    class_folder_path = os.path.join(preprocessed_video_dir, class_folder)
    if not os.path.isdir(class_folder_path):
        continue
    for filename in sorted(os.listdir(class_folder_path)):
        if not filename.endswith(".mp4"):
            continue
        video_path = os.path.join(class_folder_path, filename)
        try:
            features = np.asarray(extract_video_features(video_path))
        except ValueError as exc:
            skipped_videos.append(video_path)
            print(f"Skipping {video_path}: {exc}")
            continue
        # Guard against degenerate results (e.g. the NaN produced by averaging
        # zero frames) so X stays a clean 2-D float array.
        if features.ndim != 1 or not np.isfinite(features).all():
            skipped_videos.append(video_path)
            print(f"Skipping {video_path}: no usable features extracted")
            continue
        video_features_list.append(features)
        labels.append(class_folder)

if not video_features_list:
    raise RuntimeError(f"No usable .mp4 videos found under {preprocessed_video_dir}")

# Convert lists to numpy arrays
X = np.array(video_features_list)
y = np.array(labels)

if skipped_videos:
    print(f"Skipped {len(skipped_videos)} video(s) without usable features.")
print("Feature extraction complete.")
print("Shape of extracted features:", X.shape)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Feature extraction complete.
Shape of extracted features: (30, 1280)


In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import joblib

# Turn the class-name strings in `y` into integer ids, then into one-hot rows
# suitable for a categorical cross-entropy loss.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_one_hot = to_categorical(
    y_encoded,
    num_classes=9,  # Adjust num_classes based on your dataset
)

print("Label encoding complete.")
print("Shape of one-hot encoded labels:", y_one_hot.shape)

# Persist the fitted encoder so predicted ids can be mapped back to class
# names later. Left as the cell's last expression so the written path is shown.
joblib.dump(label_encoder, 'label_encoder.pkl')

Label encoding complete.
Shape of one-hot encoded labels: (30, 9)


['label_encoder.pkl']

In [None]:
from sklearn.model_selection import train_test_split

# Stage 1: keep 60% of the samples for training and hold out the remaining 40%.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y_one_hot, test_size=0.4, random_state=42
)

# Stage 2: cut the held-out part in half -> 20% validation, 20% test overall.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

print("Data splitting complete.")
# Report the (features, labels) shapes of each partition.
for caption, features_part, labels_part in (
    ("Shape of training data:", X_train, y_train),
    ("Shape of validation data:", X_val, y_val),
    ("Shape of test data:", X_test, y_test),
):
    print(caption, features_part.shape, labels_part.shape)

Data splitting complete.
Shape of training data: (18, 1280) (18, 9)
Shape of validation data: (6, 1280) (6, 9)
Shape of test data: (6, 1280) (6, 9)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Define the model: a small fully connected classifier on top of the
# pre-extracted video features.
# NOTE: `model` is also the name the feature-extraction cell binds to
# MobileNetV2; from this cell onwards it refers to the classifier.

# Read the output width from the one-hot labels instead of hard-coding it, so
# the softmax layer always matches the label encoding.
num_classes = y_train.shape[1]

model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model (categorical cross-entropy pairs with one-hot labels)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model. The History object is kept so per-epoch loss/accuracy can
# be inspected or plotted afterwards instead of being shown as a bare repr.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val))


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7896cb987ac0>

In [None]:
# Evaluate the model on the validation set (the same X_val / y_val that were
# passed to fit() as validation_data).
# `loss` and `accuracy` are reused by the test-set evaluation cell below.
loss, accuracy = model.evaluate(X_val, y_val)
print(f'Validation Accuracy: {accuracy}')


Validation Accuracy: 0.8333333134651184


In [None]:
# Evaluate the model on the held-out test set (X_test / y_test were not passed
# to fit()). This overwrites the `loss` / `accuracy` values from the
# validation evaluation above.
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy}')


Test Accuracy: 0.8333333134651184


In [None]:
# Persist the trained classifier. The path is relative, so the file is written
# to the kernel's current working directory.
# NOTE(review): the truncated output below looks like Keras' warning about the
# legacy HDF5 (.h5) format - confirm, and consider the native `.keras` format
# once every loader of this file can be updated at the same time.
# NOTE(review): if the working directory is not on the mounted Drive the file
# may not outlive the Colab session - confirm where it should be stored.
model.save('sign_language_model.h5')


  saving_api.save_model(
