In [1]:
import numpy as np
import os
from keras.layers import Conv3D, MaxPooling3D
from keras.layers import Dense, Dropout, Flatten
from keras.models import Sequential
from keras.layers import Activation, ZeroPadding3D, TimeDistributed, LSTM, GRU, Reshape
#from keras.utils import plot_model
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import gc
from sklearn.metrics import classification_report, balanced_accuracy_score
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from itertools import cycle
from sklearn.preprocessing import LabelEncoder
import sklearn
import seaborn as sns
import tensorflow as tf
from keras import regularizers
from tensorflow.keras.utils import to_categorical
from prettytable import PrettyTable
from matplotlib import pyplot as plt

import time




In [3]:
import os
import cv2

def extract_frames(video_folder, output_folder):
    # Create output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Iterate through each video file in the folder
    for filename in os.listdir(video_folder):
        if filename.endswith(".mp4") or filename.endswith(".avi"):
            video_path = os.path.join(video_folder, filename)
            video_name = os.path.splitext(filename)[0]

            # Create a subfolder for each video
            video_output_folder = os.path.join(output_folder, video_name)
            if not os.path.exists(video_output_folder):
                os.makedirs(video_output_folder)

            # Open the video file
            cap = cv2.VideoCapture(video_path)
            frame_count = 0

            # Read and process each frame
            while(cap.isOpened()):
                ret, frame = cap.read()
                if not ret:
                    break
                
                # Perform your processing here to identify words and numbers in the frame
                # For simplicity, let's just save every 16th frame
                if frame_count % 16 == 0:
                    output_frame_path = os.path.join(video_output_folder, f"{video_name}_frame_{frame_count}.jpg")
                    cv2.imwrite(output_frame_path, frame)

                frame_count += 1

            cap.release()

# Example usage:
video_dataset_folder = "D:\\New Dataset Lip Movement Projec\\FINAL YEAR PROJECT\\Dataset2.0"
output_frames_folder = "D:\\New Dataset Lip Movement Projec\\FINAL YEAR PROJECT\\Frames"
extract_frames(video_dataset_folder, output_frames_folder)


# VGG MODEL BUILDING

In [20]:
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense, Flatten, Reshape
from tensorflow.keras.applications import VGG16

# Hyperparameters (adjust as needed)
IMAGE_SIZE = (96, 72)  # Assuming your image size is 112x80
BATCH_SIZE = 16
EPOCHS = 100
NUM_CLASSES = 17
TRAIN_DATA_DIR = "D:\\New Dataset Lip Movement Projec\\FINAL YEAR PROJECT\\Frames"



# Define data generator
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    #rescale=1./255,
    validation_split=0.2
)

# Load and preprocess the data with splitting
train_generator = datagen.flow_from_directory(
    TRAIN_DATA_DIR,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'  # 80% of the data will be used for training
)

validation_generator = datagen.flow_from_directory(
    TRAIN_DATA_DIR,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'  # 20% of the data will be used for validation
)

# Load pre-trained VGG-16 model, excluding the top classifier layers
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))

# Freeze pre-trained layers to prevent them from being updated during training
for layer in base_model.layers:
    layer.trainable = False

# Define the model architecture
inputs = Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))  # Input layer for images

# Extract features using VGG-16
vgg_features = base_model(inputs)

# **Reshape features to ensure compatibility with LSTM (remove unnecessary dimension):**
# Adjust the shape based on the actual dimensions of `vgg_features` (e.g., if features are (None, 7, 7, 512), reshape to (None, 7, 512))
desired_feature_shape = (vgg_features.shape[1], vgg_features.shape[2] * vgg_features.shape[3])  # Calculate desired shape
reshaped_features = Reshape(desired_feature_shape)(vgg_features)

# Define the LSTM-based classifier
lstm_1 = LSTM(64, return_sequences=True)(reshaped_features)  # First LSTM layer
lstm_2 = LSTM(16)(lstm_1)                                     # Second LSTM layer

Dropout(0.7)
# Final dense layer for classification
predictions = Dense(NUM_CLASSES, activation='softmax')(lstm_2)

# Create the final model
model = tf.keras.Model(inputs=inputs, outputs=predictions)

model.compile(optimizer=tf.keras.optimizers.SGD(clipnorm=1),  # Create Adam optimizer with clipnorm
              loss='categorical_crossentropy',
              metrics=['accuracy'],
              run_eagerly=True,  # Enables early stopping callbacks
              )

# Print model summary
model.summary()

# Train and evaluate your model
# ... (Train your model using train_generator and evaluate using validation_generator)


Found 127 images belonging to 17 classes.


Found 22 images belonging to 17 classes.
Model: "model_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_14 (InputLayer)       [(None, 96, 72, 3)]       0         
                                                                 
 vgg16 (Functional)          (None, 3, 2, 512)         14714688  
                                                                 
 reshape_6 (Reshape)         (None, 3, 1024)           0         
                                                                 
 lstm_12 (LSTM)              (None, 3, 64)             278784    
                                                                 
 lstm_13 (LSTM)              (None, 16)                5184      
                                                                 
 dense_6 (Dense)             (None, 17)                289       
                                                                 
Total params: 1499

In [18]:
print(vgg_features)

KerasTensor(type_spec=TensorSpec(shape=(None, 3, 2, 512), dtype=tf.float32, name=None), name='vgg16/block5_pool/MaxPool:0', description="created by layer 'vgg16'")


# MODEL TRAINING

In [21]:
start_time=time.time()
# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // BATCH_SIZE
)
# Stop the timer
end_time = time.time()

# Compute the computational time
comp_time = end_time - start_time

model.save('vgg_lstm5.h5')

# Evaluate the model
train_loss, train_acc = model.evaluate(train_generator)
val_loss, val_acc = model.evaluate(validation_generator)

print(f'Training Accuracy: {train_acc * 100}')
print(f'Validation Accuracy: {val_acc * 100}')
print(f'Training time: {comp_time}')


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100