In [1]:
!pip install yt-dlp
# !pip install opencv-python
# !pip install pandas
# !pip install google-colab

Collecting yt-dlp
  Downloading yt_dlp-2023.11.16-py2.py3-none-any.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m31.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting mutagen (from yt-dlp)
  Downloading mutagen-1.47.0-py3-none-any.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.4/194.4 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pycryptodomex (from yt-dlp)
  Downloading pycryptodomex-3.19.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m65.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting websockets (from yt-dlp)
  Downloading websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.2/130.2 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
Collecting brotli 

In [2]:
import os
import json
import yt_dlp
import cv2
import math
import random
import concurrent.futures
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout, LSTM, TimeDistributed
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Input, Lambda
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import numpy as np
from tensorflow.keras.utils import to_categorical


In [3]:

from google.colab import drive

# Mount Google Drive at /content/drive (remounting if it is already mounted).
drive.mount('/content/drive', force_remount=True)

# Folder on Drive that holds the MS-ASL annotation files.
MSASL_DIR = '/content/drive/My Drive/Colab Notebooks/MS-ASL'


def _read_json(path):
    """Return the parsed contents of the JSON file at `path`."""
    # JSON text is UTF-8 by specification; do not rely on the locale default.
    with open(path, 'r', encoding='utf-8') as fh:
        return json.load(fh)


train_data = _read_json(os.path.join(MSASL_DIR, 'MSASL_train.json'))
val_data = _read_json(os.path.join(MSASL_DIR, 'MSASL_val.json'))


Mounted at /content/drive


In [4]:
def download_and_extract_frames(video_data, save_frames_path):
    """Resolve a video's stream URL with yt-dlp and dump every frame as a JPEG.

    Frames are written to ``<save_frames_path>/<video_data['file']>/frame_<i>.jpg``
    where ``i`` counts decoded frames starting at 0.

    Args:
        video_data: Mapping with at least the keys ``'url'`` (passed to yt-dlp)
            and ``'file'`` (used as the name of the output sub-directory).
        save_frames_path: Directory under which the per-video folder is created.

    Returns:
        ``True`` if at least one frame was written. ``False`` if the stream URL
        could not be resolved, the stream could not be opened, or no frame
        could be written.
    """
    video_url = video_data['url']
    video_id = video_data['file']
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]',
        'quiet': True,
        'no_warnings': True
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # download=False: only the metadata is fetched; OpenCV reads the
            # stream directly from the resolved URL below.
            video_info = ydl.extract_info(video_url, download=False)
            stream_url = video_info['url']
    except Exception as e:
        print(f"Error downloading {video_url}: {e}")
        return False

    cap = cv2.VideoCapture(stream_url)
    if not cap.isOpened():
        # Without this check an unopenable stream silently yields zero frames.
        cap.release()
        print(f"Error opening video stream for {video_url}")
        return False

    output_dir = os.path.join(save_frames_path, video_id)
    print(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    frame_idx = 0
    frames_written = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_filename = os.path.join(output_dir, f"frame_{frame_idx}.jpg")
            # imwrite reports failure through its return value, not an exception.
            if cv2.imwrite(frame_filename, frame):
                frames_written += 1
            else:
                print(f"Could not write {frame_filename}")
            # The index always advances so file names keep matching frame positions.
            frame_idx += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()

    return frames_written > 0



# Location of the preprocessing checkpoint file; make sure its folder exists.
checkpoint_path = "/content/drive/My Drive/Colab Notebooks/MS-ASL/preprocessing_checkpoint.txt"
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)


def preprocess_data_multithreaded(data, save_frames_path, checkpoint_path, max_workers=10):
    """Extract frames for every not-yet-processed video using a thread pool.

    Videos are handed to ``download_and_extract_frames`` on up to
    ``max_workers`` threads. After each video finishes, its ``'file'`` value is
    appended to the checkpoint file so an interrupted run can resume.

    Args:
        data: Iterable of video dicts. Entries without a ``'file'`` key,
            entries already listed in the checkpoint, and repeated ``'file'``
            values within this call are skipped.
        save_frames_path: Directory passed through to the worker.
        checkpoint_path: Text file with one processed ``'file'`` value per line.
        max_workers: Maximum number of videos processed concurrently.

    Returns:
        None.
    """
    # Load existing checkpoints if available
    if os.path.exists(checkpoint_path):
        with open(checkpoint_path, 'r') as file:
            processed_videos = {line.strip() for line in file}
    else:
        processed_videos = set()

    # Filter out already processed videos and videos without a 'file' key.
    # Repeated names are dropped as well: two threads must never write frames
    # into the same output folder at the same time.
    pending = []
    queued_names = set()
    for item in data:
        if 'file' not in item:
            continue
        name = item['file']
        if name in processed_videos or name in queued_names:
            continue
        queued_names.add(name)
        pending.append(item)

    if not pending:
        return

    checkpoint_dir = os.path.dirname(checkpoint_path)
    if checkpoint_dir:  # a bare file name has no directory to create
        os.makedirs(checkpoint_dir, exist_ok=True)

    executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers)
    try:
        future_to_name = {
            executor.submit(download_and_extract_frames, item, save_frames_path): item['file']
            for item in pending
        }

        # Checkpoint writes happen only here, on the calling thread, so the
        # file is never written concurrently.
        for future in concurrent.futures.as_completed(future_to_name):
            name = future_to_name[future]
            try:
                outcome = future.result()
            except Exception as exc:
                # Leave the video out of the checkpoint so a later run retries it.
                print(f"Failed to process video {name}: {exc}")
                continue

            # Only an explicit False from the worker counts as a failure;
            # any other return value (including None) is treated as done.
            if outcome is False:
                print(f"Not checkpointing video (worker reported failure): {name}")
                continue

            # Debugging information
            print(f"Processed video: {name}")

            # Re-open per video so every entry is closed to disk before moving on.
            with open(checkpoint_path, 'a') as file:
                file.write(name + '\n')

            # Debugging information
            print(f"Updated checkpoint with video: {name}")
    finally:
        # On an error or interrupt, drop the videos that have not started yet
        # (cancel_futures needs Python 3.9+); running ones are allowed to finish.
        executor.shutdown(wait=True, cancel_futures=True)




In [None]:
# Output folders for the extracted frames, one per dataset split
TRAIN_FRAMES_PATH = "/content/drive/My Drive/Colab Notebooks/MS-ASL/asl_dataset/"
VAL_FRAMES_PATH = '/content/drive/My Drive/Colab Notebooks/MS-ASL/asl_val_dataset/'

# Training split
preprocess_data_multithreaded(
    data=train_data,
    save_frames_path=TRAIN_FRAMES_PATH,
    checkpoint_path="/content/drive/My Drive/Colab Notebooks/MS-ASL/preprocessing_checkpoint.txt",
)

# Validation split (tracked in its own checkpoint file)
preprocess_data_multithreaded(
    data=val_data,
    save_frames_path=VAL_FRAMES_PATH,
    checkpoint_path='/content/drive/My Drive/Colab Notebooks/MS-ASL/val_preprocessing_checkpoint.txt',
)


In [5]:


def get_frames_per_class(data_path):
    """Count the image files inside each sub-directory of `data_path`.

    Only names ending in ``.jpg``, ``.jpeg`` or ``.png`` (case-sensitive) are
    counted; entries of `data_path` that are not directories are ignored.

    Returns:
        dict mapping each sub-directory name to its number of image files.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    counts = {}
    for entry in os.listdir(data_path):
        entry_path = os.path.join(data_path, entry)
        if not os.path.isdir(entry_path):
            continue
        counts[entry] = sum(
            1 for fname in os.listdir(entry_path) if fname.endswith(image_suffixes)
        )
    return counts

def _frame_sort_key(path):
    """Order frame files by numeric index rather than lexicographically.

    A name such as ``frame_12.jpg`` sorts by the integer after its last
    underscore, so ``frame_2`` comes before ``frame_10``. Names without such
    an index sort after all indexed ones, by path.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    suffix = stem.rsplit('_', 1)[-1]
    if suffix.isdecimal():
        return (0, int(suffix), path)
    return (1, 0, path)


def load_data(data_path, batch_size, num_classes, num_frames_per_sequence=5):
    """Endlessly yield random batches of fixed-length frame sequences.

    Every sub-directory of `data_path` is one class. Class indices are assigned
    by sorted folder name, limited to the first `num_classes` folders. Because
    the mapping is derived from this directory's own listing, two generators
    built over different directories only agree on labels if their folder
    names match.

    Args:
        data_path: Directory containing one folder of frame images per class.
        batch_size: Number of sequences per yielded batch.
        num_classes: Width of the one-hot label vectors.
        num_frames_per_sequence: Number of consecutive frames per sequence.

    Yields:
        ``(batch_x, batch_y)`` where ``batch_x`` stacks `batch_size` sequences of
        shape ``(num_frames_per_sequence, 224, 224, 3)`` with pixel values
        divided by 255.0, and ``batch_y`` is the one-hot label matrix of shape
        ``(batch_size, num_classes)``.

    Raises:
        ValueError: If no class folder has at least `num_frames_per_sequence`
            frames (otherwise the sampling loop would never terminate).
        RuntimeError: If too many sequences in a row fail to load.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    max_consecutive_failures = 100

    # Only directories can be classes; stray files must not take a label index.
    classes = sorted(
        name for name in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, name))
    )
    class_indices = dict(zip(classes, range(num_classes)))

    # Classes that can still be sampled; folders beyond num_classes have no label.
    candidates = [name for name in classes if name in class_indices]
    # Sorted frame lists per class, filled lazily so each folder is listed once.
    frame_cache = {}
    consecutive_failures = 0

    while True:
        batch_x = []
        batch_y = []

        while len(batch_x) < batch_size:
            if not candidates:
                raise ValueError(
                    f"No class folder in {data_path!r} contains at least "
                    f"{num_frames_per_sequence} frames"
                )

            class_name = random.choice(candidates)
            try:
                frame_files = frame_cache.get(class_name)
                if frame_files is None:
                    class_path = os.path.join(data_path, class_name)
                    frame_files = sorted(
                        (os.path.join(class_path, f) for f in os.listdir(class_path)
                         if f.endswith(image_extensions)),
                        key=_frame_sort_key,
                    )
                    if len(frame_files) < num_frames_per_sequence:
                        # Too short to ever produce a sequence: stop sampling it.
                        candidates.remove(class_name)
                        continue
                    frame_cache[class_name] = frame_files

                start_index = random.randint(0, len(frame_files) - num_frames_per_sequence)
                sequence_frames = frame_files[start_index:start_index + num_frames_per_sequence]

                sequence_images = np.array([
                    img_to_array(load_img(frame, target_size=(224, 224))) / 255.0
                    for frame in sequence_frames
                ])
            except Exception as e:
                # Best effort: skip the unreadable sequence, but report it and
                # give up if nothing loads at all instead of spinning forever.
                consecutive_failures += 1
                print(f"Skipping sequence from {class_name}: {e}")
                if consecutive_failures >= max_consecutive_failures:
                    raise RuntimeError(
                        f"{consecutive_failures} consecutive sequences failed to load "
                        f"from {data_path!r}"
                    ) from e
                continue

            if sequence_images.shape == (num_frames_per_sequence, 224, 224, 3):
                batch_x.append(sequence_images)
                batch_y.append(class_indices[class_name])
                consecutive_failures = 0

        batch_x = np.array(batch_x)
        batch_y = to_categorical(batch_y, num_classes=num_classes)
        yield batch_x, batch_y




In [6]:


# Where the extracted frames of each split live on Drive
TRAIN_FRAMES_PATH = "/content/drive/My Drive/Colab Notebooks/MS-ASL/asl_dataset/"
VAL_FRAMES_PATH = '/content/drive/My Drive/Colab Notebooks/MS-ASL/asl_val_dataset/'

# Training schedule
batch_size, epochs = 32, 10

# Model parameters
num_frames_per_sequence = 5  # Adjust as needed
# e.g., 5 frames of 224x224 RGB images
input_shape = (num_frames_per_sequence,) + (224, 224, 3)
# Number of classes = number of entries in the training frames folder
num_classes = len(os.listdir(TRAIN_FRAMES_PATH))

def build_model(input_shape, num_classes, num_frames_per_sequence):
    """Assemble a frozen-VGG16 + LSTM classifier over frame sequences.

    Args:
        input_shape: Shape of one sample; its first entry is the sequence
            axis and the remaining entries are passed to VGG16 as the
            per-frame input shape.
        num_classes: Number of units in the softmax output layer.
        num_frames_per_sequence: Accepted for interface compatibility; the
            body does not read it (the sequence length comes from `input_shape`).

    Returns:
        The uncompiled Sequential model.
    """
    # Per-frame feature extractor: ImageNet VGG16 without its classifier head,
    # with every layer frozen.
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=input_shape[1:])
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    stack = [
        Input(shape=input_shape),
        TimeDistributed(backbone),           # run VGG16 on each frame
        TimeDistributed(Flatten()),          # one feature vector per frame
        LSTM(256, return_sequences=False),   # keep only the last LSTM output
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ]

    sequence_model = Sequential()
    for component in stack:
        sequence_model.add(component)
    return sequence_model

# input_shape, num_classes and num_frames_per_sequence are defined once in the
# "Model parameters" block at the top of this cell; they are not redefined here.

# Build and compile the model
model = build_model(input_shape, num_classes, num_frames_per_sequence)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Load training and validation data
# NOTE(review): load_data derives label indices from the folder names of the
# directory it is given, so the training and validation generators only agree
# on labels if both directories contain the same folder names -- confirm; a
# mismatch would keep val_accuracy near zero.
train_generator = load_data(TRAIN_FRAMES_PATH, batch_size, num_classes, num_frames_per_sequence)
val_generator = load_data(VAL_FRAMES_PATH, batch_size, num_classes, num_frames_per_sequence)

# The generators are endless, so the epoch length has to be fixed explicitly.
fixed_steps_per_epoch = 100
fixed_validation_steps = 50

checkpoint_path = "/content/drive/My Drive/Colab Notebooks/MS-ASL/checkpoints/model-{epoch:02d}-{val_accuracy:.2f}.h5"
# Make sure the checkpoint folder exists before training starts.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='max'
)

# Training
history = model.fit(
    train_generator,
    steps_per_epoch=fixed_steps_per_epoch,
    epochs=epochs,
    validation_data=val_generator,
    validation_steps=fixed_validation_steps,
    callbacks=[checkpoint]
)


# Save the model
model_save_path = "/content/drive/My Drive/Colab Notebooks/Saved-Model/my_model.h5"
# Create the target folder up front so a finished training run is not lost on save.
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
model.save(model_save_path)
print("Model saved!")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/10
Epoch 1: val_accuracy improved from -inf to 0.00000, saving model to /content/drive/My Drive/Colab Notebooks/MS-ASL/checkpoints/model-01-0.00.h5
Epoch 2/10
Epoch 2: val_accuracy did not improve from 0.00000
Epoch 3/10
Epoch 3: val_accuracy did not improve from 0.00000
Epoch 4/10
Epoch 4: val_accuracy did not improve from 0.00000
Epoch 5/10
Epoch 5: val_accuracy did not improve from 0.00000
Epoch 6/10
Epoch 6: val_accuracy did not improve from 0.00000
Epoch 7/10
Epoch 7: val_accuracy improved from 0.00000 to 0.00438, saving model to /content/drive/My Drive/Colab Notebooks/MS-ASL/checkpoints/model-07-0.00.h5
Epoch 8/10
Epoch 8: val_accuracy did not improve from 0.00438
Epoch 9/10
Epoch 9: val_accuracy did not improve from 0.00438
Epoch 10/10
Epoch 10: val_accuracy did not improve from 0.00438
Model saved!


In [None]:
# Testing the generator
# Reuses TRAIN_FRAMES_PATH, batch_size, num_classes, num_frames_per_sequence and
# input_shape from the training cell, which must have been run first.
test_generator = load_data(TRAIN_FRAMES_PATH, batch_size, num_classes, num_frames_per_sequence)
x, y = next(test_generator)
print("Shape of x:", x.shape)
print("Shape of y:", y.shape)

# Fail loudly if a batch does not match what the model is built to consume.
assert x.shape == (batch_size,) + tuple(input_shape), x.shape
assert y.shape == (batch_size, num_classes), y.shape

Shape of x: (32, 5, 5, 224, 3)
Shape of y: (32, 970)
