In [3]:
import cv2
import os

In [19]:
# Define input and output directories
input_videos_path = "split-videos/train"
output_frames_path = "cropped-frames"

In [16]:

# Crop the center of the frame
def crop_center_square(frame):
    y, x = frame.shape[0:2]
    min_dim = min(y, x)
    start_x = (x // 3) - (min_dim // 5) 
    start_y = (y // 2) - (min_dim // 2)
    
    # Crop the frame to center
    crop = frame[start_y : start_y + min_dim, start_x : start_x + min_dim] 
    
    # Resize the cropped frame to 128x128 pixels
    return cv2.resize(crop, (128, 128))

# Process a single video, crop each frame, and save them as images
def process_and_save_frames(input_video_path, output_dir, max_frames=0):
    cap = cv2.VideoCapture(input_video_path)
    
    frame_count = 0
    processed_frames = 0

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    try:
        while True:
            ret, frame = cap.read()
            if not ret or (max_frames > 0 and processed_frames >= max_frames):
                break
            
            # Crop the frame and resize
            cropped_frame = crop_center_square(frame)
            
            # Save the processed frame as an image
            frame_filename = os.path.join(output_dir, f"frame_{processed_frames:04d}.jpg")
            cv2.imwrite(frame_filename, cropped_frame)
            
            frame_count += 1
            processed_frames += 1

    finally:
        cap.release()
    
    print(f"Processed and saved {processed_frames} frames from {input_video_path} to {output_dir}")


In [17]:
# Process all videos in a directory and save the frames as images in corresponding output directories
def process_all_videos(input_dir, output_dir, max_frames=0):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Loop through all video files in the input directory
    for class_folder in os.listdir(input_dir):
        class_path = os.path.join(input_dir, class_folder)
        
        # Ensure the current path is a directory
        if not os.path.isdir(class_path):
            continue
        
        # Create output directory for the class if it doesn't exist
        output_class_dir = os.path.join(output_dir, class_folder)
        if not os.path.exists(output_class_dir):
            os.makedirs(output_class_dir)
        
        # Iterate over each video in the class folder
        for video_file in os.listdir(class_path):
            if video_file.endswith(('.mp4', '.avi', '.mov')):  # Add extensions as needed
                input_video_path = os.path.join(class_path, video_file)
                output_video_frames_dir = os.path.join(output_class_dir, os.path.splitext(video_file)[0])
                
                print(f"Processing video: {input_video_path}")
                
                # Process the video and save frames as images
                process_and_save_frames(input_video_path, output_video_frames_dir, max_frames)

In [20]:

# Process all videos in the directory and save the frames
process_all_videos(input_videos_path, output_frames_path, max_frames=0)


Processing video: split-videos/train\Amilo\20241001_152509_brightness.mp4
Processed and saved 46 frames from split-videos/train\Amilo\20241001_152509_brightness.mp4 to cropped-frames\Amilo\20241001_152509_brightness
Processing video: split-videos/train\Amilo\20241001_152509_color_jitter.mp4
Processed and saved 46 frames from split-videos/train\Amilo\20241001_152509_color_jitter.mp4 to cropped-frames\Amilo\20241001_152509_color_jitter
Processing video: split-videos/train\Amilo\20241001_152509_noise.mp4
Processed and saved 46 frames from split-videos/train\Amilo\20241001_152509_noise.mp4 to cropped-frames\Amilo\20241001_152509_noise
Processing video: split-videos/train\Amilo\20241001_152509_rotate.mp4
Processed and saved 46 frames from split-videos/train\Amilo\20241001_152509_rotate.mp4 to cropped-frames\Amilo\20241001_152509_rotate
Processing video: split-videos/train\Amilo\20241001_152509_slow_down.mp4
Processed and saved 92 frames from split-videos/train\Amilo\20241001_152509_slow_dow

In [4]:
def convert_frames_to_rgb(root_folder):
    # Traverse through each subfolder
    for subfolder in os.listdir(root_folder):
        subfolder_path = os.path.join(root_folder, subfolder)
        
        # Check if it's a directory
        if os.path.isdir(subfolder_path):
            # Loop through each frame folder within the subfolder
            for frame_folder in os.listdir(subfolder_path):
                frame_folder_path = os.path.join(subfolder_path, frame_folder)
                
                if os.path.isdir(frame_folder_path):
                    # Convert each frame in the folder
                    for frame_file in os.listdir(frame_folder_path):
                        frame_path = os.path.join(frame_folder_path, frame_file)
                        
                        # Check if the file is an image
                        if frame_file.endswith(('.jpg', '.jpeg', '.png')):
                            # Read the image in BGR format
                            frame = cv2.imread(frame_path)
                            
                            # Convert BGR to RGB
                            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                            
                            # Save the converted image, overwriting the original
                            cv2.imwrite(frame_path, rgb_frame)
                            print(f"Converted {frame_path} to RGB format.")



In [None]:
# Example usage
root_folder = 'cropped-frames'  # Replace with the actual root path
convert_frames_to_rgb(root_folder)


Converted cropped-frames\Amilo\20241001_152509_brightness\frame_0000.jpg to RGB format.
Converted cropped-frames\Amilo\20241001_152509_brightness\frame_0001.jpg to RGB format.
Converted cropped-frames\Amilo\20241001_152509_brightness\frame_0002.jpg to RGB format.
Converted cropped-frames\Amilo\20241001_152509_brightness\frame_0003.jpg to RGB format.
Converted cropped-frames\Amilo\20241001_152509_brightness\frame_0004.jpg to RGB format.
Converted cropped-frames\Amilo\20241001_152509_brightness\frame_0005.jpg to RGB format.
Converted cropped-frames\Amilo\20241001_152509_brightness\frame_0006.jpg to RGB format.
Converted cropped-frames\Amilo\20241001_152509_brightness\frame_0007.jpg to RGB format.
Converted cropped-frames\Amilo\20241001_152509_brightness\frame_0008.jpg to RGB format.
Converted cropped-frames\Amilo\20241001_152509_brightness\frame_0009.jpg to RGB format.
Converted cropped-frames\Amilo\20241001_152509_brightness\frame_0010.jpg to RGB format.
Converted cropped-frames\Amilo\2

In [27]:
import os
import numpy as np
import imageio
import cv2

from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model


In [28]:
# Replace label_processor with a manually defined class vocabulary
class_vocab = ["Amilo", "Baisakh"]  # Define your class names here
MAX_SEQ_LENGTH = 55 # defining sequence 
NUM_FEATURES = 1024
IMG_SIZE = 128

EPOCHS = 50

In [29]:
# Load ResNet50 with an additional global pooling layer to get a 1D vector
base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Global pooling to reduce dimensions
feature_extractor = Model(inputs=base_model.input, outputs=x)

# Update NUM_FEATURES to match the output shape of feature_extractor
NUM_FEATURES = feature_extractor.output_shape[-1]  # should be 2048 after global pooling



In [30]:

# Ensure these are defined or imported as needed
# feature_extractor, trained_model, load_video, MAX_SEQ_LENGTH, NUM_FEATURES, IMG_SIZE

def prepare_single_video(frames):
    frame_features = np.zeros(shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32")

    # Pad shorter videos.
    if len(frames) < MAX_SEQ_LENGTH:
        diff = MAX_SEQ_LENGTH - len(frames)
        padding = np.zeros((diff, IMG_SIZE, IMG_SIZE, 3))
        frames = np.concatenate((frames, padding))

    frames = frames[None, ...]

    # Extract features from the frames of the current video.
    for i, batch in enumerate(frames):
        video_length = batch.shape[0]
        length = min(MAX_SEQ_LENGTH, video_length)
        for j in range(length):
            if np.mean(batch[j, :]) > 0.0:
                preprocessed_frame = preprocess_input(batch[None, j, :])
                frame_features[i, j, :] = feature_extractor.predict(preprocessed_frame)
            else:
                frame_features[i, j, :] = 0.0

    return frame_features


def predict_action(path):
    frames = load_video(path)
    frame_features = prepare_single_video(frames)
    probabilities = trained_model.predict(frame_features)[0]

    # Print predictions in descending order of probability
    for i in np.argsort(probabilities)[::-1]:
        print(f"  {class_vocab[i]}: {probabilities[i] * 100:5.2f}%")
    
    return frames, class_vocab[np.argmax(probabilities)]


def process_all_videos_in_subfolders(root_dir):
    results = []
    for root, _, files in os.walk(root_dir):
        for file in files:
            if file.endswith((".mp4", ".avi", ".mov")):  # Include all supported video formats
                video_path = os.path.join(root, file)
                print(f"Processing video: {video_path}")
                frames, predicted_action = predict_action(video_path)
                results.append((video_path, predicted_action))
                to_gif(frames[:MAX_SEQ_LENGTH])  # Optional: save GIF for visualization
    return results


def to_gif(images):
    converted_images = images.astype(np.uint8)
    imageio.mimsave("animation.gif", converted_images, fps=10)
    return embed.embed_file("animation.gif")



In [31]:


def load_video(path, img_size=(IMG_SIZE, IMG_SIZE)):
    frames = []
    cap = cv2.VideoCapture(path)
    
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Resize frame to the desired image size
        frame = cv2.resize(frame, img_size)
        frames.append(frame)
    
    cap.release()
    frames = np.array(frames)
    return frames

In [32]:
# Example usage:
root_video_dir = "sample-videos"
results = process_all_videos_in_subfolders(root_video_dir)
for video_path, action in results:
    print(f"Predicted action for {video_path}: {action}")


Processing video: sample-videos\Amilo\20241001_152509_flip.mp4
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[

NameError: name 'trained_model' is not defined

In [None]:
import os
import numpy as np


In [None]:
#extract features from videos
def prepare_all_videos(df, root_dir):
    num_samples = len(df)
    labels = df["tag"].values
    labels = label_processor(labels[..., None]).numpy()
    
    # `frame_features` are what we will feed to our sequence model.
    frame_features = np.zeros(
        shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
    )

    # Collect all video paths from subfolders
    video_paths = []
    for root, _, files in os.walk(root_dir):
        for file in files:
            if file in df["video_name"].values:
                video_paths.append(os.path.join(root, file))

    # Ensure video paths are ordered as in DataFrame
    video_paths = sorted(video_paths, key=lambda x: df[df["video_name"] == os.path.basename(x)].index[0])

    # For each video.
    for idx, path in enumerate(video_paths):
        # Gather all its frames and add a batch dimension.
        frames = load_video(path)

        # Pad shorter videos.
        if len(frames) < MAX_SEQ_LENGTH:
            diff = MAX_SEQ_LENGTH - len(frames)
            padding = np.zeros((diff, IMG_SIZE, IMG_SIZE, 3))
            frames = np.concatenate((frames, padding))

        frames = frames[None, ...]

        # Initialize placeholder to store the features of the current video.
        temp_frame_features = np.zeros(
            shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
        )

        # Extract features from the frames of the current video.
        for i, batch in enumerate(frames):
            video_length = batch.shape[0]
            length = min(MAX_SEQ_LENGTH, video_length)
            for j in range(length):
                if np.mean(batch[j, :]) > 0.0:
                    temp_frame_features[i, j, :] = feature_extractor.predict(
                        batch[None, j, :]
                    )
                else:
                    temp_frame_features[i, j, :] = 0.0

        frame_features[idx,] = temp_frame_features.squeeze()

    return frame_features, labels


In [None]:
features = np.load("/media/gpu/157/hand_sign/models/dense121/train_dense121_seq55.npz")
train_data, train_labels,test_data, test_labels = features["arr_0"], features["arr_1"], features["arr_2"], features["arr_3"]

In [None]:
# Feature extraction
start_time = time.time()
train_data, train_labels = prepare_all_videos(train_df, "dataset") # extracting train data feature
test_data, test_labels = prepare_all_videos(test_df, "dataset") # # extracting test data feature


print(f"Frame features in train set: {train_data[0].shape}")
print(f"Frame masks in train set: {train_data[1].shape}")
print(f"Total time taken for feature extraction is {(time.time()-start_time)/60}")

In [None]:
# Model Building For Transformer for classification
class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, output_dim, **kwargs):
        super(PositionalEmbedding,self).__init__(**kwargs)
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=output_dim
        )
        self.sequence_length = sequence_length
        self.output_dim = output_dim

    def call(self, inputs):
        # The inputs are of shape: `(batch_size, frames, num_features)`
        length = tf.shape(inputs)[1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_positions = self.position_embeddings(positions)
        return inputs + embedded_positions

    def compute_mask(self, inputs, mask=None):
        mask = tf.reduce_any(tf.cast(inputs, "bool"), axis=-1)
        return mask
    
    def get_config(self):
        config = super(PositionalEmbedding,self).get_config()
        config.update({
            "position_embeddings": self.position_embeddings,
            "sequence_length": self.sequence_length,
            "output_dim": self.output_dim
        })
        return config
