This script contains experimental code for video frame conversion. The code is not well organized and may not work properly. Please use the code in the main directory instead.

In [5]:
import os
import cv2
import random
import numpy as np
import tensorflow as tf
"""
In this updated code, 
the transform_video function now takes in two arguments: 
	video_path, which is the path to the input video,
	and output_dir, which is the directory to which the transformed video should be written. 
The transformed video is saved to a file using the tf.io.TFRecordWriter class, 
with the file name being the same as the input video. 
The transform_dataset function iterates over all subdirectories of the input dataset directory 
and applies the transform_video function to each video file. The transformed videos are written 
to separate directories with the same folder structure as the input dataset, 
under the root directory given by the output_dir argument.
"""
def transform_video(video_path, output_dir, num_frames=30, frame_size=(224, 224)):
    """Sample frames from a video and store them as one serialized tensor.

    The video is decoded with OpenCV, every frame is resized to
    ``frame_size``, up to ``num_frames`` frames are drawn at random and
    kept in temporal order, and the resulting float32 tensor is written as
    a single TFRecord entry to ``output_dir`` under the same file name as
    the input video.

    Args:
        video_path: Path to the input video file.
        output_dir: Existing directory that receives the output file.
        num_frames: Maximum number of frames to sample. Videos with fewer
            frames contribute all of their frames instead of failing.
        frame_size: Target size handed to ``cv2.resize``, i.e.
            ``(width, height)``.

    Raises:
        ValueError: If no frame could be decoded from ``video_path``.
    """
    # Decode the whole video; resizing while reading keeps memory usage low.
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, frame_size))
    finally:
        # Release the decoder even if reading or resizing fails.
        cap.release()

    if not frames:
        raise ValueError(f"could not read any frames from video: {video_path}")

    # random.sample raises if asked for more items than exist, so clamp the
    # sample size. Sorting keeps the sampled frames in their original order.
    sample_size = min(num_frames, len(frames))
    frames_idx = sorted(random.sample(range(len(frames)), sample_size))

    # Only the sampled frames are converted to float32. The tensor is laid
    # out frames-first, channels-last: (n, height, width, channels).
    sampled = np.stack([frames[i] for i in frames_idx], axis=0)
    video = tf.constant(sampled.astype(np.float32))

    # TFRecordWriter.write expects raw bytes, so unwrap the serialized
    # string tensor with .numpy() before writing.
    output_path = os.path.join(output_dir, os.path.basename(video_path))
    with tf.io.TFRecordWriter(output_path) as writer:
        writer.write(tf.io.serialize_tensor(video).numpy())

def transform_dataset(dataset_dir, output_dir):
    """Apply ``transform_video`` to every video of a class-per-folder dataset.

    Each sub-directory of ``dataset_dir`` is treated as one class. A
    directory with the same name is created under ``output_dir`` and every
    regular file of the class directory is passed to ``transform_video``.

    Args:
        dataset_dir: Root directory containing one sub-directory per class.
        output_dir: Root directory that receives the mirrored class folders.
    """
    for class_dir in os.listdir(dataset_dir):
        class_path = os.path.join(dataset_dir, class_dir)
        if not os.path.isdir(class_path):
            continue

        # exist_ok avoids the check-then-create race and makes re-runs safe.
        output_class_dir = os.path.join(output_dir, class_dir)
        os.makedirs(output_class_dir, exist_ok=True)

        for video_file in os.listdir(class_path):
            video_path = os.path.join(class_path, video_file)
            # Nested folders and hidden files (e.g. macOS ".DS_Store") are
            # not videos; handing them to the decoder would only fail.
            if video_file.startswith('.') or not os.path.isfile(video_path):
                continue
            transform_video(video_path, output_class_dir)

# Example usage:
# transform_dataset('path/to/dataset', 'path/to/transformed_dataset')

In [6]:
# Input split and output root for the conversion. These are machine-specific
# absolute paths and must be adjusted before running on another machine.
src_dir = '/Users/rudyhendrawan/miniforge3/datasets/small-Dasar-Gerakan-Tari-Bali-All-Women/test'
dest_dir = '/Users/rudyhendrawan/miniforge3/datasets/small-Dasar-Gerakan-Tari-Bali-All-Women/test-224x224'
# Convert every class sub-directory of src_dir and mirror it under dest_dir.
transform_dataset(src_dir, dest_dir)

In [None]:
import tensorflow as tf
import cv2
import numpy as np
import random
import os

def extract_frames(video_path, num_frames=30):
    """Return randomly sampled, resized and rescaled frames of a video.

    Up to ``num_frames`` frame positions are drawn at random from the frame
    count reported by OpenCV and visited in ascending order. Each decoded
    frame is resized to 224x224 and the stacked result is divided by 255.

    Args:
        video_path: Path to the video file.
        num_frames: Maximum number of frames to sample. Videos reporting
            fewer frames contribute as many frames as they have.

    Returns:
        A NumPy array with the frames stacked along axis 0. It may hold
        fewer than ``num_frames`` frames when the video is short or a read
        fails before all sampled positions were visited.

    Raises:
        ValueError: If the video reports no frames or none could be decoded.
    """
    video = cv2.VideoCapture(video_path)
    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            raise ValueError(f"no frames available in video: {video_path}")

        # random.sample raises when the sample is larger than the
        # population, so never ask for more frames than the video reports.
        sample_size = min(num_frames, total_frames)
        frame_indices = sorted(random.sample(range(total_frames), sample_size))

        frames = []
        for index in frame_indices:
            # Seek to the sampled position and decode that frame.
            video.set(cv2.CAP_PROP_POS_FRAMES, index)
            success, frame = video.read()

            # Indices are ascending, so a failed read is treated as the
            # end of the decodable part of the video.
            if not success:
                break

            frames.append(cv2.resize(frame, (224, 224)))
    finally:
        # Always free the decoder, including on errors.
        video.release()

    if not frames:
        raise ValueError(f"could not decode any frame from video: {video_path}")

    # Stack along a new leading axis and rescale pixel values by 1/255.
    return np.stack(frames, axis=0) / 255.0

def video_to_tensor(video_path):
    """Load sampled frames of ``video_path`` as a float32 TensorFlow tensor.

    The frames come from ``extract_frames`` called with its default
    settings; the resulting array is converted with
    ``tf.convert_to_tensor``.
    """
    sampled_frames = extract_frames(video_path)
    return tf.convert_to_tensor(sampled_frames, dtype=tf.float32)

def create_dataset(data_dir, batch_size=32, buffer_size=1024, repeat=None):
    """Build a shuffled, batched ``tf.data.Dataset`` of decoded videos.

    Every regular file found in a sub-directory of ``data_dir`` is treated
    as a video. The paths are shuffled, decoded through
    ``video_to_tensor``, batched and prefetched.

    NOTE(review): the original function stopped right after creating the
    path dataset, returned nothing and ignored ``batch_size``,
    ``buffer_size`` and ``repeat``. The pipeline below is a proposed
    completion. It yields frames only (no class labels); confirm that this
    matches the intended use.

    Args:
        data_dir: Root directory containing one sub-directory per class.
        batch_size: Number of videos per batch.
        buffer_size: Size of the shuffle buffer.
        repeat: Number of repetitions of the dataset. ``None`` leaves the
            dataset un-repeated (assumption, to be confirmed).

    Returns:
        A ``tf.data.Dataset`` yielding float32 batches of video frames.
    """
    # Collect the paths of all files inside the class folders.
    video_paths = []
    for class_name in sorted(os.listdir(data_dir)):
        class_dir = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_dir):
            continue
        for video_file in sorted(os.listdir(class_dir)):
            video_path = os.path.join(class_dir, video_file)
            if os.path.isfile(video_path):
                video_paths.append(video_path)

    # The explicit dtype keeps an empty list a string tensor; without it
    # tf.constant([]) would default to float32.
    dataset = tf.data.Dataset.from_tensor_slices(
        tf.constant(video_paths, dtype=tf.string)
    )

    def _load_video(path):
        # py_function hands over an eager string tensor, while OpenCV needs
        # a plain Python str.
        return video_to_tensor(path.numpy().decode('utf-8'))

    # Shuffle the cheap path strings rather than the decoded videos.
    dataset = dataset.shuffle(buffer_size)
    dataset = dataset.map(
        lambda path: tf.py_function(_load_video, [path], tf.float32),
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    # NOTE(review): batching requires every video to yield the same number
    # of frames; verify for videos shorter than the sampling size.
    dataset = dataset.batch(batch_size)
    if repeat is not None:
        dataset = dataset.repeat(repeat)
    return dataset.prefetch(tf.data.AUTOTUNE)
	


Yes, here are some ways to optimize the code to increase its performance:

1. Use the map function in the tf.data.Dataset API to preprocess the videos in parallel.

2. Use the interleave function in the tf.data.Dataset API to interleave the video processing and loading operations, which can help to hide the loading time of one video behind the processing of another.

3. Use the prefetch function in the tf.data.Dataset API (e.g. `dataset.prefetch(tf.data.AUTOTUNE)`) so that the input pipeline prepares the next batches on the CPU while the model is still computing on the current one.

4. Use a batch size that is large enough to make efficient use of GPU memory and compute resources, but not so large that it causes memory issues.

5. Consider using TensorFlow's tf.data.TFRecordDataset to load the videos from disk, as it can be faster than loading from individual video files.

6. Use tf.data.Dataset.cache to cache the preprocessed data in memory. This can be useful if the preprocessing takes a long time and you want to avoid repeating the same preprocessing for each epoch.

7. If possible, consider extracting the video frames to image files such as JPEG or PNG, which TensorFlow can decode directly inside the input pipeline (tf.io.decode_jpeg / tf.io.decode_png), and then use TensorFlow's tf.image functions to preprocess the data.

Note: The specific optimizations that will be most effective will depend on the specifics of your dataset and hardware. Try experimenting with different combinations of these optimizations to find the best combination for your use case.

In [None]:
import os
import cv2
import torch
from torchvision import transforms

def video_to_images(video_path, output_folder, num_frames=30, frame_size=(224,224)):
    """
    Converts a video into a sequence of JPEG images and saves them in the output folder.
    The images are written to a sub-folder of ``output_folder`` named after the
    folder that contains the video (its class), as ``<video name>_<index>.jpg``.

    Arguments:
    video_path: str, path to the video file
    output_folder: str, path to the folder where the image sequence will be saved
    num_frames: int, maximum number of frames to extract, taken from the start of the video
    frame_size: tuple, (height, width) of the saved frames

    Returns:
    None

    Raises:
    OSError: if a frame cannot be written to disk
    """
    # Extract the video name and class from the video path
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    video_class = os.path.basename(os.path.dirname(video_path))

    # Create the output folder; exist_ok makes repeated runs safe
    class_folder = os.path.join(output_folder, video_class)
    os.makedirs(class_folder, exist_ok=True)

    # frame_size follows the (height, width) order of the resize transform
    # used originally, whereas cv2.resize expects (width, height).
    height, width = frame_size

    video = cv2.VideoCapture(video_path)
    try:
        for frame_count in range(num_frames):
            # Read a frame from the video; stop when the video has ended
            success, frame = video.read()
            if not success:
                break

            # Frames from OpenCV are NumPy arrays, so resize them with OpenCV
            frame = cv2.resize(frame, (width, height))

            # Encode the frame as a real JPEG file. torch.save would only
            # pickle the tensor under a ".jpg" name.
            image_path = os.path.join(class_folder, f"{video_name}_{frame_count}.jpg")
            if not cv2.imwrite(image_path, frame):
                raise OSError(f"failed to write image: {image_path}")
    finally:
        # Release the decoder even when reading or writing fails
        video.release()

def convert_dataset(dataset_folder, output_folder, num_frames=30, frame_size=(224,224)):
    """
    Converts the entire video dataset into a sequence of images and saves them in the output folder.
    Every ".mp4" / ".avi" file found in a class folder of the dataset is passed to
    `video_to_images`; other files and non-directory entries are skipped.

    Arguments:
    dataset_folder: str, path to the folder containing the dataset (one sub-folder per class)
    output_folder: str, path to the folder where the image sequence will be saved
    num_frames: int, number of frames to extract from each video
    frame_size: tuple, size of the frames to be extracted

    Returns:
    None
    """
    # The body is indented with spaces throughout: mixing tabs and spaces in
    # one function makes Python 3 reject the file with a TabError.
    video_extensions = (".mp4", ".avi")

    # Loop over all the class folders in the dataset folder
    for class_name in os.listdir(dataset_folder):
        class_folder = os.path.join(dataset_folder, class_name)

        # Skip the entry if it's not a directory
        if not os.path.isdir(class_folder):
            continue

        # Loop over all the video files in the class folder
        for video_file in os.listdir(class_folder):
            # Compare case-insensitively so that e.g. "CLIP.MP4" is not skipped
            if not video_file.lower().endswith(video_extensions):
                continue

            # Convert the video into a sequence of images
            video_path = os.path.join(class_folder, video_file)
            video_to_images(video_path, output_folder, num_frames, frame_size)
            

"""

In this code, the `video_to_images` function takes a video file and converts it into a sequence of images. The images are resized using the `transforms.Resize` transform, converted to a tensor using `torch.from_numpy`, and saved as JPEG images using `torch.save`.

The `convert_dataset` function takes the entire video dataset and converts each video in the dataset into a sequence of images using the `video_to_images` function. The images are saved in the output folder, preserving the same folder structure as the original dataset.
"""


In [5]:
# Folder with the videos of a single class and the folder that receives the
# extracted frames; both are read by the `__main__` block of the next cell.
# Machine-specific absolute paths: adjust before running elsewhere.
src_dir = '/Users/rudyhendrawan/miniforge3/datasets/small-Dasar-Gerakan-Tari-Bali-All-Women/test/Agem_Kanan'
dest_dir = '/Users/rudyhendrawan/miniforge3/datasets/small-Dasar-Gerakan-Tari-Bali-All-Women/test/Agem_Kanan-frames'

# from video2images import Video2Images
# Video2Images(video_filepath=src_dir,
#              capture_rate=1,
#              out_dir=dest_dir)

In [6]:
import os
import cv2
import threading
from queue import Queue

"""
Given individual video files (mp4, webm) on disk, creates a folder for
every video file and saves the video's RGB frames as jpeg files in that
folder.
It can be used to turn SomethingSomethingV2, which comes as 
many ".webm" files, into an RGB folder for each ".webm" file.
Uses multithreading to extract frames faster.
Modify the two filepaths at the bottom and then run this script.
"""


def video_to_rgb(video_filename, out_dir, resize_shape):
    """Write every frame of a video to ``out_dir`` as a numbered JPEG file.

    Frames are resized to ``resize_shape`` and stored as
    ``frame_<12-digit index>.jpg``, counting from zero.

    Args:
        video_filename: Path to the video file to decode.
        out_dir: Existing directory that receives the frame images.
        resize_shape: Target size passed to ``cv2.resize``, i.e.
            ``(width, height)``.
    """
    file_template = 'frame_{0:012d}.jpg'
    reader = cv2.VideoCapture(video_filename)
    try:
        count = 0
        while True:
            success, frame = reader.read()
            if not success:
                # End of the video, or the file could not be decoded.
                break
            out_filepath = os.path.join(out_dir, file_template.format(count))
            cv2.imwrite(out_filepath, cv2.resize(frame, resize_shape))
            count += 1
    finally:
        # This runs in many threads at once; release the decoder explicitly
        # instead of relying on garbage collection.
        reader.release()

def process_videofile(video_filename, video_path, rgb_out_path, file_extension: str = '.mp4'):
    """Extract the frames of one video into its own folder.

    The output folder is ``rgb_out_path/<video name without extension>``
    and is filled by ``video_to_rgb`` with 224x224 frames.

    Args:
        video_filename: File name of the video inside ``video_path``.
        video_path: Directory that contains the video file.
        rgb_out_path: Root directory for the per-video frame folders.
        file_extension: Extension to strip from ``video_filename`` when
            naming the output folder. Names that do not end with it are
            used unchanged.
    """
    filepath = os.path.join(video_path, video_filename)

    # Strip the extension only at the end of the name. str.replace would
    # also remove it from the middle (e.g. "a.mp4_copy.mp4" -> "a_copy").
    if file_extension and video_filename.endswith(file_extension):
        folder_name = video_filename[:-len(file_extension)]
    else:
        folder_name = video_filename

    # makedirs with exist_ok also creates a missing output root and does
    # not fail when the script is run a second time.
    out_dir = os.path.join(rgb_out_path, folder_name)
    os.makedirs(out_dir, exist_ok=True)
    video_to_rgb(filepath, out_dir, resize_shape=(224, 224))

def thread_job(queue, video_path, rgb_out_path, file_extension='.webm'):
    """Worker loop: process queued video file names until the queue is empty.

    Args:
        queue: Queue of video file names shared by all worker threads.
        video_path: Directory that contains the video files.
        rgb_out_path: Root directory for the per-video frame folders.
        file_extension: Extension forwarded to ``process_videofile``.
    """
    # Imported locally because this cell only imports ``Queue``. The
    # ``queue`` parameter does not affect the import statement.
    from queue import Empty

    while True:
        # A non-blocking get avoids the race of "empty() then get()": if
        # another worker takes the last item in between, a blocking get()
        # would never return.
        try:
            video_filename = queue.get_nowait()
        except Empty:
            return

        try:
            process_videofile(video_filename, video_path, rgb_out_path, file_extension=file_extension)
        except Exception as exc:
            # One broken video must not kill the worker; report and go on.
            print(f'failed to process {video_filename}: {exc}')
        finally:
            # Always acknowledge the item, otherwise queue.join() in the
            # main thread waits forever.
            queue.task_done()


if __name__ == '__main__':
    # the path to the folder which contains all video files (mp4, webm, or other)
    video_path = src_dir
    # the root output path where RGB frame folders should be created
    rgb_out_path = dest_dir
    # the file extension that the videos have
    file_extension = '.webm'

    # make sure the output root exists before the workers create the
    # per-video folders inside it
    os.makedirs(rgb_out_path, exist_ok=True)

    # fill the work queue with one entry per file in the input folder
    video_filenames = os.listdir(video_path)
    queue = Queue()
    for video_filename in video_filenames:
        queue.put(video_filename)

    NUM_THREADS = 30
    for _ in range(NUM_THREADS):
        worker = threading.Thread(target=thread_job, args=(queue, video_path, rgb_out_path, file_extension))
        worker.start()

    # report the total number of videos: the workers are already draining
    # the queue, so queue.qsize() may read 0 at this point
    print('waiting for all videos to be completed.', len(video_filenames), 'videos')
    print('This can take an hour or two depending on dataset size')
    queue.join()
    print('all done')

waiting for all videos to be completed. 0 videos
This can take an hour or two depending on dataset size
all done
