In [None]:
!pip install av decord

Collecting av
  Downloading av-14.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.6 kB)
Collecting decord
  Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl.metadata (422 bytes)
Downloading av-14.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (39.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.7/39.7 MB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.6/13.6 MB[0m [31m42.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: decord, av
Successfully installed av-14.2.0 decord-0.6.0


In [None]:
# Mount Google Drive at /content/drive; later cells read and write paths
# under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import av
from decord import VideoReader, cpu
import os
import cv2

def split_video(video_path, audio_output_path, video_output_path):
    """
    Split a video file into a WAV audio track and a silent MP4 video.

    The audio stream is decoded and re-encoded as 16-bit PCM with PyAV. The
    frames are decoded with Decord and re-encoded with OpenCV's ``mp4v``
    writer. Exceptions are caught and printed rather than raised, so callers
    cannot detect a failure from the return value.

    Args:
        video_path: Path to the input video file.
        audio_output_path: Path to save the extracted audio (WAV format).
        video_output_path: Path to save the video without audio (MP4 format).

    Returns:
        None. Results are written to the two output paths.
    """
    default_fps = 30.0  # Used when the source frame rate cannot be determined
    try:
        fps = default_fps

        # --- 1. Extract audio using PyAV ---
        container = av.open(video_path)
        try:
            # Read the frame rate while the container is still open; stream
            # metadata must not be relied on after container.close().
            try:
                average_rate = container.streams.video[0].average_rate
                if average_rate:
                    # Convert to a plain float for cv2.VideoWriter
                    # (PyAV may report the rate as a Fraction).
                    fps = float(average_rate)
            except Exception:
                fps = default_fps

            audio_stream = next((s for s in container.streams if s.type == 'audio'), None)
            try:
                if not audio_stream:
                    print("No audio stream found in the video.")
                else:
                    audio_output = av.open(audio_output_path, mode="w", format="wav")
                    try:
                        new_audio_stream = audio_output.add_stream('pcm_s16le', rate=audio_stream.rate)
                        new_audio_stream.layout = audio_stream.layout
                        new_audio_stream.rate = audio_stream.rate
                        new_audio_stream.codec_context.time_base = audio_stream.time_base

                        for frame in container.decode(audio_stream):
                            for packet in new_audio_stream.encode(frame):
                                audio_output.mux(packet)

                        # Flush any packets still buffered in the encoder
                        for packet in new_audio_stream.encode():
                            audio_output.mux(packet)
                    finally:
                        # Release the output file even if encoding failed midway
                        audio_output.close()
            except Exception as e:
                # Audio problems are reported but do not stop the video export
                print(f"Error processing audio {e}")
        finally:
            # Close the input container whether or not audio extraction worked
            container.close()

        # --- 2. Extract video without audio, using Decord (decoding) and OpenCV (encoding) ---
        vr = VideoReader(video_path, ctx=cpu(0))
        num_frames = len(vr)

        # Take the frame size from the first decoded frame
        first_frame = vr.get_batch([0]).asnumpy()[0]
        height, width, _ = first_frame.shape

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(video_output_path, fourcc, fps, (width, height))
        try:
            if not out.isOpened():
                # Without this check every write below would be a silent no-op
                raise IOError(f"Could not open video writer for '{video_output_path}'")

            for i in range(num_frames):
                frame = vr.get_batch([i]).asnumpy()[0]
                # Convert RGB to BGR before handing the frame to OpenCV
                frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                out.write(frame_bgr)
        finally:
            out.release()
    except Exception as e:
        print(f"Error: {e}")
import cv2
# Example usage: split one video into an audio file and a silent video file.
# All three paths are relative to the current working directory.
video_file = "input.mp4"  # Replace with your video file
audio_file = "audio.wav"
video_no_audio_file = "video_no_audio.mp4"

# split_video prints its own errors instead of raising, so the message below
# is printed even when the split failed.
split_video(video_file, audio_file, video_no_audio_file)
print("Processing Completed!")

Error: [Errno 2] No such file or directory: 'input.mp4'
Processing Completed!


In [None]:
# prompt: write a code so that I can view the frames generated in the previous output

import cv2
import matplotlib.pyplot as plt
import os

def display_frames(directory):
  """Display every ``.jpg`` frame found in a directory using Matplotlib.

  Frames are shown one figure at a time, in sorted filename order. Only the
  top level of ``directory`` is scanned.

  Args:
    directory: Path of the folder that contains the frame images.

  Returns:
    None. A missing folder or an unreadable image is reported with a printed
    message instead of raising.
  """
  try:
    frame_files = sorted(f for f in os.listdir(directory) if f.endswith('.jpg'))
  except FileNotFoundError:
    # Same reporting style as the other folder helpers in this notebook
    print(f"Folder '{directory}' not found.")
    return

  for frame_file in frame_files:
    frame_path = os.path.join(directory, frame_file)
    frame = cv2.imread(frame_path)
    if frame is None:
      # cv2.imread returns None (it does not raise) when a file cannot be read
      print(f"Skipping unreadable frame: {frame_path}")
      continue
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert to RGB for display
    plt.imshow(frame_rgb)
    plt.title(f"Frame: {frame_file}")
    plt.axis('off')
    plt.show()

# Example usage: show the .jpg frames stored in the relative folder "video_frames".
display_frames("video_frames")


In [None]:
# prompt: remove the contents from the folder video_frames

import shutil
import os

def remove_folder_contents(folder_path):
  """Empty a directory while keeping the directory itself.

  Regular files and symlinks are unlinked; sub-directories are removed
  recursively. A missing directory is reported with a printed message, and a
  failure on one entry is printed without stopping the remaining deletions.

  Args:
    folder_path: Path of the directory to empty.
  """
  try:
    entry_names = os.listdir(folder_path)
  except FileNotFoundError:
    print(f"Folder '{folder_path}' not found.")
    return

  for entry_name in entry_names:
    target = os.path.join(folder_path, entry_name)
    try:
      # Files and links take priority, so a link to a directory is unlinked
      # rather than followed.
      if os.path.isfile(target) or os.path.islink(target):
        os.unlink(target)
        continue
      if os.path.isdir(target):
        shutil.rmtree(target)
    except Exception as err:
      print('Failed to delete %s. Reason: %s' % (target, err))


# Example usage: empties the relative folder "preprocess".
# NOTE(review): the recorded prompt for this cell mentions "video_frames",
# but the call targets "preprocess" — confirm which folder is intended.
remove_folder_contents("preprocess")


In [None]:
import av
from decord import VideoReader, gpu, cpu  #Import Cuda
import os
import cv2
import concurrent.futures

def split_video(video_path, audio_output_path, video_output_path, use_cuda=True):
    """
    Split a video file into a WAV audio track and a silent MP4 video.

    The audio stream is decoded and re-encoded as 16-bit PCM with PyAV. The
    frames are decoded with Decord (GPU context when available, otherwise CPU)
    and re-encoded with OpenCV's ``mp4v`` writer. Exceptions are caught and
    printed rather than raised.

    Args:
        video_path: Path to the input video file.
        audio_output_path: Path to save the extracted audio (WAV format).
        video_output_path: Path to save the video without audio (MP4 format).
        use_cuda: When True and OpenCV reports at least one CUDA device,
            Decord is asked to decode on GPU 0; decoding falls back to the
            CPU if the GPU reader cannot be created.

    Returns:
        None. Results are written to the two output paths.
    """
    default_fps = 30.0  # Used when the source frame rate cannot be determined
    try:
        fps = default_fps

        # --- 1. Extract audio using PyAV ---
        container = av.open(video_path)
        try:
            # Read the frame rate while the container is still open; stream
            # metadata must not be relied on after container.close().
            try:
                average_rate = container.streams.video[0].average_rate
                if average_rate:
                    # Convert to a plain float for cv2.VideoWriter
                    # (PyAV may report the rate as a Fraction).
                    fps = float(average_rate)
            except Exception:
                fps = default_fps

            audio_stream = next((s for s in container.streams if s.type == 'audio'), None)
            try:
                if not audio_stream:
                    print("No audio stream found in the video.")
                else:
                    audio_output = av.open(audio_output_path, mode="w", format="wav")
                    try:
                        new_audio_stream = audio_output.add_stream('pcm_s16le', rate=audio_stream.rate)
                        new_audio_stream.layout = audio_stream.layout
                        new_audio_stream.rate = audio_stream.rate
                        new_audio_stream.codec_context.time_base = audio_stream.time_base

                        for frame in container.decode(audio_stream):
                            for packet in new_audio_stream.encode(frame):
                                audio_output.mux(packet)

                        # Flush any packets still buffered in the encoder
                        for packet in new_audio_stream.encode():
                            audio_output.mux(packet)
                    finally:
                        # Release the output file even if encoding failed midway
                        audio_output.close()
            except Exception as e:
                # Audio problems are reported but do not stop the video export
                print(f"Error processing audio {e}")
        finally:
            # Close the input container whether or not audio extraction worked
            container.close()

        # --- 2. Extract video without audio, using Decord (decoding) and OpenCV (encoding) ---
        # Query CUDA availability once and reuse the answer for both decisions below.
        cuda_available = bool(use_cuda) and cv2.cuda.getCudaEnabledDeviceCount() > 0
        if cuda_available:
            try:
                vr = VideoReader(video_path, ctx=gpu(0))
            except Exception as e:
                # OpenCV seeing a CUDA device does not guarantee that Decord
                # can decode on it; retry on the CPU instead of giving up.
                print(f"GPU decoding unavailable ({e}); falling back to CPU.")
                vr = VideoReader(video_path, ctx=cpu(0))
        else:
            vr = VideoReader(video_path, ctx=cpu(0))

        num_frames = len(vr)

        # Take the frame size from the first decoded frame
        first_frame = vr.get_batch([0]).asnumpy()[0]
        height, width, _ = first_frame.shape

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Or try 'H264'

        if cuda_available:
            # NOTE(review): CAP_FFMPEG only selects the FFmpeg backend explicitly;
            # it does not appear to enable CUDA encoding — confirm.
            out = cv2.VideoWriter(video_output_path, cv2.CAP_FFMPEG, fourcc, fps, (width, height))
        else:
            out = cv2.VideoWriter(video_output_path, fourcc, fps, (width, height))

        try:
            if not out.isOpened():
                # Without this check every write below would be a silent no-op
                raise IOError(f"Could not open video writer for '{video_output_path}'")

            # Iterate through frames and write them to the video file
            for i in range(num_frames):
                frame = vr.get_batch([i]).asnumpy()[0]
                # Convert RGB to BGR before handing the frame to OpenCV
                frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                out.write(frame_bgr)
        finally:
            out.release()

    except Exception as e:
        print(f"Error: {e}")

def process_video(input_file_path, output_folder, use_cuda=True):
    """Split one video into ``<name>_audio.wav`` and ``<name>_video.mp4``.

    Both outputs are placed in ``output_folder`` and are named after the input
    file with its extension removed. A start and a finish message are printed
    around the call to ``split_video``.

    Args:
        input_file_path: Path of the video to split.
        output_folder: Folder that receives the two output files.
        use_cuda: Forwarded unchanged to ``split_video``.
    """
    print(f"Processing: {input_file_path}")

    stem = os.path.splitext(os.path.basename(input_file_path))[0]
    audio_output_file = os.path.join(output_folder, f"{stem}_audio.wav")
    video_output_file = os.path.join(output_folder, f"{stem}_video.mp4")

    split_video(input_file_path, audio_output_file, video_output_file, use_cuda)
    print(f"Finished Processing: {input_file_path}")

def traverse_directory(root_directory, output_root_folder, max_workers=4, use_cuda=True):
    """
    Find every video under a directory tree and process them in a thread pool.

    The sub-folder layout of ``root_directory`` is mirrored under
    ``output_root_folder``; an output folder is created only for folders that
    contain at least one video file. A failure while processing one file is
    printed and does not stop the remaining files.

    Args:
        root_directory: Root of the tree to search for videos.
        output_root_folder: Root under which the mirrored output folders are created.
        max_workers: Number of worker threads in the pool.
        use_cuda: Forwarded unchanged to ``process_video``.

    Returns:
        None.
    """
    video_extensions = ('.mp4', '.avi', '.mov', '.mkv', '.webm')

    video_files = []
    for foldername, _subfolders, filenames in os.walk(root_directory):
        for filename in filenames:
            # Extension match is case-insensitive
            if not filename.lower().endswith(video_extensions):
                continue
            input_file_path = os.path.join(foldername, filename)
            relative_path = os.path.relpath(foldername, root_directory)
            output_folder = os.path.join(output_root_folder, relative_path)
            os.makedirs(output_folder, exist_ok=True)
            video_files.append((input_file_path, output_folder))  # Store path and folder

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Keep the futures so that worker exceptions are retrieved; the results
        # of executor.map were never consumed, which silently dropped them.
        pending = {
            executor.submit(process_video, input_file_path, output_folder, use_cuda): input_file_path
            for input_file_path, output_folder in video_files
        }
        for future in concurrent.futures.as_completed(pending):
            try:
                future.result()
            except Exception as e:
                print(f"Error processing {pending[future]}: {e}")
if __name__ == '__main__':
    # Batch driver: split every video under input_directory and write the
    # audio/video pairs under output_directory. Both paths are on the mounted Drive.
    input_directory = "/content/drive/MyDrive/Datasets/dataset/1005-2004"  # Replace with your input directory
    output_directory = "/content/drive/MyDrive/Datasets/preprocess3"  # Replace with your output directory

    # Single-file alternative, kept for reference (disabled):
    # video_file = "input.mp4"  # Replace with your video file
    # audio_file = "audio.wav"
    # video_no_audio_file = "video_no_audio.mp4"
    #
    # split_video(video_file, audio_file, video_no_audio_file) #Single execution
    # print("Processing Completed!")

    # Two worker threads; pass use_cuda=False to force CPU decoding.
    traverse_directory(input_directory, output_directory, max_workers=2, use_cuda=True) #To Use CPU please change to false
    print("Traversing Completed")

Processing: /content/drive/MyDrive/Datasets/dataset/1005-2004/NewAdd.Dream.Team.8__#1_label_A.mp4
Processing: /content/drive/MyDrive/Datasets/dataset/1005-2004/NewAdd.NBA-2017.12.25_CLE@GSW__#00-00-00_00-21-00_label_A.mp4
Finished Processing: /content/drive/MyDrive/Datasets/dataset/1005-2004/NewAdd.NBA-2017.12.25_CLE@GSW__#00-00-00_00-21-00_label_A.mp4
Processing: /content/drive/MyDrive/Datasets/dataset/1005-2004/NewAdd.NBA-2017.12.25_CLE@GSW__#00-21-00_00-44-20_label_A.mp4
Finished Processing: /content/drive/MyDrive/Datasets/dataset/1005-2004/NewAdd.NBA-2017.12.25_CLE@GSW__#00-00-00_00-21-00_label_A.mp4
Processing: /content/drive/MyDrive/Datasets/dataset/1005-2004/NewAdd.NBA-2017.12.25_CLE@GSW__#00-21-00_00-44-20_label_A.mp4
Finished Processing: /content/drive/MyDrive/Datasets/dataset/1005-2004/NewAdd.NBA-2017.12.25_CLE@GSW__#00-21-00_00-44-20_label_A.mp4
Processing: /content/drive/MyDrive/Datasets/dataset/1005-2004/NewAdd.NBA-2017.12.25_CLE@GSW__#00-44-20_01-08-34_label_A.mp4
Finishe

In [None]:
# prompt: download to my local system the files under the folder preprocess  d
import os
from google.colab import files

def download_folder(folder_path):
  """Send each regular file at the top level of a folder to the browser.

  Sub-directories are ignored. If a ``FileNotFoundError`` is raised at any
  point (for example because the folder does not exist), a "not found"
  message is printed and the function stops.

  Args:
    folder_path: Path of the folder whose files should be downloaded.
  """
  try:
    candidates = (os.path.join(folder_path, name) for name in os.listdir(folder_path))
    for candidate in candidates:
      if not os.path.isfile(candidate):
        continue
      files.download(candidate)
  except FileNotFoundError:
    print(f"Folder '{folder_path}' not found.")


# Example usage: download the top-level files of /content/preprocess.
download_folder("/content/preprocess")


In [None]:
# prompt: download this particular file path

from google.colab import files
import os

def download_file_from_path(file_path):
  """Send one file to the browser, or print a message if it is not a file.

  Any exception raised while checking or downloading is caught and printed.

  Args:
    file_path: Path of the file to download.
  """
  try:
    if not os.path.isfile(file_path):
      print(f"File '{file_path}' not found.")
      return
    files.download(file_path)
  except Exception as err:
    print(f"Error downloading file: {err}")


# Example usage: download a single extracted audio track from /content/preprocess.
download_file_from_path("/content/preprocess/NewAdd.Dream.Team.8__#1_label_A_audio.wav")


In [None]:
!zip -r /content/drive/MyDrive/preprocess.zip /content/preprocess

  adding: content/preprocess/ (stored 0%)
  adding: content/preprocess/One.Day.2011__#00-42-52_00-45-31_label_A_audio.wavFinished Processing: /content/drive/MyDrive/dataset/1005-2004/The.Pursuit.of.Happyness.2006__#00-04-20_00-05-35_label_A.mp4
Processing: /content/drive/MyDrive/dataset/1005-2004/The.Pursuit.of.Happyness.2006__#00-08-55_00-10-50_label_A.mp4
Finished Processing: /content/drive/MyDrive/dataset/1005-2004/The.Pursuit.of.Happyness.2006__#00-05-52_00-08-22_label_A.mp4
Processing: /content/drive/MyDrive/dataset/1005-2004/The.Pursuit.of.Happyness.2006__#00-11-00_00-15-08_label_A.mp4
 (deflated 42%)
  adding: content/preprocess/Skyfall.2012__#02-08-20_02-08-43_label_B2-0-0_audio.wav (deflated 14%)
  adding: content/preprocess/Salt.2010__#00-40-10_00-42-22_label_A_video.mp4 (deflated 1%)
  adding: content/preprocess/Rush.Hour.1998.BluRay__#01-06-07_01-08-37_label_A_audio.wavFinished Processing: /content/drive/MyDrive/dataset/1005-2004/The.Pursuit.of.Happyness.2006__#00-08-55_00-

In [None]:
from google.colab import files
# Download the archive from the location the zip cell writes it to
# (/content/drive/MyDrive/preprocess.zip); no cell in this notebook creates
# /content/preprocess.zip.
files.download("/content/drive/MyDrive/preprocess.zip")

In [None]:
# prompt: DELETE all the files from a folder

def delete_all_files_in_folder(folder_path):
  """Delete the regular files and symlinks directly inside a folder.

  Sub-directories and their contents are left untouched. A missing folder is
  reported with a printed message, and a failure on one entry is printed
  without stopping the remaining deletions.

  Args:
    folder_path: Path of the folder to clear.
  """
  try:
    entry_names = os.listdir(folder_path)
  except FileNotFoundError:
    print(f"Folder '{folder_path}' not found.")
    return

  for target in (os.path.join(folder_path, entry_name) for entry_name in entry_names):
    try:
      removable = os.path.isfile(target) or os.path.islink(target)
      if removable:
        os.unlink(target)
    except Exception as err:
      print('Failed to delete %s. Reason: %s' % (target, err))


# Example usage: delete the top-level files of the Drive output folder
# (sub-folders are left in place).
delete_all_files_in_folder("/content/drive/MyDrive/Datasets/preprocess3")


In [None]:
# prompt: python code to count the number of files in a folder

import os

def count_files_in_folder(folder_path):
  """Return how many files exist under a folder, including all sub-folders.

  Args:
    folder_path: The path to the folder to walk.

  Returns:
    The total number of files found; 0 when the walk yields nothing.
  """
  return sum(len(file_names) for _root, _dirs, file_names in os.walk(folder_path))

# Example usage: count the files under the Drive output folder, sub-folders included.
folder_path = '/content/drive/MyDrive/Datasets/preprocess3'  # Replace with the actual folder path
# Alternative target (disabled): the source dataset folder.
# folder_path = '//content/drive/MyDrive/Datasets/dataset/1005-2004'  # Replace with the actual folder path
num_files = count_files_in_folder(folder_path)
print(f"The number of files in the folder is: {num_files}")


The number of files in the folder is: 1932


In [None]:
import os
import time

def process_file(input_path, output_path):
    """Placeholder processing step that writes a one-line marker file.

    Prints a start message, sleeps for one second to stand in for real work,
    writes a single line naming ``input_path`` to ``output_path`` and prints a
    success message. Any exception is caught and printed.

    Args:
        input_path: Path reported as the source; it is never opened.
        output_path: Path of the text file to create or overwrite.
    """
    try:
        print(f"Processing: {input_path} -> {output_path}")
        time.sleep(1)  # Stand-in for the real processing time
        with open(output_path, "w") as handle:
            marker_line = f"Processed content from: {input_path}\n"
            handle.write(marker_line)
        print(f"Successfully processed: {input_path} -> {output_path}")
    except Exception as err:
        print(f"Error processing: {input_path}: {err}")

def process_files_sequentially(input_directory, output_directory):
    """Run ``process_file`` on each file of a directory, one after another.

    The output directory is created if needed. Entries that are not regular
    files are skipped with a message. After each processed file the loop
    pauses for one second. An error on one entry is printed and the loop
    continues with the next entry.

    Args:
        input_directory (str): The directory whose top-level files are processed.
        output_directory (str): The directory that receives ``processed_<name>`` outputs.
    """
    os.makedirs(output_directory, exist_ok=True)

    for entry_name in os.listdir(input_directory):
        try:
            source_path = os.path.join(input_directory, entry_name)
            target_path = os.path.join(output_directory, f"processed_{entry_name}")

            if os.path.isfile(source_path):
                # Blocks until this file is done before moving on
                process_file(source_path, target_path)
                # Optional pause between files
                time.sleep(1)
            else:
                print(f"Skipping non-file: {source_path}")
        except Exception as err:
            print(f"An error occurred while processing {entry_name}: {err}")

if __name__ == "__main__":
    # Driver for the sequential placeholder pipeline.
    input_directory = "/content/preprocess"  # Replace with your input directory
    # NOTE(review): this looks like a local user home path rather than a Colab
    # path; process_files_sequentially creates it with os.makedirs if it is
    # missing — confirm this is the intended destination.
    output_directory = "/Users/kushagraagarwal/Documents/New"  # Replace with your output directory

    # Disabled helper that seeds input_directory with five sample text files:
    # #Create some test file
    # for i in range(0,5):
    #     file_path = os.path.join(input_directory, f"file_{i}.txt")  # Replace with the desired path
    #     os.makedirs(input_directory, exist_ok = True)
    #     with open(file_path, "w") as file:
    #          file.write("Hello, this is a sample file.")

    process_files_sequentially(input_directory, output_directory)

    print("All files processed.")

Processing: /content/preprocess/The.Pursuit.of.Happyness.2006__#00-38-52_00-40-31_label_A_video.mp4 -> /Users/kushagraagarwal/Documents/New/processed_The.Pursuit.of.Happyness.2006__#00-38-52_00-40-31_label_A_video.mp4
Successfully processed: /content/preprocess/The.Pursuit.of.Happyness.2006__#00-38-52_00-40-31_label_A_video.mp4 -> /Users/kushagraagarwal/Documents/New/processed_The.Pursuit.of.Happyness.2006__#00-38-52_00-40-31_label_A_video.mp4
Processing: /content/preprocess/One.Day.2011__#00-42-52_00-45-31_label_A_audio.wav -> /Users/kushagraagarwal/Documents/New/processed_One.Day.2011__#00-42-52_00-45-31_label_A_audio.wav
Successfully processed: /content/preprocess/One.Day.2011__#00-42-52_00-45-31_label_A_audio.wav -> /Users/kushagraagarwal/Documents/New/processed_One.Day.2011__#00-42-52_00-45-31_label_A_audio.wav
Processing: /content/preprocess/Skyfall.2012__#02-08-20_02-08-43_label_B2-0-0_audio.wav -> /Users/kushagraagarwal/Documents/New/processed_Skyfall.2012__#02-08-20_02-08-43_l