In [112]:
import cv2
import os
import shutil
from tqdm import tqdm
from collections.abc import Generator

# START EXTRACTING FRAMES

In [109]:
def extract_frames(video_path: str, output_folder: str) -> None:
    """
    Extract every frame of a video and save each one as a JPEG image.

    Args:
        video_path (str): The path to the input video file.
        output_folder (str): The path to the folder where extracted frames will be saved.
            It must be a "/"-separated path with at least two components, because the
            name of its parent folder is used as the filename suffix
            (e.g. "media/1239c/frames/" gives the suffix "1239c").

    Returns:
        None

    Raises:
        KeyError: If output_folder has no parent component to derive the suffix from
            (e.g. "frames" or "frames/").

    The output folder is created if it does not exist. The image files are named
    '<frame_number>_<suffix>.jpg', where frame_number starts at 0 and increments for each
    subsequent frame. The derived suffix is printed once before extraction starts.
    """
    # Derive the suffix first so that an invalid path fails before any folder is created
    # or any video is opened. Trailing slashes are stripped so that "a/frames" and
    # "a/frames/" both resolve to the parent folder "a".
    path_components = output_folder.rstrip("/").split("/")
    if len(path_components) < 2:
        raise KeyError("Path does not contain a second last element.")
    suffix = path_components[-2]
    print(f"suffix: {suffix}")

    # Ensure that the output folder exists; create it if it doesn't.
    os.makedirs(output_folder, exist_ok=True)

    # Open the video file for reading.
    cap = cv2.VideoCapture(video_path)
    try:
        # Counter for the frame number, used as the filename prefix.
        frame_count = 0

        # Loop through the video frames until there are no more frames to read.
        while True:
            ret, frame = cap.read()

            # If there are no more frames, break out of the loop.
            if not ret:
                break

            # Save the current frame as '<frame_count>_<suffix>.jpg'.
            frame_path = os.path.join(output_folder, f'{frame_count}_{suffix}.jpg')
            cv2.imwrite(frame_path, frame)

            frame_count += 1
    finally:
        # Release the video file even if reading or writing a frame fails.
        cap.release()

In [110]:
# Extract the frames of the video into the "frames" folder; extract_frames uses the name
# of the parent folder ("1239c") as the filename suffix.
output_dir = "media/1239c/frames/"
extract_frames("media/GOPR1239c_flip.mp4", output_dir)
# Displayed as the cell output: the number of entries now in the output folder.
len(os.listdir(output_dir))

suffix: 1239c


4443

# END EXTRACTING FRAMES

# Given transition frames, label everything between

## How to use:
- Extract the frames into a folder called "frames" inside the directory of your choice.
- Label the transition frames by copying them into folders called "holding_transitions" and "not_holding_transitions" in that same directory (the originals must stay in "frames").
- Transition frames are "firsts", i.e. the first frame of each run of consecutive frames with contact (holding) or without contact (not holding).
- Set `base_dir` to the directory containing the three folders.

In [113]:
def move_files_in_range(source_dir: str, destination_dir: str, start: int, end: int, suffix: str) -> None:
    """
    Copy the files named "<n>_<suffix>" for every n in a numeric range into another directory.

    Despite the function's name, the files are copied with shutil.copy, so the originals
    remain in the source directory.

    Args:
        source_dir (str): The directory containing the numbered files.
        destination_dir (str): The directory the files are copied into.
        start (int): One bound of the numeric range.
        end (int): The other bound of the numeric range. The two bounds may be given in
            either order: the smaller one is inclusive, the larger one is exclusive.
        suffix (str): The part of the filename after "<n>_", including the extension,
            like "1238c.jpg".
    """
    # Accept the bounds in either order.
    lower, upper = min(start, end), max(start, end)

    for number in range(lower, upper):
        name = "_".join((str(number), suffix))
        shutil.copy(os.path.join(source_dir, name), os.path.join(destination_dir, name))


def create_pairs(hold: list[str], not_hold: list[str]) -> Generator[tuple[str, str], None, None]:
    """
    Generate pairs of filenames delimiting consecutive not-holding / holding segments.

    This function creates pairs of filenames from the hold and not-hold lists, representing the
    start and end points of holding and not-holding actions. It is designed to manage frames from
    a video where an object is repeatedly picked up and put down, starting in the not-holding state.

    Args:
        hold (list[str]): A list of filenames indicating the start of a holding action, sorted by frame order.
        not_hold (list[str]): A list of filenames indicating the start of a not-holding action, sorted by frame order.

    Yields:
        tuple[str, str]: A pair of filenames (start_frame, end_frame). Pairs alternate between a
        not-holding segment and a holding segment, beginning with a not-holding segment that
        starts at the dummy filename '0_anything.jpg'.

    If `hold` has more entries than `not_hold` (the video ends while the object is still held),
    one extra pair (last not-hold start, next hold start) is yielded so that the final
    not-holding segment is not lost. Any further unmatched entries in either list are ignored.

    Example:
        Given lists:
        hold = ['446_1238c.jpg', '2072_1238c.jpg']
        not_hold = ['897_1238c.jpg', '2785_1238c.jpg']
        This function will yield:
        ('0_anything.jpg', '446_1238c.jpg'), ('446_1238c.jpg', '897_1238c.jpg'),
        ('897_1238c.jpg', '2072_1238c.jpg'), ('2072_1238c.jpg', '2785_1238c.jpg')
    """
    # Starting with a dummy filename to pair with the first holding action
    previous = '0_anything.jpg'

    # Iterate over each start and end frame simultaneously
    for start, end in zip(hold, not_hold):
        # Yield the pair: (previous, start of contact)
        yield previous, start

        # Yield the pair: (start of holding, start of non-contact)
        yield start, end

        # Update the 'previous' variable for the next iteration
        previous = end

    # zip() stops at the shorter list. When the last holding action is never followed by a
    # not-holding transition, still emit the not-holding segment that leads up to it.
    if len(hold) > len(not_hold):
        yield previous, hold[len(not_hold)]



def _numbered_files(directory: str) -> list[str]:
    """Return the names in `directory` of the form "<int>_<rest>", sorted by their integer prefix."""
    numbered = []
    for name in os.listdir(directory):
        prefix, separator, _rest = name.partition("_")
        if not separator:
            continue
        try:
            numbered.append((int(prefix), name))
        except ValueError:
            # Not a numbered frame file (e.g. a hidden system file); ignore it.
            continue
    numbered.sort()
    return [name for _number, name in numbered]


def move_frames_from_transitions(whole_dir: str, hold_template_dir: str, not_hold_template_dir: str, base_dir: str) -> None:
    """
    Copy the frames between transition points into "holding" / "not_holding" directories.

    This function is designed to manage frames from a video where an object is repeatedly picked up and
    put down. It uses the provided template directories to determine the start and end frames of each
    segment and then copies the frames (via move_files_in_range) to the appropriate output directory.
    The video is assumed to start in the not-holding state. Frames after the last transition are
    assigned to whichever state that last transition started.

    Args:
        whole_dir (str): The directory containing all frames to be sorted.
        hold_template_dir (str): Directory containing labeled frames indicating the start of contact.
        not_hold_template_dir (str): Directory containing labeled frames indicating the start of non-contact.
        base_dir (str): The base directory in which the "holding" and "not_holding" output
            directories are created.

    Raises:
        IndexError: If hold_template_dir contains no file named "<number>_<suffix>".

    Note:
        The function expects filenames in the format of "xxxx_suffix.jpg", where xxxx is a numeric part.
        Files that do not start with "<number>_" are ignored. The suffix (everything after the first
        underscore, including the extension) is taken from the first file in hold_template_dir and is
        used to build the names of the frames to copy from whole_dir.
    """
    # Get all template files from the hold and not-hold directories, sorted by their numeric prefix.
    hold_template_files = _numbered_files(hold_template_dir)
    not_hold_template_files = _numbered_files(not_hold_template_dir)
    if not hold_template_files:
        raise IndexError(f"No transition frames named '<number>_<suffix>' found in {hold_template_dir!r}.")

    # Everything after the first underscore, so suffixes that contain underscores stay intact.
    suffix = hold_template_files[0].split("_", 1)[1]

    # Define the output directories for holding and not-holding frames.
    output_holding = os.path.join(base_dir, "holding")
    output_not_holding = os.path.join(base_dir, "not_holding")

    # Create the output directories if they do not exist.
    os.makedirs(output_holding, exist_ok=True)
    os.makedirs(output_not_holding, exist_ok=True)

    # One past the highest frame number present in whole_dir (range ends are exclusive).
    frame_numbers = [
        int(name.split("_", 1)[0])
        for name in _numbered_files(whole_dir)
        if name.split("_", 1)[1] == suffix
    ]
    end_of_video = max(frame_numbers) + 1 if frame_numbers else 0

    # Initialize the holding flag. We start with the assumption that the first frames are not holding.
    holding = False
    # Frame number up to which (exclusive) frames have been assigned so far.
    last_boundary = 0

    # Process each pair of start and end frames.
    with tqdm(total=len(hold_template_files) + len(not_hold_template_files) + 1) as progress_bar:
        for start, end in create_pairs(hold_template_files, not_hold_template_files):
            # Choose the appropriate output directory based on the holding flag.
            current_output_dir = output_holding if holding else output_not_holding

            # Copy the frames that fall between the current start and end points to the chosen directory.
            last_boundary = int(end.split('_')[0])
            move_files_in_range(whole_dir, current_output_dir, int(start.split('_')[0]), last_boundary, suffix)

            # Toggle the holding flag for the next iteration.
            holding = not holding

            # Update the progress bar.
            progress_bar.update(1)

        # If the video ends while holding, the final hold transition has no matching not-hold
        # transition. When create_pairs did not already emit that segment (the flag is still
        # "not holding"), assign the frames leading up to that hold transition here.
        unmatched_hold = hold_template_files[len(not_hold_template_files):]
        if unmatched_hold and not holding:
            next_hold = int(unmatched_hold[0].split("_")[0])
            move_files_in_range(whole_dir, output_not_holding, last_boundary, next_hold, suffix)
            last_boundary = next_hold
            holding = True
            progress_bar.update(1)

        # After all transitions are processed, handle the frames after the last transition point.
        # They belong to the state that the last transition started.
        if end_of_video > last_boundary:
            tail_output_dir = output_holding if holding else output_not_holding
            move_files_in_range(whole_dir, tail_output_dir, last_boundary, end_of_video, suffix)
        progress_bar.update(1)
        

In [114]:
base_dir = "./media/1239c/"
# Build the sub-folder paths with os.path.join so they stay valid whether or not
# base_dir ends with a path separator.
move_frames_from_transitions(
    os.path.join(base_dir, "frames"),
    os.path.join(base_dir, "holding_transitions"),
    os.path.join(base_dir, "not_holding_transitions"),
    base_dir,
)

100%|██████████| 11/11 [00:01<00:00,  8.81it/s]
