### Frame-Extraction of Videos

In [None]:
import os
import subprocess
import csv

def extract_keyframes(input_video, output_folder, start_time=None, end_time=None):
    """Save the I-frames of a video as PNG files and log their timestamps.

    ffmpeg is run twice with the same ``select``/``showinfo`` filter: the
    first pass writes every I-frame to ``<output_folder>/<video name>_NNN.png``;
    the second pass (``-copyts``, null output) is only used to read the
    ``pts_time`` values that ``showinfo`` prints.  One ``[video name, pts_time]``
    row per keyframe is appended to ``<output_folder>/timestamps.csv``; the
    header row is written when that file is missing or empty.

    Args:
        input_video: Path of the video to process.
        output_folder: Directory receiving the PNG files and ``timestamps.csv``
            (created if needed).
        start_time: Optional value for ffmpeg's input ``-ss`` option.
        end_time: Optional value for ffmpeg's input ``-to`` option.

    Raises:
        subprocess.CalledProcessError: If either ffmpeg pass exits non-zero.
        OSError: If the ffmpeg executable cannot be started.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Each bound is applied on its own, so passing only one of them works too.
    time_options = []
    if start_time is not None:
        time_options += ["-ss", str(start_time)]
    if end_time is not None:
        time_options += ["-to", str(end_time)]

    # Extract filename without extension
    filename = os.path.splitext(os.path.basename(input_video))[0]

    # Argument lists (no shell) keep paths with spaces or quotes intact and
    # avoid depending on a platform-specific filter tool such as findstr.
    base_command = [
        "ffmpeg", *time_options,
        "-i", input_video,
        "-vf", "select='eq(pict_type\\,I)',showinfo",
        "-vsync", "vfr",
    ]

    # Pass 1: write the keyframe images.
    subprocess.run(
        base_command + [f"{output_folder}/{filename}_%03d.png"],
        check=True,
    )

    # Pass 2: decode to the null muxer and capture the log, which carries the
    # showinfo lines.  Undecodable bytes in the log are replaced rather than
    # aborting the whole extraction.
    log_run = subprocess.run(
        base_command + ["-copyts", "-f", "null", "-"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        encoding="utf-8",
        errors="replace",
        check=True,
    )

    # Extract pts_time values
    pts_times = []
    for line in log_run.stdout.splitlines():
        _, marker, remainder = line.partition("pts_time:")
        fields = remainder.split()
        if marker and fields:
            pts_times.append([filename, fields[0]])

    # Save pts_time values to a CSV file
    output_csv = os.path.join(output_folder, "timestamps.csv")
    needs_header = (
        not os.path.exists(output_csv) or os.path.getsize(output_csv) == 0
    )
    with open(output_csv, "a", newline="") as csvfile:
        writer = csv.writer(csvfile, delimiter=",")
        if needs_header:
            writer.writerow(["Keyframes", "Timestamps"])
        writer.writerows(pts_times)
        
def update_hierarchy(input_video, output_folder):
    """Return the directory in which the frames of ``input_video`` are stored.

    The result is ``<output_folder>/<name of the video's parent folder>/labels/Frames``.
    Nothing is created on disk; the path is only computed.

    Args:
        input_video: Path of the video file.
        output_folder: Root directory of the output hierarchy.

    Returns:
        The joined output path as a string.
    """
    # Only the immediate parent folder of the video is carried over.
    folder_name = os.path.basename(os.path.dirname(input_video))
    return os.path.join(output_folder, folder_name, "labels", "Frames")

def extract_keyframes_from_hierarchy(root_folder, output_root_folder, start_time=None, end_time=None):
    """Extract keyframes from every ``.mp4`` file found below ``root_folder``.

    For each video the destination directory is derived with
    ``update_hierarchy`` and the work is delegated to ``extract_keyframes``;
    ``start_time`` and ``end_time`` are forwarded unchanged.
    """
    for current_dir, _subdirs, entries in os.walk(root_folder):
        video_paths = (
            os.path.join(current_dir, entry)
            for entry in entries
            if entry.endswith(".mp4")
        )
        for video_path in video_paths:
            destination = update_hierarchy(video_path, output_root_folder)
            extract_keyframes(video_path, destination, start_time, end_time)

# Root of the video hierarchy; it contains the surgery folders.
root_folder = "Demo/Videos"

# Frames are saved inside the same main folder as the videos, so the output
# root is the same path as the input root.
output_root_folder = "Demo/Videos"

# Optional time window; define these and pass them on to restrict extraction:
# start_time = "00:01:50"
# end_time = "00:02:20"

# Walk the hierarchy and extract the keyframes of every video in it.
extract_keyframes_from_hierarchy(
    root_folder=root_folder,
    output_root_folder=output_root_folder,
)

### Clip Generation

In [None]:
import os
import subprocess
import csv

def get_frame_rate(input_video):
    """Return the frame rate of the first video stream, or ``None`` on failure.

    ffprobe is asked for the stream's ``r_frame_rate``, which it reports as a
    ``numerator/denominator`` string; the quotient is returned as a float.

    Args:
        input_video: Path of the video file to probe.

    Returns:
        The frame rate in frames per second, or ``None`` (after printing an
        error message) when ffprobe cannot be run, exits non-zero, or reports
        a value that is not a positive ``num/denom`` rate.
    """
    command = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'v:0',
        '-show_entries', 'stream=r_frame_rate',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        input_video
    ]

    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError:
        # ffprobe missing or not executable: report it like any other failure.
        print("Error: ffprobe command failed.")
        return None

    if result.returncode != 0:
        print("Error: ffprobe command failed.")
        return None

    # Only the first token is parsed so extra output lines cannot break parsing.
    tokens = result.stdout.split()
    frame_rate_str = tokens[0] if tokens else ""
    try:
        num, denom = frame_rate_str.split('/')
        frame_rate = float(num) / float(denom)
    except (ValueError, ZeroDivisionError):
        print("Error: Unable to parse frame rate.")
        return None

    # A zero or negative rate is unusable: callers divide by the result.
    if frame_rate <= 0:
        print("Error: Unable to parse frame rate.")
        return None
    return frame_rate

def _count_video_frames(input_video):
    """Return the decoded frame count of the first video stream, or ``None``."""
    command = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "v:0",
        # -count_frames fills in nb_read_frames (the counted value).
        "-count_frames",
        "-show_entries", "stream=nb_read_frames",
        "-of", "csv=p=0",
        input_video,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError:
        return None
    if result.returncode != 0:
        return None

    # Keep only the first field; tolerate separators and extra lines.
    fields = result.stdout.replace(",", " ").split()
    try:
        return int(fields[0])
    except (IndexError, ValueError):
        return None


def extract_clips(input_videos, output_root_folder, clip_length_sec=60):
    """Cut one clip ending at each keyframe timestamp of every input video.

    For each video, ``<video folder>/labels/Frames/timestamps.csv`` is read.
    Every row whose ``Keyframes`` value contains the video's file name (without
    extension) yields a clip that starts ``clip_length_sec`` seconds before the
    keyframe (clamped to frame 0) and ends at the keyframe (clamped to the last
    frame).  Clips are written to
    ``<output_root_folder>/<video folder name>/labels/Videos`` and each clip
    ffmpeg produced successfully is appended to ``clips_info.csv`` there.

    A video is skipped, with a printed message, when its timestamps file is
    missing or its frame rate or frame count cannot be determined.

    Args:
        input_videos: Iterable of video file paths.
        output_root_folder: Root directory of the output hierarchy.
        clip_length_sec: Maximum clip length in seconds (default 60).
    """
    for input_video in input_videos:
        video_folder = os.path.dirname(input_video)
        timestamps_csv = os.path.join(video_folder, "labels", "Frames", "timestamps.csv")
        if not os.path.isfile(timestamps_csv):
            print(f"Error: timestamps file not found: {timestamps_csv}")
            continue

        frame_rate = get_frame_rate(input_video)
        if frame_rate is None:
            continue

        total_frames = _count_video_frames(input_video)
        if total_frames is None:
            print("Error: Unable to determine frame count.")
            continue

        output_folder = os.path.join(output_root_folder, os.path.basename(video_folder), "labels", "Videos")
        os.makedirs(output_folder, exist_ok=True)

        # Video file name without extension
        video_filename = os.path.splitext(os.path.basename(input_video))[0]
        output_csv = os.path.join(output_folder, "clips_info.csv")

        with open(output_csv, "a", newline="") as clips_file, \
                open(timestamps_csv, "r", newline="") as timestamps_file:
            writer = csv.DictWriter(
                clips_file, fieldnames=["clip_filename", "start_time", "end_time"]
            )
            if os.path.getsize(output_csv) == 0:
                writer.writeheader()

            for row in csv.DictReader(timestamps_file):
                # NOTE(review): substring match, so a video named "a" also
                # matches rows of "ab" — confirm whether equality is intended.
                if video_filename not in row["Keyframes"]:
                    continue

                timestamp = float(row["Timestamps"])
                keyframe_frame = int(timestamp * frame_rate)

                # Skip first keyframe to handle issues like files with no frames
                if keyframe_frame == 0:
                    continue

                start_frame = max(0, keyframe_frame - clip_length_sec * frame_rate)
                end_frame = min(keyframe_frame, total_frames - 1)
                if end_frame < start_frame:
                    # Timestamp lies beyond the counted frames; nothing to cut.
                    continue

                start_sec = start_frame / frame_rate
                end_sec = end_frame / frame_rate
                clip_filename = f"{output_folder}/{video_filename}-{start_sec:.3f}-{end_sec:.3f}.mp4"

                # between() is inclusive on both ends, hence the +1.
                frame_limit = int(end_frame - start_frame) + 1

                # Argument list (no shell) so paths with spaces stay intact.
                command = [
                    "ffmpeg",
                    "-i", input_video,
                    "-vf", f"select='between(n,{start_frame},{end_frame})'",
                    "-vsync", "vfr",
                    "-q:v", "2",
                    "-frames:v", str(frame_limit),
                    "-ss", str(start_sec),
                    "-to", str(end_sec),
                    clip_filename,
                ]

                try:
                    return_code = subprocess.run(command).returncode
                except OSError as error:
                    # ffmpeg cannot be started at all; no later clip can work.
                    print(f"Error: unable to run ffmpeg: {error}")
                    return

                if return_code != 0:
                    print(f"Error: ffmpeg failed for {clip_filename}")
                    continue

                # Write clip information to CSV
                writer.writerow({
                    "clip_filename": os.path.basename(clip_filename),
                    "start_time": round(start_sec, 3),
                    "end_time": round(end_sec, 3)
                })

# Root folder containing the hierarchy of videos.
root_folder = "Demo/Videos"
# Clips are saved inside the same main folder as the videos, so the output
# root is the same path as the input root.
output_root_folder = "Demo/Videos"

# Collect every .mp4 file below the root, in os.walk order.
input_videos = [
    os.path.join(subdir, file)
    for subdir, dirs, files in os.walk(root_folder)
    for file in files
    if file.endswith(".mp4")
]

extract_clips(input_videos, output_root_folder)