### Extracting Keyframes from a video

In [None]:
import os
import subprocess

def extract_keyframes(input_video, output_folder, start_time=None, end_time=None):
    """Save the I-frames of a video as JPEG files and log their timestamps.

    ffmpeg is run twice with the same ``select``/``showinfo`` filter chain:
    the first pass writes each I-frame to
    ``<output_folder>/keyframes_NNN.jpg``; the second pass (with ``-copyts``
    and a null output) is only used for its log, from which every line
    containing ``pts_time:`` is stored in ``timestamps.txt``.

    Args:
        input_video: Path of the video to read.
        output_folder: Directory for the JPEG keyframes (created if missing).
        start_time: Optional value passed to ffmpeg as ``-ss``.
        end_time: Optional value passed to ffmpeg as ``-to``.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Each bound is applied on its own, so passing only one of them is honoured.
    time_options = []
    if start_time is not None:
        time_options += ["-ss", str(start_time)]
    if end_time is not None:
        time_options += ["-to", str(end_time)]

    # Argument lists (no shell) keep paths with spaces or quotes intact and
    # avoid platform-specific shell tools for filtering the log.
    base_command = [
        "ffmpeg", *time_options, "-i", input_video,
        "-vf", "select='eq(pict_type\\,I)',showinfo",
        "-vsync", "vfr",
    ]

    frames_run = subprocess.run(
        base_command + [os.path.join(output_folder, "keyframes_%03d.jpg")]
    )
    if frames_run.returncode != 0:
        # The timestamp pass is only meaningful when the frames were written.
        print("Error: ffmpeg failed to extract keyframes.")
        return

    # showinfo reports through ffmpeg's log, so merge stderr into stdout.
    log_run = subprocess.run(
        base_command + ["-copyts", "-f", "null", "-"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        errors="replace",
    )

    # NOTE(review): the file is written relative to the current working
    # directory, not inside output_folder -- confirm this is the intended place.
    with open("timestamps.txt", "w") as txt_file:
        txt_file.writelines(
            line + "\n"
            for line in log_run.stdout.splitlines()
            if "pts_time:" in line
        )

# Path of the input video whose keyframes should be extracted
input_video = "Input/2023-07-18_154344_VID003.mp4"
# Folder that receives the extracted keyframe images
output_folder = "Output/Keyframes/"
# Optional time window; pass both values to extract_keyframes to restrict the extraction:
# start_time = "00:02:49"
# end_time = "00:03:50"
# Leave start_time and end_time out to process the whole video
extract_keyframes(input_video, output_folder)

### Extracting pts_time Variable from .txt file and saving into csv
- Saves the timestamp of each keyframe that was logged while the keyframes were extracted from the video

In [1]:
import csv

def extract_pts_time(input_txt, output_csv):
    """Collect the ``pts_time`` values of a text log into a one-column CSV.

    Every line of ``input_txt`` that contains ``pts_time:`` contributes the
    whitespace-delimited token that follows the marker. The values are
    written to ``output_csv`` below a ``Timestamps`` header row.

    Args:
        input_txt: Path of the text file to scan.
        output_csv: Path of the CSV file to create (overwritten if present).
    """
    marker = "pts_time:"

    with open(input_txt, "r") as log_file:
        rows = [
            [entry.split(marker)[1].split()[0]]
            for entry in log_file
            if marker in entry
        ]

    with open(output_csv, "w", newline="") as out_file:
        # Header first, then one row per timestamp.
        csv.writer(out_file, delimiter=" ").writerows([["Timestamps"], *rows])

# Text file holding the logged pts_time lines, and the CSV file to create from it
input_txt = "Output/timestamps.txt"
output_csv = "Output/timestamps.csv"
extract_pts_time(input_txt, output_csv)

### Creating Clips of Videos

In [6]:
import os
import subprocess
import csv

def get_frame_rate(input_video):
    """Return the frame rate of the first video stream in frames per second.

    Runs ``ffprobe`` for the stream's ``r_frame_rate`` entry, which is
    reported as a ``numerator/denominator`` string.

    Args:
        input_video: Path of the video file to probe.

    Returns:
        The frame rate as a float, or ``None`` (after printing an error) when
        ffprobe cannot be started, exits with an error, or reports a value
        that is not a positive ``num/den`` ratio.
    """
    command = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'v:0',
        '-show_entries', 'stream=r_frame_rate',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        input_video
    ]

    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError:
        # Raised when the ffprobe executable is missing or cannot be started.
        print("Error: ffprobe command failed.")
        return None

    if result.returncode != 0:
        print("Error: ffprobe command failed.")
        return None

    # Only the first token is parsed so extra output lines do not break unpacking.
    tokens = result.stdout.split()
    frame_rate_str = tokens[0] if tokens else ""
    try:
        num, denom = frame_rate_str.split('/')
        frame_rate = float(num) / float(denom)
    except (ValueError, ZeroDivisionError):
        print("Error: Unable to parse frame rate.")
        return None

    if frame_rate <= 0:
        # Callers divide by the frame rate, so a zero rate is reported as a failure.
        print("Error: Unable to parse frame rate.")
        return None
    return frame_rate

def _count_video_frames(input_video):
    """Return the frame count ffprobe reports for the first video stream, or None on failure."""
    command = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "v:0",
        # -count_frames fills nb_read_frames (not nb_frames) in the stream section.
        "-count_frames",
        "-show_entries", "stream=nb_read_frames",
        "-of", "csv=p=0",
        input_video,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError:
        return None
    if result.returncode != 0:
        return None

    # Tolerate trailing separators or extra lines in the csv output.
    fields = result.stdout.replace(",", " ").split()
    try:
        return int(fields[0])
    except (IndexError, ValueError):
        return None


def extract_clips(input_video, output_folder, timestamps_csv, output_csv):
    """Cut one short clip around every timestamp listed in a CSV file.

    For each value in the ``Timestamps`` column the matching frame index is
    computed from the video's frame rate, and ffmpeg is asked for the frames
    from 60 before to 60 after it (clamped to the video). Clip names and their
    start/end times in seconds are written to ``output_csv``.

    Args:
        input_video: Path of the source video.
        output_folder: Directory that receives the clips (created if missing).
        timestamps_csv: CSV file with a ``Timestamps`` column, in seconds.
        output_csv: CSV file to create with columns ``clip_filename``,
            ``from`` and ``to``.

    Returns:
        None. Nothing is written when the frame rate or the frame count of
        the video cannot be determined; an error is printed instead.
    """
    os.makedirs(output_folder, exist_ok=True)

    with open(timestamps_csv, "r", newline="") as csvfile:
        timestamps = [float(row["Timestamps"]) for row in csv.DictReader(csvfile)]

    frame_rate = get_frame_rate(input_video)
    if frame_rate is None:
        return

    total_frames = _count_video_frames(input_video)
    if total_frames is None:
        print("Error: Unable to count video frames.")
        return

    # Video file name without extension, used as the clip name prefix.
    video_filename = os.path.splitext(os.path.basename(input_video))[0]

    clip_times = []

    for idx, timestamp in enumerate(timestamps):
        keyframe_frame = int(timestamp * frame_rate)
        start_frame = max(0, keyframe_frame - 60)
        end_frame = min(total_frames - 1, keyframe_frame + 60)
        clip_duration_frames = end_frame - start_frame

        clip_filename = os.path.join(
            output_folder, f"{video_filename}_{idx}_{timestamp:.2f}.mp4"
        )
        # An argument list (no shell) keeps paths with spaces or quotes intact.
        command = [
            "ffmpeg", "-i", input_video,
            "-vf", f"select='between(n,{start_frame},{end_frame})'",
            "-vsync", "vfr", "-q:v", "2",
            "-frames:v", str(clip_duration_frames),
            "-ss", str(start_frame / frame_rate),
            clip_filename,
        ]
        subprocess.run(command)

        clip_times.append({
            "clip_filename": os.path.basename(clip_filename),
            "from": start_frame / frame_rate,
            "to": end_frame / frame_rate
        })

    with open(output_csv, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["clip_filename", "from", "to"])
        writer.writeheader()
        writer.writerows(clip_times)

# Same video whose keyframes were extracted earlier
input_video = "Input/2023-07-18_154344_VID003.mp4"
# Folder that receives the generated clips
output_folder = "Output/Clips1/"
# CSV with the keyframe timestamps (input) and CSV describing the clips (output)
timestamps_csv = "Output/timestamps.csv"
output_csv = "Output/clips_info1.csv"

extract_clips(input_video, output_folder, timestamps_csv, output_csv)

### Pick Annotation of a video file from JSON file
- After picking the annotation we will take start time, end time and labels from it and save them into a csv file

In [24]:
import json
import csv
import os

# Load the JSON annotation export.
with open('Input/2023-07-18_154344_VID003.json', 'r') as f:
    data = json.load(f)

video_filename = "2023-07-18_154344_VID003.mp4"

# First entry whose "file_upload" value contains the video name; None when absent.
video_entry = None
for entry in data:
    if video_filename in entry["file_upload"]:
        video_entry = entry
        break

if not video_entry:
    print("Video file not found in the JSON data.")
else:
    # One record per result of every annotation: start, end and the joined labels.
    annotation_data = [
        {
            "start": region["value"]["start"],
            "end": region["value"]["end"],
            "labels": ', '.join(region["value"]["labels"]),
        }
        for annotation in video_entry["annotations"]
        for region in annotation["result"]
    ]

    folder = 'Output'
    os.makedirs(folder, exist_ok=True)
    csv_file = os.path.join(folder, "labels1.csv")

    with open(csv_file, mode='w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['start', 'end', 'labels'])
        writer.writeheader()
        writer.writerows(annotation_data)
    print(f"Annotations extracted and saved to {csv_file}")

Scope movement
Setup
Scope movement
Aspiration
Dissection
Washout
Aspiration
Scope movement
Dissection
Washout
Scope movement
Dissection
Washout
Washout
Washout
Washout
Washout
Washout
Washout
Scope movement
Scope movement
Washout
Dissection
Washout
Scope insert


### Joining Labels and Clips

In [4]:
import pandas as pd

# Labels extracted from the JSON annotations (columns used: start, end, labels)
df_labels = pd.read_csv("Output/labels1.csv")

# Timing information of every clip (columns used: from, to)
df_clips = pd.read_csv("Output/clips_info1.csv")

clip_labels = []

for clip_from, clip_to in zip(df_clips['from'], df_clips['to']):
    # A label applies when the clip lies completely inside the labelled interval.
    matched = [
        label
        for seg_start, seg_end, label in zip(
            df_labels['start'], df_labels['end'], df_labels['labels']
        )
        if clip_from >= seg_start and clip_to <= seg_end
    ]
    clip_labels.append(', '.join(matched or ["No Label Found"]))

# Attach the joined labels to the clips table and save the result.
df_clips['labels'] = clip_labels
df_clips.to_csv("Output/Clips_labeling.csv", index=False)

### Hierarchical Structure
- Extracts keyframes from all the videos found in the surgeries folder and saves them into the Frames folder inside labels, together with their timestamps in a CSV file.

In [None]:
import os
import subprocess
import csv

def extract_keyframes(input_video, output_folder, start_time=None, end_time=None):
    """Save the I-frames of a video as PNG files and append their timestamps to a CSV.

    ffmpeg is run twice with the same ``select``/``showinfo`` filter chain:
    the first pass writes each I-frame to ``<output_folder>/<name>_NNN.png``
    (``<name>`` is the video file name without extension); the second pass
    (with ``-copyts`` and a null output) is only used for its log, from which
    the ``pts_time`` values are read. One ``[<name>, <pts_time>]`` row per
    value is appended to ``<output_folder>/timestamps.csv``.

    Args:
        input_video: Path of the video to read.
        output_folder: Directory for the keyframes and the CSV (created if missing).
        start_time: Optional value passed to ffmpeg as ``-ss``.
        end_time: Optional value passed to ffmpeg as ``-to``.

    Raises:
        subprocess.CalledProcessError: If either ffmpeg run exits with an error.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Each bound is applied on its own, so passing only one of them is honoured.
    time_options = []
    if start_time is not None:
        time_options += ["-ss", str(start_time)]
    if end_time is not None:
        time_options += ["-to", str(end_time)]

    # Extract filename without extension
    filename = os.path.splitext(os.path.basename(input_video))[0]

    # Argument lists (no shell) keep paths with spaces or quotes intact and
    # avoid platform-specific shell tools for filtering the log.
    base_command = [
        "ffmpeg", *time_options, "-i", input_video,
        "-vf", "select='eq(pict_type\\,I)',showinfo",
        "-vsync", "vfr",
    ]

    subprocess.run(
        base_command + [os.path.join(output_folder, f"{filename}_%03d.png")],
        check=True,
    )

    # showinfo reports through ffmpeg's log, so merge stderr into stdout.
    log_run = subprocess.run(
        base_command + ["-copyts", "-f", "null", "-"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        encoding="utf-8",
        errors="replace",
        check=True,
    )

    # Extract pts_time values; a video without matching lines yields no rows.
    pts_times = [
        [filename, line.split("pts_time:")[1].split()[0]]
        for line in log_run.stdout.splitlines()
        if "pts_time:" in line
    ]

    # Save pts_time values to a CSV file shared by all videos of the folder.
    output_csv = os.path.join(output_folder, "timestamps.csv")
    needs_header = not os.path.exists(output_csv) or os.path.getsize(output_csv) == 0
    with open(output_csv, "a", newline="") as csvfile:
        writer = csv.writer(csvfile, delimiter=",")
        if needs_header:
            writer.writerow(["Keyframes", "Timestamps"])
        writer.writerows(pts_times)
        
def update_hierarchy(input_video, output_folder):
    """Return the keyframe output directory for a video.

    The result is ``<output_folder>/<parent>/labels/Frames``, where
    ``<parent>`` is the name of the directory that directly contains
    ``input_video``. Nothing is created on disk.

    Args:
        input_video: Path of the video file.
        output_folder: Root directory of the output hierarchy.

    Returns:
        The constructed directory path as a string.
    """
    # Name of the folder containing the video
    folder_name = os.path.basename(os.path.dirname(input_video))

    return os.path.join(output_folder, folder_name, "labels", "Frames")

def extract_keyframes_from_hierarchy(root_folder, output_root_folder, start_time=None, end_time=None):
    """Run keyframe extraction for every ``.mp4`` file below a root folder.

    The tree under ``root_folder`` is walked with ``os.walk``; each file whose
    name ends in ``.mp4`` is passed to ``extract_keyframes`` with the output
    directory returned by ``update_hierarchy``.

    Args:
        root_folder: Directory to search recursively.
        output_root_folder: Root directory handed to ``update_hierarchy``.
        start_time: Forwarded unchanged to ``extract_keyframes``.
        end_time: Forwarded unchanged to ``extract_keyframes``.
    """
    for current_dir, _subdirs, names in os.walk(root_folder):
        for name in names:
            if not name.endswith(".mp4"):
                continue
            video_path = os.path.join(current_dir, name)
            target_folder = update_hierarchy(video_path, output_root_folder)
            extract_keyframes(video_path, target_folder, start_time, end_time)

# Root folder that is searched recursively for .mp4 videos
root_folder = "Demo6/Videos"

# Root folder under which the keyframes are written (here the same tree as the input)
output_root_folder = "Demo6/Videos"

# Optional time window; pass both values to extract_keyframes_from_hierarchy if needed
# start_time = "00:01:50"
# end_time = "00:02:20"

# Extract keyframes for every video found in the hierarchy
extract_keyframes_from_hierarchy(root_folder, output_root_folder)

### Create multiple clips of Videos

In [None]:
import os
import subprocess
import csv

def get_frame_rate(input_video):
    """Return the frame rate of the first video stream in frames per second.

    Runs ``ffprobe`` for the stream's ``r_frame_rate`` entry, which is
    reported as a ``numerator/denominator`` string.

    Args:
        input_video: Path of the video file to probe.

    Returns:
        The frame rate as a float, or ``None`` (after printing an error) when
        ffprobe cannot be started, exits with an error, or reports a value
        that is not a positive ``num/den`` ratio.
    """
    command = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'v:0',
        '-show_entries', 'stream=r_frame_rate',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        input_video
    ]

    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError:
        # Raised when the ffprobe executable is missing or cannot be started.
        print("Error: ffprobe command failed.")
        return None

    if result.returncode != 0:
        print("Error: ffprobe command failed.")
        return None

    # Only the first token is parsed so extra output lines do not break unpacking.
    tokens = result.stdout.split()
    frame_rate_str = tokens[0] if tokens else ""
    try:
        num, denom = frame_rate_str.split('/')
        frame_rate = float(num) / float(denom)
    except (ValueError, ZeroDivisionError):
        print("Error: Unable to parse frame rate.")
        return None

    if frame_rate <= 0:
        # Callers divide by the frame rate, so a zero rate is reported as a failure.
        print("Error: Unable to parse frame rate.")
        return None
    return frame_rate

def _count_video_frames(input_video):
    """Return the frame count ffprobe reports for the first video stream, or None on failure."""
    command = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "v:0",
        # -count_frames fills nb_read_frames (not nb_frames) in the stream section.
        "-count_frames",
        "-show_entries", "stream=nb_read_frames",
        "-of", "csv=p=0",
        input_video,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError:
        return None
    if result.returncode != 0:
        return None

    # Tolerate trailing separators or extra lines in the csv output.
    fields = result.stdout.replace(",", " ").split()
    try:
        return int(fields[0])
    except (IndexError, ValueError):
        return None


def extract_clips(input_videos, output_root_folder):
    """Cut, for each video, one clip ending at every keyframe timestamp.

    For a video in ``<dir>``, timestamps are read from
    ``<dir>/labels/Frames/timestamps.csv`` (rows whose ``Keyframes`` value
    equals the video file name without extension). Each clip covers the 60
    seconds before the keyframe, clamped to the start of the video, and is
    written to ``<output_root_folder>/<dir name>/labels/Videos``. A row with
    the clip name and its start/end time in seconds is appended to
    ``clips_info.csv`` in the same folder.

    Videos whose frame rate or frame count cannot be determined are skipped.

    Args:
        input_videos: Iterable of video file paths.
        output_root_folder: Root directory of the output hierarchy.
    """
    for input_video in input_videos:
        video_folder = os.path.dirname(input_video)
        timestamps_csv = os.path.join(video_folder, "labels", "Frames", "timestamps.csv")
        output_folder = os.path.join(
            output_root_folder, os.path.basename(video_folder), "labels", "Videos"
        )
        os.makedirs(output_folder, exist_ok=True)

        frame_rate = get_frame_rate(input_video)
        if frame_rate is None:
            continue

        total_frames = _count_video_frames(input_video)
        if total_frames is None:
            print("Error: Unable to count video frames.")
            continue

        # Video file name without extension
        video_filename = os.path.splitext(os.path.basename(input_video))[0]

        # Whole number of frames in the 60-second window before a keyframe.
        window_frames = int(round(60 * frame_rate))

        # One clips_info.csv per output folder, shared by the videos of that folder.
        output_csv = os.path.join(output_folder, "clips_info.csv")

        with open(output_csv, "a", newline="") as clips_file, \
                open(timestamps_csv, "r", newline="") as timestamps_file:
            writer = csv.DictWriter(
                clips_file, fieldnames=["clip_filename", "start_time", "end_time"]
            )
            if os.path.getsize(output_csv) == 0:
                writer.writeheader()

            for row in csv.DictReader(timestamps_file):
                # Exact match, so "VID1" does not pick up the rows of "VID10".
                if row["Keyframes"] != video_filename:
                    continue

                keyframe_frame = int(float(row["Timestamps"]) * frame_rate)

                # Skip first keyframe to handle issues like files with no frames
                if keyframe_frame == 0:
                    continue

                start_frame = max(0, keyframe_frame - window_frames)
                end_frame = min(keyframe_frame, total_frames - 1)
                if end_frame < start_frame:
                    # Timestamp lies beyond the counted frames; nothing to cut.
                    continue

                start_sec = start_frame / frame_rate
                end_sec = end_frame / frame_rate
                clip_filename = os.path.join(
                    output_folder,
                    f"{video_filename}-{start_sec:.3f}-{end_sec:.3f}.mp4",
                )

                # An argument list (no shell) keeps paths with spaces or quotes intact.
                command = [
                    "ffmpeg", "-i", input_video,
                    "-vf", f"select='between(n,{start_frame},{end_frame})'",
                    "-vsync", "vfr", "-q:v", "2",
                    # between() is inclusive on both ends.
                    "-frames:v", str(end_frame - start_frame + 1),
                    "-ss", str(start_sec), "-to", str(end_sec),
                    clip_filename,
                ]
                subprocess.run(command)

                # Write clip information to CSV
                writer.writerow({
                    "clip_filename": os.path.basename(clip_filename),
                    "start_time": round(start_sec, 3),
                    "end_time": round(end_sec, 3)
                })

# Root folder that is searched recursively for .mp4 videos
root_folder = "Demo1/Videos"
# Root folder under which the clips are written (here the same tree as the input)
output_root_folder = "Demo1/Videos"

# Collect the path of every .mp4 file found below root_folder
# NOTE(review): because the output root equals the input root, a rerun of this
# cell will also collect clips generated by an earlier run -- confirm this is intended.
input_videos = []
for subdir, dirs, files in os.walk(root_folder):
    for file in files:
        if file.endswith(".mp4"):
            input_videos.append(os.path.join(subdir, file))

extract_clips(input_videos, output_root_folder)