### Extracting Keyframes from a video

In [1]:
import os
import subprocess

def extract_keyframes(input_video, output_folder, start_time=None, end_time=None):
    os.makedirs(output_folder, exist_ok=True)

    if start_time is not None and end_time is not None:
        time_options = f"-ss {start_time} -to {end_time}"
    else:
        time_options = ""

    command = (
        f"ffmpeg {time_options} -i \"{input_video}\" -vf "
        f"\"select='eq(pict_type\\,I)',showinfo\" -vsync vfr "
        f"\"{output_folder}/keyframes_%03d.jpg\" && "
        f"ffmpeg {time_options} -i \"{input_video}\" -vf "
        f"\"select='eq(pict_type\\,I)',showinfo\" -vsync vfr "
        f"-copyts -f null - 2>&1 | findstr \"pts_time:\" > \"{output_folder}/timestamps.txt\""
    )

    subprocess.run(command, shell=True)

# Explain the input video path whose frames you need to extract
input_video = "Input/2023-07-18_154344_VID003.mp4"
output_folder = "Demo/Images"
start_time = "00:01:49"
end_time = "00:02:00"
# We can remove start_time and end_time if we need whole video
extract_keyframes(input_video, output_folder,start_time,end_time)

### Extracting pts_time Variable from .txt file and saving into csv
- To save keyframe time when they are being extracted from the video

In [2]:
import csv

def extract_pts_time(input_txt, output_csv):
    
    pts_times = []
    with open(input_txt, "r") as txt_file:
        for line in txt_file:
            if "pts_time:" in line:
                timestamp = line.split("pts_time:")[1].split()[0]
                pts_times.append([timestamp])

    with open(output_csv, "w", newline="") as csvfile:
        writer = csv.writer(csvfile, delimiter=" ")
        writer.writerow(["Timestamps"])
        writer.writerows(pts_times)

# Text file path         
input_txt = "Keyframe/Latest/timestamps.txt"
output_csv = "Keyframe/Latest/timestamps.csv"
extract_pts_time(input_txt, output_csv)

### Creating Clips of Videos

In [3]:
import os
import subprocess
import csv

def get_frame_rate(input_video):
    command = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'v:0',
        '-show_entries', 'stream=r_frame_rate',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        input_video
    ]
    
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode == 0:
        frame_rate_str = result.stdout.strip()
        try:
            num, denom = frame_rate_str.split('/')
            return float(num) / float(denom)
        except ValueError:
            print("Error: Unable to parse frame rate.")
            return None
    else:
        print("Error: ffprobe command failed.")
        return None

def extract_clips(input_video, output_folder, timestamps_csv, output_csv):
    os.makedirs(output_folder, exist_ok=True)

    timestamps = []
    with open(timestamps_csv, "r") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            timestamps.append(float(row["Timestamps"]))

    frame_rate = get_frame_rate(input_video)
    if frame_rate is None:
        return  # Abort if frame rate cannot be determined

    count_total_frames = f"ffprobe -v error -select_streams v:0 -count_frames -show_entries stream=nb_frames -of csv=p=0 {input_video}"
    result = subprocess.run(count_total_frames, shell=True, capture_output=True, text=True)
    total_frames = int(result.stdout.strip())
    
    video_filename = os.path.splitext(os.path.basename(input_video))[0]  # Extract video file name without extension

    clip_times = []  # List to store clip start and end times

    for idx, timestamp in enumerate(timestamps):
        clip_duration_sec = 4  # Default clip duration in seconds
        num_frames_per_clip = int(clip_duration_sec * frame_rate)

        start_frame = max(0, int(timestamp * frame_rate) - num_frames_per_clip // 2)
        end_frame = min(total_frames - 1, int(timestamp * frame_rate) + num_frames_per_clip // 2)
        
        # Saves the clip with desired filename format: video_file_name_index_timestamp.mp4
        clip_filename = f"{output_folder}/{video_filename}_{idx}_{timestamp:.2f}.mp4"
        command = (
            f"ffmpeg -i {input_video} "
            f"-vf \"select='between(n,{start_frame},{end_frame})'\" "
            f"-vsync vfr -q:v 2 -frames:v {num_frames_per_clip} "
            f"-ss {start_frame / frame_rate} "
            f"{clip_filename}"
        )

        subprocess.run(command, shell=True)

        # Store clip filename and start/end times
        clip_times.append({
            "clip_filename": clip_filename,
            "from": start_frame / frame_rate,
            "to": end_frame / frame_rate
        })

    # Write clip times to CSV file
    with open(output_csv, "w", newline="") as csvfile:
        fieldnames = ["clip_filename", "from", "to"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(clip_times)

# Same video whose frame we extracted originally
input_video = "Laparoscopic_appendectomy.mp4"
output_folder = "Keyframe/Latest/Clips"
timestamps_csv = "Keyframe/Latest/timestamps.csv"
output_csv = "Keyframe/Latest/clips_info.csv"

extract_clips(input_video, output_folder, timestamps_csv, output_csv)

In [5]:
def extract_clips(input_video, output_folder, timestamps_csv):
    os.makedirs(output_folder, exist_ok=True)

    timestamps = []
    with open(timestamps_csv, "r") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            timestamps.append(float(row["Timestamps"]))

    count_total_frames = f"ffprobe -v error -select_streams v:0 -count_frames -show_entries stream=nb_frames -of csv=p=0 {input_video}"
    result = subprocess.run(count_total_frames, shell=True, capture_output=True, text=True)
    total_frames = int(result.stdout.strip())

    video_filename = os.path.splitext(os.path.basename(input_video))[0]
    
    
    for idx, timestamp in enumerate(timestamps):
        start_frame = max(0, int(timestamp * FRAME_RATE) - 60)
        end_frame = min(total_frames - 1, int(timestamp * FRAME_RATE) + 60)
        
        # Saves the clip as clip_(number for clip)_keyframe_timing
        clip_filename = f"{output_folder}/{video_filename}_{idx}_{timestamp:.2f}.mp4"
        command = (
            f"ffmpeg -i {input_video} "
            f"-vf \"select='between(n,{start_frame},{end_frame})'\" "
            f"-vsync vfr -q:v 2 -frames:v 121 "
            f"-ss {start_frame / FRAME_RATE} "
            f"{clip_filename}"
        )

        subprocess.run(command, shell=True)

# Same video whose frame we extracted originally
input_video = "Laparoscopic_appendectomy.mp4"
output_folder = "Keyframe/Latest/Clips2"
timestamps_csv = "Keyframe/Latest/timestamps.csv"
FRAME_RATE = 30  

extract_clips(input_video, output_folder, timestamps_csv)

### Calculate Frame Rate of a Video

In [10]:
import subprocess

def get_frame_rate(input_video):
    command = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'v:0',
        '-show_entries', 'stream=r_frame_rate',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        input_video
    ]
    
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode == 0:
        frame_rate_str = result.stdout.strip()
        print(frame_rate_str)
        try:
            num, denom = frame_rate_str.split('/')
            return float(num) / float(denom)
        except ValueError:
            print("Error: Unable to parse frame rate.")
            return None
    else:
        print("Error: ffprobe command failed.")
        return None

# Example usage
input_video = "Laparoscopic_appendectomy.mp4"
frame_rate = get_frame_rate(input_video)
if frame_rate is not None:
    print("Frame rate:", frame_rate)

30000/1001
Frame rate: 29.97002997002997


### Calculating frame rate of a video

In [None]:
import subprocess

def get_frame_rate(input_video):
    command = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'v:0',
        '-show_entries', 'stream=r_frame_rate',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        input_video
    ]
    
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode == 0:
        frame_rate_str = result.stdout.strip()
        print(frame_rate_str)
        try:
            num, denom = frame_rate_str.split('/')
            return float(num) / float(denom)
        except ValueError:
            print("Error: Unable to parse frame rate.")
            return None
    else:
        print("Error: ffprobe command failed.")
        return None

# Example usage
input_video = "Output/Clips/2023-07-18_154344_VID003_100_97.92.mp4"
frame_rate = get_frame_rate(input_video)
if frame_rate is not None:
    print("Frame rate:", frame_rate)

### Creating Clips from frames

In [None]:
def extract_clips(input_video, output_folder, timestamps_csv):
    os.makedirs(output_folder, exist_ok=True)

    timestamps = []
    with open(timestamps_csv, "r") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            timestamps.append(float(row["Timestamps"]))

    total_frames_cmd = f"ffprobe -v error -select_streams v:0 -count_frames -show_entries stream=nb_frames -of csv=p=0 {input_video}"
    result = subprocess.run(total_frames_cmd, shell=True, capture_output=True, text=True)
    total_frames = int(result.stdout.strip())

    for idx, timestamp in enumerate(timestamps):
        start_frame = max(0, int(timestamp * FRAME_RATE) - 60)
        end_frame = min(total_frames - 1, int(timestamp * FRAME_RATE) + 60)

        clip_filename = f"{output_folder}/clip_{idx}_{timestamp:.2f}.mp4"
        command = (
            f"ffmpeg -i {input_video} "
            f"-vf \"select='between(n,{start_frame},{end_frame})'\" "
            f"-vsync vfr -q:v 2 -frames:v 121 "
            f"-ss {start_frame / FRAME_RATE} "
            f"{clip_filename}"
        )

        subprocess.run(command, shell=True)

input_video = "Input/2023-07-18_154344_VID003.mp4"
output_folder = "Output/Clips"
timestamps_csv = "Output/timestamps.csv"
FRAME_RATE = 30  

extract_clips(input_video, output_folder, timestamps_csv)

In [None]:
import json
import csv

def extract_info_from_json(json_file, target_video_file):
    with open(json_file, 'r') as f:
        data = json.load(f)

    # Find annotations specific to the target video file
    annotations = []
    for item in data:
        if item["file_upload"] == target_video_file:
            annotations.extend(item["annotations"])

    if not annotations:
        print(f"No annotations found for the video file '{target_video_file}'.")
        return

    # Extract start time, end time, and label for each annotation
    rows = []
    for annotation in annotations:
        for result in annotation["result"]:
            start_time = result["value"]["start"]
            end_time = result["value"]["end"]
            label = result["value"]["labels"][0]  # Assuming only one label per result
            rows.append([start_time, end_time, label])

    # Write extracted information to a CSV file
    csv_file = f"{target_video_file}.csv"
    with open(csv_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Start Time", "End Time", "Label"])
        writer.writerows(rows)

    print(f"CSV file '{csv_file}' has been created.")

# Example usage
json_file = "Input/2023-07-18_154344_VID003.JSON"
target_video_file = "2023-07-18_154344_VID003"  # Specify the target video file
extract_info_from_json(json_file, target_video_file)

### Pick Annotation of a Video file

In [9]:
import json
import csv
import os

# Load JSON data
with open('Input/2023-07-18_154344_VID003.json', 'r') as f:
    data = json.load(f)

# Define the filename of the video you want to extract annotations for
video_filename = "2023-07-18_154344_VID003.mp4"

# Find the entry corresponding to the specified video filename
video_entry = next((entry for entry in data if video_filename in entry["file_upload"]), None)

if video_entry:
    # Extract annotations
    annotations = video_entry["annotations"]

    # Extract start, end, and labels values from annotations
    annotation_data = []
    for annotation in annotations:
        for result in annotation["result"]:
            start = result["value"]["start"]
            end = result["value"]["end"]
            labels = ', '.join(result["value"]["labels"])
            annotation_data.append({"start": start, "end": end, "labels": labels})

    # Define folder and new CSV file path
    folder = 'Output'
    os.makedirs(folder, exist_ok=True)  # Create folder if it doesn't exist
    csv_file = os.path.join(folder, "labels.csv")

    # Write annotation data to new CSV file
    with open(csv_file, mode='w', newline='') as csvfile:
        fieldnames = ['start', 'end', 'labels']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for annotation in annotation_data:
            writer.writerow(annotation)
    print(f"Annotations extracted and saved to {csv_file}")
else:
    print("Video file not found in the JSON data.")

Annotations extracted and saved to Output\labels.csv


In [11]:
import pandas as pd

# Load the first CSV file containing start, end, and labels
df_labels = pd.read_csv("Output/labels.csv")

# Load the second CSV file containing clip information
df_clips = pd.read_csv("Output/clips_info.csv")

# Initialize a list to store the labels for each clip
clip_labels = []

# Iterate through each clip in the second CSV file
for index, row_clip in df_clips.iterrows():
    label = None
    # Iterate through each annotation in the first CSV file
    for index, row_label in df_labels.iterrows():
        # Check if the clip's start time falls within the annotation's time range
        if row_clip['from'] >= row_label['start'] and row_clip['to'] <= row_label['end']:
            label = row_label['labels']
            break  # Stop searching if a matching annotation is found
    # Append the label to the list
    clip_labels.append(label)

# Add a new column for labels to the second dataframe
df_clips['labels'] = clip_labels

# Save the modified dataframe to a new CSV file
df_clips.to_csv("modified_second_file.csv", index=False)

### Marking Labels on clips

In [18]:
import pandas as pd

# Load the first CSV file containing start, end, and labels
df_labels = pd.read_csv("Output/labels.csv")

# Load the second CSV file containing clip information
df_clips = pd.read_csv("Output/clips_info.csv")

# Initialize a list to store the labels for each clip
clip_labels = []

# Iterate through each clip in the second CSV file
for index, row_clip in df_clips.iterrows():
    labels = []
    # Iterate through each annotation in the first CSV file
    for index, row_label in df_labels.iterrows():
        # Check if the clip's start time falls within the annotation's time range
        if row_clip['from'] >= row_label['start'] and row_clip['to'] <= row_label['end']:
            labels.append(row_label['labels'])

    # If no label found, assign a default value
    if not labels:
        labels.append("No Label Found")
    
    # Append all labels found for the clip
    clip_labels.append(', '.join(labels))

# Add a new column for labels to the second dataframe
df_clips['labels'] = clip_labels

# Save the modified dataframe to a new CSV file
df_clips.to_csv("Output/modified_second_file.csv", index=False)

In [5]:
input_video = "Output/Clips1/2023-07-18_154344_VID003_100_97.92.mp4"
count_total_frames = f"ffprobe -v error -select_streams v:0 -count_frames -show_entries stream=nb_frames -of csv=p=0 {input_video}"
result = subprocess.run(count_total_frames, shell=True, capture_output=True, text=True)
# total_frames = int(result.stdout.strip())


CompletedProcess(args='ffprobe -v error -select_streams v:0 -count_frames -show_entries stream=nb_frames -of csv=p=0 Output/Clips1/2023-07-18_154344_VID003_100_97.92.mp4', returncode=0, stdout='120\n', stderr='')

In [7]:
result.stdout.strip()

'120'

In [None]:
import json
import csv
import os

with open('Input/2023-07-18_154344_VID003.json', 'r') as f:
    data = json.load(f)

video_filename = "2023-07-18_154344_VID003.mp4"

video_entry = next((entry for entry in data if video_filename in entry["file_upload"]), None)

# print(video_entry)
if video_entry:
    annotations = video_entry["annotations"]
# annotations
    
    annotation_data = []
    for annotation in annotations:
#         print(annotation)
        for result in annotation["result"]:
#             print(result)
#             start = result["value"]
            start = result["value"]["start"]
#             print(start)
            end = result["value"]["end"]
            labels = ', '.join(result["value"]["labels"])
#           print(result['value']['labels'][0])
#           print(labels)
            annotation_data.append({"start": start, "end": end, "labels": labels})

folder = 'Output'
os.makedirs(folder, exist_ok=True)  
csv_file = os.path.join(folder, "labels1.csv")

    with open(csv_file, mode='w', newline='') as csvfile:
        fieldnames = ['start', 'end', 'labels']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for annotation in annotation_data:
            writer.writerow(annotation)
    print(f"Annotations extracted and saved to {csv_file}")
else:
    print("Video file not found in the JSON data.")

### Extract Keyframes and save time in CSV

In [None]:
import os
import subprocess
import csv

def extract_keyframes(input_video, output_folder, start_time=None, end_time=None):
    os.makedirs(output_folder, exist_ok=True)

    time_options = ""
    if start_time is not None and end_time is not None:
        time_options = f"-ss {start_time} -to {end_time}"

    command = (
        f"ffmpeg {time_options} -i \"{input_video}\" -vf "
        f"\"select='eq(pict_type\\,I)',showinfo\" -vsync vfr "
        f"\"{output_folder}/{os.path.splitext(os.path.basename(input_video))[0]}_%03d.jpg\" && "
        f"ffmpeg {time_options} -i \"{input_video}\" -vf "
        f"\"select='eq(pict_type\\,I)',showinfo\" -vsync vfr "
        f"-copyts -f null - 2>&1 | findstr \"pts_time:\""
    )

#     print("Command:", command)  # Print the command for debugging
    output = subprocess.check_output(command, shell=True, encoding="utf-8")
    pts_times = []
    for line in output.splitlines():
        if "pts_time:" in line:
            timestamp = line.split("pts_time:")[1].split()[0]
            pts_times.append([timestamp])

    # Save pts_time values to a CSV file
    output_csv = os.path.join(output_folder, "timestamps.csv")
    with open(output_csv, "w", newline="") as csvfile:
        writer = csv.writer(csvfile, delimiter=" ")
        writer.writerow(["Timestamps"])
        writer.writerows(pts_times)

    
    
# Explain the input video path whose frames you need to extract
input_video = "Input/2023-07-18_154344_VID003.mp4"
output_folder = "Demo3/Frames/"
start_time = "00:02:49"
end_time = "00:03:00"
# We can remove start_time and end_time if we need whole video
extract_keyframes(input_video, output_folder,start_time,end_time)

### Hierarical Approach
- Making keyframes from multiple videos and saving their time in CSV file. 

In [34]:
import os
import subprocess
import csv

def extract_keyframes(input_video, output_folder, start_time=None, end_time=None):
    os.makedirs(output_folder, exist_ok=True)

    if start_time is not None and end_time is not None:
        time_options = f"-ss {start_time} -to {end_time}"
    else:
        time_options = ""

    # Extract filename without extension
    filename = os.path.splitext(os.path.basename(input_video))[0]
    
    command = (
        f"ffmpeg {time_options} -i \"{input_video}\" -vf "
        f"\"select='eq(pict_type\\,I)',showinfo\" -vsync vfr "
        f"-copytb 1 -f null - && "
        f"ffmpeg {time_options} -i \"{input_video}\" -vf "
        f"\"select='eq(pict_type\\,I)',showinfo\" -vsync vfr "
        f"-copyts -f null - 2>&1 | findstr \"pts_time:\""
    )

    output = subprocess.check_output(command, shell=True, encoding="utf-8")

    # Extract keyframe timestamps
    keyframe_timestamps = []
    for line in output.splitlines():
        if "pts_time:" in line:
            timestamp = line.split("pts_time:")[1].split()[0]
            keyframe_timestamps.append(float(timestamp))

    # Filter out duplicate keyframes within the same second
    unique_keyframe_timestamps = []
    last_second = None
    for timestamp in keyframe_timestamps:
        current_second = int(timestamp)
        if current_second != last_second:
            unique_keyframe_timestamps.append([filename,timestamp])
            last_second = current_second

    # Save unique keyframes
    for timestamp in unique_keyframe_timestamps:
        output_file = os.path.join(output_folder, f"{filename}_{int(timestamp[1]):03d}.png")
        command = (
            f"ffmpeg {time_options} -ss {timestamp[1]} -i \"{input_video}\" -frames:v 1 "
            f"-vf \"select='eq(pict_type\\,I)',showinfo\" -vsync vfr "
            f"\"{output_folder}/{filename}_{timestamp[1]}.png\""
        )
        subprocess.run(command, shell=True)

    # Save pts_time values to a CSV file
    output_csv = os.path.join(output_folder, "timestamps.csv")
    # Check if the CSV file exists and write the header row only if it's a new file
    write_header = not os.path.exists(output_csv)
    with open(output_csv, "a", newline="") as csvfile:
        writer = csv.writer(csvfile)
        if write_header:
            writer.writerow(["Keyframes", "Timestamps"])
        for timestamp in unique_keyframe_timestamps:
            writer.writerow([timestamp[0], timestamp[1]])

        
def update_hierarchy(input_video, output_folder):
    # Extract filename without extension
    filename = os.path.splitext(os.path.basename(input_video))[0]
    
    # Split the path to get the folder containing the video
    folder_name = os.path.basename(os.path.dirname(input_video))
    
    # Construct the new hierarchy
    new_hierarchy = os.path.join(output_folder, folder_name, "labels", "Frames")
    
    return new_hierarchy

def extract_keyframes_from_hierarchy(root_folder, start_time=None, end_time=None):
    for subdir, dirs, files in os.walk(root_folder):
        for file in files:
            if file.endswith(".mp4"):
                input_video = os.path.join(subdir, file)
                output_folder_updated = update_hierarchy(input_video, root_folder)
                extract_keyframes(input_video, output_folder_updated, start_time, end_time)

# Specify the root folder of the hierarchy
root_folder = "E:\SPS\Frame_Extraction_Project\Demo\Videos"

# Specify start and end times if needed
# start_time = "00:01:50"
# end_time = "00:02:20"

# Extract keyframes from the hierarchy
extract_keyframes_from_hierarchy(root_folder)

In [None]:
import os
import subprocess
import csv

def extract_keyframes(input_video, output_folder, start_time=None, end_time=None):
    os.makedirs(output_folder, exist_ok=True)

    if start_time is not None and end_time is not None:
        time_options = f"-ss {start_time} -to {end_time}"
    else:
        time_options = ""

    # Extract filename without extension
    filename = os.path.splitext(os.path.basename(input_video))[0]
    
    command = (
        f"ffmpeg {time_options} -i \"{input_video}\" -vf "
        f"\"select='eq(pict_type\\,I)',showinfo\" -vsync vfr "
        f"\"{output_folder}/{filename}_%03d.png\" && "
        f"ffmpeg {time_options} -i \"{input_video}\" -vf "
        f"\"select='eq(pict_type\\,I)',showinfo\" -vsync vfr "
        f"-copyts -f null - 2>&1 | findstr \"pts_time:\""
    )

    # Run the command and capture the output
    output = subprocess.check_output(command, shell=True, encoding="utf-8")

    # Extract pts_time values
    pts_times = []
    for line in output.splitlines():
        if "pts_time:" in line:
            timestamp = line.split("pts_time:")[1].split()[0]
            pts_times.append([filename,timestamp])

    # Save pts_time values to a CSV file
    output_csv = os.path.join(output_folder, "timestamps.csv")
    if not os.path.exists(output_csv):
        with open(output_csv, "w", newline="") as csvfile:
            writer = csv.writer(csvfile, delimiter=",")
            # Write header row
            writer.writerow(["Keyframes", "Timestamps"])
    with open(output_csv, "a", newline="") as csvfile:
        writer = csv.writer(csvfile, delimiter=",")
        # Write keyframes and timestamps
        writer.writerows(pts_times)
        
def update_hierarchy(input_video, output_folder):
    # Extract filename without extension
    filename = os.path.splitext(os.path.basename(input_video))[0]
    
    # Split the path to get the folder containing the video
    folder_name = os.path.basename(os.path.dirname(input_video))
    
    # Construct the new hierarchy
    new_hierarchy = os.path.join(output_folder, folder_name, "labels", "Frames")
    
    return new_hierarchy

def extract_keyframes_from_hierarchy(root_folder, start_time=None, end_time=None):
    for subdir, dirs, files in os.walk(root_folder):
        for file in files:
            if file.endswith(".mp4"):
                input_video = os.path.join(subdir, file)
                output_folder_updated = update_hierarchy(input_video, root_folder)
                extract_keyframes(input_video, output_folder_updated, start_time, end_time)

# Specify the root folder of the hierarchy which contain surgeries folder
root_folder = "E:\Frame_Extraction_Project\Demo8\Videos"

# Specify start and end times if needed
# start_time = "00:01:50"
# end_time = "00:02:10"

# Extract keyframes from the hierarchy
extract_keyframes_from_hierarchy(root_folder,start_time,end_time)

### Clip Formation for multiple videos

### Latest

### Solving start and end

In [2]:
import os
import subprocess
import csv

def get_frame_rate(input_video):
    command = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'v:0',
        '-show_entries', 'stream=r_frame_rate',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        input_video
    ]
    
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode == 0:
        frame_rate_str = result.stdout.strip()
        try:
            num, denom = frame_rate_str.split('/')
            return float(num) / float(denom)
        except ValueError:
            print("Error: Unable to parse frame rate.")
            return None
    else:
        print("Error: ffprobe command failed.")
        return None

def extract_clips(input_videos, root_folder):
    for input_video in input_videos:
        video_folder = os.path.dirname(input_video)
        timestamps_csv = os.path.join(video_folder, "labels", "Frames", "timestamps.csv")
        output_folder = os.path.join(root_folder, os.path.basename(video_folder), "labels", "Videos")
        os.makedirs(output_folder, exist_ok=True)
        
        frame_rate = get_frame_rate(input_video)
        if frame_rate is None:
            continue

        count_total_frames = f"ffprobe -v error -select_streams v:0 -count_frames -show_entries stream=nb_frames -of csv=p=0 {input_video}"
        result = subprocess.run(count_total_frames, shell=True, capture_output=True, text=True)
        total_frames = int(result.stdout.strip())
        
        video_filename = os.path.splitext(os.path.basename(input_video))[0]  # Extract video file name without extension
        
        # Generate output CSV files based on the folder being processed
        output_csv = os.path.join(output_folder, "clips_info.csv")

        with open(output_csv, "a", newline="") as csvfile:
            fieldnames = ["clip_filename", "start_time", "end_time"]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            if os.path.getsize(output_csv) == 0:
                writer.writeheader()
                
            with open(timestamps_csv, "r") as csvfile:
                reader = csv.DictReader(csvfile)
                for idx, row in enumerate(reader):
                    if not video_filename in row["Keyframes"]:
                        continue  
                    
                    timestamp = float(row["Timestamps"])
                    keyframe_frame = int(timestamp * frame_rate)
                    
                    # Skip first keyframe to handle issues like files with no frames
                    if keyframe_frame == 0:
                        continue
                    
                    start_frame = max(0, keyframe_frame - 60 * frame_rate)  
                    end_frame = min(keyframe_frame, total_frames - 1)  
                    
                    clip_duration_sec = end_frame - start_frame
                    clip_filename = f"{output_folder}/{video_filename}-{start_frame/frame_rate:.3f}-{end_frame/frame_rate:.3f}.mp4"

                    command = (
                        f"ffmpeg -i {input_video} "
                        f"-vf \"select='between(n,{start_frame},{end_frame})'\" "
                        f"-vsync vfr -q:v 2 -frames:v {int(clip_duration_sec*frame_rate)} "
                        f"-ss {start_frame / frame_rate} -to {end_frame / frame_rate} "
                        f"{clip_filename}"
                    )

    
                    subprocess.run(command, shell=True)

                    # Write clip information to CSV
                    writer.writerow({
                        "clip_filename": os.path.basename(clip_filename),
                        "start_time": round(start_frame / frame_rate, 3),
                        "end_time": round(end_frame / frame_rate, 3)
                    })

# Specify the root folder containing the hierarchy of videos
root_folder = "E:\Frame_Extraction_Project\Demo\Videos"

input_videos = []
for subdir, dirs, files in os.walk(root_folder):
    for file in files:
        if file.endswith(".mp4"):
            input_videos.append(os.path.join(subdir, file))

extract_clips(input_videos, root_folder)

In [6]:
import json
import csv
import os

def save_annotations(output_folder, annotation_data):
    csv_file = os.path.join(output_folder, "labels.csv")
    
    with open(csv_file, mode='w', newline='') as csvfile:
        fieldnames = ['start', 'end', 'labels']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for annotation in annotation_data:
            writer.writerow(annotation)
    print(f"Annotations extracted and saved to {csv_file}")

def extract_annotations(json_file, video_filename):
    with open(json_file, 'r') as f:
        data = json.load(f)

    video_entry = next((entry for entry in data if video_filename in entry["file_upload"]), None)

    if video_entry:
        annotations = video_entry["annotations"]
        
        annotation_data = []
        for annotation in annotations:
            for result in annotation["result"]:
                start = result["value"]["start"]
                end = result["value"]["end"]
                labels = ', '.join(result["value"]["labels"])
                annotation_data.append({"start": start, "end": end, "labels": labels})
        
        return annotation_data
    else:
        print(f"Video file {video_filename} not found in the JSON data.")
        return None

video_directories = [
    "E:\Frame_Extraction_Project\Demo\Videos\Surg001",
    "E:\Frame_Extraction_Project\Demo\Videos\Surg002",
    "E:\Frame_Extraction_Project\Demo\Videos\Surg003"
]

for directory in video_directories:
    print(f"Searching in: {directory}")
    json_file = "E:\Frame_Extraction_Project\Input\2023-07-18_154344_VID003.json"  # Adjust the JSON file name as needed
    
    if os.path.exists(json_file):
        print("JSON file found. Starting extraction process.")
        video_files = [file for file in os.listdir(directory) if file.endswith(".mp4")]
        print(f"Found MP4 files: {video_files}")
        
        for video_file in video_files:
            video_filename = os.path.splitext(video_file)[0]
            annotations = extract_annotations(json_file, video_filename)
            if annotations:
                output_folder = os.path.join(directory, "labels")
                print(f"Annotations: {annotations}")
                save_annotations(output_folder, annotations)
    else:
        print(f"No JSON file found for {directory}")

Searching in: E:\Frame_Extraction_Project\Demo\Videos\Surg001
No JSON file found for E:\Frame_Extraction_Project\Demo\Videos\Surg001
Searching in: E:\Frame_Extraction_Project\Demo\Videos\Surg002
No JSON file found for E:\Frame_Extraction_Project\Demo\Videos\Surg002
Searching in: E:\Frame_Extraction_Project\Demo\Videos\Surg003
No JSON file found for E:\Frame_Extraction_Project\Demo\Videos\Surg003


In [15]:
import json
import csv
import os

def save_annotations(output_folder, annotation_data):
    csv_file = os.path.join(output_folder, "labels.csv")
    
    with open(csv_file, mode='w', newline='') as csvfile:
        fieldnames = ['start', 'end', 'labels']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for annotation in annotation_data:
            writer.writerow(annotation)
    print(f"Annotations extracted and saved to {csv_file}")

def extract_annotations(json_file, video_filename):
    with open(json_file, 'r') as f:
        data = json.load(f)

    video_entry = next((entry for entry in data if video_filename in entry["file_upload"]), None)

    if video_entry:
        annotations = video_entry["annotations"]
        
        annotation_data = []
        for annotation in annotations:
            for result in annotation["result"]:
                start = result["value"]["start"]
                end = result["value"]["end"]
                labels = ', '.join(result["value"]["labels"])
                annotation_data.append({"start": start, "end": end, "labels": labels})
        
        return annotation_data
    else:
        print(f"Video file {video_filename} not found in the JSON data.")
        return None

root_folder = r"E:\Frame_Extraction_Project\acha\videos"

surg_directories = [os.path.join(root_folder, directory) for directory in os.listdir(root_folder) if directory.startswith("Surg")]

for surg_directory in surg_directories:
    print(f"Searching in: {surg_directory}")
    json_file = "E:\Frame_Extraction_Project\Input\2023-07-18_154344_VID003.json"  # Adjust the JSON file name as needed
    
    if os.path.exists(json_file):
        print("JSON file found. Starting extraction process.")
        video_files = [file for file in os.listdir(surg_directory) if file.endswith(".mp4")]
        print(f"Found MP4 files: {video_files}")
        
        for video_file in video_files:
            video_filename = os.path.splitext(video_file)[0]
            annotations = extract_annotations(json_file, video_filename)
            if annotations:
                output_folder = os.path.join(surg_directory, "labels")
                os.makedirs(output_folder, exist_ok=True)
                print(f"Annotations: {annotations}")
                save_annotations(output_folder, annotations)
    else:
        print(f"No JSON file found for {surg_directory}")

In [22]:
for subdir, dirs, files in os.walk(root_folder):
    frames_folder = os.path.join(subdir, "labels", "Frames")

In [20]:
for frames_subdir, _, frames_files in os.walk(frames_folder):
                    for frame_file in frames_files:
                        print(frame_file)
#                         if frame_file == "timestamps.csv":
#                             timestamps_csv = os.path.join(frames_subdir, frame_file)
#                             break
#                     if timestamps_csv:
#                         break  # Stop searching once timestamps.csv is found


D
e
m
o
2
/
V
i
d
e
o
s
\
s
u
r
g
0
0
3
\
l
a
b
e
l
s
\
V
i
d
e
o
s
\
2
0
2
3
-
0
7
-
1
8
_
1
5
4
3
4
4
_
V
I
D
0
0
3
_
2
\
l
a
b
e
l
s
\
F
r
a
m
e
s


In [19]:
timestamps_csv

NameError: name 'timestamps_csv' is not defined

In [17]:
os.walk(frames_folder)

<generator object _walk at 0x0000016E646428F0>

In [7]:
print('a')

a


In [13]:
for i in os.listdir(root_folder):
    print(i)

surg001
surg001 - Copy
surg001 - Copy (2)


In [15]:
u = os.path.join(root_folder,i)

In [16]:
os.listdir(u)

['2023-07-18_154344_VID003.mp4',
 '2023-07-18_154344_VID003_1.mp4',
 '2023-07-18_154344_VID003_2.mp4',
 'labels']

### Additional Ideas/New

In [8]:
import os
import subprocess

def extract_keyframes(input_video, output_folder, start_time=None, end_time=None):
    os.makedirs(output_folder, exist_ok=True)

    time_options = ""
    if start_time is not None and end_time is not None:
        time_options = f"-ss {start_time} -to {end_time}"

    command = (
        f"ffmpeg {time_options} -i \"{input_video}\" -vf "
        f"\"select='eq(pict_type\\,I)',showinfo\" -vsync vfr "
        f"\"{output_folder}/{os.path.splitext(os.path.basename(input_video))[0]}_%03d.jpg\" && "
        f"ffmpeg {time_options} -i \"{input_video}\" -vf "
        f"\"select='eq(pict_type\\,I)',showinfo\" -vsync vfr "
        f"-copyts -f null - 2>&1 | findstr \"pts_time:\" > \"{output_folder}timestamps.txt\""
    )

#     print("Command:", command)  # Print the command for debugging
    subprocess.run(command, shell=True)

# Explain the input video path whose frames you need to extract
input_video = "Input/2023-07-18_154344_VID003.mp4"
output_folder = "Surgery/Frames/"
# start_time = "00:02:49"
# end_time = "00:03:00"
# We can remove start_time and end_time if we need whole video
extract_keyframes(input_video, output_folder)

In [9]:
import csv

def extract_pts_time(input_txt, output_csv):
    
    pts_times = []
    with open(input_txt, "r") as txt_file:
        for line in txt_file:
            if "pts_time:" in line:
                timestamp = line.split("pts_time:")[1].split()[0]
                pts_times.append([timestamp])

    with open(output_csv, "w", newline="") as csvfile:
        writer = csv.writer(csvfile, delimiter=" ")
        writer.writerow(["Timestamps"])
        writer.writerows(pts_times)

# Text file path         
input_txt = "Surgery/Frames/timestamps.txt"
output_csv = "Surgery/Frames/timestamps.csv"
extract_pts_time(input_txt, output_csv)

In [None]:
import os
import subprocess
import csv

def get_frame_rate(input_video):
    command = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'v:0',
        '-show_entries', 'stream=r_frame_rate',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        input_video
    ]
    
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode == 0:
        frame_rate_str = result.stdout.strip()
        try:
            num, denom = frame_rate_str.split('/')
            return float(num) / float(denom)
        except ValueError:
            print("Error: Unable to parse frame rate.")
            return None
    else:
        print("Error: ffprobe command failed.")
        return None

def extract_clips(input_video, output_folder, timestamps_csv, output_csv):
    os.makedirs(output_folder, exist_ok=True)

    timestamps = []
    with open(timestamps_csv, "r") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            timestamps.append(float(row["Timestamps"]))

    frame_rate = get_frame_rate(input_video)
    if frame_rate is None:
        return  

    count_total_frames = f"ffprobe -v error -select_streams v:0 -count_frames -show_entries stream=nb_frames -of csv=p=0 {input_video}"
    result = subprocess.run(count_total_frames, shell=True, capture_output=True, text=True)
    total_frames = int(result.stdout.strip())
    
    video_filename = os.path.splitext(os.path.basename(input_video))[0]  # Extract video file name without extension

    clip_times = [] 

    for idx, timestamp in enumerate(timestamps):
        keyframe_frame = int(timestamp * frame_rate)
        start_frame = max(0, keyframe_frame - 60 * frame_rate)  
        end_frame = min(keyframe_frame, total_frames - 1)  
        
        clip_duration_sec = end_frame - start_frame
        clip_filename = f"{output_folder}/{video_filename}-{start_frame/frame_rate}-{end_frame/frame_rate}.mp4"
        command = (
        f"ffmpeg -ss {start_frame / frame_rate} -i {input_video} "
        f"-vf \"select='between(n,{start_frame},{end_frame})',setpts=PTS-STARTPTS\" "
        f"-t {clip_duration_sec / frame_rate} -vsync vfr -q:v 2 "
        f"{clip_filename}"
        )

        subprocess.run(command, shell=True)

        clip_times.append({
            "clip_filename": os.path.basename(clip_filename),
            "from": start_frame / frame_rate,
            "to": end_frame / frame_rate
        })

    with open(output_csv, "w", newline="") as csvfile:
        fieldnames = ["clip_filename", "from", "to"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(clip_times)

# Same video whose frame we extracted originally
input_video = "Input/2023-07-18_154344_VID003.mp4"
output_folder = "Surgery/Clips/"
timestamps_csv = "Surgery/Frames/timestamps.csv"
output_csv = "Surgery/Clips/clips_info.csv"

extract_clips(input_video, output_folder, timestamps_csv, output_csv)

In [6]:
import json
import csv
import os

with open('Input/2023-07-18_154344_VID003.json', 'r') as f:
    data = json.load(f)

video_filename = "2023-07-18_154344_VID003.mp4"

video_entry = next((entry for entry in data if video_filename in entry["file_upload"]), None)

if video_entry:
    annotations = video_entry["annotations"]
    
    annotation_data = []
    for annotation in annotations:
        for result in annotation["result"]:
            start = result["value"]["start"]
            end = result["value"]["end"]
            labels = ', '.join(result["value"]["labels"])
            annotation_data.append({"start": start, "end": end, "labels": labels})

    folder = 'Output2/Annotations'
    os.makedirs(folder, exist_ok=True)  
    csv_file = os.path.join(folder, "annotations.csv")

    with open(csv_file, mode='w', newline='') as csvfile:
        fieldnames = ['start', 'end', 'labels']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for annotation in annotation_data:
            writer.writerow(annotation)
    print(f"Annotations extracted and saved to {csv_file}")
else:
    print("Video file not found in the JSON data.")

Annotations extracted and saved to Output2/Annotations\annotations.csv


In [7]:
import pandas as pd

# Path of labels file that we extracted from JSON
df_labels = pd.read_csv("Output2/Annotations/annotations.csv")

# Path of clips file which contains information about each clip related to timing of a clip
df_clips = pd.read_csv("Output2/Clips/clips_info.csv")

clip_labels = []

for index, row_clip in df_clips.iterrows():
    labels = []    
    for index, row_label in df_labels.iterrows():        
        if row_clip['from'] >= row_label['start'] and row_clip['to'] <= row_label['end']:
            labels.append(row_label['labels'])
    if not labels:
        labels.append("No Label Found")    

    clip_labels.append(', '.join(labels))

# print(clip_labels)
df_clips['labels'] = clip_labels
# df_clips
df_clips.to_csv("labels2.csv", index=False)

In [6]:
import json
import pandas as pd
import os

# Load JSON data
with open(r"E:\Frame_Extraction_Project\Demo8\2023-07-18_154344_VID003.json", 'r') as f:
    data = json.load(f)

video_filename = "2023-07-18_154344_VID003.mp4"

# Find annotations for the given video file
video_entry = next((entry for entry in data if video_filename in entry["file_upload"]), None)

if video_entry:
    annotations = video_entry["annotations"]
    
    annotation_data = []
    for annotation in annotations:
        for result in annotation["result"]:
            start = result["value"]["start"]
            end = result["value"]["end"]
            labels = ', '.join(result["value"]["labels"])
            annotation_data.append({"start": start, "end": end, "labels": labels})

    # Load clips info
    df_clips = pd.read_csv("E:\Frame_Extraction_Project\Demo8\clips_info.csv")

    clip_labels = []

    # Match labels to clips
    for _, row_clip in df_clips.iterrows():
        labels = []
        for annotation in annotation_data:
            if annotation['start'] <= row_clip['end_time'] <= annotation['end']:
                labels.append(annotation['labels'])
        if not labels:
            # Find the next available label
            next_label_index = next((i for i, annotation in enumerate(annotation_data) if annotation['start'] > row_clip['end_time']), None)
            if next_label_index is not None:
                labels.append(annotation_data[next_label_index]['labels'])
            else:
                labels.append("No Label Found")

        clip_labels.append(', '.join(labels))
    
    # Save the result to labels.csv
    output_folder = "E:\Frame_Extraction_Project\Demo8"
    os.makedirs(output_folder, exist_ok=True)
    output_file = os.path.join(output_folder, "labels1.csv")

    # Create a DataFrame for the clip labels
    df_labels = pd.DataFrame({"clip_filename": df_clips["clip_filename"], "start_time": df_clips["start_time"], "end_time": df_clips["end_time"], "labels": clip_labels})

    # Save the DataFrame to a CSV file
    df_labels.to_csv(output_file, index=False)

    print(f"Labels extracted and saved to {output_file}")
else:
    print("Video file not found in the JSON data.")

Labels extracted and saved to E:\Frame_Extraction_Project\Demo8\labels1.csv


In [31]:
import json
import pandas as pd
import os

def extract_labels(json_file, root_folder):
    # Load JSON data
    with open(json_file, 'r') as f:
        data = json.load(f)

    for subdir, dirs, files in os.walk(root_folder):
        if os.path.basename(subdir).startswith("surg"):  
            mp4_files_found = False
            for file in files:
                if file.endswith(".mp4"):
                    mp4_files_found = True
                    video_file = os.path.join(subdir, file)

                    video_entry = next((entry for entry in data if file in entry["file_upload"]), None)
                    if video_entry:
                        annotations = video_entry["annotations"]
                        clips_info_csv = os.path.join(subdir, "labels", "Videos", "clips_info.csv")

                        if os.path.exists(clips_info_csv):
                            df_clips = pd.read_csv(clips_info_csv)

                            annotation_data = []
                            for annotation in annotations:
                                for result in annotation["result"]:
                                    start = result["value"]["start"]
                                    end = result["value"]["end"]
                                    labels = ', '.join(result["value"]["labels"])
                                    annotation_data.append({"start": start, "end": end, "labels": labels})

                            clip_labels = []

                            for _, row_clip in df_clips.iterrows():
                                labels = []
                                for annotation in annotation_data:
                                    if annotation['start'] <= row_clip['end_time'] <= annotation['end']:
                                        labels.append(annotation['labels'])
                                if not labels:
                                    next_label_index = next((i for i, annotation in enumerate(annotation_data) if annotation['start'] > row_clip['end_time']), None)
                                    if next_label_index is not None:
                                        labels.append(annotation_data[next_label_index]['labels'])
                                    else:
                                        labels.append("No Label Found")

                                clip_labels.append(', '.join(labels))

                            labels_folder = os.path.join(subdir, "labels")
                            os.makedirs(labels_folder, exist_ok=True)
                            output_file = os.path.join(labels_folder, "labels.csv")

                            df_labels = pd.DataFrame({"clip_filename": df_clips["clip_filename"], "start_time": df_clips["start_time"], "end_time": df_clips["end_time"], "labels": clip_labels})

                            df_labels.to_csv(output_file, index=False)

            if not mp4_files_found:
                print("No .mp4 files found in this folder.")

    print("Label extraction completed.")

# Specify the root folder containing the hierarchy of videos
root_folder = r"E:\Frame_Extraction_Project\Videos"
json_file = r"E:\Frame_Extraction_Project\Demo8\2023-07-18_154344_VID003.json"

extract_labels(json_file, root_folder)

Label extraction completed.


In [1]:
import json
import pandas as pd
import os

def extract_labels(json_file, root_folder):
    # Load JSON data
    with open(json_file, 'r') as f:
        data = json.load(f)

    for subdir, dirs, files in os.walk(root_folder):
        print(f"Processing folder: {subdir}")
        if os.path.basename(subdir).startswith("surg"):  
            mp4_files_found = False
            for file in files:
                if file.endswith(".mp4"):
                    mp4_files_found = True
                    video_file = os.path.join(subdir, file)
                    print(f"Processing video file: {video_file}")

                    video_entry = next((entry for entry in data if file in entry["file_upload"]), None)
                    if video_entry:
                        print(f"Annotations found for video: {file}")
                        annotations = video_entry["annotations"]
                        clips_info_csv = os.path.join(subdir, "labels", "Videos", "clips_info.csv")
                        print(f"Clips info CSV: {clips_info_csv}")

                        if os.path.exists(clips_info_csv):
                            df_clips = pd.read_csv(clips_info_csv)
                            print("Clips info loaded successfully.")
                            
                            annotation_data = []
                            for annotation in annotations:
                                for result in annotation["result"]:
                                    start = result["value"]["start"]
                                    end = result["value"]["end"]
                                    labels = ', '.join(result["value"]["labels"])
                                    annotation_data.append({"start": start, "end": end, "labels": labels})

                            clip_labels = []

                            for _, row_clip in df_clips.iterrows():
                                labels = []
                                for annotation in annotation_data:
                                    if annotation['start'] <= row_clip['end_time'] <= annotation['end']:
                                        labels.append(annotation['labels'])
                                if not labels:
                                    next_label_index = next((i for i, annotation in enumerate(annotation_data) if annotation['start'] > row_clip['end_time']), None)
                                    if next_label_index is not None:
                                        labels.append(annotation_data[next_label_index]['labels'])
                                    else:
                                        labels.append("No Label Found")

                                clip_labels.append(', '.join(labels))

                            labels_folder = os.path.join(subdir, "labels")
                            os.makedirs(labels_folder, exist_ok=True)
                            output_file = os.path.join(labels_folder, "labels.csv")

                            df_labels = pd.DataFrame({"clip_filename": df_clips["clip_filename"], "start_time": df_clips["start_time"], "end_time": df_clips["end_time"], "labels": clip_labels})

                            df_labels.to_csv(output_file, index=False)
                            else:
                            print(f"Error: clips_info.csv not found in directory: {os.path.dirname(clips_info_csv)}")
                    else:
                        print(f"Annotations not found for video: {file} in the JSON data.")

            if not mp4_files_found:
                print("No .mp4 files found in this folder.")

    print("Label extraction completed.")

# Specify the root folder containing the hierarchy of videos
root_folder = r"E:\SPS\Frame_Extraction_Project\Demo\Videos"
json_file = r"E:\Frame_Extraction_Project\Demo8\2023-07-18_154344_VID003.json"

extract_labels(json_file, root_folder)

SyntaxError: invalid syntax (2694416069.py, line 62)

In [8]:
import json
import pandas as pd
import os

def extract_labels(json_file, root_folder):
    # Load JSON data
    with open(json_file, 'r') as f:
        data = json.load(f)

    for subdir, dirs, files in os.walk(root_folder):
        print(f"Processing folder: {subdir}")
        if os.path.basename(subdir).startswith("Surg"):  
            mp4_files_found = False
            for file in files:
                if file.endswith(".mp4"):
                    mp4_files_found = True
                    video_file = os.path.join(subdir, file)
                    print(f"Processing video file: {video_file}")

                    video_entry = next((entry for entry in data if file in entry["file_upload"]), None)
                    if video_entry:
                        print(f"Annotations found for video: {file}")
                        annotations = video_entry["annotations"]
                        clips_info_csv = os.path.join(subdir, "labels", "Videos", "clips_info.csv")
                        print(f"Clips info CSV: {clips_info_csv}")

                        if os.path.exists(clips_info_csv):
                            df_clips = pd.read_csv(clips_info_csv)
                            print("Clips info loaded successfully.")
                            
                            annotation_data = []
                            for annotation in annotations:
                                for result in annotation["result"]:
                                    start = result["value"]["start"]
                                    end = result["value"]["end"]
                                    labels = ', '.join(result["value"]["labels"])
                                    annotation_data.append({"start": start, "end": end, "labels": labels})

                            clip_labels = []

                            for _, row_clip in df_clips.iterrows():
                                labels = []
#                                 video_name = row_clip['clip_filename']
#                                 filename = video_name.split('-')[0]
                                for annotation in annotation_data:
                                    if filename in annotation['filename']:
                                        if annotation['start'] <= row_clip['end_time'] <= annotation['end']:
                                            labels.append(annotation['labels'])
                                if not labels:
                                    next_label_index = next((i for i, annotation in enumerate(annotation_data) if annotation['start'] > row_clip['end_time']), None)
                                    if next_label_index is not None:
                                        labels.append(annotation_data[next_label_index]['labels'])
                                    else:
                                        labels.append("No Label Found")

                                clip_labels.append(', '.join(labels))

                            labels_folder = os.path.join(subdir, "labels")
                            os.makedirs(labels_folder, exist_ok=True)
                            output_file = os.path.join(labels_folder, "labels.csv")

                            df_labels = pd.DataFrame({"clip_filename": df_clips["clip_filename"], "start_time": df_clips["start_time"], "end_time": df_clips["end_time"], "labels": clip_labels})

                            df_labels.to_csv(output_file, index=False)
                        else:
                            print(f"Error: clips_info.csv not found in directory: {os.path.dirname(clips_info_csv)}")
                    else:
                        print(f"Annotations not found for video: {file} in the JSON data.")

            if not mp4_files_found:
                print("No .mp4 files found in this folder.")

    print("Label extraction completed.")

# Specify the root folder containing the hierarchy of videos
root_folder = r"E:\SPS\Frame_Extraction_Project\Demo\Videos"
json_file = r"E:\SPS\Frame_Extraction_Project\Demo8\2023-07-18_154344_VID003.json"

extract_labels(json_file, root_folder)

Processing folder: E:\SPS\Frame_Extraction_Project\Demo\Videos
Processing folder: E:\SPS\Frame_Extraction_Project\Demo\Videos\Surg001
Processing video file: E:\SPS\Frame_Extraction_Project\Demo\Videos\Surg001\2023-07-18_154344_VID003.mp4
Annotations found for video: 2023-07-18_154344_VID003.mp4
Clips info CSV: E:\SPS\Frame_Extraction_Project\Demo\Videos\Surg001\labels\Videos\clips_info.csv
Clips info loaded successfully.


KeyError: 'filename'

In [5]:
import json
import pandas as pd
import os

def extract_labels(json_file, root_folder):
    # Load JSON data
    with open(json_file, 'r') as f:
        data = json.load(f)

    for subdir, dirs, files in os.walk(root_folder):
        print(f"Processing folder: {subdir}")
        if os.path.basename(subdir).startswith("Surg"):  
            mp4_files_found = False
            for file in files:
                if file.endswith(".mp4"):
                    mp4_files_found = True
                    video_file = os.path.join(subdir, file)
                    print(f"Processing video file: {video_file}")

                    video_entry = next((entry for entry in data if file in entry["file_upload"]), None)
                    if video_entry:
                        print(f"Annotations found for video: {file}")
                        annotations = video_entry["annotations"]
                        clips_info_csv = os.path.join(subdir, "labels", "Videos", "clips_info.csv")
                        print(f"Clips info CSV: {clips_info_csv}")

                        if os.path.exists(clips_info_csv):
                            df_clips = pd.read_csv(clips_info_csv)
                            print("Clips info loaded successfully.")
                            
                            annotation_data = []
                            for annotation in annotations:
                                for result in annotation["result"]:
                                    start = result["value"]["start"]
                                    end = result["value"]["end"]
                                    labels = ', '.join(result["value"]["labels"])
                                    filename = os.path.basename(file)  # Extract filename without extension
                                    annotation_data.append({"filename": filename, "start": start, "end": end, "labels": labels})

                            clip_labels = []

                            for _, row_clip in df_clips.iterrows():
                                labels = []
                                video_name = row_clip['clip_filename']
                                clipname = video_name.rsplit('-', 2)[0]
                                for annotation in annotation_data:
                                    if clipname == annotation['filename']:
                                        if annotation['start'] <= row_clip['end_time'] <= annotation['end']:
                                            labels.append(annotation['labels'])
                                if not labels:
#                                     next_label_index = next((i for i, annotation in enumerate(annotation_data) if annotation['start'] > row_clip['end_time']), None)
#                                     if next_label_index is not None:
#                                         labels.append(annotation_data[next_label_index]['labels'])
#                                     else:
                                    labels.append("No Label Found")

                                clip_labels.append(', '.join(labels))

                            labels_folder = os.path.join(subdir, "labels")
                            os.makedirs(labels_folder, exist_ok=True)
                            output_file = os.path.join(labels_folder, "labels.csv")

                            df_labels = pd.DataFrame({"clip_filename": df_clips["clip_filename"], "start_time": df_clips["start_time"], "end_time": df_clips["end_time"], "labels": clip_labels})

                            df_labels.to_csv(output_file, index=False)
                        else:
                            print(f"Error: clips_info.csv not found in directory: {os.path.dirname(clips_info_csv)}")
                    else:
                        print(f"Annotations not found for video: {file} in the JSON data.")

            if not mp4_files_found:
                print("No .mp4 files found in this folder.")

    print("Label extraction completed.")

# Specify the root folder containing the hierarchy of videos
root_folder = r"E:\SPS\Frame_Extraction_Project\Demo9\Videos"
json_file = r"E:\SPS\Frame_Extraction_Project\Demo8\2023-07-18_154344_VID003.json"

extract_labels(json_file, root_folder)

Processing folder: E:\SPS\Frame_Extraction_Project\Demo9\Videos
Processing folder: E:\SPS\Frame_Extraction_Project\Demo9\Videos\Surg001
Processing video file: E:\SPS\Frame_Extraction_Project\Demo9\Videos\Surg001\2023-07-18_154344_VID003.mp4
Annotations found for video: 2023-07-18_154344_VID003.mp4
Clips info CSV: E:\SPS\Frame_Extraction_Project\Demo9\Videos\Surg001\labels\Videos\clips_info.csv
Clips info loaded successfully.
Processing video file: E:\SPS\Frame_Extraction_Project\Demo9\Videos\Surg001\2023-07-18_154344_VID004.mp4
Annotations found for video: 2023-07-18_154344_VID004.mp4
Clips info CSV: E:\SPS\Frame_Extraction_Project\Demo9\Videos\Surg001\labels\Videos\clips_info.csv
Clips info loaded successfully.
Processing video file: E:\SPS\Frame_Extraction_Project\Demo9\Videos\Surg001\2023-07-18_154344_VID005.mp4
Annotations found for video: 2023-07-18_154344_VID005.mp4
Clips info CSV: E:\SPS\Frame_Extraction_Project\Demo9\Videos\Surg001\labels\Videos\clips_info.csv
Clips info loaded

In [1]:
import json
import pandas as pd
import os

def extract_labels(json_file, root_folder):
    # Load JSON data
    with open(json_file, 'r') as f:
        data = json.load(f)

    for subdir, dirs, files in os.walk(root_folder):
        print(f"Processing folder: {subdir}")
        if os.path.basename(subdir).startswith("Surg"):  
            mp4_files_found = False
            for file in files:
                if file.endswith(".mp4"):
                    mp4_files_found = True
                    video_file = os.path.join(subdir, file)
                    print(f"Processing video file: {video_file}")

                    video_entry = next((entry for entry in data if file in entry["file_upload"]), None)
                    if video_entry:
                        print(f"Annotations found for video: {file}")
                        annotations = video_entry["annotations"]
                        clips_info_csv = os.path.join(subdir, "labels", "Videos", "clips_info.csv")
                        print(f"Clips info CSV: {clips_info_csv}")

                        if os.path.exists(clips_info_csv):
                            df_clips = pd.read_csv(clips_info_csv)
                            print("Clips info loaded successfully.")
                            
                            annotation_data = []
                            for annotation in annotations:
                                for result in annotation["result"]:
                                    start = result["value"]["start"]
                                    end = result["value"]["end"]
                                    labels = ', '.join(result["value"]["labels"])
                                    filename = file.split('.')[0]  # Extract filename without extension
                                    annotation_data.append({"filename": filename, "start": start, "end": end, "labels": labels})
                                    
                            print("Annotation data:")
                            for ann in annotation_data:
                                print(ann)

                            clip_labels = []

                            clip_labels = []

                            for _, row_clip in df_clips.iterrows():
                                print(f"Processing clip: {row_clip['clip_filename']}")
                                labels = []
                                clipname = row_clip['clip_filename']
                                clip_filename = clipname.rsplit('-', 2)[0]

                                # Filtering annotation data for current and subsequent video files
                                matching_annotations = [annotation for annotation in annotation_data if annotation['filename'] == clip_filename]

                                found_labels = False
                                for annotation in matching_annotations:
                                    if row_clip['start_time'] <= annotation['end'] and annotation['start'] <= row_clip['end_time']:
                                        labels.append(annotation['labels'])
                                        found_labels = True
                                        break  # Exit loop if labels are found for the clip

                                if not found_labels:
                                    next_label_index = next((i for i, annotation in enumerate(annotation_data) if annotation['filename'] == clip_filename and annotation['start'] > row_clip['end_time']), None)
                                    if next_label_index is not None:
                                        labels.append(annotation_data[next_label_index]['labels'])
                                        found_labels = True

                                if not found_labels:
                                    labels.append("No Label Found")

                                clip_labels.append(', '.join(labels))

                            labels_folder = os.path.join(subdir, "labels")
                            os.makedirs(labels_folder, exist_ok=True)
                            output_file = os.path.join(labels_folder, "labels.csv")

                            df_labels = pd.DataFrame({"clip_filename": df_clips["clip_filename"], "start_time": df_clips["start_time"], "end_time": df_clips["end_time"], "labels": clip_labels})

                            df_labels.to_csv(output_file, index=False)
                        else:
                            print(f"Error: clips_info.csv not found in directory: {os.path.dirname(clips_info_csv)}")
                    else:
                        print(f"Annotations not found for video: {file} in the JSON data.")

            if not mp4_files_found:
                print("No .mp4 files found in this folder.")

    print("Label extraction completed.")

# Specify the root folder containing the hierarchy of videos
root_folder = r"E:\SPS\Frame_Extraction_Project\Demo9\Videos"
json_file = r"E:\SPS\Frame_Extraction_Project\Demo8\2023-07-18_154344_VID003.json"

extract_labels(json_file, root_folder)

Processing folder: E:\SPS\Frame_Extraction_Project\Demo9\Videos
Processing folder: E:\SPS\Frame_Extraction_Project\Demo9\Videos\Surg001
Processing video file: E:\SPS\Frame_Extraction_Project\Demo9\Videos\Surg001\2023-07-18_154344_VID003.mp4
Annotations found for video: 2023-07-18_154344_VID003.mp4
Clips info CSV: E:\SPS\Frame_Extraction_Project\Demo9\Videos\Surg001\labels\Videos\clips_info.csv
Clips info loaded successfully.
Annotation data:
{'filename': '2023-07-18_154344_VID003', 'start': 0.992440380733945, 'end': 17.863926853211012, 'labels': 'Scope movement'}
{'filename': '2023-07-18_154344_VID003', 'start': 17.367706662844036, 'end': 50.6144594174312, 'labels': 'Setup'}
{'filename': '2023-07-18_154344_VID003', 'start': 50.6144594174312, 'end': 65.00484493807339, 'labels': 'Scope movement'}
{'filename': '2023-07-18_154344_VID003', 'start': 65.00484493807339, 'end': 80.88389102981651, 'labels': 'Aspiration'}
{'filename': '2023-07-18_154344_VID003', 'start': 80.88389102981651, 'end':

In [12]:
a = '2023-07-18_154344_VID003-14.608-74.608.mp4'

In [14]:
a.rsplit('-', 2)[0]

'2023-07-18_154344_VID003'

In [4]:
import json
import pandas as pd
import os

def extract_labels(json_file, root_folder):
    # Load JSON data
    with open(json_file, 'r') as f:
        data = json.load(f)

    for subdir, dirs, files in os.walk(root_folder):
        print(f"Processing folder: {subdir}")
        if os.path.basename(subdir).startswith("Surg"):  
            mp4_files_found = False
            for file in files:
                if file.endswith(".mp4"):
                    mp4_files_found = True
                    video_file = os.path.join(subdir, file)
                    print(f"Processing video file: {video_file}")

                    video_entry = next((entry for entry in data if file in entry["file_upload"]), None)
                    if video_entry:
                        print(f"Annotations found for video: {file}")
                        annotations = video_entry["annotations"]
                        clips_info_csv = os.path.join(subdir, "labels", "Videos", "clips_info.csv")
                        print(f"Clips info CSV: {clips_info_csv}")

                        if os.path.exists(clips_info_csv):
                            df_clips = pd.read_csv(clips_info_csv)
                            print("Clips info loaded successfully.")
                            
                            annotation_data = []
                            for annotation in annotations:
                                for result in annotation["result"]:
                                    start = result["value"]["start"]
                                    end = result["value"]["end"]
                                    labels = ', '.join(result["value"]["labels"])
                                    filename = file.split('.')[0]  # Extract filename without extension
                                    annotation_data.append({"filename": filename, "start": start, "end": end, "labels": labels})
                                    
                            print("Annotation data:")
                            for ann in annotation_data:
                                print(ann)

                            clip_labels = []

                            for _, row_clip in df_clips.iterrows():
                                print(f"Processing clip: {row_clip['clip_filename']}")
                                labels = []
                                clipname = row_clip['clip_filename']
                                clip_filename = clipname.rsplit('-', 2)[0]

                                # Filtering annotation data for current video file
                                matching_annotations = [annotation for annotation in annotation_data if annotation['filename'] == clip_filename]

                                if matching_annotations:
                                    for annotation in matching_annotations:
                                        if annotation['start'] <= row_clip['end_time'] <= annotation['end']:
                                            labels.append(annotation['labels'])

                                    if labels:
                                        clip_labels.append(', '.join(labels))
                                    else:
                                        clip_labels.append("No Label Found")

                                else:
                                    clip_labels.append("No Label Found")

                            labels_folder = os.path.join(subdir, "labels")
                            os.makedirs(labels_folder, exist_ok=True)
                            output_file = os.path.join(labels_folder, "labels.csv")

                            df_labels = pd.DataFrame({"clip_filename": df_clips["clip_filename"], "start_time": df_clips["start_time"], "end_time": df_clips["end_time"], "labels": clip_labels})

                            df_labels.to_csv(output_file, index=False)
                        else:
                            print(f"Error: clips_info.csv not found in directory: {os.path.dirname(clips_info_csv)}")
                    else:
                        print(f"Annotations not found for video: {file} in the JSON data.")

            if not mp4_files_found:
                print("No .mp4 files found in this folder.")

    print("Label extraction completed.")

# Specify the root folder containing the hierarchy of videos
root_folder = r"E:\SPS\Frame_Extraction_Project\Demo8\Videos"
json_file = r"E:\SPS\Frame_Extraction_Project\Demo8\2023-07-18_154344_VID003.json"

extract_labels(json_file, root_folder)

Processing folder: E:\SPS\Frame_Extraction_Project\Demo8\Videos
Processing folder: E:\SPS\Frame_Extraction_Project\Demo8\Videos\Surg001
Processing video file: E:\SPS\Frame_Extraction_Project\Demo8\Videos\Surg001\2023-07-18_154344_VID003.mp4
Annotations found for video: 2023-07-18_154344_VID003.mp4
Clips info CSV: E:\SPS\Frame_Extraction_Project\Demo8\Videos\Surg001\labels\Videos\clips_info.csv
Clips info loaded successfully.
Annotation data:
{'filename': '2023-07-18_154344_VID003', 'start': 0.992440380733945, 'end': 17.863926853211012, 'labels': 'Scope movement'}
{'filename': '2023-07-18_154344_VID003', 'start': 17.367706662844036, 'end': 50.6144594174312, 'labels': 'Setup'}
{'filename': '2023-07-18_154344_VID003', 'start': 50.6144594174312, 'end': 65.00484493807339, 'labels': 'Scope movement'}
{'filename': '2023-07-18_154344_VID003', 'start': 65.00484493807339, 'end': 80.88389102981651, 'labels': 'Aspiration'}
{'filename': '2023-07-18_154344_VID003', 'start': 80.88389102981651, 'end':