In [None]:
import cv2
import os
import numpy as np
from PIL import Image

source_path = "MuST/data/cholec80/videos/"  # directory that is scanned for the input .mp4 videos
save_path = "MuST/data/cholec80/frames/"    # root output directory; each video gets its own sub-folder of frames

def ensure_dir(path):
    """Create the directory `path` (including missing parents) if needed.

    Calling this on a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If `path` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first: another process may create the directory
    # between the check and the makedirs() call.
    os.makedirs(path, exist_ok=True)

# def change_size(image):
#     binary_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#     _, binary_image2 = cv2.threshold(binary_image, 15, 255, cv2.THRESH_BINARY)
#     binary_image2 = cv2.medianBlur(binary_image2, 19)

#     x, y = binary_image2.shape
#     edges_x, edges_y = [], []
#     for i in range(x):
#         for j in range(10, y-10):
#             if binary_image2[i, j] != 0:
#                 edges_x.append(i)
#                 edges_y.append(j)

#     if not edges_x:
#         return image

#     left, right = min(edges_x), max(edges_x)
#     bottom, top = min(edges_y), max(edges_y)
#     return image[left:right, bottom:top]
# --- Optimized change_size: replaces the commented-out per-pixel version above ---
def change_size(image):
    """Crop `image` to the bounding box of its non-dark region.

    The image is converted from BGR to grayscale, thresholded at an
    intensity of 15, and the resulting binary mask is median-blurred with
    a 19-pixel kernel. The bounding rectangle of the remaining non-zero
    mask pixels defines the crop.

    Args:
        image: BGR image array, as accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The cropped slice of `image`, or `image` itself when the mask
        contains no non-zero pixels.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    mask = cv2.threshold(gray, 15, 255, cv2.THRESH_BINARY)[1]
    mask = cv2.medianBlur(mask, 19)

    nonzero_points = cv2.findNonZero(mask)
    if nonzero_points is not None:
        left, top, width, height = cv2.boundingRect(nonzero_points)
        return image[top:top + height, left:left + width]

    # Nothing survived the threshold: hand the input back untouched.
    return image
# --- End of optimized change_size function ---

# Process all .mp4 files in the source_path
#video_files = [f for f in os.listdir(source_path) if f.endswith('.mp4')]
import re

# video_files = sorted([
#     f for f in os.listdir(source_path)
#     if re.match(r'video\d{2}\.mp4$', f)
# ])


# for video_file in video_files:
#     video_name = os.path.splitext(video_file)[0]
#     video_save_path = os.path.join(save_path, video_name)
#     ensure_dir(video_save_path)
    
# Collect the videos numbered 11 through 20 (inclusive) found in source_path.
# Each entry is kept as (number, file name) so the list can be ordered by the
# parsed number rather than by the raw file-name string.
numbered_videos = []
for f in os.listdir(source_path):
    match = re.match(r'video(\d+)\.mp4$', f)  # Capture the number
    if match:
        video_number = int(match.group(1))  # Convert the captured string to an integer
        if 11 <= video_number <= 20:
            numbered_videos.append((video_number, f))

# A plain string sort is lexicographic, which mis-orders names whose numbers
# have different digit counts or zero padding; sorting the tuples orders by
# the integer first and falls back to the file name only on ties.
numbered_videos.sort()
video_files = [name for _, name in numbered_videos]
# Every extracted frame is resized to this fixed size before it is saved.
FINAL_OUTPUT_SIZE = (250, 250)

# For each selected video: decode it frame by frame, crop each frame with
# change_size(), resize it to FINAL_OUTPUT_SIZE and save it as a zero-padded,
# sequentially numbered JPEG in a per-video sub-folder of save_path.
for video_file in video_files:
    video_name = os.path.splitext(video_file)[0]
    video_save_path = os.path.join(save_path, video_name)
    ensure_dir(video_save_path)
    print(f"Processing: {video_file}, saving to: {video_save_path}")

    video_path = os.path.join(source_path, video_file)
    cap = cv2.VideoCapture(video_path)
    frame_num = 0
    failed_writes = 0

    # try/finally guarantees the capture handle is released on every exit
    # path: failed open, normal end of stream, or an exception mid-video.
    try:
        if not cap.isOpened():
            print(f"Error: Could not open video file {video_file}")
            continue

        # Frame count as reported by the capture backend; used only for the
        # progress display, the loop itself runs until read() fails.
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print(f"Extracting {total_frames} frames from {video_file}...")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # 1. Crop the frame with change_size().
            processed_frame = change_size(frame)
            # 2. Resize the crop to the fixed output size in a single step.
            img_result = cv2.resize(processed_frame, FINAL_OUTPUT_SIZE, interpolation=cv2.INTER_AREA)

            # The frame is written directly with cv2.imwrite, so no
            # BGR -> RGB conversion is performed before saving.
            frame_filename = os.path.join(video_save_path, f"{frame_num:06d}.jpg")
            # imwrite signals failure through its return value, not an
            # exception; report it so missing frames do not go unnoticed.
            if not cv2.imwrite(frame_filename, img_result):
                failed_writes += 1
                print(f"\nWarning: could not write {frame_filename}")

            print(f"\rExtracted: {frame_num + 1}/{total_frames} frames from {video_file}", end="")
            frame_num += 1
    finally:
        cap.release()

    print(f"\nFinished cutting {video_file}")
    if failed_writes:
        print(f"Warning: {failed_writes} frame(s) of {video_file} could not be saved")
    print("-" * 50)


Processing: video11.mp4, saving to: MuST/data/cholec80/frames/video11
Extracting 80501 frames from video11.mp4...
Extracted: 27596/80501 frames from video11.mp4