In [9]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm.notebook import tqdm
import pandas as pd


In [10]:
# Root directory that is walked recursively (os.walk) to collect the video files.
path = '/cosma5/data/durham/dc-fras4/ultrasound/SLURP/MFT_videos' 


In [11]:
def extract_frame(video_path, frame_number=0):
    """Read a single frame from a video together with basic stream metadata.

    Parameters
    ----------
    video_path : str
        Path handed to ``cv2.VideoCapture``.
    frame_number : int, optional
        Index of the frame to seek to before reading (default 0).

    Returns
    -------
    tuple
        ``(frame, total_frames, fps)``:

        * ``(None, None, None)`` if the video could not be opened;
        * ``(None, total_frames, fps)`` if the video opened but the
          requested frame could not be read;
        * otherwise the decoded frame, the reported frame count (as int)
          and the reported frames-per-second value.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        # Check if video opened successfully
        if not cap.isOpened():
            print("Error: Could not open video.")
            return None, None, None

        # Get video properties
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        # Seek to the requested frame, then decode it
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
        ret, frame = cap.read()

        if not ret or frame is None:
            print("Error: Could not read frame.")
            return None, total_frames, fps

        return frame, total_frames, fps
    finally:
        # Release on every exit path; previously the read-failure return
        # skipped this and leaked the capture handle.
        cap.release()

In [12]:
def check_video_layout(video_path, relative_area_threshold=0.6):
    """Tell whether the largest bright region of frame 0 fills most of the frame.

    The first frame is thresholded into a binary mask, cleaned with a
    morphological open/close, and the bounding box of the largest external
    contour is compared against the full frame area.

    Parameters
    ----------
    video_path : str
        Video to inspect; passed to ``extract_frame``.
    relative_area_threshold : float, optional
        Fraction of the frame area the bounding box must exceed (default 0.6).

    Returns
    -------
    bool
        ``True`` if ``bounding_box_area / frame_area`` is strictly greater
        than ``relative_area_threshold``. ``False`` otherwise, including
        when the frame cannot be read or no contour is found.
        NOTE(review): the commented-out scanner in this notebook appears to
        treat ``True`` as "B/M-mode layout" - confirm.
    """
    frame, _, _ = extract_frame(video_path, frame_number=0)
    if frame is None:
        return False

    frame_height, frame_width = frame.shape[:2]

    # 1. Create binary mask.
    # Use a threshold of 5 (instead of 1) to be safe against
    # near-black compression artifacts
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray, 5, 255, cv2.THRESH_BINARY)

    # 2. Clean mask (this removes small text/noise)
    kernel = np.ones((5, 5), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=2)

    # 3. Find contours
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Previously fell off the end of the function and returned None.
        return False

    # Bounding box of the contour with the maximum area
    largest_contour = max(contours, key=cv2.contourArea)
    _, _, w, h = cv2.boundingRect(largest_contour)

    relative_area = (w * h) / (frame_width * frame_height)
    return relative_area > relative_area_threshold



In [13]:
# def find_b_m_mode_videos(directory_to_scan):
    
#     print(f"Scanning directory: {directory_to_scan}\n")

#     # Check if directory exists
#     if not os.path.exists(directory_to_scan):
#         print(f"ERROR: Directory does not exist: {directory_to_scan}")
#         return []
    
#     if not os.path.isdir(directory_to_scan):
#         print(f"ERROR: Path is not a directory: {directory_to_scan}")
#         return []
    
#     video_extensions = ('.mp4', '.avi', '.mkv')
#     b_m_mode_list = []

#     # Walk through all subdirectories
#     video_files = []
#     for root, dirs, files in os.walk(directory_to_scan):
#         for filename in files:
#             if filename.lower().endswith(video_extensions):
#                 full_path = os.path.join(root, filename)
#                 video_files.append(full_path)
    
#     print(f"Video files found: {len(video_files)}\n")
    
#     if len(video_files) == 0:
#         print("No video files found. Check your video extensions.")
#         return []

#     for full_path in tqdm(video_files, desc="Processing videos"):
#         try:
#             is_b_m_mode = check_video_layout(full_path)
            
#             if is_b_m_mode:
#                 b_m_mode_list.append(full_path)  # Store full path
                
#         except Exception as e:
#             print(f"Error processing {full_path}: {e}")

#     return b_m_mode_list

In [14]:
# find_b_m_mode_videos(path)

In [15]:
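# # save list to txt file (one-off step, kept commented out; it writes
# # b_m_mode_videos.txt, which the video-list cell further down reads)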
# b_m_mode_videos = find_b_m_mode_videos(path)
# with open('b_m_mode_videos.txt', 'w') as f:
#     for video_path in b_m_mode_videos:
#         f.write(f"{video_path}\n")

        


In [16]:
def cropping_to_scan_area(video_path):
    """Locate the largest bright region in frame 0 and crop the frame to it.

    The first frame is thresholded into a binary mask, cleaned with a
    morphological open/close, and cropped to the bounding box of the
    largest external contour.

    Parameters
    ----------
    video_path : str
        Video to inspect; passed to ``extract_frame``.

    Returns
    -------
    tuple
        Always a 5-tuple ``(x_start, y_start, x_end, y_end, image)`` so that
        callers can unpack the result unconditionally:

        * normal case - bounding box corners (end values are ``x + w`` and
          ``y + h``) and the cropped frame;
        * no contour found - ``(0, 0, frame_width, frame_height, frame)``,
          i.e. the uncropped frame with its full extent;
        * frame could not be read - ``(None, None, None, None, None)``.
    """
    frame, _, _ = extract_frame(video_path, frame_number=0)
    if frame is None:
        # Previously returned a bare False, which broke 5-way unpacking.
        return None, None, None, None, None

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray, 5, 255, cv2.THRESH_BINARY)

    # Clean mask
    kernel = np.ones((5, 5), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=2)

    # Find contours
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        print("No contours found for cropping.")
        # Fall back to the original frame, reported with its full extent
        # (previously the bare frame was returned without coordinates).
        frame_height, frame_width = frame.shape[:2]
        return 0, 0, frame_width, frame_height, frame

    # Bounding box of the contour with the maximum area
    largest_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest_contour)

    # Crop the frame to the bounding rectangle
    cropped_frame = frame[y:y + h, x:x + w]

    return x, y, x + w, y + h, cropped_frame

In [17]:
# make table with video path and crop coordinates
def make_crop_table(video_list):
    """Collect the crop rectangle of every video in ``video_list``.

    Parameters
    ----------
    video_list : iterable of str
        Video paths; each is passed to ``cropping_to_scan_area``.

    Returns
    -------
    list of tuple
        One ``(video_path, x_start, y_start, x_end, y_end)`` entry per input
        path, in input order. The four coordinates are ``None`` when
        ``cropping_to_scan_area`` did not return a 5-element tuple.
    """
    crop_data = []
    for video_path in video_list:
        result = cropping_to_scan_area(video_path)
        if isinstance(result, tuple) and len(result) == 5:
            x_start, y_start, x_end, y_end, _ = result
        else:
            # A failed crop must not abort the whole run: keep the row,
            # leave its coordinates empty and report the path.
            print(f"Warning: no crop coordinates for {video_path}")
            x_start = y_start = x_end = y_end = None
        crop_data.append((video_path, x_start, y_start, x_end, y_end))
    return crop_data

In [18]:
# make video list excluding b_m_mode videos
# Requires b_m_mode_videos.txt (one video path per line) in the working directory.
video_list = []
with open('b_m_mode_videos.txt', 'r') as f:
    b_m_mode_videos = f.read().splitlines()

# A set makes each exclusion check a hash lookup instead of a scan of the
# whole list for every file found on disk.
excluded_videos = set(b_m_mode_videos)

for root, dirs, files in os.walk(path):
    for filename in files:
        if filename.lower().endswith(('.mp4', '.avi', '.mkv')):
            full_path = os.path.join(root, filename)
            if full_path not in excluded_videos:
                video_list.append(full_path)



In [19]:
# Compute one (video_path, x_start, y_start, x_end, y_end) tuple per video
# in video_list.
crop_data = make_crop_table(video_list)

# save as csv file

# Column names follow the tuple order produced by make_crop_table;
# index=False keeps the pandas row index out of the CSV.
df = pd.DataFrame(crop_data, columns=['video_path','x_start', 'y_start', 'x_end', 'y_end'])
df.to_csv('mft_video_crop_coordinates.csv', index=False)


In [22]:
# Vertical crop bounds (values written to the y_start / y_end columns)
# that every video is normalised to in this cell.
Y_START = 14
Y_END = 601

# if y_start does not equal Y_START print video path
# (boolean mask instead of a row-by-row iterrows loop)
for video_path in df.loc[df['y_start'] != Y_START, 'video_path']:
    print(video_path)

# set all y_start to Y_START and y_end to Y_END
df = pd.read_csv('mft_video_crop_coordinates.csv')
df['y_start'] = Y_START
df['y_end'] = Y_END
df.to_csv('mft_video_crop_coordinates.csv', index=False)

In [23]:
# Reload the saved crop table, overwrite y_start with 14 and y_end with 601
# for every row, and write the table back to the same file.
# Re-running this cell leaves the file unchanged.
df = pd.read_csv('mft_video_crop_coordinates.csv').assign(y_start=14, y_end=601)
df.to_csv('mft_video_crop_coordinates.csv', index=False)