<a href="https://colab.research.google.com/github/sarah1ibrahim/End-to-End-Echocardiographic-Analysis-Pipeline/blob/main/EchoNet_Dynamic_Tracings_Masks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mask Creation

In [None]:
from google.colab import drive
import os
import cv2
import numpy as np
import pandas as pd
import logging
from datetime import datetime
import signal
import time
import shutil

# Set up logging.
# The log is first written to the VM's local disk. The Drive mount is torn down
# and re-created further down, and a file handler that points into
# /content/drive at that moment fails with
# "OSError: [Errno 107] Transport endpoint is not connected".
# The file is moved into the Drive folder after the mount step.
log_dir = "/content"

log_file = os.path.join(log_dir, f"create_all_filled_masks_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt")

logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Re-running the cell would otherwise stack another pair of handlers on the
# root logger, so every message would be emitted several times. Drop whatever
# is attached before adding our own handlers.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# File output carries timestamps and levels.
file_handler = logging.FileHandler(log_file)
file_handler.setLevel(logging.INFO)
file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
file_handler.setFormatter(file_formatter)
logger.addHandler(file_handler)

# Console output shows the bare message only.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_formatter = logging.Formatter('%(message)s')
console_handler.setFormatter(console_formatter)
logger.addHandler(console_handler)

# Function to timeout an operation
def timeout_handler(signum, frame):
    """Signal handler that turns a delivered signal into a TimeoutError.

    Both arguments are supplied by the signal machinery and are not used.
    """
    message = "Operation timed out"
    raise TimeoutError(message)

# Function to mount Google Drive with retries
def mount_google_drive(max_retries=3, retry_delay=10):
    """Mount Google Drive at /content/drive, retrying on failure.

    Each attempt is bounded by a 60-second SIGALRM alarm whose handler
    (``timeout_handler``) raises TimeoutError. The alarm is always cancelled
    and the previously installed SIGALRM handler restored before the next
    attempt or before returning.

    Args:
        max_retries: Maximum number of mount attempts.
        retry_delay: Seconds to sleep between two attempts.

    Returns:
        True as soon as one attempt succeeds.

    Raises:
        RuntimeError: If no attempt succeeded.
    """
    for attempt in range(1, max_retries + 1):
        logger.info(f"Mounting Google Drive (Attempt {attempt}/{max_retries})...")
        previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(60)
        mounted = False
        try:
            drive.mount('/content/drive', force_remount=True)
            mounted = True
        except TimeoutError:
            logger.warning("Timeout while mounting Google Drive.")
        except Exception as e:
            logger.warning(f"Failed to mount Google Drive: {e}")
        finally:
            # Cancel the pending alarm on every path so it cannot fire later
            # in unrelated code, then put the old handler back.
            signal.alarm(0)
            # signal.signal() returns None when the old handler was not
            # installed from Python; that value cannot be re-installed.
            if previous_handler is not None:
                signal.signal(signal.SIGALRM, previous_handler)

        if mounted:
            logger.info("Google Drive mounted successfully.")
            return True

        if attempt < max_retries:
            logger.info(f"Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    logger.error("Failed to mount Google Drive after all retries. Aborting...")
    raise RuntimeError("Could not mount Google Drive")

# Unmount the drive if it's already mounted
if os.path.exists('/content/drive'):
    logger.info("Unmounting existing drive mount at /content/drive...")
    try:
        drive.flush_and_unmount()
        time.sleep(5)
        if os.path.ismount('/content/drive'):
            # The unmount did not take effect. Deleting the directory now would
            # recurse into the mounted Drive and remove the user's files.
            logger.warning("/content/drive is still mounted after unmount; not removing it.")
        elif os.path.exists('/content/drive'):
            # Only a leftover local directory remains; clear it so the next
            # mount starts from a clean mount point.
            shutil.rmtree('/content/drive', ignore_errors=True)
            logger.info("Removed /content/drive directory.")
    except Exception as e:
        logger.warning(f"Failed to unmount cleanly: {e}. Proceeding anyway...")

# Mount Google Drive
mount_google_drive()

# Update log directory now that Drive is mounted
log_dir = "/content/drive/MyDrive/✨ GP ✨/EchoNet_Dataset/EchoNet-Dynamic"
os.makedirs(log_dir, exist_ok=True)
new_log_file = os.path.join(log_dir, os.path.basename(log_file))

# Detach and close the old handler before touching the file, so nothing is
# written through a stale handle while the log is moved. Closing can fail if
# the handle pointed into the previous (now gone) mount; that is not fatal.
logger.removeHandler(file_handler)
try:
    file_handler.close()
except OSError as e:
    print(f"Could not close the previous log handler cleanly: {e}")

# The initial log may already sit at the target path, or may have been removed
# together with the old /content/drive directory; only move a file that
# exists and actually lives somewhere else.
if os.path.abspath(log_file) != os.path.abspath(new_log_file) and os.path.exists(log_file):
    shutil.move(log_file, new_log_file)
log_file = new_log_file

# FileHandler opens in append mode, so earlier log lines are kept.
file_handler = logging.FileHandler(log_file)
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(file_formatter)
logger.addHandler(file_handler)

# Input tracings file and the directory that will receive the masks
csv_path = "/content/drive/MyDrive/✨ GP ✨/EchoNet_Dataset/EchoNet-Dynamic/VolumeTracings.csv"
output_path = "/content/drive/MyDrive/✨ GP ✨/EchoNet_Dataset/EchoNet-Dynamic/MaskTracings"

# Start from an empty output directory: anything left by a previous run is
# deleted before the directory is (re)created.
previous_output_present = os.path.exists(output_path)
if previous_output_present:
    logger.info(f"Deleting existing MaskTracings directory at {output_path}...")
    shutil.rmtree(output_path)
os.makedirs(output_path, exist_ok=True)
logger.info(f"Created MaskTracings directory at {output_path}")

# Load the CSV file
logger.info(f"Loading CSV file from {csv_path}...")
try:
    df = pd.read_csv(csv_path)
    logger.info("Successfully loaded CSV file.")
except Exception as e:
    logger.error(f"Failed to load CSV file: {e}")
    raise

# Clean video names by removing the trailing .avi extension.
# The pattern is escaped, anchored and passed with an explicit regex flag:
# the default of `regex` differs between pandas versions, and an unescaped
# '.avi' interpreted as a regular expression matches any character followed
# by "avi".
df['FileName'] = df['FileName'].str.replace(r'\.avi$', '', regex=True)

# Group data by FileName to process all videos
grouped = df.groupby("FileName")
total_videos = len(grouped)
logger.info(f"Total number of videos to process: {total_videos}")

# Function to create a filled binary mask using polygon filling
def create_filled_polygon_mask(tracings, frame_shape=(112, 112), video_name="", frame_number=""):
    """Rasterise the tracing segments of one frame into a filled binary mask.

    The end points of all segments are collected, clamped to the image,
    de-duplicated, ordered by their angle around the centroid and filled as a
    single polygon.

    Args:
        tracings: DataFrame with one row per line segment and the columns
            X1, Y1, X2, Y2 holding the segment end points.
        frame_shape: (height, width) of the mask to create.
        video_name: Used only in log messages.
        frame_number: Used only in log messages.

    Returns:
        uint8 array of shape ``frame_shape`` with 255 inside the polygon and 0
        elsewhere. The mask is all zeros when ``tracings`` is empty or yields
        fewer than three distinct points.
    """
    import math

    # Step 1: Create an initial mask
    mask = np.zeros(frame_shape, dtype=np.uint8)
    if tracings.empty:
        return mask

    # Clamp limits follow the requested mask size (x runs along the width,
    # y along the height) instead of a fixed 111.
    max_x = frame_shape[1] - 1
    max_y = frame_shape[0] - 1

    # Steps 2-3: collect the clamped end points of every segment, keeping the
    # first occurrence of each point and the original order.
    seen = set()
    unique_points = []
    for x1, y1, x2, y2 in zip(tracings['X1'], tracings['Y1'], tracings['X2'], tracings['Y2']):
        for x, y in ((x1, y1), (x2, y2)):
            pt = (max(0, min(max_x, int(x))), max(0, min(max_y, int(y))))
            if pt not in seen:
                seen.add(pt)
                unique_points.append(pt)

    if len(unique_points) < 3:
        logger.warning(f"Not enough unique points to form a polygon for {video_name}_frame_{frame_number}. Returning empty mask.")
        return mask

    # Step 4: Sort points by angle around the centroid to form a closed loop.
    # NOTE(review): an angular ordering reproduces the traced outline only if
    # the outline is star-shaped with respect to the centroid - TODO confirm
    # this holds for the tracings in this dataset.
    centroid_x = sum(pt[0] for pt in unique_points) / len(unique_points)
    centroid_y = sum(pt[1] for pt in unique_points) / len(unique_points)

    def angle_from_centroid(pt):
        return math.atan2(pt[1] - centroid_y, pt[0] - centroid_x)

    polygon_points = np.array(sorted(unique_points, key=angle_from_centroid), dtype=np.int32)

    # Step 5: Fill the polygon. fillPoly writes the single value 255 into a
    # zero image, so the result is already binary and needs no thresholding.
    cv2.fillPoly(mask, [polygon_points], 255)

    # Per-mask diagnostics are emitted at DEBUG level only, so they do not
    # flood the INFO log with one line per mask.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(f"Unique values in filled mask for {video_name}_frame_{frame_number}: {np.unique(mask)}")

    return mask

# Process each video
saved_masks = 0
failed_writes = 0
skipped_videos = 0

# The masks keep the .jpg extension (the verification cell looks for .jpg
# files), but JPEG is a lossy format, so a 0/255 mask does not round-trip
# exactly. Writing at the highest quality keeps the compression artefacts as
# small as the format allows; readers should still re-threshold loaded masks.
jpeg_params = [cv2.IMWRITE_JPEG_QUALITY, 100]

for video_idx, (video_name, group) in enumerate(grouped, 1):
    logger.info(f"Processing video {video_idx}/{total_videos}: {video_name}")

    # Create output subdirectory for this video
    video_output_path = os.path.join(output_path, video_name)
    os.makedirs(video_output_path, exist_ok=True)

    # Only videos traced on exactly two frames are converted.
    unique_frames = group["Frame"].unique()
    if len(unique_frames) != 2:
        logger.warning(f"Video {video_name} does not have exactly 2 unique frames (found {len(unique_frames)}). Skipping...")
        skipped_videos += 1
        continue

    # Process each frame
    for frame_number in unique_frames:
        logger.info(f"  Processing frame {frame_number} for {video_name}")
        frame_tracings = group[group['Frame'] == frame_number]
        if frame_tracings.empty:
            logger.warning(f"No tracings found for {video_name}_frame_{frame_number}. Skipping...")
            continue

        # Create the filled mask
        filled_mask = create_filled_polygon_mask(
            frame_tracings,
            video_name=video_name,
            frame_number=str(frame_number)
        )

        # Save the filled mask
        frame_filename = f"{video_name}_frame_{str(frame_number).zfill(4)}_segments.jpg"
        output_file_path = os.path.join(video_output_path, frame_filename)
        if cv2.imwrite(output_file_path, filled_mask, jpeg_params):
            saved_masks += 1
            logger.info(f"Saved filled mask: {output_file_path}")
        else:
            failed_writes += 1
            logger.error(f"Failed to save filled mask: {output_file_path}")

# Summary: report what was actually written, not just the number of groups.
logger.info("\n[Summary]")
logger.info(f"Processed {total_videos - skipped_videos} of {total_videos} videos ({skipped_videos} skipped)")
logger.info(f"Masks saved: {saved_masks}, failed writes: {failed_writes}")
logger.info(f"Output saved in: {output_path}")
logger.info(f"Log file saved at: {log_file}")

# Force sync to ensure changes are visible.
# The log file lives on Drive, so the file handler is detached and closed
# before the unmount; logging through it while the mount is gone fails with
# "Transport endpoint is not connected". Messages logged in between reach the
# console only.
logger.removeHandler(file_handler)
try:
    file_handler.close()
except OSError as e:
    print(f"Could not close the log file cleanly: {e}")

try:
    drive.flush_and_unmount()
    logger.info("Google Drive unmounted successfully.")
    drive.mount('/content/drive', force_remount=True)
    # Re-open the log now that the file is reachable again. FileHandler
    # appends, so earlier log lines are kept.
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(file_formatter)
    logger.addHandler(file_handler)
    logger.info("Google Drive remounted successfully.")
except Exception as e:
    logger.error(f"Failed to sync Google Drive: {e}")

INFO:root:Unmounting existing drive mount at /content/drive...
Unmounting existing drive mount at /content/drive...
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.11/logging/__init__.py", line 1114, in emit
    self.flush()
  File "/usr/lib/python3.11/logging/__init__.py", line 1094, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.11/dist-packages/tornado/platform/asyncio.py", line 205, in start

Mounted at /content/drive


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
    self.io_loop.start()
  File "/usr/local/lib/python3.11/dist-packages/tornado/platform/asyncio.py", line 205, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.11/asyncio/base_events.py", line 608, in run_forever
    self._run_once()
  File "/usr/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once
    handle._run()
  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run
    self._context.run(self._callback, *self._args)
  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
    await self.process_one()
  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 499, in process_one
    await dispatch(*args)
  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
    await result
  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 730, in execu

Mounted at /content/drive


--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.11/logging/__init__.py", line 1114, in emit
    self.flush()
  File "/usr/lib/python3.11/logging/__init__.py", line 1094, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.11/dist-packages/tornado/platform/asyncio.py", line 205, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.11/asyncio/base_events.py", line 608, in run_forever
 

# Mask Count Verification

In [None]:
import os
import pandas as pd
import logging
import shutil
import time
from datetime import datetime
from google.colab import drive

# Set up logging.
# The log is first written to the VM's local disk. The Drive mount is torn down
# and re-created further down, and a file handler that points into
# /content/drive at that moment fails with
# "OSError: [Errno 107] Transport endpoint is not connected".
# The file is moved into the Drive folder after the mount step.
log_dir = "/content"

log_file = os.path.join(log_dir, f"verify_masktracings_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt")

logger = logging.getLogger()
logger.setLevel(logging.INFO)

# The root logger keeps the handlers added by earlier cell runs, so every
# message would be emitted once per stale handler. Drop whatever is attached
# before adding our own handlers.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# File output carries timestamps and levels.
file_handler = logging.FileHandler(log_file)
file_handler.setLevel(logging.INFO)
file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
file_handler.setFormatter(file_formatter)
logger.addHandler(file_handler)

# Console output shows the bare message only.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_formatter = logging.Formatter('%(message)s')
console_handler.setFormatter(console_formatter)
logger.addHandler(console_handler)

# Unmount the drive if it's already mounted
if os.path.exists('/content/drive'):
    logger.info("Unmounting existing drive mount at /content/drive...")
    try:
        drive.flush_and_unmount()
        time.sleep(5)
        if os.path.ismount('/content/drive'):
            # The unmount did not take effect. Deleting the directory now would
            # recurse into the mounted Drive and remove the user's files.
            logger.warning("/content/drive is still mounted after unmount; not removing it.")
        elif os.path.exists('/content/drive'):
            # Only a leftover local directory remains; clear it so the next
            # mount starts from a clean mount point.
            shutil.rmtree('/content/drive', ignore_errors=True)
            logger.info("Removed /content/drive directory.")
    except Exception as e:
        logger.warning(f"Failed to unmount cleanly: {e}. Proceeding anyway...")

# Mount Google Drive
logger.info("Mounting Google Drive...")
try:
    drive.mount('/content/drive', force_remount=True)
    logger.info("Google Drive mounted successfully.")
except Exception as e:
    logger.error(f"Failed to mount Google Drive: {e}")
    raise

# Update log directory now that Drive is mounted
log_dir = "/content/drive/MyDrive/✨ GP ✨/EchoNet_Dataset/EchoNet-Dynamic"
os.makedirs(log_dir, exist_ok=True)
new_log_file = os.path.join(log_dir, os.path.basename(log_file))

# Detach and close the old handler before touching the file, so nothing is
# written through a stale handle while the log is moved. Closing can fail if
# the handle pointed into the previous (now gone) mount; that is not fatal.
logger.removeHandler(file_handler)
try:
    file_handler.close()
except OSError as e:
    print(f"Could not close the previous log handler cleanly: {e}")

# The initial log may already sit at the target path, or may have been removed
# together with the old /content/drive directory; only move a file that
# exists and actually lives somewhere else.
if os.path.abspath(log_file) != os.path.abspath(new_log_file) and os.path.exists(log_file):
    shutil.move(log_file, new_log_file)
log_file = new_log_file

# FileHandler opens in append mode, so earlier log lines are kept.
file_handler = logging.FileHandler(log_file)
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(file_formatter)
logger.addHandler(file_handler)

# Constants
EXPECTED_VIDEOS = 10025  # number of per-video folders the check expects
MASK_TRACINGS_PATH = "/content/drive/MyDrive/✨ GP ✨/EchoNet_Dataset/EchoNet-Dynamic/MaskTracings"
CSV_PATH = "/content/drive/MyDrive/✨ GP ✨/EchoNet_Dataset/EchoNet-Dynamic/VolumeTracings.csv"

# Load the CSV file
logger.info(f"Loading CSV file from {CSV_PATH}...")
try:
    df = pd.read_csv(CSV_PATH)
    logger.info("Successfully loaded CSV file.")
except Exception as e:
    logger.error(f"Failed to load CSV file: {e}")
    raise

# Clean video names by removing the trailing .avi extension.
# The pattern is escaped, anchored and passed with an explicit regex flag:
# the default of `regex` differs between pandas versions, and an unescaped
# '.avi' interpreted as a regular expression matches any character followed
# by "avi".
df['FileName'] = df['FileName'].str.replace(r'\.avi$', '', regex=True)

# Group data by FileName to get frame numbers
grouped = df.groupby("FileName")

# Count the per-video sub-directories and compare with the expected total
video_dirs = []
for entry in os.listdir(MASK_TRACINGS_PATH):
    if os.path.isdir(os.path.join(MASK_TRACINGS_PATH, entry)):
        video_dirs.append(entry)
num_folders = len(video_dirs)
logger.info(f"Found {num_folders} folders in {MASK_TRACINGS_PATH}. Expected: {EXPECTED_VIDEOS}")
if num_folders == EXPECTED_VIDEOS:
    logger.info("Number of folders matches the expected count.")
else:
    logger.warning(f"Mismatch: Expected {EXPECTED_VIDEOS} folders, but found {num_folders}.")

# Verify each folder has the correct 2 frames
mismatches = 0
for video_idx, video_name in enumerate(sorted(video_dirs), 1):
    video_path = os.path.join(MASK_TRACINGS_PATH, video_name)
    logger.info(f"Checking video {video_idx}/{num_folders}: {video_name}")

    # Get mask files
    mask_files = [f for f in os.listdir(video_path) if f.endswith('.jpg')]
    if len(mask_files) != 2:
        logger.error(f"Video {video_name} has {len(mask_files)} frames instead of 2. Files: {mask_files}")
        mismatches += 1
        continue

    # Extract frame numbers from filenames of the form
    # <video>_frame_<number>_segments.jpg. A file that does not follow the
    # pattern is reported as a mismatch instead of aborting the whole check.
    try:
        frame_numbers = [int(f.split('_frame_')[1].split('_segments')[0]) for f in mask_files]
    except (IndexError, ValueError):
        logger.error(f"Could not parse frame numbers for {video_name}. Files: {mask_files}")
        mismatches += 1
        continue

    # Get expected frame numbers from CSV
    if video_name not in grouped.groups:
        logger.error(f"No data found in CSV for video: {video_name}")
        mismatches += 1
        continue
    video_data = grouped.get_group(video_name)

    # tolist() converts the numpy scalars to plain Python numbers so the
    # comparison and the log output use the same types as frame_numbers.
    expected_frames = sorted(video_data["Frame"].unique().tolist())
    if len(expected_frames) != 2:
        logger.error(f"CSV data for {video_name} has {len(expected_frames)} unique frames instead of 2: {expected_frames}")
        mismatches += 1
        continue

    # Compare frame numbers
    if sorted(frame_numbers) != expected_frames:
        logger.error(f"Frame number mismatch for {video_name}. Expected: {expected_frames}, Found: {frame_numbers}")
        mismatches += 1
    else:
        logger.info(f"Video {video_name} has correct frames: {frame_numbers}")

# Report the totals gathered by the checks above
summary_lines = (
    "\n[Summary]",
    f"Total folders checked: {num_folders}",
    f"Expected folders: {EXPECTED_VIDEOS}",
    f"Mismatches found: {mismatches}",
)
for summary_line in summary_lines:
    logger.info(summary_line)

if mismatches:
    logger.warning(f"{mismatches} folders have issues (incorrect frame count or mismatch).")
else:
    logger.info("All folders contain the correct 2 frames matching the CSV.")

print(f"Verification complete. Check {log_file} for details.")

INFO:root:Unmounting existing drive mount at /content/drive...
Unmounting existing drive mount at /content/drive...
Unmounting existing drive mount at /content/drive...
Unmounting existing drive mount at /content/drive...
Unmounting existing drive mount at /content/drive...
INFO:root:Mounting Google Drive...
Mounting Google Drive...
Mounting Google Drive...
Mounting Google Drive...
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.11/logging/__init__.py", line 1114, in emit
    self.flush()
  File "/usr/lib/python3.11/logging/__init__.py", line 1094, in flush
    self.stream.flush()
OSError: [Errno 107] Transport endpoint is not connected
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/applic

Mounted at /content/drive


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
INFO:root:Checking video 9529/10025: 0X9E181B942CACADF
Checking video 9529/10025: 0X9E181B942CACADF
Checking video 9529/10025: 0X9E181B942CACADF
Checking video 9529/10025: 0X9E181B942CACADF
Checking video 9529/10025: 0X9E181B942CACADF
INFO:root:Video 0X9E181B942CACADF has correct frames: [0, 18]
Video 0X9E181B942CACADF has correct frames: [0, 18]
Video 0X9E181B942CACADF has correct frames: [0, 18]
Video 0X9E181B942CACADF has correct frames: [0, 18]
Video 0X9E181B942CACADF has correct frames: [0, 18]
INFO:root:Checking video 9530/10025: 0X9E37320F33E7ED
Checking video 9530/10025: 0X9E37320F33E7ED
Checking video 9530/10025: 0X9E37320F33E7ED
Checking video 9530/10025: 0X9E37320F33E7ED
Checking video 9530/10025: 0X9E37320F33E7ED
INFO:root:Video 0X9E37320F33E7ED has correct frames: [187, 212]
Video 0X9E37320F33E7ED has correct frames: [187, 212]
Video 0X9E37320F33E7ED has correct frames: [187, 212]
Video 0X9E37320F33E7ED has c

Verification complete. Check /content/drive/MyDrive/✨ GP ✨/EchoNet_Dataset/EchoNet-Dynamic/verify_masktracings_log_20250509_180348.txt for details.
