In [3]:
pip install opencv-python

Collecting opencv-python
  Using cached opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Using cached opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl (39.5 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.11.0.86
Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
import cv2
import torch
from torch.utils.data import Dataset

class CholecSeg8kDataset(Dataset):
    """Paired image/mask dataset read from a single flat folder.

    Every file in ``data_folder`` whose name ends in ``_image.png`` is one
    sample; its mask is the file with the same stem ending in ``_mask.png``.

    Args:
        data_folder: Directory holding the ``*_image.png`` / ``*_mask.png`` pairs.

    Indexing returns ``(image, mask)``:
        image: float32 tensor, channels-first RGB, values divided by 255.
        mask: int64 tensor holding the mask's grayscale pixel values
            (presumably class ids -- confirm against the preprocessing step).

    Raises (from ``__getitem__``):
        FileNotFoundError: The image or its mask does not exist on disk.
        OSError: A file exists but OpenCV could not read/decode it.
    """

    IMAGE_SUFFIX = '_image.png'
    MASK_SUFFIX = '_mask.png'

    def __init__(self, data_folder):
        self.data_folder = data_folder
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping identical across runs and machines.
        self.image_files = sorted(
            f for f in os.listdir(data_folder) if f.endswith(self.IMAGE_SUFFIX)
        )

    def __len__(self):
        """Return the number of image files found in the folder."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as an ``(image, mask)`` tensor pair."""
        # Find image and corresponding mask. Only the trailing suffix is
        # swapped, so an '_image.png' elsewhere in the name is left alone.
        image_file = self.image_files[idx]
        mask_file = image_file[:-len(self.IMAGE_SUFFIX)] + self.MASK_SUFFIX
        image_path = os.path.join(self.data_folder, image_file)
        mask_path = os.path.join(self.data_folder, mask_file)

        # cv2.imread signals failure by returning None instead of raising, so
        # check for the files up front to get an error that names the path.
        for path in (image_path, mask_path):
            if not os.path.isfile(path):
                raise FileNotFoundError(f"Expected file not found: {path}")

        image = cv2.imread(image_path)
        if image is None:
            raise OSError(f"OpenCV could not read image: {image_path}")
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise OSError(f"OpenCV could not read mask: {mask_path}")

        # OpenCV decodes to BGR; convert to RGB before building the tensor.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # HWC -> CHW and scale to [0, 1].
        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1) / 255.0
        mask = torch.tensor(mask, dtype=torch.long)
        return image, mask


In [None]:
import os
import shutil
import random
# Split the processed image/mask pairs 80/20 into <split_folder>/train and
# <split_folder>/val. Files are copied, so the processed folder is untouched.
processed_folder = r"C:\Users\karth\OneDrive\Desktop\Project\processed_cholecseg8k"
split_folder = r"C:\Users\karth\OneDrive\Desktop\Project\split_cholecseg8k"


def _mask_name(image_file):
    """Return the mask file name that pairs with ``image_file``."""
    return image_file[:-len('_image.png')] + '_mask.png'


# os.listdir order depends on the filesystem, so sort before the seeded
# shuffle; otherwise the same seed can still give a different split elsewhere.
image_files = sorted(f for f in os.listdir(processed_folder) if f.endswith('_image.png'))
if not image_files:
    raise FileNotFoundError(f"No '*_image.png' files found in {processed_folder}")

# Check every mask exists before copying anything, so a missing mask cannot
# leave a half-populated split behind.
missing_masks = [
    f for f in image_files
    if not os.path.isfile(os.path.join(processed_folder, _mask_name(f)))
]
if missing_masks:
    raise FileNotFoundError(
        f"{len(missing_masks)} image(s) have no matching mask, e.g. {missing_masks[0]}"
    )

random.seed(42)
random.shuffle(image_files)

# Split 80% train, 20% val
split_idx = int(0.8 * len(image_files))
train_files = image_files[:split_idx]
val_files = image_files[split_idx:]
splits = {"train": train_files, "val": val_files}

# Create train/val folders. If a folder already holds images that are not part
# of this split (left over from an earlier run), stop: copying on top of them
# would let the same sample appear in both train and val.
for subset, files in splits.items():
    subset_folder = os.path.join(split_folder, subset)
    os.makedirs(subset_folder, exist_ok=True)
    existing = {f for f in os.listdir(subset_folder) if f.endswith('_image.png')}
    stale = sorted(existing - set(files))
    if stale:
        raise RuntimeError(
            f"{subset_folder} contains {len(stale)} image(s) from a different split "
            f"(e.g. {stale[0]}); empty the folder and re-run."
        )

# Copy files to train/val folders
for subset, files in splits.items():
    subset_folder = os.path.join(split_folder, subset)
    for file in files:
        mask_file = _mask_name(file)
        shutil.copy(os.path.join(processed_folder, file), os.path.join(subset_folder, file))
        shutil.copy(os.path.join(processed_folder, mask_file), os.path.join(subset_folder, mask_file))
print(" Data split into train and val successfully!")


✅ Data split into train and val successfully!


In [8]:
from torch.utils.data import DataLoader

# Locations of the two subsets on disk.
train_folder = r"C:\Users\karth\OneDrive\Desktop\Project\split_cholecseg8k\train"
val_folder = r"C:\Users\karth\OneDrive\Desktop\Project\split_cholecseg8k\val"

# One dataset per subset, then one loader per dataset. Only the training
# loader shuffles; the validation loader keeps the dataset order.
train_dataset = CholecSeg8kDataset(data_folder=train_folder)
val_dataset = CholecSeg8kDataset(data_folder=val_folder)

train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=8, shuffle=False)

# Quick test: pull a single batch (if there is one) and report its shapes.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    images, masks = first_batch
    print(f"Image batch shape: {images.shape}")
    print(f"Mask batch shape: {masks.shape}")


Image batch shape: torch.Size([8, 3, 256, 256])
Mask batch shape: torch.Size([8, 256, 256])
