Build a Sub-Dataset of 4800 Patches from Tumor and Non-Tumor Images

In [None]:
#Necessary Imports
import os
import cv2
import pandas as pd

Define the input folders (images and masks) and create the output folders

In [None]:
# Inputs: the source images and their masks, read by the patch-extraction cell.
image_folder, mask_folder = "images", "masks"

# Outputs: everything is written under output_root; patches get a sub-folder.
output_root = "sub_dataset"
patch_output_folder = os.path.join(output_root, "patches")

# makedirs creates missing parent folders, so this single call also creates
# output_root; exist_ok=True keeps the cell safe to re-run.
os.makedirs(patch_output_folder, exist_ok=True)

# Create 16 Patches from Each of the 300 Images

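1. Load the original labels, images, and masks
2. Divide each image into a 4 x 4 grid (16 boxes) and crop one patch per box
3. Determine each patch's label (tumor if any mask pixel inside the patch is non-zero, otherwise non-tumor)
4. Save each patch as `<image name>_<patch id>.png`
5. Record a row for each patch, to be written to the CSV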


In [4]:
# Build the patch dataset: cut every image listed in labels.csv into a
# GRID_SIZE x GRID_SIZE grid, save each grid cell as its own PNG and collect
# one metadata row per patch in `patch_rows`.
GRID_SIZE = 4   # 4 x 4 grid -> 16 patches per image

# Load original labels
labels_df = pd.read_csv("labels.csv")   # columns: filename, target

# Re-created on every run so re-executing this cell never duplicates rows.
patch_rows = []

for filename in labels_df["filename"]:      # e.g., "1.png"
    # splitext strips only the final extension, so a name with extra dots
    # (e.g. "scan.v2.png") still maps to the matching mask file.
    base = os.path.splitext(filename)[0]    # "1"

    # Load image and mask
    img_path = os.path.join(image_folder, filename)
    mask_path = os.path.join(mask_folder, f"{base}_mask.png")

    image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread signals failure by returning None rather than raising, so
    # fail here with the offending path instead of an opaque error later.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    if mask is None:
        raise FileNotFoundError(f"Could not read mask: {mask_path}")

    # The same pixel window is cut from image and mask, so a size mismatch
    # would silently attach labels to the wrong region.
    if mask.shape != image.shape:
        raise ValueError(
            f"Mask shape {mask.shape} does not match image shape "
            f"{image.shape} for {filename}"
        )

    # Integer division: when a dimension is not a multiple of GRID_SIZE, the
    # leftover rows/columns at the bottom/right edge are not part of any patch.
    h, w = image.shape
    patch_h = h // GRID_SIZE
    patch_w = w // GRID_SIZE

    # Create GRID_SIZE * GRID_SIZE patches, numbered 1..16 in row-major order
    for i in range(GRID_SIZE):
        for j in range(GRID_SIZE):
            patch_id = i * GRID_SIZE + j + 1

            y1 = i * patch_h
            y2 = (i + 1) * patch_h
            x1 = j * patch_w
            x2 = (j + 1) * patch_w

            img_patch = image[y1:y2, x1:x2]
            mask_patch = mask[y1:y2, x1:x2]

            # Determine patch label: 1 if any mask pixel in the patch is
            # non-zero, otherwise 0
            patch_label = 1 if mask_patch.max() > 0 else 0

            # Patch filename
            patch_filename = f"{base}_{patch_id}.png"
            patch_path = os.path.join(patch_output_folder, patch_filename)

            # Save patch; cv2.imwrite reports failure through its return
            # value, so check it to avoid CSV rows that point at missing files.
            if not cv2.imwrite(patch_path, img_patch):
                raise OSError(f"Could not write patch: {patch_path}")

            # Add row to CSV
            patch_rows.append({
                "original_filename": filename,
                "patch_id": patch_id,
                "patch_filename": patch_filename,
                "label": patch_label
            })

In [5]:
# Save CSV
# The columns are listed explicitly so the file always gets the same header in
# the same order, even when patch_rows is empty (a header-less CSV cannot be
# read back with pd.read_csv).
patch_columns = ["original_filename", "patch_id", "patch_filename", "label"]
patch_df = pd.DataFrame(patch_rows, columns=patch_columns)

csv_path = os.path.join(output_root, "sub_dataset.csv")
patch_df.to_csv(csv_path, index=False)   # index=False: no row-number column

print("Patches and CSV saved")


Patches and CSV saved
