In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and echo the full path of every file found.
for current_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(current_dir, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
"""
Top-Q1 Journal Standard: Refined Segmentation using U-Net++ for Cancerous ROI

Dataset structure:
    dataset/
        train/
            benign/
            malignant/
        test/
            benign/
            malignant/

Desired output:
    The background is black, while the lesion (ROI) retains its original color from the input image.

Author: [Your Name]
Date: [Current Date]
"""

import os
import cv2
import time
import copy
import random
import zipfile
import numpy as np
from glob import glob
from PIL import Image
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

#############################################
# 1. Reproducibility & Device Setup
#############################################
# One seed shared by every random number generator used in this notebook.
SEED = 42

# Ask cuDNN for deterministic kernels and disable its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Seed Python, NumPy and PyTorch (CPU, plus every GPU when CUDA is present).
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Run on the first GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

#############################################
# 2. Helper Functions: Pseudo Mask Generation & Image Conversion
#############################################
def generate_pseudo_mask(pil_img):
    """
    Build a binary pseudo ground-truth mask for one image with Otsu thresholding.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur and
    split into two classes by Otsu's method. The split is then inverted
    unconditionally: the side Otsu marked as foreground becomes black (0) and
    the other side becomes white (255), so that the "dark" region of the image
    ends up white.

    NOTE: the inversion is not data-driven. If the lesions in your dataset are
    brighter than the surrounding skin, add logic to decide per image whether
    to invert.

    Returns a PIL image in mode 'L' containing only the values 0 and 255.
    """
    grayscale = np.array(pil_img.convert("L"))
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    otsu_result = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # Element 0 is the threshold Otsu picked; element 1 is the thresholded image.
    above_threshold = otsu_result[1] > 127
    # Binarise and invert in a single step: foreground -> 0, everything else -> 255.
    inverted = np.where(above_threshold, 0, 255).astype(np.uint8)
    return Image.fromarray(inverted, mode='L')

def random_horizontal_flip(image, mask, p=0.5):
    """
    Mirror an image and its mask left-to-right together with probability ``p``.

    A single random draw decides the fate of both inputs, so the pair always
    stays aligned. Returns the ``(image, mask)`` tuple, flipped or untouched.
    """
    should_flip = random.random() < p
    if not should_flip:
        return image, mask
    flipped_image = image.transpose(Image.FLIP_LEFT_RIGHT)
    flipped_mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
    return flipped_image, flipped_mask

class SegmentationTransforms:
    """
    Callable that applies the same random augmentation to an image and its
    pseudo-mask. The only augmentation performed is a horizontal flip.
    """
    def __init__(self, flip_prob=0.5):
        # Probability forwarded to random_horizontal_flip on every call.
        self.flip_prob = flip_prob

    def __call__(self, image, mask):
        """Return the (image, mask) pair, either both flipped or both untouched."""
        augmented = random_horizontal_flip(image, mask, self.flip_prob)
        image, mask = augmented
        return image, mask

def to_tensor_image(pil_img):
    """
    Turn an (H, W, C) image into a float32 tensor laid out as (C, H, W).

    Pixel values are divided by 255, so 8-bit input ends up in [0, 1].
    """
    pixels = np.array(pil_img).astype(np.float32)
    scaled = pixels / 255.0
    # Channels-last -> channels-first, the layout the convolution layers expect.
    channels_first = scaled.transpose(2, 0, 1)
    return torch.from_numpy(channels_first)

def to_tensor_mask(pil_mask):
    """
    Turn an (H, W) grayscale mask into a float32 tensor of shape (1, H, W).

    Values are divided by 255, so a 0/255 mask becomes a 0.0/1.0 mask.
    """
    scaled = np.array(pil_mask).astype(np.float32) / 255.0
    # Prepend the single channel axis.
    with_channel = scaled[np.newaxis, ...]
    return torch.from_numpy(with_channel)

#############################################
# 3. Custom Dataset Class
#############################################
class SkinSegmentationDataset(Dataset):
    """
    Dataset for segmentation. Assumes images are stored in class folders
    (e.g. train/benign, train/malignant).
    A pseudo ground-truth mask is generated on the fly for each image; no
    annotated masks are read from disk.
    """
    def __init__(self, root_dir, transform=None):
        """
        root_dir: directory containing one sub-folder per class.
        transform: optional callable ``transform(image, mask) -> (image, mask)``
                   applied to the PIL image and its pseudo-mask together.
        """
        super().__init__()
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        # Only real sub-directories count as classes; stray files sitting next
        # to the class folders (README, .DS_Store, ...) are ignored.
        self.classes = sorted(
            entry for entry in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, entry))
        )
        for cls in self.classes:
            cls_folder = os.path.join(root_dir, cls)
            cls_paths = []
            for ext in ('*.png', '*.jpg', '*.jpeg'):
                cls_paths.extend(glob(os.path.join(cls_folder, ext)))
            # glob returns files in filesystem order. Sorting makes the
            # index -> file mapping stable, so a seeded shuffle yields the
            # same sample sequence on every machine.
            cls_paths = sorted(set(cls_paths))
            self.image_paths.extend(cls_paths)
            self.labels.extend([cls] * len(cls_paths))

    def __len__(self):
        """Number of images found across all class folders."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """
        Load image ``idx`` and build its pseudo-mask.

        Returns ``(image, mask, img_path)``: the image as a (C, H, W) float
        tensor, the mask as a (1, H, W) float tensor, and the source file path.
        """
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGB")
        # The mask is derived before augmentation so that both are transformed
        # by the same random decision.
        mask = generate_pseudo_mask(image)
        if self.transform:
            image, mask = self.transform(image, mask)
        image = to_tensor_image(image)
        mask = to_tensor_mask(mask)
        return image, mask, img_path

#############################################
# 4. U-Net++ Model Definition
#############################################
def conv_block(in_channels, out_channels):
    """
    Two stacked 3x3 convolution stages, each followed by batch norm and ReLU.

    The first stage maps in_channels -> out_channels, the second keeps
    out_channels. Padding of 1 preserves the spatial size.
    """
    layers = []
    for stage_in in (in_channels, out_channels):
        layers.append(nn.Conv2d(stage_in, out_channels, kernel_size=3, padding=1))
        layers.append(nn.BatchNorm2d(out_channels))
        layers.append(nn.ReLU(inplace=True))
    return nn.Sequential(*layers)

def up_conv(in_channels, out_channels):
    """
    2x bilinear upsampling followed by a single 3x3 conv, batch norm and ReLU.

    Doubles the spatial size and maps in_channels -> out_channels.
    """
    upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
    project = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
    normalize = nn.BatchNorm2d(out_channels)
    activate = nn.ReLU(inplace=True)
    return nn.Sequential(upsample, project, normalize, activate)

class UNetPlusPlus(nn.Module):
    """
    Simplified U-Net++ whose output is taken from the node x0_3.

    Node naming follows the U-Net++ convention ``x{depth}_{column}``: column 0
    is the encoder, higher columns are the nested decoder nodes. The output
    depends only on encoder depths 0-3 (three poolings), so the input height
    and width should be multiples of 8 for the upsampled maps to line up with
    their skip connections.
    """
    def __init__(self, in_channels=3, out_channels=1):
        """
        in_channels: channels of the input image.
        out_channels: channels of the returned logit map.
        """
        super().__init__()
        nb_filter = [32, 64, 128, 256, 512]

        # Encoder
        self.conv0_0 = conv_block(in_channels, nb_filter[0])
        self.pool0   = nn.MaxPool2d(2)

        self.conv1_0 = conv_block(nb_filter[0], nb_filter[1])
        self.pool1   = nn.MaxPool2d(2)

        self.conv2_0 = conv_block(nb_filter[1], nb_filter[2])
        self.pool2   = nn.MaxPool2d(2)

        self.conv3_0 = conv_block(nb_filter[2], nb_filter[3])
        self.pool3   = nn.MaxPool2d(2)

        # conv4_0, up3_1 and conv3_1 are not used by forward() (see there).
        # They stay registered, in their original creation order, so that
        # seeded initialisation of the other layers and the state_dict layout
        # of existing checkpoints are unchanged.
        self.conv4_0 = conv_block(nb_filter[3], nb_filter[4])

        # Decoder (Nested)
        self.up0_1 = up_conv(nb_filter[1], nb_filter[0])
        self.conv0_1 = conv_block(nb_filter[0]*2, nb_filter[0])

        self.up1_1 = up_conv(nb_filter[2], nb_filter[1])
        self.conv1_1 = conv_block(nb_filter[1]*2, nb_filter[1])

        self.up2_1 = up_conv(nb_filter[3], nb_filter[2])
        self.conv2_1 = conv_block(nb_filter[2]*2, nb_filter[2])

        self.up3_1 = up_conv(nb_filter[4], nb_filter[3])
        self.conv3_1 = conv_block(nb_filter[3]*2, nb_filter[3])

        self.up0_2 = up_conv(nb_filter[1], nb_filter[0])
        self.conv0_2 = conv_block(nb_filter[0]*3, nb_filter[0])

        self.up1_2 = up_conv(nb_filter[2], nb_filter[1])
        self.conv1_2 = conv_block(nb_filter[1]*3, nb_filter[1])

        self.up0_3 = up_conv(nb_filter[1], nb_filter[0])
        self.conv0_3 = conv_block(nb_filter[0]*4, nb_filter[0])

        # 1x1 projection from the last decoder node to the output logits.
        self.final = nn.Conv2d(nb_filter[0], out_channels, kernel_size=1)

    def forward(self, x):
        """
        Return raw logits of shape (N, out_channels, H, W) for input (N, C, H, W).

        No sigmoid is applied; callers apply it (or use a logit-based loss).
        """
        # Encoder
        x0_0 = self.conv0_0(x)
        x1_0 = self.conv1_0(self.pool0(x0_0))
        x2_0 = self.conv2_0(self.pool1(x1_0))
        x3_0 = self.conv3_0(self.pool2(x2_0))
        # x4_0 (conv4_0) and x3_1 (up3_1 + conv3_1) used to be computed here,
        # but no node on the path to the output consumes x3_1. Evaluating them
        # only cost time and memory, and their parameters never received
        # gradients, so the branch is skipped. The returned logits are
        # identical; only the BatchNorm running statistics of those unused
        # layers are no longer updated.

        # 1st nested level
        x0_1 = self.conv0_1(torch.cat([x0_0, self.up0_1(x1_0)], 1))
        x1_1 = self.conv1_1(torch.cat([x1_0, self.up1_1(x2_0)], 1))
        x2_1 = self.conv2_1(torch.cat([x2_0, self.up2_1(x3_0)], 1))

        # 2nd nested level
        x0_2 = self.conv0_2(torch.cat([x0_0, x0_1, self.up0_2(x1_1)], 1))
        x1_2 = self.conv1_2(torch.cat([x1_0, x1_1, self.up1_2(x2_1)], 1))

        # 3rd nested level
        x0_3 = self.conv0_3(torch.cat([x0_0, x0_1, x0_2, self.up0_3(x1_2)], 1))

        output = self.final(x0_3)
        return output

#############################################
# 5. Loss Functions: BCE + Dice Loss
#############################################
# Pixel-wise binary cross-entropy computed directly on raw logits.
criterion_bce = nn.BCEWithLogitsLoss()

def dice_loss(pred, target, smooth=1.):
    """
    Soft Dice loss computed over all elements of the batch at once.

    pred:   raw logits; a sigmoid is applied here.
    target: tensor with the same number of elements as pred, values in [0, 1].
    smooth: added to numerator and denominator so the ratio stays defined
            when both prediction and target are empty.

    Returns a scalar tensor ``1 - dice``.
    """
    # reshape (rather than view) also accepts non-contiguous tensors, e.g.
    # transposed or sliced inputs, instead of raising a RuntimeError.
    probs = torch.sigmoid(pred).reshape(-1)
    target = target.reshape(-1)
    intersection = (probs * target).sum()
    dice = (2. * intersection + smooth) / (probs.sum() + target.sum() + smooth)
    return 1 - dice

def combined_loss(pred, target):
    """Sum of BCE-with-logits and soft Dice loss; pred must be raw logits."""
    return criterion_bce(pred, target) + dice_loss(pred, target)

#############################################
# 6. Training Function for Segmentation
#############################################
def train_segmentation(model, dataloader, optimizer, num_epochs=25):
    """
    Train ``model`` with ``combined_loss`` and return the per-epoch mean loss.

    model:      network returning raw logits; it is switched to train mode.
    dataloader: iterable yielding ``(images, masks, paths)`` batches.
    optimizer:  optimizer already bound to the model parameters.
    num_epochs: number of passes over ``dataloader``.

    Batches are moved to the module-level ``device``. The reported epoch loss
    is the sample-weighted mean over the samples actually processed, so it
    stays correct with ``drop_last=True`` or a subset sampler. An epoch that
    yields no samples is reported as NaN instead of raising ZeroDivisionError.

    Returns a list with one float per epoch.
    """
    model.train()
    history = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        samples_seen = 0
        start_time = time.time()
        for imgs, masks, _ in dataloader:
            imgs = imgs.to(device)
            masks = masks.to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = combined_loss(outputs, masks)
            loss.backward()
            optimizer.step()
            # loss is a batch mean; weight it by the batch size so a smaller
            # final batch does not skew the epoch average.
            batch_size = imgs.size(0)
            running_loss += loss.item() * batch_size
            samples_seen += batch_size

        epoch_loss = running_loss / samples_seen if samples_seen else float("nan")
        elapsed = time.time() - start_time
        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f} - Time: {elapsed:.1f}s")
        history.append(epoch_loss)
    return history

#############################################
# 7. Inference & Saving Segmented Images
#############################################
def inference_and_save(model, dataset_root, output_root):
    """
    For every image in dataset_root (organized in class subfolders), run inference to
    compute a segmentation mask. Then create an output image where:
      - Background pixels (mask==0) are set to black.
      - ROI pixels (mask==1) retain their original color.
    The output is saved under output_root with the same class folder and file name.

    Only sub-directories of dataset_root are treated as class folders; stray
    files next to them are skipped instead of producing empty output folders.
    The model is switched to eval mode and left in that mode.
    """
    model.eval()
    os.makedirs(output_root, exist_ok=True)
    # Sorted so the processing order does not depend on the filesystem.
    for cls in sorted(os.listdir(dataset_root)):
        cls_input_folder = os.path.join(dataset_root, cls)
        if not os.path.isdir(cls_input_folder):
            continue
        cls_output_folder = os.path.join(output_root, cls)
        os.makedirs(cls_output_folder, exist_ok=True)
        for ext in ('*.png', '*.jpg', '*.jpeg'):
            for img_path in sorted(glob(os.path.join(cls_input_folder, ext))):
                # Load and prepare: (H, W, C) image -> (1, C, H, W) tensor on `device`.
                orig_img = Image.open(img_path).convert("RGB")
                img_tensor = to_tensor_image(orig_img).unsqueeze(0).to(device)
                with torch.no_grad():
                    output = model(img_tensor)
                    prob = torch.sigmoid(output)
                    mask = (prob > 0.5).float()  # threshold to binary
                # Convert mask to a 0/1 uint8 array of shape (H, W).
                mask_np = mask.squeeze().cpu().numpy().astype(np.uint8)
                # Multiply each pixel in the original image by the mask => black background.
                orig_np = np.array(orig_img)
                seg_np = orig_np * np.expand_dims(mask_np, axis=2)
                seg_pil = Image.fromarray(seg_np.astype(np.uint8))
                base_name = os.path.basename(img_path)
                seg_pil.save(os.path.join(cls_output_folder, base_name))

    print(f"Segmented images saved in: {output_root}")

#############################################
# 8. Zip the Output Directory
#############################################
def zip_directory(folder_path, zip_path):
    """
    Write every file found under folder_path into a deflate-compressed zip
    archive at zip_path. Entries are stored relative to folder_path.
    """
    archive = zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED)
    with archive:
        for current_dir, _subdirs, names in os.walk(folder_path):
            for name in names:
                source = os.path.join(current_dir, name)
                # Strip the folder prefix so the archive has no absolute paths.
                archive.write(source, arcname=os.path.relpath(source, folder_path))
    print(f"Output zipped to: {zip_path}")

#############################################
# 9. Main Script
#############################################
if __name__ == "__main__":
    # ---- Paths -----------------------------------------------------------
    # Update dataset_dir with your data path; it must hold "train/" and "test/".
    dataset_dir = "/kaggle/input/benign-malignant-original/archive"
    train_dir = os.path.join(dataset_dir, "train")
    test_dir = os.path.join(dataset_dir, "test")

    # Segmented images mirror the train/test split under this folder,
    # and the whole folder is zipped at the end.
    output_segmentation_dir = "/kaggle/working/segmented"
    segmented_train_out = os.path.join(output_segmentation_dir, "train")
    segmented_test_out = os.path.join(output_segmentation_dir, "test")
    zip_path = "/kaggle/working/segmented_output.zip"

    # ---- Data ------------------------------------------------------------
    seg_transforms = SegmentationTransforms(flip_prob=0.5)
    train_dataset = SkinSegmentationDataset(train_dir, transform=seg_transforms)
    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2)

    # ---- Model & optimizer -----------------------------------------------
    model = UNetPlusPlus(in_channels=3, out_channels=1).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    # ---- Training --------------------------------------------------------
    num_epochs = 25
    print("Starting training of U-Net++ for segmentation...")
    train_history = train_segmentation(model, train_loader, optimizer, num_epochs=num_epochs)

    # Keep the learned weights so inference can be repeated without retraining.
    torch.save(model.state_dict(), "/kaggle/working/unetpp_weights.pth")
    print("Model training completed and weights saved.")

    # ---- Inference: black background, lesion keeps its colour --------------
    print("Running inference on training images...")
    inference_and_save(model, train_dir, segmented_train_out)
    print("Running inference on testing images...")
    inference_and_save(model, test_dir, segmented_test_out)

    # ---- Package the results ---------------------------------------------
    zip_directory(output_segmentation_dir, zip_path)


Using device: cuda:0
Starting training of U-Net++ for segmentation...
Epoch 1/25 - Loss: 0.6745 - Time: 101.8s
Epoch 2/25 - Loss: 0.4987 - Time: 110.7s
Epoch 3/25 - Loss: 0.4130 - Time: 110.7s
Epoch 4/25 - Loss: 0.3674 - Time: 110.8s
Epoch 5/25 - Loss: 0.3461 - Time: 110.9s
Epoch 6/25 - Loss: 0.3345 - Time: 111.0s
Epoch 7/25 - Loss: 0.3190 - Time: 111.0s
Epoch 8/25 - Loss: 0.3056 - Time: 110.9s
Epoch 9/25 - Loss: 0.3043 - Time: 110.7s
Epoch 10/25 - Loss: 0.2894 - Time: 111.1s
Epoch 11/25 - Loss: 0.2865 - Time: 111.4s
Epoch 12/25 - Loss: 0.2808 - Time: 111.6s
Epoch 13/25 - Loss: 0.2797 - Time: 110.7s
Epoch 14/25 - Loss: 0.2762 - Time: 111.3s
Epoch 15/25 - Loss: 0.2708 - Time: 111.7s
Epoch 16/25 - Loss: 0.2732 - Time: 111.5s
Epoch 17/25 - Loss: 0.2658 - Time: 111.8s
Epoch 18/25 - Loss: 0.2566 - Time: 111.7s
Epoch 19/25 - Loss: 0.2534 - Time: 111.9s
Epoch 20/25 - Loss: 0.2601 - Time: 111.6s
Epoch 21/25 - Loss: 0.2554 - Time: 111.6s
Epoch 22/25 - Loss: 0.2483 - Time: 111.6s
Epoch 23/25 - L

In [1]:
import os
import shutil

def clean_kaggle_output_dir(dir_path="/kaggle/working"):
    """
    Empty a directory while keeping the directory itself.

    Regular files and symlinks are unlinked and sub-directories are removed
    recursively. If dir_path does not exist it is created and nothing else is
    done. A failure on one entry is printed and the remaining entries are
    still processed.

    Parameters:
        dir_path (str): Path to the directory to clean.
    """
    # Nothing to clean yet: create the directory and stop.
    if not os.path.exists(dir_path):
        print(f"Directory {dir_path} does not exist. Creating it.")
        os.makedirs(dir_path)
        return

    for entry in os.listdir(dir_path):
        file_path = os.path.join(dir_path, entry)
        try:
            remover = None
            if os.path.islink(file_path) or os.path.isfile(file_path):
                remover = os.unlink        # plain file or link
            elif os.path.isdir(file_path):
                remover = shutil.rmtree    # directory with all of its content
            if remover is not None:
                remover(file_path)
        except Exception as e:
            print(f"Failed to delete {file_path}. Reason: {e}")

    print(f"Cleaned the Kaggle output directory: {dir_path}")

if __name__ == "__main__":
    # Wipe everything under the default Kaggle working directory.
    clean_kaggle_output_dir(dir_path="/kaggle/working")


Cleaned the Kaggle output directory: /kaggle/working
