In [1]:
pip install kagglehub segmentation-models-pytorch albumentations


Collecting segmentation-models-pytorch
  Downloading segmentation_models_pytorch-0.5.0-py3-none-any.whl.metadata (17 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8->segmentation-models-pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8->segmentation-models-pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8->segmentation-models-pytorch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8->segmentation-models-pytorch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8->segmentation-models-pytorch)
  Downloading nvidia_cublas_cu12-12.4.5.8-

In [2]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("shubhambaid/skin-burn-dataset")

print("Path to dataset files:", path)


Downloading from https://www.kaggle.com/api/v1/datasets/download/shubhambaid/skin-burn-dataset?dataset_version_number=1...


100%|██████████| 16.6M/16.6M [00:00<00:00, 181MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/shubhambaid/skin-burn-dataset/versions/1


In [18]:
import os
import numpy as np
from PIL import Image
from glob import glob

import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T

import segmentation_models_pytorch as smp

# ========== Dataset Class ===========

class BurnSegmentationDataset(Dataset):
    def __init__(self, images_dir, image_size=(128,128), transform=None):
        self.images_dir = images_dir
        self.image_size = image_size
        self.transform = transform

        self.image_paths = sorted(glob(os.path.join(images_dir, "*.jpg")))

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        base_name = os.path.splitext(img_path)[0]
        txt_path = base_name + ".txt"

        # Load image
        img = Image.open(img_path).convert("RGB")
        original_w, original_h = img.size

        # Load bboxes and create mask
        bboxes = self.load_bboxes(txt_path)
        mask = self.bboxes_to_mask(bboxes, original_w, original_h)

        # Resize image and mask
        img = img.resize(self.image_size)
        mask_img = Image.fromarray(mask*255).resize(self.image_size)

        # Convert to tensors
        if self.transform:
            img = self.transform(img)
        else:
            # Default transform: ToTensor (0-1 float)
            img = T.ToTensor()(img)

        # Mask to tensor, binary 0/1
        mask = np.array(mask_img)/255.0
        mask = torch.tensor(mask, dtype=torch.float32).unsqueeze(0)  # (1, H, W)

        return img, mask

    def load_bboxes(self, txt_path):
        bboxes = []
        if os.path.exists(txt_path):
            with open(txt_path, "r") as f:
                for line in f:
                    parts = line.strip().split()
                    if len(parts) == 5:
                        bboxes.append(list(map(float, parts)))
        return bboxes

    def bboxes_to_mask(self, bboxes, img_width, img_height):
        mask = np.zeros((img_height, img_width), dtype=np.uint8)
        for bbox in bboxes:
            _, x_c, y_c, w, h = bbox
            x_c *= img_width
            y_c *= img_height
            w *= img_width
            h *= img_height

            x_min = max(0, int(x_c - w/2))
            y_min = max(0, int(y_c - h/2))
            x_max = min(img_width, int(x_c + w/2))
            y_max = min(img_height, int(y_c + h/2))

            mask[y_min:y_max, x_min:x_max] = 1
        return mask


# ====== Set dataset directory and create Dataset + DataLoader ======

dataset_dir = "/kaggle/input/skin-burn-dataset"
dataset = BurnSegmentationDataset(dataset_dir, image_size=(128, 128))


dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# ====== Build PSPNet Model ======

# Use ResNet34 encoder backbone (you can choose others)
model = smp.PSPNet(
    encoder_name="resnet34",        # backbone
    encoder_weights="imagenet",     # pretrained weights
    in_channels=3,
    classes=1,                      # binary segmentation
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# ====== Loss and optimizer ======

from segmentation_models_pytorch.losses import DiceLoss

loss_fn = DiceLoss(mode='binary')

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# ====== Training loop ======

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    for images, masks in dataloader:
        images = images.to(device)
        masks = masks.to(device)

        optimizer.zero_grad()
        outputs = model(images)

        loss = loss_fn(outputs, masks)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss/len(dataloader):.4f}")

print("Training finished!")


Epoch 1/10 - Loss: 0.4500
Epoch 2/10 - Loss: 0.3788
Epoch 3/10 - Loss: 0.3549
Epoch 4/10 - Loss: 0.3206
Epoch 5/10 - Loss: 0.3046
Epoch 6/10 - Loss: 0.2869
Epoch 7/10 - Loss: 0.2685
Epoch 8/10 - Loss: 0.2516
Epoch 9/10 - Loss: 0.2392
Epoch 10/10 - Loss: 0.2290
Training finished!


In [13]:
import os

print("Dataset root:", path)
print("Files in dataset root:", os.listdir(path))


Dataset root: /kaggle/input/skin-burn-dataset
Files in dataset root: ['img560.txt', 'img1194.jpg', 'img275.txt', 'img966.txt', 'img516.jpg', 'img943.txt', 'img254.jpg', 'img263.jpg', 'img699.txt', 'img659.jpg', 'img338.txt', 'img1387.txt', 'img575.jpg', 'img483.txt', 'img4.txt', 'img470.txt', 'img794.txt', 'img1013.jpg', 'img0.jpg', 'img988.txt', 'img66.txt', 'img1119.jpg', 'img403.jpg', 'img1138.jpg', 'img1280.txt', 'img1173.jpg', 'img786.txt', 'img522.txt', 'img1176.jpg', 'img696.txt', 'img1343.txt', 'img932.txt', 'img822.txt', 'img1120.txt', 'img224.jpg', 'img985.jpg', 'img113.txt', 'img514.jpg', 'img170.jpg', 'img212.jpg', 'img18.txt', 'img280.jpg', 'img344.txt', 'img341.txt', 'img1303.txt', 'img373.txt', 'img77.jpg', 'img340.jpg', 'img521.jpg', 'img154.jpg', 'img170.txt', 'img498.jpg', 'img226.txt', 'img42.jpg', 'img989.txt', 'img317.txt', 'img1144.txt', 'img921.txt', 'img1285.jpg', 'img1210.txt', 'img920.txt', 'img435.txt', 'img342.jpg', 'img691.jpg', 'img286.jpg', 'img144.jpg', 

In [23]:
def pixel_accuracy(output, target):
    with torch.no_grad():
        preds = (torch.sigmoid(output) > 0.5).float()
        correct = (preds == target).float()
        acc = correct.sum() / correct.numel()
    return acc.item()


In [24]:
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    epoch_acc = 0

    for images, masks in dataloader:
        images = images.to(device)
        masks = masks.to(device)

        optimizer.zero_grad()
        outputs = model(images)

        loss = loss_fn(outputs, masks)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += pixel_accuracy(outputs, masks)

    avg_loss = epoch_loss / len(dataloader)
    avg_acc = epoch_acc / len(dataloader)

    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {avg_loss:.4f} - Accuracy: {avg_acc:.4f}")


Epoch 1/10 - Loss: 0.2070 - Accuracy: 0.9108
Epoch 2/10 - Loss: 0.2068 - Accuracy: 0.9122
Epoch 3/10 - Loss: 0.1968 - Accuracy: 0.9155
Epoch 4/10 - Loss: 0.1907 - Accuracy: 0.9186
Epoch 5/10 - Loss: 0.1866 - Accuracy: 0.9200
Epoch 6/10 - Loss: 0.1801 - Accuracy: 0.9222
Epoch 7/10 - Loss: 0.1740 - Accuracy: 0.9244
Epoch 8/10 - Loss: 0.1668 - Accuracy: 0.9269
Epoch 9/10 - Loss: 0.1668 - Accuracy: 0.9277
Epoch 10/10 - Loss: 0.1567 - Accuracy: 0.9307
