In [2]:
!pip install segmentation-models-pytorch --quiet
!pip install torchmetrics --quiet


In [3]:
from google.colab import drive

# Attach Google Drive to the Colab VM; force_remount=True requests a fresh mount
# even when a mount from an earlier run is still present.
drive.mount(
    '/content/drive',
    force_remount=True,
)


Mounted at /content/drive


In [4]:
import os

# Sanity check: show what is stored in the dataset folder on Drive.
path = "/content/drive/MyDrive/maskdata"
drive_entries = os.listdir(path)
print(drive_entries)


['val2017.zip', 'val2017', 'masks']


In [5]:
import zipfile

zip_path = "/content/drive/MyDrive/maskdata/val2017.zip"
extract_path = "/content/drive/MyDrive/maskdata/"

# Writing thousands of files onto Drive is slow, so only archive members that do
# not exist under extract_path yet are unpacked. Re-running the cell after a
# complete extraction therefore writes nothing. Files that already exist are
# left as they are (they are not overwritten with the archive's copy).
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    missing_members = [
        member
        for member in zip_ref.namelist()
        if not os.path.exists(os.path.join(extract_path, member))
    ]
    # An explicit (possibly empty) member list restricts extraction to it.
    zip_ref.extractall(extract_path, members=missing_members)

print("Extracted files:", os.listdir(extract_path))


Extracted files: ['val2017.zip', 'val2017', 'masks']


In [6]:
import os

# Peek at the first ten directory entries to confirm the images were unpacked.
data_path = "/content/drive/MyDrive/maskdata/val2017"
first_entries = os.listdir(data_path)[:10]
print(first_entries)


['000000404249.jpg', '000000319935.jpg', '000000500565.jpg', '000000323709.jpg', '000000376856.jpg', '000000126110.jpg', '000000151051.jpg', '000000218439.jpg', '000000085772.jpg', '000000419653.jpg']


In [7]:
images_dir = "/content/drive/MyDrive/maskdata/val2017"

# Report the missing-folder case first; otherwise list the folder in sorted
# order and print a short summary.
if not os.path.exists(images_dir):
    print("Folder NOT found. Check the path!")
else:
    print("Folder found!")
    image_files = os.listdir(images_dir)
    image_files.sort()
    print(f"Total images: {len(image_files)}")
    print("First 5 images:", image_files[:5])





Folder found!
Total images: 5000
First 5 images: ['000000000139.jpg', '000000000285.jpg', '000000000632.jpg', '000000000724.jpg', '000000000776.jpg']


In [8]:
!pip install rembg --quiet
!pip install onnxruntime --quiet

import os
from rembg import remove
from PIL import Image
from tqdm import tqdm

from rembg import new_session

images_dir = "/content/drive/MyDrive/maskdata/val2017"
masks_dir = "/content/drive/MyDrive/maskdata/masks"

os.makedirs(masks_dir, exist_ok=True)

# Only the first 50 images (in sorted filename order) are processed by this cell.
image_files = sorted(os.listdir(images_dir))[:50]

# Create the background-removal session once and reuse it for every image
# instead of letting each remove() call set up its own.
rembg_session = new_session()

for img_name in tqdm(image_files, desc="Generating Masks"):
    img_path = os.path.join(images_dir, img_name)
    mask_path = os.path.join(masks_dir, os.path.splitext(img_name)[0] + ".png")

    # The context manager closes the source file handle after each image.
    with Image.open(img_path) as input_image:
        # only_mask=True asks rembg for the foreground mask itself. Without it
        # the result is an RGBA cut-out of the picture; reading that back as
        # grayscale gives pixel brightness, so dark foreground pixels would end
        # up labelled as background.
        output = remove(input_image, session=rembg_session, only_mask=True)
    output.save(mask_path)

print(f"Masks generated for {len(image_files)} images at {masks_dir}")


Generating Masks: 100%|██████████| 50/50 [02:46<00:00,  3.32s/it]

Masks generated for 50 images at /content/drive/MyDrive/maskdata/masks





In [9]:
!pip install segmentation-models-pytorch --quiet
!pip install torchmetrics --quiet
!pip install albumentations --quiet

import os
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import segmentation_models_pytorch as smp
import albumentations as A
from albumentations.pytorch import ToTensorV2


In [10]:
class SegmentationDataset(Dataset):
    """Image / binary-mask pairs read from two parallel folders.

    The mask for ``<name>.<ext>`` in ``images_dir`` is ``<name>.png`` in
    ``masks_dir``. Images that have no such mask file are left out of the
    dataset, so indexing never points at a missing mask.

    Args:
        images_dir: Folder containing the input images.
        masks_dir: Folder containing the ``.png`` masks.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``'image'`` and ``'mask'`` entries.
    """

    def __init__(self, images_dir, masks_dir, transform=None):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.transform = transform

        # List the mask folder once; set membership keeps the filter linear.
        available_masks = set(os.listdir(masks_dir))
        self.images = sorted(
            name
            for name in os.listdir(images_dir)
            if os.path.splitext(name)[0] + ".png" in available_masks
        )

    def __len__(self):
        """Return the number of images that have a matching mask."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for position ``idx``.

        Without a transform, ``image`` is the RGB image as a NumPy array and
        ``mask`` is a float32 array holding 1.0 for foreground and 0.0 elsewhere.
        With a transform, both are whatever the transform returns.
        """
        img_name = self.images[idx]
        img_path = os.path.join(self.images_dir, img_name)
        mask_path = os.path.join(self.masks_dir, os.path.splitext(img_name)[0] + ".png")

        with Image.open(img_path) as img_file:
            image = np.array(img_file.convert("RGB"))

        with Image.open(mask_path) as mask_file:
            if "A" in mask_file.getbands():
                # Mask stored as a transparent cut-out: foreground is encoded in
                # the alpha channel. Converting to "L" would measure colour
                # brightness instead and drop dark foreground pixels.
                mask_channel = mask_file.getchannel("A")
            else:
                mask_channel = mask_file.convert("L")  # grayscale mask
            mask = np.array(mask_channel)

        # Any non-zero value counts as foreground.
        mask = (mask > 0).astype('float32')

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask


In [11]:

# Training-time preprocessing, applied in order: resize to 256x256, random
# horizontal flip with probability 0.5, normalisation with the library's default
# settings, then conversion to tensors.
augmentation_steps = [
    A.Resize(height=256, width=256),
    A.HorizontalFlip(p=0.5),
    A.Normalize(),
    ToTensorV2(),
]
train_transform = A.Compose(augmentation_steps)

dataset = SegmentationDataset(
    "/content/drive/MyDrive/maskdata/val2017",
    "/content/drive/MyDrive/maskdata/masks",
    transform=train_transform,
)

# Batches of four samples, reshuffled every epoch.
dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True)


In [12]:

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# U-Net with a ResNet-34 encoder initialised from ImageNet weights; 3 input
# channels and a single output channel.
unet_config = dict(
    encoder_name="resnet34",
    encoder_weights="imagenet",
    in_channels=3,
    classes=1,
)
model = smp.Unet(**unet_config).to(device)

# Binary cross-entropy on raw logits, optimised with Adam.
loss_fn = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [13]:

images_dir = "/content/drive/MyDrive/maskdata/val2017"
masks_dir = "/content/drive/MyDrive/maskdata/masks"

# List the mask folder a single time and keep the names in a set. Listing it
# inside the comprehension would scan the Drive folder once per image and then
# search the resulting list linearly.
mask_names = set(os.listdir(masks_dir))

# Keep only images whose "<stem>.png" mask exists, in sorted filename order.
image_files = sorted(
    f
    for f in os.listdir(images_dir)
    if os.path.splitext(f)[0] + ".png" in mask_names
)

print(f"Images with masks: {len(image_files)}")

class SegmentationDataset(Dataset):
    """Image / binary-mask pairs for an explicit list of image filenames.

    The mask for ``<name>.<ext>`` in ``images_dir`` is ``<name>.png`` in
    ``masks_dir``.

    Args:
        images_dir: Folder containing the input images.
        masks_dir: Folder containing the ``.png`` masks.
        image_files: Iterable of image filenames (relative to ``images_dir``)
            that make up the dataset, in index order.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``'image'`` and ``'mask'`` entries.
    """

    def __init__(self, images_dir, masks_dir, image_files, transform=None):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.transform = transform
        # Snapshot the names: any iterable is accepted, and later changes to the
        # caller's list cannot alter the dataset's length or ordering.
        self.images = list(image_files)

    def __len__(self):
        """Return the number of image filenames in the dataset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for position ``idx``.

        Without a transform, ``image`` is the RGB image as a NumPy array and
        ``mask`` is a float32 array holding 1.0 for foreground and 0.0 elsewhere.
        With a transform, both are whatever the transform returns.
        """
        img_name = self.images[idx]
        img_path = os.path.join(self.images_dir, img_name)
        mask_path = os.path.join(self.masks_dir, os.path.splitext(img_name)[0] + ".png")

        with Image.open(img_path) as img_file:
            image = np.array(img_file.convert("RGB"))

        with Image.open(mask_path) as mask_file:
            if "A" in mask_file.getbands():
                # Mask stored as a transparent cut-out: foreground is encoded in
                # the alpha channel. Converting to "L" would measure colour
                # brightness instead and drop dark foreground pixels.
                mask_channel = mask_file.getchannel("A")
            else:
                mask_channel = mask_file.convert("L")
            mask = np.array(mask_channel)

        # Any non-zero value counts as foreground.
        mask = (mask > 0).astype('float32')

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

# Rebuild the dataset from the images that have masks, then wrap it in a
# shuffling loader that yields batches of four.
dataset = SegmentationDataset(
    images_dir=images_dir,
    masks_dir=masks_dir,
    image_files=image_files,
    transform=train_transform,
)
dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True)


Images with masks: 190


In [14]:
import torch.nn as nn


# Binary cross-entropy that takes raw logits; the model output is passed to it
# below without an explicit sigmoid.
criterion = nn.BCEWithLogitsLoss()
# Adam over all model parameters; this rebinds `optimizer` to a new instance.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0  # running sum of per-batch losses for this epoch

    for images, masks in dataloader:
        images = images.to(device)
        # unsqueeze(1) inserts a dimension at index 1, presumably a channel axis
        # so the target lines up with the single-channel model output
        # (TODO confirm shapes).
        masks = masks.unsqueeze(1).to(device)

        # Standard step: clear gradients, forward pass, loss, backward pass, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Report the mean per-batch loss for the epoch (sum divided by batch count).
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(dataloader):.4f}")

Epoch 1/10, Loss: 0.5414
Epoch 2/10, Loss: 0.5390
Epoch 3/10, Loss: 0.5302
Epoch 4/10, Loss: 0.5161
Epoch 5/10, Loss: 0.5032
Epoch 6/10, Loss: 0.4884
Epoch 7/10, Loss: 0.4998
Epoch 8/10, Loss: 0.5020
Epoch 9/10, Loss: 0.4869
Epoch 10/10, Loss: 0.4951
