In [None]:
# `os` must be imported in this cell: it runs before the main import cell, so on
# a fresh kernel `os.chdir` would otherwise raise NameError.
import os

from google.colab import drive

# Mount Google Drive into the Colab filesystem.
drive.mount('/content/drive')

# Work from the Drive root so relative paths (e.g. saved checkpoints) land on Drive.
os.chdir('/content/drive/MyDrive')

Mounted at /content/drive


In [None]:
import os
import numpy as np
from PIL import Image
from pycocotools.coco import COCO
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Drive locations: source images, COCO annotation JSON files, and the folder
# that receives the rasterised PNG masks (created on demand).
images_dir = "/content/drive/MyDrive/val2017"
masks_dir = "/content/drive/MyDrive/Masks"
out_mask_dir = "/content/drive/MyDrive/val2017/masks_png"
os.makedirs(out_mask_dir, exist_ok=True)

# --- Generate masks from COCO JSON ---
# For every image listed in every annotation file, merge all of its annotation
# masks (element-wise maximum, scaled by 255) into one uint8 image and save it
# as a PNG named after the image file.
json_files = [entry for entry in os.listdir(masks_dir) if entry.endswith(".json")]
for json_file in json_files:
    coco = COCO(os.path.join(masks_dir, json_file))
    for img_info in coco.loadImgs(coco.getImgIds()):
        ann_ids = coco.getAnnIds(imgIds=img_info["id"])
        # Start from an all-zero canvas with the image's own dimensions.
        mask = np.zeros((img_info["height"], img_info["width"]), dtype=np.uint8)
        for ann in coco.loadAnns(ann_ids):
            mask = np.maximum(mask, coco.annToMask(ann) * 255)
        # Same file name as the image, with the .jpg suffix swapped for .png.
        mask_name = img_info['file_name'].replace(".jpg", ".png")
        Image.fromarray(mask).save(os.path.join(out_mask_dir, mask_name))


# --- Dataset ---
class SegmentationDataset(Dataset):
    """Image / mask pairs read from two directories.

    An image is kept only when a mask with the same file name (``.jpg``
    replaced by ``.png``) exists in ``masks_dir``; everything else found in
    ``images_dir`` is ignored.

    Args:
        images_dir: Directory holding the input images.
        masks_dir: Directory holding the PNG masks.
        transform_img: Optional callable applied to the RGB PIL image.
        transform_mask: Optional callable applied to the grayscale PIL mask.
            When given, its result is binarised (``> 0``) and cast to float.
    """

    def __init__(self, images_dir, masks_dir, transform_img=None, transform_mask=None):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.image_files = sorted(os.listdir(images_dir))
        self.mask_files = sorted(os.listdir(masks_dir))
        # Set membership keeps the pairing pass linear; testing against the
        # list made it quadratic in the number of files.
        known_masks = set(self.mask_files)
        self.valid_pairs = []
        for img_name in self.image_files:
            mask_name = img_name.replace(".jpg", ".png")
            if mask_name in known_masks:
                self.valid_pairs.append((img_name, mask_name))
        self.transform_img = transform_img
        self.transform_mask = transform_mask

    def __len__(self):
        """Return the number of image/mask pairs found."""
        return len(self.valid_pairs)

    def __getitem__(self, idx):
        """Load and return the ``(image, mask)`` pair stored at position ``idx``."""
        img_name, mask_name = self.valid_pairs[idx]
        img_path = os.path.join(self.images_dir, img_name)
        mask_path = os.path.join(self.masks_dir, mask_name)

        # `convert` returns a fully loaded copy, so the file handles can be
        # released as soon as the `with` blocks exit.
        with Image.open(img_path) as img_file:
            image = img_file.convert("RGB")
        with Image.open(mask_path) as mask_file:
            mask = mask_file.convert("L")

        if self.transform_img:
            image = self.transform_img(image)
        if self.transform_mask:
            mask = self.transform_mask(mask)
            # Any non-zero pixel counts as foreground.
            mask = (mask > 0).float()

        return image, mask


# --- Transforms ---
IMAGE_SIZE = (256, 256)
SPLIT_SEED = 42  # fixes the train/val partition across kernel restarts

transform_img = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor()
])
# Masks are label maps: nearest-neighbour resizing keeps them strictly 0/255.
# The default bilinear filter creates intermediate edge values that the
# dataset's `> 0` threshold would then turn into foreground, fattening masks.
transform_mask = transforms.Compose([
    transforms.Resize(IMAGE_SIZE, interpolation=transforms.InterpolationMode.NEAREST),
    transforms.ToTensor()
])

dataset = SegmentationDataset(images_dir=images_dir,
                              masks_dir=out_mask_dir,
                              transform_img=transform_img,
                              transform_mask=transform_mask)

# 80/20 train/validation split; the seeded generator makes it reproducible.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)


# --- UNet blocks ---
class DoubleConv(nn.Module):
    """Two consecutive Conv3x3 -> BatchNorm -> ReLU stages.

    The first stage maps ``in_channels`` to ``out_channels``; the second keeps
    ``out_channels``. With a 3x3 kernel and padding 1 the spatial size of the
    input is preserved.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = []
        # Built in order so the layers sit at Sequential positions 0..5.
        for stage_in in (in_channels, out_channels):
            stages.extend([
                nn.Conv2d(stage_in, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ])
        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both convolution stages to ``x``."""
        features = self.double_conv(x)
        return features

class Down(nn.Module):
    """Encoder step: 2x2 max-pooling followed by a ``DoubleConv``."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        pool = nn.MaxPool2d(2)
        conv = DoubleConv(in_channels, out_channels)
        self.maxpool_conv = nn.Sequential(pool, conv)

    def forward(self, x):
        """Pool ``x`` and run it through the convolution block."""
        pooled_features = self.maxpool_conv(x)
        return pooled_features

class Up(nn.Module):
    """Decoder step: upsample, align with the skip tensor, concatenate, convolve.

    Args:
        in_channels: Channel count of the concatenated tensor (skip + upsampled).
        out_channels: Channel count produced by the convolution block.
        bilinear: Accepted but not consulted; upsampling is always bilinear.
    """

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()
        self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        """Upsample ``x1`` by 2, pad it to ``x2``'s height/width and fuse the two."""
        upsampled = self.up(x1)
        # Size gap between the skip tensor and the upsampled tensor (dims 2 and 3).
        gap_h = x2.size(2) - upsampled.size(2)
        gap_w = x2.size(3) - upsampled.size(3)
        left, top = gap_w // 2, gap_h // 2
        # F.pad order is (left, right, top, bottom); any odd remainder goes to
        # the right / bottom edge.
        upsampled = F.pad(upsampled, [left, gap_w - left, top, gap_h - top])
        fused = torch.cat([x2, upsampled], dim=1)
        return self.conv(fused)

class OutConv(nn.Module):
    """Final 1x1 convolution mapping ``in_channels`` to ``out_channels``."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
        )

    def forward(self, x):
        """Project ``x`` channel-wise; height and width are unchanged."""
        projected = self.conv(x)
        return projected


# --- UNet model ---
class UNet(nn.Module):
    """Encoder/decoder network with four down steps, four up steps and skips.

    Args:
        n_channels: Channels of the input tensor.
        n_classes: Channels of the output tensor.
        bilinear: Accepted but not used by this class.
    """

    def __init__(self, n_channels, n_classes, bilinear=True):
        super().__init__()
        c1, c2, c3, c4, c5 = 64, 128, 256, 512, 1024

        # Encoder: channel width doubles at every down step.
        self.inc = DoubleConv(n_channels, c1)
        self.down1 = Down(c1, c2)
        self.down2 = Down(c2, c3)
        self.down3 = Down(c3, c4)
        self.down4 = Down(c4, c5)

        # Decoder: each step consumes the previous output concatenated with the
        # matching encoder feature map, hence the summed input widths.
        self.up1 = Up(c5 + c4, c4)
        self.up2 = Up(c4 + c3, c3)
        self.up3 = Up(c3 + c2, c2)
        self.up4 = Up(c2 + c1, c1)
        self.outc = OutConv(c1, n_classes)

    def forward(self, x):
        """Run the encoder, then decode while consuming skips deepest-first."""
        skips = [self.inc(x)]
        for down in (self.down1, self.down2, self.down3, self.down4):
            skips.append(down(skips[-1]))

        x = skips.pop()  # deepest encoder output
        for up in (self.up1, self.up2, self.up3, self.up4):
            x = up(x, skips.pop())
        return self.outc(x)

# Instantiate the network for 3-channel input and a single output channel,
# then move its parameters to the selected device.
model = UNet(n_channels=3, n_classes=1).to(device)


# --- Training loop ---
def train_model(model, train_loader, val_loader, num_epochs=5):
    """Train ``model`` with Adam (lr=1e-4) and BCE-with-logits, printing losses.

    Args:
        model: Network to optimise in place; it must own at least one parameter.
        train_loader: Iterable of ``(images, masks)`` batches used for training.
        val_loader: Iterable of ``(images, masks)`` batches used for validation.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` object, left in eval mode after the last epoch.
    """
    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # Use the device the model already lives on instead of a notebook global,
    # so the function does not depend on cell execution order.
    run_device = next(model.parameters()).device

    def _mean(loss_sum, sample_count):
        # An empty loader yields NaN instead of raising ZeroDivisionError.
        return loss_sum / sample_count if sample_count else float("nan")

    for epoch in range(num_epochs):
        model.train()
        train_sum, train_seen = 0.0, 0
        for images, masks in train_loader:
            images, masks = images.to(run_device), masks.to(run_device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, masks)
            loss.backward()
            optimizer.step()
            # Weight each batch by its size so a short final batch does not
            # skew the epoch average.
            batch_n = images.size(0)
            train_sum += loss.item() * batch_n
            train_seen += batch_n

        avg_loss = _mean(train_sum, train_seen)

        model.eval()
        val_sum, val_seen = 0.0, 0
        with torch.no_grad():
            for images, masks in val_loader:
                images, masks = images.to(run_device), masks.to(run_device)
                outputs = model(images)
                batch_n = images.size(0)
                val_sum += criterion(outputs, masks).item() * batch_n
                val_seen += batch_n

        avg_val_loss = _mean(val_sum, val_seen)

        print(f"Epoch [{epoch+1}/{num_epochs}] - "
              f"Train Loss: {avg_loss:.4f} - "
              f"Val Loss: {avg_val_loss:.4f}")

    return model


# --- Train for 5 epochs ---
# train_model returns the model object it was given, so `trained_model` and
# `model` refer to the same network.
trained_model = train_model(model, train_loader, val_loader, num_epochs=5)
# Persist only the weights (state_dict); the relative path resolves against the
# current working directory.
torch.save(trained_model.state_dict(), "unet_model.pth")


loading annotations into memory...
Done (t=3.08s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Epoch [1/5] - Train Loss: 0.6861 - Val Loss: 0.6808
Epoch [2/5] - Train Loss: 0.5108 - 

In [None]:
# Writes the weights of whatever `trained_model` currently refers to in the
# kernel; this cell does not define it itself.
# NOTE(review): the only `trained_model` assigned in the visible notebook is
# the UNet returned by train_model, so the "deeplabv3" file name and message
# look like leftovers — confirm before relying on this checkpoint.
torch.save(trained_model.state_dict(), "deeplabv3_model.pth")
print("DeepLabV3 model saved.")


DeepLabV3 model saved.


Hyperparameter experiments: optimizer comparison (Adam, SGD, RMSprop)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import pandas as pd
import time
import os

# --- Experiment configuration ---
dropout_rate = 0.3
image_size = (128, 128)
num_epochs = 5
batch_size = 8
learning_rate = 1e-4

# Images and masks pass through two independent pipelines, so random geometric
# augmentations (flip, rotation) must not live here: applied to the image
# alone they draw their own random parameters and leave the mask describing a
# different geometry than the image. Add them back only through a joint
# transform that applies the same parameters to both.
transform_img = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor()
])
# Nearest-neighbour resizing keeps mask pixels at their original 0/255 values
# instead of blending them along object edges.
transform_mask = transforms.Compose([
    transforms.Resize(image_size, interpolation=transforms.InterpolationMode.NEAREST),
    transforms.ToTensor()
])

class SegmentationDataset(Dataset):
    """Image / mask pairs read from two directories.

    Only ``.jpg`` / ``.png`` files in ``images_dir`` and ``.png`` files in
    ``masks_dir`` are considered. An image is kept when a mask with the same
    file name (``.jpg`` replaced by ``.png``) exists.

    Note: this redefines the ``SegmentationDataset`` class declared earlier in
    the notebook; whichever cell ran last wins.

    Args:
        images_dir: Directory holding the input images.
        masks_dir: Directory holding the PNG masks.
        transform_img: Optional callable applied to the RGB PIL image.
        transform_mask: Optional callable applied to the grayscale PIL mask.
            When given, its result is binarised (``> 0``) and cast to float.
    """

    def __init__(self, images_dir, masks_dir, transform_img=None, transform_mask=None):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.image_files = sorted(
            name for name in os.listdir(images_dir) if name.endswith((".jpg", ".png"))
        )
        self.mask_files = sorted(
            name for name in os.listdir(masks_dir) if name.endswith(".png")
        )
        self.transform_img = transform_img
        self.transform_mask = transform_mask
        # Set membership keeps the pairing pass linear; testing against the
        # list made it quadratic in the number of files.
        known_masks = set(self.mask_files)
        self.valid_pairs = []
        for img_file in self.image_files:
            mask_file = img_file.replace(".jpg", ".png")
            if mask_file in known_masks:
                self.valid_pairs.append((img_file, mask_file))

    def __len__(self):
        """Return the number of image/mask pairs found."""
        return len(self.valid_pairs)

    def __getitem__(self, idx):
        """Load and return the ``(image, mask)`` pair stored at position ``idx``."""
        img_file, mask_file = self.valid_pairs[idx]
        # `convert` returns a fully loaded copy, so the file handles can be
        # released as soon as the `with` blocks exit.
        with Image.open(os.path.join(self.images_dir, img_file)) as raw_img:
            img = raw_img.convert("RGB")
        with Image.open(os.path.join(self.masks_dir, mask_file)) as raw_mask:
            mask = raw_mask.convert("L")
        if self.transform_img:
            img = self.transform_img(img)
        if self.transform_mask:
            mask = self.transform_mask(mask)
            # Binarise exactly like the first dataset definition so the BCE
            # targets are hard 0/1 labels rather than interpolated grey levels.
            mask = (mask > 0).float()
        return img, mask

# Build the dataset once and split it 80/20. Creating two datasets over the
# same folders made validation run on the training samples, so the reported
# validation loss said nothing about generalisation.
full_dataset = SegmentationDataset(
    images_dir="/content/drive/MyDrive/val2017",
    masks_dir="/content/drive/MyDrive/val2017/masks_png",
    transform_img=transform_img,
    transform_mask=transform_mask
)
train_count = int(0.8 * len(full_dataset))
val_count = len(full_dataset) - train_count
# Seeded generator: the partition is identical on every run.
train_dataset, val_dataset = torch.utils.data.random_split(
    full_dataset,
    [train_count, val_count],
    generator=torch.Generator().manual_seed(42),
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

class UNetDropout(nn.Module):
    """UNet variant that applies channel-wise dropout to the deepest feature map.

    Args:
        n_channels: Channels of the input tensor.
        n_classes: Channels of the output tensor.
        bilinear: Accepted but not used by this class.
        dropout: Probability passed to ``nn.Dropout2d``.
    """

    def __init__(self, n_channels, n_classes, bilinear=True, dropout=0.3):
        super().__init__()
        # Encoder
        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        self.down4 = Down(512, 1024)
        # Regularisation on the bottleneck only
        self.dropout = nn.Dropout2d(p=dropout)
        # Decoder: input width = previous output + matching encoder skip
        self.up1 = Up(1024 + 512, 512)
        self.up2 = Up(512 + 256, 256)
        self.up3 = Up(256 + 128, 128)
        self.up4 = Up(128 + 64, 64)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        """Encode ``x``, drop out the bottleneck, then decode with skip connections."""
        enc1 = self.inc(x)
        enc2 = self.down1(enc1)
        enc3 = self.down2(enc2)
        enc4 = self.down3(enc3)
        bottleneck = self.dropout(self.down4(enc4))

        decoded = self.up1(bottleneck, enc4)
        decoded = self.up2(decoded, enc3)
        decoded = self.up3(decoded, enc2)
        decoded = self.up4(decoded, enc1)
        return self.outc(decoded)

def train_val(model, train_loader, val_loader, num_epochs, optimizer, criterion, device):
    """Train and validate ``model`` for ``num_epochs``, recording per-sample mean losses.

    Each batch loss is weighted by the batch size and the epoch total is
    divided by ``len(loader.dataset)``. One summary line is printed per epoch.

    Args:
        model: Network to optimise in place.
        train_loader: Loader providing ``(images, masks)`` training batches.
        val_loader: Loader providing ``(images, masks)`` validation batches.
        num_epochs: Number of epochs to run.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss callable taking ``(outputs, masks)``.
        device: Device the batches are moved to.

    Returns:
        dict with keys ``"train_loss"`` and ``"val_loss"``, each a list holding
        one float per epoch.
    """
    train_curve = []
    val_curve = []
    history = {"train_loss": train_curve, "val_loss": val_curve}

    for epoch in range(num_epochs):
        # ---- optimisation pass ----
        model.train()
        running_train = 0
        for batch_images, batch_masks in train_loader:
            batch_images = batch_images.to(device)
            batch_masks = batch_masks.to(device)
            optimizer.zero_grad()
            batch_loss = criterion(model(batch_images), batch_masks)
            batch_loss.backward()
            optimizer.step()
            running_train += batch_loss.item() * batch_images.size(0)
        avg_train_loss = running_train / len(train_loader.dataset)

        # ---- evaluation pass (no gradients) ----
        model.eval()
        running_val = 0
        with torch.no_grad():
            for batch_images, batch_masks in val_loader:
                batch_images = batch_images.to(device)
                batch_masks = batch_masks.to(device)
                batch_loss = criterion(model(batch_images), batch_masks)
                running_val += batch_loss.item() * batch_images.size(0)
        avg_val_loss = running_val / len(val_loader.dataset)

        train_curve.append(avg_train_loss)
        val_curve.append(avg_val_loss)
        print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

    return history

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
results = []
RUN_SEED = 42  # shared by all runs so only the optimizer differs between them

# Factories rather than instances: each run needs an optimizer bound to the
# parameters of its own freshly created model.
optimizers = {
    "Adam": lambda params: optim.Adam(params, lr=learning_rate),
    "SGD": lambda params: optim.SGD(params, lr=learning_rate, momentum=0.9),
    "RMSprop": lambda params: optim.RMSprop(params, lr=learning_rate)
}

for opt_name, opt_fn in optimizers.items():
    print(f"\nRunning {opt_name}")
    # Re-seed before building the model so every optimizer starts from the
    # same initial weights and draws from the same random stream; otherwise
    # differences in loss mix optimizer effects with initialisation luck.
    torch.manual_seed(RUN_SEED)
    model = UNetDropout(n_channels=3, n_classes=1, dropout=dropout_rate).to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = opt_fn(model.parameters())
    # perf_counter is monotonic, so the duration is immune to wall-clock changes.
    start = time.perf_counter()
    history = train_val(model, train_loader, val_loader, num_epochs, optimizer, criterion, device)
    end = time.perf_counter()
    results.append({
        "Optimizer": opt_name,
        "LR": learning_rate,
        "Batch Size": batch_size,
        "Epochs": num_epochs,
        "Dropout": dropout_rate,
        "Final Val Loss": history["val_loss"][-1],
        "Training Time (s)": round(end - start, 2)
    })

# One row per optimizer; also written to CSV in the current working directory.
df = pd.DataFrame(results)
print("\nResults Summary")
print(df)
df.to_csv("hyperparam_results.csv", index=False)



Running Adam
Epoch [1/5] - Train Loss: 0.8232, Val Loss: 0.7251
Epoch [2/5] - Train Loss: 0.8092, Val Loss: 0.7306
Epoch [3/5] - Train Loss: 0.7773, Val Loss: 0.7355
Epoch [4/5] - Train Loss: 0.7389, Val Loss: 0.7396
Epoch [5/5] - Train Loss: 0.7154, Val Loss: 0.7434

Running SGD
Epoch [1/5] - Train Loss: 0.7027, Val Loss: 0.6600
Epoch [2/5] - Train Loss: 0.6996, Val Loss: 0.6617
Epoch [3/5] - Train Loss: 0.7008, Val Loss: 0.6618
Epoch [4/5] - Train Loss: 0.6998, Val Loss: 0.6607
Epoch [5/5] - Train Loss: 0.6983, Val Loss: 0.6595

Running RMSprop
Epoch [1/5] - Train Loss: 1.0305, Val Loss: 0.7367
Epoch [2/5] - Train Loss: 0.9327, Val Loss: 0.7406
Epoch [3/5] - Train Loss: 0.7797, Val Loss: 1.2551
