In [120]:
import torch
from torch.utils.data import Dataset, DataLoader
import cv2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import wandb

np.set_printoptions(linewidth=200)

In [121]:
from torchvision.io import decode_image, read_image, ImageReadMode
from torchvision.transforms import v2

class PuzzleImages(Dataset):
    """Dataset pairing puzzle images with binary masks for one data split.

    The description CSV is filtered to the rows whose ``type`` column equals
    the requested split. Images and masks are read from disk on every access
    and resized to 512x512.
    """

    def __init__(self, description_dir, mask_dir, img_dir, type):
        """Load the description table and keep only the rows of one split.

        Args:
            description_dir: Path of the CSV file handed to ``pd.read_csv``.
            mask_dir: Directory prefix used to build mask file paths.
            img_dir: Directory prefix used to build image file paths.
            type: Value the ``type`` column must equal for a row to be kept.
        """
        self.img_dir = img_dir
        self.mask_dir = mask_dir

        table = pd.read_csv(description_dir)
        in_split = table["type"] == type
        # reset_index() keeps the previous index as a leading column; that
        # extra column is discarded again when a row is unpacked in
        # __getitem__.
        self.description = table[in_split].reset_index()

        self.image_transforms = v2.Compose([v2.Resize((512, 512))])

    def __len__(self):
        """Number of rows that belong to the selected split."""
        return len(self.description)

    def __getitem__(self, idx):
        """Return the ``(image, mask)`` pair for row ``idx``.

        The image is converted to float, divided by 255 and resized. The mask
        is divided by 255, thresholded at 0.5, resized, and then concatenated
        with its complement along the channel axis.
        """
        record = self.description.loc[idx]
        # Row layout: old index, identifier, image file, mask file, split.
        _, _, image_file, mask_file, _ = record

        raw_image = read_image(f"{self.img_dir}/{image_file}")
        image = self.image_transforms(raw_image.float() / 255)

        raw_mask = read_image(f"{self.mask_dir}/{mask_file}")
        thresholded = raw_mask.float() / 255 > 0.5
        temp_mask = self.image_transforms(thresholded).float()

        mask = torch.cat([temp_mask, 1 - temp_mask], dim=0)
        return image, mask
    

In [122]:
# One dataset per split; all three read the same description CSV and the same
# mask / image folders and differ only in the requested split name.
train_dataset = PuzzleImages("./data.csv", "./masks", "./images", "train")
test_dataset = PuzzleImages("./data.csv", "./masks", "./images", "test")
val_dataset = PuzzleImages("./data.csv", "./masks", "./images", "val")

# Only the training loader is shuffled. Metrics that are averaged per batch
# (such as IoU) depend on which samples share a batch, so the evaluation
# loaders keep a fixed order to make validation/test numbers reproducible.
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=2, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=2, shuffle=False)

In [123]:
# Shared hyperparameters read by the U-Net building blocks in this cell.
kernel_padding = (1, 1)                     # padding given to the blocks' Conv2d layers
pool_size = pool_stride = 2                 # MaxPool2d kernel size and stride
transpose_size = transpose_stride = (2, 2)  # ConvTranspose2d kernel size and stride
upsample_scale_factor = 2                   # scale factor given to Upsample

class UNetPoolBlock(torch.nn.Module):
    """Encoder stage: max-pooling followed by two convolutions.

    The activations between the convolutions are currently disabled (kept as
    comments in the layer list).
    """

    def __init__(self, in_channel, out_channel, kernel_size):
        super().__init__()
        nn = torch.nn
        stages = [
            nn.MaxPool2d(pool_size, stride=pool_stride),
            nn.Conv2d(in_channel, out_channel, kernel_size, padding=kernel_padding),
            # nn.ReLU(),
            nn.Conv2d(out_channel, out_channel, kernel_size, padding=kernel_padding),
            # nn.ReLU(),
        ]
        self.block = nn.Sequential(*stages)

    def forward(self, in_map):
        """Apply the pooling/convolution stack to ``in_map``."""
        return self.block(in_map)

class UNetTransposeUpBlock(torch.nn.Module):
    """Decoder stage: convolution, transposed-convolution upsampling, convolution.

    The channel count stays at ``in_channel`` until the last convolution,
    which maps to ``out_channel``. Activations are currently disabled (kept as
    comments in the layer list).
    """

    def __init__(self, in_channel, out_channel, kernel_size):
        super().__init__()
        nn = torch.nn
        stages = [
            nn.Conv2d(in_channel, in_channel, kernel_size, padding=kernel_padding),
            # nn.ReLU(),
            nn.ConvTranspose2d(in_channel, in_channel, transpose_size, stride=transpose_stride),
            nn.Conv2d(in_channel, out_channel, kernel_size, padding=kernel_padding),
            # nn.ReLU(),
        ]
        self.block = nn.Sequential(*stages)

    def forward(self, in_map):
        """Apply the upsampling stack to ``in_map``."""
        return self.block(in_map)

class UNetSampleUpBlock(torch.nn.Module):
    """Decoder stage: two convolutions, ``Upsample``, then a final convolution.

    The channel count stays at ``in_channel`` until the last convolution,
    which maps to ``out_channel``. Activations are currently disabled (kept as
    comments in the layer list).
    """

    def __init__(self, in_channel, out_channel, kernel_size):
        super().__init__()
        nn = torch.nn
        stages = [
            nn.Conv2d(in_channel, in_channel, kernel_size, padding=kernel_padding),
            # nn.ReLU(),
            nn.Conv2d(in_channel, in_channel, kernel_size, padding=kernel_padding),
            # nn.ReLU(),
            nn.Upsample(scale_factor=upsample_scale_factor),
            nn.Conv2d(in_channel, out_channel, kernel_size, padding=kernel_padding),
            # nn.ReLU(),
        ]
        self.block = nn.Sequential(*stages)

    def forward(self, in_map):
        """Apply the upsampling stack to ``in_map``."""
        return self.block(in_map)

class UNetTransposeBottleneckBlock(torch.nn.Module):
    """Bottleneck: pool, two convolutions, transposed-conv upsampling, convolution.

    Channels go ``in_channel -> mid_channel -> mid_channel`` before the
    transposed convolution and ``mid_channel -> out_channel`` after it.
    Activations are currently disabled (kept as comments in the layer list).
    """

    def __init__(self, in_channel, mid_channel, out_channel, kernel_size):
        super().__init__()
        nn = torch.nn
        stages = [
            nn.MaxPool2d(pool_size, stride=pool_stride),
            nn.Conv2d(in_channel, mid_channel, kernel_size, padding=kernel_padding),
            # nn.ReLU(),
            nn.Conv2d(mid_channel, mid_channel, kernel_size, padding=kernel_padding),
            # nn.ReLU(),
            nn.ConvTranspose2d(mid_channel, mid_channel, transpose_size, stride=transpose_stride),
            nn.Conv2d(mid_channel, out_channel, kernel_size, padding=kernel_padding),
            # nn.ReLU(),
        ]
        self.block = nn.Sequential(*stages)

    def forward(self, in_map):
        """Apply the bottleneck stack to ``in_map``."""
        return self.block(in_map)

class UNetSampleBottleneckBlock(torch.nn.Module):
    """Bottleneck: pool, two convolutions, ``Upsample``, then a final convolution.

    Channels go ``in_channel -> mid_channel -> mid_channel`` before the
    upsampling and ``mid_channel -> out_channel`` after it. Activations are
    currently disabled (kept as comments in the layer list).
    """

    def __init__(self, in_channel, mid_channel, out_channel, kernel_size):
        super().__init__()
        nn = torch.nn
        stages = [
            nn.MaxPool2d(pool_size, stride=pool_stride),
            nn.Conv2d(in_channel, mid_channel, kernel_size, padding=kernel_padding),
            # nn.ReLU(),
            nn.Conv2d(mid_channel, mid_channel, kernel_size, padding=kernel_padding),
            # nn.ReLU(),
            nn.Upsample(scale_factor=upsample_scale_factor),
            nn.Conv2d(mid_channel, out_channel, kernel_size, padding=kernel_padding),
            # nn.ReLU(),
        ]
        self.block = nn.Sequential(*stages)

    def forward(self, in_map):
        """Apply the bottleneck stack to ``in_map``."""
        return self.block(in_map)

In [124]:
class UNetSimple(torch.nn.Module):
    """Shallow U-Net: one encoder stage, a bottleneck and one decoder stage.

    Takes a 3-channel input and returns a 2-channel map normalised with
    ``Softmax2d``. Skip connections concatenate encoder features with decoder
    features along the channel axis.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        self.in_block = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=(1, 1)),
            nn.ReLU(),
        )
        self.first_down = UNetPoolBlock(64, 128, 3)
        self.bottlneck = UNetSampleBottleneckBlock(128, 256, 128, 3)
        # Decoder inputs are doubled in width by the skip concatenation.
        self.first_up = UNetSampleUpBlock(128 + 128, 64, 3)
        self.out_block = nn.Sequential(
            nn.Conv2d(64 + 64, 2, 3, padding=(1, 1)),
            # nn.ReLU(),
        )
        self.classifier = nn.Softmax2d()

    def forward(self, input):
        """Run the network on ``input`` and return per-pixel class probabilities."""
        stem = self.in_block(input)
        encoded = self.first_down(stem)
        deepest = self.bottlneck(encoded)

        merged_low = torch.cat((encoded, deepest), dim=1)
        decoded = self.first_up(merged_low)

        merged_high = torch.cat((stem, decoded), dim=1)
        scores = self.out_block(merged_high)
        return self.classifier(scores)

class UNetTranspose(torch.nn.Module):
    """Three-level U-Net that upsamples with transposed convolutions.

    Takes a 3-channel image and returns a 2-channel map normalised with
    ``Softmax2d``. The input is pooled four times (three encoder stages plus
    the bottleneck), so height and width should be divisible by 16 for the
    skip concatenations to line up.
    """

    def __init__(self):
        super().__init__()
        self.in_block = torch.nn.Sequential(
            torch.nn.Conv2d(3, 64, kernel_size=3, padding=(1,1)),
            # torch.nn.ReLU(),
            torch.nn.Conv2d(64, 64, kernel_size=3, padding=(1,1)),
            # torch.nn.ReLU()
        )
        self.first_down = UNetPoolBlock(64, 128, 3)
        self.second_down = UNetPoolBlock(128, 256, 3)
        self.third_down = UNetPoolBlock(256, 512, 3)
        self.bottlneck = UNetTransposeBottleneckBlock(512, 1024, 512, kernel_size=3)
        # Each decoder stage receives its input concatenated with the matching
        # encoder feature map, hence the doubled input channel counts.
        self.first_up = UNetTransposeUpBlock(512 + 512, 256, 3)
        self.second_up = UNetTransposeUpBlock(256 + 256, 128, 3)
        self.third_up = UNetTransposeUpBlock(128 + 128, 64, 3)
        self.out_block = torch.nn.Sequential(
            torch.nn.Conv2d(64 + 64, 64, kernel_size=3, padding=(1,1)),
            # torch.nn.ReLU(),
            torch.nn.Conv2d(64, 64, kernel_size=3, padding=(1,1)),
            # torch.nn.ReLU(),
            # Project to the two output classes (same head as UNetUpsample);
            # without it the network emitted 64 channels, which cannot be
            # compared with the 2-channel masks.
            torch.nn.Conv2d(64, 2, kernel_size=3, padding=(1,1)),
        )

        self.classifier = torch.nn.Softmax2d()

    def forward(self, in_image):
        """Run the network on ``in_image`` and return per-pixel class probabilities."""
        in_map = self.in_block(in_image)
        firstdown_map = self.first_down(in_map)
        seconddown_map = self.second_down(firstdown_map)
        thirddown_map = self.third_down(seconddown_map)
        bottleneck_map = self.bottlneck(thirddown_map)

        firstup_map = self.first_up(torch.cat((thirddown_map, bottleneck_map), dim=1))
        secondup_map = self.second_up(torch.cat((seconddown_map, firstup_map), dim=1))
        thirdup_map = self.third_up(torch.cat((firstdown_map, secondup_map), dim=1))
        feature_map = self.out_block(torch.cat((in_map, thirdup_map), dim=1))

        # Was `self.classifer`, which raised AttributeError on every call.
        return self.classifier(feature_map)

class UNetUpsample(torch.nn.Module):
    """Three-level U-Net that upsamples with ``Upsample`` layers.

    Takes a 3-channel image and returns a 2-channel map normalised with
    ``Softmax2d``. Skip connections concatenate encoder features with decoder
    features along the channel axis.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        self.in_block = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=(1, 1)),
            # nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=(1, 1)),
            # nn.ReLU(),
        )

        # Encoder.
        self.first_down = UNetPoolBlock(64, 128, 3)
        self.second_down = UNetPoolBlock(128, 256, 3)
        self.third_down = UNetPoolBlock(256, 512, 3)

        self.bottlneck = UNetSampleBottleneckBlock(512, 1024, 512, 3)

        # Decoder; input widths are doubled by the skip concatenations.
        self.first_up = UNetSampleUpBlock(512 + 512, 256, 3)
        self.second_up = UNetSampleUpBlock(256 + 256, 128, 3)
        self.third_up = UNetSampleUpBlock(128 + 128, 64, 3)

        self.out_block = nn.Sequential(
            nn.Conv2d(64 + 64, 64, 3, padding=(1, 1)),
            # nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=(1, 1)),
            # nn.ReLU(),
            nn.Conv2d(64, 2, 3, padding=(1, 1)),
        )

        self.classifier = nn.Softmax2d()

    def forward(self, in_image):
        """Run the network on ``in_image`` and return per-pixel class probabilities."""
        stem = self.in_block(in_image)
        down1 = self.first_down(stem)
        down2 = self.second_down(down1)
        down3 = self.third_down(down2)
        deepest = self.bottlneck(down3)

        # Walk back up, pairing each decoder stage with its encoder features.
        up1 = self.first_up(torch.cat((down3, deepest), dim=1))
        up2 = self.second_up(torch.cat((down2, up1), dim=1))
        up3 = self.third_up(torch.cat((down1, up2), dim=1))

        scores = self.out_block(torch.cat((stem, up3), dim=1))
        return self.classifier(scores)

In [125]:
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device}")

# One instance per architecture; model_pipeline moves a model to `device`
# before it is trained.
transpose_model = UNetTranspose()
upsample_model = UNetUpsample()
simple_model = UNetSimple()

Using cuda


In [126]:
def iou(input: torch.Tensor, target: torch.Tensor, threshold: float):
    """Intersection-over-union between a thresholded prediction and a target.

    Args:
        input: Prediction scores; elements strictly greater than ``threshold``
            count as positive.
        target: Ground truth of the same (or a broadcastable) shape; non-zero
            elements count as positive.
        threshold: Cut-off applied to ``input``.

    Returns:
        A 0-dim float tensor holding ``|intersection| / |union|``. When neither
        the prediction nor the target contains a positive element the union is
        empty; this is treated as a perfect match and 1.0 is returned instead
        of the NaN that 0/0 would produce (a NaN would poison any average the
        value is accumulated into).
    """
    predicted = input > threshold
    actual = target.bool()

    intersection = (predicted & actual).sum()
    union = (predicted | actual).sum()

    # Clamp the denominator so the division is always defined, then swap in
    # 1.0 for the empty-union case without forcing a host-side branch.
    ratio = intersection / union.clamp(min=1)
    return torch.where(union == 0, torch.ones_like(ratio), ratio)

In [127]:
def train(dataloader, model, threshold = 0.5, learning_rate=0.001, optimizer=None):
    """Train ``model`` for one pass over ``dataloader`` and report mean metrics.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches; ``len(dataloader)``
            must give the number of batches.
        model: Module whose output is compared with ``y`` using ``BCELoss``,
            so it must produce values in [0, 1] with the same shape as ``y``.
        threshold: Cut-off applied to predictions when computing IoU.
        learning_rate: Learning rate of the Adam optimizer that is created
            when ``optimizer`` is not supplied.
        optimizer: Optional existing optimizer to step. Passing the same
            instance on every call keeps its internal state across epochs;
            when omitted a fresh Adam optimizer is built on each call.

    Returns:
        dict with the per-batch averages of the overall loss / IoU and of the
        loss / IoU of channel 0 (``fg``) and channel 1 (``bg``), keyed
        ``train/...``.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    num_batches = len(dataloader)
    if num_batches == 0:
        raise ValueError("dataloader yields no batches; nothing to train on")

    model.train()
    loss_fn = torch.nn.BCELoss()
    if optimizer is None:
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    all_loss, layer0_loss, layer1_loss = 0.0, 0.0, 0.0
    all_iou, layer0_iou, layer1_iou = 0.0, 0.0, 0.0
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        pred = model(X)
        loss = loss_fn(pred, y)

        # Diagnostics only: computed on a detached copy so no additional
        # autograd graphs are built for the per-channel losses.
        with torch.no_grad():
            scores = pred.detach()
            all_loss += loss.item()
            layer0_loss += loss_fn(scores[:, 0], y[:, 0]).item()
            layer1_loss += loss_fn(scores[:, 1], y[:, 1]).item()

            all_iou += iou(scores, y, threshold).item()
            layer0_iou += iou(scores[:, 0], y[:, 0], threshold).item()
            layer1_iou += iou(scores[:, 1], y[:, 1], threshold).item()

        # Clear gradients first so stale values (e.g. from a caller-supplied
        # optimizer) never leak into this step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return {
        "train/loss": all_loss / num_batches,
        "train/fg_loss": layer0_loss / num_batches,
        "train/bg_loss": layer1_loss / num_batches,
        "train/IoU": all_iou / num_batches,
        "train/fg_IoU": layer0_iou / num_batches,
        "train/bg_IoU": layer1_iou / num_batches,
    }

def validate(dataloader, model, threshold = 0.5):
    """Evaluate ``model`` on ``dataloader`` without updating its weights.

    The model is switched to eval mode and every batch is scored under
    ``torch.no_grad()``. BCE loss and IoU are measured on the full output and
    separately on channel 0 (``fg``) and channel 1 (``bg``).

    Returns:
        dict of per-batch averages keyed ``validate/...``.
    """
    model.eval()
    criterion = torch.nn.BCELoss()
    batch_count = len(dataloader)

    # Running totals, indexed as: 0 = all channels, 1 = channel 0, 2 = channel 1.
    loss_sums = [0, 0, 0]
    iou_sums = [0, 0, 0]

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)

            views = (
                (outputs, targets),
                (outputs[:, 0], targets[:, 0]),
                (outputs[:, 1], targets[:, 1]),
            )
            for slot, (scores, truth) in enumerate(views):
                loss_sums[slot] += criterion(scores, truth).item()
                iou_sums[slot] += iou(scores, truth, threshold).item()

    mean_loss, mean_fg_loss, mean_bg_loss = [total / batch_count for total in loss_sums]
    mean_iou, mean_fg_iou, mean_bg_iou = [total / batch_count for total in iou_sums]

    return {
        "validate/loss": mean_loss,
        "validate/fg_loss": mean_fg_loss,
        "validate/bg_loss": mean_bg_loss,
        "validate/IoU": mean_iou,
        "validate/fg_IoU": mean_fg_iou,
        "validate/bg_IoU": mean_bg_iou,
    }

In [128]:
# Counters used to number the W&B run names of each architecture; re-running
# this cell resets both to zero.
upsample_run = transpose_run = 0

In [129]:
def model_pipeline(model, num_epochs, threshold, initial_learning_rate, run_name):
    """Train and validate ``model`` for ``num_epochs`` epochs, logging to W&B.

    Args:
        model: Network to train; it is moved to the global ``device``.
        num_epochs: Number of train + validate rounds to run.
        threshold: Prediction cut-off forwarded to the IoU computation.
        initial_learning_rate: Learning rate forwarded to ``train``.
        run_name: Name of the W&B run, also printed in the header.

    One W&B record holding both the training and the validation metrics is
    logged per epoch. The run is opened as a context manager so it is always
    closed, including when training raises, instead of staying open in the
    kernel.
    """
    print(f"{run_name} for {num_epochs} epochs")
    print("Hyperparameters:")
    print(f"\tThreshold = {threshold}")
    print(f"\tInitial Learning Rate = {initial_learning_rate}")

    # Move the model once up front rather than twice per epoch.
    model = model.to(device)

    with wandb.init(
        project="computer-vision-lab-3",
        name=run_name,
        config={
            "architecture": "UNet",
            "epochs": num_epochs,
            "threshold": threshold,
            "initial_learning_rate": initial_learning_rate
        }
    ):
        for t in range(num_epochs):
            print(f"Epoch {t + 1}")
            print("\tTraining")
            train_results = train(train_dataloader, model, threshold=threshold, learning_rate=initial_learning_rate)
            print("\tValidating")
            val_results = validate(val_dataloader, model, threshold=threshold)

            wandb.log(train_results | val_results)

In [130]:
# model_pipeline(simple_model, 4, 0.5, 0.1, f"Simple Model A - alpha = 0.1")

In [131]:
# Train the upsampling U-Net for 3 epochs (threshold 0.5, learning rate 0.05).
# The counter is bumped only after the pipeline returns, so each completed
# execution of this cell gets the next run number.
upsample_run_name = f"Upsample(No ReLU) Run {upsample_run + 1}"
model_pipeline(upsample_model, 3, 0.5, 0.05, upsample_run_name)
upsample_run += 1

Upsample(No ReLU) Run 1 for 3 epochs
Hyperparameters:
	Threshold = 0.5
	Initial Learning Rate = 0.05


Epoch 1
	Training
	Validating
Epoch 2
	Training
	Validating
Epoch 3
	Training
	Validating


0,1
train/IoU,▁██
train/bg_IoU,▁██
train/bg_loss,█▁▁
train/fg_IoU,█▁▁
train/fg_loss,█▁▁
train/loss,█▁▁
validate/IoU,▁▁█
validate/bg_IoU,▁▁█
validate/bg_loss,██▁
validate/fg_IoU,▁▁▁

0,1
train/IoU,0.59642
train/bg_IoU,0.74694
train/bg_loss,25.30621
train/fg_IoU,0.0
train/fg_loss,25.30621
train/loss,25.30621
validate/IoU,0.61761
validate/bg_IoU,0.76354
validate/bg_loss,23.64588
validate/fg_IoU,0.0
