In [1]:
import os
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torch
import torch.nn as nn
import torchvision
import pandas as pd
from tqdm.auto import tqdm
from torch.optim import Adam
import torch.nn.functional as F
# import imageio

class ImagesDataset(Dataset):
    def __init__(self, annotations_file, x_dir, y_dir, transform=None, target_transform=None):
        self.x_list, self.y_list = annotations_file  #load csv file containing training examples
        self.x_dir = x_dir  #path to X_train
        self.y_dir = y_dir  #path to Y_train
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.x_list)

    def __getitem__(self, idx):
        img_path = self.x_list[idx]   # os.path.join(self.x_dir, self.x_list[idx])
        img = torchvision.io.read_image(img_path)   #output= n_c*n_h*n_w tensor of img
        label_path = self.y_list[idx]   # os.path.join(self.y_dir, self.y_list[idx])
        label = torchvision.io.read_image(label_path)
        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            label = self.target_transform(label)
        return img, label


In [2]:
class conv_block(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),   # out_c = n_filters
            nn.BatchNorm2d(out_c),
            nn.ReLU(inplace=True),
            
            nn.Conv2d(out_c, out_c, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_c),
            nn.ReLU(inplace=True)
        )

        # self.pool = nn.MaxPool2d((2, 2))

    def forward(self, inputs):

        # p = self.pool(x)    #if want pooling, else p=x
        return self.double_conv(inputs)


In [3]:
class encoder_block(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()

        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            conv_block(in_c, out_c)       ######################
        )

    def forward(self, inputs):
        return self.maxpool_conv(inputs)

In [4]:
class decoder_block(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()

        self.up = nn.ConvTranspose2d(in_c, out_c, kernel_size=2, stride=2, padding=1)
        self.conv = conv_block(out_c+out_c, out_c)
        

    def forward(self, inputs, skip):
        x = self.up(inputs)
        diffY = skip.size()[2] - x.size()[2]
        diffX = skip.size()[3] - x.size()[3]
        x = F.pad(x, (diffY//2, diffY-diffY//2, diffX//2, diffX-diffX//2))
        x = torch.cat([skip, x], axis=1)
        x = self.conv(x)

        return x


In [5]:

class outConv(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()
        self.conv = nn.Conv2d(in_c, out_c, kernel_size=1)

    def forward(self, inputs):
        return self.conv(inputs)

In [6]:
class build_unet(nn.Module):
    def __init__(self, in_c=3, batch_size=64, n_classes=30):
        super().__init__()
        self.in_c = in_c
        self.batch_size = batch_size
        self.n_classes = n_classes

        #Encoder
        self.b = conv_block(in_c, batch_size)
        self.e1 = encoder_block(batch_size, 128)
        self.e2 = encoder_block(128, 256)
        self.e3 = encoder_block(256, 512)

        #Bottleneck
        self.e4 = encoder_block(512, 1024)

        #Decoder
        self.d1 = decoder_block(1024, 512)
        self.d2 = decoder_block(512, 256)
        self.d3 = decoder_block(256, 128)
        self.d4 = decoder_block(128, 64)

        #Classifier
        self.outputs = outConv(64, 30)  # 64, n_classes
        self.rgbl = nn.Conv2d(30, 3, 3, padding=1)
        self.rel = nn.ReLU(inplace=True)

    def forward(self, inputs):
        #Encoder
        s1 = self.b(inputs)
        s2 = self.e1(s1)
        s3 = self.e2(s2)
        s4 = self.e3(s3)
        s5 = self.e4(s4)


        #Bottleneck
        # b = self.b(s4)

        #Decoder
        d = self.d1(s5, s4)
        d = self.d2(d, s3)
        d = self.d3(d, s2)
        d = self.d4(d, s1)

        #Classifier
        out = self.outputs(d)

        # Change to output to have 3 channels to be compared to rgb mask, for that this layer needs to learn 30 values of r, g, b each
        outputs = self.rgbl(out)
        outputs = self.rel(outputs)


        return outputs

In [7]:
segModel = build_unet()

torch.cuda.empty_cache()
# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = 'max_split_size_mb:256.0'

loss_fn = torch.nn.CrossEntropyLoss(reduction='mean')
optimizer = Adam(segModel.parameters(), lr=0.001, weight_decay=0.0001)

In [8]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
img_path = 'drive/MyDrive/ml_projects/Semantic-segmentation/cityscapes-data/train/imaged/'
mask_path = 'drive/MyDrive/ml_projects/Semantic-segmentation/cityscapes-data/train/masked/'
img_test_path = 'drive/MyDrive/ml_projects/Semantic-segmentation/cityscapes-data/test/imaged/'
mask_test_path = 'drive/MyDrive/ml_projects/Semantic-segmentation/cityscapes-data/test/masked/'
img_list = [img_path+f'{i+1}img.jpg' for i in range(2975)]
mask_list = [mask_path+f'{i+1}mask.jpg' for i in range(2975)]
# img_list = [img_path+i for i in os.listdir(img_path)]
# mask_list = [mask_path+i for i in os.listdir(mask_path)]
img_test_list = [img_test_path+f'{i+1}img.jpg' for i in range(500)]
mask_test_list = [mask_test_path+f'{i+1}mask.jpg' for i in range(500)]


# annotations_file = pd.read_csv('drive/MyDrive/ml_projects/Semantic-segmentation/train_list.csv')
# test_annotations = pd.read_csv('drive/MyDrive/ml_projects/Semantic-segmentation/test_list.csv')
# x_dir = 'drive/MyDrive/ml_projects/Semantic-segmentation/cityscapes-data/train/imaged'
# y_dir = 'drive/MyDrive/ml_projects/Semantic-segmentation/cityscapes-data/train/masked'
# x_test_dir = 'drive/MyDrive/ml_projects/Semantic-segmentation/cityscapes-data/test/imaged'
# y_test_dir = 'drive/MyDrive/ml_projects/Semantic-segmentation/cityscapes-data/test/masked'

Train_data = ImagesDataset((img_list, mask_list), img_path, mask_path)
Test_data = ImagesDataset((img_test_list, mask_test_list), img_test_path, mask_test_path)

train_dataloader = DataLoader(Train_data, batch_size=32, shuffle=True)
test_dataloader = DataLoader(Test_data, batch_size=32, shuffle=True)

In [10]:
def testAccuracy():
    segModel.train(mode=False)
    accuracy=0
    # total=0
    with torch.no_grad():
        for (x, y) in test_dataloader:
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            # device = torch.device('cpu')
            typ = torch.cuda.FloatTensor if torch.cuda.is_available else torch.FloatTensor
            x.to(device)
            y.to(device)
            x = x.type(typ)
            y = torch.round(y.type(typ))
#             y = y.type(torch.LongTensor)
            outputs = torch.round(segModel(x))
            # predicted = torch.argmax(outputs)
            # total+=y.size(0)
            corr = (outputs==y).sum().item()
            accuracy = accuracy + corr
      
    accuracy = (100.*accuracy/500)/256/256/3    # compute accuracy over all test images
    segModel.train(True)
    return accuracy

In [11]:

def savemodel():
    path = 'drive/MyDrive/ml_projects/Semantic-segmentation/segModel.pth'
    torch.save(segModel.state_dict(), path)

In [12]:
def trainseg(num_epochs):
    best_accuracy = 0.0
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    typ = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
    # device = torch.device('cpu')

    segModel.to(device)
    segModel.train(True)

    for epoch in tqdm(range(num_epochs), position=0, desc='epochs', leave=True):  # , ncols-70
        running_loss = 0.0
        running_acc = 0.0
        torch.cuda.empty_cache()

        for (img, mask) in tqdm(train_dataloader, position=1, desc='batches', leave=False): # , ncols=70
            img, mask = (img.to(device)).type(typ), (mask.to(device)).type(typ)
            optimizer.zero_grad()
            outputs = segModel(img)
            loss = loss_fn(outputs, mask)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_acc += (outputs==mask).sum().item()
        print(f'epoch {epoch+1}:', 'running_loss:', running_loss/2975, '\t', f'trainAccuracy: {100.*running_acc/2975/256/256/3}','\t', 'testAccuracy:', testAccuracy())
        segModel.train(True)
        

In [13]:
trainseg(25)

epochs:   0%|          | 0/25 [00:00<?, ?it/s]

batches:   0%|          | 0/93 [00:00<?, ?it/s]

epoch 1: running_loss: 8.96439528008469 	 trainAccuracy: 3.59865059304972 	 testAccuracy: 5.4932301839192705


batches:   0%|          | 0/93 [00:00<?, ?it/s]

epoch 2: running_loss: 8.884970969833246 	 trainAccuracy: 3.489457159817052 	 testAccuracy: 5.388766479492188


batches:   0%|          | 0/93 [00:00<?, ?it/s]

epoch 3: running_loss: 8.831161586216519 	 trainAccuracy: 3.4911184444480914 	 testAccuracy: 5.385734049479166


batches:   0%|          | 0/93 [00:00<?, ?it/s]

epoch 4: running_loss: 8.708693262949712 	 trainAccuracy: 3.175512319185487 	 testAccuracy: 5.2544189453125005


batches:   0%|          | 0/93 [00:00<?, ?it/s]

epoch 5: running_loss: 8.678756421513919 	 trainAccuracy: 3.206107516248687 	 testAccuracy: 5.5362497965494795


batches:   0%|          | 0/93 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
savemodel()