In [None]:
import os
# CUDA_LAUNCH_BLOCKING=1 makes CUDA kernel launches synchronous, so a failing
# kernel is reported at the Python line that launched it instead of at some
# later call. It has to be set before CUDA is initialised, and it slows
# execution down, so it is a debugging aid rather than a training setting.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [None]:
from os import listdir
from os.path import join, splitext

from typing import Tuple

import numpy as np
from PIL import Image
import pandas as pd
import torch
from torchvision import transforms
from torchvision.transforms import ToTensor, Resize, RandomHorizontalFlip, RandomVerticalFlip, Normalize

In [None]:
from google.colab import drive
# Mount the user's Google Drive at /content/gdrive (Google Colab only); the
# dataset folders and csv files configured below are read from this mount.
drive.mount('/content/gdrive')

In [None]:
import os.path

# Root of the mounted Google Drive.
# NOTE(review): other cells write to '/content/gdrive/MyDrive/...' (no space);
# confirm both spellings resolve to the same folder on the runtime in use.
DATA_FOLDER = '/content/gdrive/My Drive/'
# Dataset root and its two image sub-folders: inputs and label masks.
# The "224" suffix presumably means the images were already resized to
# 224x224 offline -- TODO confirm.
TRAINING_SET_FOLDER = os.path.join(DATA_FOLDER, 'semantic_drone_dataset')
INPUT_IMAGES_FOLDER = os.path.join(TRAINING_SET_FOLDER, 'resized_original224')
LABEL_IMAGES_FOLDER = os.path.join(TRAINING_SET_FOLDER, 'resized_label224')
# One-column csv files (one image filename per line) written by split() and
# read back by the dataset classes. They live in the Drive root, not in the
# dataset folder.
TRAIN_CSV = os.path.join(DATA_FOLDER, 'train.csv')
TEST_CSV = os.path.join(DATA_FOLDER, 'test.csv')
VALIDATION_CSV = os.path.join(DATA_FOLDER, 'validation.csv')

In [None]:
class CSVDataset(torch.utils.data.Dataset):
    """Dataset over the subset of images listed in a csv file.

    The csv file has a single, header-less column of input-image filenames
    (the format written by ``split``). Each item is the input image, its label
    image and the filename, with the same ``transform`` applied to both images.
    """

    def __init__(self, source, transform=None):
        """Read the filename list from ``source``.

        Args:
            source: path of the csv file listing the image filenames.
            transform: callable applied to both the input and the label image;
                when ``None`` the images are returned unchanged.
        """
        super().__init__()

        # Fall back to the identity so __getitem__ can always call the transform.
        self.transform = (lambda e: e) if transform is None else transform

        # pd.Series holding every filename of this subset.
        listing = pd.read_csv(source, names=('img',))
        self.imgs = listing.img

    def __len__(self):
        """Number of images listed in the csv file."""
        return len(self.imgs)

    def __getitem__(self, index: int):
        """Return ``(transformed input, transformed label, filename)`` for ``index``.

        The label image is looked up in LABEL_IMAGES_FOLDER under the same
        base name as the input image, with a ``.png`` extension.
        """
        fname = self.imgs[index]
        inpt = Image.open(join(INPUT_IMAGES_FOLDER, fname))

        stem, _ = splitext(fname)
        lbel = Image.open(join(LABEL_IMAGES_FOLDER, stem + '.png'))

        return self.transform(inpt), self.transform(lbel), fname


class SegmentationDataset(torch.utils.data.Dataset):
    """Dataset of input images paired with their segmentation label images.

    Filenames come from a single-column, header-less csv file. Input images are
    converted to normalised tensors; label images are returned as int64 numpy
    arrays without any transform.
    """

    def __init__(self, source: str, size: Tuple[int, int]):
        """Read the filename list and build the input-image transform.

        Args:
            source: path of the csv file listing the image filenames.
            size: (width, height). Accepted for interface compatibility but not
                used by this class: no resizing is applied here.
        """
        super().__init__()

        # Mean/std are the ImageNet channel statistics documented for
        # torchvision's pretrained models.
        to_tensor = transforms.ToTensor()
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.images_transform = transforms.Compose([to_tensor, normalize])

        # pd.Series holding every filename of this subset.
        listing = pd.read_csv(source, names=('img',))
        self.imgs = listing.img

    def __len__(self):
        """Number of images listed in the csv file."""
        return len(self.imgs)

    def __getitem__(self, index: int):
        """Return ``(input tensor, label array, filename)`` for ``index``.

        The label image is looked up in LABEL_IMAGES_FOLDER under the same
        base name as the input image, with a ``.png`` extension.
        """
        fname = self.imgs[index]
        inpt = Image.open(join(INPUT_IMAGES_FOLDER, fname))

        stem, _ = splitext(fname)
        lbel = Image.open(join(LABEL_IMAGES_FOLDER, stem + '.png'))

        # Only the input image goes through the tensor transform (C, H, W);
        # the label keeps its raw pixel values so they can serve as class ids.
        image_tensor: torch.Tensor = self.images_transform(inpt)
        label_array = np.array(lbel, dtype=np.int64)

        return image_tensor, label_array, fname


def trainset(size: Tuple[int, int] = (6000, 4000)) -> "SegmentationDataset":
    """Return the training set: the images listed in TRAIN_CSV.

    Args:
        size: (width, height) forwarded to ``SegmentationDataset``.

    Returns:
        A ``SegmentationDataset`` built from TRAIN_CSV.
    """
    return SegmentationDataset(TRAIN_CSV, size)


def testset(size: Tuple[int, int] = (6000, 4000)) -> "SegmentationDataset":
    """Return the testing set: the images listed in TEST_CSV.

    Args:
        size: (width, height) forwarded to ``SegmentationDataset``.

    Returns:
        A ``SegmentationDataset`` built from TEST_CSV.
    """
    return SegmentationDataset(TEST_CSV, size)


def validationset(size: Tuple[int, int] = (6000, 4000)) -> "SegmentationDataset":
    """Return the validation set: the images listed in VALIDATION_CSV.

    Args:
        size: (width, height) forwarded to ``SegmentationDataset``.

    Returns:
        A ``SegmentationDataset`` built from VALIDATION_CSV.
    """
    return SegmentationDataset(VALIDATION_CSV, size)

def split(test=0.1, validation=0.1):
    """Shuffle the input images and write the train/test/validation csv files.

    Every file in INPUT_IMAGES_FOLDER whose name ends with "jpg" is assigned to
    exactly one of TRAIN_CSV, TEST_CSV or VALIDATION_CSV. Each csv file has one
    filename per line, without header or index.

    Args:
        test: fraction of the images written to TEST_CSV.
        validation: fraction of the images written to VALIDATION_CSV.

    Raises:
        ValueError: if a fraction is negative or the two fractions sum to more
            than 1 (the three subsets would otherwise overlap).
    """
    if test < 0 or validation < 0 or test + validation > 1:
        raise ValueError(
            "test and validation must be non-negative fractions summing to at most 1, "
            f"got test={test}, validation={validation}")

    # listdir returns entries in arbitrary order; sorting first makes the seeded
    # shuffle below produce the same split on every machine and every run.
    filenames = sorted(f for f in listdir(INPUT_IMAGES_FOLDER) if f.endswith("jpg"))
    df = pd.DataFrame(data=filenames)
    df = df.sample(frac=1, random_state=42)

    test_size = int(test * len(df))
    validation_size = int(validation * len(df))
    # The training set takes whatever is left, so rounding never drops a file.
    train_size = len(df) - test_size - validation_size

    test_end = train_size + test_size
    valid_end = test_end + validation_size

    df.iloc[0:train_size].to_csv(TRAIN_CSV, index=False, header=False)
    df.iloc[train_size:test_end].to_csv(TEST_CSV, index=False, header=False)
    df.iloc[test_end:valid_end].to_csv(
        VALIDATION_CSV, index=False, header=False)


# In a notebook kernel __name__ is "__main__", so executing this cell calls
# split() and rewrites the three csv files every time the cell runs.
if __name__ == "__main__":
    split()

In [None]:
# Release cached GPU memory that PyTorch's allocator holds but no tensor is
# using (e.g. left over from a previous run in the same kernel).
torch.cuda.empty_cache()

In [None]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt


def conv(in_channels: int, out_channels: int, kernel_size=3) -> nn.Sequential:
    """Build a Conv2d -> BatchNorm2d -> ReLU block.

    The padding is derived from ``kernel_size`` (``kernel_size // 2``) so that,
    for odd kernel sizes, the spatial size of the input is preserved. With the
    default ``kernel_size=3`` this is the padding of 1 used previously.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the convolution.
        kernel_size: convolution kernel size, an int or an (h, w) pair.

    Returns:
        An ``nn.Sequential`` holding the three layers.
    """
    if isinstance(kernel_size, (tuple, list)):
        padding = tuple(k // 2 for k in kernel_size)
    else:
        padding = kernel_size // 2

    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
    )

def deconv(in_channels: int, out_channels: int) -> nn.Sequential:
    """Build a ConvTranspose2d -> BatchNorm2d -> ReLU upsampling block.

    The transposed convolution uses kernel_size=4, stride=2 and padding=1,
    which exactly doubles the height and width of its input:
    (H - 1) * 2 - 2 * 1 + 4 = 2 * H.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels of the upsampled feature map.

    Returns:
        An ``nn.Sequential`` holding the three layers.
    """
    return nn.Sequential(
        nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4, padding=1, stride=2),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
    )

class net(nn.Module):
    """Segmentation network: pretrained ResNet-152 encoder + transposed-conv decoder.

    The encoder is torchvision's ResNet-152 without its last two children (the
    global average pool and the 1000-class fully connected layer), so it emits
    a 2048-channel feature map. Five stride-2 transposed convolutions then
    upsample that map by a factor of 32 and reduce it to ``num_classes``
    channels.

    Args:
        num_classes: number of segmentation classes, i.e. output channels.
    """

    def __init__(self, num_classes: int = 26):
        super(net, self).__init__()

        # NOTE(review): this overrides a private torch.hub hook; presumably a
        # workaround for the forked-repo validation failing in some torch
        # releases -- confirm it is still needed with the installed version.
        torch.hub._validate_not_a_forked_repo = lambda a, b, c: True
        backbone = torch.hub.load('pytorch/vision:v0.9.0', 'resnet152', pretrained=True)  # 1000 classes

        # Keep only the convolutional trunk (drop the last two children).
        # The trunk is reachable both as self.resnet and as self.net[0]; the
        # attribute is kept so existing code using `model.resnet` still works.
        self.resnet = torch.nn.Sequential(*list(backbone.children())[:-2])

        self.net = nn.Sequential(
            self.resnet,
            deconv(2048, 1024),
            deconv(1024, 512),
            deconv(512, 256),
            deconv(256, 128),
            # The last stage must emit raw, unbounded class scores:
            # CrossEntropyLoss applies log-softmax itself, and a trailing
            # BatchNorm + ReLU would clamp every logit to be non-negative.
            nn.ConvTranspose2d(128, num_classes, kernel_size=4, padding=1, stride=2),
        )

    def forward(self, x):
        """Return per-pixel class logits of shape (N, num_classes, H_out, W_out)."""
        return self.net(x)


train_set = trainset((224, 224))
test_set = testset((224, 224))

# Pinned host memory only speeds up host-to-GPU copies, so it is requested
# only when a GPU is actually available.
train_ldr = torch.utils.data.DataLoader(
    train_set, batch_size=4, shuffle=True, num_workers=2,
    pin_memory=torch.cuda.is_available())
test_ldr = torch.utils.data.DataLoader(
    test_set, batch_size=4, shuffle=False, num_workers=2,
    pin_memory=torch.cuda.is_available())

# Use the first GPU when there is one; fall back to the CPU instead of
# crashing on a runtime without CUDA.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model = net().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


def train(num_epochs):
    """Train the global ``model`` for ``num_epochs`` epochs, evaluating after each.

    Relies on the module-level ``model``, ``criterion``, ``optimizer``,
    ``train_ldr``, ``test_ldr`` and ``device``. Both loaders must yield
    ``(inputs, targets, filenames)`` batches.

    Args:
        num_epochs: number of passes over the training loader.

    Returns:
        ``(train_loss, test_loss, test_acc)``: three CPU tensors of shape
        ``(num_epochs,)``. Entry ``i`` holds, for epoch ``i``, the mean batch
        loss on the training loader, the mean batch loss on the test loader,
        and the mean per-pixel accuracy over the test batches.
    """
    train_loss = torch.zeros((num_epochs), device=device)
    test_loss = torch.zeros((num_epochs), device=device)
    test_acc = torch.zeros((num_epochs), device=device)

    # Number of batches per pass; clamped so an empty loader cannot divide by zero.
    n_train_batches = max(len(train_ldr), 1)
    n_test_batches = max(len(test_ldr), 1)

    for i in range(num_epochs):
        print(f'epoch ({i+1:2}/{num_epochs:2})', end='\r')
        model.train()
        for k, (x, y, _) in enumerate(train_ldr):
            print(f'epoch ({i+1:2}/{num_epochs:2}) - batch ({k+1:3}/{len(train_ldr):3})', end='\r')
            outputs = model(x.to(device))
            loss = criterion(outputs, y.to(device))

            # Accumulate on the device; detach so the graph is not kept alive.
            train_loss[i] += loss.detach().sum()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print('')
        print(f'epoch {i+1:2}/{num_epochs:2} - evaluation')
        model.eval()
        with torch.no_grad():
            accuracy = 0.0
            for x, y, _ in test_ldr:
                inputs = x.to(device)
                targets = y.to(device)

                outputs = model(inputs)

                loss = criterion(outputs, targets)
                test_loss[i] += loss.sum()

                # per-pixel accuracy: the class with the highest score wins
                predicted = outputs.argmax(1)
                accuracy += (predicted == targets).float().mean()
            test_acc[i] = accuracy / n_test_batches

    # Each accumulated entry is a sum of per-batch losses for ONE epoch, so the
    # epoch mean is that sum divided by the number of batches only. (Assumes
    # the criterion already averages within a batch, as CrossEntropyLoss does
    # by default.)
    train_loss /= n_train_batches
    test_loss /= n_test_batches

    return train_loss.cpu(), test_loss.cpu(), test_acc.cpu()


# Run the full training and persist the result.
num_epochs = 100  # could be increased but training is long
print(f'training for {num_epochs} epochs')
# train() returns three per-epoch CPU tensors: training loss, test loss and
# test accuracy.
epochs_train_loss, epochs_test_loss, test_acc = train(num_epochs)
# Only the weights (state_dict) are saved, not the whole model object; the
# target is on the mounted Google Drive so it outlives the Colab runtime.
torch.save(model.state_dict(), "/content/gdrive/MyDrive/ResNet2.pth")

# Raw per-epoch values, one tensor per line.
print(f'{epochs_train_loss}\n{epochs_test_loss}\n{test_acc}')

idx = range(0, num_epochs)

# Each figure is saved BEFORE plt.show(): once a figure has been shown, a later
# plt.savefig() no longer targets it and writes an empty image. The paths are
# absolute (leading '/') so they land on the mounted Drive next to the saved
# checkpoint rather than in a relative 'content/...' folder.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(idx, epochs_train_loss, label='training loss')
loss_ax.plot(idx, epochs_test_loss, label='testing loss')
loss_ax.set_xlabel("epoch")
loss_ax.set_ylabel("loss")
loss_ax.legend()
loss_fig.savefig('/content/gdrive/MyDrive/ResNetLosses.png')
plt.show()

acc_fig, acc_ax = plt.subplots()
acc_ax.plot(idx, test_acc, 'k', label='testing accuracy')
acc_ax.set_xlabel("epoch")
acc_ax.set_ylabel("accuracy")
acc_ax.legend()
acc_fig.savefig('/content/gdrive/MyDrive/ResAccuracy.png')
plt.show()