# Machine Learning Project 2

This is a notebook to run the ML Road Segmentation Project by the lla_team in a Google Colab environment.

Upload the `requirements.txt` file to Colab, then run the following cell to install the required packages. Restart the runtime afterwards.

In [None]:
! pip install -r requirements.txt

Import all the necessary libraries

In [4]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from glob import glob
import numpy as np
from PIL import ImageFile, ImageEnhance, ImageFilter
import sys, getopt
from torch import nn
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score
from tqdm import tqdm
sys.path.append('..')

Mount your drive to access the data

In [None]:
# Mount Google Drive at /content/drive so the data and submission paths
# configured below can point into it.
from google.colab import drive
drive.mount('/content/drive')

Define the parameters of the run here. It is particularly important to change the data path, the submission directory and the submission path to locations on your Google Drive.

In [12]:
# Seed passed to the NumPy and PyTorch random generators.
SEED = 19
# Root folder of the dataset; run() looks for training/images, training/groundtruth
# and test_set_images below it.
DATA_PATH = "./drive/MyDrive/EPFL/data"
# Folder meant to hold the submission (not referenced by the code in this notebook).
SUBMISSION_DIR = "./drive/MyDrive/EPFL/submission/"
# CSV file written by RoadSegmentationModel.submit().
SUBMISSION_PATH = "./drive/MyDrive/EPFL/submission/submission.csv"
# Side length (in pixels) of the square tiles the training images are cut into.
PATCH_SIZE = 80
# Batch size of the training and test data loaders.
BATCH_SIZE = 10
# Default learning rate of the Adam optimizer.
LR = 0.001
# Default value of the model's max_iter (epoch limit of the training loop).
MAX_ITER = 64
# A 16x16 patch is labelled 1 when its mean value exceeds this threshold.
TH = 0.25
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Fraction of the training images held out by split_data().
TEST_SIZE = 0.25
# Default number of folds used by cross_validation().
K_FOLD = 5

# Candidate values explored by cross-validation.
THRESHOLD_VALIDATION_VECTOR = [0.20, 0.25, 0.30, 0.35] # best found 0.25
LEARNING_RATE_VALIDATION_VECTOR = [0.0003, 0.0001, 0.001, 0.01]  # best found 0.0003

These flags select what the notebook runs. To run cross-validation, set `validation` to True. Specific experiments can also be chosen further down the notebook.

In [13]:
# Standardise every image per channel in preprocess(); when False the pixel
# values are divided by 255 instead (see RoadSegmentationDataset).
normalize = False
# Cut the training images and masks into PATCH_SIZE x PATCH_SIZE tiles.
divide_patches = True
# Add flipped, rotated, blurred and colour-shifted copies of the training images.
augment = True
# Train the model in run().
training = True
# Train on the full training set and write the submission CSV in run().
# (The spelling is kept because the name is used throughout the notebook.)
submmission = True
# Run cross-validation over the threshold and the learning rate before training.
validation = False

Import a pre-trained neural network to use

In [14]:
import segmentation_models_pytorch as smp

def deeplab_model(encoder = 'resnet34'):
    """Build a DeepLabV3 segmentation network with a single output channel.

    Args:
        encoder: Name of the encoder backbone handed to
            ``segmentation_models_pytorch``.

    Returns:
        An ``smp.DeepLabV3`` model for 3-channel inputs, created with
        ImageNet encoder weights.
    """
    settings = dict(
        encoder_name=encoder,
        encoder_depth=5,
        encoder_weights="imagenet",
        in_channels=3,
        classes=1,
    )
    return smp.DeepLabV3(**settings)

Define some helper functions

In [15]:
def image_to_tensor(image:np.ndarray, device = None, divide = False):
    """Convert a channel-last image array into a channel-first float tensor.

    Args:
        image: Array of shape ``(H, W, C)``.
        device: Optional device the tensor is moved to. ``None`` leaves the
            tensor where it was created.
        divide: When True the values are divided by 255.

    Returns:
        A tensor of shape ``(C, H, W)``. No batch dimension is added: the
        samples are batched later by the ``DataLoader``.
    """
    image = np.transpose(image,[2,0,1])
    tensor = torch.Tensor(image)
    if divide:
        tensor = tensor / 255
    # A former ``tensor.unsqueeze(0)`` call stood here; it discarded its
    # result and therefore had no effect, so it has been removed. Assigning
    # the result would have produced a spurious extra dimension once the
    # DataLoader batches the samples.
    if device is not None:
        tensor = tensor.to(device)
    return tensor

def mask_to_tensor(mask: np.ndarray, device = None):
    """Turn a ground-truth mask into a rounded single-channel tensor.

    Args:
        mask: Mask accepted by ``torchvision.transforms.ToTensor``.
        device: Optional device the tensor is moved to.

    Returns:
        The first channel of the rounded mask, with a leading axis of
        size one.
    """
    to_tensor = transforms.ToTensor()
    binary = torch.round(to_tensor(mask))
    if device is not None:
        binary = binary.to(device)
    # Keep only the first channel and restore the channel axis.
    return binary[0].unsqueeze(0)

def transform_to_patch(pixels, th):
    """Label a patch 1 when its mean value is strictly above ``th``, else 0."""
    return 1 if np.mean(pixels) > th else 0


def transform_prediction_to_patch(img, id, patch_size=16,step=16,th=0.25):
    """Convert a pixel-wise prediction into per-patch labels and their ids.

    The image is scanned column by column (outer loop over the second axis,
    inner loop over the first axis).

    Args:
        img: 2-D array of per-pixel values.
        id: Image number written into every patch id.
        patch_size: Side length of the window that is averaged.
        step: Distance between two consecutive window origins.
        th: Threshold forwarded to ``transform_to_patch``.

    Returns:
        ``(labels, ids)`` where ``ids`` are formatted as
        ``"{image:03d}_{column}_{row}"``.
    """
    n_rows, n_cols = img.shape[0], img.shape[1]
    origins = [
        (row, col)
        for col in range(0, n_cols, step)
        for row in range(0, n_rows, step)
    ]
    labels = [
        transform_to_patch(img[row:row + patch_size, col:col + patch_size], th=th)
        for row, col in origins
    ]
    names = ["{:03d}_{}_{}".format(id, col, row) for row, col in origins]
    return labels, names

def images_to_np_array(images):
    """Convert every item of ``images`` to an array and stack them into one array."""
    return np.array([np.array(item) for item in images])

def PIL_Images_from_np_array(images):
    """Wrap every array of ``images`` in a PIL image and return them as a list."""
    return [Image.fromarray(array) for array in images]

def split_data(x,y):
    """Shuffle the samples with the global seed and split them in two parts.

    The first ``1 - TEST_SIZE`` share of the shuffled samples forms the
    training part, the remainder the test part. The global NumPy random
    generator is re-seeded with ``SEED`` as a side effect.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` as NumPy arrays.
    """
    np.random.seed(SEED)
    order = np.arange(len(x))
    np.random.shuffle(order)

    cut = int(len(order) * (1 - TEST_SIZE))
    train_ids, test_ids = order[:cut], order[cut:]

    x_arr, y_arr = np.array(x), np.array(y)
    return x_arr[train_ids], y_arr[train_ids], x_arr[test_ids], y_arr[test_ids]

Define preprocessing and postprocessing functions

In [16]:
def _normalize_images(images):
    """Standardise every image per channel and return a float32 array.

    Each image is scaled by 1/255, then its per-channel mean is subtracted
    and the result divided by its per-channel standard deviation. The result
    is written into a new float array: storing it back into the integer
    image array would truncate the values.
    """
    normalized = np.empty(np.shape(images), dtype=np.float32)
    for index, image in enumerate(images):
        scaled = image / 255
        mean = scaled.mean(axis=(0, 1), dtype='float64')
        std = scaled.std(axis=(0, 1), dtype='float64')
        # A constant channel has zero deviation; avoid dividing by zero.
        std = np.where(std == 0, 1.0, std)
        normalized[index] = (scaled - mean) / std
    return normalized


def _augmented_views(image, is_mask):
    """Return the 13 augmented copies of one PIL image, in a fixed order.

    Order: left-right flip, rotations by 90/180/270 degrees, top-bottom flip,
    two photometric variants, rotations by 10..60 degrees. The photometric
    variants (Gaussian blur, colour reduction) do not move any pixel, so for
    a mask the unchanged mask is used in their place. This keeps images and
    masks aligned index by index.
    """
    views = [
        image.transpose(Image.FLIP_LEFT_RIGHT),
        image.transpose(Image.ROTATE_90),
        image.transpose(Image.ROTATE_180),
        image.transpose(Image.ROTATE_270),
        image.transpose(Image.FLIP_TOP_BOTTOM),
    ]
    if is_mask:
        views.extend([image, image])
    else:
        views.append(image.filter(ImageFilter.GaussianBlur(4)))
        views.append(ImageEnhance.Color(image).enhance(0.5))
    views.extend(
        image.rotate(angle, resample=Image.BICUBIC)
        for angle in (10, 20, 30, 40, 50, 60)
    )
    return views


def preprocess(data, gts, operations, train: bool):
    """Prepare images (and, for training, ground-truth masks) for the model.

    Args:
        data: Iterable of images convertible with ``np.array``.
        gts: Iterable of ground-truth masks; ignored when ``train`` is False.
        operations: Dict with the boolean keys ``'normalization'``,
            ``'augment'`` and ``'patches'``.
        train: When False only the optional normalisation is applied and no
            masks are returned.

    Returns:
        ``(data, gts)`` as NumPy arrays; ``gts`` is ``None`` when ``train``
        is False.

    Notes:
        Normalisation runs after augmentation. The augmentation relies on
        PIL, which needs the original integer pixel values, and the
        normalised values are floats that must not be written back into the
        integer image array.
    """
    data = images_to_np_array(data)

    if not train:
        if operations['normalization']:
            data = _normalize_images(data)
        return data, None

    gts = images_to_np_array(gts)

    if operations['augment']:
        originals = PIL_Images_from_np_array(data)
        masks = PIL_Images_from_np_array(gts)

        extra_images = [
            view for image in originals
            for view in _augmented_views(image, is_mask=False)
        ]
        extra_masks = [
            view for mask in masks
            for view in _augmented_views(mask, is_mask=True)
        ]

        # Layout: all originals first, then the augmented copies per image.
        data = np.array([np.array(image) for image in originals + extra_images])
        gts = np.array([np.array(mask) for mask in masks + extra_masks])

    if operations['normalization']:
        data = _normalize_images(data)

    if operations['patches']:
        patch_size = PATCH_SIZE
        data = np.asarray([
            patch for image in data
            for patch in crop(image, patch_size, patch_size)
        ])
        gts = np.asarray([
            patch for mask in gts
            for patch in crop(mask, patch_size, patch_size)
        ])

    return data, gts
        
def crop(image, width, height):
    """Cut ``image`` into tiles, scanning column block by column block.

    The first axis is stepped and sliced by ``width``, the second axis by
    ``height``; any trailing axes (e.g. colour channels) are kept whole.

    Returns:
        A list of array views into ``image``.
    """
    return [
        image[row:row + width, col:col + height]
        for col in range(0, image.shape[1], height)
        for row in range(0, image.shape[0], width)
    ]

def load_data(path_data, path_gts, train : bool, device, operations):
    """Open the images (and masks) behind the given paths and preprocess them.

    Args:
        path_data: Paths of the input images.
        path_gts: Paths of the ground-truth masks, or ``None``.
        train: Forwarded to ``preprocess``; selects the training pipeline.
        device: Unused here.
        operations: Preprocessing switches forwarded to ``preprocess``.

    Returns:
        ``(data, gts)``. In training mode both come from ``preprocess``;
        otherwise ``gts`` is the list of opened masks as-is (empty when
        ``path_gts`` is ``None``).
    """
    images = [Image.open(path) for path in path_data]
    masks = [] if path_gts is None else [Image.open(path) for path in path_gts]

    if not train:
        images, _ = preprocess(images, None, operations, False)
        return images, masks

    return preprocess(images, masks, operations, True)

def postprocess(y):
    """Average groups of six predictions after undoing their transformation.

    ``y`` is read in consecutive groups of six square maps: the plain
    prediction followed by the predictions for the left-right flip, the
    90/180/270 degree rotations and the top-bottom flip. Each map is mapped
    back with the inverse operation and the six maps are averaged.

    Returns:
        A list with one averaged map per group.
    """
    stacked = np.array(y)
    side = stacked[0][0].shape[0]
    groups = stacked.reshape((-1, 6, side, side))

    # Inverse operation for each position inside a group.
    undo = (
        lambda view: view,
        np.fliplr,
        lambda view: np.rot90(view, k=3),
        lambda view: np.rot90(view, k=2),
        lambda view: np.rot90(view, k=1),
        np.flipud,
    )

    return [
        np.mean(np.stack([restore(view) for restore, view in zip(undo, group)]), axis=0)
        for group in groups
    ]

Define the dataset

In [8]:
class RoadSegmentationDataset(Dataset):
    """In-memory dataset of image tensors and, optionally, mask tensors.

    Everything is loaded, preprocessed and converted to tensors in the
    constructor. In training mode an item is an ``(image, mask)`` pair,
    otherwise just the image.
    """

    def __init__(self, data_path, gt_path, operations: dict, train: bool, device = None):
        self.train = train
        self.device = device
        images, masks = load_data(data_path, gt_path, train, device, operations)
        # Normalised images are already scaled; all others are divided by 255.
        scale_to_unit = not operations['normalization']
        if masks is not None:
            self.gt = [mask_to_tensor(mask, device) for mask in masks]
        self.data = [image_to_tensor(image, device, divide = scale_to_unit) for image in images]

    def __len__(self):
        """Number of images held by the dataset."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the image at ``index``, paired with its mask in training mode."""
        sample = self.data[index]
        return (sample, self.gt[index]) if self.train else sample

Define Model

In [9]:
class RoadSegmentationModel(nn.Module):
    """DeepLabV3-based road segmentation model with training and submission helpers.

    Attributes:
        device: Device the network and the batches are moved to.
        pre_trained_network: Network returned by ``deeplab_model()``.
        criterion: ``BCEWithLogitsLoss`` applied to the raw network output.
        th: Threshold used to turn pixel predictions into patch labels.
        lr: Learning rate of the Adam optimizer.
        max_iter: Number of training epochs.
    """

    def __init__(self, device, lr = LR, th =TH, max_iter = MAX_ITER):
        super().__init__()
        self.device = device
        self.pre_trained_network = deeplab_model()
        self.criterion = nn.BCEWithLogitsLoss()
        self.pre_trained_network.to(self.device)
        self.th = th
        self.lr = lr
        self.max_iter = max_iter

    def forward(self, data):
        """Run the network on the images of a batch.

        Args:
            data: Batch whose first element holds the input images.

        Returns:
            The raw (pre-sigmoid) network output.
        """
        data_x = data[0].to(self.device)
        return self.pre_trained_network(data_x)

    def train_epoch(self, loader, optimizer):
        """Train for one pass over ``loader`` and return the mean batch loss."""
        self.pre_trained_network.train()
        batch_losses = []
        for batch in tqdm(loader):
            logits = self.forward(batch)
            target = batch[1].to(self.device)
            batch_loss = self.criterion(logits, target)
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()
            batch_losses.append(batch_loss.item())
        return np.mean(batch_losses)

    def test_epoch(self, loader):
        """Return the mean batch loss over ``loader`` without updating weights."""
        self.pre_trained_network.eval()
        batch_losses = []
        with torch.no_grad():
            for batch in tqdm(loader):
                logits = self.forward(batch)
                target = batch[1].to(self.device)
                batch_losses.append(self.criterion(logits, target).item())
        return np.mean(batch_losses)

    def get_score(self,loader, do_postprocessing=True):
        """Compute the patch-level F1 score and accuracy on ``loader``.

        Predictions and the masks stored in ``loader.dataset.gt`` are both
        reduced to patch labels with the threshold ``self.th``.

        Returns:
            ``(f1, accuracy)``.
        """
        self.pre_trained_network.eval()
        predicted_labels = []
        true_labels = []
        predictions = self.make_prediction(loader, do_postprocessing)
        masks = loader.dataset.gt

        for prediction, mask in zip(predictions, masks):
            labels, _ = transform_prediction_to_patch(prediction, 1, th=self.th)
            predicted_labels.extend(labels)
            mask = mask[0].cpu().detach().numpy()
            reference, _ = transform_prediction_to_patch(mask, 1, th=self.th)
            true_labels.extend(reference)
        return f1_score(true_labels, predicted_labels), accuracy_score(true_labels, predicted_labels)

    def make_prediction(self, loader, do_postprocessing):
        """Predict a per-pixel probability map for every image of ``loader``.

        Args:
            loader: Loader yielding batches of image tensors (no masks).
            do_postprocessing: When True the maps are passed through
                ``postprocess``.

        Returns:
            A list of 2-D NumPy arrays, one per image (fewer after
            post-processing, which averages groups of maps).
        """
        predictions = []
        self.pre_trained_network.eval()
        with torch.no_grad():
            for batch in tqdm(loader):
                probabilities = torch.sigmoid(self.pre_trained_network(batch.to(self.device)))
                # Keep the single output channel of EVERY sample in the
                # batch, not only of the first one.
                predictions.extend(probabilities[:, 0].cpu().numpy())
        if do_postprocessing:
            predictions = postprocess(predictions)
        return predictions

    def train(self, train_loader=True, test_loader=None, evaluate=False, evaluate_loader=None, do_postprocessing=False):
        """Train the network for ``self.max_iter`` epochs.

        This method shares its name with ``nn.Module.train(mode)``, which
        ``nn.Module.eval()`` calls internally. To keep those working, a
        boolean first argument is forwarded to the base implementation.

        Args:
            train_loader: Loader of ``(image, mask)`` training batches, or a
                bool to switch training/evaluation mode like ``nn.Module.train``.
            test_loader: Loader of ``(image, mask)`` batches used for the
                per-epoch test loss.
            evaluate: When True, F1 and accuracy are computed after every epoch.
            evaluate_loader: Loader handed to ``get_score`` when ``evaluate`` is set.
            do_postprocessing: Forwarded to ``get_score``.

        Returns:
            Dict with the per-epoch lists ``'train_loss'``, ``'test_loss'``,
            ``'f1'`` and ``'accuracy'`` (the latter two hold 0 when
            ``evaluate`` is False), plus ``'best_loss'``: the lowest test
            loss and the epoch it occurred in.
        """
        if isinstance(train_loader, bool):
            return super().train(train_loader)

        optimizer = torch.optim.Adam(self.pre_trained_network.parameters(), lr=self.lr)
        losses = []
        test_losses = []
        accuracies = []
        f1s = []
        best_loss = {'loss': float('inf'), 'epoch': 0}

        # Exactly max_iter epochs, numbered from 1. A bounded loop also
        # guarantees termination for max_iter <= 1.
        for epoch in range(1, self.max_iter + 1):
            print("EPOCH " + str(epoch))
            l_train = self.train_epoch(train_loader, optimizer)
            print("epoch trained, now testing")
            losses.append(l_train)
            l_test = self.test_epoch(test_loader)
            test_losses.append(l_test)
            print("Test Loss for epoch " + str(epoch) + " = " + str(l_test))

            f1 = 0
            acc = 0
            if evaluate:
                f1, acc = self.get_score(evaluate_loader, do_postprocessing)
                print("LOSS = " + str(l_test) + " F1 = " + str(f1) + " ACCURACY = " + str(acc))
            f1s.append(f1)
            accuracies.append(acc)

            if l_test < best_loss['loss']:
                best_loss['loss'] = l_test
                best_loss['epoch'] = epoch

        results = {}
        results['train_loss'] = losses
        results['f1'] = f1s
        results['accuracy'] = accuracies
        results['test_loss'] = test_losses
        results['best_loss'] = best_loss
        return results

    def submit(self, test_loader):
        """Predict the test images and write the patch labels to ``SUBMISSION_PATH``.

        Images are numbered from 1 in the order the loader yields them. The
        target directory is created when it does not exist yet.
        """
        import os

        predictions = self.make_prediction(test_loader, False)

        ret_ids = []
        ret_labels = []
        for image_id, prediction in enumerate(predictions, start=1):
            labels, ids = transform_prediction_to_patch(prediction, image_id, th=self.th)
            ret_labels.extend(labels)
            ret_ids.extend(ids)

        target_dir = os.path.dirname(SUBMISSION_PATH)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        pd.DataFrame({'id': ret_ids, 'prediction' : ret_labels}).to_csv(SUBMISSION_PATH,index=False)

Cross-validation over TH and LR

In [None]:
def build_k_indices(N, k_fold, seed = SEED):
    """Randomly partition the indices ``0 .. N-1`` into ``k_fold`` equal folds.

    The global NumPy random generator is re-seeded with ``seed``. When ``N``
    is not a multiple of ``k_fold`` the trailing indices of the permutation
    are left out.

    Args:
        N:      number of samples (an integer)
        k_fold: K in K-fold, i.e. the fold num
        seed:   the random seed

    Returns:
        A 2D array of shape=(k_fold, N // k_fold) that indicates the data indices for each fold

    >>> build_k_indices(4, 2, 1)
    array([[3, 2],
           [0, 1]])
    """
    fold_size = int(N / k_fold)
    np.random.seed(seed)
    shuffled = np.random.permutation(N)
    # Consecutive slices of equal length are the rows of a reshaped prefix.
    return shuffled[: k_fold * fold_size].reshape(k_fold, fold_size)


def cross_validation_step(data, gts, k_indices, k, th, lr):
    """Train and evaluate one fold of the cross-validation.

    Fold ``k`` is held out; a fresh model is trained on the remaining
    samples with augmentation, normalisation and patching enabled.

    Args:
        data: Sequence of training image paths.
        gts: Sequence of ground-truth mask paths, aligned with ``data``.
        k_indices: Fold indices as returned by ``build_k_indices``.
        k: Index of the fold used for testing.
        th: Patch threshold given to the model.
        lr: Learning rate given to the model.

    Returns:
        The list of F1 scores on the held-out fold, one per epoch.
    """
    held_out = k_indices[k]
    train_data = np.delete(data, held_out, axis = 0)
    train_gts = np.delete(gts, held_out, axis = 0)
    test_x = [data[index] for index in held_out]
    test_y = [gts[index] for index in held_out]

    operations = {'augment': True, 'normalization': True, 'patches': True}

    train_set = RoadSegmentationDataset(train_data, train_gts, operations, True, DEVICE)
    test_set = RoadSegmentationDataset(test_x, test_y, operations, True, DEVICE)
    # Non-training dataset for scoring.
    evaluation_dataset = RoadSegmentationDataset(test_x, test_y, operations, False, DEVICE)

    train_loader = DataLoader(train_set, BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_set, BATCH_SIZE, shuffle=False)
    evaluation_loader = DataLoader(evaluation_dataset, 1, shuffle=False)

    model = RoadSegmentationModel(DEVICE, th=th, lr=lr, max_iter=10)
    results = model.train(train_loader, test_loader, True, evaluation_loader, False)
    return results["f1"]


def validation_over_threshold(data, gts, k_indices, k, threshold):
    """Run fold ``k`` with the given patch threshold and the global learning rate ``LR``."""
    return cross_validation_step(data, gts, k_indices, k, th=threshold, lr=LR)

def validation_over_learning_rate(data, gts, k_indices, k, lr):
    """Run fold ``k`` with the given learning rate and the global threshold ``TH``."""
    return cross_validation_step(data, gts, k_indices, k, th=TH, lr=lr)

def cross_validation(data, gts, parameters, parameter_name, N, seed = SEED, k_fold = K_FOLD):
    """Pick the best value of one hyper-parameter by k-fold cross-validation.

    For every candidate value a model is trained on each fold and the F1
    score of its last epoch is averaged over the folds.

    Args:
        data: Sequence of training image paths.
        gts: Sequence of ground-truth mask paths, aligned with ``data``.
        parameters: Candidate values to try.
        parameter_name: ``"threshold"`` or ``"learning rate"``.
        N: Number of samples to build the folds from.
        seed: Seed used to build the folds.
        k_fold: Number of folds.

    Returns:
        ``(optimal_parameter, best_performance)``.

    Raises:
        ValueError: If ``parameter_name`` is not a supported name. Previously
            such a name silently scored 0 for every fold.
    """
    validators = {
        "threshold": validation_over_threshold,
        "learning rate": validation_over_learning_rate,
    }
    if parameter_name not in validators:
        raise ValueError(
            "Unsupported parameter_name " + repr(parameter_name)
            + "; expected one of " + str(sorted(validators))
        )
    validate = validators[parameter_name]

    k_indices = build_k_indices(N,k_fold,seed)
    best_performance = -1
    optimal_parameter = -1

    for parameter in parameters:
        print("Trying " + str(parameter_name) + " = " + str(parameter))

        # Score of a fold = F1 after its last training epoch.
        fold_scores = [
            validate(data, gts, k_indices, k, parameter)[-1]
            for k in range(k_fold)
        ]
        avg_performance = sum(fold_scores) / k_fold

        print("Cross-Validation for " + parameter_name + " = " + str(parameter) + " with f1_score = " + str(avg_performance))
        if best_performance == -1 or avg_performance > best_performance:
            best_performance = avg_performance
            optimal_parameter = parameter

    print("Optimal Patameter for " + parameter_name + " = " + str(optimal_parameter))
    return optimal_parameter, best_performance

Choose which experiment to run

In [None]:
# Select one of the predefined experiments (1-4). Any other value leaves the
# flags defined earlier untouched.
#   1: patches only
#   2: patches + normalisation
#   3: patches + augmentation
#   4: patches + augmentation, with TH and LR set explicitly
# Experiments 1-3 do not assign TH and LR, so they run with whatever values
# those names currently hold.
EXPERIMENT = 4

if EXPERIMENT in (1, 2, 3, 4):
    # Settings shared by every predefined experiment.
    divide_patches = True
    training = True
    submmission = True
    # Settings that distinguish the experiments.
    normalize = EXPERIMENT == 2
    augment = EXPERIMENT in (3, 4)

if EXPERIMENT == 4:
    TH = 0.25
    LR = 0.0003

Run the training and produce submission

In [None]:
def run():
    """Run the configured pipeline: optional cross-validation, training, submission.

    Behaviour is driven by the module-level flags ``validation``,
    ``training`` and ``submmission`` and by the preprocessing flags
    ``augment``, ``normalize`` and ``divide_patches``.

    Note:
        With ``submmission`` enabled the model is trained on the full
        training set, so the reported test loss is computed on images the
        model has also been trained on.
    """
    import os

    torch.manual_seed(SEED)
    np.random.seed(SEED)

    operations = {
        'augment': augment,
        'normalization': normalize,
        'patches': divide_patches,
    }

    data = sorted(glob(DATA_PATH + "/training/images/*.png"))
    gts = sorted(glob(DATA_PATH + "/training/groundtruth/*.png"))

    optimal_th = TH
    optimal_lr = LR
    if validation:
        print("Starting Cross Validation over Foreground Threshold")
        optimal_th,_ = cross_validation(data,gts,THRESHOLD_VALIDATION_VECTOR,"threshold",len(data))
        print("Starting Cross Validation over Learning Rate")
        optimal_lr,_ = cross_validation(data,gts,LEARNING_RATE_VALIDATION_VECTOR,"learning rate",len(data))

    # Building the datasets loads, augments and converts every image, so it
    # is only done when the model is actually going to be trained.
    train_loader = None
    test_loader = None
    if training:
        train_data, train_labels, test_data, test_labels = split_data(data,gts)

        if submmission:
            # run training on full dataset
            train_data = data
            train_labels = gts
        train_set = RoadSegmentationDataset(train_data,train_labels,operations, True, DEVICE)
        test_set = RoadSegmentationDataset(test_data,test_labels,operations,True,DEVICE)

        train_loader = DataLoader(train_set, BATCH_SIZE, shuffle=True)
        test_loader = DataLoader(test_set,BATCH_SIZE, shuffle=False)

    model = RoadSegmentationModel(DEVICE, th= optimal_th, lr = optimal_lr)

    if training:
        results = model.train(train_loader, test_loader,False,None,augment)
        loss = results['train_loss']
        test_loss = results['test_loss']
        print("TRAINING LOSS = " + str(loss[-1]))
        print("TEST LOSS = " + str(test_loss[-1]))

    if submmission:
        # Order the test images by the number at the end of their folder
        # name; os.path keeps this independent of the path separator.
        submission_images = sorted(
            glob(DATA_PATH + "/test_set_images/*/*"),
            key = lambda path: int(os.path.basename(os.path.dirname(path)).split('_')[-1]))

        submission_set = RoadSegmentationDataset(submission_images, None, operations, False, DEVICE)
        submission_loader = DataLoader(submission_set,1)

        model.submit(submission_loader)
run()