## **SPECIFIC FOR GOOGLE COLAB**

In [None]:
!lscpu |grep 'Model name'
!lscpu |grep 'Core(s) per socket:'
!free -h
!lscpu |grep 'Thread(s) per core'

Model name:          Intel(R) Xeon(R) CPU @ 2.20GHz
Core(s) per socket:  1
              total        used        free      shared  buff/cache   available
Mem:            12G        627M        9.7G        1.2M        2.3G         11G
Swap:            0B          0B          0B
Thread(s) per core:  2


In [1]:
# Mount Google Drive and load project 2
# WARNING, we have to add the shared drive of Sami on our home directory (by creating an alias)
from google.colab import drive
drive.mount('/content/drive')
#Move to the shared directory
%cd /content/drive/MyDrive/ml_project_2_drive/ml_project_2/
# list all files
! ls

# Read helpers python file
!cp /content/drive/MyDrive/ml_project_2_drive/ml_project_2/script/helper_functions.py .

# A good help can be found here:
#https://medium.com/analytics-vidhya/how-to-use-google-colab-with-github-via-google-drive-68efb23a42d

Mounted at /content/drive
/content/drive/MyDrive/ml_project_2_drive/ml_project_2
data  helper_functions.py  project2_description.pdf  README.md	script


In [7]:
!git status
#test

On branch main
Your branch is up to date with 'origin/main'.

Changes to be committed:
  (use "git reset HEAD <file>..." to unstage)

	[32mmodified:   script/CNN.ipynb[m

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git checkout -- <file>..." to discard changes in working directory)

	[31mmodified:   script/CNN.ipynb[m



In [6]:
!git log --oneline
#!git config --global user.email "etienne.bruno@epfl.ch"
#!git config --global user.name "Etienne BRUNO"

In [None]:
!git config --global user.email "sami.ferchiou@epfl.ch"
!git config --global user.name "samiferchiou"

In [None]:
!git pull

remote: Enumerating objects: 12, done.[K
remote: Counting objects:   8% (1/12)[Kremote: Counting objects:  16% (2/12)[Kremote: Counting objects:  25% (3/12)[Kremote: Counting objects:  33% (4/12)[Kremote: Counting objects:  41% (5/12)[Kremote: Counting objects:  50% (6/12)[Kremote: Counting objects:  58% (7/12)[Kremote: Counting objects:  66% (8/12)[Kremote: Counting objects:  75% (9/12)[Kremote: Counting objects:  83% (10/12)[Kremote: Counting objects:  91% (11/12)[Kremote: Counting objects: 100% (12/12)[Kremote: Counting objects: 100% (12/12), done.[K
remote: Compressing objects:  25% (1/4)[Kremote: Compressing objects:  50% (2/4)[Kremote: Compressing objects:  75% (3/4)[Kremote: Compressing objects: 100% (4/4)[Kremote: Compressing objects: 100% (4/4), done.[K
remote: Total 8 (delta 4), reused 8 (delta 4), pack-reused 0[K
Unpacking objects:  12% (1/8)   Unpacking objects:  25% (2/8)   Unpacking objects:  37% (3/8)   Unpacking objects:  50% (4/8)

In [5]:
!git add .
!git commit -m "update from google colab"
!git push


*** Please tell me who you are.

Run

  git config --global user.email "you@example.com"
  git config --global user.name "Your Name"

to set your account's default identity.
Omit --global to set the identity only in this repository.

fatal: unable to auto-detect email address (got 'root@343662bfd686.(none)')
Everything up-to-date


## **MACHINE LEARNING MODEL**

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
import torchvision
import matplotlib.pyplot as plt
import numpy as np
import os,sys
from PIL import Image
from pathlib import Path
from tqdm import tqdm
import time
from torch.optim.lr_scheduler import ExponentialLR
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

from helper_functions import *

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
use_cuda = torch.cuda.is_available()

In [None]:
NBR_EPOCHS = 2
BATCH_SIZE = 10
LEARNING_RATE =1e-3
WEIGHT_DECAY=0.01
DIM = 1
GAMMA = 0.9

In [None]:
def load_train_dataset():
    # Loaded a set of images
    root_dir = "../data/training/"
    #root_dir = "/content/drive/MyDrive/ml_project_2_drive/ml_project_2/data/training/"
    image_dir = root_dir + "images/"
    gt_dir = root_dir + "groundtruth/"
    files = os.listdir(image_dir)
    n = len(files)
    to_tensor = T.ToTensor()
    imgs = [to_tensor(Image.open(image_dir + files[i])) for i in range(n)]
    gt_imgs = [to_tensor(Image.open(gt_dir + files[i])).type(torch.LongTensor) for i in range(n)]
    return (imgs, gt_imgs)

In [None]:
%%time
from torch.utils.data import Dataset, DataLoader

# load initial images
imgs_init, gt_imgs_init = load_train_dataset()
# proceed to data augmnetation on both train images and ground truth images
all_imgs = compose_all_functions_for_data(imgs_init)
all_gt_imgs = compose_all_functions_for_data(gt_imgs_init)

# split data into training and validation sets
x_train, x_validation, y_train, y_validation = train_test_split(all_imgs, all_gt_imgs, test_size=0.2)

CPU times: user 1min 20s, sys: 1min 17s, total: 2min 37s
Wall time: 3min 32s


In [None]:
%%time

class trainDataset(Dataset): 
    def __init__(self, x_train, y_train):
        # Data augmentation
        self.x_train = x_train
        self.y_train = y_train
        self.n_samples = len(self.x_train)
        
    def __getitem__(self, index):
        return self.x_train[index], self.y_train[index]

    def __len__(self):
        return self.n_samples

class validateDataset(Dataset): 
    def __init__(self, x_validation, y_validation):
        # Data augmentation
        self.x_validation = x_validation
        self.y_validation = y_validation
        self.n_samples = len(self.x_validation)
        
    def __getitem__(self, index):
        return self.x_validation[index], self.y_validation[index]

    def __len__(self):
        return self.n_samples

# create datasets
torch.manual_seed(1)
trainset = trainDataset(x_train, y_train)
testset = validateDataset(x_validation, y_validation)
loaders = {
    'train_loader' : torch.utils.data.DataLoader(trainset, batch_size = BATCH_SIZE, shuffle=True),
    'test_loader'  : torch.utils.data.DataLoader(testset, batch_size = BATCH_SIZE, shuffle=True),
}

CPU times: user 3.51 ms, sys: 17.5 ms, total: 21 ms
Wall time: 67 ms


In [None]:
#img_temp = trainset[5432][0]
#to_PIL = T.ToPILImage()
#to_PIL(img_temp)

In [None]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        
        # 
        self.pool_d = nn.MaxPool2d(2, 2)
        self.pool_u = nn.Upsample(scale_factor=2)
        
        # Activation function
        self.activ = nn.ReLU()
        self.final_activ = nn.Sigmoid()
        
        # Convolution Downwards
        self.conv_1 = nn.Conv2d(3, 64, (3,3), padding=(1, 1))
        self.conv_2 = nn.Conv2d(64, 64, (3,3), padding=(1, 1))
        
        self.conv_3 = nn.Conv2d(64, 128, (3,3), padding=(1, 1))
        self.conv_4 = nn.Conv2d(128, 128, (3,3), padding=(1, 1))
        
        self.conv_5 = nn.Conv2d(128, 256, (3,3), padding=(1, 1))
        self.conv_6 = nn.Conv2d(256, 256, (3,3), padding=(1, 1))
        
        self.conv_7 = nn.Conv2d(256, 512, (3,3), padding=(1, 1))
        self.conv_8 = nn.Conv2d(512, 512, (3,3), padding=(1, 1))
        
        self.conv_9 = nn.Conv2d(512, 1024, (3,3), padding=(1, 1))
        self.conv_10 = nn.Conv2d(1024, 1024, (3,3), padding=(1, 1))
        
        
        # Upconvolution
        self.upconv_1 = nn.Conv2d(512+1024, 512, (3,3), padding=(1, 1))
        self.upconv_2 = nn.Conv2d(512, 512, (3,3), padding=(1, 1))
        
        self.upconv_3 = nn.Conv2d(256+512, 256, (3,3), padding=(1, 1))
        self.upconv_4 = nn.Conv2d(256, 256, (3,3), padding=(1, 1))
        
        self.upconv_5 = nn.Conv2d(128+256, 128, (3,3), padding=(1, 1))
        self.upconv_6 = nn.Conv2d(128, 128, (3,3), padding=(1, 1))
        
        self.upconv_7 = nn.Conv2d(64+128, 64, (3,3), padding=(1, 1))
        self.upconv_8 = nn.Conv2d(64, 64, (3,3), padding=(1, 1))
        self.upconv_9 = nn.Conv2d(64, 2, (1,1))


    def forward(self, x):
        # Convolution with activation and max_pooling
        xd_1 = self.activ(self.conv_1(x))
        xd_2 = self.activ(self.conv_2(xd_1))
    
        xd_3 = self.activ(self.conv_3(self.pool_d(xd_2)))
        xd_4 = self.activ(self.conv_4(xd_3))
        
        xd_5 = self.activ(self.conv_5(self.pool_d(xd_4)))
        xd_6 = self.activ(self.conv_6(xd_5))
        
        xd_7 = self.activ(self.conv_7(self.pool_d(xd_6)))
        xd_8 = self.activ(self.conv_8(xd_7))
        
        xd_9 = self.activ(self.conv_9(self.pool_d(xd_8)))
        xd_10 = self.pool_u(self.activ(self.conv_10(xd_9)))

        # "Fractionally / Backward strided convolution" with activation and upsampling
        xu_1 = self.activ(self.upconv_1(torch.cat((xd_8, xd_10), dim=DIM)))
        xu_2 = self.pool_u(self.activ(self.upconv_2(xu_1)))
        
        xu_3 = self.activ(self.upconv_3(torch.cat((xd_6, xu_2), dim=DIM)))
        xu_4 = self.pool_u(self.activ(self.upconv_4(xu_3)))
        
        xu_5 = self.activ(self.upconv_5(torch.cat((xd_4, xu_4), dim=DIM)))
        xu_6 = self.pool_u(self.activ(self.upconv_6(xu_5)))
        
        xu_7 = self.activ(self.upconv_7(torch.cat((xd_2, xu_6), dim=DIM)))
        xu_8 = self.activ(self.upconv_8(xu_7))
        xu_9 = self.final_activ(self.upconv_9(xu_8))
    
        return xu_9

model = ConvNet().to(device)

In [None]:
def save_ckp(state, is_best, checkpoint_path, best_model_path):
    """
    state:            checkpoint we want to save
    is_best:          boolean to indicates if it is the best checkpoint
    checkpoint_path:  path to save checkpoint
    best_model_path:  path to save best model
    """
    f_path = checkpoint_path
    # save checkpoint data to the path given, checkpoint_path
    torch.save(state, f_path)
    # if it is a best model, min validation loss
    if is_best:
        best_fpath = best_model_path
        # copy that checkpoint file to best path given, best_model_path
        shutil.copyfile(f_path, best_fpath)

In [None]:
def load_ckp(checkpoint_fpath, model, optimizer):
    """
    checkpoint_path: path to save checkpoint
    model:           model that we want to load checkpoint parameters into       
    optimizer:       optimizer we defined in previous training
    """
    # load check point
    checkpoint = torch.load(checkpoint_fpath)
    # initialize state_dict from checkpoint to model
    model.load_state_dict(checkpoint['state_dict'])
    # initialize optimizer from checkpoint to optimizer
    optimizer.load_state_dict(checkpoint['optimizer'])
    # initialize valid_loss_min from checkpoint to valid_loss_min
    valid_loss_min = checkpoint['valid_loss_min']
    # initialize the adaptative learning rate
    scheduler = checkpoint['scheduler']
    # return model, optimizer, epoch value, min validation loss
    return model, optimizer, checkpoint['epoch'], valid_loss_min.item(), scheduler

In [None]:
def train(start_epochs, n_epochs, valid_loss_min_input, loaders, model, optimizer, scheduler, criterion, use_cuda, checkpoint_path, best_model_path):
    """
    """
    # initialize tracker for minimum validation loss
    valid_loss_min = valid_loss_min_input 
    
    for epoch in range(start_epochs, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        model.train()
        train_steps = len(loaders['train_loader'].dataset)
        for batch_idx, (data, target) in enumerate(loaders['train_loader']):
            # Measure training time of one batch sample
            start = time.time()
        
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            
            ## find the loss and update the model parameters accordingly
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            print(target.shape)
            print(output.shape)
            pred_training= torch.reshape(target, (BATCH_SIZE, 400, 400))
            loss = criterion(output, pred_training)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            ## record the average training loss, using something like
            ## train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            
            print(f"Epoch {epoch},  Batch {batch_idx}/{train_steps} - Duration: {time.time()-start}, Loss:{loss.item():.4f}")
        
        ######################    
        # validate the model #
        ######################
        model.eval()
        test_steps = len(loaders['test_loader'].dataset)
        for batch_idx, (data, target) in enumerate(loaders['test_loader']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
           
            ## update the average validation loss
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            pred_eval= torch.reshape(target, (BATCH_SIZE, 400, 400))
            loss = criterion(output, pred_eval)
            # update average validation loss 
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))
            
            # F1 score computation
            prediction_raveled = torch.flatten(torch.argmax(pred_eval, dim=1))
            y_validation_raveled = torch.flatten(target)
            f1 = f1_score(y_validation_raveled, prediction_raveled)
            print(f'{f1 = }')
        
        # perform a step of the adaptative learning rate
        scheduler.step()
    
        # calculate average losses
        train_loss = train_loss/train_steps
        valid_loss = valid_loss/test_steps

        # print training/validation statistics 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, 
            train_loss,
            valid_loss
            ))
        
        # create checkpoint variable and add important data
        checkpoint = {
            'epoch': epoch + 1,
            'valid_loss_min': valid_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.get_last_lr(),
        }
        
        # save checkpoint
        save_ckp(checkpoint, False, checkpoint_path, best_model_path)
        
        ## save the model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,valid_loss))
            # save checkpoint as best model
            save_ckp(checkpoint, True, checkpoint_path, best_model_path)
            valid_loss_min = valid_loss
            
    return model

In [None]:
#criterion = nn.CrossEntropyLoss()# nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
#define adaptative learning rate
scheduler = ExponentialLR(optimizer, GAMMA)
#loss function used in our neural network
criterion = nn.CrossEntropyLoss()

trained_model = train(1, 3, np.Inf, loaders, model, optimizer, scheduler, criterion, use_cuda,
                      "../checkpoint/current_checkpoint.pt",
                      "../checkpoint/best_model.pt"
                     )

torch.Size([10, 1, 400, 400])
torch.Size([10, 2, 400, 400])
Epoch 1,  Batch 0/7680 - Duration: 177.8477532863617, Loss:0.6941
torch.Size([10, 1, 400, 400])
torch.Size([10, 2, 400, 400])
