## **Imports and Directory Paths**

In [None]:
%cd code
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
from model import *
import os, sys
from torch.utils.data import DataLoader
from my_dataset import RoadDataset
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
from sklearn.metrics import f1_score
import cv2
from perf_tracker import TrainingTracker
from torch.optim.lr_scheduler import OneCycleLR
import train_helpers as th
from torch.nn.functional import sigmoid
from PIL import Image
import matplotlib.image as mpimg
from torch.optim.lr_scheduler import LambdaLR 
from torch.optim.lr_scheduler import StepLR
from torch.optim.lr_scheduler import ReduceLROnPlateau



%run -i model.py
%run -i my_dataset.py
%run -i perf_tracker.py
%run -i train_helpers.py


# Locations of the image / ground-truth folders for each split.
TRAIN_IMG_DIR = "Data/training/train/images"
TRAIN_MASK_DIR = "Data/training/train/groundtruth"
VAL_IMG_DIR = "Data/training/val/images"
VAL_MASK_DIR = "Data/training/val/groundtruth"
ROOT_DIR = os.path.join('Data', 'training')
IMG_DIR = os.path.join(ROOT_DIR, 'images')
MASK_DIR = os.path.join(ROOT_DIR, 'groundtruth')

# Training on the validation split makes the validation loss / F1 (and therefore
# the "best model" checkpoints) meaningless, so it must be an explicit opt-in
# instead of an unconditional override. Set to True only for a quick smoke test.
TRAIN_ON_VAL_SPLIT = False
if TRAIN_ON_VAL_SPLIT:
    TRAIN_IMG_DIR = VAL_IMG_DIR
    TRAIN_MASK_DIR = VAL_MASK_DIR


  check_for_updates()


## **Training Constants**

In [2]:


# Hyper-parameters of the run. Every name below is read by later cells.

# Patch size handed to RoadDataset and to the mask reconstruction helper
# (608 was the alternative setting).
PATCH_SIZE = 400 #608 TO MODIFY AGAIN WHEN TRAINING WITH 400X400
# Forwarded as `stride` to the training RoadDataset and to th.reconstruct_all_masks.
STRIDE = None
# Forwarded to RoadDataset as `num_augmentations`.
AUG_FACTOR = (0,0) # If 1: probability of the two flips (translated from the original French note)


# Batch size of the training DataLoader (4 was the previous value).
BATCH_SIZE = 2 #4
# True selects the OneCycleLR scheduler (warm-up learning rate).
LR_MODIFIER = False #True to use warm up Lr
# Number of passes over the training loader.
NUM_EPOCH = 3
# Initial learning rate given to the optimizer.
LEARNING_RATE = 2e-3 #For Att-Unet
#LEARNING_RATE = 1e-3 #For Jaccard
#LEARNING_RATE = 5e-4
# Forwarded to RoadDataset as `CLAHE`
# (presumably toggles CLAHE contrast equalization -- confirm in my_dataset.py).
CLAHE = True
# Only consulted when LR_MODIFIER is False: True selects ReduceLROnPlateau, False selects StepLR.
Plateau = True #True to use ReduceLROnPlateau
# True computes the validation F1 score after every epoch and plots it at the end.
TRACK_F1 = True #True to track F1 score and plot

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA instead of crashing.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Attention U-Net taking 3-channel images and producing a 1-channel logit map.
model = AttU_Net(img_ch=3, output_ch=1).to(device)
init_weights(model, init_type='kaiming', gain=0.02) #TO COMMENT WHEN OTHER MODELS AS UNET

# The loss works on raw logits (the sigmoid is part of BCEWithLogitsLoss).
# CRITERION / OPTIMIZER are the labels stored in the tracker's hyper-parameters.
criterion = nn.BCEWithLogitsLoss()
CRITERION = 'BCE_loss'

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE) #, weight_decay=1e-4)
OPTIMIZER = 'Adam'


#KERNEL_SIZE = 25
#criterion = BCEDicePenalizeBorderLoss(kernel_size= KERNEL_SIZE)
#CRITERION = f'BCEDicePenalizeBorderLoss_Kernel_{KERNEL_SIZE}'
#pos_weight = torch.tensor([4.35], dtype=torch.float32).to(device)
#criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
#criterion = IoULoss()
#CRITERION = 'Jaccard'
#optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)
#OPTIMIZER = 'AdamW'



#CRITERION = f'WeightedBCELoss




initialize network with kaiming


## **Loading and Preprocessing of Data**

In [3]:


# Both pipelines use a zero mean and unit std with max_pixel_value=255, then
# convert the result to a tensor.
train_transform = A.Compose(
    [
        A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
        ToTensorV2(),
    ]
)
val_transform = A.Compose(
    [
        A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
        ToTensorV2(),
    ]
)

# Training set: receives the stride and augmentation settings.
train_dataset = RoadDataset(
    image_dir=TRAIN_IMG_DIR, mask_dir=TRAIN_MASK_DIR, transform=train_transform,
    patch_size=PATCH_SIZE, stride=STRIDE, num_augmentations=AUG_FACTOR, CLAHE=CLAHE,
)
# Validation set: no stride / augmentation arguments, so RoadDataset's defaults apply.
val_dataset = RoadDataset(
    image_dir=VAL_IMG_DIR, mask_dir=VAL_MASK_DIR, transform=val_transform,
    patch_size=PATCH_SIZE, CLAHE=CLAHE,
)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
# The whole validation set is served as one ordered batch.
val_loader = DataLoader(val_dataset, batch_size=len(val_dataset), shuffle=False, num_workers=0)

# Validation images and masks, used later for the F1 score and the plots.
val_img, val_mask = th.extract_val_image_mask(val_img_dir=VAL_IMG_DIR, val_mask_dir=VAL_MASK_DIR)
    

## **Scheduler and Tracker**

In [4]:

def _run_hyperparams(lr_modifier):
    """Return the hyper-parameter record stored by the tracker for this run.

    `lr_modifier` describes the selected learning-rate schedule (a dict of
    OneCycleLR settings or a short label).
    """
    return {
        "NUM_EPOCH": NUM_EPOCH,
        "learning_rate": LEARNING_RATE,
        "batch_size": BATCH_SIZE,
        "optimizer": OPTIMIZER,
        "criterion": CRITERION,
        "LR_MODIFIER": lr_modifier,
        "patch_size": PATCH_SIZE,
    }


if LR_MODIFIER:
    # One-cycle policy, stepped once per batch inside train_one_epoch.
    max_lr = 2e-3 #2e-2 For all except jaccard
    total_steps = NUM_EPOCH * len(train_loader)
    pct_start = 0.3
    anneal_strategy = 'cos'
    div_factor = 25.0
    # NOTE(review): OneCycleLR ends at initial_lr / final_div_factor, so a value
    # below 1 makes the final LR higher than the starting LR -- confirm intended.
    final_div_factor = 0.05 #0.09

    # The scheduler is built from the very variables that are logged below, so
    # the recorded settings can never drift from the ones actually used.
    scheduler = OneCycleLR(
        optimizer=optimizer,
        max_lr=max_lr,
        total_steps=total_steps,
        pct_start=pct_start,
        anneal_strategy=anneal_strategy,
        div_factor=div_factor,
        final_div_factor=final_div_factor,
    )
    arg = {"max_lr": max_lr, "total_steps": total_steps, "pct_start": pct_start, "anneal_strategy": anneal_strategy, "div_factor": div_factor}
    tracker = TrainingTracker(hyperparams=_run_hyperparams(arg))
elif Plateau:
    print("Plateau")
    # `verbose` is deprecated on LR schedulers and rejected by recent PyTorch
    # releases; the training loop already prints the learning rate every epoch.
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.9, patience=3, min_lr=1e-4) #patience 2??
    tracker = TrainingTracker(hyperparams=_run_hyperparams("redce_on_plateau"))
else:
    # Decay the LR every ~15% of the total number of training batches.
    # StepLR expects a positive integer: cast the numpy float and clamp to at
    # least 1 so a tiny dataset cannot produce a zero step size.
    step_size = max(1, int(np.ceil(len(train_dataset)//BATCH_SIZE * NUM_EPOCH * .15)))
    scheduler = StepLR(optimizer, step_size=step_size, gamma=0.7)
    tracker = TrainingTracker(hyperparams=_run_hyperparams("lr_step"))

Plateau




## **Training and Validation Functions**

In [5]:
def train_one_epoch(model, loader, criterion, optimizer, device, scheduler=None, Plateau = False):
    """Run one optimisation pass over `loader`.

    Args:
        model: network to train; put into training mode by this function.
        loader: sized iterable yielding `(images, masks)` batches. A dimension
            is inserted into the masks at index 1 before the loss is computed.
        criterion: loss called as `criterion(outputs, masks)`.
        optimizer: optimizer stepped once per batch.
        device: device the batches are moved to.
        scheduler: optional LR scheduler. It is stepped after every batch,
            unless it is a ReduceLROnPlateau (which needs a metric and must be
            stepped by the caller) or `Plateau` is True.
        Plateau: when True, never step the scheduler here.

    Returns:
        `(avg_loss, avg_lr)`: mean of the per-batch losses, and the mean
        learning rate reported by the scheduler after each of its steps
        (None when the scheduler is not stepped by this function).

    Raises:
        ValueError: if `loader` contains no batches.
    """
    num_batches = len(loader)
    if num_batches == 0:
        raise ValueError("train_one_epoch received an empty loader")

    # ReduceLROnPlateau.step() requires a metric, so it cannot be stepped per
    # batch even if the caller forgot to pass Plateau=True.
    step_per_batch = (
        scheduler is not None
        and not Plateau
        and not isinstance(scheduler, ReduceLROnPlateau)
    )

    model.train()
    train_loss = 0.0
    lr_sum = 0.0
    loop = tqdm(loader, desc="Training", leave=False)
    for images, masks in loop:
        images, masks = images.to(device), masks.to(device)
        masks = masks.unsqueeze(1)
        outputs = model(images)
        loss = criterion(outputs, masks)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once: each .item() call forces a device sync.
        batch_loss = loss.item()
        train_loss += batch_loss

        if step_per_batch:
            scheduler.step()
            current_lr = scheduler.get_last_lr()[0]
            lr_sum += current_lr
            loop.set_postfix(loss=batch_loss, lr=current_lr)
        else:
            loop.set_postfix(loss=batch_loss)

    avg_loss = train_loss / num_batches
    avg_lr = lr_sum / num_batches if step_per_batch else None

    return avg_loss, avg_lr

def validate(model, loader, criterion, device):
    """Evaluate `model` on `loader` without tracking gradients.

    The model is switched to evaluation mode. For every `(images, masks)`
    batch, a dimension is inserted into the masks at index 1 and the loss is
    computed on the raw model outputs.

    Returns:
        A pair `(mean_loss, predictions)`: the per-batch losses averaged over
        the number of batches, and the raw outputs of every batch (dimension 1
        squeezed, moved to the CPU) concatenated along dimension 0.
    """
    model.eval()
    progress = tqdm(loader, desc="Validating", leave=False)
    running_loss = 0.0
    collected_outputs = []

    with torch.no_grad():
        for batch_images, batch_masks in progress:
            batch_images = batch_images.to(device)
            batch_masks = batch_masks.to(device).unsqueeze(1)

            raw_outputs = model(batch_images)
            collected_outputs.append(raw_outputs.cpu().squeeze(1))

            # The criterion receives raw outputs; no sigmoid is applied here.
            batch_loss = criterion(raw_outputs, batch_masks).item()
            running_loss += batch_loss
            progress.set_postfix(loss=batch_loss)

    stacked_outputs = torch.cat(collected_outputs, dim=0)
    mean_loss = running_loss / len(loader)
    return mean_loss, stacked_outputs

# **MODEL TRAINING**


In [None]:

# Make sure the output folders exist before any checkpoint / history file is written.
os.makedirs("saved_models", exist_ok=True)
os.makedirs("history", exist_ok=True)

# ReduceLROnPlateau is stepped once per epoch with the validation loss; every
# other scheduler is stepped per batch inside train_one_epoch. The decision is
# taken from the scheduler's actual type so that an inconsistent flag combination
# (e.g. LR_MODIFIER and Plateau both True) cannot call step(val_loss) on a
# scheduler that does not accept a metric.
plateau_scheduler = isinstance(scheduler, ReduceLROnPlateau)

# Always defined so later cells can rely on them even when TRACK_F1 is False.
max_f1 = 0
f1_list = []
past_val_loss = float("inf")


for epoch in range(NUM_EPOCH):
    print(f"Epoch {epoch+1}/{NUM_EPOCH}")

    # Training phase
    train_loss, avg_lr = train_one_epoch(
        model, train_loader, criterion, optimizer, device,
        scheduler=scheduler, Plateau=plateau_scheduler,
    )

    # Validation phase: `prediction` holds the raw model outputs on the CPU.
    val_loss, prediction = validate(model, val_loader, criterion, device)

    # Tracking loss metrics
    tracker.log_epoch(train_loss, val_loss)
    if plateau_scheduler:
        scheduler.step(val_loss)
        # Report the learning rate currently held by the optimizer.
        avg_lr = optimizer.param_groups[-1]['lr']

    if scheduler is not None:
        tracker.log_lr(avg_lr)
        print(f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Avg LR: {avg_lr:.4f}")
    else:
        print(f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

    if TRACK_F1:
        # Turn the raw outputs into probabilities before scoring.
        prob_mask = [torch.sigmoid(logits).numpy() for logits in prediction]
        f1 = th.compute_f1(true_masks=val_mask, pred_probs=prob_mask, patch_size=16, patch_threshold=0.25)
        f1_list.append(f1)
        print(f"F1 Score: {f1}")
        if f1 > max_f1:
            max_f1 = f1
            torch.save(model.state_dict(), "saved_models/unet_best_f1.pth")

    # Save the best model so far (lowest validation loss)
    if val_loss < past_val_loss:
        past_val_loss = val_loss
        torch.save(model.state_dict(), "saved_models/unet_best_val.pth")
        print("Model saved")

        # Only stitch patches back together when the model does not already
        # predict whole images. The former test (`!= 400 or != 608`) was always
        # True, which made the else branch unreachable.
        if PATCH_SIZE not in (400, 608):
            reconstructed_mask = th.reconstruct_all_masks(
                patches=prediction,
                num_images=len(val_img),
                image_size=(400, 400),
                patch_size=PATCH_SIZE,
                stride=STRIDE,
            )
        else:
            reconstructed_mask = prediction

        # torch.as_tensor accepts both numpy arrays (reconstructed masks) and
        # tensors (raw predictions); torch.from_numpy would reject the latter.
        proba_mask_cpu = [
            torch.sigmoid(torch.as_tensor(pred)).cpu().numpy()
            for pred in reconstructed_mask
        ]
        th.plot_images_with_predictions(val_img, val_mask, proba_mask_cpu, rows=2)

torch.save(model.state_dict(), "saved_models/unet_final.pth")
print("Training completed - all the models are saved")

tracker.save(th.get_unique_filepath("history/training_history.json"))
tracker.plot_metrics()

In [None]:

# Plot the validation F1 score recorded after each epoch.
# Guarded so the cell does not fail when F1 tracking was disabled or no epoch
# has completed; the x axis follows the number of scores actually recorded.
if TRACK_F1 and f1_list:
    epochs = list(range(1, len(f1_list) + 1))
    tick_step = max(1, len(epochs) // 10)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(epochs, f1_list, marker='o', linestyle='-', label='F1 Score', color="orange")
    ax.set_title('F1 Score Over Epochs', fontsize=16)
    ax.set_xlabel('Epoch', fontsize=14)
    ax.set_ylabel('F1 Score', fontsize=14)
    ax.set_ylim(0, 1)
    ax.set_yticks(np.linspace(0, 1, 11))
    # Ticks start at epoch 1 to line up with the plotted points
    # (they used to start at 0, where there is no data).
    ax.set_xticks(epochs[::tick_step])
    ax.tick_params(axis='both', labelsize=12)
    ax.grid(visible=True, which='both', linestyle='--', linewidth=0.5, alpha=0.7)
    ax.legend(fontsize=12)
    plt.show()
else:
    print("No F1 score to plot: TRACK_F1 is disabled or training has not been run.")