In [1]:
import argparse
import os
import time
import datetime
import torch
import torch.optim as optim
from math import ceil
from torch.utils.data import DataLoader
from dataset.dataset import CityScapesDataset
from utils.metric import runningScore, averageMeter
import numpy as np
from tqdm import tqdm
import torch.nn as nn
import torch.nn.functional as F


os.environ["CUDA_VISIBLE_DEVICES"] = "2"

In [2]:
torch.cuda.is_available()

True

In [3]:
BASE_CHANNELS = 9
# n_train=2436
n_train=2476
n_val=400

batchsize   = 20
Epoch       = 50
img_size    = [256, 512]
model_name  = 'model_task2'
task        = 'cat'
lr = 3.75e-3

final_save_path='./'+model_name+'.pth'

In [4]:
# LOSS FUNCTIONS: CE + Dice + Lovasz-Softmax
class DiceLoss(nn.Module):
    def __init__(self, smooth=1.0):
        super(DiceLoss, self).__init__()
        self.smooth = smooth

    def forward(self, pred, target):
        pred = F.softmax(pred, dim=1)
        num_classes = pred.shape[1]
        target_one_hot = F.one_hot(target, num_classes=num_classes)  # (B, H, W, C)
        target_one_hot = target_one_hot.permute(0, 3, 1, 2).float()  # (B, C, H, W)
        
        pred = pred.view(pred.shape[0], pred.shape[1], -1)  # (B, C, H*W)
        target_one_hot = target_one_hot.view(target_one_hot.shape[0], target_one_hot.shape[1], -1)
        
        intersection = (pred * target_one_hot).sum(dim=2)  # (B, C)
        union = pred.sum(dim=2) + target_one_hot.sum(dim=2)  # (B, C)
        dice = (2.0 * intersection + self.smooth) / (union + self.smooth)
        
        return 1.0 - dice.mean()


class LovaszSoftmaxLoss(nn.Module):
    def __init__(self, ignore_index=255):
        super(LovaszSoftmaxLoss, self).__init__()
        self.ignore_index = ignore_index

    def forward(self, pred, target):
        pred = F.softmax(pred, dim=1)
        B, C, H, W = pred.shape
        pred = pred.permute(0, 2, 3, 1).reshape(-1, C)  # (B*H*W, C)
        target = target.view(-1)  # (B*H*W,)
        
        if self.ignore_index is not None:
            valid_mask = target != self.ignore_index
            pred = pred[valid_mask]
            target = target[valid_mask]
        
        if pred.numel() == 0:
            return pred.sum() * 0.0
        
        loss = self.lovasz_softmax_flat(pred, target, C)
        return loss

    def lovasz_softmax_flat(self, pred, target, num_classes):
        losses = []
        for c in range(num_classes):
            target_c = (target == c).float()
            pred_c = pred[:, c]
            errors = (target_c - pred_c).abs()
            errors_sorted, perm = torch.sort(errors, descending=True)
            target_sorted = target_c[perm]
            
            inter = target_sorted.sum() - target_sorted.cumsum(0)
            union = target_sorted.sum() + (1.0 - target_sorted).cumsum(0)
            iou = 1.0 - inter / union
            
            if len(errors_sorted) > 1:
                grad = iou[1:] - iou[:-1]
                loss_c = torch.dot(F.relu(errors_sorted[1:]), grad)
            else:
                loss_c = errors_sorted[0] if len(errors_sorted) > 0 else torch.tensor(0.0).to(pred.device)
            
            losses.append(loss_c)
        
        return torch.stack(losses).mean()

class ComposedLoss(nn.Module):
    def __init__(self, ce_weight=1.0, dice_weight=1.0, lovasz_weight=0.5):
        super(ComposedLoss, self).__init__()
        self.ce_weight = ce_weight
        self.dice_weight = dice_weight
        self.lovasz_weight = lovasz_weight
        
        self.ce_loss = nn.CrossEntropyLoss()
        self.dice_loss = DiceLoss()
        self.lovasz_loss = LovaszSoftmaxLoss()

    def forward(self, pred, target):
        ce = self.ce_loss(pred, target)
        dice = self.dice_loss(pred, target)
        lovasz = self.lovasz_loss(pred, target)
        
        total_loss = (self.ce_weight * ce +
                     self.dice_weight * dice +
                     self.lovasz_weight * lovasz)
        
        return total_loss

In [5]:
def train(epoch, data_loader, Net, optimizer, loss_fn, Meter):
    Net.train()
    timeStart = time.time()
    with tqdm(total=n_train, desc=f'Epoch {epoch + 1}/{Epoch}', unit='img') as pbar:
        for i, (data, target) in enumerate(data_loader):
            data , target = data.to(device),target.to(device)
            ### by yourself         
            optimizer.zero_grad()
            pred = Net(data)
            loss = loss_fn(pred, target)
            loss.backward()
            optimizer.step()



            ### End
            training_loss = loss.item()
            pbar.set_postfix(**{'loss (batch)': training_loss})
            pred = pred.data.max(1)[1]
            Meter['metric'].update(target.data.cpu().numpy(), pred.data.cpu().numpy())
            Meter['loss'].update(training_loss,data.size()[0])
            pbar.update(data.shape[0])
    timeEnd = time.time()
    score, class_iou = Meter['metric'].get_scores()
    loss_avg = Meter['loss'].avg
    print('epoch %3d : %10s loss: %f OverallAcc: %f MeanAcc %f mIoU %f time: %f' 
        %(epoch, ('training'), loss_avg, score['OverallAcc'], score['MeanAcc'], score['mIoU'], timeEnd-timeStart))

    return score['mIoU'],loss_avg

In [6]:
def val(epoch, data_loader, Net, loss_fn, Meter):
    Net.eval()
    with torch.no_grad():
        for i, (data, target) in enumerate(data_loader):
            data, target = data.to(device), target.to(device)
            timeStart = time.time()
            ### by yourself
            pred = Net(data)
            validation_loss = loss_fn(pred, target).item()
            timeEnd = time.time()           
            
            pred = pred.data.max(1)[1]
            Meter['metric'].update(target.data.cpu().numpy(), pred.data.cpu().numpy())
            Meter['loss'].update(validation_loss,data.size()[0])
            Meter['time'].update(timeEnd-timeStart,1)
    score, class_iou = Meter['metric'].get_scores()
    loss_avg = Meter['loss'].avg
    time_avg = Meter['time'].avg
    print('epoch %3d : %10s loss: %f OverallAcc: %f MeanAcc %f mIoU %f time: %f' 
        %(epoch, ('validation'), loss_avg, score['OverallAcc'], score['MeanAcc'], score['mIoU'], time_avg))
    
    return score['mIoU']

In [7]:
best_val_miou=0
current_val_miou=0

In [8]:
import network2
import copy
#from network2 import *
if __name__ == '__main__':

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")   
    assert task in ['cat'],'wrong value of task'
    if task=='cat':
        num_classes = 8
        
    training_meter = {'metric':runningScore(num_classes),'loss':averageMeter(),'time':averageMeter()}
    validation_meter = {'metric':runningScore(num_classes),'loss':averageMeter(),'time':averageMeter()}

    print(str(datetime.datetime.now()))
    print('batchsize %3d | epoch %3d | img_size  %4d %4d | task  %6s | model_name  %25s '
                    %(batchsize,Epoch,img_size[0],img_size[1], task ,model_name ))

    TrainingDataset   = CityScapesDataset("/export/home/dl2025f/shared/data", "training", img_size, task=task, augmentation=None)
    ValidationDataset = CityScapesDataset("/export/home/dl2025f/shared/data", "validation", img_size, task=task)

    TrainingLoader    = DataLoader(TrainingDataset, batch_size=batchsize, shuffle=True, num_workers=4)
    ValidationLoader  = DataLoader(ValidationDataset, batch_size=batchsize, shuffle=False, num_workers=4)
    num_batch         = ceil(len(TrainingDataset)/batchsize)

    # define yout model
    Net = network2.EfficientUNet(n_channels=3, n_classes=num_classes, base_channels=BASE_CHANNELS)
    Net = Net.to(device)
    # define your optimizer
    optimizer = optim.Adam(Net.parameters(), lr=lr)
    # schduler
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
    # define your loss    
    loss_fn = ComposedLoss(ce_weight=1.0, dice_weight=1.0, lovasz_weight=0.5)

    start_epoch = 0
    for epoch in range(start_epoch, Epoch):
        for _, v in training_meter.items():
            v.reset()
            
        show_lr=optimizer.param_groups[0]['lr']
        print('learning rate is : ',  show_lr   )
        current_train_miou,current_train_loss=train(epoch, TrainingLoader, Net, optimizer, loss_fn, training_meter)
        
        scheduler.step() ###  StepLR
        
        if (epoch+1)%1==0 or epoch==Epoch-1:
            for _, v in validation_meter.items():
                v.reset()
            current_val_miou=val(epoch, ValidationLoader, Net, loss_fn, validation_meter)
            
            if current_val_miou>best_val_miou:
                best_val_miou=current_val_miou
                best_state_dict = Net.state_dict()
                torch.save(best_state_dict, final_save_path)
                print("(model saved)")

    print(str(datetime.datetime.now()))


2025-11-27 17:31:27.102597
batchsize  20 | epoch  50 | img_size   256  512 | task     cat | model_name                model_task2 
learning rate is :  0.00375


Epoch 1/50: 100%|██████████| 2476/2476 [01:53<00:00, 21.75img/s, loss (batch)=1.88]

epoch   0 :   training loss: 2.554615 OverallAcc: 0.556749 MeanAcc 0.266091 mIoU 0.182644 time: 113.840778





epoch   0 : validation loss: 1.861406 OverallAcc: 0.713980 MeanAcc 0.403972 mIoU 0.303115 time: 0.027552
(model saved)
learning rate is :  0.00375


Epoch 2/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.42img/s, loss (batch)=1.44]

epoch   1 :   training loss: 1.563804 OverallAcc: 0.792613 MeanAcc 0.486962 mIoU 0.395528 time: 110.446157





epoch   1 : validation loss: 1.528524 OverallAcc: 0.785686 MeanAcc 0.500606 mIoU 0.402444 time: 0.027456
(model saved)
learning rate is :  0.00375


Epoch 3/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.46img/s, loss (batch)=1.38]

epoch   2 :   training loss: 1.367381 OverallAcc: 0.835020 MeanAcc 0.574453 mIoU 0.489665 time: 110.252427





epoch   2 : validation loss: 1.409408 OverallAcc: 0.827615 MeanAcc 0.606390 mIoU 0.505330 time: 0.027443
(model saved)
learning rate is :  0.00375


Epoch 4/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.47img/s, loss (batch)=1.23]

epoch   3 :   training loss: 1.262532 OverallAcc: 0.857817 MeanAcc 0.631778 mIoU 0.544624 time: 110.169962





epoch   3 : validation loss: 1.342601 OverallAcc: 0.833865 MeanAcc 0.625090 mIoU 0.524264 time: 0.027456
(model saved)
learning rate is :  0.00375


Epoch 5/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.17img/s, loss (batch)=1.35]

epoch   4 :   training loss: 1.180102 OverallAcc: 0.865236 MeanAcc 0.651976 mIoU 0.567392 time: 111.688730





epoch   4 : validation loss: 1.212034 OverallAcc: 0.850415 MeanAcc 0.672987 mIoU 0.568641 time: 0.027436
(model saved)
learning rate is :  0.00375


Epoch 6/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.35img/s, loss (batch)=1.34] 

epoch   5 :   training loss: 1.113200 OverallAcc: 0.870463 MeanAcc 0.689762 mIoU 0.594916 time: 110.777159





epoch   5 : validation loss: 1.154124 OverallAcc: 0.855107 MeanAcc 0.691506 mIoU 0.589863 time: 0.027442
(model saved)
learning rate is :  0.00375


Epoch 7/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.38img/s, loss (batch)=1.13] 

epoch   6 :   training loss: 1.071726 OverallAcc: 0.872370 MeanAcc 0.700891 mIoU 0.603729 time: 110.613118





epoch   6 : validation loss: 1.169539 OverallAcc: 0.847318 MeanAcc 0.705051 mIoU 0.582678 time: 0.027501
learning rate is :  0.00375


Epoch 8/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.55img/s, loss (batch)=0.973]

epoch   7 :   training loss: 1.023159 OverallAcc: 0.874840 MeanAcc 0.710224 mIoU 0.615626 time: 109.806412





epoch   7 : validation loss: 1.233513 OverallAcc: 0.845698 MeanAcc 0.627858 mIoU 0.551436 time: 0.027445
learning rate is :  0.00375


Epoch 9/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.35img/s, loss (batch)=1.05] 

epoch   8 :   training loss: 0.995384 OverallAcc: 0.876643 MeanAcc 0.720356 mIoU 0.627207 time: 110.762915





epoch   8 : validation loss: 1.181255 OverallAcc: 0.836326 MeanAcc 0.685076 mIoU 0.588469 time: 0.027478
learning rate is :  0.00375


Epoch 10/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.48img/s, loss (batch)=0.921]

epoch   9 :   training loss: 0.983371 OverallAcc: 0.877348 MeanAcc 0.726377 mIoU 0.632601 time: 110.120005





epoch   9 : validation loss: 1.157566 OverallAcc: 0.839540 MeanAcc 0.701518 mIoU 0.600519 time: 0.027464
(model saved)
learning rate is :  0.00375


Epoch 11/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.60img/s, loss (batch)=1.08] 

epoch  10 :   training loss: 0.967935 OverallAcc: 0.879475 MeanAcc 0.733895 mIoU 0.639366 time: 109.565952





epoch  10 : validation loss: 1.028703 OverallAcc: 0.867613 MeanAcc 0.725863 mIoU 0.629563 time: 0.027472
(model saved)
learning rate is :  0.00375


Epoch 12/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.44img/s, loss (batch)=0.924]

epoch  11 :   training loss: 0.959892 OverallAcc: 0.879896 MeanAcc 0.739061 mIoU 0.643826 time: 110.351150





epoch  11 : validation loss: 1.096484 OverallAcc: 0.860049 MeanAcc 0.682381 mIoU 0.601490 time: 0.027489
learning rate is :  0.00375


Epoch 13/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.50img/s, loss (batch)=0.881]

epoch  12 :   training loss: 0.941288 OverallAcc: 0.882713 MeanAcc 0.744301 mIoU 0.649087 time: 110.059939





epoch  12 : validation loss: 1.046002 OverallAcc: 0.859483 MeanAcc 0.744425 mIoU 0.627935 time: 0.027461
learning rate is :  0.00375


Epoch 14/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.43img/s, loss (batch)=0.885]

epoch  13 :   training loss: 0.931323 OverallAcc: 0.883507 MeanAcc 0.749600 mIoU 0.653944 time: 110.411906





epoch  13 : validation loss: 1.021606 OverallAcc: 0.863302 MeanAcc 0.756498 mIoU 0.639199 time: 0.027464
(model saved)
learning rate is :  0.00375


Epoch 15/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.52img/s, loss (batch)=0.94] 

epoch  14 :   training loss: 0.918472 OverallAcc: 0.885530 MeanAcc 0.755000 mIoU 0.658880 time: 109.947313





epoch  14 : validation loss: 1.033051 OverallAcc: 0.869498 MeanAcc 0.722292 mIoU 0.636933 time: 0.027477
learning rate is :  0.00375


Epoch 16/50: 100%|██████████| 2476/2476 [01:52<00:00, 22.06img/s, loss (batch)=0.879]

epoch  15 :   training loss: 0.908358 OverallAcc: 0.886657 MeanAcc 0.758646 mIoU 0.662936 time: 112.232527





epoch  15 : validation loss: 0.994538 OverallAcc: 0.870379 MeanAcc 0.753088 mIoU 0.644972 time: 0.027478
(model saved)
learning rate is :  0.00375


Epoch 17/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.39img/s, loss (batch)=0.912]

epoch  16 :   training loss: 0.905838 OverallAcc: 0.886568 MeanAcc 0.761133 mIoU 0.663584 time: 110.569371





epoch  16 : validation loss: 1.002149 OverallAcc: 0.869223 MeanAcc 0.755729 mIoU 0.649215 time: 0.027430
(model saved)
learning rate is :  0.00375


Epoch 18/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.38img/s, loss (batch)=0.915]

epoch  17 :   training loss: 0.902259 OverallAcc: 0.887242 MeanAcc 0.761899 mIoU 0.665094 time: 110.646564





epoch  17 : validation loss: 1.048826 OverallAcc: 0.857440 MeanAcc 0.739609 mIoU 0.632712 time: 0.027479
learning rate is :  0.00375


Epoch 19/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.57img/s, loss (batch)=0.826]

epoch  18 :   training loss: 0.890146 OverallAcc: 0.888805 MeanAcc 0.766580 mIoU 0.669547 time: 109.713907





epoch  18 : validation loss: 1.012543 OverallAcc: 0.871026 MeanAcc 0.712195 mIoU 0.634362 time: 0.027478
learning rate is :  0.00375


Epoch 20/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.48img/s, loss (batch)=0.94] 

epoch  19 :   training loss: 0.884103 OverallAcc: 0.889482 MeanAcc 0.767990 mIoU 0.671137 time: 110.163294





epoch  19 : validation loss: 0.967971 OverallAcc: 0.875003 MeanAcc 0.749399 mIoU 0.654033 time: 0.027432
(model saved)
learning rate is :  0.001875


Epoch 21/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.54img/s, loss (batch)=0.905]

epoch  20 :   training loss: 0.860834 OverallAcc: 0.892897 MeanAcc 0.775022 mIoU 0.679485 time: 109.857697





epoch  20 : validation loss: 0.943111 OverallAcc: 0.877204 MeanAcc 0.756575 mIoU 0.663428 time: 0.027473
(model saved)
learning rate is :  0.001875


Epoch 22/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.23img/s, loss (batch)=0.899]

epoch  21 :   training loss: 0.850076 OverallAcc: 0.894307 MeanAcc 0.778573 mIoU 0.682974 time: 111.379680





epoch  21 : validation loss: 0.932870 OverallAcc: 0.878704 MeanAcc 0.774509 mIoU 0.665818 time: 0.027477
(model saved)
learning rate is :  0.001875


Epoch 23/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.52img/s, loss (batch)=0.96] 

epoch  22 :   training loss: 0.847608 OverallAcc: 0.894252 MeanAcc 0.779791 mIoU 0.684095 time: 109.932712





epoch  22 : validation loss: 0.950616 OverallAcc: 0.877126 MeanAcc 0.752286 mIoU 0.663316 time: 0.027464
learning rate is :  0.001875


Epoch 24/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.22img/s, loss (batch)=0.737]

epoch  23 :   training loss: 0.844152 OverallAcc: 0.895044 MeanAcc 0.780907 mIoU 0.685688 time: 111.446156





epoch  23 : validation loss: 0.932008 OverallAcc: 0.880433 MeanAcc 0.760633 mIoU 0.668232 time: 0.027483
(model saved)
learning rate is :  0.001875


Epoch 25/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.27img/s, loss (batch)=0.888]

epoch  24 :   training loss: 0.841620 OverallAcc: 0.895369 MeanAcc 0.780584 mIoU 0.686018 time: 111.177380





epoch  24 : validation loss: 0.961796 OverallAcc: 0.874610 MeanAcc 0.755164 mIoU 0.658725 time: 0.027450
learning rate is :  0.001875


Epoch 26/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.54img/s, loss (batch)=0.954]

epoch  25 :   training loss: 0.837365 OverallAcc: 0.895729 MeanAcc 0.783643 mIoU 0.688088 time: 109.831336





epoch  25 : validation loss: 0.936246 OverallAcc: 0.877519 MeanAcc 0.763181 mIoU 0.667047 time: 0.027441
learning rate is :  0.001875


Epoch 27/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.50img/s, loss (batch)=0.854]

epoch  26 :   training loss: 0.840199 OverallAcc: 0.895249 MeanAcc 0.782307 mIoU 0.686592 time: 110.052742





epoch  26 : validation loss: 0.930384 OverallAcc: 0.879861 MeanAcc 0.766707 mIoU 0.669899 time: 0.027452
(model saved)
learning rate is :  0.001875


Epoch 28/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.57img/s, loss (batch)=0.704]

epoch  27 :   training loss: 0.831003 OverallAcc: 0.896517 MeanAcc 0.784688 mIoU 0.689535 time: 109.710608





epoch  27 : validation loss: 0.934537 OverallAcc: 0.880148 MeanAcc 0.765948 mIoU 0.669413 time: 0.027461
learning rate is :  0.001875


Epoch 29/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.55img/s, loss (batch)=0.835]

epoch  28 :   training loss: 0.832137 OverallAcc: 0.896098 MeanAcc 0.785164 mIoU 0.689598 time: 109.807861





epoch  28 : validation loss: 0.944108 OverallAcc: 0.876085 MeanAcc 0.771807 mIoU 0.661590 time: 0.027485
learning rate is :  0.001875


Epoch 30/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.38img/s, loss (batch)=0.768]

epoch  29 :   training loss: 0.829694 OverallAcc: 0.896546 MeanAcc 0.785338 mIoU 0.689613 time: 110.642586





epoch  29 : validation loss: 0.922012 OverallAcc: 0.881401 MeanAcc 0.764271 mIoU 0.669583 time: 0.027459
learning rate is :  0.001875


Epoch 31/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.35img/s, loss (batch)=0.883]

epoch  30 :   training loss: 0.828849 OverallAcc: 0.896685 MeanAcc 0.785897 mIoU 0.690711 time: 110.794060





epoch  30 : validation loss: 0.908575 OverallAcc: 0.881675 MeanAcc 0.772142 mIoU 0.673631 time: 0.027462
(model saved)
learning rate is :  0.001875


Epoch 32/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.20img/s, loss (batch)=0.873]

epoch  31 :   training loss: 0.822437 OverallAcc: 0.897501 MeanAcc 0.787446 mIoU 0.692599 time: 111.552191





epoch  31 : validation loss: 0.926312 OverallAcc: 0.878935 MeanAcc 0.769567 mIoU 0.668628 time: 0.027493
learning rate is :  0.001875


Epoch 33/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.28img/s, loss (batch)=0.779]

epoch  32 :   training loss: 0.821995 OverallAcc: 0.897600 MeanAcc 0.787508 mIoU 0.692365 time: 111.143301





epoch  32 : validation loss: 0.919579 OverallAcc: 0.881191 MeanAcc 0.763541 mIoU 0.673045 time: 0.027503
learning rate is :  0.001875


Epoch 34/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.38img/s, loss (batch)=0.757]

epoch  33 :   training loss: 0.818440 OverallAcc: 0.897549 MeanAcc 0.788260 mIoU 0.693096 time: 110.626814





epoch  33 : validation loss: 0.927632 OverallAcc: 0.880877 MeanAcc 0.774130 mIoU 0.671802 time: 0.027478
learning rate is :  0.001875


Epoch 35/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.42img/s, loss (batch)=0.743]

epoch  34 :   training loss: 0.818444 OverallAcc: 0.897442 MeanAcc 0.788782 mIoU 0.693570 time: 110.461324





epoch  34 : validation loss: 0.914803 OverallAcc: 0.881643 MeanAcc 0.769062 mIoU 0.671690 time: 0.027471
learning rate is :  0.001875


Epoch 36/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.40img/s, loss (batch)=0.897]

epoch  35 :   training loss: 0.817177 OverallAcc: 0.897698 MeanAcc 0.789777 mIoU 0.694451 time: 110.538711





epoch  35 : validation loss: 0.907915 OverallAcc: 0.882801 MeanAcc 0.772555 mIoU 0.676534 time: 0.027489
(model saved)
learning rate is :  0.001875


Epoch 37/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.31img/s, loss (batch)=0.827]

epoch  36 :   training loss: 0.816540 OverallAcc: 0.897961 MeanAcc 0.789571 mIoU 0.694713 time: 110.993313





epoch  36 : validation loss: 0.907126 OverallAcc: 0.880780 MeanAcc 0.783589 mIoU 0.675114 time: 0.027477
learning rate is :  0.001875


Epoch 38/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.39img/s, loss (batch)=0.902]

epoch  37 :   training loss: 0.814113 OverallAcc: 0.898069 MeanAcc 0.790742 mIoU 0.695061 time: 110.604985





epoch  37 : validation loss: 0.926109 OverallAcc: 0.876088 MeanAcc 0.772936 mIoU 0.669553 time: 0.027473
learning rate is :  0.001875


Epoch 39/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.46img/s, loss (batch)=0.85] 

epoch  38 :   training loss: 0.809838 OverallAcc: 0.898965 MeanAcc 0.791644 mIoU 0.696964 time: 110.245886





epoch  38 : validation loss: 0.937229 OverallAcc: 0.877800 MeanAcc 0.781211 mIoU 0.669629 time: 0.027482
learning rate is :  0.001875


Epoch 40/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.40img/s, loss (batch)=0.752]

epoch  39 :   training loss: 0.810708 OverallAcc: 0.898894 MeanAcc 0.792998 mIoU 0.697109 time: 110.515710





epoch  39 : validation loss: 0.916613 OverallAcc: 0.879524 MeanAcc 0.777973 mIoU 0.671818 time: 0.027465
learning rate is :  0.0009375


Epoch 41/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.52img/s, loss (batch)=0.738]

epoch  40 :   training loss: 0.793918 OverallAcc: 0.901220 MeanAcc 0.796884 mIoU 0.702809 time: 109.925740





epoch  40 : validation loss: 0.892968 OverallAcc: 0.882123 MeanAcc 0.783216 mIoU 0.680095 time: 0.027469
(model saved)
learning rate is :  0.0009375


Epoch 42/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.49img/s, loss (batch)=0.733]

epoch  41 :   training loss: 0.790091 OverallAcc: 0.901993 MeanAcc 0.797538 mIoU 0.704133 time: 110.087109





epoch  41 : validation loss: 0.932990 OverallAcc: 0.881071 MeanAcc 0.755756 mIoU 0.669246 time: 0.027486
learning rate is :  0.0009375


Epoch 43/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.51img/s, loss (batch)=0.84] 

epoch  42 :   training loss: 0.790785 OverallAcc: 0.901429 MeanAcc 0.797968 mIoU 0.703257 time: 110.008192





epoch  42 : validation loss: 0.913201 OverallAcc: 0.878457 MeanAcc 0.772667 mIoU 0.674064 time: 0.027479
learning rate is :  0.0009375


Epoch 44/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.47img/s, loss (batch)=0.72] 

epoch  43 :   training loss: 0.788508 OverallAcc: 0.901846 MeanAcc 0.797731 mIoU 0.704226 time: 110.195612





epoch  43 : validation loss: 0.917522 OverallAcc: 0.875802 MeanAcc 0.772814 mIoU 0.671870 time: 0.027466
learning rate is :  0.0009375


Epoch 45/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.33img/s, loss (batch)=0.776]

epoch  44 :   training loss: 0.786114 OverallAcc: 0.902453 MeanAcc 0.799440 mIoU 0.705432 time: 110.862821





epoch  44 : validation loss: 0.894117 OverallAcc: 0.884213 MeanAcc 0.780678 mIoU 0.680889 time: 0.027460
(model saved)
learning rate is :  0.0009375


Epoch 46/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.28img/s, loss (batch)=0.789]

epoch  45 :   training loss: 0.784410 OverallAcc: 0.902329 MeanAcc 0.799299 mIoU 0.705671 time: 111.151498





epoch  45 : validation loss: 0.936724 OverallAcc: 0.880937 MeanAcc 0.766339 mIoU 0.672934 time: 0.027478
learning rate is :  0.0009375


Epoch 47/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.47img/s, loss (batch)=0.813]

epoch  46 :   training loss: 0.787906 OverallAcc: 0.901953 MeanAcc 0.798476 mIoU 0.704725 time: 110.173075





epoch  46 : validation loss: 0.908517 OverallAcc: 0.880358 MeanAcc 0.778426 mIoU 0.676007 time: 0.027480
learning rate is :  0.0009375


Epoch 48/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.43img/s, loss (batch)=0.79] 

epoch  47 :   training loss: 0.783297 OverallAcc: 0.902855 MeanAcc 0.799999 mIoU 0.706136 time: 110.376265





epoch  47 : validation loss: 0.935604 OverallAcc: 0.878062 MeanAcc 0.766659 mIoU 0.670440 time: 0.027483
learning rate is :  0.0009375


Epoch 49/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.50img/s, loss (batch)=0.726]

epoch  48 :   training loss: 0.781022 OverallAcc: 0.903168 MeanAcc 0.800189 mIoU 0.707052 time: 110.029022





epoch  48 : validation loss: 0.910306 OverallAcc: 0.877850 MeanAcc 0.783174 mIoU 0.676546 time: 0.027512
learning rate is :  0.0009375


Epoch 50/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.44img/s, loss (batch)=0.891]

epoch  49 :   training loss: 0.781975 OverallAcc: 0.903004 MeanAcc 0.800897 mIoU 0.706767 time: 110.352105





epoch  49 : validation loss: 0.894663 OverallAcc: 0.883547 MeanAcc 0.778772 mIoU 0.680351 time: 0.027475
2025-11-27 19:18:40.758429


In [9]:
print(' best_val_miou is :  ',best_val_miou)

 best_val_miou is :   0.68088921440897


In [10]:
# INT8 QAT CONFIGURATION
QAT_EPOCHS = 35
QAT_LR = lr * 0.1
QAT_BACKEND = 'qnnpack'

In [11]:
# INT8 QAT TRAINING AND CONVERSION

# Step 1: Load baseline FP32 model
Net_qat = network2.load_model(final_save_path, base_channels=BASE_CHANNELS)
Net_qat = Net_qat.to(device)

# Step 2: Prepare model for QAT
print(f"\n[2/5] Preparing model for QAT (adding fake quantization)...")
Net_qat = network2.prepare_qat_model(Net_qat, backend=QAT_BACKEND)
Net_qat = Net_qat.to(device)

# Step 3: QAT Training (fine-tune with fake quantization)
print(f"\n[3/5] QAT Training for {QAT_EPOCHS} epochs...")
optimizer_qat = optim.Adam(Net_qat.parameters(), lr=QAT_LR)
scheduler_qat = optim.lr_scheduler.StepLR(optimizer_qat, step_size=5, gamma=0.5)

best_qat_miou = 0

for epoch in range(QAT_EPOCHS):
    # Training
    for _, v in training_meter.items():
        v.reset()
    
    current_train_miou_qat, current_train_loss_qat = train(
        epoch, TrainingLoader, Net_qat, optimizer_qat, loss_fn, training_meter
    )
    scheduler_qat.step()
    
    # Validation
    for _, v in validation_meter.items():
        v.reset()
    current_val_miou_qat = val(epoch, ValidationLoader, Net_qat, loss_fn, validation_meter)
    
    # Track best
    if current_val_miou_qat > best_qat_miou:
        best_qat_miou = current_val_miou_qat
        print(f"  (best QAT mIoU updated: {best_qat_miou:.6f})")

print(f"\nQAT Training Complete")
print(f"  Best QAT mIoU: {best_qat_miou:.6f}")

# Step 4: Convert to INT8 (moves model to CPU)
print(f"\n[4/5] Converting to INT8 quantized model...")
Net_qat = Net_qat.to('cpu')
Net_qat.eval()
Net_quantized = network2.convert_to_quantized(Net_qat)

# Step 5: Final evaluation of INT8 model on CPU
print(f"\n[5/5] Evaluating INT8 model on CPU...")
print(f"  Moving validation data to CPU for INT8 evaluation...")

# Create CPU validation function for quantized model
def val_cpu(epoch, data_loader, Net, loss_fn, Meter):
    Net.eval()
    with torch.no_grad():
        for i, (data, target) in enumerate(data_loader):
            # Keep data on CPU for quantized model
            data_cpu, target_cpu = data.cpu(), target.cpu()
            
            pred = Net(data_cpu)
            validation_loss = loss_fn(pred, target_cpu).item()
            
            pred = pred.data.max(1)[1]
            Meter['metric'].update(target_cpu.data.cpu().numpy(), pred.data.cpu().numpy())
            Meter['loss'].update(validation_loss, data_cpu.size()[0])
    
    score, class_iou = Meter['metric'].get_scores()
    loss_avg = Meter['loss'].avg
    print('epoch %3d : %10s loss: %f OverallAcc: %f MeanAcc %f mIoU %f (CPU)' 
        %(epoch, ('validation'), loss_avg, score['OverallAcc'], score['MeanAcc'], score['mIoU']))
    
    return score['mIoU']

for _, v in validation_meter.items():
    v.reset()
final_quantized_miou = val_cpu(-1, ValidationLoader, Net_quantized, loss_fn, validation_meter)

print(f"\n" + "="*80)
print(f"INT8 QUANTIZATION COMPLETE")
print(f"="*80)
print(f"  Final INT8 mIoU: {final_quantized_miou:.6f}")
print(f"  Threshold: 0.66")
if final_quantized_miou >= 0.66:
    print(f"Requirement satisfied (margin: +{final_quantized_miou - 0.66:.6f})")
else:
    print(f"Below threshold (shortfall: {0.66 - final_quantized_miou:.6f})")

# Save INT8 model (CPU model)
torch.save(Net_quantized.state_dict(), './model_task2_int8.pth')
print(f"\nINT8 model saved to: model_task2_int8.pth (CPU model)")

[load_model] Detected FP32 model
[load_model] Loaded FP32 model

[2/5] Preparing model for QAT (adding fake quantization)...
Model prepared for QAT with backend=qnnpack
Fake quantization modules added for training

[3/5] QAT Training for 35 epochs...


Epoch 1/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.39img/s, loss (batch)=0.883]

epoch   0 :   training loss: 0.843997 OverallAcc: 0.891507 MeanAcc 0.784149 mIoU 0.687124 time: 110.579932





epoch   0 : validation loss: 0.935212 OverallAcc: 0.875061 MeanAcc 0.771773 mIoU 0.667671 time: 0.042677
  (best QAT mIoU updated: 0.667671)


Epoch 2/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.44img/s, loss (batch)=0.808]

epoch   1 :   training loss: 0.833345 OverallAcc: 0.893445 MeanAcc 0.786152 mIoU 0.690002 time: 110.338294





epoch   1 : validation loss: 0.922568 OverallAcc: 0.878498 MeanAcc 0.781766 mIoU 0.672514 time: 0.042650
  (best QAT mIoU updated: 0.672514)


Epoch 3/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.38img/s, loss (batch)=0.784]

epoch   2 :   training loss: 0.825978 OverallAcc: 0.894818 MeanAcc 0.788097 mIoU 0.692487 time: 110.652340





epoch   2 : validation loss: 0.928736 OverallAcc: 0.876809 MeanAcc 0.775913 mIoU 0.669910 time: 0.042704


Epoch 4/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.66img/s, loss (batch)=0.929]

epoch   3 :   training loss: 0.824837 OverallAcc: 0.894839 MeanAcc 0.788010 mIoU 0.692437 time: 109.278584





epoch   3 : validation loss: 0.934091 OverallAcc: 0.876296 MeanAcc 0.770522 mIoU 0.669507 time: 0.042848


Epoch 5/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.30img/s, loss (batch)=0.98] 

epoch   4 :   training loss: 0.818088 OverallAcc: 0.895920 MeanAcc 0.790756 mIoU 0.694904 time: 111.018506





epoch   4 : validation loss: 0.924643 OverallAcc: 0.877772 MeanAcc 0.770753 mIoU 0.671267 time: 0.042776


Epoch 6/50: 100%|██████████| 2476/2476 [01:52<00:00, 22.08img/s, loss (batch)=0.734]

epoch   5 :   training loss: 0.814994 OverallAcc: 0.896496 MeanAcc 0.791082 mIoU 0.695606 time: 112.149890





epoch   5 : validation loss: 0.919006 OverallAcc: 0.878872 MeanAcc 0.776146 mIoU 0.673767 time: 0.042764
  (best QAT mIoU updated: 0.673767)


Epoch 7/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.29img/s, loss (batch)=0.826]

epoch   6 :   training loss: 0.812389 OverallAcc: 0.897271 MeanAcc 0.791423 mIoU 0.696856 time: 111.092291





epoch   6 : validation loss: 0.919990 OverallAcc: 0.879322 MeanAcc 0.771385 mIoU 0.673124 time: 0.042672


Epoch 8/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.46img/s, loss (batch)=0.751]

epoch   7 :   training loss: 0.813976 OverallAcc: 0.896916 MeanAcc 0.791332 mIoU 0.696938 time: 110.234539





epoch   7 : validation loss: 0.914153 OverallAcc: 0.879550 MeanAcc 0.777026 mIoU 0.674830 time: 0.042637
  (best QAT mIoU updated: 0.674830)


Epoch 9/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.36img/s, loss (batch)=0.863]

epoch   8 :   training loss: 0.813342 OverallAcc: 0.897067 MeanAcc 0.791812 mIoU 0.696632 time: 110.715349





epoch   8 : validation loss: 0.925819 OverallAcc: 0.874881 MeanAcc 0.771853 mIoU 0.670584 time: 0.042782


Epoch 10/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.53img/s, loss (batch)=0.862]

epoch   9 :   training loss: 0.809847 OverallAcc: 0.897508 MeanAcc 0.792314 mIoU 0.697754 time: 109.892772





epoch   9 : validation loss: 0.917036 OverallAcc: 0.878845 MeanAcc 0.778906 mIoU 0.673910 time: 0.042742


Epoch 11/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.57img/s, loss (batch)=0.814]

epoch  10 :   training loss: 0.810120 OverallAcc: 0.897273 MeanAcc 0.792188 mIoU 0.697757 time: 109.717298





epoch  10 : validation loss: 0.924923 OverallAcc: 0.877217 MeanAcc 0.769383 mIoU 0.671601 time: 0.042640


Epoch 12/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.56img/s, loss (batch)=0.895]

epoch  11 :   training loss: 0.808295 OverallAcc: 0.898033 MeanAcc 0.792403 mIoU 0.698271 time: 109.773517





epoch  11 : validation loss: 0.914309 OverallAcc: 0.878691 MeanAcc 0.776013 mIoU 0.674681 time: 0.042662


Epoch 13/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.50img/s, loss (batch)=0.725]

epoch  12 :   training loss: 0.810718 OverallAcc: 0.897664 MeanAcc 0.792130 mIoU 0.697570 time: 110.026729





epoch  12 : validation loss: 0.919489 OverallAcc: 0.878493 MeanAcc 0.772765 mIoU 0.672704 time: 0.042629


Epoch 14/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.41img/s, loss (batch)=0.863]

epoch  13 :   training loss: 0.807894 OverallAcc: 0.898074 MeanAcc 0.793361 mIoU 0.698503 time: 110.474633





epoch  13 : validation loss: 0.915163 OverallAcc: 0.879105 MeanAcc 0.780112 mIoU 0.674559 time: 0.042807


Epoch 15/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.44img/s, loss (batch)=0.871]

epoch  14 :   training loss: 0.810300 OverallAcc: 0.897701 MeanAcc 0.792323 mIoU 0.697796 time: 110.362754





epoch  14 : validation loss: 0.919344 OverallAcc: 0.878155 MeanAcc 0.771431 mIoU 0.673467 time: 0.042810


Epoch 16/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.48img/s, loss (batch)=0.759]

epoch  15 :   training loss: 0.806767 OverallAcc: 0.898221 MeanAcc 0.794321 mIoU 0.699437 time: 110.141085





epoch  15 : validation loss: 0.922852 OverallAcc: 0.877439 MeanAcc 0.771130 mIoU 0.671953 time: 0.042644


Epoch 17/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.19img/s, loss (batch)=0.813]

epoch  16 :   training loss: 0.804892 OverallAcc: 0.898595 MeanAcc 0.793417 mIoU 0.699556 time: 111.606214





epoch  16 : validation loss: 0.923087 OverallAcc: 0.878303 MeanAcc 0.771068 mIoU 0.673053 time: 0.042860


Epoch 18/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.38img/s, loss (batch)=0.835]

epoch  17 :   training loss: 0.806605 OverallAcc: 0.898378 MeanAcc 0.793457 mIoU 0.698923 time: 110.627498





epoch  17 : validation loss: 0.918790 OverallAcc: 0.877618 MeanAcc 0.779271 mIoU 0.674193 time: 0.042752


Epoch 19/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.40img/s, loss (batch)=0.797]

epoch  18 :   training loss: 0.805551 OverallAcc: 0.898619 MeanAcc 0.793743 mIoU 0.699489 time: 110.524823





epoch  18 : validation loss: 0.919454 OverallAcc: 0.876840 MeanAcc 0.774831 mIoU 0.672867 time: 0.042618


Epoch 20/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.54img/s, loss (batch)=0.724]

epoch  19 :   training loss: 0.805651 OverallAcc: 0.898285 MeanAcc 0.794145 mIoU 0.699557 time: 109.838356





epoch  19 : validation loss: 0.919938 OverallAcc: 0.877337 MeanAcc 0.774371 mIoU 0.673551 time: 0.042739


Epoch 21/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.40img/s, loss (batch)=0.846]

epoch  20 :   training loss: 0.804904 OverallAcc: 0.898483 MeanAcc 0.794131 mIoU 0.699747 time: 110.546819





epoch  20 : validation loss: 0.914093 OverallAcc: 0.879095 MeanAcc 0.775859 mIoU 0.675041 time: 0.042654
  (best QAT mIoU updated: 0.675041)


Epoch 22/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.27img/s, loss (batch)=0.841]

epoch  21 :   training loss: 0.805898 OverallAcc: 0.898535 MeanAcc 0.793201 mIoU 0.699383 time: 111.194003





epoch  21 : validation loss: 0.914723 OverallAcc: 0.878261 MeanAcc 0.776345 mIoU 0.674610 time: 0.042707


Epoch 23/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.28img/s, loss (batch)=0.735]

epoch  22 :   training loss: 0.804827 OverallAcc: 0.898421 MeanAcc 0.794335 mIoU 0.699693 time: 111.144382





epoch  22 : validation loss: 0.920373 OverallAcc: 0.879021 MeanAcc 0.768982 mIoU 0.673597 time: 0.042658


Epoch 24/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.29img/s, loss (batch)=0.774]

epoch  23 :   training loss: 0.804980 OverallAcc: 0.898719 MeanAcc 0.793477 mIoU 0.699391 time: 111.099054





epoch  23 : validation loss: 0.918856 OverallAcc: 0.877972 MeanAcc 0.771670 mIoU 0.673351 time: 0.042678


Epoch 25/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.26img/s, loss (batch)=0.88] 

epoch  24 :   training loss: 0.804707 OverallAcc: 0.898613 MeanAcc 0.794595 mIoU 0.700052 time: 111.223494





epoch  24 : validation loss: 0.924934 OverallAcc: 0.876560 MeanAcc 0.770896 mIoU 0.671241 time: 0.042609


Epoch 26/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.44img/s, loss (batch)=0.81] 

epoch  25 :   training loss: 0.806394 OverallAcc: 0.898347 MeanAcc 0.793580 mIoU 0.699332 time: 110.347575





epoch  25 : validation loss: 0.924125 OverallAcc: 0.877205 MeanAcc 0.769850 mIoU 0.671372 time: 0.042614


Epoch 27/50: 100%|██████████| 2476/2476 [01:51<00:00, 22.28img/s, loss (batch)=0.818]

epoch  26 :   training loss: 0.805665 OverallAcc: 0.898533 MeanAcc 0.793510 mIoU 0.699468 time: 111.141620





epoch  26 : validation loss: 0.922477 OverallAcc: 0.876899 MeanAcc 0.772722 mIoU 0.671850 time: 0.042657


Epoch 28/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.39img/s, loss (batch)=0.751]

epoch  27 :   training loss: 0.805134 OverallAcc: 0.898735 MeanAcc 0.793494 mIoU 0.699846 time: 110.565832





epoch  27 : validation loss: 0.924648 OverallAcc: 0.876158 MeanAcc 0.771665 mIoU 0.671283 time: 0.042628


Epoch 29/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.57img/s, loss (batch)=0.752]

epoch  28 :   training loss: 0.802739 OverallAcc: 0.899141 MeanAcc 0.794526 mIoU 0.700302 time: 109.716890





epoch  28 : validation loss: 0.921143 OverallAcc: 0.878416 MeanAcc 0.773321 mIoU 0.673845 time: 0.042622


Epoch 30/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.43img/s, loss (batch)=0.873]

epoch  29 :   training loss: 0.804172 OverallAcc: 0.898921 MeanAcc 0.795015 mIoU 0.700585 time: 110.407258





epoch  29 : validation loss: 0.926312 OverallAcc: 0.875733 MeanAcc 0.774028 mIoU 0.671368 time: 0.042624


Epoch 31/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.42img/s, loss (batch)=0.899]

epoch  30 :   training loss: 0.805583 OverallAcc: 0.898497 MeanAcc 0.793151 mIoU 0.699501 time: 110.439006





epoch  30 : validation loss: 0.928545 OverallAcc: 0.876548 MeanAcc 0.766250 mIoU 0.669987 time: 0.042650


Epoch 32/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.42img/s, loss (batch)=0.678]

epoch  31 :   training loss: 0.803186 OverallAcc: 0.899067 MeanAcc 0.794481 mIoU 0.700281 time: 110.418664





epoch  31 : validation loss: 0.918174 OverallAcc: 0.878205 MeanAcc 0.777006 mIoU 0.674100 time: 0.042669


Epoch 33/50: 100%|██████████| 2476/2476 [01:49<00:00, 22.67img/s, loss (batch)=0.783]

epoch  32 :   training loss: 0.805962 OverallAcc: 0.898486 MeanAcc 0.793799 mIoU 0.699608 time: 109.242959





epoch  32 : validation loss: 0.916564 OverallAcc: 0.879133 MeanAcc 0.775899 mIoU 0.675107 time: 0.042643
  (best QAT mIoU updated: 0.675107)


Epoch 34/50: 100%|██████████| 2476/2476 [01:50<00:00, 22.37img/s, loss (batch)=0.815]

epoch  33 :   training loss: 0.804630 OverallAcc: 0.898534 MeanAcc 0.793979 mIoU 0.699631 time: 110.701701





epoch  33 : validation loss: 0.919096 OverallAcc: 0.877872 MeanAcc 0.775293 mIoU 0.673826 time: 0.042510


Epoch 35/50: 100%|██████████| 2476/2476 [01:52<00:00, 22.10img/s, loss (batch)=0.784]

epoch  34 :   training loss: 0.806426 OverallAcc: 0.898278 MeanAcc 0.793378 mIoU 0.699266 time: 112.047472





epoch  34 : validation loss: 0.921617 OverallAcc: 0.877786 MeanAcc 0.772510 mIoU 0.673192 time: 0.042767

QAT Training Complete
  Best QAT mIoU: 0.675107

[4/5] Converting to INT8 quantized model...
Model converted to INT8 quantized format
All weights and activations are now 8-bit integers
Note: Quantized model is on CPU (qnnpack/fbgemm requirement)

[5/5] Evaluating INT8 model on CPU...
  Moving validation data to CPU for INT8 evaluation...
epoch  -1 : validation loss: 0.932147 OverallAcc: 0.876710 MeanAcc 0.771203 mIoU 0.668445 (CPU)

INT8 QUANTIZATION COMPLETE
  Final INT8 mIoU: 0.668445
  Threshold: 0.66
Requirement satisfied (margin: +0.008445)

INT8 model saved to: model_task2_int8.pth (CPU model)
