In [1]:
import argparse
import os
import time
import datetime
import torch
import torch.optim as optim
from math import ceil
from torch.utils.data import DataLoader
from dataset.dataset import CityScapesDataset
from utils.metric import runningScore, averageMeter
import numpy as np
from tqdm import tqdm
import torch.nn as nn



# Restrict this process to physical GPU 2 by default. `setdefault` keeps any
# CUDA_VISIBLE_DEVICES value already exported by the launcher/scheduler instead
# of silently overwriting it. This must run before the first CUDA call in the
# kernel, because the variable is only read when CUDA is initialised.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "2")

In [2]:
# Sanity check: displays whether PyTorch can see a CUDA device after the
# CUDA_VISIBLE_DEVICES setting made in the previous cell.
torch.cuda.is_available()

True

In [3]:
# --- Dataset sizes --------------------------------------------------------
# (an earlier value of 2436 was left commented out in this cell)
n_train = 2476   # used as the progress-bar total in train()
n_val = 400

# --- Training hyper-parameters --------------------------------------------
batchsize = 16
Epoch = 25                 # total number of training epochs
img_size = [256, 512]      # passed to CityScapesDataset — presumably [height, width]; TODO confirm
model_name = 'model_task1'
task = 'cat'               # only 'cat' is accepted by the training cell
lr = 2e-3                  # initial learning rate handed to the optimizer

# Checkpoint file written whenever validation mIoU improves.
final_save_path = f'./{model_name}.pth'

In [4]:
def train(epoch, data_loader, Net, optimizer, loss_fn, Meter):
    """Run one optimisation pass over ``data_loader`` and print epoch metrics.

    Relies on the notebook-level globals ``device`` (where batches are moved)
    and ``Epoch`` (used only for the progress-bar label).

    Args:
        epoch: Zero-based epoch index. Shown as ``epoch + 1`` in the progress
            bar and unchanged in the printed summary line.
        data_loader: Iterable yielding ``(data, target)`` batches.
        Net: Model to optimise; it is switched to training mode.
        optimizer: Optimizer whose gradients are zeroed and stepped per batch.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a one-element
            tensor (``.backward()`` and ``.item()`` are called on it).
        Meter: Dict with a ``'metric'`` entry (``update(gt, pred)`` /
            ``get_scores()``) and a ``'loss'`` entry (``update(value, n)`` /
            ``.avg``). The meters are not reset here; the caller does that.

    Returns:
        Tuple ``(mIoU, average_loss)`` read from the meters after the pass.
    """
    Net.train()

    # Size the progress bar from the loader itself rather than a hand-kept
    # global constant, so it stays correct when the dataset changes. If the
    # loader exposes no sized dataset, tqdm simply runs without a total.
    try:
        total_images = len(data_loader.dataset)
    except (AttributeError, TypeError):
        total_images = None

    timeStart = time.time()
    with tqdm(total=total_images, desc=f'Epoch {epoch + 1}/{Epoch}', unit='img') as pbar:
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            pred = Net(data)
            loss = loss_fn(pred, target)
            loss.backward()
            optimizer.step()

            training_loss = loss.item()
            batch_images = data.size(0)
            pbar.set_postfix(**{'loss (batch)': training_loss})

            # Index of the maximum along dim 1 (presumably the class axis)
            # becomes the predicted label map fed to the metric.
            pred_labels = pred.detach().max(1)[1]
            Meter['metric'].update(target.detach().cpu().numpy(), pred_labels.cpu().numpy())
            Meter['loss'].update(training_loss, batch_images)
            pbar.update(batch_images)
    timeEnd = time.time()

    score, class_iou = Meter['metric'].get_scores()
    loss_avg = Meter['loss'].avg
    print('epoch %3d : %10s loss: %f OverallAcc: %f MeanAcc %f mIoU %f time: %f' 
        %(epoch, ('training'), loss_avg, score['OverallAcc'], score['MeanAcc'], score['mIoU'], timeEnd-timeStart))

    return score['mIoU'],loss_avg

In [5]:
def val(epoch, data_loader, Net, loss_fn, Meter):
    """Evaluate ``Net`` on ``data_loader`` without gradients and print metrics.

    Uses the notebook-level global ``device`` to place each batch.

    Args:
        epoch: Epoch index, used only in the printed summary line.
        data_loader: Iterable yielding ``(data, target)`` batches.
        Net: Model to evaluate; it is switched to eval mode.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a one-element tensor.
        Meter: Dict with ``'metric'``, ``'loss'`` and ``'time'`` meters; the
            caller is responsible for resetting them beforehand.

    Returns:
        The ``'mIoU'`` entry of the scores reported by ``Meter['metric']``.
    """
    Net.eval()
    metric_meter = Meter['metric']
    loss_meter = Meter['loss']
    time_meter = Meter['time']

    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)

            # Time only the forward pass plus loss; `.item()` pulls the loss
            # value back to the host before the timer is stopped.
            tic = time.time()
            logits = Net(images)
            validation_loss = loss_fn(logits, labels).item()
            toc = time.time()

            # Indices of the maximum along dim 1 are the predicted labels.
            _, predicted = logits.data.max(1)
            metric_meter.update(labels.data.cpu().numpy(), predicted.data.cpu().numpy())
            loss_meter.update(validation_loss, images.size()[0])
            time_meter.update(toc - tic, 1)

    score, class_iou = metric_meter.get_scores()
    loss_avg = loss_meter.avg
    time_avg = time_meter.avg
    summary = (epoch, ('validation'), loss_avg, score['OverallAcc'], score['MeanAcc'], score['mIoU'], time_avg)
    print('epoch %3d : %10s loss: %f OverallAcc: %f MeanAcc %f mIoU %f time: %f' % summary)

    return score['mIoU']

In [6]:
# Validation mIoU trackers: the best value seen so far and the latest one.
best_val_miou = current_val_miou = 0

In [7]:
import copy

import network

if __name__ == '__main__':
    # Root directory of the CityScapes data, shared by both splits.
    data_root = "/export/home/dl2025f/shared/data"
    # Validate every `val_interval` epochs; the final epoch is always validated.
    val_interval = 1

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")   
    assert task in ['cat'],'wrong value of task'
    if task=='cat':
        num_classes = 8

    # Separate meter sets so training and validation statistics never mix.
    training_meter = {'metric':runningScore(num_classes),'loss':averageMeter(),'time':averageMeter()}
    validation_meter = {'metric':runningScore(num_classes),'loss':averageMeter(),'time':averageMeter()}

    print(str(datetime.datetime.now()))
    print('batchsize %3d | epoch %3d | img_size  %4d %4d | task  %6s | model_name  %25s '
                    %(batchsize,Epoch,img_size[0],img_size[1], task ,model_name ))

    TrainingDataset   = CityScapesDataset(data_root, "training", img_size, task=task, augmentation=None)
    ValidationDataset = CityScapesDataset(data_root, "validation", img_size, task=task)

    TrainingLoader    = DataLoader(TrainingDataset, batch_size=batchsize, shuffle=True, num_workers=4)
    ValidationLoader  = DataLoader(ValidationDataset, batch_size=batchsize, shuffle=False, num_workers=4)
    # Number of training batches per epoch (not used by the loop below).
    num_batch         = ceil(len(TrainingDataset)/batchsize)

    # define your model
    Net = network.UNet(n_channels=3, n_classes=num_classes)
    Net = Net.to(device)
    # define your optimizer
    optimizer = optim.Adam(Net.parameters(), lr=lr)
    # scheduler: halve the learning rate every 20 epochs
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
    # define your loss   
    loss_fn = nn.CrossEntropyLoss()

    start_epoch = 0

    # Reset the trackers here so that re-running this cell starts from a clean
    # slate instead of comparing against the best score of a previous run
    # still held in the kernel.
    best_val_miou = 0
    current_val_miou = 0

    for epoch in range(start_epoch, Epoch):
        for meter in training_meter.values():
            meter.reset()

        show_lr=optimizer.param_groups[0]['lr']
        print('learning rate is : ',  show_lr)
        current_train_miou,current_train_loss=train(epoch, TrainingLoader, Net, optimizer, loss_fn, training_meter)

        scheduler.step() ###  StepLR

        if (epoch+1)%val_interval==0 or epoch==Epoch-1:
            for meter in validation_meter.values():
                meter.reset()
            current_val_miou=val(epoch, ValidationLoader, Net, loss_fn, validation_meter)

            if current_val_miou>best_val_miou:
                best_val_miou=current_val_miou
                # state_dict() returns references to the live parameters, so
                # take a deep copy; otherwise `best_state_dict` would keep
                # changing as training continues after this epoch.
                best_state_dict = copy.deepcopy(Net.state_dict())
                torch.save(best_state_dict, final_save_path)
                print("(model saved)")
    print(str(datetime.datetime.now()))

2025-11-27 16:32:33.107659
batchsize  16 | epoch  25 | img_size   256  512 | task     cat | model_name                model_task1 
learning rate is :  0.002


Epoch 1/25: 100%|██████████| 2476/2476 [01:59<00:00, 20.74img/s, loss (batch)=0.473]

epoch   0 :   training loss: 0.729188 OverallAcc: 0.767928 MeanAcc 0.544855 mIoU 0.434559 time: 119.360943





epoch   0 : validation loss: 0.578124 OverallAcc: 0.817678 MeanAcc 0.612556 mIoU 0.500657 time: 0.124723
(model saved)
learning rate is :  0.002


Epoch 2/25: 100%|██████████| 2476/2476 [01:54<00:00, 21.61img/s, loss (batch)=0.44] 

epoch   1 :   training loss: 0.503994 OverallAcc: 0.839268 MeanAcc 0.617338 mIoU 0.525765 time: 114.603011





epoch   1 : validation loss: 0.511880 OverallAcc: 0.840505 MeanAcc 0.637295 mIoU 0.536956 time: 0.124734
(model saved)
learning rate is :  0.002


Epoch 3/25: 100%|██████████| 2476/2476 [01:54<00:00, 21.61img/s, loss (batch)=0.414]

epoch   2 :   training loss: 0.435670 OverallAcc: 0.862442 MeanAcc 0.646498 mIoU 0.565140 time: 114.566798





epoch   2 : validation loss: 0.543723 OverallAcc: 0.823077 MeanAcc 0.614269 mIoU 0.515537 time: 0.124737
learning rate is :  0.002


Epoch 4/25: 100%|██████████| 2476/2476 [01:55<00:00, 21.45img/s, loss (batch)=0.451]

epoch   3 :   training loss: 0.396972 OverallAcc: 0.874540 MeanAcc 0.683273 mIoU 0.602798 time: 115.405648





epoch   3 : validation loss: 0.505612 OverallAcc: 0.838710 MeanAcc 0.650778 mIoU 0.563023 time: 0.124799
(model saved)
learning rate is :  0.002


Epoch 5/25: 100%|██████████| 2476/2476 [01:55<00:00, 21.38img/s, loss (batch)=0.325]

epoch   4 :   training loss: 0.375216 OverallAcc: 0.881394 MeanAcc 0.704627 mIoU 0.623564 time: 115.786977





epoch   4 : validation loss: 0.404466 OverallAcc: 0.870484 MeanAcc 0.750390 mIoU 0.643854 time: 0.124799
(model saved)
learning rate is :  0.002


Epoch 6/25: 100%|██████████| 2476/2476 [01:55<00:00, 21.51img/s, loss (batch)=0.294]

epoch   5 :   training loss: 0.354270 OverallAcc: 0.887646 MeanAcc 0.722447 mIoU 0.640724 time: 115.106930





epoch   5 : validation loss: 0.393679 OverallAcc: 0.873087 MeanAcc 0.740139 mIoU 0.644807 time: 0.124736
(model saved)
learning rate is :  0.002


Epoch 7/25: 100%|██████████| 2476/2476 [01:53<00:00, 21.81img/s, loss (batch)=0.298]

epoch   6 :   training loss: 0.336365 OverallAcc: 0.893292 MeanAcc 0.739385 mIoU 0.657310 time: 113.523159





epoch   6 : validation loss: 0.367653 OverallAcc: 0.882215 MeanAcc 0.736890 mIoU 0.651055 time: 0.124816
(model saved)
learning rate is :  0.002


Epoch 8/25: 100%|██████████| 2476/2476 [01:55<00:00, 21.39img/s, loss (batch)=0.315]

epoch   7 :   training loss: 0.319038 OverallAcc: 0.898953 MeanAcc 0.752040 mIoU 0.671464 time: 115.734573





epoch   7 : validation loss: 0.491033 OverallAcc: 0.853651 MeanAcc 0.653698 mIoU 0.563255 time: 0.124713
learning rate is :  0.002


Epoch 9/25: 100%|██████████| 2476/2476 [01:54<00:00, 21.68img/s, loss (batch)=0.244]

epoch   8 :   training loss: 0.314168 OverallAcc: 0.899983 MeanAcc 0.758339 mIoU 0.676242 time: 114.193414





epoch   8 : validation loss: 0.395865 OverallAcc: 0.871787 MeanAcc 0.768099 mIoU 0.644136 time: 0.124757
learning rate is :  0.002


Epoch 10/25: 100%|██████████| 2476/2476 [01:56<00:00, 21.23img/s, loss (batch)=0.33] 

epoch   9 :   training loss: 0.308938 OverallAcc: 0.901522 MeanAcc 0.764866 mIoU 0.682724 time: 116.646316





epoch   9 : validation loss: 0.369528 OverallAcc: 0.883913 MeanAcc 0.755913 mIoU 0.661560 time: 0.124807
(model saved)
learning rate is :  0.002


Epoch 11/25: 100%|██████████| 2476/2476 [01:55<00:00, 21.43img/s, loss (batch)=0.46] 

epoch  10 :   training loss: 0.297384 OverallAcc: 0.905323 MeanAcc 0.773827 mIoU 0.692221 time: 115.555393





epoch  10 : validation loss: 0.385289 OverallAcc: 0.882760 MeanAcc 0.706540 mIoU 0.635460 time: 0.124774
learning rate is :  0.002


Epoch 12/25: 100%|██████████| 2476/2476 [01:54<00:00, 21.64img/s, loss (batch)=0.357]

epoch  11 :   training loss: 0.283054 OverallAcc: 0.909126 MeanAcc 0.786360 mIoU 0.704925 time: 114.418619





epoch  11 : validation loss: 0.337550 OverallAcc: 0.891989 MeanAcc 0.788280 mIoU 0.686823 time: 0.124752
(model saved)
learning rate is :  0.002


Epoch 13/25: 100%|██████████| 2476/2476 [01:55<00:00, 21.44img/s, loss (batch)=0.209]

epoch  12 :   training loss: 0.275572 OverallAcc: 0.911418 MeanAcc 0.793632 mIoU 0.712000 time: 115.474027





epoch  12 : validation loss: 0.357881 OverallAcc: 0.883100 MeanAcc 0.792739 mIoU 0.675084 time: 0.124786
learning rate is :  0.002


Epoch 14/25: 100%|██████████| 2476/2476 [01:56<00:00, 21.30img/s, loss (batch)=0.308]

epoch  13 :   training loss: 0.269603 OverallAcc: 0.913280 MeanAcc 0.798473 mIoU 0.718442 time: 116.235321





epoch  13 : validation loss: 0.331816 OverallAcc: 0.893333 MeanAcc 0.803001 mIoU 0.697731 time: 0.124838
(model saved)
learning rate is :  0.002


Epoch 15/25: 100%|██████████| 2476/2476 [01:55<00:00, 21.51img/s, loss (batch)=0.31] 

epoch  14 :   training loss: 0.264392 OverallAcc: 0.914253 MeanAcc 0.801327 mIoU 0.720244 time: 115.110225





epoch  14 : validation loss: 0.393002 OverallAcc: 0.879713 MeanAcc 0.728110 mIoU 0.659736 time: 0.124756
learning rate is :  0.002


Epoch 16/25: 100%|██████████| 2476/2476 [01:55<00:00, 21.36img/s, loss (batch)=0.237]

epoch  15 :   training loss: 0.256967 OverallAcc: 0.916897 MeanAcc 0.808739 mIoU 0.728930 time: 115.939395





epoch  15 : validation loss: 0.363559 OverallAcc: 0.884479 MeanAcc 0.777264 mIoU 0.692921 time: 0.124847
learning rate is :  0.002


Epoch 17/25: 100%|██████████| 2476/2476 [01:55<00:00, 21.37img/s, loss (batch)=0.21] 

epoch  16 :   training loss: 0.251992 OverallAcc: 0.918281 MeanAcc 0.814835 mIoU 0.735083 time: 115.886910





epoch  16 : validation loss: 0.333536 OverallAcc: 0.896091 MeanAcc 0.826353 mIoU 0.706497 time: 0.124865
(model saved)
learning rate is :  0.002


Epoch 18/25: 100%|██████████| 2476/2476 [01:54<00:00, 21.59img/s, loss (batch)=0.2]  

epoch  17 :   training loss: 0.246530 OverallAcc: 0.919795 MeanAcc 0.818984 mIoU 0.739131 time: 114.667355





epoch  17 : validation loss: 0.325583 OverallAcc: 0.896971 MeanAcc 0.821507 mIoU 0.714616 time: 0.124822
(model saved)
learning rate is :  0.002


Epoch 19/25: 100%|██████████| 2476/2476 [01:55<00:00, 21.51img/s, loss (batch)=0.18] 

epoch  18 :   training loss: 0.240712 OverallAcc: 0.921189 MeanAcc 0.823122 mIoU 0.743635 time: 115.104829





epoch  18 : validation loss: 0.309005 OverallAcc: 0.904844 MeanAcc 0.796084 mIoU 0.723522 time: 0.124857
(model saved)
learning rate is :  0.002


Epoch 20/25: 100%|██████████| 2476/2476 [01:55<00:00, 21.43img/s, loss (batch)=0.231]

epoch  19 :   training loss: 0.234876 OverallAcc: 0.922047 MeanAcc 0.826588 mIoU 0.747327 time: 115.556037





epoch  19 : validation loss: 0.283986 OverallAcc: 0.909923 MeanAcc 0.823518 mIoU 0.738049 time: 0.124832
(model saved)
learning rate is :  0.001


Epoch 21/25: 100%|██████████| 2476/2476 [01:55<00:00, 21.39img/s, loss (batch)=0.16] 

epoch  20 :   training loss: 0.210936 OverallAcc: 0.929257 MeanAcc 0.843030 mIoU 0.767361 time: 115.774185





epoch  20 : validation loss: 0.267773 OverallAcc: 0.913731 MeanAcc 0.825809 mIoU 0.747070 time: 0.124797
(model saved)
learning rate is :  0.001


Epoch 22/25: 100%|██████████| 2476/2476 [01:54<00:00, 21.67img/s, loss (batch)=0.162]

epoch  21 :   training loss: 0.203718 OverallAcc: 0.931438 MeanAcc 0.848350 mIoU 0.773535 time: 114.248903





epoch  21 : validation loss: 0.279130 OverallAcc: 0.912593 MeanAcc 0.830688 mIoU 0.750744 time: 0.124784
(model saved)
learning rate is :  0.001


Epoch 23/25: 100%|██████████| 2476/2476 [01:54<00:00, 21.62img/s, loss (batch)=0.21] 

epoch  22 :   training loss: 0.200847 OverallAcc: 0.931878 MeanAcc 0.849667 mIoU 0.775563 time: 114.540407





epoch  22 : validation loss: 0.309877 OverallAcc: 0.901360 MeanAcc 0.848774 mIoU 0.726534 time: 0.124764
learning rate is :  0.001


Epoch 24/25: 100%|██████████| 2476/2476 [01:55<00:00, 21.35img/s, loss (batch)=0.176]

epoch  23 :   training loss: 0.194690 OverallAcc: 0.934036 MeanAcc 0.854269 mIoU 0.781661 time: 115.974708





epoch  23 : validation loss: 0.278457 OverallAcc: 0.912547 MeanAcc 0.834211 mIoU 0.751199 time: 0.124803
(model saved)
learning rate is :  0.001


Epoch 25/25: 100%|██████████| 2476/2476 [01:55<00:00, 21.41img/s, loss (batch)=0.195]

epoch  24 :   training loss: 0.194590 OverallAcc: 0.933573 MeanAcc 0.854462 mIoU 0.780516 time: 115.659122





epoch  24 : validation loss: 0.263163 OverallAcc: 0.916547 MeanAcc 0.846189 mIoU 0.760689 time: 0.124830
(model saved)
2025-11-27 17:30:15.128288


In [8]:
# Report the best validation mIoU reached by the training cell.
print(f' best_val_miou is :   {best_val_miou!s}')

 best_val_miou is :   0.7606891829551075
