Import basic modules

In [1]:
import importlib
import json
import os
import random
import time
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import torch
from tqdm import tqdm
# from tqdm.notebook import tqdm
# import tqdm as tqdm_module
# tqdm_module.tqdm = tqdm  # replace tqdm globally

# Utils import (modularized)
from utils import create_dataloaders, CDMetrics, get_loss_fn, CDTrainer



CUDA(GPU) 확인

In [2]:
# Single-GPU setup: restrict this process to one GPU through
# CUDA_VISIBLE_DEVICES, then pick CUDA when available and CPU otherwise.
GPU_ID = 1
os.environ.update(CUDA_VISIBLE_DEVICES=f"{GPU_ID}")
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print(f"Using GPU: {GPU_ID}\nDevice: {DEVICE}")

Using GPU: 1
Device: cuda


데이터셋 & 모델 리스트

In [3]:
# ============================
# Dataset list
# ============================
DATASET_ROOT = "./dataset"  # dataset root folder (a symbolic link)
DATASET_LIST = [
    'LEVIR-CD+',
    'WHU-CD',
    'CLCD',
    'CaBuAr-CD',
    'S2Looking-CD',
    'SEN1Floods11-CD'
]

##### 모델 & 데이터셋 설정

In [4]:
# Dataset to run the experiment on
TEST_DATASET = 'LEVIR-CD+'

TEST_MODEL = 'Change3D'  # matched against the file name (case-insensitive, hyphens ignored)

# Class to experiment with (a class inside that model file)
TEST_CLASS = 'Change3DX3D'

# When continuing a previous run
RESUME = True  # set to True to resume automatically

# ============================
# 모델 파일명 변환 함수
# ============================
def normalize_model_name(model_name):
    """
    Turn a model name into the module file name used under ``models/``.

    - upper-case letters become lower-case
    - hyphens (-) are removed
    - underscores (_) are kept

    Examples:
    'ST-Robust-Net' -> 'strobustnet'
    'USSFC-Net' -> 'ussfcnet'
    'Change3D' -> 'change3d'
    'A2Net' -> 'a2net'
    """
    # Lower-case first, then drop every hyphen by splitting on it and re-joining
    lowered = model_name.lower()
    return "".join(lowered.split('-'))

# ============================
# Model info
# ============================
model_class = TEST_CLASS
model_file = normalize_model_name(TEST_MODEL)

# Single print call; the emitted text is the same four lines as before
print("\n".join((
    f"Test dataset: {TEST_DATASET}",
    f"Test model: {TEST_MODEL}",
    f"Module: models.{model_file}",
    f"Class: {model_class}",
)))


Test dataset: LEVIR-CD+
Test model: Change3D
Module: models.change3d
Class: Change3DX3D


시드 설정

In [5]:
# Seed every random number generator this notebook can reach so that a
# Restart & Run All reproduces the same run: Python's `random`, NumPy's
# legacy global generator, and PyTorch (CPU and, when present, all CUDA devices).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

##### 고정 메트릭 설정

In [6]:
# Image settings
IMG_SIZE = 256
IN_CHANNELS = 3  # RGB
OUT_CHANNELS = 1  # Binary change detection

# Training settings
BATCH_SIZE = 16
NUM_WORKERS = 4
MAX_ITERATIONS = 100000  # fixed regardless of dataset size


모델 동적 import

In [7]:
# ============================
# 모델 동적 import
# ============================
def get_model_class(module_name, class_name):
    """Dynamically import ``models.<module_name>`` and return one of its attributes.

    Args:
        module_name: File name (without ``.py``) of the module inside the
            ``models`` package.
        class_name: Name of the attribute (normally a class) to fetch from
            that module.

    Returns:
        The object bound to ``class_name`` in the imported module.

    Raises:
        ImportError: The module could not be imported. The original error is
            attached as ``__cause__``.
        AttributeError: The module was imported but has no ``class_name``
            attribute. The original error is attached as ``__cause__``.
    """
    module_path = f'models.{module_name}'

    # Import and attribute lookup are handled separately so that an error
    # raised while importing the module is never reported as a missing class.
    try:
        module = importlib.import_module(module_path)
    except ImportError as e:
        print(f"❌ Cannot import module {module_path}")
        print(f"   Make sure models/{module_name}.py exists")
        raise ImportError(f"Module import failed: {e}") from e

    try:
        model_class = getattr(module, class_name)
    except AttributeError as e:
        print(f"❌ Class {class_name} not found in {module_path}")
        # List the public names that start with an upper-case letter as candidates
        available_classes = [name for name in dir(module)
                             if not name.startswith('_') and name[0].isupper()]
        print(f"   Available classes: {available_classes}")
        raise AttributeError(f"Class not found: {e}") from e

    print(f"✅ Successfully loaded: {class_name} from {module_path}")
    return model_class

# Load the model class
ModelClass = get_model_class(model_file, model_class)

✅ Successfully loaded: Change3DX3D from models.change3d


In [8]:
from configs import get_model_config

# The config is looked up by TEST_MODEL (the original, un-normalized name)
model_config = get_model_config(TEST_MODEL)

# Required entries: a missing key raises KeyError
optimizer = model_config['optimizer']
learning_rate = model_config['learning_rate']
weight_decay = model_config['weight_decay']

# Optional entries: fall back to the defaults given here
betas = model_config.get('betas', (0.9, 0.999))
eps = model_config.get('eps', 1e-8)
scheduler = model_config.get('scheduler', None)
momentum = model_config.get('momentum', 0.9)

# Summary of the configuration in use
print("\nModel configurations:")
print(
    f"  Model: {TEST_MODEL}\n"
    f"  Optimizer: {optimizer}\n"
    f"  Learning rate: {learning_rate}\n"
    f"  Weight decay: {weight_decay}"
)
# Momentum is reported only for SGD
if optimizer == 'sgd' and momentum:
    print(f"  Momentum: {momentum}")
if scheduler:
    print(f"  Scheduler: {scheduler}")


Model configurations:
  Model: Change3D
  Optimizer: adam
  Learning rate: 0.0002
  Weight decay: 0.0001
  Scheduler: poly


실험 폴더 설정

In [9]:
# Experiment name: <model>_<class>; the class part is dropped when it equals
# the model name (compared case-insensitively).
experiment_name = (
    TEST_MODEL
    if TEST_MODEL.lower() == model_class.lower()
    else f"{TEST_MODEL}_{model_class}"
)

# Directory layout: experiments/<dataset>/<experiment>/checkpoints
test_path = f"experiments/{TEST_DATASET}"
test_dir = Path(test_path)
model_dir = Path(f"{test_path}/{experiment_name}")
checkpoint_dir = model_dir / "checkpoints"

# Creating the deepest directory with parents=True also creates
# test_dir and model_dir when they do not exist yet.
checkpoint_dir.mkdir(parents=True, exist_ok=True)

print(f"Experiment directory: {model_dir}")

Experiment directory: experiments/LEVIR-CD+/Change3D_Change3DX3D


Iteration 계산 
MAX_ITERATIONS = 100000을 기준으로 epoch 수 계산


In [10]:
# Locate the dataset, count the entries of each split that is present, and
# derive the number of epochs from the fixed MAX_ITERATIONS budget.
dataset_path = Path(DATASET_ROOT) / TEST_DATASET
if not dataset_path.exists():
    raise FileNotFoundError(f"Dataset {TEST_DATASET} not found at {dataset_path}")

print(f"\nDataset found: {dataset_path}")
splits = ['train', 'val', 'test']
dataset_info = {}

for split in splits:
    split_path = dataset_path / split
    if split_path.exists():
        # Every entry directly under <split>/t1 counts as one sample
        img_count = len(list((split_path / 't1').glob('*')))
        dataset_info[split] = img_count
        print(f"  {split}: {img_count} images")

# Without a train split EPOCHS could not be defined and later cells would
# fail with a NameError, so stop here with an explicit message instead.
if 'train' not in dataset_info:
    raise FileNotFoundError(f"Train split not found at {dataset_path / 'train'}")

# Compute the number of epochs
train_samples = dataset_info['train']
# Floor division: only full batches are counted
iterations_per_epoch = train_samples // BATCH_SIZE
if iterations_per_epoch == 0:
    raise ValueError(
        f"Train split has {train_samples} samples, fewer than one batch "
        f"(BATCH_SIZE={BATCH_SIZE}); cannot derive the number of epochs"
    )
EPOCHS = MAX_ITERATIONS // iterations_per_epoch

print(f"\nTraining iterations info:")
print(f"  Train samples: {train_samples}")
print(f"  Iterations per epoch: {iterations_per_epoch}")
print(f"  Total epochs: {EPOCHS}")
print(f"  Total iterations: {EPOCHS * iterations_per_epoch}")



Dataset found: dataset/LEVIR-CD+
  train: 10192 images
  val: 1568 images
  test: 4000 images

Training iterations info:
  Train samples: 10192
  Iterations per epoch: 637
  Total epochs: 156
  Total iterations: 99372


데이터로더 생성

In [11]:
# Build the three data loaders with the shared batch size and worker count,
# then unpack them in the order create_dataloaders returns them.
train_loader, val_loader, test_loader = create_dataloaders(
    DATASET_ROOT, dataset_name=TEST_DATASET,
    batch_size=BATCH_SIZE, num_workers=NUM_WORKERS,
) if False else create_dataloaders(
    root_dir=DATASET_ROOT,
    dataset_name=TEST_DATASET,
    num_workers=NUM_WORKERS,
    batch_size=BATCH_SIZE,
)

Loaded 10192 images from LEVIR-CD+/train
Loaded 1568 images from LEVIR-CD+/val
Loaded 4000 images from LEVIR-CD+/test


모델 학습 및 검증

In [12]:
# Build the model on the selected device and the training loss
model = ModelClass(num_classes=1).to(DEVICE)
criterion = get_loss_fn('bce_dice')

# Optimizer, chosen by the name found in the model config
if optimizer == 'adam':
    opt = torch.optim.Adam(
        model.parameters(),
        lr=learning_rate,
        betas=betas,
        eps=eps,
        weight_decay=weight_decay
    )
elif optimizer == 'adamw':
    opt = torch.optim.AdamW(
        model.parameters(),
        lr=learning_rate,
        betas=betas,
        eps=eps,
        weight_decay=weight_decay
    )
elif optimizer == 'sgd':
    opt = torch.optim.SGD(
        model.parameters(),
        lr=learning_rate,
        momentum=momentum,
        weight_decay=weight_decay
    )
else:
    raise ValueError(f"Unknown optimizer: {optimizer}")

# Learning-rate scheduler, chosen by the name found in the model config.
# A falsy name (None, '') means "no scheduler". An unrecognized name is an
# error, as for the optimizer: ignoring it would silently train with a
# constant learning rate.
# NOTE(review): the poly exponent is not present in the visible config; 0.9 is
# assumed here — confirm against the reference implementation of the model.
POLY_POWER = 0.9
sched = None
if scheduler == 'cosine':
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=EPOCHS)
elif scheduler == 'linear':
    sched = torch.optim.lr_scheduler.LinearLR(opt, start_factor=1.0, end_factor=0.01, total_iters=EPOCHS)
elif scheduler == 'poly':
    # lr = base_lr * (1 - epoch / EPOCHS) ** POLY_POWER, clamped at 0.
    # Like the schedulers above this is sized in epochs (T_max / total_iters = EPOCHS).
    sched = torch.optim.lr_scheduler.LambdaLR(
        opt,
        lr_lambda=lambda epoch: max(0.0, 1.0 - epoch / max(EPOCHS, 1)) ** POLY_POWER
    )
elif scheduler:
    raise ValueError(f"Unknown scheduler: {scheduler}")


Loading pretrained weights from ./pretrained/Change3D/X3D_L.pyth...
✅ Successfully loaded X3D-L pretrained weights
   Loaded 1139/1141 parameters
   Loaded blocks: ['0', '1', '2', '3', '4', '5']


In [None]:
# ============================
# Create the trainer and train
# ============================
trainer = CDTrainer(
    model=model,
    optimizer=opt,
    criterion=criterion,
    device=DEVICE,
    checkpoint_dir=checkpoint_dir,
    scheduler=sched
)


def _checkpoint_sort_key(ckpt_path):
    """Return a sort key that orders checkpoint files by their numeric epoch.

    The epoch is the text after the last underscore of the file stem
    (``checkpoint_epoch_70.pth`` -> 70). A plain string sort would put
    ``checkpoint_epoch_100`` before ``checkpoint_epoch_50``. Files whose
    suffix is not a number sort first, with the file name as tie-breaker.
    """
    suffix = ckpt_path.stem.rsplit('_', 1)[-1]
    epoch = int(suffix) if suffix.isdecimal() else -1
    return (epoch, ckpt_path.name)


# Look for an existing checkpoint and load the most recent one
start_epoch = 0
if RESUME:
    checkpoints = sorted(
        checkpoint_dir.glob("checkpoint_epoch_*.pth"),
        key=_checkpoint_sort_key
    )
    if checkpoints:
        latest_ckpt = checkpoints[-1]
        print(f"Loading checkpoint: {latest_ckpt}")

        checkpoint = torch.load(latest_ckpt, map_location=DEVICE)
        model.load_state_dict(checkpoint['model_state_dict'])
        opt.load_state_dict(checkpoint['optimizer_state_dict'])
        # Restore the scheduler only when both a scheduler and a saved state
        # exist; a missing or None entry would otherwise break load_state_dict.
        sched_state = checkpoint.get('scheduler_state_dict')
        if sched and sched_state is not None:
            sched.load_state_dict(sched_state)
        elif sched:
            print("⚠️ Checkpoint has no scheduler state; the LR schedule restarts from its initial value")

        # Resume at the epoch after the one stored in the checkpoint
        start_epoch = checkpoint.get('epoch', 0) + 1
        print(f"Resuming from epoch {start_epoch}/{EPOCHS}")
    else:
        print("No checkpoint found, starting from scratch")

# Run training (remaining epochs only)
if start_epoch < EPOCHS:
    print("\n" + "="*60)
    if start_epoch > 0:
        print(f"Resuming Training from epoch {start_epoch}...")
        print(f"Remaining epochs: {EPOCHS - start_epoch}")
    else:
        print("Starting Training...")
    print("="*60)

    # Set the trainer's starting epoch (only if the trainer exposes it)
    if hasattr(trainer, 'current_epoch'):
        trainer.current_epoch = start_epoch

    trainer.train(
        train_loader=train_loader,
        val_loader=val_loader,
        epochs=EPOCHS,
        val_interval=10,
        save_interval=50,
        start_epoch=start_epoch
    )
else:
    print(f"Training already completed ({start_epoch} epochs done)")

# ============================
# Test and evaluate
# ============================
print("\n" + "="*60)
print("Testing...")
print("="*60)

# Run the trainer's test routine on the test loader
test_metrics = trainer.test(test_loader)

# Measure inference speed
speed_metrics = trainer.measure_inference_speed(test_loader)

# Save the results
trainer.save_results(model_dir, experiment_name, TEST_DATASET)

# ============================
# Final summary
# ============================
total_params = sum(p.numel() for p in model.parameters())
print(f"\nModel Parameters: {total_params:,}")

final_results = {
    'experiment': experiment_name,
    'model_file': model_file,
    'model_class': model_class,
    'dataset': TEST_DATASET,
    'test_metrics': test_metrics,
    'speed_metrics': speed_metrics,
    'parameters': total_params,
    'training_config': {
        'optimizer': optimizer,
        'learning_rate': learning_rate,
        'weight_decay': weight_decay,
        'scheduler': scheduler,
        'epochs': EPOCHS,
        'batch_size': BATCH_SIZE
    }
}


def _json_default(value):
    """Convert values the json module cannot encode on its own.

    json.dump calls this only for objects it does not know how to serialize.
    NumPy scalars/arrays, torch tensors and paths are converted to plain
    Python values; anything else raises TypeError as json would.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, torch.Tensor):
        return value.detach().cpu().tolist()
    if isinstance(value, Path):
        return str(value)
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# NOTE(review): the types inside test_metrics / speed_metrics are not visible
# here; the default= hook keeps the dump from failing if they hold NumPy or
# torch values.
with open(model_dir / 'final_results.json', 'w', encoding='utf-8') as f:
    json.dump(final_results, f, indent=2, default=_json_default)

print("\n✅ All completed!")
print(f"Results saved in: {model_dir}")

Loading checkpoint: experiments/LEVIR-CD+/Change3D_Change3DX3D/checkpoints/checkpoint_epoch_70.pth
Resuming from epoch 71/156

Resuming Training from epoch 71...
Remaining epochs: 85

Starting Training
Total Epochs: 156
Iterations per epoch: 637
Total iterations: 99372

Overall Progress:   0%|          | 0/156 [00:00<?] 