In [1]:
# FLOPs, Parameters, FPS, Inference Time 측정 추가

# ============================================================
# Cell 1: Import Modules
# ============================================================
import copy
import importlib
import json
import os
import re
import time
from pathlib import Path

import numpy as np
import torch
# FLOPs 계산용
from thop import profile, clever_format
from tqdm import tqdm

# Utils import
from utils import create_dataloaders, CDMetrics, get_loss_fn

In [2]:
# ============================================================
# Cell 2: GPU configuration
# ============================================================
GPU_ID = 1
# Restrict this process to a single physical GPU. This only takes effect if it
# is set before the first CUDA call in the process, so keep this cell ahead of
# any cell that touches torch.cuda or moves tensors to the device.
os.environ['CUDA_VISIBLE_DEVICES'] = str(GPU_ID)
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Report what is actually in use: claiming a GPU while silently running on the
# CPU would make every timing below misleading.
if DEVICE.type == 'cuda':
    print(f"Using GPU: {GPU_ID}")
else:
    print(f"WARNING: CUDA is not available (requested GPU {GPU_ID}); running on CPU")
print(f"Device: {DEVICE}")

Using GPU: 1
Device: cuda


In [3]:
# ============================================================
# Cell 3: Experiment settings
# ============================================================
DATASET_ROOT = "./dataset"
TEST_DATASET = 'LEVIR-CD+'

TEST_MODEL = 'Change3D'  # Resolved to a module file name (case-insensitive, hyphens ignored)

# Name of the class to evaluate (a class defined inside that module file)
TEST_CLASS = 'Change3DX3D'  

def normalize_model_name(model_name):
    """Turn a display model name into its module file name.

    Hyphens are removed and the result is lower-cased, e.g.
    'ST-Robust-Net' -> 'strobustnet' and 'Change3D' -> 'change3d'.
    """
    hyphen_free_parts = model_name.lower().split('-')
    return ''.join(hyphen_free_parts)



In [4]:
# Evaluation settings: one image per batch so the per-batch time measured
# later is also the per-image time.
BATCH_SIZE = 1
NUM_WORKERS = 1
IMG_SIZE = 256

# DATASET_ROOT is intentionally not redefined here: it is set once in the
# experiment-settings cell, and a second assignment in this cell would silently
# override any change made there.

# Resolve the module file name and class name used for the dynamic import.
model_file = normalize_model_name(TEST_MODEL)
model_class = TEST_CLASS

print(f"Test dataset: {TEST_DATASET}")
print(f"Test model: {TEST_MODEL}")
print(f"Module: models.{model_file}")
print(f"Class: {model_class}")

Test dataset: LEVIR-CD+
Test model: Change3D
Module: models.change3d
Class: Change3DX3D


In [5]:
# Experiment name: "<model>_<class>"; the class suffix is dropped when the
# class name equals the (normalized) model name.
if model_class.lower() == normalize_model_name(TEST_MODEL):
    experiment_name = TEST_MODEL
else:
    experiment_name = f"{TEST_MODEL}_{model_class}"

exp_dir = Path(f"experiments/{TEST_DATASET}/{experiment_name}")
checkpoint_dir = exp_dir / "checkpoints"
best_model_path = checkpoint_dir / "best_model.pth"

print(f"Experiment directory: {exp_dir}")
print(f"Checkpoint path: {best_model_path}")


def _checkpoint_epoch(checkpoint_path):
    """Return the integer epoch encoded in 'checkpoint_epoch_<N>.pth'.

    Returns -1 when no integer epoch can be parsed, so such files sort before
    every properly numbered checkpoint.
    """
    match = re.search(r"checkpoint_epoch_(\d+)", checkpoint_path.stem)
    return int(match.group(1)) if match else -1


if not best_model_path.exists():
    # Fall back to the most recent epoch checkpoint. Sort by the numeric epoch:
    # a plain string sort would place "epoch_100" before "epoch_20" and pick
    # the wrong file as "latest". The file name is a deterministic tie-breaker.
    checkpoints = sorted(
        checkpoint_dir.glob("checkpoint_epoch_*.pth"),
        key=lambda path: (_checkpoint_epoch(path), path.name),
    )
    if checkpoints:
        best_model_path = checkpoints[-1]
        print(f"Best model not found, using latest checkpoint: {best_model_path.name}")
    else:
        raise FileNotFoundError(f"No checkpoints found in {checkpoint_dir}")


Experiment directory: experiments/LEVIR-CD+/Change3D_Change3DX3D
Checkpoint path: experiments/LEVIR-CD+/Change3D_Change3DX3D/checkpoints/best_model.pth


In [6]:
# ============================
# Dynamic model import
# ============================
def get_model_class(module_name, class_name):
    """Import ``models.<module_name>`` and return its attribute ``class_name``.

    Args:
        module_name: File name of the module inside the ``models`` package
            (without the ``.py`` suffix).
        class_name: Name of the model class to fetch from that module.

    Returns:
        The object bound to ``class_name`` in the imported module.

    Raises:
        ImportError: If ``models.<module_name>`` cannot be imported.
        AttributeError: If the module has no attribute named ``class_name``.
    """
    module_path = f'models.{module_name}'

    # Import and attribute lookup are handled separately so that an error
    # raised while importing the module is never reported as a missing class.
    try:
        module = importlib.import_module(module_path)
    except ImportError as e:
        print(f"❌ Cannot import module {module_path}")
        raise ImportError(f"Module import failed: {e}") from e

    try:
        model_class = getattr(module, class_name)
    except AttributeError as e:
        print(f"❌ Class {class_name} not found in {module_path}")
        # Help the user spot a typo: list public, capitalized names in the
        # module (a naming heuristic, not a strict "is a class" check).
        available_classes = [name for name in dir(module)
                             if not name.startswith('_') and name[:1].isupper()]
        print(f"   Available classes: {available_classes}")
        raise AttributeError(f"Class not found: {e}") from e

    print(f"✅ Successfully loaded: {class_name} from {module_path}")
    return model_class

# ============================
# Build the model and restore weights
# ============================
ModelClass = get_model_class(model_file, model_class)
model = ModelClass(num_classes=1)
model = model.to(DEVICE)

# Restore the trained weights.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
checkpoint = torch.load(best_model_path, map_location=DEVICE)
trained_weights = checkpoint['model_state_dict']
model.load_state_dict(trained_weights)
model.eval()

print(f"\n✓ Model loaded: {ModelClass.__name__}")
print(f"  Checkpoint: {best_model_path.name}")
print(f"  Epoch: {checkpoint.get('epoch', 'unknown')}")
# Validation scores are optional checkpoint entries; print the ones present.
for score_label, score_key in (("Best F1", 'best_f1'), ("Best IoU", 'best_iou')):
    if score_key in checkpoint:
        print(f"  {score_label}: {checkpoint[score_key]:.4f}")


✅ Successfully loaded: Change3DX3D from models.change3d
Loading pretrained weights from ./pretrained/Change3D/X3D_L.pyth...
✅ Successfully loaded X3D-L pretrained weights
   Loaded 1139/1141 parameters
   Loaded blocks: ['0', '1', '2', '3', '4', '5']

✓ Model loaded: Change3DX3D
  Checkpoint: best_model.pth
  Epoch: 90
  Best F1: 0.7356
  Best IoU: 0.5818


In [7]:
# ============================================================
# Cell 6: Model complexity (Parameters & FLOPs)
# ============================================================
print("\n" + "="*60)
print("Model Complexity Analysis")
print("="*60)

# Parameter counts
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

# Parameters in millions
params_m = total_params / 1e6

# Size estimate assuming 4 bytes (FP32) per parameter; only parameters are counted.
model_size_mb = total_params * 4 / 1024 / 1024

print(f"  Total Parameters:      {total_params:,}")
print(f"  Trainable Parameters:  {trainable_params:,}")
print(f"  Parameters (M):        {params_m:.2f} M")
print(f"  Model Size:            {model_size_mb:.2f} MB (FP32)")

# Operation count on a single random image pair of the evaluation resolution.
input_img1 = torch.randn(1, 3, IMG_SIZE, IMG_SIZE).to(DEVICE)
input_img2 = torch.randn(1, 3, IMG_SIZE, IMG_SIZE).to(DEVICE)

try:
    # Profile a throw-away copy: thop attaches counting hooks and counter
    # buffers to the modules it profiles, and the real `model` is benchmarked
    # and evaluated afterwards, so it must stay untouched.
    profiled_model = copy.deepcopy(model)
    with torch.no_grad():
        flops, params = profile(profiled_model, inputs=(input_img1, input_img2), verbose=False)
    del profiled_model
    # NOTE(review): thop's documentation names the first return value "macs"
    # (multiply-accumulates). It is reported here as FLOPs, as in the original
    # notebook - confirm which convention the comparison table expects.
    flops_g = flops / 1e9  # giga-operations

    print(f"\n  FLOPs:                 {flops:,}")
    print(f"  FLOPs (G):             {flops_g:.2f} G")

except Exception as e:
    # Best effort: keep the notebook running and record zeros when the
    # profiler cannot handle this model.
    print(f"\n  FLOPs calculation failed: {e}")
    flops = 0
    flops_g = 0

print("="*60)



Model Complexity Analysis
  Total Parameters:      5,004,985
  Trainable Parameters:  5,004,985
  Parameters (M):        5.00 M
  Model Size:            19.09 MB (FP32)

  FLOPs:                 9,652,329,184.0
  FLOPs (G):             9.65 G


In [8]:
# ============================================================
# Cell 7: Build the data loaders
# ============================================================
loader_options = dict(
    root_dir=DATASET_ROOT,
    dataset_name=TEST_DATASET,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    img_size=IMG_SIZE,
    augment=False,
)
# Only the third returned loader is kept (presumably train/val/test order -
# verify against utils.create_dataloaders).
_, _, test_loader = create_dataloaders(**loader_options)

num_test_batches = len(test_loader)
num_test_samples = len(test_loader.dataset)
print(f"\nTest batches: {num_test_batches}")
print(f"Total test samples: {num_test_samples}")


# ============================================================
# Cell 8: Loss function
# ============================================================
criterion = get_loss_fn('bce_dice')
print("Loss function: BCEDiceLoss")

Loaded 10192 images from LEVIR-CD+/train
Loaded 1568 images from LEVIR-CD+/val
Loaded 4000 images from LEVIR-CD+/test

Test batches: 4000
Total test samples: 4000
Loss function: BCEDiceLoss


In [9]:
# ============================================================
# Cell 9: Inference speed measurement (extended warmup + stabilization)
# ============================================================
print("\n" + "="*60)
print("Inference Speed Measurement")
print("="*60)

# ============================================================
# 1. Extended warmup (important for stable timings)
# ============================================================
print("\nWarming up GPU (this may take a while)...")

warmup_batch = next(iter(test_loader))
img1_warmup = warmup_batch['img1'].to(DEVICE)
img2_warmup = warmup_batch['img2'].to(DEVICE)

# First forward pass (triggers the initial memory allocation)
with torch.no_grad():
    _ = model(img1_warmup, img2_warmup)

# Cell 2 allows a CPU fallback, and torch.cuda.synchronize() raises when CUDA
# is unavailable, so only synchronize when actually running on a GPU.
if DEVICE.type == 'cuda':
    torch.cuda.synchronize()

# Repeated warmup passes (50 iterations)
for _ in range(50):
    with torch.no_grad():
        _ = model(img1_warmup, img2_warmup)

if DEVICE.type == 'cuda':
    torch.cuda.synchronize()

# Short pause before the timed measurements start
time.sleep(2)

print("✓ Warmup complete!")



Inference Speed Measurement

Warming up GPU (this may take a while)...
✓ Warmup complete!


In [None]:
# ============================================================
# Cell 10: Single-image inference speed (one fixed image pair)
# ============================================================
print("\n" + "-"*60)
print("Single Image Inference Speed")
print("-"*60)

# Measurement settings
num_iterations = 300
single_times = []

# Cell 2 allows a CPU fallback, and torch.cuda.synchronize() raises when CUDA
# is unavailable, so synchronization is only performed on a GPU.
sync_cuda = DEVICE.type == 'cuda'

# Prepare the test input (a single image pair only)
test_iter = iter(test_loader)
test_batch = next(test_iter)
test_img1 = test_batch['img1'][0:1].to(DEVICE)  # first sample, batch dimension kept
test_img2 = test_batch['img2'][0:1].to(DEVICE)  # first sample, batch dimension kept

print(f"Test image shape: {test_img1.shape}")

# Time the same image pair repeatedly
for i in range(num_iterations):
    # Drain pending GPU work so it is not attributed to this iteration
    if sync_cuda:
        torch.cuda.synchronize()

    start = time.perf_counter()

    with torch.no_grad():
        output = model(test_img1, test_img2)

    # CUDA kernels run asynchronously; wait for completion before stopping the clock
    if sync_cuda:
        torch.cuda.synchronize()

    elapsed = time.perf_counter() - start
    single_times.append(elapsed)

    # Progress report every 50 iterations
    if (i + 1) % 50 == 0:
        print(f"  Progress: {i+1}/{num_iterations} iterations")

# Convert to a numpy array (seconds per iteration)
single_times = np.array(single_times)

print(f"\nInitial measurements: {len(single_times)}")
print(f"Raw statistics:")
print(f"  Mean: {np.mean(single_times)*1000:.3f} ms")
print(f"  Std:  {np.std(single_times)*1000:.3f} ms")
print(f"  Min:  {np.min(single_times)*1000:.3f} ms")
print(f"  Max:  {np.max(single_times)*1000:.3f} ms")
print(f"  CV:   {(np.std(single_times)/np.mean(single_times))*100:.2f}%")

# Outlier removal (IQR method): keep samples within [Q1 - 1.5*IQR, Q3 + 1.5*IQR]
Q1 = np.percentile(single_times, 25)
Q3 = np.percentile(single_times, 75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

mask = (single_times >= lower_bound) & (single_times <= upper_bound)
filtered_times = single_times[mask]

num_outliers = len(single_times) - len(filtered_times)

print(f"\nAfter outlier removal: {len(filtered_times)}")
print(f"Outliers removed: {num_outliers}")

# === Time statistics (seconds) ===
mean_time_sec = np.mean(filtered_times)
std_time_sec = np.std(filtered_times)
median_time_sec = np.median(filtered_times)
min_time_sec = np.min(filtered_times)
max_time_sec = np.max(filtered_times)

# Convert to milliseconds
inference_time_ms = mean_time_sec * 1000
inference_time_std_ms = std_time_sec * 1000
median_time_ms = median_time_sec * 1000
min_time_ms = min_time_sec * 1000
max_time_ms = max_time_sec * 1000

# Coefficient of variation (relative spread of the timings)
cv_percent = (std_time_sec / mean_time_sec) * 100

# === FPS statistics ===
# Per-iteration FPS. NOTE(review): the mean of these reciprocals is always
# >= 1 / mean_time_sec, so fps_mean is slightly higher than the sustained
# throughput implied by inference_time_ms; the gap is small when CV is low.
fps_list = 1.0 / filtered_times
fps_mean = np.mean(fps_list)
fps_std = np.std(fps_list)
fps_median = np.median(fps_list)
fps_min = np.min(fps_list)
fps_max = np.max(fps_list)

# Final report
print(f"\n{'='*60}")
print(f"{'FINAL STATISTICS':^60}")
print(f"{'='*60}")

print(f"\nInference Time (ms):")
print(f"  Mean:   {inference_time_ms:.3f} ± {inference_time_std_ms:.3f} ms")
print(f"  Median: {median_time_ms:.3f} ms")
print(f"  Range:  [{min_time_ms:.3f}, {max_time_ms:.3f}] ms")
print(f"  CV:     {cv_percent:.2f}%")

print(f"\nFrame Per Second (FPS):")
print(f"  Mean:   {fps_mean:.2f} ± {fps_std:.2f}")
print(f"  Median: {fps_median:.2f}")
print(f"  Range:  [{fps_min:.2f}, {fps_max:.2f}]")

# Measurement-quality check based on the coefficient of variation
print(f"\n{'Quality Check':^60}")
if cv_percent < 10:
    print(f"  ✓ Excellent (CV < 10%)")
elif cv_percent < 20:
    print(f"  ⚠ Acceptable (10% ≤ CV < 20%)")
else:
    print(f"  ✗ Poor (CV ≥ 20%) - Consider re-measuring")
    print(f"    Possible causes:")
    print(f"    - Other GPU processes")
    print(f"    - Thermal throttling")
    print(f"    - Insufficient warmup")

print("-"*60)


------------------------------------------------------------
Single Image Inference Speed
------------------------------------------------------------
Test image shape: torch.Size([1, 3, 256, 256])
  Progress: 50/300 iterations


In [None]:
# ============================================================
# Cell 11: Batch inference speed
# ============================================================
print("\n" + "-"*60)
print("Batch Inference Speed")
print("-"*60)

# Time 20 batches drawn from the test loader
num_batch_iterations = 20
batch_times = []

# Cell 2 allows a CPU fallback, and torch.cuda.synchronize() raises when CUDA
# is unavailable, so synchronization is only performed on a GPU.
sync_cuda = DEVICE.type == 'cuda'

test_iter = iter(test_loader)

for _ in range(num_batch_iterations):
    try:
        batch = next(test_iter)
    except StopIteration:
        # Loader exhausted before enough batches were timed: start over
        test_iter = iter(test_loader)
        batch = next(test_iter)

    # Host-to-device transfer happens before the clock starts, so only the
    # forward pass is timed.
    img1 = batch['img1'].to(DEVICE)
    img2 = batch['img2'].to(DEVICE)

    if sync_cuda:
        torch.cuda.synchronize()

    # perf_counter is the monotonic, high-resolution clock intended for
    # interval timing (and matches the single-image measurement cell);
    # time.time() is wall-clock time and can jump or have coarse resolution.
    start = time.perf_counter()

    with torch.no_grad():
        _ = model(img1, img2)

    if sync_cuda:
        torch.cuda.synchronize()

    batch_times.append(time.perf_counter() - start)

# Statistics
batch_times = np.array(batch_times)
avg_batch_time = np.mean(batch_times)
batch_fps = BATCH_SIZE / avg_batch_time
batch_per_image = avg_batch_time / BATCH_SIZE

print(f"  Batch size:        {BATCH_SIZE}")
print(f"  Iterations:        {num_batch_iterations}")
print(f"  Avg batch time:    {avg_batch_time*1000:.2f} ms")
print(f"  Batch FPS:         {batch_fps:.2f} images/sec")
print(f"  Per image:         {batch_per_image*1000:.2f} ms")

print("="*60)


------------------------------------------------------------
Batch Inference Speed
------------------------------------------------------------
  Batch size:        1
  Iterations:        20
  Avg batch time:    24.15 ms
  Batch FPS:         41.40 images/sec
  Per image:         24.15 ms


In [None]:
# ============================================================
# Cell 12: Evaluation on the test set
# ============================================================
section_rule = "="*60

print("\n" + section_rule)
print("Testing on Full Test Set")
print(section_rule)

test_metrics = CDMetrics()
test_loss = 0

with torch.no_grad():
    for batch in tqdm(test_loader, desc='Testing'):
        # Move both input images and the label to the device
        img1 = batch['img1'].to(DEVICE)
        img2 = batch['img2'].to(DEVICE)
        label = batch['label'].to(DEVICE)

        # Forward pass, then accumulate the batch loss and the metric state
        output = model(img1, img2)
        loss = criterion(output, label)
        test_loss += loss.item()
        test_metrics.update(output, label)

# Mean of the per-batch losses
avg_test_loss = test_loss / len(test_loader)
test_results = test_metrics.get_metrics()

print(f"\n{'Test Results':^60}")
print(section_rule)
print(f"  Loss:      {avg_test_loss:.4f}")
# (line prefix, key in test_results), printed in this order
metric_lines = (
    ("  F1 Score:  ", 'f1'),
    ("  Precision: ", 'precision'),
    ("  Recall:    ", 'recall'),
    ("  IoU:       ", 'iou'),
    ("  Kappa:     ", 'kappa'),
    ("  OA:        ", 'oa'),
)
for line_prefix, metric_key in metric_lines:
    print(f"{line_prefix}{test_results[metric_key]:.4f}")
print(section_rule)



Testing on Full Test Set


Testing: 100%|██████████| 4000/4000 [01:36<00:00, 41.56it/s]


                        Test Results                        
  Loss:      0.1547
  F1 Score:  0.7711
  Precision: 0.7581
  Recall:    0.7846
  IoU:       0.6275
  Kappa:     0.7611
  OA:        0.9809





In [None]:
# ============================================================
# Cell 13: Results summary table
# ============================================================
table_border = "="*60
table_divider = "-"*60

print(f"\n{'SUMMARY TABLE':^60}")
print(table_border)
print(f"{'Metric':<30} {'Value':>20}")
print(table_divider)

# Identification rows
for row_label, row_text in (('Model', TEST_MODEL), ('Dataset', TEST_DATASET)):
    print(f"{row_label:<30} {row_text:>20}")
print(table_divider)

# Efficiency rows (each has its own value layout, so they stay explicit)
print(f"{'Model Parameters (M)':<30} {params_m:>19.2f} M")
print(f"{'FLOPs (G)':<30} {flops_g:>19.2f} G")
print(f"{'Frame Per Second (FPS)':<30} {fps_mean:>15.2f} ± {fps_std:.2f}")
print(f"{'Inference Time (ms)':<30} {inference_time_ms:>12.2f} ± {inference_time_std_ms:.2f}")
print(table_divider)

# Accuracy rows read from test_results
for row_label, metric_key in (
    ('F1 Score', 'f1'),
    ('IoU', 'iou'),
    ('Precision', 'precision'),
    ('Recall', 'recall'),
):
    print(f"{row_label:<30} {test_results[metric_key]:>20.4f}")
print(table_border)


                       SUMMARY TABLE                        
Metric                                        Value
------------------------------------------------------------
Model                                      Change3D
Dataset                                   LEVIR-CD+


NameError: name 'use_base' is not defined

In [None]:
# ============================================================
# Cell 14: Save results
# ============================================================
results_dict = {
    'model': TEST_MODEL,
    # One model file can hold several classes, so record the evaluated class
    # and the experiment name as well; otherwise result files of different
    # variants are indistinguishable once moved out of their directory.
    'model_class': model_class,
    'experiment': experiment_name,
    'dataset': TEST_DATASET,

    # Headline metrics (units as commonly reported in papers)
    'model_parameters_m': float(params_m),
    'flops_g': float(flops_g),
    'inference_time_ms': float(inference_time_ms),
    'inference_time_std_ms': float(inference_time_std_ms),
    'fps': float(fps_mean),
    'fps_std': float(fps_std),

    # Detailed breakdown
    'model_complexity': {
        'total_params': int(total_params),
        'trainable_params': int(trainable_params),
        'params_m': float(params_m),
        'flops': int(flops),
        'flops_g': float(flops_g),
        'model_size_mb': float(model_size_mb)
    },

    'inference_speed': {
        'inference_time_ms': float(inference_time_ms),
        'inference_time_std_ms': float(inference_time_std_ms),
        'inference_time_median_ms': float(median_time_ms),
        'inference_time_min_ms': float(min_time_ms),
        'inference_time_max_ms': float(max_time_ms),
        'cv_percent': float(cv_percent),
        'fps_mean': float(fps_mean),
        'fps_std': float(fps_std),
        'fps_median': float(fps_median),
        'fps_min': float(fps_min),
        'fps_max': float(fps_max),
        # Number of timing samples kept after outlier removal
        'num_iterations': int(len(filtered_times)),
        'num_outliers': int(num_outliers),
        'batch_time_ms': float(avg_batch_time * 1000),
        'batch_fps': float(batch_fps)
    },

    'test_metrics': {
        'loss': float(avg_test_loss),
        'f1': float(test_results['f1']),
        'iou': float(test_results['iou']),
        'precision': float(test_results['precision']),
        'recall': float(test_results['recall']),
        'oa': float(test_results['oa']),
        'kappa': float(test_results['kappa'])
    }
}

# Write the JSON next to the experiment's checkpoints
exp_dir.mkdir(parents=True, exist_ok=True)
results_path = exp_dir / 'test_results.json'
with open(results_path, 'w', encoding='utf-8') as f:
    json.dump(results_dict, f, indent=2)

print(f"\n✓ Results saved to {results_path}")

# Short recap
print("\n" + "="*60)
print("Quick Summary:")
print("="*60)
print(f"F1 Score:              {test_results['f1']:.4f}")
print(f"Precision:             {test_results['precision']:.4f}")
print(f"Recall:                {test_results['recall']:.4f}")
print(f"IoU:                   {test_results['iou']:.4f}")
print(f"Kappa:                 {test_results['kappa']:.4f}")
print(f"Overall Accuracy:      {test_results['oa']:.4f}")
print("-"*60)
print(f"Model Parameters (M):  {params_m:.2f}")
print(f"FLOPs (G):             {flops_g:.2f}")
print(f"Inference Time (ms):   {inference_time_ms:.2f} ± {inference_time_std_ms:.2f}")
print(f"FPS:                   {fps_mean:.2f} ± {fps_std:.2f}")
print("="*60)

print("\n" + "="*60)
print("Test Complete!")
print("="*60)


✓ Results saved to experiments/LEVIR-CD+/USSFCNet/test_results.json

Quick Summary:
F1 Score:              0.7786
Precision:             0.7732
Recall:                0.7841
IoU:                   0.6375
Kappa:                 0.7691
Overall Accuracy:      0.9817
------------------------------------------------------------
Model Parameters (M):  5.57
FLOPs (G):             3.69
Inference Time (ms):   2.32 ± 0.10
FPS:                   431.62 ± 17.94

Test Complete!
