# Model Efficiency Measurement

GPU/CPU Inference Time, Parameters, FLOPs 측정

## 1. 라이브러리 임포트

In [1]:
import timm
timm.__version__

'1.0.24'

In [2]:
import torch
import pandas as pd
import os
from datetime import datetime

from ex_eff_utils import measure_all_models
from models import list_models

print(f"PyTorch: {torch.__version__} | CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
print(f"Available models: {list_models()}")

PyTorch: 2.0.1+cu118 | CUDA: True
GPU: NVIDIA GeForce RTX 3090
Available models: ['AMNet', 'AMNet_Nearest', 'AttU_Net', 'CMUNeXt', 'DWBlock', 'EGEUNet', 'JeongWonNet', 'JeongWonNet77', 'JeongWonNet77_Rep', 'JeongWonNet77_Rep256', 'JeongWonNet77_Rep256Basis8', 'JeongWonNet77_Rep256Basis8B2', 'JeongWonNet77_Rep256Basis8S16', 'JeongWonNet77_Rep256Basis8S24', 'JeongWonNet77_Rep256Basis8S24Drop', 'JeongWonNet77_Rep256Basis8S24NN', 'JeongWonNet77_Rep256Basis8S24_AWQ', 'JeongWonNet77_Rep256Basis8S24_EffDown', 'JeongWonNet77_Rep256Basis8S24_EffDownAWQ', 'JeongWonNet77_Rep256Basis8S24_QuanRep', 'JeongWonNet77_Rep256Basis8S24_Tiled', 'JeongWonNet77_Rep256BasisExp4', 'JeongWonNet77_Rep256BasisMax8', 'JeongWonNet77_Rep96', 'JeongWonNet_CtxBridge', 'JeongWonNet_CtxBridge_AttnFusion', 'JeongWonNet_CtxBridge_CatSkip', 'JeongWonNet_CtxBridge_Coeff', 'JeongWonNet_CtxBridge_Concat', 'JeongWonNet_CtxBridge_ConcatNL', 'JeongWonNet_CtxBridge_Deep', 'JeongWonNet_CtxBridge_DiffFusion', 'JeongWonNet_CtxBridge

## 2. 설정

In [3]:
MODELS = [
    'JeongWonNet_CtxBridge',
    'JeongWonNet_CtxBridge',
    'JeongWonNet_CtxBridge_StdExp',
    'JeongWonNet_CtxBridge_StdExp_Boun',
    'JeongWonNet_CtxBridge_StdExp_Boun2',
        # 'EGEUNet',
        # 'UCMNet',
        # 'MALUNet',
        # 'CMUNeXt',
        # 'TinyUNet',
        # 'MHorUNet',
        # 'MAResUNet',
        # 'AMNet',
]

INPUT_SIZE = (3, 256, 256)
NUM_WARMUP = 100
NUM_ITERATIONS = 100

# 결과 저장 폴더
RESULT_DIR = f'results/efficiency_{datetime.now().strftime("%y%m%d_%H%M%S")}/'

print(f"Models: {MODELS}")
print(f"Input: {INPUT_SIZE} | Warmup: {NUM_WARMUP} | Iterations: {NUM_ITERATIONS}")
print(f"Save to: {RESULT_DIR}")

Models: ['JeongWonNet_CtxBridge', 'JeongWonNet_CtxBridge', 'JeongWonNet_CtxBridge_StdExp', 'JeongWonNet_CtxBridge_StdExp_Boun', 'JeongWonNet_CtxBridge_StdExp_Boun2']
Input: (3, 256, 256) | Warmup: 100 | Iterations: 100
Save to: results/efficiency_260222_075610/


## 3. 측정 실행

In [4]:
# 측정 실행
results = measure_all_models(MODELS, INPUT_SIZE, NUM_WARMUP, NUM_ITERATIONS)

# 결과 저장
os.makedirs(RESULT_DIR, exist_ok=True)
df = pd.DataFrame(results)
df.to_csv(os.path.join(RESULT_DIR, 'results.csv'), index=False)

print(f"\nSaved to: {RESULT_DIR}")
print(df[['Model Name', 'Params (fmt)', 'FLOPs (fmt)', 'GPU Mean (ms)', 'CPU Mean (ms)']].to_string(index=False))

Measuring 5 models...

[1/5] JeongWonNet_CtxBridge
  Params: 160.59K
  FLOPs: 203.59M
  GPU: 0.75 ± 0.00 ms
  CPU: 4.75 ± 0.58 ms

[2/5] JeongWonNet_CtxBridge
  Params: 160.59K
  FLOPs: 203.59M
  GPU: 0.75 ± 0.00 ms
  CPU: 4.23 ± 0.47 ms

[3/5] JeongWonNet_CtxBridge_StdExp
  Params: 1.01M
  FLOPs: 1.25G
  GPU: 1.17 ± 0.02 ms
  CPU: 8.41 ± 0.13 ms

[4/5] JeongWonNet_CtxBridge_StdExp_Boun
  Params: 1.03M
  FLOPs: 1.25G
  GPU: 1.27 ± 0.06 ms
  CPU: 6.44 ± 0.11 ms

[5/5] JeongWonNet_CtxBridge_StdExp_Boun2
  Params: 1.05M
  FLOPs: 1.25G
  GPU: 1.48 ± 0.05 ms
  CPU: 10.22 ± 8.91 ms

Measurement completed!

Saved to: results/efficiency_260222_075610/
                        Model Name Params (fmt) FLOPs (fmt)  GPU Mean (ms)  CPU Mean (ms)
             JeongWonNet_CtxBridge      160.59K     203.59M       0.754406       4.753237
             JeongWonNet_CtxBridge      160.59K     203.59M       0.753485       4.228473
      JeongWonNet_CtxBridge_StdExp        1.01M       1.25G       1.172259    