### 4. Model Complexity & Practical Usability (모델 복잡도 및 실용성)

이 표는 각 모델이 **얼마나 계산적으로 무거운지**, 그리고 실제 사용할 때 **연산/메모리 비용**이 어느 정도인지 비교합니다.

| Metric | 의미 (Korean 설명) |
|-------|----------------|
| **Params (#)** | 학습 가능한 파라미터 총 개수. 모델 표현력 규모를 반영하나, 너무 크면 과적합 및 메모리 비용 증가 가능. |
| **FLOPs** | 단일 추론(Forward pass) 동안 수행되는 부동소수점 연산 수. 연산 복잡도의 직접적인 척도. |
| **Inference Memory (MB)** | 입력 1개를 추론할 때 GPU 메모리가 어느 정도 사용되는지. |
| **Latency per Inference (s)** | 입력 하나를 처리하는 데 걸리는 시간. 실시간 처리 가능성 및 배치 사이즈 결정에 영향. |

#### 해석 관점
- **ViT** 계열은 일반적으로 **파라미터 수는 크지만 FLOPs 효율이 좋아** 추론 속도는 빠른 편.
- **UNet3D (V-NET)** 는 **입체 convolution 핵심 구조로 인해 메모리 사용량이 크고 추론 시간이 상대적으로 길 수 있음.**
- **Base Model** 은 구조가 단순하므로 일반적으로 가장 가볍지만 성능 한계가 존재.

즉,
> 이 표는 “**정확도 vs 계산비용**” 트레이드오프를 정량적으로 보여주며,  
> 실제 운용 환경에서 어떤 모델을 선택해야 하는지를 결정하는 핵심 기준이 됩니다.


In [1]:
# <<< 이 셀을 노트북 "맨 위"에서 실행하세요 >>>
import os
# TF가 GPU를 전혀 보지 못하도록 비활성화 (CPU 강제)
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import tensorflow as tf
# GPU가 안 보이므로 굳이 메모리 그로스 설정은 불필요

# PyTorch는 별도 환경에서 GPU 사용 (CUDA_VISIBLE_DEVICES가 빈 문자열이면 CPU만 보임)
# -> Torch쪽에서는 다시 원하는 GPU를 지정해서 사용하세요 (SLURM 스크립트 등에서 지정)


2025-11-12 15:18:23.727498: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# ===== Choose a big GPU (e.g., 1) BEFORE importing TF/Torch =====
import os
GPU_ID = "1"  # <- 20GB 있는 GPU로 지정 (원하면 "2","3"로 바꿔도 됨)
os.environ["CUDA_VISIBLE_DEVICES"] = GPU_ID
os.environ.setdefault("TF_GPU_ALLOCATOR", "cuda_malloc_async")  # TF 메모리 파편화 완화

# (이 아래부터 TensorFlow / PyTorch import)
import tensorflow as tf

# Enable memory growth on visible GPUs
try:
    gpus = tf.config.list_physical_devices('GPU')
    for g in gpus:
        tf.config.experimental.set_memory_growth(g, True)
    TF_DEVICE = "/GPU:0" if gpus else "/CPU:0"   # CUDA_VISIBLE_DEVICES로 remap된 0번
except Exception:
    TF_DEVICE = "/CPU:0"

import torch
TORCH_DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"  # 동일하게 remap된 0

# ----- TF latency/memory (GPU/CPU 자동) -----
def tf_infer_latency_and_memory(model, input_shape=(1,2,128,128,128), warmup=2, runs=5):
    x = tf.random.normal(input_shape, dtype=tf.float32)
    call_fn = tf.function(model, jit_compile=False)
    # Warmup
    for _ in range(warmup):
        _ = call_fn(x, training=False)

    # Memory (best-effort)
    mem_mb = float("nan")
    try:
        info0 = tf.config.experimental.get_memory_info("GPU:0")
    except Exception:
        info0 = None

    t0 = tf.timestamp()
    for _ in range(runs):
        _ = call_fn(x, training=False)
    t1 = tf.timestamp()

    if info0 is not None:
        try:
            info1 = tf.config.experimental.get_memory_info("GPU:0")
            peak = max(info0.get("peak", 0), info1.get("peak", 0))
            mem_mb = float(peak) / (1024**2)
        except Exception:
            pass

    return float(t1 - t0) / runs, mem_mb

# ----- Torch latency/memory/FLOPs (동일한 logical cuda:0 사용) -----
def torch_infer_latency(model, input_shape=(1,2,128,128,128), device=TORCH_DEVICE, warmup=3, runs=5):
    model = model.to(device).eval()
    x = torch.randn(*input_shape, device=device)
    if device.startswith("cuda"):
        torch.cuda.synchronize()
    with torch.inference_mode():
        for _ in range(warmup):
            _ = model(x)
        if device.startswith("cuda"):
            torch.cuda.synchronize()
        import time
        t0 = time.time()
        for _ in range(runs):
            _ = model(x)
        if device.startswith("cuda"):
            torch.cuda.synchronize()
        t1 = time.time()
    return float((t1 - t0) / runs)

def torch_infer_memory(model, input_shape=(1,2,128,128,128), device=TORCH_DEVICE):
    if not device.startswith("cuda"):
        return float("nan")
    model = model.to(device).eval()
    x = torch.randn(*input_shape, device=device)
    torch.cuda.reset_peak_memory_stats(device)
    with torch.inference_mode():
        _ = model(x)
    return torch.cuda.max_memory_allocated(device) / (1024**2)

try:
    from thop import profile, clever_format
    def torch_try_flops(model, input_shape=(1,2,128,128,128), device=TORCH_DEVICE):
        model = model.to(device).eval()
        x = torch.randn(*input_shape, device=device)
        macs, _ = profile(model, inputs=(x,), verbose=False)
        flops_val = macs * 2
        flops_str, _ = clever_format([flops_val, macs], "%.3f")
        return flops_str
except Exception:
    def torch_try_flops(*args, **kwargs):
        return "N/A"


In [4]:
# ===== Safe loaders =====
import importlib.util, types, sys, os

def load_symbol_from_file(py_path: str, symbol_name: str):
    """Simple loader for files WITHOUT relative imports."""
    spec = importlib.util.spec_from_file_location(f"{symbol_name}_mod", py_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load spec for {py_path}")
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)  # type: ignore
    if not hasattr(mod, symbol_name):
        raise ImportError(f"{symbol_name} not found in {py_path}")
    return getattr(mod, symbol_name)

def load_symbol_with_package(model_path: str, symbol_name: str, pkg: str = "pix2pixcc3d"):
    """
    Loader for modules that use relative imports like `from .utils import ...`.
    Loads <pkg>.utils and <pkg>.model so that `.utils` resolves.
    """
    model_path = os.path.abspath(model_path)
    src_dir    = os.path.dirname(model_path)
    utils_path = os.path.join(src_dir, "utils.py")

    if not os.path.isfile(model_path):
        raise ImportError(f"[cGAN] model file not found: {model_path}")
    if not os.path.isfile(utils_path):
        raise ImportError(f"[cGAN] utils file not found: {utils_path}")

    # Load <pkg>.utils
    utils_name = f"{pkg}.utils"
    if utils_name not in sys.modules:
        utils_spec = importlib.util.spec_from_file_location(utils_name, utils_path)
        if utils_spec is None or utils_spec.loader is None:
            raise ImportError(f"[cGAN] Cannot create spec for utils: {utils_path}")
        utils_mod = importlib.util.module_from_spec(utils_spec)
        sys.modules[utils_name] = utils_mod
        utils_spec.loader.exec_module(utils_mod)  # type: ignore

    # Load <pkg>.model
    model_name = f"{pkg}.model"
    model_spec = importlib.util.spec_from_file_location(model_name, model_path)
    if model_spec is None or model_spec.loader is None:
        raise ImportError(f"[cGAN] Cannot create spec for model: {model_path}")
    model_mod = importlib.util.module_from_spec(model_spec)
    model_mod.__package__ = pkg  # <-- crucial for `.utils`
    sys.modules[model_name] = model_mod
    model_spec.loader.exec_module(model_mod)  # type: ignore

    if not hasattr(model_mod, symbol_name):
        raise ImportError(f"[cGAN] {symbol_name} not found in {model_path}")
    return getattr(model_mod, symbol_name)

# ===== Exact paths & class names =====
BASE_PATH = "/gpfs/adupuy/CF4_CNN/generatorSingle.py"               # TF Generator()
UNET_PATH = "/home/mingyeong/GAL2DM_ASIM_VNET/src/model.py"         # UNet3D
VIT_PATH  = "/home/mingyeong/GAL2DM_ASIM_ViT/src/model.py"          # VoxelViTUNet3D
GAN_PATH  = "/home/mingyeong/GAL2DM_ASIM_GAN/src/model.py"          # cGAN (uses .utils)

SYM_BASE = "Generator"
SYM_UNET = "UNet3D"
SYM_VIT  = "VoxelViTUNet3D"
SYM_GAN  = "GeneratorPix2PixCC3D"

# ===== Load TensorFlow Base Model =====
import tensorflow as tf
Generator = load_symbol_from_file(BASE_PATH, SYM_BASE)
base_model_tf = Generator()  # returns tf.keras.Model, input=(B,2,128,128,128)

# ===== Load PyTorch models =====
import torch
UNet3D = load_symbol_from_file(UNET_PATH, SYM_UNET)
VoxelViTUNet3D = load_symbol_from_file(VIT_PATH, SYM_VIT)
# ⚠️ Do NOT overwrite GAN_PATH (string). Load the CLASS into a new name:
GeneratorPix2PixCC3D = load_symbol_with_package(GAN_PATH, SYM_GAN, pkg="pix2pixcc3d")

# UNet3D uses in_ch, out_ch
unet_model_torch = UNet3D(in_ch=2, out_ch=1)

# VoxelViTUNet3D: try reasonable constructor signatures
def instantiate_vit(cls):
    try:
        return cls(in_ch=2, out_ch=1)
    except TypeError:
        pass
    try:
        return cls(in_channels=2, out_channels=1)
    except TypeError:
        pass
    return cls()  # fallback → internal defaults

vit_model_torch = instantiate_vit(VoxelViTUNet3D)

print("✅ Base model:", type(base_model_tf))
print("✅ UNet3D:", type(unet_model_torch))
print("✅ ViT:", type(vit_model_torch))
print("✅ cGAN class:", GeneratorPix2PixCC3D)


✅ Base model: <class 'keras.src.engine.functional.Functional'>
✅ UNet3D: <class 'UNet3D_mod.UNet3D'>
✅ ViT: <class 'VoxelViTUNet3D_mod.VoxelViTUNet3D'>
✅ cGAN class: <class 'pix2pixcc3d.model.GeneratorPix2PixCC3D'>


In [5]:
# ==== Consistent names from your loaders/constructors ====
# TensorFlow
base_model_tf = base_model_tf      # already created above

# PyTorch
unet_model = unet_model_torch
vit_model  = vit_model_torch

import torch
import time
import pandas as pd
from thop import profile, clever_format  # pip install thop

def count_params(model: torch.nn.Module) -> int:
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def _ensure_device(device: str) -> torch.device:
    if device.startswith("cuda"):
        if not torch.cuda.is_available():
            print("[WARN] CUDA requested but not available. Falling back to CPU.")
            return torch.device("cpu")
        # support e.g. "cuda:0"
        return torch.device(device)
    return torch.device("cpu")

def measure_inference_memory(model: torch.nn.Module,
                             input_shape=(1,2,128,128,128),
                             device="cuda") -> float:
    dev = _ensure_device(device)
    model = model.to(dev).eval()
    dummy = torch.randn(*input_shape, device=dev)

    if dev.type != "cuda":
        return float("nan")  # GPU-only metric

    torch.cuda.reset_peak_memory_stats(dev)
    with torch.no_grad():
        _ = model(dummy)
    return torch.cuda.max_memory_allocated(dev) / (1024**2)

def measure_latency(model: torch.nn.Module,
                    input_shape=(1,2,128,128,128),
                    device="cuda",
                    warmup=3, runs=5) -> float:
    dev = _ensure_device(device)
    model = model.to(dev).eval()
    dummy = torch.randn(*input_shape, device=dev)

    with torch.no_grad():
        for _ in range(warmup):
            _ = model(dummy)

        if dev.type == "cuda":
            torch.cuda.synchronize(dev)
        t0 = time.time()
        for _ in range(runs):
            _ = model(dummy)
        if dev.type == "cuda":
            torch.cuda.synchronize(dev)

    return (time.time() - t0) / runs

def evaluate_model_complexity(model_dict, input_shape=(1,2,128,128,128), device="cuda"):
    dev = _ensure_device(device)
    results = []
    for name, model in model_dict.items():
        print(f"\n--- Evaluating {name} ---")
        # Params
        try:
            params = count_params(model)
        except Exception as e:
            print(f"[WARN] Param count failed for {name}: {e}")
            params = float("nan")

        # FLOPs / MACs via thop (may fail for some custom layers)
        try:
            dummy = torch.randn(*input_shape, device=dev)
            model_eval = model.to(dev).eval()
            macs_raw, _ = profile(model_eval, inputs=(dummy,), verbose=False)
            # FLOPs ~= 2 * MACs is a common convention for convs
            flops_str, macs_str = clever_format([macs_raw * 2, macs_raw], "%.3f")
        except Exception as e:
            print(f"[WARN] THOP profiling failed for {name}: {e}")
            flops_str, macs_str = "NaN", "NaN"

        # Inference memory (GPU only)
        try:
            mem = measure_inference_memory(model, input_shape, device)
        except Exception as e:
            print(f"[WARN] Memory measurement failed for {name}: {e}")
            mem = float("nan")

        # Latency
        try:
            latency = measure_latency(model, input_shape, device)
        except Exception as e:
            print(f"[WARN] Latency measurement failed for {name}: {e}")
            latency = float("nan")

        results.append({
            "Model": name,
            "Params (#)": params,
            "FLOPs": flops_str,
            "MACs": macs_str,
            "Inference Memory (MB)": mem,
            "Latency per Inference (s)": latency,
        })

    return pd.DataFrame(results)

# --------------------- Run (PyTorch-only) ---------------------
model_dict = {
    "V-NET (UNet3D)": unet_model,
    "ViT (3D Transformer)": vit_model,
}
df_complex = evaluate_model_complexity(model_dict, input_shape=(1,2,128,128,128), device="cuda")

print("\n=== Model Complexity & Practical Usability (PyTorch) ===\n")
print(df_complex.to_string(index=False))
df_complex.to_csv("model_complexity_summary_torch.csv", index=False)
print("\nSaved → model_complexity_summary_torch.csv")

# --------------------- Optional: TensorFlow base model summary ---------------------
try:
    # Keras parameter count (trainable + non-trainable)
    tf_params = int(base_model_tf.count_params())
    print(f"\n[TensorFlow] Base Model params: {tf_params:,}")
    # If you need TF FLOPs/latency, profile separately with tf.profiler or tf.function jit; not compatible with thop.
except Exception as e:
    print(f"[WARN] TensorFlow base model summary failed: {e}")



--- Evaluating V-NET (UNet3D) ---





--- Evaluating ViT (3D Transformer) ---

=== Model Complexity & Practical Usability (PyTorch) ===

               Model  Params (#)    FLOPs    MACs  Inference Memory (MB)  Latency per Inference (s)
      V-NET (UNet3D)    28823577 121.235G 60.617G             854.778809                   0.030333
ViT (3D Transformer)    23485633   2.282T  1.141T            3010.214355                   0.120250

Saved → model_complexity_summary_torch.csv

[TensorFlow] Base Model params: 461,006,711


In [6]:
import os, time, math
import numpy as np
import pandas as pd
import torch

from thop import profile, clever_format

# ========= Unit formatters =========
def fmt_si(x, kind="count"):
    if x is None or (isinstance(x, float) and (math.isnan(x) or math.isinf(x))):
        return "NaN"
    scales = [
        (1e12, "T"),
        (1e9,  "G"),
        (1e6,  "M"),
        (1e3,  "K"),
    ]
    for s, tag in scales:
        if abs(x) >= s:
            val = x / s
            return f"{val:.3f}{tag}"
    if kind == "time_ms":
        return f"{x*1e3:.3f} ms"
    if kind == "mem":
        return f"{x:.3f} MB"
    return f"{x:.0f}"

def fmt_params(n):  # counts -> M/B
    if n is None or (isinstance(n, float) and (math.isnan(n) or math.isinf(n))):
        return "NaN"
    if n >= 1e9:
        return f"{n/1e9:.3f}B"
    if n >= 1e6:
        return f"{n/1e6:.3f}M"
    return f"{n:,}"

# ========= PyTorch profiling =========
def _ensure_device(device: str) -> torch.device:
    if device.startswith("cuda"):
        if not torch.cuda.is_available():
            print("[WARN] CUDA requested but not available. Falling back to CPU.")
            return torch.device("cpu")
        return torch.device(device)
    return torch.device("cpu")

def torch_profile(model: torch.nn.Module,
                  input_shape=(1,2,128,128,128),
                  device="cuda",
                  warmup=3, runs=5):
    dev = _ensure_device(device)
    model = model.to(dev).eval()
    x = torch.randn(*input_shape, device=dev)

    # Params
    try:
        params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    except Exception:
        params = float("nan")

    # FLOPs/MACs
    try:
        macs_raw, _ = profile(model, inputs=(x,), verbose=False)
        flops_str, macs_str = clever_format([macs_raw * 2, macs_raw], "%.3f")
    except Exception as e:
        print(f"[WARN] THOP failed: {e}")
        flops_str, macs_str = "NaN", "NaN"

    # GPU mem
    try:
        if dev.type == "cuda":
            torch.cuda.reset_peak_memory_stats(dev)
            with torch.no_grad():
                _ = model(x)
            mem_mb = torch.cuda.max_memory_allocated(dev) / (1024**2)
        else:
            mem_mb = float("nan")
    except Exception as e:
        print(f"[WARN] Torch mem failed: {e}")
        mem_mb = float("nan")

    # Latency
    try:
        with torch.no_grad():
            for _ in range(warmup):
                _ = model(x)
            if dev.type == "cuda":
                torch.cuda.synchronize(dev)
            t0 = time.time()
            for _ in range(runs):
                _ = model(x)
            if dev.type == "cuda":
                torch.cuda.synchronize(dev)
        lat_s = (time.time() - t0) / runs
    except Exception as e:
        print(f"[WARN] Torch latency failed: {e}")
        lat_s = float("nan")

    return {
        "Params (#)": params,
        "FLOPs": flops_str,
        "MACs": macs_str,
        "Inference Memory (MB)": mem_mb,
        "Latency per Inference (s)": lat_s,
    }

# ========= TensorFlow profiling =========
import tensorflow as tf
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2_as_graph

def _tf_input_shape_from_model(model, fallback=(1,128,128,128,2)):
    try:
        shp = model.input_shape
        if isinstance(shp, (list, tuple)):
            if isinstance(shp[0], (list, tuple)):  # multiple inputs
                shp = shp[0]
        # replace None batch with 1
        shp = tuple(1 if (d is None) else d for d in shp)
        if len(shp) == 5:
            return shp  # assume model is correct (5D)
    except Exception:
        pass
    return fallback

def tf_profile(model: tf.keras.Model,
               input_shape=None,
               device="/GPU:0",
               warmup=3, runs=5):
    # Input shape
    if input_shape is None:
        input_shape = _tf_input_shape_from_model(model)

    # Determine channels-last vs channels-first from shape
    # Expect either (B,D,H,W,C) or (B,C,D,H,W)
    if len(input_shape) != 5:
        raise ValueError(f"TF model expected 5D input, got {input_shape}")
    if input_shape[-1] in (1,2,3,4):
        layout = "channels_last"
    else:
        layout = "channels_first"

    # Build dummy
    x_np = np.random.randn(*input_shape).astype(np.float32)
    x_tf = tf.constant(x_np)

    # Params
    try:
        params = int(model.count_params())
    except Exception:
        params = float("nan")

    # FLOPs via TF profiler (best effort)
    def _flops_estimate_keras(model, sample):
        try:
            @tf.function(jit_compile=False)
            def _call(inp):
                return model(inp, training=False)

            concrete = _call.get_concrete_function(sample)
            frozen_func, graph_def = convert_variables_to_constants_v2_as_graph(concrete)
            # Use TF v1 profiler on graph_def
            opts = tf.compat.v1.profiler.ProfileOptionBuilder.float_operation()
            flops = tf.compat.v1.profiler.profile(
                graph=frozen_func.graph,
                options=opts
            )
            return float(flops.total_float_ops) if flops is not None else float("nan")
        except Exception as e:
            print(f"[WARN] TF FLOPs profiling failed: {e}")
            return float("nan")

    flops_total = _flops_estimate_keras(model, x_tf)  # raw FLOPs (approx)
    flops_str = fmt_si(flops_total, kind="count") if not np.isnan(flops_total) else "NaN"
    macs_str  = "NaN"  # TF v1 profiler reports FLOPs, not MACs

    # GPU memory (best effort; only works on TF>=2.9 and with GPU)
    mem_mb = float("nan")
    try:
        gpus = tf.config.list_physical_devices("GPU")
        if gpus:
            # Clear peak then run once
            # Note: TF doesn't expose a reset; we sample before/after instead.
            _ = model(x_tf, training=False)
            info = tf.config.experimental.get_memory_info("GPU:0")
            # peak memory since the start of program in bytes
            mem_mb = info.get("peak", np.nan) / (1024**2)
    except Exception as e:
        print(f"[WARN] TF memory query failed: {e}")

    # Latency
    try:
        for _ in range(warmup):
            _ = model(x_tf, training=False)
        t0 = time.time()
        for _ in range(runs):
            _ = model(x_tf, training=False)
        lat_s = (time.time() - t0) / runs
    except Exception as e:
        print(f"[WARN] TF latency failed: {e}")
        lat_s = float("nan")

    return {
        "Params (#)": params,
        "FLOPs": flops_str,
        "MACs": macs_str,
        "Inference Memory (MB)": mem_mb,
        "Latency per Inference (s)": lat_s,
        "layout": layout,
        "input_shape_used": input_shape,
    }

# ========= Run all & pretty print =========
torch_models = {
    "V-NET (UNet3D)": unet_model,
    "ViT (3D Transformer)": vit_model,
}
tf_models = {
    "Base Model (TF)": base_model_tf,
}

torch_rows = []
for name, m in torch_models.items():
    res = torch_profile(m, input_shape=(1,2,128,128,128), device="cuda")
    torch_rows.append({"Model": name, **res})

tf_rows = []
for name, m in tf_models.items():
    # If your TF model expects (B,2,128,128,128) (channels-first), pass that here:
    # tf_input = (1,2,128,128,128)
    res = tf_profile(m, input_shape=None, device="/GPU:0")
    tf_rows.append({"Model": name, **res})

df_all = pd.DataFrame(torch_rows + tf_rows,
                      columns=["Model","Params (#)","FLOPs","MACs","Inference Memory (MB)","Latency per Inference (s)"])

# Human-readable view
df_pretty = pd.DataFrame({
    "Model": df_all["Model"],
    "Params": [fmt_params(x) if isinstance(x,(int,float)) else x for x in df_all["Params (#)"]],
    "FLOPs":  df_all["FLOPs"],
    "MACs":   df_all["MACs"],
    "Mem":    [fmt_si(x, "mem") if isinstance(x,(int,float)) else x for x in df_all["Inference Memory (MB)"]],
    "Latency": [fmt_si(x, "time_ms") if isinstance(x,(int,float)) else x for x in df_all["Latency per Inference (s)"]],
})

print("\n=== Model Complexity & Practical Usability (All) ===\n")
print(df_pretty.to_string(index=False))

df_all.to_csv("model_complexity_summary_all.csv", index=False)
print("\nSaved → model_complexity_summary_all.csv")




2025-11-12 15:22:26.562222: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2025-11-12 15:22:26.666930: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2025-11-12 15:22:26.678750: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 20533 MB memory:  -> device: 0, name: NVIDIA A10, pci bus id: 0000:48:00.0, compute capability: 8.6


Instructions for updating:
This API was designed for TensorFlow v1. See https://www.tensorflow.org/guide/migrate for instructions on how to migrate your code to TensorFlow v2.


Instructions for updating:
This API was designed for TensorFlow v1. See https://www.tensorflow.org/guide/migrate for instructions on how to migrate your code to TensorFlow v2.



-max_depth                  10000
-min_bytes                  0
-min_peak_bytes             0
-min_residual_bytes         0
-min_output_bytes           0
-min_micros                 0
-min_accelerator_micros     0
-min_cpu_micros             0
-min_params                 0
-min_float_ops              1
-min_occurrence             0
-step                       -1
-order_by                   float_ops
-account_type_regexes       .*
-start_name_regexes         .*
-trim_name_regexes          
-show_name_regexes          .*
-hide_name_regexes          
-account_displayed_op_only  true
-select                     float_ops
-output                     stdout:


Doc:
scope: The nodes in the model graph are organized by their names, which is hierarchical like filesystem.
flops: Number of float operations. Note: Please read the implementation for the math behind it.

Profile:
node name | # float_ops
_TFProfRoot (--/1839.50b flops)
  model_1/conv3d_18/Conv3D (695.78b/695.78b flops)
  model_1/con

2025-11-12 15:22:48.803943: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8905
2025-11-12 15:22:48.888857: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory



=== Model Complexity & Practical Usability (All) ===

               Model   Params    FLOPs    MACs        Mem    Latency
      V-NET (UNet3D)  28.824M 121.235G 60.617G 937.386 MB  30.379 ms
ViT (3D Transformer)  23.502M   2.282T  1.141T     2.994K 119.247 ms
     Base Model (TF) 461.007M   1.840T     NaN     6.002K 246.976 ms

Saved → model_complexity_summary_all.csv


In [11]:
# === Next cell: robust cGAN load (fix relative-import error) + clean FLOPs warnings + re-run table ===
# Assumes previous cells defined:
#   - load_symbol_from_file(), fmt_si(), torch_profile(), tf_profile()
#   - `unet_model`, `vit_model`, `base_model_tf`
#   - `GAN_PATH` (string path to .../src/model.py), `SYM_GAN`
#   - CUDA/TF already set up

import os, math, time, types, importlib.util, sys, logging
import torch
import pandas as pd
from types import SimpleNamespace

# ----------------------------------------------------------------------
# 0) Quiet THOP's "Upsample: trilinear not implemented" warnings in logs
#    (FLOPs for Upsample are treated as ~0; this only suppresses noisy logs)
# ----------------------------------------------------------------------
try:
    import thop
    logging.getLogger().setLevel(logging.ERROR)
except Exception:
    pass

# ----------------------------------------------------------------------
# 1) Loader that respects relative imports in GAN (".utils") by creating
#    a synthetic package name and loading utils + model under that package.
# ----------------------------------------------------------------------
def load_symbol_with_package(model_path: str, symbol_name: str, pkg: str = "cganpkg"):
    """
    Load `symbol_name` from a Python file that uses relative imports like `from .utils import ...`.

    It:
      - infers the sibling utils.py in the same directory,
      - loads it as `<pkg>.utils`,
      - loads the model.py as `<pkg>.model`,
      - then returns `getattr(<pkg>.model, symbol_name)`.
    """
    model_path = os.path.abspath(model_path)
    src_dir    = os.path.dirname(model_path)
    utils_path = os.path.join(src_dir, "utils.py")

    if not os.path.isfile(model_path):
        raise ImportError(f"[cGAN] model file not found: {model_path}")
    if not os.path.isfile(utils_path):
        raise ImportError(f"[cGAN] utils file not found (required for relative import): {utils_path}")

    # Load <pkg>.utils
    utils_name = f"{pkg}.utils"
    if utils_name in sys.modules:
        utils_mod = sys.modules[utils_name]
    else:
        utils_spec = importlib.util.spec_from_file_location(utils_name, utils_path)
        if utils_spec is None or utils_spec.loader is None:
            raise ImportError(f"[cGAN] Cannot create spec for utils: {utils_path}")
        utils_mod = importlib.util.module_from_spec(utils_spec)
        sys.modules[utils_name] = utils_mod
        utils_spec.loader.exec_module(utils_mod)  # type: ignore

    # Load <pkg>.model
    model_name = f"{pkg}.model"
    model_spec = importlib.util.spec_from_file_location(model_name, model_path)
    if model_spec is None or model_spec.loader is None:
        raise ImportError(f"[cGAN] Cannot create spec for model: {model_path}")
    model_mod = importlib.util.module_from_spec(model_spec)
    # Hint package context so that `.utils` resolves to `<pkg>.utils`
    model_mod.__package__ = pkg  # crucial for relative imports inside model.py
    sys.modules[model_name] = model_mod
    model_spec.loader.exec_module(model_mod)  # type: ignore

    if not hasattr(model_mod, symbol_name):
        raise ImportError(f"[cGAN] `{symbol_name}` not found in model module at {model_path}")

    return getattr(model_mod, symbol_name)

# ----------------------------------------------------------------------
# 2) Load cGAN Generator class (fixes: "attempted relative import with no known parent package")
# ----------------------------------------------------------------------
try:
    GeneratorPix2PixCC3D = load_symbol_with_package(GAN_PATH, SYM_GAN, pkg="pix2pixcc3d")
except Exception as e:
    raise ImportError(f"[cGAN] Failed to load `{SYM_GAN}` from {GAN_PATH}: {e}")

# ----------------------------------------------------------------------
# 3) Instantiate cGAN Generator
#    NOTE: set trans_conv=True to avoid Upsample(trilinear) in FLOPs (cleaner profiling)
# ----------------------------------------------------------------------
# --- cGAN 옵션만 수정: norm_type을 'InstanceNorm3d'로 지정 ---
# ✅ 최종 권장 옵션 (두 곳 수정됨)
gan_opt = SimpleNamespace(
    input_ch=2, target_ch=1,
    n_gf=32, n_df=32,
    n_downsample=3, n_residual=6,
    norm_type='InstanceNorm3d',   # <- 'instance' 대신 정확한 이름
    padding_type='reflection',    # <- 'reflect' 대신 'reflection'
    trans_conv=True,              # (THOP trilinear 경고 피하려면 True 유지)
    n_D=3, ch_balance=0.0,
    lambda_LSGAN=1.0, lambda_FM=10.0, lambda_CC=5.0,
    n_CC=2, ccc=True, eps=1e-8,
    gpu_ids=0, data_type=32
)
gan_model = GeneratorPix2PixCC3D(gan_opt)



# ----------------------------------------------------------------------
# 4) Build dicts and profile everything again
# ----------------------------------------------------------------------
torch_models = {
    "V-NET (UNet3D)": unet_model,
    "ViT (3D Transformer)": vit_model,
    "cGAN (Pix2PixCC3D-Generator)": gan_model,
}
tf_models = {
    "Base Model (TF)": base_model_tf,
}

# Reuse torch_profile/tf_profile from previous cells
torch_rows = []
for name, m in torch_models.items():
    res = torch_profile(m, input_shape=(1,2,128,128,128), device="cuda")
    torch_rows.append({"Model": name, **res})

tf_rows = []
for name, m in tf_models.items():
    res = tf_profile(m, input_shape=None, device="/GPU:0")
    tf_rows.append({
        "Model": name,
        "Params (#)": res.get("Params (#)"),
        "FLOPs":       res.get("FLOPs"),
        "MACs":        res.get("MACs"),
        "Inference Memory (MB)": res.get("Inference Memory (MB)"),
        "Latency per Inference (s)": res.get("Latency per Inference (s)"),
    })

df_all = pd.DataFrame(
    torch_rows + tf_rows,
    columns=["Model","Params (#)","FLOPs","MACs","Inference Memory (MB)","Latency per Inference (s)"]
)

# ----------------------------------------------------------------------
# 5) Pretty print + save
# ----------------------------------------------------------------------
def _fmt_params(n):
    if n is None or (isinstance(n, float) and (math.isnan(n) or math.isinf(n))):
        return "NaN"
    if n >= 1e9: return f"{n/1e9:.3f}B"
    if n >= 1e6: return f"{n/1e6:.3f}M"
    return f"{n:,}"

df_pretty = pd.DataFrame({
    "Model": df_all["Model"],
    "Params": [ _fmt_params(x) if isinstance(x,(int,float)) else x for x in df_all["Params (#)"] ],
    "FLOPs":  df_all["FLOPs"],
    "MACs":   df_all["MACs"],
    "Mem":    [ fmt_si(x, "mem") if isinstance(x,(int,float)) else x for x in df_all["Inference Memory (MB)"] ],
    "Latency": [ fmt_si(x, "time_ms") if isinstance(x,(int,float)) else x for x in df_all["Latency per Inference (s)"] ],
})

print("\n=== Model Complexity & Practical Usability (Including cGAN) ===\n")
print(df_pretty.to_string(index=False))

out_csv = "model_complexity_summary_all_with_cgan.csv"
df_all.to_csv(out_csv, index=False)
print(f"\nSaved → {out_csv}")

# ----------------------------------------------------------------------
# (Optional) If you still prefer Upsample(trilinear) inside GAN for inference,
# set `gan_opt.trans_conv=False` above for the final run; FLOPs will show zeros for Upsample
# but latency/memory will still be measured correctly.
# ----------------------------------------------------------------------


2025-11-12 15:26:24.648910: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2025-11-12 15:26:24.649139: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2025-11-12 15:26:24.658090: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 20533 MB memory:  -> device: 0, name: NVIDIA A10, pci bus id: 0000:48:00.0, compute capability: 8.6



-max_depth                  10000
-min_bytes                  0
-min_peak_bytes             0
-min_residual_bytes         0
-min_output_bytes           0
-min_micros                 0
-min_accelerator_micros     0
-min_cpu_micros             0
-min_params                 0
-min_float_ops              1
-min_occurrence             0
-step                       -1
-order_by                   float_ops
-account_type_regexes       .*
-start_name_regexes         .*
-trim_name_regexes          
-show_name_regexes          .*
-hide_name_regexes          
-account_displayed_op_only  true
-select                     float_ops
-output                     stdout:


Doc:
scope: The nodes in the model graph are organized by their names, which is hierarchical like filesystem.
flops: Number of float operations. Note: Please read the implementation for the math behind it.

Profile:
node name | # float_ops
_TFProfRoot (--/1839.50b flops)
  model_1/conv3d_18/Conv3D (695.78b/695.78b flops)
  model_1/con

In [13]:
import pandas as pd
import math

# CSV 경로
csv_path = "/home/mingyeong/GAL2DM_ASIM_VNET/eval/model_complexity_summary_all_with_cgan.csv"

# CSV 읽기
df = pd.read_csv(csv_path)

# 숫자 포맷 함수
def fmt_params(n):
    if isinstance(n, str):
        return n
    if math.isnan(n):
        return "NaN"
    if n >= 1e9:
        return f"{n/1e9:.3f} B"
    if n >= 1e6:
        return f"{n/1e6:.3f} M"
    if n >= 1e3:
        return f"{n/1e3:.3f} K"
    return f"{n:.0f}"

def fmt_mem(x):
    if isinstance(x, str):
        return x
    if math.isnan(x):
        return "NaN"
    return f"{x:,.0f} MB"

def fmt_lat(x):
    if isinstance(x, str):
        return x
    if math.isnan(x):
        return "NaN"
    return f"{x*1e3:.2f} ms"

# 각 컬럼에 포맷 적용
df["Params (#)"] = df["Params (#)"].apply(fmt_params)
df["Inference Memory (MB)"] = df["Inference Memory (MB)"].apply(fmt_mem)
df["Latency per Inference (s)"] = df["Latency per Inference (s)"].apply(fmt_lat)

# 보기 좋은 순서로 정렬
order = ["Model", "Params (#)", "FLOPs", "MACs", "Inference Memory (MB)", "Latency per Inference (s)"]
df = df[[c for c in order if c in df.columns]]

# 출력
print("\n=== Model Complexity Summary (Including cGAN) ===\n")
print(df.to_string(index=False))



=== Model Complexity Summary (Including cGAN) ===

                       Model Params (#)    FLOPs     MACs Inference Memory (MB) Latency per Inference (s)
              V-NET (UNet3D)   28.824 M 121.235G  60.617G                937 MB                  30.64 ms
        ViT (3D Transformer)   23.502 M   2.282T   1.141T              2,994 MB                 119.53 ms
cGAN (Pix2PixCC3D-Generator)   27.808 M 953.571G 476.785G              1,100 MB                 211.51 ms
             Base Model (TF)  461.007 M   1.840T      NaN              6,002 MB                 248.64 ms
