In [1]:
# Cell 0: 모듈 import를 위한 경로 설정
import os, sys
sys.path.append(os.path.abspath(".."))  # shared, models 디렉토리 접근 가능하도록 경로 추가


In [2]:
# Cell 1: 환경 확인
import torch

print(f"✅ PyTorch version: {torch.__version__}")
print(f"🚀 GPU available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print(f"🧠 GPU name: {torch.cuda.get_device_name(0)}")
    
    total_memory = torch.cuda.get_device_properties(device).total_memory / 1024**3  # GiB
    reserved_memory = torch.cuda.memory_reserved(device) / 1024**3  # GiB
    allocated_memory = torch.cuda.memory_allocated(device) / 1024**3  # GiB
    free_memory = reserved_memory - allocated_memory  # GiB

    print(f"💾 Total memory: {total_memory:.2f} GiB")
    print(f"📦 Reserved memory: {reserved_memory:.2f} GiB")
    print(f"📈 Allocated memory: {allocated_memory:.2f} GiB")
    print(f"🟢 Free memory in reserved: {free_memory:.2f} GiB")


✅ PyTorch version: 2.6.0+cu124
🚀 GPU available: True
🧠 GPU name: Quadro RTX 5000
💾 Total memory: 15.73 GiB
📦 Reserved memory: 0.00 GiB
📈 Allocated memory: 0.00 GiB
🟢 Free memory in reserved: 0.00 GiB


In [3]:
# Cell 2: 데이터셋 로딩
from torch.utils.data import DataLoader
from shared.data_loader import HDF5Dataset
import os

input_dir = "/caefs/data/IllustrisTNG/subcube/input"
output_dir = "/caefs/data/IllustrisTNG/subcube/output"

input_files = sorted([os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.endswith(".h5")])
output_files = sorted([os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith(".h5")])

dataset = HDF5Dataset(input_files, output_files)
loader = DataLoader(dataset, batch_size=2, shuffle=True)

x, y = next(iter(loader))
print(f"✅ Sample loaded: input shape = {x.shape}, output shape = {y.shape}")


2025-07-30 20:41:42,508 | INFO | data_loader | 🔍 Initializing dataset with 12 file pairs.
2025-07-30 20:41:42,529 | INFO | data_loader | 📦 Total samples across all files: 110592


✅ Sample loaded: input shape = torch.Size([2, 1, 60, 60, 60]), output shape = torch.Size([2, 1, 60, 60, 60])


In [4]:
# Cell 3: ViT 초기화 for scalar regression from full 3D volume
from models.vit.model import ViT3D
import torch

# 입력 크기와 동일하게 설정
input_shape = (60, 60, 60)  # (D, H, W)
patch_spatial = 10         # → 60 / 10 = 6 패치
patch_depth = 10           # → 60 / 10 = 6 패치

# 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 모델 선택: "full" or "simple"
model_name = "full"


model = ViT3D(
    image_size=input_shape[1],
    frames=input_shape[0],
    image_patch_size=patch_spatial,
    frame_patch_size=patch_depth,
    dim=256,
    depth=6,
    heads=8,
    mlp_dim=512,
    in_channels=1,
    out_channels=1
).to(device)

model.train()

# Dummy input 확인
x_dummy = torch.randn(2, 1, *input_shape).to(device)  # [B, C, D, H, W]
y_dummy = model(x_dummy)

print(f"✅ ViT model ({model_name}) loaded and set to training mode.")
print(f"📐 Dummy input: {x_dummy.shape} → Prediction shape: {y_dummy.shape}")


✅ ViT model (full) loaded and set to training mode.
📐 Dummy input: torch.Size([2, 1, 60, 60, 60]) → Prediction shape: torch.Size([2, 1, 60, 60, 60])


In [5]:
from torchinfo import summary

summary(model, input_size=(2, 1, 60, 60, 60), col_names=["input_size", "output_size", "num_params", "kernel_size"])

Layer (type:depth-idx)                             Input Shape               Output Shape              Param #                   Kernel Shape
ViT3D                                              [2, 1, 60, 60, 60]        [2, 1, 60, 60, 60]        55,296                    --
├─Sequential: 1-1                                  [2, 1, 60, 60, 60]        [2, 216, 256]             --                        --
│    └─Rearrange: 2-1                              [2, 1, 60, 60, 60]        [2, 216, 1000]            --                        --
│    └─LayerNorm: 2-2                              [2, 216, 1000]            [2, 216, 1000]            2,000                     --
│    └─Linear: 2-3                                 [2, 216, 1000]            [2, 216, 256]             256,256                   --
│    └─LayerNorm: 2-4                              [2, 216, 256]             [2, 216, 256]             512                       --
├─Dropout: 1-2                                     [2, 216, 256]  

In [6]:
from models.vit.model import ViT3D
from torch.utils.data import DataLoader

def test_batch_size(batch_size):
    try:
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

        # ⚠️ ViT3D의 patch 크기 → 10이면 60 % 10 == 0
        model = ViT3D(
            image_size=60,           # H/W
            frames=60,               # D
            image_patch_size=10,
            frame_patch_size=10,
            dim=256,
            depth=6,
            heads=8,
            mlp_dim=512,
            in_channels=1,
            out_channels=1
        ).cuda()

        for x, y in loader:
            x, y = x.cuda(), y.cuda()
            pred = model(x)
            print(f"✅ Success with batch_size={batch_size}, pred.shape={pred.shape}")
            break
    except RuntimeError as e:
        print(f"❌ Failed with batch_size={batch_size}: {str(e).splitlines()[0]}")

for bs in [32, 16, 8, 4, 2, 1]:
    test_batch_size(bs)


✅ Success with batch_size=32, pred.shape=torch.Size([32, 1, 60, 60, 60])
✅ Success with batch_size=16, pred.shape=torch.Size([16, 1, 60, 60, 60])
✅ Success with batch_size=8, pred.shape=torch.Size([8, 1, 60, 60, 60])
✅ Success with batch_size=4, pred.shape=torch.Size([4, 1, 60, 60, 60])
✅ Success with batch_size=2, pred.shape=torch.Size([2, 1, 60, 60, 60])
✅ Success with batch_size=1, pred.shape=torch.Size([1, 1, 60, 60, 60])


In [7]:
# Cell 4: 손실 함수 테스트
from shared.losses import mse_loss

loss_val = mse_loss(x.to(device), y.to(device))
print(f"✅ MSE Loss on batch: {loss_val.item():.4f}")


✅ MSE Loss on batch: 89.6607


In [8]:
# Cell 5: Optimizer 및 스케줄러 설정
import torch.optim as optim

optimizer = optim.Adam(model.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
print("✅ Optimizer and LR scheduler initialized.")


✅ Optimizer and LR scheduler initialized.


In [9]:
# Cell 6: 빠른 학습 루프 (1 epoch, 일부 batch만)
from tqdm import tqdm

model.train()
n_batch = 10  # 테스트용으로 10개 배치만 학습

for epoch in range(3):
    total_loss = 0.0
    for i, (inputs, targets) in enumerate(tqdm(loader, desc=f"Epoch {epoch+1}")):
        if i >= n_batch:
            break
        inputs, targets = inputs.to(device), targets.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)  # outputs.shape: [B, 1]
        loss = mse_loss(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    scheduler.step()
    print(f"📉 Epoch {epoch+1} (partial) Loss: {total_loss / n_batch:.4f} | LR: {scheduler.get_last_lr()[0]:.2e}")



  return F.mse_loss(pred, target)
Epoch 1:   0%|          | 0/55296 [00:00<?, ?it/s]


RuntimeError: The size of tensor a (60) must match the size of tensor b (2) at non-singleton dimension 3