In [1]:
# %% [markdown]
# # ConvNeXt V2 Strong-Lensing Pipeline — Debug Notebook
# - Checks data_loader.py, model.py, utils.py, train.py, predict.py, evaluate.py
# - Focused on quick smoke tests: small samples / 1–2 training steps / small-scale evaluation

In [2]:
# %%
# Environment banner: interpreter, PyTorch build, CUDA toolkit and GPU name.
import os
import sys
import importlib
import time
import json

import numpy as np
import torch


def _report_environment() -> None:
    """Print the Python / Torch / CUDA versions and the active accelerator."""
    if torch.cuda.is_available():
        accelerator = torch.cuda.get_device_name(0)
    else:
        accelerator = "CPU only"

    rows = (
        ("Python:", sys.version),
        ("Torch :", torch.__version__),
        ("CUDA  :", torch.version.cuda),
        ("GPU   :", accelerator),
    )
    for tag, value in rows:
        print(tag, value)


_report_environment()

# If the project root is not the current directory, add it to the path like so:
# sys.path.append("/path/to/your/project")


Python: 3.12.0 | packaged by Anaconda, Inc. | (main, Oct  2 2023, 17:29:18) [GCC 11.2.0]
Torch : 2.6.0+cu124
CUDA  : 12.4
GPU   : Quadro RTX 5000


In [3]:
# %%
# --- Edit point: data root ---
# All four class directories hang off a single root. Set the
# SLCHALLENGE_DATA_ROOT environment variable to run on another machine;
# the default reproduces the original hard-coded locations.
DATA_ROOT = os.environ.get("SLCHALLENGE_DATA_ROOT", "/caefs/data/IllustrisTNG/slchallenge")

SLSIM_LENSES_DIR      = os.path.join(DATA_ROOT, "slsim_lenses", "slsim_lenses")
SLSIM_NONLENSES_DIR   = os.path.join(DATA_ROOT, "slsim_nonlenses", "slsim_nonlenses")
HSC_DEG_LENSES_DIR    = os.path.join(DATA_ROOT, "hsc_lenses", "hsc_lenses")
HSC_DEG_NONLENSES_DIR = os.path.join(DATA_ROOT, "hsc_nonlenses", "hsc_nonlenses")

# Mapping handed to get_dataloaders(class_paths=...): class name -> directory.
CLASS_PATHS = {
    "slsim_lenses": SLSIM_LENSES_DIR,
    "slsim_nonlenses": SLSIM_NONLENSES_DIR,
    "hsc_lenses": HSC_DEG_LENSES_DIR,
    "hsc_nonlenses": HSC_DEG_NONLENSES_DIR,
}


DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH  = 256
NUM_WORKERS = 8
SEED   = 42

# Small split for fast debugging: (train, val, test) fractions.
SPLIT = (0.70, 0.15, 0.15)
assert abs(sum(SPLIT) - 1.0) < 1e-9, "SPLIT fractions must sum to 1"

TAKE_TRAIN_FRAC = 0.002  # use only 0.2% of the train split (for speed); set to None if needed
# NOTE(review): CLIP_Q is written as a percentile (99.9) while the train.py
# debug args in this notebook pass clip_q=0.997 as a fraction — confirm which
# scale the pipeline expects before wiring this constant in.
CLIP_Q = 99.9            # upper-quantile clipping (optional)


In [4]:
# 0) Package setup
from pathlib import Path
import sys, importlib

PROJECT_ROOT = Path("/caefs/user/mmingyeong/2508_slchallence")
SRC_DIR = PROJECT_ROOT / "src"

# Two entries are required on sys.path:
#   * SRC_DIR      -> bare-module imports (`import model`)
#   * PROJECT_ROOT -> package-style imports (`from src.model import ...`)
# Previously only SRC_DIR was added, so `src.*` resolved only when the kernel's
# working directory happened to be the project root. The membership check
# keeps sys.path from growing each time the cell is re-run. SRC_DIR is
# inserted last so that it ends up first on the path, as before.
for _path in (PROJECT_ROOT, SRC_DIR):
    if str(_path) not in sys.path:
        sys.path.insert(0, str(_path))

# 1) Bare-module handles, reloaded so edits to the .py files are picked up.
import utils as utils
import model as model
import data_loader as data_loader

importlib.reload(utils)
importlib.reload(model)
importlib.reload(data_loader)

# 2) Package-path imports. `src.model` / `src.data_loader` are separate module
#    objects from the bare `model` / `data_loader` above, so the reloads in
#    step 1 do not refresh them. Reload them explicitly when they are already
#    loaded (on a first run they are simply imported fresh below).
for _mod_name in ("src.model", "src.data_loader"):
    if _mod_name in sys.modules:
        importlib.reload(sys.modules[_mod_name])

from src.model import convnextv2_atto, convnextv2_nano, convnextv2_tiny
from src.data_loader import get_dataloaders, LensFITSBinaryDataset

print("✅ imports via 'src.*' ready")


  from .autonotebook import tqdm as notebook_tqdm


✅ imports via 'src.*' ready


In [5]:
# Build heavily subsampled train/val/test loaders for a quick smoke test.
train_loader, val_loader, test_loader = get_dataloaders(
    class_paths=CLASS_PATHS,
    batch_size=BATCH,
    split=SPLIT,
    seed=SEED,
    num_workers=NUM_WORKERS,
    # Pinned host memory only helps host->GPU copies; skip it on CPU-only runs.
    pin_memory=(DEVICE == "cuda"),
    augment_train=False,
    # Use the config-cell knob instead of a duplicated literal so that
    # changing TAKE_TRAIN_FRAC actually changes the train subsample.
    take_train_fraction=TAKE_TRAIN_FRAC,
    take_val_fraction=0.002,
    take_test_fraction=0.002,
)

# Fail early if the subsampling left any split empty.
for _split_name, _loader in (("train", train_loader), ("val", val_loader), ("test", test_loader)):
    assert len(_loader.dataset) > 0, f"{_split_name} split is empty after subsampling"

print(len(train_loader.dataset), len(val_loader.dataset), len(test_loader.dataset))


2025-09-03 15:48:02,348 [INFO] [data_loader] Collected 249999 from 'slsim_lenses' (/caefs/data/IllustrisTNG/slchallenge/slsim_lenses/slsim_lenses), label=1, domain=slsim
2025-09-03 15:48:03,014 [INFO] [data_loader] Collected 250000 from 'slsim_nonlenses' (/caefs/data/IllustrisTNG/slchallenge/slsim_nonlenses/slsim_nonlenses), label=0, domain=slsim
2025-09-03 15:48:03,689 [INFO] [data_loader] Collected 250000 from 'hsc_lenses' (/caefs/data/IllustrisTNG/slchallenge/hsc_lenses/hsc_lenses), label=1, domain=hsc
2025-09-03 15:48:04,361 [INFO] [data_loader] Collected 249999 from 'hsc_nonlenses' (/caefs/data/IllustrisTNG/slchallenge/hsc_nonlenses/hsc_nonlenses), label=0, domain=hsc
2025-09-03 15:48:04,362 [INFO] [data_loader] TOTAL files collected: 999998
2025-09-03 15:48:05,299 [INFO] [data_loader] Train subsampling: kept 1399/699998 (0.20%)
2025-09-03 15:48:05,304 [INFO] [data_loader] Val subsampling: kept 299/149999 (0.20%)
2025-09-03 15:48:05,309 [INFO] [data_loader] Test subsampling: kept 

1399 299 300


In [6]:
# %%
# model.py smoke test: build the network and push one dummy batch through it.
import torch
from src.model import convnextv2_atto, convnextv2_nano, convnextv2_tiny

# Build the model (grayscale input, single-logit binary classification).
model = convnextv2_atto(in_chans=1, num_classes=1).to(DEVICE)
# Inference-only check: switch off training-time behaviour such as
# stochastic depth / dropout so the forward pass is deterministic.
model.eval()
print(model.__class__.__name__, "ready on", DEVICE)

# Dummy input batch (batch=4, channels=1, 41x41).
x_dummy = torch.randn(4, 1, 41, 41, device=DEVICE)
with torch.no_grad():
    y_dummy = model(x_dummy)

# One logit per sample is what the BCE-with-logits setup expects.
assert y_dummy.shape == (x_dummy.shape[0], 1), f"unexpected output shape {tuple(y_dummy.shape)}"

print("Input :", x_dummy.shape, x_dummy.dtype, x_dummy.device)
print("Output:", y_dummy.shape, y_dummy.dtype, y_dummy.device)


ConvNeXtV2 ready on cuda
Input : torch.Size([4, 1, 41, 41]) torch.float32 cuda:0
Output: torch.Size([4, 1]) torch.float32 cuda:0


In [7]:
# %%
# model.py smoke test with BCEWithLogitsLoss: forward pass + loss on dummy data.
import torch
import torch.nn as nn
from src.model import convnextv2_atto

# Build the model (grayscale input, single-logit binary classification).
model = convnextv2_atto(in_chans=1, num_classes=1).to(DEVICE)
# Forward-only check under no_grad: put the module in eval mode so
# training-time stochastic layers do not perturb the reported loss.
model.eval()
print(model.__class__.__name__, "ready on", DEVICE)

# Dummy input batch (batch=4, channels=1, 41x41).
x_dummy = torch.randn(4, 1, 41, 41, device=DEVICE)
# Dummy targets: binary labels as float32, the dtype BCEWithLogitsLoss needs.
y_dummy = torch.randint(0, 2, (4, 1), dtype=torch.float32, device=DEVICE)

criterion = nn.BCEWithLogitsLoss()

with torch.no_grad():
    y_pred = model(x_dummy)
    loss = criterion(y_pred, y_dummy)

# Logits and targets must line up element-wise, and the loss must be a real number.
assert y_pred.shape == y_dummy.shape, f"logit/target shape mismatch: {tuple(y_pred.shape)} vs {tuple(y_dummy.shape)}"
assert torch.isfinite(loss).item(), "loss is NaN or infinite"

print("Input :", x_dummy.shape, x_dummy.dtype, x_dummy.device)
print("Target:", y_dummy.shape, y_dummy.dtype, y_dummy.device)
print("Output:", y_pred.shape, y_pred.dtype, y_pred.device)
print("Loss  :", loss.item())


ConvNeXtV2 ready on cuda
Input : torch.Size([4, 1, 41, 41]) torch.float32 cuda:0
Target: torch.Size([4, 1]) torch.float32 cuda:0
Output: torch.Size([4, 1]) torch.float32 cuda:0
Loss  : 0.652077853679657


In [8]:
# TODO (2025-08-29):
# 1. debug train.py
# 2. debug predict.py
# 3. debug evaluate.py

In [11]:
# %%
# train.py debug cell (for Jupyter): calls train.main() with a hand-built
# args object instead of going through the command line.
import types
import torch
import os
import sys
from pathlib import Path

PROJECT_ROOT = Path("/caefs/user/mmingyeong/2508_slchallence")
SRC_DIR = PROJECT_ROOT / "src"
# Guarded so re-running the cell does not keep prepending the same entry.
if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))

import train as train

# Fake args (SimpleNamespace standing in for argparse.Namespace).
args = types.SimpleNamespace(
    # Data paths (from the config cell)
    slsim_lenses      = SLSIM_LENSES_DIR,
    slsim_nonlenses   = SLSIM_NONLENSES_DIR,
    hsc_lenses        = HSC_DEG_LENSES_DIR,
    hsc_nonlenses     = HSC_DEG_NONLENSES_DIR,

    # Dataloader
    batch_size        = 8,
    num_workers       = 2,
    no_augment        = False,
    # NOTE(review): the train knob is spelled `take_train_frac` while the
    # val/test knobs use `..._fraction` — confirm these match the attribute
    # names train.py actually reads.
    take_train_frac   = 0.001,   # use only a tiny sample
    take_val_fraction   = 0.001,   # use only a tiny sample
    take_test_fraction   = 0.001,   # use only a tiny sample

    # --- preprocessing toggles (NEW) ---
    apply_padding        = True,      # center reflect-pad 41->64
    out_size_when_padded = 64,
    apply_normalization  = True,      # bkg-sub -> (clip) -> z-score
    clip_q               = 0.997,     # set to None for pure z-score (no clipping)
    low_clip_q           = None,      # e.g., 0.005 if you want low-tail clip too
    use_mad              = False,     # True => robust median/MAD instead of mean/std

    # Split: reuse the config-cell fractions so every stage agrees.
    train_frac        = SPLIT[0],
    val_frac          = SPLIT[1],
    test_frac         = SPLIT[2],

    # Model
    model_size        = "atto",
    drop_path         = 0.0,

    # Optim
    lr                = 1e-3,
    weight_decay      = 1e-4,
    cosine            = False,
    warmup_epochs     = 0,

    # Train
    epochs            = 5,       # short debug run: 5 epochs (lower to 1 for the quickest check)
    patience          = 2,
    min_delta         = 0.0,
    seed              = SEED,
    device            = DEVICE,

    # Save (relative to the kernel's working directory)
    save_dir          = "./_dbg_ckpt_train",
    log_every         = 1,
)

os.makedirs(args.save_dir, exist_ok=True)

# %%
# 🚀 Run the debug training
train.main(args)


2025-09-03 15:51:15 [INFO] [train] Logger initialized -> ./_dbg_ckpt_train/train.log
2025-09-03 15:51:15 [INFO] [train] 🚀 Configuration
2025-09-03 15:51:15 [INFO] [train]   slsim_lenses: /caefs/data/IllustrisTNG/slchallenge/slsim_lenses/slsim_lenses
2025-09-03 15:51:15 [INFO] [train]   slsim_nonlenses: /caefs/data/IllustrisTNG/slchallenge/slsim_nonlenses/slsim_nonlenses
2025-09-03 15:51:15 [INFO] [train]   hsc_lenses: /caefs/data/IllustrisTNG/slchallenge/hsc_lenses/hsc_lenses
2025-09-03 15:51:15 [INFO] [train]   hsc_nonlenses: /caefs/data/IllustrisTNG/slchallenge/hsc_nonlenses/hsc_nonlenses
2025-09-03 15:51:15 [INFO] [train]   batch_size: 8
2025-09-03 15:51:15 [INFO] [train]   num_workers: 2
2025-09-03 15:51:15 [INFO] [train]   no_augment: False
2025-09-03 15:51:15 [INFO] [train]   take_train_frac: 0.001
2025-09-03 15:51:15 [INFO] [train]   take_val_fraction: 0.001
2025-09-03 15:51:15 [INFO] [train]   take_test_fraction: 0.001
2025-09-03 15:51:15 [INFO] [train]   apply_padding: True
20

                                                                   

2025-09-03 15:51:34 [INFO] [train] ✅ Epoch 1: best model updated (val_loss=0.728046)
2025-09-03 15:51:34 [INFO] [train] 📉 Epoch 001/5 | Train Loss 0.7814 Acc 56.37% | Val Loss 0.7280 Acc 49.66% AUC 0.6431 | LR 1.00e-03 | 16.0s


                                                                   

2025-09-03 15:51:39 [INFO] [train] ✅ Epoch 2: best model updated (val_loss=0.695911)
2025-09-03 15:51:39 [INFO] [train] 📉 Epoch 002/5 | Train Loss 0.6735 Acc 57.94% | Val Loss 0.6959 Acc 49.66% AUC 0.6720 | LR 1.00e-03 | 4.5s


                                                                   

2025-09-03 15:51:44 [INFO] [train] ✅ Epoch 3: best model updated (val_loss=0.685237)
2025-09-03 15:51:44 [INFO] [train] 📉 Epoch 003/5 | Train Loss 0.6680 Acc 57.08% | Val Loss 0.6852 Acc 49.66% AUC 0.6635 | LR 1.00e-03 | 4.8s


                                                                   

2025-09-03 15:51:48 [INFO] [train] ✅ Epoch 4: best model updated (val_loss=0.678846)
2025-09-03 15:51:48 [INFO] [train] 📉 Epoch 004/5 | Train Loss 0.6851 Acc 54.65% | Val Loss 0.6788 Acc 56.38% AUC 0.6605 | LR 1.00e-03 | 4.8s


                                                                   

2025-09-03 15:51:53 [INFO] [train] 📉 Epoch 005/5 | Train Loss 0.6657 Acc 57.80% | Val Loss 0.6794 Acc 49.66% AUC 0.6650 | LR 1.00e-03 | 4.7s
2025-09-03 15:51:53 [INFO] [train] 🔍 Evaluating on test set (best checkpoint)...


                                                     

2025-09-03 15:51:56 [INFO] [train] ✅ Test | Loss 0.6819 | Acc 54.67% | AUC 0.6355


In [13]:
# %%
# predict.py debug cell (for Jupyter): calls predict.main() with a hand-built
# args object instead of going through the command line.
import types
import torch
import os  # needed for os.makedirs below; was commented out with a typo
import sys
from pathlib import Path

# Project paths
PROJECT_ROOT = Path("/caefs/user/mmingyeong/2508_slchallence")
SRC_DIR = PROJECT_ROOT / "src"
# Guarded so re-running the cell does not keep prepending the same entry.
if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))

import predict as predict  # import the predict.py module

# Data directories (SLSIM_* / HSC_DEG_*) come from the config cell at the top
# of the notebook; they are deliberately not redefined here so there is a
# single place to edit them.

# Model checkpoint (best.pt written by the train.py debug cell; relative to
# the kernel's working directory, matching that cell's save_dir).
MODEL_PATH = "./_dbg_ckpt_train/best.pt"

# Debug args
args = types.SimpleNamespace(
    # Data paths
    slsim_lenses      = SLSIM_LENSES_DIR,
    slsim_nonlenses   = SLSIM_NONLENSES_DIR,
    hsc_lenses        = HSC_DEG_LENSES_DIR,
    hsc_nonlenses     = HSC_DEG_NONLENSES_DIR,

    # Which split to run (one of train/val/test/all)
    which             = "test",

    # Dataloader settings; split and seed reuse the config-cell values so the
    # test split is the same one the training run held out.
    batch_size        = 16,
    num_workers       = 2,
    train_frac        = SPLIT[0],
    val_frac          = SPLIT[1],
    test_frac         = SPLIT[2],
    seed              = SEED,

    # Sampling (keep the run small while debugging)
    take_test_fraction  = None,
    max_samples = 10,              # limit inference to 10 samples

    # Model settings
    model_path        = MODEL_PATH,
    model_size        = "atto",
    drop_path         = 0.0,

    # Device & output location. The output directory is anchored at
    # PROJECT_ROOT because the evaluate.py debug cell reads its inputs from
    # PROJECT_ROOT / "_dbg_pred_outputs"; a cwd-relative path would only
    # match when the kernel is started from the project root.
    device            = DEVICE,
    output_dir        = str(PROJECT_ROOT / "_dbg_pred_outputs"),
)

# Create the output folder
os.makedirs(args.output_dir, exist_ok=True)

# %%
# 🚀 Run prediction
predict.main(args)


2025-09-03 15:52:24,552 [INFO] [predict] 📦 Building dataloaders...
2025-09-03 15:52:28,142 [INFO] [predict] Split sizes -> train:699998  val:149999  test:150001
2025-09-03 15:52:28,144 [INFO] [predict] 🧠 Loading model...
2025-09-03 15:52:28,300 [INFO] [predict] 🚀 Inference on split: test
2025-09-03 15:52:29,027 [INFO] [predict] ✅ TEST | Acc=0.4000 | AUC=0.7917 | N=10
2025-09-03 15:52:29,059 [INFO] [predict] 💾 Saved per-sample CSV → ./_dbg_pred_outputs/pred_test.csv
2025-09-03 15:52:29,069 [INFO] [predict] 💾 Saved NPY arrays for test split
2025-09-03 15:52:29,070 [INFO] [predict] 🎯 Done.


In [14]:
# %%
# evaluate.py debug cell (for Jupyter): feeds the NPY arrays written by the
# predict.py debug cell into evaluate.main().
import sys
import types
from pathlib import Path

# Project layout
PROJECT_ROOT = Path("/caefs/user/mmingyeong/2508_slchallence")
SRC_DIR = PROJECT_ROOT.joinpath("src")
sys.path.insert(0, str(SRC_DIR))

import evaluate as evaluate  # the evaluate.py module

# Inputs: arrays produced by predict.py
PRED_OUTPUT_DIR = PROJECT_ROOT.joinpath("_dbg_pred_outputs")
LABELS_PATH = PRED_OUTPUT_DIR.joinpath("labels_test.npy")
PROBS_PATH = PRED_OUTPUT_DIR.joinpath("probs_test.npy")
PREDS_PATH = PRED_OUTPUT_DIR.joinpath("preds_test.npy")

# Outputs: metrics and plots
EVAL_OUT_DIR = PROJECT_ROOT.joinpath("_dbg_eval_outputs")
EVAL_OUT_DIR.mkdir(exist_ok=True)

# Debug args, assembled as a plain mapping and then exposed as attributes.
_eval_options = {
    "tag": "test",                  # prefix for result file names
    "from_csv": False,              # load from NPY rather than CSV
    "from_npy": True,
    "csv": None,
    "labels": str(LABELS_PATH),
    "probs": str(PROBS_PATH),
    "preds": str(PREDS_PATH),
    "threshold": 0.5,               # decision threshold
    "optimize_threshold": False,    # do not search for an optimal threshold
    "per_domain": False,            # no per-domain breakdown
    "plot": True,                   # save ROC/PR/Calibration figures
    "out_dir": str(EVAL_OUT_DIR),
    "seed": 42,
}
args = types.SimpleNamespace(**_eval_options)

# %%
# 🚀 Run evaluation
evaluate.main(args)


2025-09-03 15:52:36,681 [INFO] Loaded NPY: N=10 (labels/probs) | preds=(10,)
2025-09-03 15:52:36,711 [INFO] Saved metrics JSON → /caefs/user/mmingyeong/2508_slchallence/_dbg_eval_outputs/metrics_test.json
2025-09-03 15:52:36,716 [INFO] Saved metrics CSV → /caefs/user/mmingyeong/2508_slchallence/_dbg_eval_outputs/metrics_test.csv
2025-09-03 15:52:37,150 [INFO] Saved ROC/PR/Calibration plots.
2025-09-03 15:52:37,151 [INFO] Done.
