In [33]:
# !pip install -q ptflops

In [34]:
#!pip install -r requirements.txt

In [35]:
import os
import glob
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from sklearn.metrics import jaccard_score
import numpy as np
import math
import torch.nn.functional as F
import torchvision
import time
import pandas as pd
from PIL import Image
import datetime

In [36]:
# # 기본 디렉토리 설정 
# TRAIN_DIR = "/kaggle/input/2025-sw-ai/archive/train"
# VAL_DIR = "/kaggle/input/2025-sw-ai/archive/val"
# TEST_DIR = "/kaggle/input/2025-sw-ai/archive/test/images"
# OUTPUT_PATH = "/kaggle/working/submission.csv"

In [37]:
# 로컬 디렉토리 설정
TRAIN_DIR = "input/2025-csu-sw-ai-challenge/archive/train" 
VAL_DIR = "input/2025-csu-sw-ai-challenge/archive/val"
TEST_DIR = "input/2025-csu-sw-ai-challenge/archive/test/images"
OUTPUT_CSV = "working/submission.csv" 
OUTPUT_MASK = "working/mask_ouputs"

In [38]:
SEED = 2025
def set_seed(seed=SEED):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    print(f'set SEED: {SEED}')
set_seed()

set SEED: 2025


In [39]:
class CrackDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.img_dir = os.path.join(root_dir, "images")
        self.mask_dir = os.path.join(root_dir, "masks")
        self.img_list = sorted(glob.glob(self.img_dir + "/*.jpg"))
        self.mask_list = sorted(glob.glob(self.mask_dir + "/*.jpg"))
        self.transform = transform

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, idx):
        img = Image.open(self.img_list[idx]).convert("L")  # grayscale
        mask = Image.open(self.mask_list[idx]).convert("L")

        img = np.array(img, dtype=np.float32) / 255.0
        mask = np.array(mask, dtype=np.float32) / 255.0
        mask = (mask > 0.5).astype(np.float32)  # binary mask

        img = torch.tensor(img).unsqueeze(0)  # (1, H, W)
        mask = torch.tensor(mask).unsqueeze(0)  # (1, H, W)

        if self.transform:
            img = self.transform(img)

        return img, mask

In [40]:
import segmentation_models_pytorch as smp

model = smp.Linknet(
    encoder_name="mobilenet_v2",  # 또는 "resnet18", "efficientnet-b0"
    encoder_weights="imagenet",
    in_channels=1,
    classes=1
)

In [41]:
def binary_metrics(preds, targets, eps=1e-6):
    preds = preds.float()
    targets = targets.float()

    tp = (preds * targets).sum(dim=(1,2,3))
    fp = (preds * (1 - targets)).sum(dim=(1,2,3))
    fn = ((1 - preds) * targets).sum(dim=(1,2,3))

    precision = (tp + eps) / (tp + fp + eps)
    recall    = (tp + eps) / (tp + fn + eps)
    f1        = (2 * precision * recall + eps) / (precision + recall + eps)  # Dice
    union     = tp + fp + fn
    iou       = (tp + eps) / (union + eps)

    return {
        "iou": iou.mean().item(),
        "precision": precision.mean().item(),
        "recall": recall.mean().item(),
        "f1": f1.mean().item(),
    }

def train_model(
    model,
    train_loader,
    val_loader,
    device,
    epochs=10,
    aux_weights=(1.0, 0.4, 0.4),
    lr=1e-3,
    use_amp=False,
    log_every=500, # [복원] step 단위 로깅을 위해 원본 값 유지
    validate_every_steps=None,
    threshold=0.5,
    patience=5, # [추가] Early Stopping을 위한 파라미터
    model_save_path='best_model.pth' # [추가] Early Stopping을 위한 파라미터
): 
    
    model.to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    # --- [추가] Early Stopping을 위한 변수 초기화 ---
    patience_counter = 0
    best_val_iou = 0.0
    # --- [추가] Early Stopping 로직 끝 ---

    global_step = 0
    # --- [복원] Step 단위 로깅을 위한 변수 ---
    win_loss, win_iou, win_f1, win_steps = 0.0, 0.0, 0.0, 0
    t0 = time.time()

    for epoch in range(1, epochs + 1):
        model.train()
        epoch_loss = 0.0

        for imgs, masks in train_loader:
            global_step += 1
            imgs = imgs.to(device, non_blocking=True)
            masks = masks.to(device, non_blocking=True)

            optimizer.zero_grad(set_to_none=True)

            with torch.cuda.amp.autocast(enabled=use_amp):
                outputs = model(imgs)
                main_logit = outputs[0] if isinstance(outputs, (list, tuple)) else outputs
                loss = aux_weights[0] * criterion(main_logit, masks)
                if isinstance(outputs, (list, tuple)):
                    if len(outputs) > 1 and aux_weights[1] > 0:
                        loss = loss + aux_weights[1] * criterion(outputs[1], masks)
                    if len(outputs) > 2 and aux_weights[2] > 0:
                        loss = loss + aux_weights[2] * criterion(outputs[2], masks)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            epoch_loss += loss.item()
            
            # --- [복원] Step 단위 로깅을 위한 집계 로직 ---
            win_loss += loss.item()
            win_steps += 1
            with torch.no_grad():
                probs = torch.sigmoid(main_logit)
                preds = (probs > threshold).float()
                m = binary_metrics(preds, masks)
                win_iou += m["iou"]
                win_f1  += m["f1"]
            
            # --- [복원] Step 단위 로깅 출력문 ---
            if log_every and (global_step % log_every == 0):
                elapsed = time.time() - t0
                lr_now = optimizer.param_groups[0]["lr"]
                print(f"[Step {global_step}] epoch={epoch}  "
                      f"avg_loss(win)={win_loss/max(1,win_steps):.4f}  "
                      f"avg_iou(win)={win_iou/max(1,win_steps):.4f}  "
                      f"avg_f1(win)={win_f1/max(1,win_steps):.4f}  "
                      f"lr={lr_now:.3e}  elapsed={elapsed:.1f}s")
                win_loss = win_iou = win_f1 = 0.0
                win_steps = 0
                t0 = time.time()


        # --- Epoch 종료 후 검증 단계 ---
        avg_train_loss = epoch_loss / max(1, len(train_loader))
        model.eval()
        val_iou_list, val_f1_list, val_loss_list = [], [], []
        with torch.no_grad():
            for imgs, masks in val_loader:
                imgs = imgs.to(device, non_blocking=True)
                masks = masks.to(device, non_blocking=True)
                
                logits_list = model(imgs)
                main_logit = logits_list[0] if isinstance(logits_list, (list, tuple)) else logits_list
                
                loss = criterion(main_logit, masks)
                val_loss_list.append(loss.item())

                preds = (torch.sigmoid(main_logit) > threshold).float()
                m = binary_metrics(preds, masks)
                val_iou_list.append(m["iou"])
                val_f1_list.append(m["f1"])

        avg_val_loss = np.mean(val_loss_list)
        avg_val_iou = np.mean(val_iou_list)
        avg_val_f1 = np.mean(val_f1_list)
        
        # --- [복원] 원본 Epoch 단위 출력문 ---
        print(f"[Epoch {epoch}/{epochs}] "
              f"Train Loss: {avg_train_loss:.4f} | "
              f"Val Loss: {avg_val_loss:.4f} | "
              f"Val IoU: {avg_val_iou:.4f} | "
              f"Val F1: {avg_val_f1:.4f}")

        # --- [추가] 간결한 Early Stopping 상태 출력 ---
        if avg_val_iou > best_val_iou:
            best_val_iou = avg_val_iou
            patience_counter = 0
            torch.save(model.state_dict(), model_save_path)
            print(f" -> Best score updated. Model saved.")
        else:
            patience_counter += 1
            print(f" -> Patience: {patience_counter}/{patience}")

        if patience_counter >= patience:
            print(f"\nEarly stopping triggered after {patience} epochs without improvement.")
            break
        # --- [추가] Early Stopping 로직 끝 ---
            
    print(f"\nTraining finished. Best Val IoU was: {best_val_iou:.4f}")
    
    model.load_state_dict(torch.load(model_save_path))
    return model

In [42]:
train_dataset = CrackDataset(TRAIN_DIR)
val_dataset = CrackDataset(VAL_DIR)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

In [43]:
from ptflops import get_model_complexity_info

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#model = HrSegNetB16().to(device)
input_size = (1, 192, 192)

macs, params = get_model_complexity_info(model,
                                         input_size,
                                         as_strings=True,
                                         print_per_layer_stat=False,
                                         verbose=False)
print(f"Total Params: {params}")
print(f"Total MACs: {macs}")

Total Params: 4.32 M
Total MACs: 329.23 MMac


In [44]:
train_model(model, train_loader, val_loader, device, epochs=40)

  scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
  with torch.cuda.amp.autocast(enabled=use_amp):


[Step 500] epoch=1  avg_loss(win)=0.1554  avg_iou(win)=0.1848  avg_f1(win)=0.2170  lr=1.000e-03  elapsed=18.0s
[Epoch 1/40] Train Loss: 0.1195 | Val Loss: 0.0734 | Val IoU: 0.4387 | Val F1: 0.5549
 -> Best score updated. Model saved.
[Step 1000] epoch=2  avg_loss(win)=0.0781  avg_iou(win)=0.3603  avg_f1(win)=0.4833  lr=1.000e-03  elapsed=18.1s
[Step 1500] epoch=2  avg_loss(win)=0.0712  avg_iou(win)=0.4323  avg_f1(win)=0.5590  lr=1.000e-03  elapsed=16.9s
[Epoch 2/40] Train Loss: 0.0706 | Val Loss: 0.0739 | Val IoU: 0.4185 | Val F1: 0.5373
 -> Patience: 1/5
[Step 2000] epoch=3  avg_loss(win)=0.0692  avg_iou(win)=0.4558  avg_f1(win)=0.5788  lr=1.000e-03  elapsed=17.2s
[Step 2500] epoch=3  avg_loss(win)=0.0655  avg_iou(win)=0.4640  avg_f1(win)=0.5871  lr=1.000e-03  elapsed=16.8s
[Epoch 3/40] Train Loss: 0.0669 | Val Loss: 0.0670 | Val IoU: 0.4648 | Val F1: 0.5802
 -> Best score updated. Model saved.
[Step 3000] epoch=4  avg_loss(win)=0.0669  avg_iou(win)=0.4760  avg_f1(win)=0.6013  lr=1.00

Linknet(
  (encoder): MobileNetV2Encoder(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): InvertedResidual(
        (conv): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): ReLU6(inplace=True)
          )
          (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (2): InvertedResidual(
        (conv): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(16, 96, kernel_size

In [45]:
def save_mask_image(
    mask_image: Image.Image,
    base_output_dir: str,
    original_filename: str,
    script_name: str = 'defalut'
):
    """
    마스크 이미지를 지정된 규칙에 따라 폴더를 생성하고 저장합니다.

    Args:
        mask_image (Image.Image): 저장할 PIL 이미지 객체.
        base_output_dir (str): 결과 폴더를 생성할 상위 경로.
        original_filename (str): 원본 이미지 파일명 (e.g., 'image_001.jpg').
        script_name (str): 현재 실행 중인 파이썬 스크립트 또는 노트북 파일명.
    
    Returns:
        str: 파일이 저장된 전체 경로.
    """
    # 1. 'test_파일명_mmddhhmm' 형식으로 폴더명 생성
    now = datetime.datetime.now()
    timestamp = now.strftime("%m%d%H%M")  # mmddhhmm 형식
    
    # 스크립트 이름에서 확장자(.py, .ipynb) 제거
    script_basename = os.path.splitext(script_name)[0]
    
    folder_name = f"test_{script_basename}_{timestamp}"
    output_dir = os.path.join(base_output_dir, folder_name)
    
    # 폴더 생성 (이미 존재하면 그대로 사용)
    os.makedirs(output_dir, exist_ok=True)

    # 2. 저장할 파일명 생성 (원본 파일명 기반)
    original_basename = os.path.splitext(original_filename)[0]
    output_filename = f"{original_basename}_mask.png"
    
    # 3. 전체 저장 경로를 조합하고 이미지 저장
    output_path = os.path.join(output_dir, output_filename)
    mask_image.save(output_path)
    
    return output_path

In [46]:

def rle_encode(mask):
    """
    mask: 2D numpy array of {0,1}, shape (H,W)
    return: run length as string
    """
    pixels = mask.flatten(order="C")
    ones = np.where(pixels == 1)[0] + 1  # 1-based
    if len(ones) == 0:
        return ""
    runs = []
    prev = -2
    for idx in ones:
        if idx > prev + 1:
            runs.extend((idx, 0))
        runs[-1] += 1
        prev = idx
    return " ".join(map(str, runs))


def predict_and_submit(model, test_img_dir, output_csv, device, threshold=0.5):
    model.eval()
    ids, rles = [], []

    test_imgs = sorted(glob.glob(os.path.join(test_img_dir, "*.jpg")))
    for path in test_imgs:
        img_id = os.path.splitext(os.path.basename(path))[0]
        img = Image.open(path).convert("L")
        arr = np.array(img, dtype=np.float32) / 255.0
        tensor = torch.tensor(arr).unsqueeze(0).unsqueeze(0).to(device)

        with torch.no_grad():
            out_list = model(tensor)
            main_logit = out_list[0] if isinstance(out_list, (list, tuple)) else out_list
            prob = torch.sigmoid(main_logit)[0,0].cpu().numpy()
            pred = (prob > threshold).astype(np.uint8)
        
        rle = rle_encode(pred)
        ids.append(img_id)
        rles.append(rle)

    df = pd.DataFrame({"image_id": ids, "rle": rles})
    df.to_csv(output_csv, index=False)
    print(f"[OK] submission saved to {output_csv}, total {len(df)} rows.")


In [47]:

def rle_encode(mask):
    """
    mask: 2D numpy array of {0,1}, shape (H,W)
    return: run length as string
    """
    pixels = mask.flatten(order="C")
    ones = np.where(pixels == 1)[0] + 1  # 1-based
    if len(ones) == 0:
        return ""
    runs = []
    prev = -2
    for idx in ones:
        if idx > prev + 1:
            runs.extend((idx, 0))
        runs[-1] += 1
        prev = idx
    return " ".join(map(str, runs))


def predict_submit_and_save_masks(
    model, 
    test_img_dir, 
    output_csv, 
    device, 
    threshold=0.5,
    save_masks=False,
    mask_save_dir=None
):
    
    model.eval()
    ids, rles = [], []

    # --- 이미지 저장을 위한 폴더 설정 ---
    output_mask_path = ""
    if save_masks:
        if mask_save_dir is None:
            # mask_save_dir가 지정되지 않으면 에러 발생
            raise ValueError("If save_masks is True, mask_save_dir must be provided.")
        
        # 현재 시간을 기반으로 하위 폴더 생성
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        output_mask_path = os.path.join(mask_save_dir, f"predictions_{timestamp}")
        os.makedirs(output_mask_path, exist_ok=True)
        print(f"Mask images will be saved to: {output_mask_path}")

    test_imgs = sorted(glob.glob(os.path.join(test_img_dir, "*.jpg")))
    for path in test_imgs:
        img_id = os.path.splitext(os.path.basename(path))[0]
        img = Image.open(path).convert("L")
        arr = np.array(img, dtype=np.float32) / 255.0
        tensor = torch.tensor(arr).unsqueeze(0).unsqueeze(0).to(device)

        with torch.no_grad():
            out_list = model(tensor)
            main_logit = out_list[0] if isinstance(out_list, (list, tuple)) else out_list
            prob = torch.sigmoid(main_logit)[0,0].cpu().numpy()
            pred = (prob > threshold).astype(np.uint8)
        
        # ---  마스크 이미지를 파일로 저장 ---
        if save_masks:
            mask_image = Image.fromarray(pred * 255, mode='L')
            mask_filename = f"{img_id}_mask.png"
            save_path = os.path.join(output_mask_path, mask_filename)
            mask_image.save(save_path)

        # --- RLE 인코딩 및 CSV 데이터 수집 ---
        rle = rle_encode(pred)
        ids.append(img_id)
        rles.append(rle)

    # --- CSV 파일로 최종 저장 ---
    df = pd.DataFrame({"image_id": ids, "rle": rles})
    df.to_csv(output_csv, index=False)
    print(f"OK. Submission CSV saved to {output_csv}, total {len(df)} rows.")
    
    if save_masks:
        print(f"OK. Mask images also saved in: {output_mask_path}")

In [48]:
predict_submit_and_save_masks(
    model=model,
    test_img_dir=TEST_DIR,
    output_csv="working/submission.csv",
    device=device,
    save_masks=True,  
    mask_save_dir=OUTPUT_MASK
)

Mask images will be saved to: working/mask_ouputs/predictions_20251014_024420


  mask_image = Image.fromarray(pred * 255, mode='L')


OK. Submission CSV saved to working/submission.csv, total 2667 rows.
OK. Mask images also saved in: working/mask_ouputs/predictions_20251014_024420
