In [2]:
# ============================
# All-in-one Training (ImageFolder only, Kaggle)
# ============================

import os, json, random, math
from pathlib import Path
from typing import List, Tuple
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms, models

# ---- Device
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.benchmark = True
print("Using device:", device)

# ---- Paths & Hyperparams
DATA_ROOT = "/kaggle/input/dog-emotion/Dog Emotion"    # <- angry/happy/relaxed/sad 폴더가 바로 아래에
OUT_DIR   = "/kaggle/working"
IMG_SIZE  = 224
BATCH     = 32
VAL_RATIO = 0.2
EPOCHS_PROBE = 3
EPOCHS_MAIN  = 20
LR_PROBE = 1e-3
LR_MAIN  = 3e-4
WD_MAIN  = 0.05
LAMBDA_PRIOR = 0.5   # (지금은 prior 사용 안 함; 추후 on 가능)

# ---- Utils
def set_seed(seed=42):
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed); torch.cuda.manual_seed_all(seed)
set_seed(42)

def accuracy_top1(logits, targets):
    return (logits.argmax(-1) == targets).float().mean().item()

def macro_f1(logits, targets, K):
    pred = logits.argmax(-1)
    f1=0.0
    for c in range(K):
        tp = ((pred==c)&(targets==c)).sum().item()
        fp = ((pred==c)&(targets!=c)).sum().item()
        fn = ((pred!=c)&(targets==c)).sum().item()
        p = tp/(tp+fp+1e-8); r = tp/(tp+fn+1e-8)
        f1 += 0.0 if (p+r)==0 else 2*p*r/(p+r)
    return f1/K

def stratified_split(dataset, val_ratio=0.2, seed=42) -> Tuple[List[int], List[int]]:
    labels = [s[1] for s in dataset.samples]
    byc={}
    for i,y in enumerate(labels): byc.setdefault(y,[]).append(i)
    tr, va = [], []
    for y,lst in byc.items():
        r = random.Random(seed+y); r.shuffle(lst)
        n_val = max(1, int(len(lst)*val_ratio))
        va += lst[:n_val]; tr += lst[n_val:]
    return tr, va

class FocalLoss(nn.Module):
    def __init__(self, gamma=2.0): super().__init__(); self.gamma=gamma
    def forward(self, logits, targets):
        logp = F.log_softmax(logits, dim=-1); p = logp.exp()
        tgt = F.one_hot(targets, num_classes=logits.size(-1)).float()
        pt = (p*tgt).sum(-1)
        return (-(1-pt)**self.gamma * (tgt*logp).sum(-1)).mean()

# ---- Backbone (ResNet50 ImageNet 가중치)
class DinoResNet50Backbone(nn.Module):
    def __init__(self):
        super().__init__()
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
        self.resnet.fc = nn.Identity()
    @property
    def out_channels(self): return 2048
    def forward(self, x):
        x = self.resnet.conv1(x); x=self.resnet.bn1(x); x=self.resnet.relu(x); x=self.resnet.maxpool(x)
        x = self.resnet.layer1(x); x=self.resnet.layer2(x); x=self.resnet.layer3(x)
        feat = self.resnet.layer4(x)                      # (B,2048,7,7)
        pooled = F.adaptive_avg_pool2d(feat,1).flatten(1) # (B,2048)
        return feat, pooled

# ---- Prior-Guided Transformer Head (prior는 기본 off)
def build_2d_sincos_pos_embed(h,w,dim,device):
    gy, gx = torch.meshgrid(torch.arange(h,device=device), torch.arange(w,device=device), indexing="ij")
    omega = 1.0 / (10000 ** (torch.arange(dim//4, device=device).float()/(dim//4)))
    oy = torch.einsum('hw,d->hwd', gy.float(), omega)
    ox = torch.einsum('hw,d->hwd', gx.float(), omega)
    pos = torch.cat([torch.sin(oy), torch.cos(oy), torch.sin(ox), torch.cos(ox)], dim=-1)
    return pos.view(h*w, dim)

class PriorGuidedMHA(nn.Module):
    def __init__(self, d_model=512, n_heads=8, attn_drop=0.0, proj_drop=0.0):
        super().__init__(); assert d_model % n_heads==0
        self.h=n_heads; self.d=d_model//n_heads; self.scale=self.d**-0.5
        self.qkv = nn.Linear(d_model, d_model*3); self.proj = nn.Linear(d_model, d_model)
        self.ad=nn.Dropout(attn_drop); self.pd=nn.Dropout(proj_drop)
    def forward(self, x, prior_s=None, lam=0.0, return_attn=False):
        B,N,C=x.shape
        qkv=self.qkv(x).reshape(B,N,3,self.h,self.d).permute(2,0,3,1,4)
        q,k,v=qkv[0]*self.scale, qkv[1], qkv[2]
        logits = q @ k.transpose(-1,-2)
        if prior_s is not None and lam>0:
            s = torch.clamp(prior_s,0,1); bias = 0.5*(s.unsqueeze(2)+s.unsqueeze(1))
            logits = logits + lam*bias.unsqueeze(1)
        attn = F.softmax(logits, dim=-1); attn=self.ad(attn)
        out = (attn@v).transpose(1,2).reshape(B,N,C)
        out = self.pd(self.proj(out))
        return (out, attn) if return_attn else out

class Block(nn.Module):
    def __init__(self, d=512,h=8,mlp=4,drop=0.1):
        super().__init__()
        self.n1=nn.LayerNorm(d); self.attn=PriorGuidedMHA(d,h,proj_drop=drop)
        self.n2=nn.LayerNorm(d)
        self.ff=nn.Sequential(nn.Linear(d,int(d*mlp)), nn.GELU(), nn.Dropout(drop), nn.Linear(int(d*mlp),d))
    def forward(self, x, prior_s, lam, return_attn=False):
        y,a = self.attn(self.n1(x), prior_s, lam, return_attn=True); x=x+y
        z = self.ff(self.n2(x)); x=x+z
        return (x,a) if return_attn else x

class HeadConfig:
    def __init__(self, num_classes=4, d_model=512, n_heads=8, n_layers=3, lambda_prior=0.5, drop=0.1, use_class_token=True):
        self.num_classes=num_classes; self.d_model=d_model
        self.n_heads=n_heads; self.n_layers=n_layers
        self.lambda_prior=lambda_prior; self.drop=drop; self.use_class_token=use_class_token

class PriorGuidedEmotionModel(nn.Module):
    def __init__(self, backbone=None, cfg:HeadConfig=HeadConfig()):
        super().__init__()
        self.backbone = backbone or DinoResNet50Backbone()
        self.cfg=cfg; C=self.backbone.out_channels
        self.proj = nn.Linear(C, cfg.d_model)
        self.cls_token = nn.Parameter(torch.zeros(1,1,cfg.d_model)) if cfg.use_class_token else None
        self.blocks = nn.ModuleList([Block(cfg.d_model, cfg.n_heads, drop=cfg.drop) for _ in range(cfg.n_layers)])
        self.norm = nn.LayerNorm(cfg.d_model)
        self.neck = nn.Sequential(nn.Linear(cfg.d_model,cfg.d_model), nn.GELU(), nn.Dropout(cfg.drop))
        self.head_cls = nn.Linear(cfg.d_model, cfg.num_classes)
        self.head_val = nn.Linear(cfg.d_model, 1)
        self.head_ar  = nn.Linear(cfg.d_model, 1)
    def forward(self, images, priors=None, return_attn=False):
        feat,_ = self.backbone(images)                # (B,2048,7,7)
        B,C,H,W = feat.shape
        tok = self.proj(feat.permute(0,2,3,1).reshape(B,H*W,C))   # (B,49,D)
        pos = build_2d_sincos_pos_embed(H,W,self.cfg.d_model, images.device).unsqueeze(0).expand(B,-1,-1)
        x = tok + pos
        if self.cfg.use_class_token:
            cls = self.cls_token.expand(B,-1,-1); x = torch.cat([cls,x], dim=1)  # (B,50,D)
            s = torch.zeros(B,1,device=images.device)
            if priors is not None: s = torch.cat([s, priors.flatten(1)], dim=1)
        else:
            s = priors.flatten(1) if priors is not None else torch.zeros(B,x.size(1),device=images.device)
        for blk in self.blocks:
            x = blk(x, s, self.cfg.lambda_prior, return_attn=False)
        x = self.norm(x)
        f = x[:,0,:] if self.cfg.use_class_token else x.mean(1)
        f = self.neck(f)
        logits = self.head_cls(f)
        val = torch.sigmoid(self.head_val(f)); ar=torch.sigmoid(self.head_ar(f))
        return logits, val, ar, {}

# ---- Transforms & Datasets
normalize = transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
train_tf = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.6,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(0.2,0.2,0.2,0.05),
    transforms.ToTensor(), normalize,
])
val_tf = transforms.Compose([
    transforms.Resize(int(IMG_SIZE*1.15)),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(), normalize,
])

full_for_split = datasets.ImageFolder(DATA_ROOT, transform=val_tf)
classes = full_for_split.classes; num_classes=len(classes)
print("Classes:", classes)  # ['angry','happy','relaxed','sad'] 여야 정상

train_idx, val_idx = stratified_split(full_for_split, VAL_RATIO, seed=42)
train_ds = Subset(datasets.ImageFolder(DATA_ROOT, transform=train_tf), train_idx)
val_ds   = Subset(datasets.ImageFolder(DATA_ROOT, transform=val_tf),   val_idx)

train_loader = DataLoader(train_ds, batch_size=BATCH, shuffle=True,  num_workers=2, pin_memory=True)
val_loader   = DataLoader(val_ds,   batch_size=BATCH, shuffle=False, num_workers=2, pin_memory=True)

# ---- Stage 1: Linear Probe
backbone = DinoResNet50Backbone().to(device)
for p in backbone.parameters(): p.requires_grad = False
probe = nn.Linear(backbone.out_channels, num_classes).to(device)
opt_probe = torch.optim.AdamW(probe.parameters(), lr=LR_PROBE, weight_decay=0.01)
ce = nn.CrossEntropyLoss()

print("\n== Stage 1: Linear Probe ==")
for ep in range(1, EPOCHS_PROBE+1):
    backbone.eval(); probe.train()
    t_loss=t_acc=0
    for imgs, labels in train_loader:
        imgs,labels = imgs.to(device), labels.to(device)
        with torch.no_grad(): _, pooled = backbone(imgs)
        logits = probe(pooled)
        loss = ce(logits, labels)
        opt_probe.zero_grad(); loss.backward(); opt_probe.step()
        t_loss += loss.item()*imgs.size(0)
        t_acc  += (logits.argmax(-1)==labels).float().sum().item()
    print(f"[Probe] {ep}/{EPOCHS_PROBE} loss {t_loss/len(train_loader.dataset):.4f} acc {t_acc/len(train_loader.dataset):.4f}")

# ---- Stage 2: Main Training (prior OFF)
cfg = HeadConfig(num_classes=num_classes, d_model=512, n_heads=8, n_layers=3, lambda_prior=LAMBDA_PRIOR, drop=0.1)
model = PriorGuidedEmotionModel(backbone=backbone, cfg=cfg).to(device)

opt_main = torch.optim.AdamW(model.parameters(), lr=LR_MAIN, weight_decay=WD_MAIN)
focal = FocalLoss(2.0)

print("\n== Stage 2: Main Training ==")
best_f1=-1.0; Path(OUT_DIR).mkdir(parents=True, exist_ok=True)

for ep in range(1, EPOCHS_MAIN+1):
    model.train(); t_loss=t_acc=t_f1=0
    for imgs, labels in train_loader:
        imgs,labels = imgs.to(device), labels.to(device)
        logits, val, ar, _ = model(imgs, priors=None, return_attn=False)
        loss = focal(logits, labels)
        opt_main.zero_grad(); loss.backward(); opt_main.step()
        t_loss += loss.item()*imgs.size(0)
        t_acc  += (logits.argmax(-1)==labels).float().sum().item()
        t_f1   += macro_f1(logits, labels, num_classes)*imgs.size(0)

    # val
    model.eval(); v_loss=v_acc=v_f1=0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs,labels = imgs.to(device), labels.to(device)
            logits, val, ar, _ = model(imgs, priors=None, return_attn=False)
            loss = focal(logits, labels)
            v_loss += loss.item()*imgs.size(0)
            v_acc  += (logits.argmax(-1)==labels).float().sum().item()
            v_f1   += macro_f1(logits, labels, num_classes)*imgs.size(0)

    Ntr=len(train_loader.dataset); Nval=len(val_loader.dataset)
    print(f"[Main] {ep}/{EPOCHS_MAIN} | train loss {t_loss/Ntr:.4f} acc {t_acc/Ntr:.4f} f1 {t_f1/Ntr:.4f} "
          f"| val loss {v_loss/Nval:.4f} acc {v_acc/Nval:.4f} f1 {v_f1/Nval:.4f}")

    if (v_f1/Nval) > best_f1:
        best_f1 = v_f1/Nval
        torch.save({"model":model.state_dict(),"cfg":cfg.__dict__,"classes":classes}, f"{OUT_DIR}/best_model.pt")

print("Done. Saved:", f"{OUT_DIR}/best_model.pt")
with open(f"{OUT_DIR}/classes.json","w") as f: json.dump({"classes":classes}, f, indent=2)
print("Classes saved to:", f"{OUT_DIR}/classes.json")

# ---- (Optional) Evaluation: Confusion matrix & per-class metrics
import numpy as np
from collections import defaultdict
ckpt = torch.load(f"{OUT_DIR}/best_model.pt", map_permutation="cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(ckpt["model"]); model.eval()

cm = np.zeros((num_classes, num_classes), dtype=int)
per_cls = defaultdict(lambda: {"tp":0,"fp":0,"fn":0})
with torch.no_grad():
    for imgs, labels in val_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        logits, _, _, _ = model(imgs, priors=None, return_attn=False)
        preds = logits.argmax(-1)
        for t,p in zip(labels.tolist(), preds.tolist()):
            cm[t,p]+=1
            if t==p: per_cls[t]["tp"]+=1
            else: per_cls[p]["fp"]+=1; per_cls[t]["fn"]+=1
print("Confusion Matrix (rows=GT, cols=Pred):\n", cm)
for c,name in enumerate(classes):
    tp,fp,fn = per_cls[c]["tp"], per_cls[c]["fp"], per_cls[c]["fn"]
    prec = tp/(tp+fp+1e-8); rec = tp/(tp+fn+1e-8)
    f1 = 0.0 if (prec+rec)==0 else 2*prec*rec/(prec+rec)
    print(f"{name:>8s} | P {prec:.3f} R {rec:.3f} F1 {f1:.3f}")

# ---- (Optional) ONNX export (prior 없이)
# dummy = torch.randn(1,3,IMG_SIZE,IMG_SIZE, device=device)
# torch.onnx.export(
#     model, (dummy, None),
#     f"{OUT_DIR}/dog_emotion.onnx",
#     input_names=["image","prior"], output_names=["logits","val","ar"],
#     opset_version=17, dynamic_axes={"image":{0:"batch"}}
# )
# print("Exported ONNX:", f"{OUT_DIR}/dog_emotion.onnx")


Using device: cuda
Classes: ['angry', 'happy', 'relaxed', 'sad']

== Stage 1: Linear Probe ==
[Probe] 1/3 loss 1.1404 acc 0.5437
[Probe] 2/3 loss 0.9127 acc 0.6716
[Probe] 3/3 loss 0.8223 acc 0.6987

== Stage 2: Main Training ==
[Main] 1/20 | train loss 0.6006 acc 0.4931 f1 0.4304 | val loss 0.3893 acc 0.6288 f1 0.2098
[Main] 2/20 | train loss 0.3378 acc 0.6891 f1 0.6702 | val loss 0.3598 acc 0.6637 f1 0.2140
[Main] 3/20 | train loss 0.2912 acc 0.7344 f1 0.7143 | val loss 0.3558 acc 0.7362 f1 0.2331
[Main] 4/20 | train loss 0.2453 acc 0.7716 f1 0.7597 | val loss 0.3019 acc 0.7638 f1 0.2370
[Main] 5/20 | train loss 0.2335 acc 0.7700 f1 0.7584 | val loss 0.3191 acc 0.7600 f1 0.2356
[Main] 6/20 | train loss 0.2093 acc 0.7903 f1 0.7780 | val loss 0.2880 acc 0.7588 f1 0.2369
[Main] 7/20 | train loss 0.1985 acc 0.7984 f1 0.7862 | val loss 0.3134 acc 0.7750 f1 0.2412
[Main] 8/20 | train loss 0.1908 acc 0.8097 f1 0.8002 | val loss 0.3266 acc 0.7750 f1 0.2395
[Main] 9/20 | train loss 0.1831 acc

TypeError: Unpickler.__init__() got an unexpected keyword argument 'map_permutation'

In [3]:
import os, numpy as np
from collections import defaultdict

ckpt_path = f"{OUT_DIR}/best_model.pt"
assert os.path.exists(ckpt_path), f"Checkpoint not found: {ckpt_path}"

# 모델 클래스/구조(Backbone, PriorGuidedEmotionModel 등)가 이미 같은 셀/위에서 정의돼 있어야 합니다.
ckpt = torch.load(ckpt_path, map_location=("cuda" if torch.cuda.is_available() else "cpu"))
classes = ckpt.get("classes", ["angry","happy","relaxed","sad"])
num_classes = len(classes)

# 기존에 학습에 썼던 model 객체가 있다면 재사용:
# model.load_state_dict(ckpt["model"]); model.eval()

# 만약 커널 재시작 등으로 model이 없다면, 동일 구조로 재생성 후 로드:
try:
    model
except NameError:
    cfg = HeadConfig(num_classes=num_classes, d_model=512, n_heads=8, n_layers=3, lambda_prior=0.5, drop=0.1)
    backbone = DinoResNet50Backbone().to(device)
    model = PriorGuidedEmotionModel(backbone=backbone, cfg=cfg).to(device)

model.load_state_dict(ckpt["model"], strict=True)
model.eval()

# 혼동행렬 계산
cm = np.zeros((num_classes, num_classes), dtype=int)
per_cls = defaultdict(lambda: {"tp":0,"fp":0,"fn":0})

with torch.no_grad():
    for imgs, labels in val_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        logits, _, _, _ = model(imgs, priors=None, return_attn=False)
        preds = logits.argmax(-1)
        for t,p in zip(labels.tolist(), preds.tolist()):
            cm[t,p] += 1
            if t==p: per_cls[t]["tp"]+=1
            else: per_cls[p]["fp"]+=1; per_cls[t]["fn"]+=1

print("Confusion Matrix (rows=GT, cols=Pred):\n", cm)
for c,name in enumerate(classes):
    tp,fp,fn = per_cls[c]["tp"], per_cls[c]["fp"], per_cls[c]["fn"]
    prec = tp/(tp+fp+1e-8); rec = tp/(tp+fn+1e-8)
    f1 = 0.0 if (prec+rec)==0 else 2*prec*rec/(prec+rec)
    print(f"{name:>8s} | P {prec:.3f} R {rec:.3f} F1 {f1:.3f}")


Confusion Matrix (rows=GT, cols=Pred):
 [[159  27   4  10]
 [  7 179  10   4]
 [ 18  19 147  16]
 [ 16   8  30 146]]
   angry | P 0.795 R 0.795 F1 0.795
   happy | P 0.768 R 0.895 F1 0.827
 relaxed | P 0.770 R 0.735 F1 0.752
     sad | P 0.830 R 0.730 F1 0.777


In [5]:
# == Load best (baseline) ==
ckpt = torch.load("/kaggle/working/best_model.pt", map_location="cuda" if torch.cuda.is_available() else "cpu")
classes = ckpt["classes"]
num_classes = len(classes)

# model, backbone, cfg는 이전 셀에서 정의된 클래스를 그대로 사용
cfg = HeadConfig(num_classes=num_classes, d_model=512, n_heads=8, n_layers=3, lambda_prior=0.5, drop=0.1)
backbone = DinoResNet50Backbone().to(device)
model_prior = PriorGuidedEmotionModel(backbone=backbone, cfg=cfg).to(device)
model_prior.load_state_dict(ckpt["model"], strict=False)

# layer4만 학습 가능하도록 해제 (나머지는 고정)
for p in backbone.resnet.layer4.parameters():
    p.requires_grad = True
for name, m in backbone.resnet.named_children():
    if name in ["conv1","bn1","layer1","layer2","layer3"]:
        for p in m.parameters():
            p.requires_grad = False

# == 간단 saliency prior: conv5 합(ReLU) → [0,1] 정규화 ==
@torch.no_grad()
def compute_prior_simple(images):
    feat,_ = backbone(images)          # (B,2048,7,7)
    cam = torch.relu(feat.sum(1))      # (B,7,7)
    B = cam.size(0)
    cam = cam.view(B,-1)
    cam = (cam - cam.min(1,keepdim=True).values) / (cam.max(1,keepdim=True).values - cam.min(1,keepdim=True).values + 1e-6)
    return cam.view(-1,7,7)            # (B,7,7)


In [6]:
EPOCHS_FT = 8
LR_FT     = 1e-4
WD_FT     = 0.05
criterion = nn.CrossEntropyLoss()   # 안정적 수렴용; 희망 시 focal로 대체 가능
optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model_prior.parameters()),
                              lr=LR_FT, weight_decay=WD_FT)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS_FT)

best_f1_ft = -1.0
print("\n== FT with priors (layer4) ==")
for ep in range(1, EPOCHS_FT+1):
    # train
    model_prior.train(); tl=ta=tf=0
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        pri = compute_prior_simple(imgs).to(device)    # (B,7,7)
        logits, _, _, _ = model_prior(imgs, priors=pri, return_attn=False)
        loss = criterion(logits, labels)
        optimizer.zero_grad(); loss.backward(); optimizer.step()

        tl += loss.item()*imgs.size(0)
        ta += (logits.argmax(-1)==labels).float().sum().item()
        tf += macro_f1(logits, labels, num_classes)*imgs.size(0)

    scheduler.step()

    # val
    model_prior.eval(); vl=va=vf=0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            pri = compute_prior_simple(imgs).to(device)
            logits, _, _, _ = model_prior(imgs, priors=pri, return_attn=False)
            loss = criterion(logits, labels)
            vl += loss.item()*imgs.size(0)
            va += (logits.argmax(-1)==labels).float().sum().item()
            vf += macro_f1(logits, labels, num_classes)*imgs.size(0)

    Ntr, Nval = len(train_loader.dataset), len(val_loader.dataset)
    tr_loss, tr_acc, tr_f1 = tl/Ntr, ta/Ntr, tf/Ntr
    val_loss, val_acc, val_f1 = vl/Nval, va/Nval, vf/Nval
    print(f"[FT] {ep}/{EPOCHS_FT} | tr {tr_loss:.4f} acc {tr_acc:.4f} f1 {tr_f1:.4f} "
          f"| val {val_loss:.4f} acc {val_acc:.4f} f1 {val_f1:.4f}")

    if val_f1 > best_f1_ft:
        best_f1_ft = val_f1
        torch.save({"model":model_prior.state_dict(),"cfg":cfg.__dict__,"classes":classes},
                   "/kaggle/working/best_model_prior.pt")

print("Saved:", "/kaggle/working/best_model_prior.pt")



== FT with priors (layer4) ==
[FT] 1/8 | tr 0.3298 acc 0.8816 f1 0.8704 | val 0.6179 acc 0.7963 f1 0.2420
[FT] 2/8 | tr 0.2246 acc 0.9184 f1 0.9084 | val 0.5688 acc 0.8387 f1 0.2521
[FT] 3/8 | tr 0.1746 acc 0.9431 f1 0.9336 | val 0.5480 acc 0.8413 f1 0.2507
[FT] 4/8 | tr 0.1325 acc 0.9541 f1 0.9507 | val 0.5925 acc 0.8400 f1 0.2515
[FT] 5/8 | tr 0.1043 acc 0.9697 f1 0.9682 | val 0.6062 acc 0.8313 f1 0.2498
[FT] 6/8 | tr 0.0763 acc 0.9747 f1 0.9732 | val 0.6149 acc 0.8462 f1 0.2523
[FT] 7/8 | tr 0.0695 acc 0.9778 f1 0.9752 | val 0.6489 acc 0.8337 f1 0.2496
[FT] 8/8 | tr 0.0569 acc 0.9800 f1 0.9788 | val 0.6634 acc 0.8375 f1 0.2494
Saved: /kaggle/working/best_model_prior.pt


In [None]:
# == Grad-CAM prior (cls score의 grad로 CAM 생성) ==
def compute_prior_gradcam(images, target_layer=None):
    if target_layer is None:
        target_layer = backbone.resnet.layer4  # conv5
    acts = []
    grads = []

    def fwd_hook(module, inp, out): acts.append(out)       # (B,2048,7,7)
    def bwd_hook(module, ginp, gout): grads.append(gout[0])# (B,2048,7,7)

    h1 = target_layer.register_forward_hook(fwd_hook)
    h2 = target_layer.register_full_backward_hook(bwd_hook)

    model_prior.eval()
    images = images.requires_grad_(True)
    logits,_,_,_ = model_prior(images, priors=None, return_attn=False)
    # 최대 점수 클래스 기준 CAM
    top = logits.argmax(dim=1)
    loss = logits.gather(1, top.unsqueeze(1)).sum()
    model_prior.zero_grad(); loss.backward()

    feat = acts[0]                      # (B,C,H,W)
    grad = grads[0]                     # (B,C,H,W)
    w = grad.mean(dim=(2,3), keepdim=True)
    cam = torch.relu((w*feat).sum(1))   # (B,H,W)
    # normalize
    B = cam.size(0); cam = cam.view(B,-1)
    cam = (cam - cam.min(1,keepdim=True).values) / (cam.max(1,keepdim=True).values - cam.min(1,keepdim=True).values + 1e-6)
    cam = cam.view(-1, feat.size(2), feat.size(3))

    h1.remove(); h2.remove()
    return cam.detach()


In [7]:
from PIL import Image
infer_tf = transforms.Compose([
    transforms.Resize(int(IMG_SIZE*1.15)),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

# 최종 모델 로드 (prior FT 버전 권장)
ck = torch.load("/kaggle/working/best_model_prior.pt", map_location=device)
model_prior.load_state_dict(ck["model"]); model_prior.eval()

def predict(path, use_prior=True):
    img = infer_tf(Image.open(path).convert("RGB")).unsqueeze(0).to(device)
    pri = compute_prior_simple(img).to(device) if use_prior else None
    with torch.no_grad():
        logits,_,_,_ = model_prior(img, priors=pri, return_attn=False)
        probs = logits.softmax(-1).squeeze().tolist()
    return dict(zip(classes, [round(p,3) for p in probs]))

print(predict("/kaggle/input/dog-emotion/Dog Emotion/happy/.jpg", use_prior=True))


In [8]:
dummy = torch.randn(1,3,IMG_SIZE,IMG_SIZE, device=device)
torch.onnx.export(
    model_prior, (dummy, None),  # prior 없이 고정 내보내기
    "/kaggle/working/dog_emotion_prior.onnx",
    input_names=["image","prior"], output_names=["logits","val","ar"],
    opset_version=17, dynamic_axes={"image":{0:"batch"}}
)
print("Exported:", "/kaggle/working/dog_emotion_prior.onnx")


Exported: /kaggle/working/dog_emotion_prior.onnx
