In [1]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"


In [2]:
from pathlib import Path

# === 训练/验证用的 OUHands 路径（原始整图）===
ROOT = Path(r"D:\Courses\Csc2503\proj\archive\OUHANDS_train")
TRAIN_LIST = ROOT / r"data_split_for_intermediate_tests\training_files.txt"
VAL_LIST   = ROOT / r"data_split_for_intermediate_tests\validation_files.txt"
COLOUR_DIR = ROOT / r"train\hand_data\colour"

# === 测试集（原始整图）===
TEST_COLOUR = Path(r"D:\Courses\Csc2503\proj\archive\OUHANDS_test\test\hand_data\colour")

# 类别（A..K 跳过 G）
CLASSES = ['A','B','C','D','E','F','H','I','J','K']
CLASS2ID = {c:i for i,c in enumerate(CLASSES)}
IMG_EXTS = {".png",".jpg",".jpeg",".bmp",".tif",".tiff"}


In [3]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

def read_list(fp: Path):
    return [ln.strip() for ln in fp.read_text(encoding="utf-8", errors="ignore").splitlines() if ln.strip()]

def resolve_image_path(colour_dir: Path, name: str):
    p = colour_dir / name
    if p.exists(): return p
    stem = Path(name).stem
    for e in IMG_EXTS:
        q = colour_dir / f"{stem}{e}"
        if q.exists(): return q
    return None

class RawImageListDataset(Dataset):
    """
    从文件名首字母得到类别，不使用 bbox；整图 -> ResNet 输入
    """
    def __init__(self, colour_dir: Path, names: list[str], transform=None):
        self.items = []
        self.transform = transform
        for nm in names:
            p = resolve_image_path(colour_dir, nm)
            if p is None: 
                continue
            letter = p.name[0].upper()
            if letter not in CLASS2ID:
                continue
            self.items.append((p, CLASS2ID[letter]))
        if len(self.items) == 0:
            raise RuntimeError("No images collected. Check paths and filelists.")

    def __len__(self):
        return len(self.items)
    def __getitem__(self, idx):
        p, y = self.items[idx]
        img = Image.open(p).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, y

# transforms（ImageNet 预处理风格）
train_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(0.1,0.1,0.1,0.05),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406],
                         std=[0.229,0.224,0.225]),
])
eval_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406],
                         std=[0.229,0.224,0.225]),
])

# 列表 -> 数据集
train_names = read_list(TRAIN_LIST)
val_names   = read_list(VAL_LIST)
# 测试集：直接遍历文件夹
test_names  = [p.name for p in TEST_COLOUR.iterdir() if p.suffix.lower() in IMG_EXTS]

train_ds = RawImageListDataset(COLOUR_DIR, train_names, transform=train_tf)
val_ds   = RawImageListDataset(COLOUR_DIR, val_names,   transform=eval_tf)
test_ds  = RawImageListDataset(TEST_COLOUR, test_names, transform=eval_tf)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Device:", device)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True,  num_workers=0, pin_memory=False)
val_loader   = DataLoader(val_ds,   batch_size=128, shuffle=False, num_workers=0, pin_memory=False)
test_loader  = DataLoader(test_ds,  batch_size=128, shuffle=False, num_workers=0, pin_memory=False)


Device: cuda:0


In [4]:
import torch.nn as nn
from torchvision import models
from collections import Counter
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

# 类不平衡权重
cnt = Counter([y for _,y in train_ds.items])
class_weights = torch.tensor([len(train_ds)/cnt[i] for i in range(len(CLASSES))], dtype=torch.float32).to(device)

model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, len(CLASSES))
model = model.to(device)

criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15)

def evaluate(model, loader):
    model.eval()
    losses, preds, trues = 0.0, [], []
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            loss = criterion(logits, y)
            losses += loss.item() * x.size(0)
            preds.extend(logits.argmax(1).cpu().tolist())
            trues.extend(y.cpu().tolist())
    avg_loss = losses / len(loader.dataset)
    top1 = accuracy_score(trues, preds)
    macro_f1 = f1_score(trues, preds, average="macro")
    return avg_loss, top1, macro_f1

best_val = 0.0
EPOCHS = 50
for ep in range(EPOCHS):
    model.train()
    run = 0.0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad(set_to_none=True)
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()
        run += loss.item() * x.size(0)
    train_loss = run / len(train_loader.dataset)
    val_loss, val_top1, val_f1 = evaluate(model, val_loader)
    scheduler.step()

    print(f"Epoch {ep+1:02d}/{EPOCHS} | train_loss={train_loss:.4f}  "
          f"val_loss={val_loss:.4f}  val_top1={val_top1*100:.2f}%  val_macroF1={val_f1:.4f}")

    if val_top1 > best_val:
        best_val = val_top1
        torch.save(model.state_dict(), "resnet18_raw_best.pt")
        print("✅ saved: resnet18_raw_best.pt")


Epoch 01/50 | train_loss=0.6439  val_loss=0.9097  val_top1=69.50%  val_macroF1=0.6856
✅ saved: resnet18_raw_best.pt
Epoch 02/50 | train_loss=0.1416  val_loss=0.3047  val_top1=89.50%  val_macroF1=0.8963
✅ saved: resnet18_raw_best.pt
Epoch 03/50 | train_loss=0.0927  val_loss=0.9233  val_top1=78.00%  val_macroF1=0.7624
Epoch 04/50 | train_loss=0.0984  val_loss=1.6225  val_top1=68.50%  val_macroF1=0.6910
Epoch 05/50 | train_loss=0.0689  val_loss=0.7233  val_top1=77.00%  val_macroF1=0.7744
Epoch 06/50 | train_loss=0.0403  val_loss=0.3600  val_top1=89.25%  val_macroF1=0.8924
Epoch 07/50 | train_loss=0.0239  val_loss=0.4249  val_top1=86.25%  val_macroF1=0.8642
Epoch 08/50 | train_loss=0.0127  val_loss=0.2184  val_top1=93.25%  val_macroF1=0.9319
✅ saved: resnet18_raw_best.pt
Epoch 09/50 | train_loss=0.0093  val_loss=0.3086  val_top1=89.50%  val_macroF1=0.8957
Epoch 10/50 | train_loss=0.0043  val_loss=0.2050  val_top1=92.50%  val_macroF1=0.9235
Epoch 11/50 | train_loss=0.0036  val_loss=0.2567  

In [5]:
# 载入最佳权重（如已保存）
sd = torch.load("resnet18_raw_best.pt", map_location="cpu")
model.load_state_dict(sd, strict=False)

test_loss, test_top1, test_f1 = evaluate(model, test_loader)
print(f"\n== Test ==")
print(f"Top-1: {test_top1:.4f}")
print(f"Macro-F1: {test_f1:.4f}")


  sd = torch.load("resnet18_raw_best.pt", map_location="cpu")



== Test ==
Top-1: 0.7470
Macro-F1: 0.7466


In [6]:
!pip -q install thop

from thop import profile
params_m = sum(p.numel() for p in model.parameters()) / 1e6
dummy = torch.randn(1, 3, 224, 224).to(device)
flops, _ = profile(model, inputs=(dummy,), verbose=False)
flops_g = flops / 1e9
print(f"Params (M): {params_m:.3f}")
print(f"FLOPs  (G): {flops_g:.2f}")

print(f"\n| ResNet18 (raw image) | None | {test_top1:.4f} | {test_f1:.4f} | {params_m:.2f} | {flops_g:.2f} |")


Params (M): 11.182
FLOPs  (G): 1.82

| ResNet18 (raw image) | None | 0.7470 | 0.7466 | 11.18 | 1.82 |
