In [1]:
import os
from PIL import Image
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

# 讀取 data 資料夾
def load_data(data_dir="data"):
    X = []
    y = []
    all_txt_files = sorted([f for f in os.listdir(data_dir) if f.endswith(".txt")])

    for txt_file in all_txt_files:
        base_name = txt_file.replace(".txt", "")
        img_path = os.path.join(data_dir, f"{base_name}.png")
        txt_path = os.path.join(data_dir, txt_file)

        if not os.path.exists(img_path):
            continue

        # 載入圖像：128x128 單通道
        img = Image.open(img_path).convert("L")
        img_np = np.array(img) / 255.0  # normalize to 0~1
        X.append(img_np)

        # 載入動作
        with open(txt_path, "r") as f:
            action = f.read().strip()
            y.append(action)

    return np.array(X), np.array(y)

X, y_raw = load_data()

# 類別編碼器：把所有動作轉為分類標籤
encoder = LabelEncoder()
y = encoder.fit_transform(y_raw)
num_classes = len(encoder.classes_)

print(f"共載入 {len(X)} 筆樣本，{num_classes} 個動作類別：{encoder.classes_}")

# 在下面寫一個轉換函數 把X,y丟進去他會針對少數類別做過採樣並返回處理後的X,y 讓每個類別數量都等於 最多樣本的那個類別 然後print每個類別的原始樣本數量跟過採樣後樣本數量
from collections import Counter
import numpy as np

def oversample_minority_classes(X, y):
    # 統計每個類別的樣本數量
    counter = Counter(y)
    max_count = max(counter.values())

    print("原始樣本數量：")
    for label, count in counter.items():
        print(f"  類別 {label}（{encoder.inverse_transform([label])[0]}）: {count} 筆")

    # 分類資料收集
    class_to_indices = {label: np.where(y == label)[0] for label in counter}
    new_X = []
    new_y = []

    for label, indices in class_to_indices.items():
        count = len(indices)
        needed = max_count - count
        # 原始樣本先加入
        new_X.extend(X[indices])
        new_y.extend(y[indices])
        # 隨機過採樣補足
        if needed > 0:
            sampled_indices = np.random.choice(indices, size=needed, replace=True)
            new_X.extend(X[sampled_indices])
            new_y.extend(y[sampled_indices])

    new_X = np.array(new_X)
    new_y = np.array(new_y)

    # 統計過採樣後的結果
    new_counter = Counter(new_y)
    print("\n過採樣後樣本數量：")
    for label, count in new_counter.items():
        print(f"  類別 {label}（{encoder.inverse_transform([label])[0]}）: {count} 筆")

    return new_X, new_y
X_balanced, y_balanced = oversample_minority_classes(X, y)
X,y = X_balanced, y_balanced

共載入 533 筆樣本，29 個動作類別：['' 'Key.alt_l' 'Key.alt_l,Key.down' 'Key.alt_l,Key.left'
 'Key.alt_l,Key.right' 'Key.ctrl_l' 'Key.ctrl_l,Key.right' 'Key.down'
 'Key.left' 'Key.left,Key.alt_l' 'Key.left,Key.alt_l,Key.shift'
 'Key.left,Key.ctrl_l' 'Key.left,Key.right' 'Key.left,Key.shift'
 'Key.left,Key.up' 'Key.left,Key.up,Key.ctrl_l'
 'Key.left,Key.up,Key.shift' 'Key.left,z' 'Key.right'
 'Key.right,Key.ctrl_l' 'Key.right,Key.down' 'Key.right,Key.up'
 'Key.right,z' 'Key.shift,Key.ctrl_l' 'Key.shift,Key.right'
 'Key.shift,Key.up' 'Key.up' 'Key.up,Key.right' 'z,Key.right']
原始樣本數量：
  類別 0（）: 52 筆
  類別 18（Key.right）: 87 筆
  類別 28（z,Key.right）: 2 筆
  類別 6（Key.ctrl_l,Key.right）: 55 筆
  類別 8（Key.left）: 90 筆
  類別 26（Key.up）: 31 筆
  類別 14（Key.left,Key.up）: 8 筆
  類別 11（Key.left,Key.ctrl_l）: 112 筆
  類別 9（Key.left,Key.alt_l）: 11 筆
  類別 19（Key.right,Key.ctrl_l）: 35 筆
  類別 5（Key.ctrl_l）: 11 筆
  類別 7（Key.down）: 1 筆
  類別 22（Key.right,z）: 2 筆
  類別 10（Key.left,Key.alt_l,Key.shift）: 1 筆
  類別 4（Key.alt_l,Key.right）:

In [2]:
class ImageActionDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X).unsqueeze(1).float()  # (N, 1, 128, 128)
        self.y = torch.tensor(y).long()

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# 簡單 CNN
class ActionCNN(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),  # 64x64
            nn.Conv2d(16, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2), # 32x32
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2), # 16x16
            nn.Flatten(),
            nn.Linear(64*16*16, 128), nn.ReLU(),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        return self.net(x)


In [3]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import torch.nn as nn
import torch

# 資料分割與 Dataloader
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
train_ds = ImageActionDataset(X_train, y_train)
val_ds = ImageActionDataset(X_val, y_val)
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=32)

# 模型與訓練設定
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ActionCNN(num_classes).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Early stopping 參數
best_val_loss = float('inf')
patience = 10  # 容忍 10 次沒改善
counter = 0
best_model_state = None

for epoch in range(100):
    # 訓練階段
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        pred = model(xb)
        loss = loss_fn(pred, yb)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    avg_train_loss = total_loss / len(train_loader)

    # 驗證階段
    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            pred = model(xb)
            loss = loss_fn(pred, yb)
            val_loss += loss.item()
            _, predicted = torch.max(pred, 1)
            correct += (predicted == yb).sum().item()
            total += yb.size(0)
    
    avg_val_loss = val_loss / len(val_loader)
    val_accuracy = correct / total

    print(f"Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_accuracy:.4f}")

    # Early stopping 檢查
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        best_model_state = model.state_dict()
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

# 儲存最佳模型（可選）
if best_model_state:
    model.load_state_dict(best_model_state)
    torch.save(model.state_dict(), "best_model.pth")


Epoch 1 | Train Loss: 2.5855 | Val Loss: 1.3637 | Val Acc: 0.5954
Epoch 2 | Train Loss: 0.9892 | Val Loss: 0.9538 | Val Acc: 0.7108
Epoch 3 | Train Loss: 0.6548 | Val Loss: 0.6949 | Val Acc: 0.7738
Epoch 4 | Train Loss: 0.5027 | Val Loss: 0.6672 | Val Acc: 0.8138
Epoch 5 | Train Loss: 0.4307 | Val Loss: 0.6290 | Val Acc: 0.7969
Epoch 6 | Train Loss: 0.3446 | Val Loss: 0.5140 | Val Acc: 0.8292
Epoch 7 | Train Loss: 0.2887 | Val Loss: 0.5477 | Val Acc: 0.8431
Epoch 8 | Train Loss: 0.2838 | Val Loss: 0.5705 | Val Acc: 0.8262
Epoch 9 | Train Loss: 0.2389 | Val Loss: 0.4542 | Val Acc: 0.8477
Epoch 10 | Train Loss: 0.1876 | Val Loss: 0.4972 | Val Acc: 0.8585
Epoch 11 | Train Loss: 0.1589 | Val Loss: 0.4771 | Val Acc: 0.8769
Epoch 12 | Train Loss: 0.1592 | Val Loss: 0.4333 | Val Acc: 0.8738
Epoch 13 | Train Loss: 0.1241 | Val Loss: 0.5371 | Val Acc: 0.8615
Epoch 14 | Train Loss: 0.1345 | Val Loss: 0.4579 | Val Acc: 0.8892
Epoch 15 | Train Loss: 0.1077 | Val Loss: 0.4645 | Val Acc: 0.8877
Epoc