In [23]:
import os
from PIL import Image
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

# Control characters found in raw action text, mapped to the key name that
# is substituted for each of them by clean_action().
CONTROL_CHAR_MAP = {
    '\x03': 'c',  # Ctrl+C
    '\x1a': 'z',  # Ctrl+Z
    '\x18': 'x',  # Ctrl+X
    '\x08': 'Backspace',
    # Add any other key combinations you know you press here.
}

def clean_action(action: str) -> str:
    """Replace mapped control characters with key names, then trim whitespace.

    Args:
        action: Raw action text, possibly containing control characters.

    Returns:
        The text with every character listed in ``CONTROL_CHAR_MAP`` replaced
        by its mapped key name and leading/trailing whitespace removed.
    """
    cleaned = action
    for ctrl_char in CONTROL_CHAR_MAP:
        replacement = CONTROL_CHAR_MAP[ctrl_char]
        cleaned = cleaned.replace(ctrl_char, replacement)
    return cleaned.strip()

def is_valid_action(action: str) -> bool:
    """Tell whether an action string mentions at least one key of interest.

    Matching is by plain substring, so an action containing several keys is
    accepted as soon as any one of the listed fragments occurs in it.

    Args:
        action: Cleaned action text.

    Returns:
        True if any accepted fragment occurs in ``action``, otherwise False.
    """
    accepted_fragments = (
        # Arrow keys: up / down / left / right
        "Key.up", "Key.down", "Key.left", "Key.right",
        # Original note: drink potion / skill / jump
        "Key.shift", "Key.ctrl_l", "Key.alt_l", "z",
    )
    for fragment in accepted_fragments:
        if fragment in action:
            return True
    return False

def load_filtered_data(data_dir="merged_data"):
    """Load paired image/action samples from ``data_dir``, keeping valid actions.

    Every ``NAME.txt`` file in the directory is paired with ``NAME.png``.
    A pair is skipped when the image file does not exist or when the cleaned
    action text is rejected by ``is_valid_action``.

    Images are converted to single-channel greyscale and scaled to [0, 1].
    No resizing is done here; the original note expects 128x128 inputs, so
    the files are assumed to already have that size (not verified here).

    Args:
        data_dir: Directory holding the ``.txt`` action files and the
            matching ``.png`` images.

    Returns:
        A tuple ``(X, y)``: ``X`` is an array built from the per-image
        greyscale arrays, ``y`` an array of the cleaned action strings, in
        sorted file-name order.
    """
    txt_suffix = ".txt"
    X = []
    y = []
    all_txt_files = sorted(f for f in os.listdir(data_dir) if f.endswith(txt_suffix))

    for txt_file in all_txt_files:
        # Drop only the trailing extension. str.replace() would also delete
        # ".txt" occurring earlier in the name and pair the wrong image.
        base_name = txt_file[:-len(txt_suffix)]
        img_path = os.path.join(data_dir, f"{base_name}.png")
        txt_path = os.path.join(data_dir, txt_file)

        if not os.path.exists(img_path):
            continue

        # Load the action text
        with open(txt_path, "r") as f:
            action = clean_action(f.read().strip())

        if not is_valid_action(action):
            continue  # filter out unwanted actions

        # Load the image as a single greyscale channel; the context manager
        # makes sure the underlying file handle is released.
        with Image.open(img_path) as img:
            img_np = np.array(img.convert("L")) / 255.0
        X.append(img_np)
        y.append(action)

    return np.array(X), np.array(y)

# Load the data and filter out unwanted actions
X, y_raw = load_filtered_data()
# Encode each distinct action string as an integer class id
encoder = LabelEncoder()
y = encoder.fit_transform(y_raw)
num_classes = len(encoder.classes_)

# Report the number of samples loaded and the action classes found
print(f"共載入 {len(X)} 筆樣本，{num_classes} 個動作類別：{encoder.classes_}")

共載入 1412 筆樣本，70 個動作類別：['Key.alt_l' 'Key.alt_l,Key.down' 'Key.alt_l,Key.left'
 'Key.alt_l,Key.left,Key.ctrl_l,z' 'Key.alt_l,Key.right'
 'Key.alt_l,Key.right,z' 'Key.alt_l,z' 'Key.alt_l,z,Key.down'
 'Key.alt_l,z,Key.right' 'Key.ctrl_l' 'Key.ctrl_l,Key.alt_l,z,Key.right'
 'Key.ctrl_l,Key.right' 'Key.ctrl_l,Key.right,Key.alt_l'
 'Key.ctrl_l,Key.right,Key.alt_l,z' 'Key.ctrl_l,Key.right,z' 'Key.down'
 'Key.left' 'Key.left,Key.alt_l' 'Key.left,Key.alt_l,Key.shift'
 'Key.left,Key.alt_l,z' 'Key.left,Key.ctrl_l'
 'Key.left,Key.ctrl_l,Key.alt_l' 'Key.left,Key.ctrl_l,z'
 'Key.left,Key.down' 'Key.left,Key.right' 'Key.left,Key.right,z'
 'Key.left,Key.shift' 'Key.left,Key.shift,Key.ctrl_l'
 'Key.left,Key.shift,Key.up' 'Key.left,Key.up'
 'Key.left,Key.up,Key.ctrl_l' 'Key.left,Key.up,Key.shift'
 'Key.left,Key.up,z' 'Key.left,z' 'Key.left,z,Key.ctrl_l' 'Key.right'
 'Key.right,Key.alt_l' 'Key.right,Key.alt_l,Key.ctrl_l'
 'Key.right,Key.alt_l,z' 'Key.right,Key.ctrl_l'
 'Key.right,Key.ctrl_l,Key.alt_l' 'Ke

In [24]:
class ImageActionDataset(Dataset):
    """In-memory dataset of single-channel images with integer action labels.

    Args:
        X: Array-like of images indexed as (N, H, W).
        y: Array-like of N integer class ids.
    """

    def __init__(self, X, y):
        images = torch.tensor(X)
        labels = torch.tensor(y)
        # Insert the channel axis -> (N, 1, H, W); the original note expects
        # H = W = 128. Stored as float32.
        self.X = images[:, None].float()
        # Class ids are stored as int64.
        self.y = labels.long()

    def __len__(self):
        """Return the number of samples."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at position ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

# Simple CNN
class ActionCNN(nn.Module):
    """Small convolutional classifier from a 1-channel image to action logits.

    Three ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2)`` stages are
    followed by ``Flatten`` and a two-layer fully connected head. The output
    is the raw logits of the last linear layer (no softmax is applied).

    Args:
        num_classes: Number of output logits.
        input_size: Spatial size of the input images, either one integer for
            square images or a ``(height, width)`` pair. The default of 128
            gives the flattened size 64*16*16 that was previously hard-coded.

    Raises:
        ValueError: If a spatial dimension is smaller than 8, which would
            leave no features after the three pooling stages.
    """

    def __init__(self, num_classes, input_size=128):
        super().__init__()
        try:
            height, width = input_size
        except TypeError:
            # A single number means a square input.
            height = width = input_size
        height, width = int(height), int(width)

        # The padded 3x3 convolutions keep the spatial size; each MaxPool2d(2)
        # floor-halves it, so three stages leave size // 8 per dimension.
        feat_h, feat_w = height // 8, width // 8
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small: each dimension must be at least 8"
            )

        # Layer order is kept unchanged so state_dict keys stay the same.
        self.net = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),   # size / 2
            nn.Conv2d(16, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),  # size / 4
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),  # size / 8
            nn.Flatten(),
            nn.Linear(64 * feat_h * feat_w, 128), nn.ReLU(),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for input ``x``."""
        return self.net(x)


In [25]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import torch.nn as nn
import torch

# Train/validation split and DataLoaders
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
# Oversampling of the training set (currently disabled)
#X_train, y_train = oversample_minority_classes(X_train, y_train)
train_ds = ImageActionDataset(X_train, y_train)
val_ds = ImageActionDataset(X_val, y_val)
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=32)

# Model and training setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ActionCNN(num_classes).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Early-stopping state
best_val_loss = float('inf')  # declared but not consulted by the accuracy-based check below
best_val_accuracy = 0.0
patience = 10  # stop after 10 consecutive epochs without improvement
counter = 0
best_model_state = None

for epoch in range(100):
    # Training phase
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        pred = model(xb)
        loss = loss_fn(pred, yb)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        _, predicted = torch.max(pred, 1)
        correct += (predicted == yb).sum().item()
        total += yb.size(0)

    # Mean of the per-batch losses, and accuracy over all training samples
    avg_train_loss = total_loss / len(train_loader)
    train_accuracy = correct / total

    # Validation phase
    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            pred = model(xb)
            loss = loss_fn(pred, yb)
            val_loss += loss.item()
            _, predicted = torch.max(pred, 1)
            correct += (predicted == yb).sum().item()
            total += yb.size(0)

    avg_val_loss = val_loss / len(val_loader)
    val_accuracy = correct / total

    print(f"Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Train Acc: {train_accuracy:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_accuracy:.4f}")

    # Early-stopping check (based on validation accuracy)
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        # state_dict() hands back references to the live parameter tensors,
        # which later optimizer steps overwrite in place. Clone them so the
        # snapshot really holds this epoch's weights.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

# Restore the best weights and save them (optional)
if best_model_state is not None:
    model.load_state_dict(best_model_state)
    torch.save(model.state_dict(), "best_model.pth")


Epoch 1 | Train Loss: 3.1247 | Train Acc: 0.1727 | Val Loss: 3.0731 | Val Acc: 0.1767
Epoch 2 | Train Loss: 2.8686 | Train Acc: 0.1816 | Val Loss: 3.0770 | Val Acc: 0.1555
Epoch 3 | Train Loss: 2.7902 | Train Acc: 0.2037 | Val Loss: 2.9767 | Val Acc: 0.1802
Epoch 4 | Train Loss: 2.6607 | Train Acc: 0.2578 | Val Loss: 2.9567 | Val Acc: 0.2049
Epoch 5 | Train Loss: 2.5496 | Train Acc: 0.2941 | Val Loss: 2.9098 | Val Acc: 0.2191
Epoch 6 | Train Loss: 2.4082 | Train Acc: 0.2914 | Val Loss: 2.8849 | Val Acc: 0.2509
Epoch 7 | Train Loss: 2.2822 | Train Acc: 0.3224 | Val Loss: 3.0043 | Val Acc: 0.2544
Epoch 8 | Train Loss: 2.1547 | Train Acc: 0.3446 | Val Loss: 3.0219 | Val Acc: 0.2261
Epoch 9 | Train Loss: 2.0307 | Train Acc: 0.3685 | Val Loss: 3.0012 | Val Acc: 0.2686
Epoch 10 | Train Loss: 1.9146 | Train Acc: 0.3809 | Val Loss: 3.1685 | Val Acc: 0.2580
Epoch 11 | Train Loss: 1.7768 | Train Acc: 0.4207 | Val Loss: 3.3866 | Val Acc: 0.2898
Epoch 12 | Train Loss: 1.6474 | Train Acc: 0.4694 | 