In [131]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import os
import numpy as np
import pandas as pd

def load_features_and_labels(features_dir, data_dir='data'):
    """Load flattened feature vectors and binary strike labels for both splits.

    Only the features root folder is required. Every entry of ``features_dir``
    is treated as one player folder; the player name is the part of the entry
    name before ``'_video'``. Inside each player folder the ``train`` and
    ``test`` sub-folders are scanned for ``.npy`` files, and every array is
    flattened to one dimension.

    Labels are looked up in ``<data_dir>/<player>_videos_4S/<player>.csv``:
    a file named ``pitch_<k>.npy`` is matched to the CSV row with index label
    ``k - 1`` and is labelled 1 when the text of its ``description`` column
    contains "strike" (case-insensitive), otherwise 0.

    Loading is best-effort: a player whose CSV cannot be read, or a single
    file that cannot be parsed/loaded/labelled, is reported on stdout and
    skipped instead of aborting the whole load.

    Args:
        features_dir: Root folder that contains one sub-folder per player.
        data_dir: Root folder that contains the per-player CSV folders.
            Defaults to ``'data'``, the location that used to be hard-coded.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` of NumPy arrays built
        with ``np.array`` from the collected vectors and labels.
        NOTE(review): X is only a regular 2-D array when every file flattens
        to the same length -- confirm for new feature sets.
    """
    def collect_data(split):
        # Gather every usable (feature vector, label) pair of one split.
        X, y = [], []
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and everything downstream that depends on it)
        # reproducible across machines and file systems.
        for subdir in sorted(os.listdir(features_dir)):
            player_name = subdir.split('_video')[0]
            print(f"📂 處理選手：{player_name}")

            # Check the split folder first so that stray entries (plain files,
            # players without this split) do not trigger a pointless CSV read
            # and a misleading failure message.
            split_path = os.path.join(features_dir, subdir, split)
            if not os.path.isdir(split_path):
                continue

            player_csv_path = f'{data_dir}/{player_name}_videos_4S/{player_name}.csv'
            try:
                player_csv = pd.read_csv(player_csv_path)
            except Exception as e:
                print(f"❌ 讀取 CSV 失敗：{player_csv_path}，錯誤：{e}")
                continue

            npy_files = sorted(f for f in os.listdir(split_path) if f.endswith('.npy'))
            print(f"{split_path} 共有 {len(npy_files)} 個 .npy 檔案")
            for file in npy_files:
                npy_path = os.path.join(split_path, file)
                try:
                    # 'pitch_<k>.npy' -> k; the matching CSV row is k - 1.
                    file_number = int(file.replace('pitch_', '').replace('.npy', ''))
                    # An earlier variant reshaped to (4, 10, 100) here; the
                    # dense model consumes the flat vector instead.
                    features = np.load(npy_path).flatten()

                    # Alternative target kept for reference (pitch inside zones 1-9):
                    # label = player_csv.loc[file_number - 1, 'zone']
                    # label = 1 if str(label) in [str(i) for i in range(1, 10)] else 0

                    # Current target: does the pitch description mention a strike?
                    description = player_csv.loc[file_number - 1, 'description']
                    label = 1 if "strike" in str(description).lower() else 0
                except Exception as e:
                    print(f"❌ 失敗讀取 {file}：{e}")
                    continue

                X.append(features)
                y.append(label)
        return np.array(X), np.array(y)

    X_train, y_train = collect_data("train")
    X_test, y_test = collect_data("test")
    return X_train, y_train, X_test, y_test

# prepare_and_train_v3.py
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
import pandas as pd
import random
import numpy as np
import torch

def set_all_random_seeds(seed=42):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (the
    default generator plus the current and all CUDA devices), then puts
    cuDNN into deterministic mode with auto-tuning switched off.

    Args:
        seed: Integer handed unchanged to every seeding function.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # covers the multi-GPU case
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

class SimpleTCN(nn.Module):
    """Fully connected classifier over flattened feature vectors.

    Despite the name, this variant contains no temporal convolution: it is a
    stack of three linear layers (input -> 256 -> 128 -> classes) with ReLU
    activations and dropout (p=0.3) after the first hidden layer.

    Args:
        input_size: Length of each flattened input vector.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size=400 * 12, num_classes=2):
        super().__init__()
        wide, narrow = 256, 128
        layers = [
            nn.Linear(input_size, wide),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(wide, narrow),
            nn.ReLU(),
            nn.Linear(narrow, num_classes),
        ]
        # The attribute name and the layer order determine the state_dict
        # keys (fc.0.*, fc.3.*, fc.5.*) that saved checkpoints rely on.
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw class logits for a batch of flattened vectors."""
        logits = self.fc(x)
        return logits
'''
class SimpleTCN(nn.Module):
    def __init__(self, num_classes=2):
        super(SimpleTCN, self).__init__()
        # 輸入: (batch, 4, 10, 100)
        # 先合併 frames 和 features 維度
        self.conv1 = nn.Conv1d(in_channels=40, out_channels=64, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        # x: (batch, 4, 10, 100)
        x = x.view(x.size(0), -1, x.size(-1))  # (batch, 4*10, 100)
        x = self.conv1(x)                     # (batch, 64, 100)
        x = self.relu1(x)
        x = self.conv2(x)                     # (batch, 128, 100)
        x = self.relu2(x)
        x = self.pool(x)                      # (batch, 128, 1)
        x = x.squeeze(-1)                     # (batch, 128)
        x = self.fc(x)                        # (batch, num_classes)
        return x
'''

def train_and_evaluate(features_dir="features", epochs=200, lr=0.001,
                       model_path="model_strike_predictor_debug.pth"):
    """Train ``SimpleTCN`` on the stored features and report its accuracy.

    The whole training set is pushed through the model as a single batch in
    every epoch (no mini-batching). After training, accuracy is printed for
    the training and the test split and the model weights are written to
    ``model_path``; calling the function again with the same path overwrites
    the file.

    Args:
        features_dir: Folder passed to ``load_features_and_labels``.
        epochs: Number of full-batch optimisation steps.
        lr: Learning rate of the Adam optimiser.
        model_path: Destination of the saved ``state_dict``.

    Returns:
        Accuracy on the test split as a float in [0, 1].

    Raises:
        ValueError: If either split is empty or the training features do not
            form a 2-D array (for example because files differ in length).
    """
    X_train, y_train, X_test, y_test = load_features_and_labels(features_dir)
    print(X_train.shape)

    # Fail early with a readable message; previously an empty or ragged
    # feature set only surfaced as an IndexError / shape error further down.
    if len(X_train) == 0 or len(X_test) == 0:
        raise ValueError(
            f"No usable samples under {features_dir!r}: "
            f"{len(X_train)} train / {len(X_test)} test"
        )
    if X_train.ndim != 2:
        raise ValueError(
            f"Training features must form a 2-D array, got shape {X_train.shape}"
        )

    # Encode train and test labels together so that both splits share one
    # label -> index mapping.
    n_train = len(y_train)
    le = LabelEncoder()
    y_all_encoded = le.fit_transform(np.concatenate([y_train, y_test]))
    y_train = y_all_encoded[:n_train]
    y_test = y_all_encoded[n_train:]
    n_class = len(np.unique(y_all_encoded))

    print(f"📊 訓練資料筆數：{len(X_train)}，測試資料筆數：{len(X_test)}")

    model = SimpleTCN(input_size=X_train.shape[1], num_classes=n_class)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Build the tensors once; they are reused for training and evaluation.
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.long)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

    # NOTE(review): the recorded run shows the loss jumping from about 4.6 to
    # about 87 after the first step, which suggests the raw features are not
    # scaled. Standardising with training-set statistics would likely help,
    # but is not done here because the saved weights would then expect
    # scaled inputs -- confirm with whoever consumes the checkpoint.
    model.train()
    for epoch in range(epochs):
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

    # Evaluation: dropout off, no gradient tracking.
    model.eval()
    with torch.no_grad():
        train_pred = model(X_train_tensor).argmax(dim=1).numpy()
        test_pred = model(X_test_tensor).argmax(dim=1).numpy()

    train_acc = accuracy_score(y_train, train_pred)
    print(f"✅ 訓練正確率：{train_acc:.2%}")
    test_acc = accuracy_score(y_test, test_pred)
    print(f"✅ 測試正確率：{test_acc:.2%}")

    # Persist the weights only (state_dict), not the whole module.
    torch.save(model.state_dict(), model_path)
    print(f"💾 模型已儲存為 {model_path}")
    return test_acc


# Ten independent train/evaluate rounds: every RNG is re-seeded with the round
# index before training, and each round's returned test accuracy is kept.
test_acc_list = list()
for i in range(10):
    set_all_random_seeds(seed=i)
    test_acc_list += [train_and_evaluate()]
print(test_acc_list)

📂 處理選手：Yu_Darvish_FF
features\Yu_Darvish_FF_videos_4S\train 共有 156 個 .npy 檔案
📂 處理選手：Yu_Darvish_FS
features\Yu_Darvish_FS_videos_4S\train 共有 149 個 .npy 檔案
📂 處理選手：Yu_Darvish_SL
features\Yu_Darvish_SL_videos_4S\train 共有 153 個 .npy 檔案
📂 處理選手：Yu_Darvish_FF
features\Yu_Darvish_FF_videos_4S\test 共有 40 個 .npy 檔案
📂 處理選手：Yu_Darvish_FS
features\Yu_Darvish_FS_videos_4S\test 共有 38 個 .npy 檔案
📂 處理選手：Yu_Darvish_SL
features\Yu_Darvish_SL_videos_4S\test 共有 40 個 .npy 檔案
(458, 4000)
📊 訓練資料筆數：458，測試資料筆數：118
Epoch 1/200, Loss: 4.5547
Epoch 2/200, Loss: 87.1511
Epoch 3/200, Loss: 6.2488
Epoch 4/200, Loss: 23.3607
Epoch 5/200, Loss: 26.3339
Epoch 6/200, Loss: 20.3076
Epoch 7/200, Loss: 11.6812
Epoch 8/200, Loss: 4.6812
Epoch 9/200, Loss: 9.4542
Epoch 10/200, Loss: 10.0811
Epoch 11/200, Loss: 4.9736
Epoch 12/200, Loss: 3.9309
Epoch 13/200, Loss: 5.4635
Epoch 14/200, Loss: 6.2284
Epoch 15/200, Loss: 5.8994
Epoch 16/200, Loss: 4.4993
Epoch 17/200, Loss: 3.0845
Epoch 18/200, Loss: 3.4399
Epoch 19/200, Loss: 4.528

In [132]:
test_acc_list

[0.7542372881355932,
 0.7372881355932204,
 0.7542372881355932,
 0.6694915254237288,
 0.7542372881355932,
 0.7542372881355932,
 0.7542372881355932,
 0.7542372881355932,
 0.7457627118644068,
 0.7457627118644068]

In [None]:
[0.7542372881355932,
 0.7372881355932204,
 0.7542372881355932,
 0.6694915254237288,
 0.7542372881355932,
 0.7542372881355932,
 0.7542372881355932,
 0.7542372881355932,
 0.7457627118644068,
 0.7457627118644068]

In [133]:
[0.7542372881355932,
 0.6694915254237288,
 0.6610169491525424,
 0.7033898305084746,
 0.6610169491525424,
 0.6949152542372882,
 0.6610169491525424,
 0.6864406779661016,
 0.6949152542372882,
 0.6864406779661016]

[0.7542372881355932,
 0.6694915254237288,
 0.6610169491525424,
 0.7033898305084746,
 0.6610169491525424,
 0.6949152542372882,
 0.6610169491525424,
 0.6864406779661016,
 0.6949152542372882,
 0.6864406779661016]