In [9]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

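# This function takes the root features directory and the name of the label
# column to use ('zone' or 'description').
# Under the root directory it visits each player's sub-directory and looks for
# a `train` or `test` folder inside it.
# Every .npy file in that folder is loaded, and its label is looked up with
# player_csv.loc[file_number - 1, <label column>].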
def _encode_label(raw_value, label_set):
    """Turn one raw CSV cell into a binary label (0 or 1).

    For ``'zone'`` the result is 1 when the value is a whole number from 1 to
    9 and 0 otherwise (including missing/unparseable values). For
    ``'description'`` the result is 1 when the text contains "strike"
    (case-insensitive) and 0 otherwise.
    """
    if label_set == 'zone':
        # Compare numerically: pandas parses an integer column that contains
        # missing values as float, so the cell may arrive as 5.0 rather than 5.
        try:
            zone = float(raw_value)
        except (TypeError, ValueError):
            return 0
        return 1 if zone.is_integer() and 1 <= zone <= 9 else 0
    return 1 if "strike" in str(raw_value).lower() else 0


def load_features_and_labels(features_dir, label_set):
    """Load flattened ``.npy`` features and binary labels for train and test.

    Each entry of ``features_dir`` is treated as one player's directory; the
    player name is the part of the entry name before ``'_video'``. The labels
    come from ``data/{player}_videos_4S/{player}.csv`` (relative to the
    current working directory). Inside ``{features_dir}/{entry}/{split}``
    every ``pitch_<n>.npy`` file is loaded, flattened, and paired with row
    ``n - 1`` of that CSV.

    Entries without a readable CSV or without the split folder are skipped.
    A file that cannot be parsed, loaded or labelled is reported on stdout
    and skipped.

    Args:
        features_dir: Root directory holding one sub-directory per player.
        label_set: Name of the CSV column used as label, ``'zone'`` or
            ``'description'``.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` as NumPy arrays.

    Raises:
        ValueError: If ``label_set`` is not one of the supported columns.
    """
    # Fail once, up front, instead of failing on every single file.
    if label_set not in ('zone', 'description'):
        raise ValueError(
            f"label_set must be 'zone' or 'description', got {label_set!r}"
        )

    # Player name -> DataFrame (or None when the CSV could not be read), so
    # each CSV is parsed once even though both splits need it.
    player_tables = {}

    def read_player_csv(player_name):
        if player_name not in player_tables:
            csv_path = f'data/{player_name}_videos_4S/{player_name}.csv'
            try:
                player_tables[player_name] = pd.read_csv(csv_path)
            except Exception:
                # Best effort: the features directory may contain entries
                # that are not player folders, so a missing/unreadable CSV
                # just means "skip this entry".
                player_tables[player_name] = None
        return player_tables[player_name]

    def collect_data(split):
        X, y = [], []
        # os.listdir returns entries in arbitrary order; sort so the sample
        # order (and therefore seeded experiments) is reproducible.
        for subdir in sorted(os.listdir(features_dir)):
            split_path = os.path.join(features_dir, subdir, split)
            if not os.path.isdir(split_path):
                continue

            player_csv = read_player_csv(subdir.split('_video')[0])
            if player_csv is None:
                continue

            npy_files = sorted(
                f for f in os.listdir(split_path) if f.endswith('.npy')
            )
            for file in npy_files:
                npy_path = os.path.join(split_path, file)
                try:
                    file_number = int(
                        file.replace('pitch_', '').replace('.npy', '')
                    )
                    features = np.load(npy_path).flatten()
                    # File numbers are 1-based, CSV rows are 0-based.
                    raw_label = player_csv.loc[file_number - 1, label_set]
                    label = _encode_label(raw_label, label_set)
                except Exception as e:
                    print(f"❌ 失敗讀取 {file}：{e}")
                    continue
                X.append(features)
                y.append(label)
        return np.array(X), np.array(y)

    X_train, y_train = collect_data("train")
    X_test, y_test = collect_data("test")
    return X_train, y_train, X_test, y_test

# prepare_and_train_v3.py
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
import pandas as pd
import random
import numpy as np
import torch

def set_all_random_seeds(seed=42):
    """Seed the Python, NumPy and PyTorch random generators with ``seed``.

    Also sets ``torch.backends.cudnn.deterministic = True`` and
    ``torch.backends.cudnn.benchmark = False``.

    Args:
        seed: Integer seed passed unchanged to every generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # covers the multi-GPU case
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

class SimpleTCN(nn.Module):
    """Fully connected classifier over flat feature vectors.

    Despite the name, the network contains no convolutional layers. The
    stack is ``Linear(input_size, 256) -> ReLU -> Dropout(0.3) ->
    Linear(256, 128) -> ReLU -> Linear(128, num_classes)`` and it returns
    the raw output of the last linear layer.

    Args:
        input_size: Length of each input feature vector.
        num_classes: Number of output units.
    """

    def __init__(self, input_size=400 * 12, num_classes=2):
        super().__init__()
        hidden_wide, hidden_narrow = 256, 128
        # Layer order (and therefore the state_dict keys fc.0 / fc.3 / fc.5)
        # must stay fixed so saved checkpoints remain loadable.
        layers = [
            nn.Linear(input_size, hidden_wide),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(hidden_wide, hidden_narrow),
            nn.ReLU(),
            nn.Linear(hidden_narrow, num_classes),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return its output."""
        logits = self.fc(x)
        return logits

def train_and_evaluate(label_set, *, features_dir="features", epochs=200,
                       lr=0.001,
                       model_path="model_strike_predictor_debug.pth"):
    """Train a ``SimpleTCN`` on the loaded features and return test accuracy.

    The data comes from ``load_features_and_labels(features_dir, label_set)``.
    Labels of both splits are re-encoded together with ``LabelEncoder`` so
    they map to consecutive class ids, and the number of output units equals
    the number of distinct labels found. Training is full-batch (the whole
    training set is one batch per epoch) with Adam and cross-entropy loss.

    Args:
        label_set: Label column passed to ``load_features_and_labels``.
        features_dir: Root directory of the feature files.
        epochs: Number of full-batch optimisation steps.
        lr: Learning rate for Adam.
        model_path: Where to save the trained ``state_dict``. The file is
            overwritten on every call; pass ``None`` to skip saving.

    Returns:
        Accuracy of the trained model on the test split.

    Raises:
        ValueError: If either split is empty or its feature vectors do not
            form a 2-D array.
    """
    X_train, y_train, X_test, y_test = load_features_and_labels(
        features_dir, label_set
    )

    # An empty split comes back as a 1-D empty array; fail with a clear
    # message instead of an IndexError on X_train.shape[1] further down.
    if X_train.ndim != 2 or X_test.ndim != 2:
        raise ValueError(
            f"expected non-empty 2-D feature arrays, got train shape "
            f"{X_train.shape} and test shape {X_test.shape}"
        )

    # Encode both splits together so they share one label -> id mapping.
    n_train = len(y_train)
    y_all_encoded = LabelEncoder().fit_transform(
        np.concatenate([y_train, y_test])
    )
    y_train = y_all_encoded[:n_train]
    y_test = y_all_encoded[n_train:]
    n_class = len(np.unique(y_all_encoded))

    model = SimpleTCN(input_size=X_train.shape[1], num_classes=n_class)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.long)

    model.train()
    for _ in range(epochs):
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluate on the held-out split with dropout disabled and no autograd.
    model.eval()
    with torch.no_grad():
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
        y_pred = model(X_test_tensor).argmax(dim=1).numpy()
    acc = accuracy_score(y_test, y_pred)

    if model_path is not None:
        torch.save(model.state_dict(), model_path)
        print(f"💾 模型已儲存為 {model_path}")
    return acc

# Run the experiment once per label definition. For each definition, train and
# evaluate 10 times with seeds 0..9 and report the mean of the returned test
# accuracies. Every run writes its model to the same checkpoint file, so only
# the last run's weights remain on disk.
for label_set in ['zone','description']:
    test_acc_list = []  # test accuracy of each seeded run for this label
    for i in tqdm(range(10)):
        set_all_random_seeds(i)  # the loop index doubles as the RNG seed
        acc = train_and_evaluate(label_set)
        test_acc_list.append(acc)
        print(acc)
    print(f"用{label_set}當label 平均準確度{np.mean(test_acc_list)}")

 10%|█         | 1/10 [00:05<00:47,  5.26s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.6016949152542372


 20%|██        | 2/10 [00:11<00:47,  5.91s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.5508474576271186


 30%|███       | 3/10 [00:17<00:42,  6.09s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.5084745762711864


 40%|████      | 4/10 [00:24<00:37,  6.25s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.5338983050847458


 50%|█████     | 5/10 [00:31<00:31,  6.38s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.6101694915254238


 60%|██████    | 6/10 [00:37<00:25,  6.32s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.5169491525423728


 70%|███████   | 7/10 [00:43<00:18,  6.30s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.5338983050847458


 80%|████████  | 8/10 [00:49<00:12,  6.30s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.5677966101694916


 90%|█████████ | 9/10 [00:56<00:06,  6.28s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.5508474576271186


100%|██████████| 10/10 [01:02<00:00,  6.22s/it]


💾 模型已儲存為 model_strike_predictor_debug.pth
0.5508474576271186
用zone當label 平均準確度0.5525423728813559


 10%|█         | 1/10 [00:06<00:57,  6.35s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.7542372881355932


 20%|██        | 2/10 [00:12<00:49,  6.24s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.7372881355932204


 30%|███       | 3/10 [00:18<00:43,  6.15s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.7542372881355932


 40%|████      | 4/10 [00:24<00:37,  6.18s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.6694915254237288


 50%|█████     | 5/10 [00:31<00:31,  6.28s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.7542372881355932


 60%|██████    | 6/10 [00:38<00:25,  6.45s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.7542372881355932


 70%|███████   | 7/10 [00:44<00:19,  6.62s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.7542372881355932


 80%|████████  | 8/10 [00:51<00:13,  6.51s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.7542372881355932


 90%|█████████ | 9/10 [00:57<00:06,  6.43s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.7457627118644068


100%|██████████| 10/10 [01:03<00:00,  6.38s/it]

💾 模型已儲存為 model_strike_predictor_debug.pth
0.7457627118644068
用description當label 平均準確度0.7423728813559322



