In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# ============ 1. Data preprocessing ============

# Load the source CSV tables (paths are relative to the working directory).
winners = pd.read_csv('./data/winners.csv')
drivers = pd.read_csv('./data/drivers_updated.csv')
laps = pd.read_csv('./data/fastest_laps_updated.csv')
teams = pd.read_csv('./data/teams_updated.csv')
# Derive a 'year' column from the race date; it is used as a join key below.
winners['year'] = pd.to_datetime(winners['Date']).dt.year

# Convert a time string to seconds
def time_to_seconds(x):
    """Convert a time value to a number of seconds (best effort).

    Accepted inputs:
      * missing values (``None`` / NaN)      -> ``0``
      * numeric values                       -> ``float(x)``
      * ``"H:M:S"`` strings                  -> ``H*3600 + M*60 + S``
      * ``"M:S"`` strings                    -> ``M*60 + S``
      * strings with more than three colon-separated fields
                                             -> ``float`` of the first field
      * any other string                     -> ``float(x)``

    Anything that cannot be parsed yields ``0`` instead of raising, so the
    function is safe to use with ``Series.apply`` on messy columns.
    """
    if pd.isna(x):
        return 0

    if not isinstance(x, str):
        # Covers int/float as well as numpy scalars, which are not all
        # subclasses of the builtin numeric types.
        try:
            return float(x)
        except (TypeError, ValueError):
            return 0

    try:
        if ':' in x:
            parts = x.split(':')
            if len(parts) == 3:
                h, m, s = parts
                return int(h) * 3600 + int(m) * 60 + float(s)
            if len(parts) == 2:
                m, s = parts
                return int(m) * 60 + float(s)
            # More than three fields: keep the historical behaviour of
            # using only the first field.
            return float(parts[0])
        return float(x)
    except ValueError:
        # Malformed field such as "1:xx" or "DNF".
        return 0

# Convert the 'Time' columns of both tables to seconds.
winners['Time_sec'] = winners['Time'].apply(time_to_seconds)
laps['LapTime_sec'] = laps['Time'].apply(time_to_seconds)

# Label encoding: each encoder is fitted on the union of the columns it will
# later have to transform, so every value seen below has an id.
le_driver = LabelEncoder().fit(pd.concat([winners['Winner'], drivers['Driver']]).astype(str))
le_team = LabelEncoder().fit(pd.concat([winners['Car'], teams['Team'], drivers['Car']]).astype(str))
le_gp = LabelEncoder().fit(winners['Grand Prix'])
le_nat = LabelEncoder().fit(drivers['Nationality'].astype(str))

# Per-season lookup tables: driver points, team points, driver nationality,
# and the best (minimum) lap time per race/driver/car/year.
driver_pts = drivers[['Driver', 'Car', 'PTS', 'year']].drop_duplicates()
driver_pts.columns = ['Driver', 'Car', 'driver_pts', 'year']
team_pts = teams[['Team', 'PTS', 'year']].drop_duplicates()
# 'Team' is renamed to 'Car' so it can be joined against winners['Car'].
team_pts.columns = ['Car', 'team_pts', 'year']
driver_nat = drivers[['Driver', 'Nationality', 'Car', 'year']].drop_duplicates()
# Column names are unchanged by this assignment.
driver_nat.columns = ['Driver', 'Nationality', 'Car', 'year']
laps_best = laps.groupby(['Grand Prix', 'Driver', 'Car', 'year'])['LapTime_sec'].min().reset_index()
laps_best.columns = ['Grand Prix', 'Driver', 'Car', 'year', 'best_lap_time']

# Left-join every lookup table onto the winners table, so all winners rows
# are kept and unmatched rows get NaN (filled below).
# NOTE(review): drop_duplicates() works on the full selected column set, so a
# key with several distinct PTS values would still duplicate rows here.
df = winners.merge(driver_pts, left_on=['Winner', 'Car', 'year'], right_on=['Driver', 'Car', 'year'], how='left')
df = df.merge(team_pts, on=['Car', 'year'], how='left')
df = df.merge(driver_nat, left_on=['Winner', 'Car', 'year'], right_on=['Driver', 'Car', 'year'], how='left', suffixes=('', '_nat'))
df = df.merge(laps_best, left_on=['Grand Prix', 'Winner', 'Car', 'year'],
              right_on=['Grand Prix', 'Driver', 'Car', 'year'], how='left', suffixes=('', '_lap'))

# Fill missing values
df['driver_pts'] = df['driver_pts'].fillna(0)
df['team_pts'] = df['team_pts'].fillna(0)
df['Nationality'] = df['Nationality'].fillna('Unknown')
df['best_lap_time'] = df['best_lap_time'].fillna(0)
df['Laps'] = df['Laps'].fillna(0)
df['Time_sec'] = df['Time_sec'].fillna(0)

# Encode categorical columns as integer ids
# NOTE(review): le_nat was fitted on drivers['Nationality'] only; the
# 'Unknown' fill value above will fail to transform unless it also occurs
# in that column — confirm.
df['driver_id'] = le_driver.transform(df['Winner'].astype(str))
df['team_id'] = le_team.transform(df['Car'].astype(str))
df['gp_id'] = le_gp.transform(df['Grand Prix'])
df['nat_id'] = le_nat.transform(df['Nationality'].astype(str))

# Feature columns: non-numeric 'Pos' entries become 0.
df['Pos'] = pd.to_numeric(df['Pos'], errors='coerce').fillna(0)

# Rolling mean / best position over up to three rows per driver.
# The window includes the current row, and rows are ordered by
# ('Winner', 'year') only.
# NOTE(review): the feature list below labels 'Pos' as the starting position,
# while these columns are described as finishing-position history — confirm
# which quantity 'Pos' actually holds.
df = df.sort_values(['Winner', 'year'])
df['hist_mean_pos'] = df.groupby('Winner')['Pos'].transform(lambda x: x.rolling(3, min_periods=1).mean())
df['hist_best_pos'] = df.groupby('Winner')['Pos'].transform(lambda x: x.rolling(3, min_periods=1).min())

feature_cols = [
    'driver_id',      # driver id
    'team_id',        # team id
    'gp_id',          # Grand Prix id
    'year',           # season year
    'Pos',            # starting position
    'Laps',           # number of laps
    'Time_sec',       # race time (seconds)
    'driver_pts',     # driver's season points
    'team_pts',       # team's season points
    'nat_id',         # nationality id
    'best_lap_time',  # fastest lap of this race (seconds)
    'hist_mean_pos',  # mean position over the last three rows
    'hist_best_pos',  # best position over the last three rows
]

# ========== 2. Build per-race set samples ==========
def create_race_samples(df, n_drivers=20, feature_columns=None):
    """Turn a per-driver table into fixed-size per-race samples.

    Rows are grouped by ``('year', 'Grand Prix')``. Each race with at least
    ``n_drivers`` rows is sorted by ``'Pos'``, truncated to its first
    ``n_drivers`` rows, and contributes one sample *only if* one of the kept
    rows has ``Position == 1``. Features and label are appended together so
    the two returned arrays always have the same length and stay aligned.

    Args:
        df: DataFrame with ``'year'``, ``'Grand Prix'``, ``'Pos'``,
            ``'Position'`` and all feature columns.
        n_drivers: Number of rows kept per race.
        feature_columns: Columns used as features. Defaults to the
            module-level ``feature_cols`` list.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_races, n_drivers, n_features)``
        and ``y`` has shape ``(n_races,)`` holding the row index (within the
        sorted race) of the winner. When no race qualifies, empty arrays with
        those same trailing dimensions are returned.
    """
    cols = feature_cols if feature_columns is None else list(feature_columns)
    races = []
    labels = []
    for _, race in df.groupby(['year', 'Grand Prix']):
        if len(race) < n_drivers:
            continue
        race = race.sort_values('Pos').iloc[:n_drivers]
        # NOTE(review): the rest of the pipeline uses 'Pos'; confirm that a
        # separate 'Position' column really exists in the input frame.
        winner_idx = np.where(race['Position'].values == 1)[0]
        if len(winner_idx) == 0:
            # No winner among the kept rows: skip the race entirely so that
            # features and labels cannot drift out of sync.
            continue
        races.append(race[cols].values)
        labels.append(winner_idx[0])

    if not races:
        # Keep the documented rank even when nothing qualified.
        return (np.empty((0, n_drivers, len(cols))),
                np.empty((0,), dtype=np.int64))
    return np.array(races), np.array(labels)

# Build the race-level feature/label arrays with the default n_drivers and
# print their shapes and the feature list as a sanity check.
X, y = create_race_samples(df)
print('Race set shape:', X.shape, 'Label shape:', y.shape)
print('features:', feature_cols)

# ========== 3. PyTorch Dataset ==========
class F1RaceSet(Dataset):
    """Dataset pairing each race feature array with its winner label.

    ``X`` is converted to a float32 tensor and ``y`` to a long tensor once,
    at construction time; indexing returns ``(features, label)``.
    """

    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        """Return the number of samples (length of the feature tensor)."""
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

# ========== 4. Set Transformer Model ==========
class DriverMLP(nn.Module):
    """Two-layer ReLU perceptron applied to the last dimension of its input.

    Maps ``in_dim`` features to ``out_dim`` features through one hidden layer
    of width ``hidden``. Both linear layers are followed by a ReLU, so the
    output is non-negative.
    """

    def __init__(self, in_dim, hidden=64, out_dim=64):
        super().__init__()
        # Layer order (and therefore the Sequential indices 0..3) is kept so
        # that parameter names in the state dict stay "net.0.*" / "net.2.*".
        layers = [
            nn.Linear(in_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, out_dim),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the result."""
        encoded = self.net(x)
        return encoded

class SetAttentionBlock(nn.Module):
    """Self-attention over the elements of a set, with residual + LayerNorm.

    The input is used as query, key and value of a batch-first multi-head
    attention layer; the attention output is added back to the input and the
    sum is layer-normalised.
    """

    def __init__(self, dim, heads=4):
        super().__init__()
        self.attn = nn.MultiheadAttention(dim, heads, batch_first=True)
        self.norm = nn.LayerNorm(dim)

    def forward(self, x):
        """Return ``LayerNorm(x + SelfAttention(x))``."""
        # The attention layer returns (output, weights); only the output
        # is used.
        attended = self.attn(x, x, x)[0]
        residual = x + attended
        return self.norm(residual)

class F1CollectiveSetTransformer(nn.Module):
    """Score all drivers of a race jointly and emit one logit per driver.

    Pipeline: a per-driver MLP embeds each driver's features, one
    self-attention block lets the drivers of the same race attend to each
    other, and a final linear layer reduces every embedding to one logit.
    """

    def __init__(self, driver_feat_dim, set_dim=64, n_drivers=20):
        super().__init__()
        # Sub-modules are created in the same order as before, so parameter
        # names and seeded initialisation are unaffected.
        self.driver_mlp = DriverMLP(driver_feat_dim, out_dim=set_dim)
        self.set_block = SetAttentionBlock(set_dim, heads=4)
        self.final = nn.Linear(set_dim, 1)  # one logit per driver
        # Stored for reference only; forward() does not read it.
        self.n_drivers = n_drivers

    def forward(self, x):
        """Return per-driver logits, expected shape ``(batch, n_drivers)``."""
        per_driver = self.driver_mlp(x)          # (batch, n_drivers, set_dim)
        contextual = self.set_block(per_driver)  # (batch, n_drivers, set_dim)
        scores = self.final(contextual)          # (batch, n_drivers, 1)
        return scores.squeeze(-1)                # (batch, n_drivers)

# ========== 5. Training procedure ==========
# Hold out 20% of the races for validation, stratified on the winner index.
# NOTE(review): stratify=y requires at least two samples of every label value;
# train_test_split raises otherwise — confirm this holds for the data.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
trainset = F1RaceSet(X_train, y_train)
valset = F1RaceSet(X_val, y_val)
train_loader = DataLoader(trainset, batch_size=32, shuffle=True)
val_loader = DataLoader(valset, batch_size=32, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The per-driver feature dimension is taken from the last axis of X.
model = F1CollectiveSetTransformer(driver_feat_dim=X.shape[2], n_drivers=20).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
# Cross-entropy over the per-driver logits: the target is the winner's index.
loss_fn = nn.CrossEntropyLoss()

best_acc = 0
for epoch in range(1, 31):
    # One pass over the training set.
    model.train()
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        logits = model(xb)
        loss = loss_fn(logits, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Validation: accuracy of the arg-max driver against the true winner index.
    model.eval()
    correct = 0; total = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            logits = model(xb)
            pred = torch.argmax(logits, dim=1)
            correct += (pred == yb).sum().item()
            total += yb.size(0)
    acc = correct / total
    print(f"Epoch {epoch}: val acc = {acc:.4f}")
    # Checkpoint the weights whenever validation accuracy strictly improves.
    if acc > best_acc:
        best_acc = acc
        torch.save(model.state_dict(), "best_f1_collective.pth")
print("Best val acc:", best_acc)




* Running on local URL:  http://127.0.0.1:7866
* To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "c:\Users\lu050\anaconda3\envs\pytorch\Lib\site-packages\gradio\queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<5 lines>...
    )
    ^
  File "c:\Users\lu050\anaconda3\envs\pytorch\Lib\site-packages\gradio\route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<11 lines>...
    )
    ^
  File "c:\Users\lu050\anaconda3\envs\pytorch\Lib\site-packages\gradio\blocks.py", line 2191, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<8 lines>...
    )
    ^
  File "c:\Users\lu050\anaconda3\envs\pytorch\Lib\site-packages\gradio\blocks.py", line 1702, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        f