In [None]:
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import top_k_accuracy_score
import gradio as gr

# === 1. Multi-task model ===
class MultiTaskLSTMClassifier(nn.Module):
    """LSTM encoder feeding three independent linear classification heads.

    The final hidden state of the LSTM's last layer is passed to one linear
    layer per task, producing logits for driver, team and position.
    """

    def __init__(self, input_size, hidden_size, num_classes_driver, num_classes_team, num_classes_pos):
        super().__init__()
        # Submodule names and creation order are kept stable: the names are
        # the keys of the saved state_dict, the order fixes seeded init.
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.driver_head = nn.Linear(hidden_size, num_classes_driver)
        self.team_head = nn.Linear(hidden_size, num_classes_team)
        self.pos_head = nn.Linear(hidden_size, num_classes_pos)

    def forward(self, x):
        """Return ``(driver_logits, team_logits, pos_logits)`` for input ``x``."""
        # nn.LSTM returns (output, (h_n, c_n)); only h_n of the last layer is used.
        final_hidden = self.lstm(x)[1][0][-1]
        heads = (self.driver_head, self.team_head, self.pos_head)
        return tuple(head(final_hidden) for head in heads)

# === 2. Load data ===
# Four CSV tables are read from ./data relative to the working directory.
drivers = pd.read_csv("./data/drivers_updated.csv")
winners = pd.read_csv("./data/winners.csv")
teams = pd.read_csv("./data/teams_updated.csv")
laps = pd.read_csv("./data/fastest_laps_updated.csv")

# Calendar year of each race, derived from its 'Date' column; used as a merge key below.
winners['year'] = pd.to_datetime(winners['Date']).dt.year

# Encode Driver and GP for use as model inputs
# (these encoders are fitted on the full winners table, before any filtering).
le_driver_input = LabelEncoder()
le_gp_input = LabelEncoder()
winners['Winner_enc_in'] = le_driver_input.fit_transform(winners['Winner'].astype(str))
winners['GP_enc_in'] = le_gp_input.fit_transform(winners['Grand Prix'].astype(str))

# Merge the tables (left joins, so every winners row is kept even without a match)
df = winners.merge(drivers, left_on=['Winner', 'Car', 'year'], right_on=['Driver', 'Car', 'year'], how='left')
df = df.merge(laps, left_on=['Grand Prix', 'Winner', 'Car', 'year'], right_on=['Grand Prix', 'Driver', 'Car', 'year'], how='left')
df = df.merge(teams, left_on=['Car', 'year'], right_on=['Team', 'year'], how='left')

# Column clean-up
# NOTE(review): the _x/_y names are pandas' default suffixes for overlapping
# column names in a merge; which source table each one comes from depends on
# the CSV schemas, which are not visible here — confirm against the data.
df.rename(columns={
    'PTS_x': 'PTS',
    'Pos_x': 'Position',
    'Time_x': 'Time',
    'Time_y': 'Time_lap'
}, inplace=True)

# Coerce Time_lap to numeric when present; unparseable values become 0.
for col in ['Time_lap']:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

# Model input columns. A missing column is created as all zeros, and every
# column is coerced to numeric with unparseable/missing values replaced by 0.
main_features = ['PTS', 'Grid', 'Laps', 'Time', 'Position', 'Winner_enc_in', 'GP_enc_in']
for col in main_features:
    if col not in df.columns:
        df[col] = 0
    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

# Target label columns. Despite the `_enc` suffix these hold the raw values
# as strings (a missing Team becomes the string 'nan').
df['Winner_enc'] = df['Winner'].astype(str)
df['Team_enc'] = df['Team'].astype(str)
df['Position_enc'] = df['Position'].astype(str)

# For each target, collect the label values that occur more than once.
multi_driver = df['Winner_enc'].value_counts()
multi_driver = multi_driver[multi_driver > 1].index
multi_team = df['Team_enc'].value_counts()
multi_team = multi_team[multi_team > 1].index
multi_position = df['Position_enc'].value_counts()
multi_position = multi_position[multi_position > 1].index

# Keep only rows whose driver, team AND position labels all occur more than once.
df_filtered = df[
    (df['Winner_enc'].isin(multi_driver)) &
    (df['Team_enc'].isin(multi_team)) &
    (df['Position_enc'].isin(multi_position))
].copy()

# Number of rows (time steps) in each input sequence.
n_seq = 3
def get_driver_seq_features(df, driver, year, n_seq=3, features=None):
    """Build a fixed-length feature sequence from a driver's earlier rows.

    Args:
        df: DataFrame containing ``Winner_enc``, ``year`` and the feature columns.
        driver: Value matched against ``df['Winner_enc']``.
        year: Only rows with ``df['year'] < year`` are considered.
        n_seq: Number of rows in the returned sequence.
        features: Names of the feature columns to extract. Defaults to the
            module-level ``main_features`` list, which was previously the
            only (implicit) option.

    Returns:
        Array with ``n_seq`` rows and one column per feature. Rows are ordered
        by ``year`` descending (most recent first); when fewer than ``n_seq``
        earlier rows exist, the remainder is zero-padded at the end.
    """
    cols = main_features if features is None else list(features)
    is_earlier_row = (df['Winner_enc'] == driver) & (df['year'] < year)
    recent = df.loc[is_earlier_row].sort_values('year', ascending=False).head(n_seq)
    feats = recent[cols].values
    missing = n_seq - len(feats)
    if missing > 0:
        # Pad after the real rows so the most recent history stays first.
        feats = np.vstack([feats, np.zeros((missing, len(cols)))])
    return feats

# One input sequence per filtered row: that row's driver's earlier-year
# history, looked up inside the filtered table itself.
X_seq = np.stack([
    get_driver_seq_features(df_filtered, driver_label, race_year, n_seq)
    for driver_label, race_year in zip(df_filtered['Winner_enc'], df_filtered['year'])
])

# Raw string targets, in the same row order as X_seq.
y_driver = np.array(df_filtered['Winner_enc'].tolist())
y_team = np.array(df_filtered['Team_enc'].tolist())
y_position = np.array(df_filtered['Position_enc'].tolist())

# Integer-encode each target with its own encoder; the encoders are kept
# so predictions can be mapped back to names.
le_driver, le_team, le_position = LabelEncoder(), LabelEncoder(), LabelEncoder()
y_driver_enc = le_driver.fit_transform(y_driver)
y_team_enc = le_team.fit_transform(y_team)
y_position_enc = le_position.fit_transform(y_position)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MultiTaskLSTMClassifier(
    input_size=len(main_features),
    hidden_size=64,
    num_classes_driver=len(le_driver.classes_),
    num_classes_team=len(le_team.classes_),
    num_classes_pos=len(le_position.classes_)
).to(device)

# The same cross-entropy criterion is applied to each of the three heads.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

X_tensor = torch.tensor(X_seq, dtype=torch.float32).to(device)
y_driver_tensor = torch.tensor(y_driver_enc, dtype=torch.long).to(device)
y_team_tensor = torch.tensor(y_team_enc, dtype=torch.long).to(device)
y_pos_tensor = torch.tensor(y_position_enc, dtype=torch.long).to(device)

model_path = "./model/lstm_multi_task.pt"
os.makedirs(os.path.dirname(model_path), exist_ok=True)

if os.path.exists(model_path):
    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint saved on a CUDA machine still loads on a CPU-only one.
    # NOTE: torch.load unpickles the file — only load checkpoints you trust.
    # NOTE: an existing checkpoint is reused as-is; if the data (and hence
    # the number of classes) has changed, delete the file to retrain.
    model.load_state_dict(torch.load(model_path, map_location=device))
else:
    # Full-batch training: every epoch is a single step over all sequences.
    # Nothing inside the loop leaves training mode, so set it once.
    model.train()
    for epoch in range(300):
        optimizer.zero_grad()
        out_driver, out_team, out_pos = model(X_tensor)
        # Unweighted sum of the three task losses.
        loss = criterion(out_driver, y_driver_tensor) + criterion(out_team, y_team_tensor) + criterion(out_pos, y_pos_tensor)
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 50 == 0:
            # Training-set accuracy of the driver head, computed from the
            # logits produced before this epoch's optimizer step.
            with torch.no_grad():
                probs = out_driver.softmax(dim=1).cpu().numpy()
                acc1 = top_k_accuracy_score(y_driver_tensor.cpu(), probs, k=1)
                acc5 = top_k_accuracy_score(y_driver_tensor.cpu(), probs, k=5)
            print(f"Epoch {epoch+1}/300 | Loss: {loss.item():.4f} | Top-1: {acc1:.3f} | Top-5: {acc5:.3f}")

    torch.save(model.state_dict(), model_path)

# Driver -> team lookup used when formatting predictions. A driver that
# appears with several teams keeps only the last pair encountered.
driver_to_team_map = df_filtered[['Winner_enc', 'Team_enc']].drop_duplicates().set_index('Winner_enc')['Team_enc'].to_dict()

# === Prediction function (top-five drivers + team + confidence) ===
def predict_driver_team(year, pts, grid, laps, time, position, driver_name, gp_name):
    """Predict the most likely drivers for one set of race features.

    Args:
        year: Accepted for UI compatibility; not used by the prediction.
        pts, grid, laps, time, position: Numeric feature values.
        driver_name: Driver name; encoded with ``le_driver_input``. A name the
            encoder has not seen falls back to index 0.
        gp_name: Grand Prix name; encoded with ``le_gp_input``. A name the
            encoder has not seen falls back to index 0.

    Returns:
        A multi-line string with up to five ranked entries (driver, team,
        confidence in percent). Fewer entries are produced when the model
        has fewer than five driver classes.
    """
    driver_idx = le_driver_input.transform([driver_name])[0] if driver_name in le_driver_input.classes_ else 0
    gp_idx = le_gp_input.transform([gp_name])[0] if gp_name in le_gp_input.classes_ else 0

    # The single feature row is repeated n_seq times to form the sequence
    # the LSTM expects, then given a leading batch dimension of 1.
    input_feat = np.array([[pts, grid, laps, time, position, driver_idx, gp_idx]])
    input_seq = np.repeat(input_feat, n_seq, axis=0)[np.newaxis, ...]
    input_seq_tensor = torch.tensor(input_seq, dtype=torch.float32).to(device)

    model.eval()
    with torch.no_grad():
        out_driver, _, _ = model(input_seq_tensor)
        probs_driver = torch.softmax(out_driver, dim=1).cpu().numpy()[0]

    # The slice yields at most five indices; iterating over what is actually
    # there (instead of a fixed range(5)) avoids an IndexError when the
    # encoder knows fewer than five drivers.
    top_driver_idx = np.argsort(probs_driver)[::-1][:5]
    top_driver_names = le_driver.inverse_transform(top_driver_idx)
    top_driver_probs = probs_driver[top_driver_idx]

    output_lines = []
    for rank, (driver, prob) in enumerate(zip(top_driver_names, top_driver_probs), start=1):
        team = driver_to_team_map.get(driver, "未知車隊")
        confidence = prob * 100
        output_lines.append(
            f"預測第 {rank} 名：\n"
            f"駕駛：{driver}\n"
            f"車隊：{team}\n"
            f"信心度：{confidence:.2f}%\n"
            "-------------------------"
        )
    return "\n".join(output_lines)

# === Gradio UI ===
# Builds the form for predict_driver_team. Components are laid out in the
# order they are created inside the Blocks/Row contexts.
with gr.Blocks() as demo:
    gr.Markdown("# F1 駕駛與車隊預測")
    with gr.Row():
        # One input per predict_driver_team parameter, in signature order.
        year_input = gr.Number(label="年份", value=2024)
        pts_input = gr.Number(label="PTS", value=25)
        grid_input = gr.Number(label="起跑位", value=1)
        laps_input = gr.Number(label="圈數", value=56)
        time_input = gr.Number(label="比賽時間（秒）", value=5400)
        position_input = gr.Number(label="名次", value=1)
        # Dropdown choices come from the input encoders, so every selectable
        # value is one the encoders were fitted on.
        driver_input = gr.Dropdown(choices=list(le_driver_input.classes_), label="駕駛", value=le_driver_input.classes_[0])
        gp_input = gr.Dropdown(choices=list(le_gp_input.classes_), label="Grand Prix 名稱", value=le_gp_input.classes_[0])
    predict_btn = gr.Button("預測前五名駕駛與車隊")
    output = gr.Textbox(label="預測結果", lines=20)
    # The order of `inputs` must match predict_driver_team's positional parameters.
    predict_btn.click(
        predict_driver_team,
        inputs=[year_input, pts_input, grid_input, laps_input, time_input, position_input, driver_input, gp_input],
        outputs=output
    )
# Start the app.
demo.launch()



In [30]:
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
import gradio as gr

# === Model definition ===
class MultiTaskLSTMClassifier(nn.Module):
    """Shared LSTM encoder with separate driver, team and position heads."""

    def __init__(self, input_size, hidden_size, num_classes_driver, num_classes_team, num_classes_pos):
        super().__init__()
        # batch_first=True: the batch dimension comes first in the input.
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        # One linear layer per task, all reading the same hidden vector.
        self.driver_head = nn.Linear(hidden_size, num_classes_driver)
        self.team_head = nn.Linear(hidden_size, num_classes_team)
        self.pos_head = nn.Linear(hidden_size, num_classes_pos)

    def forward(self, x):
        """Encode ``x`` and return the three heads' logits as a tuple."""
        _outputs, state = self.lstm(x)
        # state is (h_n, c_n); take the last layer's final hidden state.
        summary = state[0][-1]
        driver_logits = self.driver_head(summary)
        team_logits = self.team_head(summary)
        pos_logits = self.pos_head(summary)
        return driver_logits, team_logits, pos_logits

# === Grand Prix name list for the UI ===
# Read the distinct Grand Prix names for the dropdown. Only data-related
# failures are handled (missing/unreadable file -> OSError, missing column
# -> KeyError, empty or malformed CSV -> ValueError, unsortable mixed
# values -> TypeError); a bare `except:` would also swallow
# KeyboardInterrupt and SystemExit.
try:
    winners_ui = pd.read_csv("./data/winners.csv")
    gp_names_for_ui = sorted(winners_ui["Grand Prix"].dropna().unique().tolist())
except (OSError, KeyError, ValueError, TypeError):
    gp_names_for_ui = []

# An empty list would break the dropdown's default value (index 0), so the
# built-in fallback is also used when the CSV yields no names.
if not gp_names_for_ui:
    gp_names_for_ui = ["Monaco", "Silverstone", "Suzuka"]

# === Prediction function ===
def predict_top5_by_gp(year, laps, gp_name):
    """Train a fresh model on recent seasons and rank likely drivers for a GP.

    Every call re-reads the CSV files, keeps the rows from the 20 years up to
    ``year`` (never before 1950, never after 2024), trains a new
    ``MultiTaskLSTMClassifier`` for 100 full-batch epochs and then ranks the
    drivers that appear in the data in one of the years shortly before
    ``year``. The model starts from a new random initialisation on each call,
    so results can differ between calls.

    Args:
        year: Target year (numeric).
        laps: Number of laps, used as an input feature.
        gp_name: Grand Prix name; must occur in the selected year window.

    Returns:
        A newline-separated ranking of up to five drivers with their most
        recent team and a confidence percentage, or one of the messages
        "資料不足", "GP 名稱無效", "序列資料不足", "查無足夠活躍車手".
    """
    drivers = pd.read_csv("./data/drivers_updated.csv")
    winners = pd.read_csv("./data/winners.csv")
    teams = pd.read_csv("./data/teams_updated.csv")
    laps_data = pd.read_csv("./data/fastest_laps_updated.csv")

    winners['year'] = pd.to_datetime(winners['Date']).dt.year

    # Left joins: every winners row is kept even when a table has no match.
    df = winners.merge(drivers, left_on=['Winner', 'Car', 'year'], right_on=['Driver', 'Car', 'year'], how='left')
    df = df.merge(laps_data, left_on=['Grand Prix', 'Winner', 'Car', 'year'], right_on=['Grand Prix', 'Driver', 'Car', 'year'], how='left')
    df = df.merge(teams, left_on=['Car', 'year'], right_on=['Team', 'year'], how='left')

    start_year = max(1950, year - 20)
    df = df[(df['year'] >= start_year) & (df['year'] <= min(year, 2024))].copy()
    if df.empty:
        return "資料不足"

    le_driver = LabelEncoder()
    le_team = LabelEncoder()
    le_gp = LabelEncoder()

    df['Winner_enc'] = le_driver.fit_transform(df['Winner'].astype(str))
    df['Team_enc'] = le_team.fit_transform(df['Team'].astype(str))
    df['GP_enc'] = le_gp.fit_transform(df['Grand Prix'].astype(str))

    # Reject an unknown Grand Prix before the expensive sequence building
    # and training, not after it.
    if gp_name not in le_gp.classes_:
        return "GP 名稱無效"
    gp_idx = le_gp.transform([gp_name])[0]

    main_features = ['Laps', 'year', 'GP_enc']
    for col in main_features:
        df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    n_seq = 10

    def get_seq_features(frame, driver, before_year):
        """Return the driver's latest ``n_seq`` earlier rows, zero-padded at the end."""
        hist = frame[(frame['Winner'] == driver) & (frame['year'] < before_year)].sort_values('year', ascending=False)
        feats = hist.head(n_seq)[main_features].values
        if len(feats) < n_seq:
            pad = np.zeros((n_seq - len(feats), len(main_features)))
            feats = np.vstack([feats, pad])
        return feats

    X_seq = [
        get_seq_features(df, winner, race_year)
        for winner, race_year in zip(df['Winner'], df['year'])
    ]

    if not X_seq:
        return "序列資料不足"

    X_seq = np.stack(X_seq)
    # Reuse the labels encoded above instead of re-encoding the raw values:
    # the raw 'Team' column can hold NaN after the left joins, while the
    # encoders were fitted on the stringified columns.
    y_driver_enc = df['Winner_enc'].to_numpy()
    y_team_enc = df['Team_enc'].to_numpy()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # The position head is unused in this function, so it gets a single class.
    model = MultiTaskLSTMClassifier(len(main_features), 64, len(le_driver.classes_), len(le_team.classes_), 1).to(device)

    X_tensor = torch.tensor(X_seq, dtype=torch.float32).to(device)
    y_driver_tensor = torch.tensor(y_driver_enc, dtype=torch.long).to(device)
    y_team_tensor = torch.tensor(y_team_enc, dtype=torch.long).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Full-batch training; the mode never changes inside the loop.
    model.train()
    for _ in range(100):
        optimizer.zero_grad()
        out_driver, out_team, _ = model(X_tensor)
        loss = criterion(out_driver, y_driver_tensor) + criterion(out_team, y_team_tensor)
        loss.backward()
        optimizer.step()

    # The query row is repeated n_seq times to form the input sequence.
    input_feat = np.array([[laps, year, gp_idx]])
    input_seq = np.repeat(input_feat, n_seq, axis=0)[np.newaxis, ...]
    input_tensor = torch.tensor(input_seq, dtype=torch.float32).to(device)

    model.eval()
    with torch.no_grad():
        out_driver, _, _ = model(input_tensor)
        # Dividing the logits by 1.5 (softmax temperature) flattens the distribution.
        probs_driver = torch.softmax(out_driver / 1.5, dim=1).cpu().numpy()[0]

    # === Active-driver filter (look-back widened up to 10 years) ===
    # A driver counts as active when they appear in the data in one of the
    # `max_offset` years before `year`; the window starts at 2 years and
    # grows until at least five drivers qualify.
    driver_years = df.groupby('Winner')['year'].apply(set).to_dict()
    active_indices = []
    for max_offset in range(2, 11):
        active_indices = [
            i for i, name in enumerate(le_driver.classes_)
            if any((year - d) in driver_years.get(name, set()) for d in range(1, max_offset + 1))
        ]
        if len(active_indices) >= 5:
            break

    if len(active_indices) < 5:
        return "查無足夠活躍車手"

    # Rank only the active drivers, then map back to encoder indices.
    active_probs = probs_driver[active_indices]
    top5_idx = np.argsort(active_probs)[::-1][:5]
    final_driver_ids = np.array(active_indices)[top5_idx]

    top5_names = le_driver.inverse_transform(final_driver_ids)
    top5_probs = probs_driver[final_driver_ids]

    # === Team from the driver's latest year in the data ===
    last_team_map = (
        df.sort_values('year')
        .dropna(subset=['Winner', 'Team'])
        .drop_duplicates(subset=['Winner'], keep='last')
        .set_index('Winner')['Team']
        .to_dict()
    )

    result = []
    for rank, (driver, prob) in enumerate(zip(top5_names, top5_probs), start=1):
        team = last_team_map.get(driver, "未知車隊")
        confidence = prob * 100
        result.append(f"第 {rank} 名：{driver}（{team}） 信心度：{confidence:.6f}%")
    return "\n".join(result)

# === Gradio UI ===
# Builds the form for predict_top5_by_gp. Components appear in the order
# they are created inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("# F1 駕駛與車隊預測（活躍過濾 + 最終車隊）")
    year = gr.Number(label="年份", value=2025)
    laps = gr.Number(label="圈數", value=56)
    # Choices come from gp_names_for_ui; its first entry is the default.
    gp_name = gr.Dropdown(choices=gp_names_for_ui, label="Grand Prix 名稱", value=gp_names_for_ui[0])
    output = gr.Textbox(label="預測結果", lines=10)
    btn = gr.Button("預測")
    # The order of `inputs` must match predict_top5_by_gp's positional parameters.
    btn.click(predict_top5_by_gp, inputs=[year, laps, gp_name], outputs=output)

# Start the app.
demo.launch()


* Running on local URL:  http://127.0.0.1:7887

To create a public link, set `share=True` in `launch()`.


