In [None]:
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
import gradio as gr

# === 模型定義 ===
class MultiTaskLSTMClassifier(nn.Module):
    """LSTM encoder whose last hidden state feeds three linear output heads.

    The heads produce driver logits, team logits and a position output; all
    three read the same hidden vector.
    """

    def __init__(self, input_size, hidden_size, num_classes_driver, num_classes_team, num_classes_pos):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, sequence, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.driver_head = nn.Linear(hidden_size, num_classes_driver)
        self.team_head = nn.Linear(hidden_size, num_classes_team)
        self.pos_head = nn.Linear(hidden_size, num_classes_pos)

    def forward(self, x):
        """Return ``(driver_out, team_out, pos_out)`` for a batch of sequences."""
        # self.lstm returns (output, (h_n, c_n)); keep h_n of the last layer.
        last_hidden = self.lstm(x)[1][0][-1]
        heads = (self.driver_head, self.team_head, self.pos_head)
        return tuple(head(last_hidden) for head in heads)

# === 預測函數 ===
def predict_top5_by_gp(year, laps, gp_name):
    """Train a fresh LSTM on past race winners and list the five likeliest winners.

    Every call reloads the four CSV files under ``./data``, joins them into a
    table of race wins, keeps the seasons in ``[max(1950, year - 20), year)``,
    trains a new ``MultiTaskLSTMClassifier`` on that window for 500 full-batch
    steps and ranks recently winning drivers by the temperature-scaled softmax
    of the driver head.

    Args:
        year: Season to predict. Coerced to ``int`` because UI number widgets
            may deliver a float.
        laps: Lap count of the race to predict; used as an input feature.
        gp_name: Grand Prix name; must occur in the training window.

    Returns:
        A newline-separated ranking string, or a short message when the
        history window is empty, the Grand Prix is unknown, or fewer than
        five recent winners are found.
    """
    # Normalise once: the year is used in integer arithmetic and as a feature.
    year = int(year)

    drivers = pd.read_csv("./data/drivers_updated.csv")
    winners = pd.read_csv("./data/winners.csv")
    teams = pd.read_csv("./data/teams_updated.csv")
    laps_data = pd.read_csv("./data/fastest_laps_updated.csv")

    # One row per race win, left-joined with the drivers, fastest-laps and teams tables.
    winners['year'] = pd.to_datetime(winners['Date']).dt.year
    df = winners.merge(drivers, left_on=['Winner', 'Car', 'year'], right_on=['Driver', 'Car', 'year'], how='left')
    df = df.merge(laps_data, left_on=['Grand Prix', 'Winner', 'Car', 'year'], right_on=['Grand Prix', 'Driver', 'Car', 'year'], how='left')
    df = df.merge(teams, left_on=['Car', 'year'], right_on=['Team', 'year'], how='left')
    df = df.dropna(subset=['Winner', 'Team', 'Grand Prix'])

    start_year = max(1950, year - 20)
    df = df[(df['year'] >= start_year) & (df['year'] < year)].copy()
    if df.empty:
        return "歷史資料不足"

    le_driver = LabelEncoder()
    le_team = LabelEncoder()
    le_gp = LabelEncoder()

    # The encoded winner/team of each row are the training targets.
    y_driver_enc = le_driver.fit_transform(df['Winner'].astype(str))
    y_team_enc = le_team.fit_transform(df['Team'].astype(str))
    df['GP_enc'] = le_gp.fit_transform(df['Grand Prix'].astype(str))

    # Reject an unknown Grand Prix before spending time on training.
    if gp_name not in le_gp.classes_:
        return "GP 名稱無效"
    gp_idx = le_gp.transform([gp_name])[0]

    main_features = ['Laps', 'year', 'GP_enc']
    for col in main_features:
        df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    n_seq = 10

    def get_seq_features(frame, driver, before_year):
        """Return the driver's latest ``n_seq`` earlier wins as a zero-padded feature matrix."""
        hist = frame[(frame['Winner'] == driver) & (frame['year'] < before_year)]
        feats = hist.sort_values('year', ascending=False).head(n_seq)[main_features].values
        if len(feats) < n_seq:
            pad = np.zeros((n_seq - len(feats), len(main_features)))
            feats = np.vstack([feats, pad])
        return feats

    sequences = [
        get_seq_features(df, winner, season)
        for winner, season in zip(df['Winner'], df['year'])
    ]
    if not sequences:
        return "序列資料不足"

    model = MultiTaskLSTMClassifier(
        input_size=len(main_features),
        hidden_size=64,
        num_classes_driver=len(le_driver.classes_),
        num_classes_team=len(le_team.classes_),
        num_classes_pos=1
    )

    X_tensor = torch.tensor(np.stack(sequences), dtype=torch.float32)
    y_driver_tensor = torch.tensor(y_driver_enc, dtype=torch.long)
    y_team_tensor = torch.tensor(y_team_enc, dtype=torch.long)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    model.train()
    for _ in range(500):
        optimizer.zero_grad()
        out_driver, out_team, _ = model(X_tensor)
        loss = criterion(out_driver, y_driver_tensor) + criterion(out_team, y_team_tensor)
        loss.backward()
        optimizer.step()

    # NOTE(review): the query repeats one feature row n_seq times, whereas the
    # training sequences are per-driver win histories with zero padding.
    input_feat = np.array([[laps, year - 1, gp_idx]])
    input_seq = np.repeat(input_feat, n_seq, axis=0)[np.newaxis, ...]
    input_tensor = torch.tensor(input_seq, dtype=torch.float32)

    model.eval()
    with torch.no_grad():
        out_driver, _, _ = model(input_tensor)
        # Dividing the logits by 5 flattens the distribution before softmax.
        probs_driver = torch.softmax(out_driver / 5, dim=1).cpu().numpy()[0]

    # Widen the "recent winner" window from 2 up to 10 seasons until at least
    # five drivers qualify.
    driver_years = df.groupby('Winner')['year'].apply(set).to_dict()
    active_indices = []
    for max_offset in range(2, 11):
        window = set(range(year - max_offset, year))
        active_indices = [
            i for i, name in enumerate(le_driver.classes_)
            if window & driver_years.get(name, set())
        ]
        if len(active_indices) >= 5:
            break

    if len(active_indices) < 5:
        return "查無足夠活躍車手"

    active_indices = np.array(active_indices)
    top5_idx = np.argsort(probs_driver[active_indices])[::-1][:5]
    final_driver_ids = active_indices[top5_idx]

    top5_names = le_driver.inverse_transform(final_driver_ids)
    top5_probs = probs_driver[final_driver_ids]

    # Team of each driver's most recent win inside the training window.
    last_team_map = (
        df.sort_values('year')
        .dropna(subset=['Winner', 'Team'])
        .drop_duplicates(subset=['Winner'], keep='last')
        .set_index('Winner')['Team']
        .to_dict()
    )

    result = []
    for rank, (driver, prob) in enumerate(zip(top5_names, top5_probs), start=1):
        team = last_team_map.get(driver, "未知車隊")
        confidence = prob * 100
        result.append(f"第 {rank} 名：{driver}（{team}） 信心度：{confidence:.2f}%")
    return "\n".join(result)

# === UI 元件 ===
# Fill the Grand Prix dropdown from the winners file. This is best effort:
# any failure to read or parse it falls back to a short built-in list, and
# the same fallback covers a file that yields no names (the dropdown default
# below indexes element 0).
try:
    winners_ui = pd.read_csv("./data/winners.csv")
    gp_names_for_ui = sorted(winners_ui["Grand Prix"].dropna().unique().tolist())
except Exception:
    gp_names_for_ui = []
if not gp_names_for_ui:
    gp_names_for_ui = ["Monaco", "Silverstone", "Suzuka"]

with gr.Blocks() as demo:
    gr.Markdown("# F1 駕駛與車隊預測")
    # precision=0 makes the widget deliver an integer season.
    year = gr.Number(label="年份", value=2025, precision=0)
    laps = gr.Number(label="圈數", value=56)
    gp_name = gr.Dropdown(choices=gp_names_for_ui, label="Grand Prix 名稱", value=gp_names_for_ui[0])
    output = gr.Textbox(label="預測結果", lines=10)
    btn = gr.Button("預測")
    btn.click(predict_top5_by_gp, inputs=[year, laps, gp_name], outputs=output)

demo.launch()


In [None]:
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
import gradio as gr

class MultiTaskLSTMClassifier(nn.Module):
    """Shared LSTM trunk with separate linear heads for driver, team and position."""

    def __init__(self, input_size, hidden_size, num_classes_driver, num_classes_team, num_classes_pos):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, sequence, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.driver_head = nn.Linear(hidden_size, num_classes_driver)
        self.team_head = nn.Linear(hidden_size, num_classes_team)
        self.pos_head = nn.Linear(hidden_size, num_classes_pos)

    def forward(self, x):
        """Apply every head to the last layer's final hidden state."""
        _output, state = self.lstm(x)
        final_hidden = state[0][-1]
        driver_out = self.driver_head(final_hidden)
        team_out = self.team_head(final_hidden)
        pos_out = self.pos_head(final_hidden)
        return driver_out, team_out, pos_out

def predict_top5_by_gp(year, laps, gp_name, show_team_rank=True, show_internal_rank=False):
    """Train a fresh LSTM on past race winners and report the five likeliest winners.

    Every call reloads the four CSV files under ``./data``, joins them into a
    table of race wins, keeps the seasons in ``[max(1950, year - 20), year)``,
    trains a new ``MultiTaskLSTMClassifier`` for 100 full-batch steps and ranks
    recently winning drivers by the temperature-scaled softmax of the driver
    head. Optional sections aggregate the five drivers by team.

    Args:
        year: Season to predict. Coerced to ``int`` because UI number widgets
            may deliver a float.
        laps: Lap count of the race to predict; used as an input feature.
        gp_name: Grand Prix name; must occur in the training window.
        show_team_rank: Append teams ordered by the summed confidence of
            their listed drivers.
        show_internal_rank: Append the listed drivers grouped by team.

    Returns:
        A newline-separated report string, or a short message when the
        history window is empty, the Grand Prix is unknown, or fewer than
        five recent winners are found.
    """
    # Normalise once: the year is used in integer arithmetic and as a feature.
    year = int(year)

    drivers = pd.read_csv("./data/drivers_updated.csv")
    winners = pd.read_csv("./data/winners.csv")
    teams = pd.read_csv("./data/teams_updated.csv")
    laps_data = pd.read_csv("./data/fastest_laps_updated.csv")

    # One row per race win, left-joined with the drivers, fastest-laps and teams tables.
    winners['year'] = pd.to_datetime(winners['Date']).dt.year
    df = winners.merge(drivers, left_on=['Winner', 'Car', 'year'], right_on=['Driver', 'Car', 'year'], how='left')
    df = df.merge(laps_data, left_on=['Grand Prix', 'Winner', 'Car', 'year'], right_on=['Grand Prix', 'Driver', 'Car', 'year'], how='left')
    df = df.merge(teams, left_on=['Car', 'year'], right_on=['Team', 'year'], how='left')
    df = df.dropna(subset=['Winner', 'Team', 'Grand Prix'])

    start_year = max(1950, year - 20)
    df = df[(df['year'] >= start_year) & (df['year'] < year)].copy()
    if df.empty:
        return "歷史資料不足"

    le_driver = LabelEncoder()
    le_team = LabelEncoder()
    le_gp = LabelEncoder()

    # The encoded winner/team of each row are the training targets.
    y_driver_enc = le_driver.fit_transform(df['Winner'].astype(str))
    y_team_enc = le_team.fit_transform(df['Team'].astype(str))
    df['GP_enc'] = le_gp.fit_transform(df['Grand Prix'].astype(str))

    # Reject an unknown Grand Prix before spending time on training.
    if gp_name not in le_gp.classes_:
        return "GP 名稱無效"
    gp_idx = le_gp.transform([gp_name])[0]

    main_features = ['Laps', 'year', 'GP_enc']
    for col in main_features:
        df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    n_seq = 10

    def get_seq_features(frame, driver, before_year):
        """Return the driver's latest ``n_seq`` earlier wins as a zero-padded feature matrix."""
        hist = frame[(frame['Winner'] == driver) & (frame['year'] < before_year)]
        feats = hist.sort_values('year', ascending=False).head(n_seq)[main_features].values
        if len(feats) < n_seq:
            pad = np.zeros((n_seq - len(feats), len(main_features)))
            feats = np.vstack([feats, pad])
        return feats

    sequences = [
        get_seq_features(df, winner, season)
        for winner, season in zip(df['Winner'], df['year'])
    ]
    if not sequences:
        return "序列資料不足"

    model = MultiTaskLSTMClassifier(
        input_size=len(main_features),
        hidden_size=64,
        num_classes_driver=len(le_driver.classes_),
        num_classes_team=len(le_team.classes_),
        num_classes_pos=1
    )

    X_tensor = torch.tensor(np.stack(sequences), dtype=torch.float32)
    y_driver_tensor = torch.tensor(y_driver_enc, dtype=torch.long)
    y_team_tensor = torch.tensor(y_team_enc, dtype=torch.long)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    model.train()
    for _ in range(100):
        optimizer.zero_grad()
        out_driver, out_team, _ = model(X_tensor)
        loss = criterion(out_driver, y_driver_tensor) + criterion(out_team, y_team_tensor)
        loss.backward()
        optimizer.step()

    # NOTE(review): the query repeats one feature row n_seq times, whereas the
    # training sequences are per-driver win histories with zero padding.
    input_feat = np.array([[laps, year - 1, gp_idx]])
    input_seq = np.repeat(input_feat, n_seq, axis=0)[np.newaxis, ...]
    input_tensor = torch.tensor(input_seq, dtype=torch.float32)

    model.eval()
    with torch.no_grad():
        out_driver, _, _ = model(input_tensor)
        # Dividing the logits by 1.5 flattens the distribution before softmax.
        probs_driver = torch.softmax(out_driver / 1.5, dim=1).cpu().numpy()[0]

    # Widen the "recent winner" window from 2 up to 10 seasons until at least
    # five drivers qualify.
    driver_years = df.groupby('Winner')['year'].apply(set).to_dict()
    active_indices = []
    for max_offset in range(2, 11):
        window = set(range(year - max_offset, year))
        active_indices = [
            i for i, name in enumerate(le_driver.classes_)
            if window & driver_years.get(name, set())
        ]
        if len(active_indices) >= 5:
            break

    if len(active_indices) < 5:
        return "查無足夠活躍車手"

    active_indices = np.array(active_indices)
    top5_idx = np.argsort(probs_driver[active_indices])[::-1][:5]
    final_driver_ids = active_indices[top5_idx]

    top5_names = le_driver.inverse_transform(final_driver_ids)
    top5_probs = probs_driver[final_driver_ids]

    # Team of each driver's most recent win inside the training window.
    last_team_map = (
        df.sort_values('year')
        .dropna(subset=['Winner', 'Team'])
        .drop_duplicates(subset=['Winner'], keep='last')
        .set_index('Winner')['Team']
        .to_dict()
    )

    # Assemble the report.
    result_lines = []
    driver_records = []

    for rank, (driver, prob) in enumerate(zip(top5_names, top5_probs), start=1):
        team = last_team_map.get(driver, "未知車隊")
        confidence = prob * 100
        result_lines.append(f"第 {rank} 名：{driver}（{team}） 信心度：{confidence:.2f}%")
        driver_records.append({'driver': driver, 'team': team, 'confidence': confidence})

    df_pred = pd.DataFrame(driver_records)

    if show_team_rank:
        result_lines.append("\n🏁 車隊總排名：")
        team_scores = df_pred.groupby('team')['confidence'].sum().sort_values(ascending=False)
        for idx, (team, score) in enumerate(team_scores.items(), 1):
            result_lines.append(f"{idx}. {team}（總信心度 {score:.2f}%）")

    if show_internal_rank:
        result_lines.append("\n👥 車隊內部排序：")
        internal_sorted = df_pred.sort_values(['team', 'confidence'], ascending=[True, False])
        for team in internal_sorted['team'].unique():
            result_lines.append(f"{team}：")
            for _, row in internal_sorted[internal_sorted['team'] == team].iterrows():
                result_lines.append(f"  {row['driver']}（{row['confidence']:.2f}%）")

    return "\n".join(result_lines)

# === UI ===
# Fill the Grand Prix dropdown from the winners file. This is best effort:
# any failure to read or parse it falls back to a short built-in list, and
# the same fallback covers a file that yields no names (the dropdown default
# below indexes element 0).
try:
    winners_ui = pd.read_csv("./data/winners.csv")
    gp_names_for_ui = sorted(winners_ui["Grand Prix"].dropna().unique().tolist())
except Exception:
    gp_names_for_ui = []
if not gp_names_for_ui:
    gp_names_for_ui = ["Monaco", "Silverstone", "Suzuka"]

with gr.Blocks() as demo:
    gr.Markdown("# F1 駕駛與車隊預測")
    # precision=0 makes the widget deliver an integer season.
    year = gr.Number(label="年份", value=2025, precision=0)
    laps = gr.Number(label="圈數", value=56)
    gp_name = gr.Dropdown(choices=gp_names_for_ui, label="Grand Prix 名稱", value=gp_names_for_ui[0])
    show_team_rank = gr.Checkbox(label="顯示車隊總排名", value=True)
    show_internal_rank = gr.Checkbox(label="顯示車隊內排序", value=False)
    output = gr.Textbox(label="預測結果", lines=20)
    btn = gr.Button("預測")
    btn.click(
        predict_top5_by_gp,
        inputs=[year, laps, gp_name, show_team_rank, show_internal_rank],
        outputs=output
    )

demo.launch()


In [None]:
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
import gradio as gr

# === 模型定義 ===
class MultiTaskLSTMClassifier(nn.Module):
    """Multi-task network: one LSTM encoder, three linear heads on its last hidden state."""

    def __init__(self, input_size, hidden_size, num_classes_driver, num_classes_team, num_classes_pos):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, sequence, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.driver_head = nn.Linear(hidden_size, num_classes_driver)
        self.team_head = nn.Linear(hidden_size, num_classes_team)
        self.pos_head = nn.Linear(hidden_size, num_classes_pos)

    def forward(self, x):
        """Return the driver, team and position outputs, in that order."""
        # Only the hidden state is needed; per-step outputs and the cell state are dropped.
        hidden_states = self.lstm(x)[1][0]
        summary = hidden_states[-1]
        return (
            self.driver_head(summary),
            self.team_head(summary),
            self.pos_head(summary),
        )

# === 預測函數 ===
def predict_top5_by_gp(year, laps, gp_name, show_team_rank=True, show_internal_rank=False):
    """Train a fresh LSTM on past race winners and report the five likeliest winners.

    Every call reloads the four CSV files under ``./data``, joins them into a
    table of race wins, keeps the seasons in ``[max(1950, year - 10), year)``,
    trains a new ``MultiTaskLSTMClassifier`` for 500 full-batch steps and ranks
    drivers who won in the previous five seasons by the temperature-scaled
    softmax of the driver head. The top five are listed; the optional team
    sections aggregate the top fifteen.

    Args:
        year: Season to predict. Coerced to ``int``: UI number widgets may
            deliver a float, and ``range`` below requires integers.
        laps: Lap count of the race to predict; used as an input feature.
        gp_name: Grand Prix name; must occur in the training window.
        show_team_rank: Append teams ordered by the summed confidence of
            their ranked drivers.
        show_internal_rank: Append the ranked drivers grouped by team.

    Returns:
        A newline-separated report string, or a short message when the
        history window is empty, the Grand Prix is unknown, or fewer than
        five recent winners are found.
    """
    # A float year would make range(year - 5, year) raise TypeError.
    year = int(year)

    drivers = pd.read_csv("./data/drivers_updated.csv")
    winners = pd.read_csv("./data/winners.csv")
    teams = pd.read_csv("./data/teams_updated.csv")
    laps_data = pd.read_csv("./data/fastest_laps_updated.csv")

    # One row per race win, left-joined with the drivers, fastest-laps and teams tables.
    winners['year'] = pd.to_datetime(winners['Date']).dt.year
    df = winners.merge(drivers, left_on=['Winner', 'Car', 'year'], right_on=['Driver', 'Car', 'year'], how='left')
    df = df.merge(laps_data, left_on=['Grand Prix', 'Winner', 'Car', 'year'], right_on=['Grand Prix', 'Driver', 'Car', 'year'], how='left')
    df = df.merge(teams, left_on=['Car', 'year'], right_on=['Team', 'year'], how='left')
    df = df.dropna(subset=['Winner', 'Team', 'Grand Prix'])

    start_year = max(1950, year - 10)
    df = df[(df['year'] >= start_year) & (df['year'] < year)].copy()
    if df.empty:
        return "歷史資料不足"

    le_driver = LabelEncoder()
    le_team = LabelEncoder()
    le_gp = LabelEncoder()

    # The encoded winner/team of each row are the training targets.
    y_driver_enc = le_driver.fit_transform(df['Winner'].astype(str))
    y_team_enc = le_team.fit_transform(df['Team'].astype(str))
    df['GP_enc'] = le_gp.fit_transform(df['Grand Prix'].astype(str))

    # Reject an unknown Grand Prix before spending time on training.
    if gp_name not in le_gp.classes_:
        return "GP 名稱無效"
    gp_idx = le_gp.transform([gp_name])[0]

    main_features = ['Laps', 'year', 'GP_enc']
    for col in main_features:
        df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    n_seq = 10

    def get_seq_features(frame, driver, before_year):
        """Return the driver's latest ``n_seq`` earlier wins as a zero-padded feature matrix."""
        hist = frame[(frame['Winner'] == driver) & (frame['year'] < before_year)]
        feats = hist.sort_values('year', ascending=False).head(n_seq)[main_features].values
        if len(feats) < n_seq:
            pad = np.zeros((n_seq - len(feats), len(main_features)))
            feats = np.vstack([feats, pad])
        return feats

    sequences = [
        get_seq_features(df, winner, season)
        for winner, season in zip(df['Winner'], df['year'])
    ]
    if not sequences:
        return "序列資料不足"

    model = MultiTaskLSTMClassifier(
        input_size=len(main_features),
        hidden_size=64,
        num_classes_driver=len(le_driver.classes_),
        num_classes_team=len(le_team.classes_),
        num_classes_pos=1
    )

    X_tensor = torch.tensor(np.stack(sequences), dtype=torch.float32)
    y_driver_tensor = torch.tensor(y_driver_enc, dtype=torch.long)
    y_team_tensor = torch.tensor(y_team_enc, dtype=torch.long)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    model.train()
    for _ in range(500):
        optimizer.zero_grad()
        out_driver, out_team, _ = model(X_tensor)
        loss = criterion(out_driver, y_driver_tensor) + criterion(out_team, y_team_tensor)
        loss.backward()
        optimizer.step()

    # NOTE(review): the query repeats one feature row n_seq times, whereas the
    # training sequences are per-driver win histories with zero padding.
    input_feat = np.array([[laps, year - 1, gp_idx]])
    input_seq = np.repeat(input_feat, n_seq, axis=0)[np.newaxis, ...]
    input_tensor = torch.tensor(input_seq, dtype=torch.float32)

    model.eval()
    with torch.no_grad():
        out_driver, _, _ = model(input_tensor)
        # Dividing the logits by 1.5 flattens the distribution before softmax.
        probs_driver = torch.softmax(out_driver / 1.5, dim=1).cpu().numpy()[0]

    # Active-driver filter: keep only drivers with a win in the previous five
    # seasons (the table holds race wins, not all entries).
    recent_years = set(range(year - 5, year))
    driver_years = df.groupby('Winner')['year'].apply(set).to_dict()
    active_indices = [
        i for i, name in enumerate(le_driver.classes_)
        if recent_years & driver_years.get(name, set())
    ]
    if len(active_indices) < 5:
        return "查無足夠活躍車手"

    active_indices = np.array(active_indices)
    sorted_idx = np.argsort(probs_driver[active_indices])[::-1]
    # Keep up to fifteen drivers so the team sections see more than the top five.
    final_driver_ids = active_indices[sorted_idx][:15]

    # Latest team per driver, taken from the whole winners file rather than
    # only the training window.
    full_df = winners.merge(teams, left_on=['Car', 'year'], right_on=['Team', 'year'], how='left')
    full_df = full_df.dropna(subset=['Winner', 'Team'])
    last_team_map = (
        full_df.sort_values('year')
        .drop_duplicates(subset=['Winner'], keep='last')
        .set_index('Winner')['Team']
        .to_dict()
    )

    driver_records = []
    for idx in final_driver_ids:
        driver = le_driver.classes_[idx]
        team = last_team_map.get(driver, "未知車隊")
        confidence = probs_driver[idx] * 100
        driver_records.append({'driver': driver, 'team': team, 'confidence': confidence})

    df_pred = pd.DataFrame(driver_records)

    result_lines = []
    for rank, (_, row) in enumerate(df_pred.head(5).iterrows(), start=1):
        result_lines.append(f"第 {rank} 名：{row['driver']}（{row['team']}） 信心度：{row['confidence']:.2f}%")

    if show_team_rank:
        result_lines.append("\n🏁 車隊總排名：")
        team_scores = df_pred.groupby('team')['confidence'].sum().sort_values(ascending=False)
        for idx, (team, score) in enumerate(team_scores.items(), 1):
            result_lines.append(f"{idx}. {team}（總信心度 {score:.2f}%）")

    if show_internal_rank:
        result_lines.append("\n👥 車隊內部排序：")
        internal_sorted = df_pred.sort_values(['team', 'confidence'], ascending=[True, False])
        for team in internal_sorted['team'].unique():
            result_lines.append(f"{team}：")
            for _, row in internal_sorted[internal_sorted['team'] == team].iterrows():
                result_lines.append(f"  {row['driver']}（{row['confidence']:.2f}%）")

    return "\n".join(result_lines)

# === Gradio UI ===
# Fill the Grand Prix dropdown from the winners file. This is best effort:
# any failure to read or parse it falls back to a short built-in list, and
# the same fallback covers a file that yields no names (the dropdown default
# below indexes element 0).
try:
    winners_ui = pd.read_csv("./data/winners.csv")
    gp_names_for_ui = sorted(winners_ui["Grand Prix"].dropna().unique().tolist())
except Exception:
    gp_names_for_ui = []
if not gp_names_for_ui:
    gp_names_for_ui = ["Monaco", "Silverstone", "Suzuka"]

with gr.Blocks() as demo:
    gr.Markdown("# F1 駕駛與車隊預測")
    # precision=0 makes the widget deliver an integer season; the prediction
    # function builds a range() from it.
    year = gr.Number(label="年份", value=2025, precision=0)
    laps = gr.Number(label="圈數", value=56)
    gp_name = gr.Dropdown(choices=gp_names_for_ui, label="Grand Prix 名稱", value=gp_names_for_ui[0])
    show_team_rank = gr.Checkbox(label="顯示車隊總排名", value=True)
    show_internal_rank = gr.Checkbox(label="顯示車隊內排序", value=False)
    output = gr.Textbox(label="預測結果", lines=20)
    btn = gr.Button("預測")
    btn.click(
        predict_top5_by_gp,
        inputs=[year, laps, gp_name, show_team_rank, show_internal_rank],
        outputs=output
    )

demo.launch()

In [None]:
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
import gradio as gr

# === 模型定義 ===
class MultiTaskLSTMClassifier(nn.Module):
    """LSTM sequence encoder with driver, team and position heads sharing one hidden vector."""

    def __init__(self, input_size, hidden_size, num_classes_driver, num_classes_team, num_classes_pos):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, sequence, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.driver_head = nn.Linear(hidden_size, num_classes_driver)
        self.team_head = nn.Linear(hidden_size, num_classes_team)
        self.pos_head = nn.Linear(hidden_size, num_classes_pos)

    def forward(self, x):
        """Encode ``x`` and return ``(driver_out, team_out, pos_out)``."""
        _seq_out, (h_final, _c_final) = self.lstm(x)
        # Index -1 selects the last LSTM layer's final hidden state.
        encoded = h_final[-1]
        outputs = [head(encoded) for head in (self.driver_head, self.team_head, self.pos_head)]
        return tuple(outputs)

# === 預測函數 ===
def predict_all_by_gp(year, laps, gp_name, show_team_rank=True, show_internal_rank=False):
    """Train a fresh LSTM on past race winners and rank every recently winning driver.

    Every call reloads the four CSV files under ``./data``, joins them into a
    table of race wins, keeps the seasons in ``[max(1950, year - 20), year)``
    and trains a new ``MultiTaskLSTMClassifier`` for 100 full-batch steps.
    Per-sample losses are weighted by ``exp(-(year - season))`` so recent
    seasons dominate. All drivers who won in the previous four seasons are
    listed, ordered by the temperature-scaled softmax of the driver head.

    Args:
        year: Season to predict. Coerced to ``int``: UI number widgets may
            deliver a float, and ``range`` below requires integers.
        laps: Lap count of the race to predict; used as an input feature.
        gp_name: Grand Prix name; must occur in the training window.
        show_team_rank: Append teams ordered by the summed confidence of
            their ranked drivers.
        show_internal_rank: Append the ranked drivers grouped by team.

    Returns:
        A newline-separated report string, or a short message when the
        history window is empty, the Grand Prix is unknown, or no recent
        winner is found.
    """
    # A float year would make range(year - 4, year) raise TypeError.
    year = int(year)

    drivers = pd.read_csv("./data/drivers_updated.csv")
    winners = pd.read_csv("./data/winners.csv")
    teams = pd.read_csv("./data/teams_updated.csv")
    laps_data = pd.read_csv("./data/fastest_laps_updated.csv")

    # One row per race win, left-joined with the drivers, fastest-laps and teams tables.
    winners["year"] = pd.to_datetime(winners["Date"]).dt.year
    df = winners.merge(drivers, left_on=["Winner", "Car", "year"], right_on=["Driver", "Car", "year"], how="left")
    df = df.merge(laps_data, left_on=["Grand Prix", "Winner", "Car", "year"], right_on=["Grand Prix", "Driver", "Car", "year"], how="left")
    df = df.merge(teams, left_on=["Car", "year"], right_on=["Team", "year"], how="left")
    df = df.dropna(subset=["Winner", "Team", "Grand Prix"])

    start_year = max(1950, year - 20)
    df = df[(df["year"] >= start_year) & (df["year"] < year)].copy()
    if df.empty:
        return "歷史資料不足"

    le_driver = LabelEncoder()
    le_team = LabelEncoder()
    le_gp = LabelEncoder()

    # The encoded winner/team of each row are the training targets.
    y_driver_enc = le_driver.fit_transform(df["Winner"].astype(str))
    y_team_enc = le_team.fit_transform(df["Team"].astype(str))
    df["GP_enc"] = le_gp.fit_transform(df["Grand Prix"].astype(str))

    # Reject an unknown Grand Prix before spending time on training.
    if gp_name not in le_gp.classes_:
        return "GP 名稱無效"
    gp_idx = le_gp.transform([gp_name])[0]

    main_features = ["Laps", "year", "GP_enc"]
    df[main_features] = df[main_features].apply(pd.to_numeric, errors="coerce").fillna(0)

    n_seq = 10

    def get_seq_features(frame, driver, before_year):
        """Return the driver's latest ``n_seq`` earlier wins as a zero-padded feature matrix."""
        hist = frame[(frame["Winner"] == driver) & (frame["year"] < before_year)]
        feats = hist.sort_values("year", ascending=False).head(n_seq)[main_features].values
        if len(feats) < n_seq:
            pad = np.zeros((n_seq - len(feats), len(main_features)))
            feats = np.vstack([feats, pad])
        return feats

    sequences = [
        get_seq_features(df, winner, season)
        for winner, season in zip(df["Winner"], df["year"])
    ]
    if not sequences:
        return "序列資料不足"

    # Every training row is a win, so the position target is the constant 1.
    y_pos_tensor = torch.ones(len(sequences), dtype=torch.float32)

    model = MultiTaskLSTMClassifier(
        input_size=len(main_features),
        hidden_size=64,
        num_classes_driver=len(le_driver.classes_),
        num_classes_team=len(le_team.classes_),
        num_classes_pos=1
    )

    X_tensor = torch.tensor(np.stack(sequences), dtype=torch.float32)
    y_driver_tensor = torch.tensor(y_driver_enc, dtype=torch.long)
    y_team_tensor = torch.tensor(y_team_enc, dtype=torch.long)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # reduction="none" keeps per-sample losses so they can be weighted by recency.
    criterion = nn.CrossEntropyLoss(reduction="none")
    criterion_pos = nn.MSELoss()

    weights = np.exp(-(year - df["year"].values))
    weights_tensor = torch.tensor(weights, dtype=torch.float32)

    model.train()
    for _ in range(100):
        optimizer.zero_grad()
        out_driver, out_team, out_pos = model(X_tensor)
        loss_driver = criterion(out_driver, y_driver_tensor)
        loss_team = criterion(out_team, y_team_tensor)
        # squeeze(-1) keeps the batch axis even for a one-row batch, so the
        # prediction and target shapes always match.
        loss_pos = criterion_pos(out_pos.squeeze(-1), y_pos_tensor)
        loss = (loss_driver * weights_tensor).mean() + (loss_team * weights_tensor).mean() + loss_pos
        loss.backward()
        optimizer.step()

    # NOTE(review): the query repeats one feature row n_seq times, whereas the
    # training sequences are per-driver win histories with zero padding.
    input_feat = np.array([[laps, year - 1, gp_idx]])
    input_seq = np.repeat(input_feat, n_seq, axis=0)[np.newaxis, ...]
    input_tensor = torch.tensor(input_seq, dtype=torch.float32)

    model.eval()
    with torch.no_grad():
        out_driver, _, _ = model(input_tensor)
        # Dividing the logits by 3 flattens the distribution before softmax.
        probs_driver = torch.softmax(out_driver / 3, dim=1).cpu().numpy()[0]

    # Keep only drivers with a win in the previous four seasons.
    recent_years = set(range(year - 4, year))
    driver_years = df.groupby("Winner")["year"].apply(set).to_dict()
    active_indices = [
        i for i, name in enumerate(le_driver.classes_)
        if recent_years & driver_years.get(name, set())
    ]
    if not active_indices:
        return "查無足夠活躍車手"

    active_indices = np.array(active_indices)
    sorted_idx = np.argsort(probs_driver[active_indices])[::-1]
    final_driver_ids = active_indices[sorted_idx]  # no cut-off: every active driver is ranked

    # Latest team per driver, taken from the whole winners file rather than
    # only the training window.
    full_df = winners.merge(teams, left_on=["Car", "year"], right_on=["Team", "year"], how="left")
    full_df = full_df.dropna(subset=["Winner", "Team"])
    last_team_map = (
        full_df.sort_values("year")
        .drop_duplicates(subset=["Winner"], keep="last")
        .set_index("Winner")["Team"]
        .to_dict()
    )

    driver_records = []
    for idx in final_driver_ids:
        driver = le_driver.classes_[idx]
        team = last_team_map.get(driver, "未知車隊")
        confidence = probs_driver[idx] * 100
        driver_records.append({"driver": driver, "team": team, "confidence": confidence})

    df_pred = pd.DataFrame(driver_records)

    result_lines = []
    for rank, (_, row) in enumerate(df_pred.iterrows(), start=1):  # list every driver
        result_lines.append(f"第 {rank} 名：{row['driver']}（{row['team']}） 信心度：{row['confidence']:.2f}%")

    if show_team_rank:
        result_lines.append("\n🏁 車隊總排名：")
        team_scores = df_pred.groupby("team")["confidence"].sum().sort_values(ascending=False)
        for idx, (team, score) in enumerate(team_scores.items(), 1):
            result_lines.append(f"{idx}. {team}（信心度 {score:.2f}%）")

    if show_internal_rank:
        result_lines.append("\n👥 車隊內部排序：")
        internal_sorted = df_pred.sort_values(["team", "confidence"], ascending=[True, False])
        for team in internal_sorted["team"].unique():
            result_lines.append(f"{team}：")
            for _, row in internal_sorted[internal_sorted["team"] == team].iterrows():
                result_lines.append(f"  {row['driver']}（{row['confidence']:.2f}%）")

    return "\n".join(result_lines)



# === Gradio UI ===
# Fill the Grand Prix dropdown from the winners file. This is best effort:
# any failure to read or parse it falls back to a short built-in list, and
# the same fallback covers a file that yields no names (the dropdown default
# below indexes element 0).
try:
    winners_ui = pd.read_csv("./data/winners.csv")
    gp_names_for_ui = sorted(winners_ui["Grand Prix"].dropna().unique().tolist())
except Exception:
    gp_names_for_ui = []
if not gp_names_for_ui:
    gp_names_for_ui = ["Monaco", "Silverstone", "Suzuka"]

with gr.Blocks() as demo:
    gr.Markdown("# F1 駕駛與車隊預測")
    # precision=0 makes the widget deliver an integer season; the prediction
    # function builds a range() from it.
    year = gr.Number(label="年份", value=2025, precision=0)
    laps = gr.Number(label="圈數", value=56)
    gp_name = gr.Dropdown(choices=gp_names_for_ui, label="Grand Prix 名稱", value=gp_names_for_ui[0])
    show_team_rank = gr.Checkbox(label="顯示車隊總排名", value=True)
    show_internal_rank = gr.Checkbox(label="顯示車隊內排序", value=False)
    output = gr.Textbox(label="預測結果", lines=20)
    btn = gr.Button("預測")
    # Wire the button to the function defined in this cell; the previous
    # reference to predict_top5_by_gp pointed at an earlier cell's definition.
    btn.click(
        predict_all_by_gp,
        inputs=[year, laps, gp_name, show_team_rank, show_internal_rank],
        outputs=output
    )

demo.launch()


In [None]:
# Consolidated script: model definition, data loading, the prediction function
# (including the position head) and a usage example.
import os
# Must be set before the numeric libraries are imported.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
import gradio as gr

# Defined after `import torch` (it previously ran before the import and raised
# NameError on a fresh kernel); falls back to the CPU when CUDA is unavailable.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data loading
drivers_df = pd.read_csv("./data/drivers_updated.csv")
winners_df = pd.read_csv("./data/winners.csv")
teams_df = pd.read_csv("./data/teams_updated.csv")
laps_df = pd.read_csv("./data/fastest_laps_updated.csv")

# 模型定義
class MultiTaskLSTMClassifier(nn.Module):
    """One LSTM encoder plus three linear heads (driver, team, position)."""

    def __init__(self, input_size, hidden_size, num_classes_driver, num_classes_team, num_classes_pos):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, sequence, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.driver_head = nn.Linear(hidden_size, num_classes_driver)
        self.team_head = nn.Linear(hidden_size, num_classes_team)
        self.pos_head = nn.Linear(hidden_size, num_classes_pos)

    def forward(self, x):
        """Run the encoder and return ``(driver_out, team_out, pos_out)``."""
        lstm_result = self.lstm(x)
        # lstm_result is (output, (h_n, c_n)); take h_n of the last layer.
        hidden_vec = lstm_result[1][0][-1]
        driver_out = self.driver_head(hidden_vec)
        team_out = self.team_head(hidden_vec)
        return driver_out, team_out, self.pos_head(hidden_vec)

# Main prediction function: per-driver confidence plus a position score
def predict_all_by_gp_with_pos(year, laps, gp_name, show_team_rank=True, show_internal_rank=False):
    """Train a small multi-task LSTM on past winners and rank recently active drivers.

    A fresh model is built and trained on every call (100 Adam steps on the
    full batch); no random seed is set here, so numbers may differ between calls.

    Args:
        year: Target season. History is limited to
            ``[max(1950, year - 20), year)``. Coerced to ``int`` because the UI
            may deliver a float.
        laps: Accepted for interface compatibility; not used in the computation.
        gp_name: Grand Prix name. It must appear in the history window; it is
            only validated and does not otherwise influence the prediction.
        show_team_rank: Append a per-team ranking (sum of driver confidences).
        show_internal_rank: Append drivers grouped by team, ordered by confidence.

    Returns:
        str: A multi-line report, or a short message when the inputs or the
        available history make a prediction impossible.
    """
    # `range()` below needs an int; a float or None would otherwise raise
    # TypeError.
    try:
        year = int(year)
    except (TypeError, ValueError, OverflowError):
        return "年份無效"

    # Work on copies so the module-level frames are never mutated.
    drivers = drivers_df.copy()
    winners = winners_df.copy()
    teams = teams_df.copy()
    laps_data = laps_df.copy()

    winners["year"] = pd.to_datetime(winners["Date"]).dt.year
    df = winners.merge(drivers, left_on=["Winner", "Car", "year"], right_on=["Driver", "Car", "year"], how="left")
    df = df.merge(laps_data, left_on=["Grand Prix", "Winner", "Car", "year"], right_on=["Grand Prix", "Driver", "Car", "year"], how="left")
    df = df.merge(teams, left_on=["Car", "year"], right_on=["Team", "year"], how="left")
    df = df.dropna(subset=["Winner", "Team", "Grand Prix"])

    start_year = max(1950, year - 20)
    df = df[(df["year"] >= start_year) & (df["year"] < year)].copy()
    if df.empty:
        return "歷史資料不足"

    le_driver = LabelEncoder()
    le_team = LabelEncoder()
    le_gp = LabelEncoder()
    df["Winner_enc"] = le_driver.fit_transform(df["Winner"].astype(str))
    df["Team_enc"] = le_team.fit_transform(df["Team"].astype(str))
    df["GP_enc"] = le_gp.fit_transform(df["Grand Prix"].astype(str))

    main_features = ["Laps", "year", "GP_enc"]
    df[main_features] = df[main_features].apply(pd.to_numeric, errors="coerce").fillna(0)

    # Validate the request before the (comparatively slow) training loop;
    # neither check depends on the trained model.
    if gp_name not in le_gp.classes_:
        return "GP 名稱無效"

    recent_years = set(range(year - 4, year))
    driver_years = df.groupby("Winner")["year"].apply(set).to_dict()
    active_indices = [
        i for i, name in enumerate(le_driver.classes_)
        if recent_years & driver_years.get(name, set())
    ]
    if not active_indices:
        return "查無足夠活躍車手"

    n_seq = 10

    def get_seq_features(frame, driver, before_year):
        """Return an (n_seq, n_features) array of the driver's latest wins before `before_year`.

        Rows are ordered newest first and zero-padded at the end when the
        driver has fewer than `n_seq` earlier wins.
        """
        hist = frame[(frame["Winner"] == driver) & (frame["year"] < before_year)].sort_values("year", ascending=False)
        feats = hist.head(n_seq)[main_features].values
        if len(feats) < n_seq:
            pad = np.zeros((n_seq - len(feats), len(main_features)))
            feats = np.vstack([feats, pad])
        return feats

    # One training sample per historical win: the winner's earlier wins as
    # the input sequence, the winner/team of that race as the labels.
    X_seq = np.stack([
        get_seq_features(df, winner, win_year)
        for winner, win_year in zip(df["Winner"], df["year"])
    ])

    X_tensor = torch.tensor(X_seq, dtype=torch.float32)
    y_driver_tensor = torch.tensor(df["Winner_enc"].to_numpy(), dtype=torch.long)
    y_team_tensor = torch.tensor(df["Team_enc"].to_numpy(), dtype=torch.long)
    # Every training row is a race winner, so the position target is the constant 1.
    y_pos_tensor = torch.ones(len(df), dtype=torch.float32)

    model = MultiTaskLSTMClassifier(
        input_size=len(main_features),
        hidden_size=64,
        num_classes_driver=len(le_driver.classes_),
        num_classes_team=len(le_team.classes_),
        num_classes_pos=1
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # reduction="none" keeps per-sample losses so they can be weighted below.
    criterion = nn.CrossEntropyLoss(reduction="none")
    criterion_pos = nn.MSELoss()

    # Exponential recency weighting: a win k years before `year` gets weight e^-k.
    weights = np.exp(-(year - df["year"].values))
    weights_tensor = torch.tensor(weights, dtype=torch.float32)

    model.train()
    for _ in range(100):
        optimizer.zero_grad()
        out_driver, out_team, out_pos = model(X_tensor)
        loss_driver = criterion(out_driver, y_driver_tensor)
        loss_team = criterion(out_team, y_team_tensor)
        # squeeze(-1) keeps the batch dimension even for a single-row batch,
        # so the prediction always matches the target's shape.
        loss_pos = criterion_pos(out_pos.squeeze(-1), y_pos_tensor)
        loss = (loss_driver * weights_tensor).mean() + (loss_team * weights_tensor).mean() + loss_pos
        loss.backward()
        optimizer.step()

    # Most recent team per winner, taken from the full (unwindowed) history.
    full_df = winners.merge(teams, left_on=["Car", "year"], right_on=["Team", "year"], how="left")
    full_df = full_df.dropna(subset=["Winner", "Team"])
    last_team_map = (
        full_df.sort_values("year")
        .drop_duplicates(subset=["Winner"], keep="last")
        .set_index("Winner")["Team"]
        .to_dict()
    )

    model.eval()
    driver_records = []
    with torch.no_grad():
        for idx in active_indices:
            driver = le_driver.classes_[idx]
            team = last_team_map.get(driver, "未知車隊")
            input_tensor = torch.tensor(
                np.expand_dims(get_seq_features(df, driver, year), axis=0),
                dtype=torch.float32,
            )
            out_driver, _, out_pos = model(input_tensor)
            # Temperature 3 flattens the softmax; the confidence is the
            # probability the model assigns to this driver's own class.
            confidence = torch.softmax(out_driver / 3, dim=1)[0, idx].item() * 100
            position_score = out_pos.item()
            driver_records.append({"driver": driver, "team": team, "confidence": confidence, "position_score": position_score})

    # reset_index so positions reflect the sorted order; the rank label must
    # come from the sorted position, not the pre-sort index label.
    df_pred = (
        pd.DataFrame(driver_records)
        .sort_values(by="confidence", ascending=False)
        .reset_index(drop=True)
    )

    result_lines = []
    for rank, row in enumerate(df_pred.itertuples(index=False), 1):
        result_lines.append(f"第 {rank} 名：{row.driver}（{row.team}） 信心度：{row.confidence:.2f}%、位置分數：{row.position_score:.2f}")

    if show_team_rank:
        result_lines.append("\n🏁 車隊總排名：")
        team_scores = df_pred.groupby("team")["confidence"].sum().sort_values(ascending=False)
        for idx, (team, score) in enumerate(team_scores.items(), 1):
            result_lines.append(f"{idx}. {team}（信心度 {score:.2f}%）")

    if show_internal_rank:
        result_lines.append("\n👥 車隊內部排序：")
        internal_sorted = df_pred.sort_values(["team", "confidence"], ascending=[True, False])
        for team in internal_sorted["team"].unique():
            result_lines.append(f"{team}：")
            for _, row in internal_sorted[internal_sorted["team"] == team].iterrows():
                result_lines.append(f"  {row['driver']}（{row['confidence']:.2f}%）")

    return "\n".join(result_lines)



# === Gradio UI ===
# Build the Grand Prix dropdown choices from the winners CSV. This is a
# best-effort load: any ordinary failure (missing file, missing column,
# unparsable data) falls through to the default list below.
try:
    winners_ui = pd.read_csv("./data/winners.csv")
    gp_names_for_ui = sorted(winners_ui["Grand Prix"].dropna().unique().tolist())
except Exception:
    # `Exception` (not a bare `except`) so Ctrl-C / SystemExit still propagate.
    gp_names_for_ui = []
if not gp_names_for_ui:
    # Also covers a CSV that loads but has no names, which would otherwise
    # make `gp_names_for_ui[0]` below raise IndexError.
    gp_names_for_ui = ["Monaco", "Silverstone", "Suzuka"]

with gr.Blocks() as demo:
    gr.Markdown("# F1 駕駛與車隊預測")
    # precision=0 makes Gradio hand the callback integers; the callback uses
    # the year in `range()`, which rejects floats.
    year = gr.Number(label="年份", value=2025, precision=0)
    laps = gr.Number(label="圈數", value=56, precision=0)
    gp_name = gr.Dropdown(choices=gp_names_for_ui, label="Grand Prix 名稱", value=gp_names_for_ui[0])
    show_team_rank = gr.Checkbox(label="顯示車隊總排名", value=True)
    show_internal_rank = gr.Checkbox(label="顯示車隊內排序", value=False)
    output = gr.Textbox(label="預測結果", lines=20)
    btn = gr.Button("預測")
    # The five inputs map positionally onto the callback's parameters:
    # (year, laps, gp_name, show_team_rank, show_internal_rank).
    btn.click(
        predict_all_by_gp_with_pos,
        inputs=[year, laps, gp_name, show_team_rank, show_internal_rank],
        outputs=output
    )

demo.launch()


* Running on local URL:  http://127.0.0.1:7892

To create a public link, set `share=True` in `launch()`.


