In [1]:
# 1) Imports & device
import time
import numpy as np
import pandas as pd
from typing import Tuple, Dict, Any, Optional

import torch
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    f1_score,
    mean_absolute_error,
    mean_squared_error,
)
from sklearn.model_selection import train_test_split

from tabpfn import TabPFNClassifier, TabPFNRegressor

from relbench.datasets import get_dataset
from relbench.tasks import get_task

def get_device() -> str:
    if torch.backends.mps.is_available():
        return "mps"
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"

DEVICE = get_device()
DEVICE


'mps'

In [2]:
# 2) Utilities to extract pandas DataFrames from RelBench tables
def table_to_pandas(table) -> pd.DataFrame:
    # RelBench Table typically exposes .to_pandas(); fallbacks included
    if hasattr(table, "to_pandas"):
        return table.to_pandas()
    if hasattr(table, "df"):
        return table.df.copy()
    if hasattr(table, "data"):
        return pd.DataFrame(table.data)
    raise RuntimeError("Unsupported RelBench Table object; cannot materialize to pandas.")

def get_db_table_names(db) -> list:
    if hasattr(db, "list_tables"):
        return list(db.list_tables())
    if hasattr(db, "tables"):
        return list(db.tables.keys())
    if hasattr(db, "get_table_names"):
        return list(db.get_table_names())
    if hasattr(db, "table_dict"):
        return list(db.table_dict.keys())
    # Check for specifically named attributes like 'results', 'drivers', etc. that are common in F1 dataset
    known_f1_tables = ["drivers", "races", "results", "circuits", "standings", "constructors",
                     "constructor_results", "constructor_standings", "qualifying"]
    if all(hasattr(db, table) for table in known_f1_tables[:4]):  # Check at least a few core tables exist
        return known_f1_tables

    raise RuntimeError("Unsupported RelBench Database object; cannot list tables. "
                      "Please provide a custom table_names list to build_merged_view.")

def get_db_table_df(db, name: str) -> pd.DataFrame:
    if hasattr(db, "get_table"):
        t = db.get_table(name)
        return table_to_pandas(t)
    # common fallback attributes
    if hasattr(db, "tables") and name in db.tables:
        return table_to_pandas(db.tables[name])
    if hasattr(db, "table_dict") and name in db.table_dict:
        return table_to_pandas(db.table_dict[name])
    if hasattr(db, name):
        # Try to get the table as a direct attribute
        return table_to_pandas(getattr(db, name))
    raise KeyError(f"Table '{name}' not found in Database.")


In [3]:
# 3) Light preprocessing for TabPFN
def preprocess_df(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    # Convert datetime-like columns to numeric ordinals
    for col in df.columns:
        if np.issubdtype(df[col].dtype, np.datetime64):
            df[col] = pd.to_datetime(df[col]).map(pd.Timestamp.toordinal).astype("int64")
        # Some datasets encode timestamps as strings
        elif df[col].dtype == "object":
            # try parse; if fails, keep as string (TabPFN can handle categoricals)
            try:
                # Use more explicit parsing options to avoid warnings
                if any(df[col].str.contains('-', na=False)):
                    # Likely ISO format
                    parsed = pd.to_datetime(df[col], format='%Y-%m-%d', errors="coerce")
                    if parsed.isna().mean() > 0.5:  # If most values failed, try more flexible format
                        parsed = pd.to_datetime(df[col], infer_datetime_format=True, errors="coerce")
                else:
                    # Try common formats
                    parsed = pd.to_datetime(df[col], infer_datetime_format=True, errors="coerce")
                
                # Only convert if parsing was successful
                if parsed.isna().mean() < 0.5:  # More than half of values parsed successfully
                    df[col] = parsed.map(pd.Timestamp.toordinal).astype("float64")
                    # Fill NaN values with median to avoid downstream issues
                    df[col] = df[col].fillna(df[col].median())
            except Exception:
                pass
    return df

def split_X_y(df: pd.DataFrame, target_name: Optional[str] = None) -> Tuple[pd.DataFrame, pd.Series]:
    if target_name is None:
        # common target name candidates in RelBench tasks
        for cand in ["target", "label", "y", "dnf", "top3", "position"]:
            if cand in df.columns:
                target_name = cand
                break
    if target_name is None or target_name not in df.columns:
        raise ValueError(
            f"Could not infer target column. Columns: {list(df.columns)[:10]}..."
        )
    y = df[target_name]
    X = df.drop(columns=[target_name])
    return X, y


In [4]:
# 4) Build SINGLE vs MERGED feature views

def build_single_view(table) -> pd.DataFrame:
    df = table_to_pandas(table)
    # Limit to max 10000 samples
    if len(df) > 10000:
        print(f"Limiting dataframe from {len(df)} to 10000 samples")
        df = df.sample(10000, random_state=42)
    return preprocess_df(df)

def discover_fk_columns(df: pd.DataFrame) -> list:
    # Heuristic: columns ending with 'Id' that aren't the row's own index
    return [c for c in df.columns if c.lower().endswith("id")]

def pick_right_table_name(db_table_names: list, fk_col: str) -> Optional[str]:
    # Exact match (e.g., driverId -> 'drivers', circuitId -> 'circuits')
    base = fk_col[:-2]  # drop 'Id'
    candidates = [
        base.lower(),
        base.lower() + "s",  # plural
        base.lower() + "es", # rare pluralization
        base.lower().replace("y", "ies") + ""  # e.g., 'qualify' -> 'qualifies' (heuristic)
    ]
    for name in db_table_names:
        lname = name.lower()
        if lname in candidates:
            return name
    # sometimes the fk column already matches the table primary key (e.g., raceId -> races)
    for name in db_table_names:
        if fk_col.lower().startswith(name.lower()[:-1]) and name.lower().endswith("s"):
            return name
    return None

def primary_key_name_for_table(table_name: str) -> str:
    # F1 schema uses '<table>Id' primary keys (plural to singular)
    # E.g., 'drivers' -> 'driverId', 'races' -> 'raceId', 'results' -> 'resultId'
    if table_name.lower().endswith("ies"):   # constructor_standings -> not this pattern
        singular = table_name[:-3] + "y"
    elif table_name.lower().endswith("s"):
        singular = table_name[:-1]
    else:
        singular = table_name
    return f"{singular}Id"

def build_merged_view(base_table, db) -> pd.DataFrame:
    base = table_to_pandas(base_table)

    try:
        db_names = get_db_table_names(db)
    except RuntimeError as e:
        print(f"Warning: {e}. Continuing with base table only.")
        return preprocess_df(base.copy())

    merged = base.copy()
    fk_cols = discover_fk_columns(merged)

    for fk in fk_cols:
        right_name = pick_right_table_name(db_names, fk)
        if right_name is None:
            continue
        try:
            right_df = get_db_table_df(db, right_name)
        except KeyError:
            continue

        pk = primary_key_name_for_table(right_name)
        if pk not in right_df.columns or fk not in merged.columns:
            continue

        # Perform left join and drop duplicate key from right
        right_cols = [c for c in right_df.columns]
        suffix = f"__{right_name}"
        merged = merged.merge(
            right_df,
            how="left",
            left_on=fk,
            right_on=pk,
            suffixes=("", suffix),
        )
        # Drop the right pk to avoid duplicates
        if pk in merged.columns:
            # Preserve the left fk; drop the right pk if it is duplicated
            cols_to_drop = [c for c in merged.columns if c == pk and c != fk]
            if cols_to_drop:
                merged = merged.drop(columns=cols_to_drop)

    merged = preprocess_df(merged)
    return merged


In [5]:
# Evaluation & timing utilities for ML models
# Add a standardization function to help avoid numerical issues
def standardize_features(X_train, X_test=None):
    """Standardize features to help prevent numerical issues.

    Returns standardized X_train and optionally X_test, along with scaler
    """
    from sklearn.preprocessing import RobustScaler

    # Use RobustScaler which is less affected by outliers
    scaler = RobustScaler()
    X_train_scaled = pd.DataFrame(
        scaler.fit_transform(X_train),
        columns=X_train.columns,
        index=X_train.index
    )

    if X_test is not None:
        X_test_scaled = pd.DataFrame(
            scaler.transform(X_test),
            columns=X_test.columns,
            index=X_test.index
        )
        return X_train_scaled, X_test_scaled, scaler

    return X_train_scaled, scaler

def fit_predict_with_timing(estimator, X_train, y_train, X_test):
    """
    Fit an estimator and time both fitting and prediction operations.
    
    Returns:
    - predictions (point predictions)
    - probabilities (for classification only, otherwise None)
    - fit_time_s (in seconds)
    - predict_time_s (in seconds)
    """
    # Handle NaN values that might cause warnings
    X_train = X_train.fillna(X_train.median())
    X_test = X_test.fillna(X_train.median())
    
    # Apply standardization to help prevent numerical issues
    X_train_scaled, X_test_scaled, _ = standardize_features(X_train, X_test)
    
    # Fit and time
    t0 = time.time()
    estimator.fit(X_train_scaled, y_train)
    fit_time_s = time.time() - t0

    # Predict and time
    t0 = time.time()
    if hasattr(estimator, "predict_proba"):
        proba = estimator.predict_proba(X_test_scaled)
        preds = np.argmax(proba, axis=1)
        # Map integer predictions back to original classes
        if hasattr(estimator, "classes_"):
            preds = estimator.classes_[preds]
    else:
        proba = None
        preds = estimator.predict(X_test_scaled)
    predict_time_s = time.time() - t0

    return preds, proba, fit_time_s, predict_time_s

def eval_classification(y_true, y_pred, y_proba=None) -> Dict[str, float]:
    """
    Evaluate classification predictions with common metrics.
    """
    metrics = {}
    metrics["accuracy"] = accuracy_score(y_true, y_pred)

    try:
        metrics["f1_macro"] = f1_score(y_true, y_pred, average="macro", zero_division=0)
    except:
        # Handle edge cases where f1 fails (e.g., single class in test set)
        metrics["f1_macro"] = np.nan

    # For binary classification, add AUC if probabilities provided
    if y_proba is not None and len(np.unique(y_true)) <= 2:
        try:
            # Get probability of positive class
            if y_proba.shape[1] == 2:
                y_score = y_proba[:, 1]  # Use second column for positive class
                metrics["roc_auc"] = roc_auc_score(y_true, y_score)
        except:
            # Handle edge cases where ROC AUC fails (e.g., single class in test set)
            metrics["roc_auc"] = np.nan

    return metrics

def eval_regression(y_true, y_pred) -> Dict[str, float]:
    """
    Evaluate regression predictions with common metrics.
    """
    metrics = {}
    metrics["mae"] = mean_absolute_error(y_true, y_pred)
    metrics["mse"] = mean_squared_error(y_true, y_pred)
    # R2 score (coefficient of determination)
    # metrics["r2"] = r2_score(y_true, y_pred)
    return metrics



In [None]:
# 5) Run single vs merged view experiments

task_name = "driver-dnf"
db = get_dataset("rel-f1").get_db()  # FIX: Use "rel-f1" instead of "f1"
task = get_task("rel-f1", task_name)  # FIX: Use "rel-f1" instead of just task_name

# Target column inferred from task metadata
target_name = None
if hasattr(task, 'target'):
    target_name = task.target
elif hasattr(task, 'target_col'):
    target_name = task.target_col
problem_type = "classification"  # Default to classification for driver-dnf

# Tables for splits
train_table = task.get_table("train")
val_table   = task.get_table("val")
test_table  = task.get_table("test", mask_input_cols=False)

# SINGLE view
df_tr_single = build_single_view(train_table)
df_va_single = build_single_view(val_table)
df_te_single = build_single_view(test_table)

Xtr_s, ytr_s = split_X_y(df_tr_single, target_name)
Xva_s, yva_s = split_X_y(df_va_single, target_name)
Xte_s, yte_s = split_X_y(df_te_single, target_name)

# MERGED view
df_tr_merged = build_merged_view(train_table, db)
df_va_merged = build_merged_view(val_table, db)
df_te_merged = build_merged_view(test_table, db)

Xtr_m, ytr_m = split_X_y(df_tr_merged, target_name)
Xva_m, yva_m = split_X_y(df_va_merged, target_name)
Xte_m, yte_m = split_X_y(df_te_merged, target_name)

# Choose estimator
if problem_type == "classification":
    est_single = TabPFNClassifier(device=DEVICE)
    est_merged = TabPFNClassifier(device=DEVICE)
elif problem_type == "regression":
    est_single = TabPFNRegressor(device=DEVICE)
    est_merged = TabPFNRegressor(device=DEVICE)
else:
    raise ValueError("problem_type must be 'classification' or 'regression'.")

# Fit on train, evaluate on test
yhat_s, proba_s, fit_s, pred_s = fit_predict_with_timing(est_single, Xtr_s, ytr_s, Xte_s)
yhat_m, proba_m, fit_m, pred_m = fit_predict_with_timing(est_merged, Xtr_m, ytr_m, Xte_m)

# Metrics
if problem_type == "classification":
    metrics_single = eval_classification(yte_s, yhat_s, proba_s)
    metrics_merged = eval_classification(yte_m, yhat_m, proba_m)
else:
    metrics_single = eval_regression(yte_s, yhat_s)
    metrics_merged = eval_regression(yte_m, yhat_m)

# Add timing & metadata
metrics_single.update({"fit_time_s": fit_s, "predict_time_s": pred_s, "view": "single", "task": task_name})
metrics_merged.update({"fit_time_s": fit_m, "predict_time_s": pred_m, "view": "merged", "task": task_name})

results_single = pd.DataFrame([metrics_single])
results_merged = pd.DataFrame([metrics_merged])

results_single, results_merged


Loading Database object from /Users/michaelflppv/Library/Caches/relbench/rel-f1/db...
Done in 0.04 seconds.
Limiting dataframe from 11411 to 10000 samples


  parsed = pd.to_datetime(df[col], infer_datetime_format=True, errors="coerce")
  parsed = pd.to_datetime(df[col], infer_datetime_format=True, errors="coerce")
  parsed = pd.to_datetime(df[col], infer_datetime_format=True, errors="coerce")
  parsed = pd.to_datetime(df[col], infer_datetime_format=True, errors="coerce")
  parsed = pd.to_datetime(df[col], infer_datetime_format=True, errors="coerce")
  parsed = pd.to_datetime(df[col], infer_datetime_format=True, errors="coerce")
  parsed = pd.to_datetime(df[col], infer_datetime_format=True, errors="coerce")
  parsed = pd.to_datetime(df[col], infer_datetime_format=True, errors="coerce")
  parsed = pd.to_datetime(df[col], infer_datetime_format=True, errors="coerce")
  parsed = pd.to_datetime(df[col], infer_datetime_format=True, errors="coerce")
  parsed = pd.to_datetime(df[col], infer_datetime_format=True, errors="coerce")
  parsed = pd.to_datetime(df[col], infer_datetime_format=True, errors="coerce")
  parsed = pd.to_datetime(df[col], infer

In [None]:
# 6) Aggregate results for all tasks
def run_task(task_name: str, problem_type: str) -> pd.DataFrame:
    db = get_dataset("rel-f1").get_db()
    task = get_task("rel-f1", task_name)

    # Target column inferred from task metadata
    if hasattr(task, 'target'):
        target_name = task.target
    elif hasattr(task, 'target_col'):
        target_name = task.target_col
    else:
        # If we can't find the target attribute, infer it from the test table
        test_table = task.get_table("test", mask_input_cols=False)
        test_df = table_to_pandas(test_table)
        # Look for typical target column names
        possible_targets = ['target', 'label', 'y', 'did_not_finish', 'top3', 'position']
        for col in possible_targets:
            if col in test_df.columns:
                target_name = col
                break
        else:
            raise ValueError(f"Could not determine target column for task {task_name}")
    # Tables for splits
    train_table = task.get_table("train")
    val_table   = task.get_table("val")
    test_table  = task.get_table("test", mask_input_cols=False)

    # Set maximum number of samples to 10000 for each table
    MAX_SAMPLES = 10000

    # SINGLE view
    df_tr_single = build_single_view(train_table)
    if len(df_tr_single) > MAX_SAMPLES:
        df_tr_single = df_tr_single.sample(MAX_SAMPLES, random_state=42)

    df_va_single = build_single_view(val_table)
    if len(df_va_single) > MAX_SAMPLES:
        df_va_single = df_va_single.sample(MAX_SAMPLES, random_state=42)

    df_te_single = build_single_view(test_table)
    if len(df_te_single) > MAX_SAMPLES:
        df_te_single = df_te_single.sample(MAX_SAMPLES, random_state=42)

    Xtr_s, ytr_s = split_X_y(df_tr_single, target_name)
    Xva_s, yva_s = split_X_y(df_va_single, target_name)
    Xte_s, yte_s = split_X_y(df_te_single, target_name)

    # MERGED view
    df_tr_merged = build_merged_view(train_table, db)
    if len(df_tr_merged) > MAX_SAMPLES:
        df_tr_merged = df_tr_merged.sample(MAX_SAMPLES, random_state=42)

    df_va_merged = build_merged_view(val_table, db)
    if len(df_va_merged) > MAX_SAMPLES:
        df_va_merged = df_va_merged.sample(MAX_SAMPLES, random_state=42)

    df_te_merged = build_merged_view(test_table, db)
    if len(df_te_merged) > MAX_SAMPLES:
        df_te_merged = df_te_merged.sample(MAX_SAMPLES, random_state=42)

    Xtr_m, ytr_m = split_X_y(df_tr_merged, target_name)
    Xva_m, yva_m = split_X_y(df_va_merged, target_name)
    Xte_m, yte_m = split_X_y(df_te_merged, target_name)

    # Choose estimator
    if problem_type == "classification":
        est_single = TabPFNClassifier(device=DEVICE)
        est_merged = TabPFNClassifier(device=DEVICE)
    elif problem_type == "regression":
        est_single = TabPFNRegressor(device=DEVICE)
        est_merged = TabPFNRegressor(device=DEVICE)
    else:
        raise ValueError("problem_type must be 'classification' or 'regression'.")

    # Fit on train, evaluate on test
    yhat_s, proba_s, fit_s, pred_s = fit_predict_with_timing(est_single, Xtr_s, ytr_s, Xte_s)
    yhat_m, proba_m, fit_m, pred_m = fit_predict_with_timing(est_merged, Xtr_m, ytr_m, Xte_m)

    # Metrics
    if problem_type == "classification":
        metrics_single = eval_classification(yte_s, yhat_s, proba_s)
        metrics_merged = eval_classification(yte_m, yhat_m, proba_m)
    else:
        metrics_single = eval_regression(yte_s, yhat_s)
        metrics_merged = eval_regression(yte_m, yhat_m)

    # Add timing & metadata
    metrics_single.update({"fit_time_s": fit_s, "predict_time_s": pred_s, "view": "single", "task": task_name})
    metrics_merged.update({"fit_time_s": fit_m, "predict_time_s": pred_m, "view": "merged", "task": task_name})

    return pd.DataFrame([metrics_single, metrics_merged])


results_all = []
# Driver DNF (classification)
results_all.append(run_task("driver-dnf", problem_type="classification"))
# Driver Top-3 (classification)
results_all.append(run_task("driver-top3", problem_type="classification"))
# Driver Position (regression)
results_all.append(run_task("driver-position", problem_type="regression"))

results = pd.concat(results_all, ignore_index=True)
results


In [None]:
# 7) Pretty summary per task
def tidy(df: pd.DataFrame) -> pd.DataFrame:
    cols = ["task", "view", "accuracy", "roc_auc", "f1_macro", "mae", "mse", "fit_time_s", "predict_time_s"]
    return df[[c for c in cols if c in df.columns]].sort_values(["task", "view"])

summary = tidy(results)
summary


In [None]:
# 8) Optional: quick comparison deltas (merged - single) per task
def compare_merged_vs_single(df: pd.DataFrame) -> pd.DataFrame:
    rows = []
    for task in df["task"].unique():
        sub = df[df.task == task].set_index("view")
        if {"single", "merged"} <= set(sub.index):
            a = sub.loc["single"]
            b = sub.loc["merged"]
            delta = (b - a).to_dict()
            delta["task"] = task
            rows.append(delta)
    out = pd.DataFrame(rows)
    return out[[c for c in ["task", "accuracy", "roc_auc", "f1_macro", "mae", "mse", "fit_time_s", "predict_time_s"] if c in out.columns]]

delta = compare_merged_vs_single(summary)
delta


In [None]:
# 9) (Optional) Save results to CSV
summary_path = "tabpfn_rel_f1_summary.csv"
delta_path = "tabpfn_rel_f1_delta.csv"
summary.to_csv(summary_path, index=False)
delta.to_csv(delta_path, index=False)
summary_path, delta_path
