In [1]:
import pandas as pd
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# ----------------------------
# 1. Load data
# ----------------------------
# The project root is taken to be two directory levels above the current
# working directory; the CSV is read from its "outputs" folder.
root_dir = Path.cwd().parent.parent
dataset_path = root_dir.joinpath("outputs", "college_stats.csv")

df = pd.read_csv(dataset_path)


In [2]:
# ----------------------------
# 2. Simple ranking model
# ----------------------------
class RankMLP(nn.Module):
    """Small MLP that maps each input row to a single scalar ranking score.

    Architecture: Linear(input_dim -> hidden_dim) -> ReLU -> Linear(hidden_dim -> 1).
    """

    def __init__(self, input_dim, hidden_dim=64):
        super().__init__()
        # Layers are instantiated in forward order, so seeded initialisation and
        # the `net.0` / `net.2` parameter names are unaffected by this layout.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Score a list of items: x is [N, D], the result is [N]."""
        per_item = self.net(x)            # [N, 1]
        return per_item.squeeze(dim=-1)   # [N]

# ----------------------------
# 3. Listwise losses
# ----------------------------

def listnet_loss(scores, labels):
    """
    ListNet top-1 cross entropy for a single list.

    scores: [N] model scores (higher means better)
    labels: [N] OVERALL_PICK (lower is better in reality)

    Labels are turned into relevance via rel = -labels, so the smallest pick
    receives the largest target probability. Returns a scalar tensor:
    -sum_i P_y(i) * log P_s(i), where P_y = softmax(rel) and P_s = softmax(scores).
    """
    # Cast so integer-typed picks are accepted; softmax needs a floating dtype.
    rel = -labels.to(scores.dtype)  # larger rel = better
    target_probs = F.softmax(rel, dim=0)

    # log_softmax is evaluated in log-space. The previous log(softmax + 1e-12)
    # saturated near log(1e-12) once a probability underflowed, which
    # understated the loss and flattened the gradient for badly mis-scored items.
    log_pred_probs = F.log_softmax(scores, dim=0)

    return -torch.sum(target_probs * log_pred_probs)

def listmle_loss(scores, labels):
    """
    ListMLE loss (negative Plackett-Luce log-likelihood of the true ordering).

    scores: [N]
    labels: [N] OVERALL_PICK (lower = better)

    Items are arranged by ascending OVERALL_PICK; the likelihood of that
    ordering under the scores is then evaluated position by position.
    """
    # Permutation that puts the best (smallest) pick first.
    order = torch.sort(labels, descending=False).indices
    ranked = scores[order]

    # Suffix log-sum-exp: entry i is log(sum_{j>=i} exp(ranked[j])).
    # Obtained by running a prefix logcumsumexp over the reversed sequence.
    reversed_prefix = torch.logcumsumexp(torch.flip(ranked, dims=[0]), dim=0)
    suffix_lse = torch.flip(reversed_prefix, dims=[0])

    # sum_i [ranked_i - log(sum_{j>=i} exp(ranked_j))]
    log_likelihood = (ranked - suffix_lse).sum()
    return -log_likelihood

# ----------------------------
# 4. Evaluation: pairwise ranking accuracy
# ----------------------------
def pairwise_accuracy(scores, labels):
    """
    Pairwise ranking accuracy within one list.

    True order: lower OVERALL_PICK is better, so a pair (i, j) is ranked
    correctly when the item with the smaller label has the strictly larger score.

    scores: [N] tensor of model scores (higher = better)
    labels: [N] tensor of OVERALL_PICK values (lower = better)

    Tie handling (independent of the order in which items are passed):
      - pairs with equal labels have no ground-truth order and are left out of
        the denominator;
      - pairs with equal scores express no preference and count as incorrect.

    Returns a float in [0, 1]; 0.0 when there are fewer than two items or no
    pair with distinct labels.
    """
    scores = scores.detach().cpu().numpy()
    labels = labels.detach().cpu().numpy()
    n = len(labels)
    if n < 2:
        return 0.0

    # All unordered pairs (i < j), compared in one vectorised step.
    first, second = np.triu_indices(n, k=1)
    label_sign = np.sign(labels[first] - labels[second])
    score_sign = np.sign(scores[first] - scores[second])

    total = int(np.count_nonzero(label_sign))
    if total == 0:
        return 0.0

    # Concordant means opposite signs: the smaller pick carries the larger score.
    correct = int(np.count_nonzero(label_sign * score_sign < 0))
    return correct / total

def evaluate_model(model, groups):
    """
    Pair-weighted pairwise accuracy of `model` over several lists.

    groups: iterable of (season, X, y) tuples. Each list's accuracy (from
    `pairwise_accuracy`) is weighted by its number of item pairs, n*(n-1)/2.
    Lists with fewer than two items contribute nothing. The model is switched
    to eval mode and scored without gradient tracking.

    Returns 0.0 when no list has at least two items.
    """
    model.eval()
    weighted_hits = 0.0
    pair_count = 0
    with torch.no_grad():
        for _season, features, picks in groups:
            predicted = model(features)
            size = len(picks)
            if size >= 2:
                pairs_here = size * (size - 1) // 2
                weighted_hits += pairwise_accuracy(predicted, picks) * pairs_here
                pair_count += pairs_here

    if pair_count > 0:
        return weighted_hits / pair_count
    return 0.0

# ----------------------------
# 5. Training loop helper
# ----------------------------
def train_listwise(model, groups, loss_fn, n_epochs=200, lr=1e-3, name="model"):
    """
    Train `model` with Adam, taking one optimisation step per list per epoch.

    groups:  sequence of (season, X, y) tuples; each tuple is one list.
    loss_fn: callable (scores, y) -> scalar loss tensor.
    name:    label used in the progress lines printed on epoch 1 and every
             20th epoch.

    Returns the same model object after training.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Guard against division by zero when no lists are supplied.
    n_lists = max(len(groups), 1)

    for epoch in range(1, n_epochs + 1):
        model.train()
        list_losses = []
        for _season, features, picks in groups:
            optimizer.zero_grad()
            loss = loss_fn(model(features), picks)
            loss.backward()
            optimizer.step()
            list_losses.append(loss.item())
        avg_loss = sum(list_losses) / n_lists

        if epoch == 1 or epoch % 20 == 0:
            print(f"[{name}] Epoch {epoch:3d} | train loss = {avg_loss:.4f}")

    return model


In [3]:
# ----------------------------
# 6. K-fold cross-validation
# ----------------------------

from sklearn.model_selection import KFold
import numpy as np
import torch

def prepare_kfold_folds(df, feature_cols, k_folds=5, random_state=42):
    """
    Split the data season-wise into K folds for listwise ranking.

    The unique SEASON values are shuffled into `k_folds` partitions; every
    season becomes one list, sorted by OVERALL_PICK (lower pick = better).

    Returns a list with one dict per fold:
      {
        "fold_id": int,
        "train_seasons": [...],
        "test_seasons":  [...],
        "train_groups":  [(season, X, y), ...],
        "test_groups":   [(season, X, y), ...],
      }
    where X is a float32 tensor of standardised features and y a float32
    tensor of OVERALL_PICK values.

    Standardisation statistics (mean/std) are computed per fold from that
    fold's TRAIN seasons only and applied to both train and test lists.
    """
    all_seasons = sorted(df["SEASON"].unique())
    splitter = KFold(n_splits=k_folds, shuffle=True, random_state=random_state)

    def season_lists(frame, seasons, center, scale):
        """Build (season, X, y) tuples for `seasons`, scaling with center/scale."""
        lists = []
        for season in seasons:
            rows = frame[frame["SEASON"] == season].copy()
            if rows.empty:
                continue

            rows = rows.sort_values("OVERALL_PICK")  # best pick first
            standardised = (rows[feature_cols] - center) / scale

            lists.append((
                season,
                torch.tensor(standardised.values, dtype=torch.float32),
                torch.tensor(rows["OVERALL_PICK"].values, dtype=torch.float32),
            ))
        return lists

    folds = []
    for fold_id, (train_idx, test_idx) in enumerate(splitter.split(all_seasons), start=1):
        train_seasons = [all_seasons[pos] for pos in train_idx]
        test_seasons = [all_seasons[pos] for pos in test_idx]

        df_train = df[df["SEASON"].isin(train_seasons)].copy()
        df_test = df[df["SEASON"].isin(test_seasons)].copy()

        # Scaling statistics come from this fold's training rows only;
        # a zero std is replaced by 1 so constant features do not divide by zero.
        train_feats = df_train[feature_cols]
        feat_mean = train_feats.mean()
        feat_std = train_feats.std().replace(0, 1.0)

        folds.append({
            "fold_id": fold_id,
            "train_seasons": train_seasons,
            "test_seasons":  test_seasons,
            "train_groups":  season_lists(df_train, train_seasons, feat_mean, feat_std),
            "test_groups":   season_lists(df_test, test_seasons, feat_mean, feat_std),
        })

    return folds


In [4]:
# Identifier and target columns are excluded; every remaining column is a feature.
drop_cols = ["player_name", "OVERALL_PICK", "SEASON"]
feature_cols = df.columns[~df.columns.isin(drop_cols)].tolist()

folds = prepare_kfold_folds(df, feature_cols, k_folds=5, random_state=42)


In [5]:
def run_kfold_for_loss(folds, loss_fn, model_name, n_epochs=200, hidden_dim=64, lr=1e-3, seed=42):
    """
    Train and evaluate a fresh RankMLP on every fold with the given listwise loss.

    folds:      list of fold dicts with the keys "fold_id", "train_seasons",
                "test_seasons", "train_groups" and "test_groups"
                (groups are (season, X, y) tuples).
    loss_fn:    callable (scores, y) -> scalar loss, passed to `train_listwise`.
    model_name: label used in all printed progress and summary lines.
    seed:       torch seed set before each model is created, so every fold
                starts from the same initial weights.

    Returns (train_accs, test_accs): per-fold pairwise accuracies.

    Raises ValueError if a fold has no training groups.
    """
    train_accs = []
    test_accs = []

    for fold in folds:
        fold_id = fold["fold_id"]
        train_groups = fold["train_groups"]
        test_groups = fold["test_groups"]

        print(f"\n===== {model_name} | Fold {fold_id} =====")
        print("Train seasons:", fold["train_seasons"])
        print("Test  seasons:", fold["test_seasons"])

        if not train_groups:
            raise ValueError(
                f"Fold {fold_id} has no training groups; cannot size or train the model."
            )

        # Take the input width from the fold's own feature tensors instead of a
        # notebook-level variable, so the model always matches the data it is
        # trained on, whatever cell-execution order produced `folds`.
        input_dim = train_groups[0][1].shape[1]

        torch.manual_seed(seed)
        model = RankMLP(input_dim=input_dim, hidden_dim=hidden_dim)

        model = train_listwise(
            model,
            train_groups,
            loss_fn=loss_fn,
            n_epochs=n_epochs,
            lr=lr,
            name=f"{model_name} Fold {fold_id}"
        )

        train_acc = evaluate_model(model, train_groups)
        test_acc = evaluate_model(model, test_groups)

        train_accs.append(train_acc)
        test_accs.append(test_acc)

        print(f"[{model_name} Fold {fold_id}] "
              f"Train pairwise acc = {train_acc:.3f} | "
              f"Test pairwise acc = {test_acc:.3f}")

    print(f"\n=== {model_name} {len(folds)}-fold CV (season-wise) ===")
    for i, (tr, te) in enumerate(zip(train_accs, test_accs), start=1):
        print(f"Fold {i}: train = {tr:.3f}, test = {te:.3f}")
    print(f"Mean train acc: {np.mean(train_accs):.3f}")
    print(f"Mean  test acc: {np.mean(test_accs):.3f}")

    return train_accs, test_accs


In [6]:
# ----------------------------
# 7. Train and evaluate ListNet
# ----------------------------
# Season-wise cross-validation of the MLP ranker trained with the ListNet loss.
listnet_train_accs, listnet_test_accs = run_kfold_for_loss(
    folds, listnet_loss, "ListNet", n_epochs=200, hidden_dim=64, lr=1e-3
)



===== ListNet | Fold 1 =====
Train seasons: [np.int64(2001), np.int64(2002), np.int64(2004), np.int64(2005), np.int64(2006), np.int64(2007), np.int64(2008), np.int64(2010), np.int64(2011), np.int64(2013), np.int64(2014), np.int64(2015), np.int64(2016), np.int64(2018), np.int64(2019), np.int64(2020), np.int64(2021), np.int64(2022), np.int64(2023), np.int64(2025)]
Test  seasons: [np.int64(2000), np.int64(2009), np.int64(2012), np.int64(2017), np.int64(2024)]
[ListNet Fold 1] Epoch   1 | train loss = 3.7765
[ListNet Fold 1] Epoch  20 | train loss = 2.5079
[ListNet Fold 1] Epoch  40 | train loss = 2.2566
[ListNet Fold 1] Epoch  60 | train loss = 2.0607
[ListNet Fold 1] Epoch  80 | train loss = 1.9108
[ListNet Fold 1] Epoch 100 | train loss = 1.7987
[ListNet Fold 1] Epoch 120 | train loss = 1.7069
[ListNet Fold 1] Epoch 140 | train loss = 1.6306
[ListNet Fold 1] Epoch 160 | train loss = 1.5624
[ListNet Fold 1] Epoch 180 | train loss = 1.5000
[ListNet Fold 1] Epoch 200 | train loss = 1.4438

In [7]:
# ----------------------------
# 8. Train and evaluate ListMLE
# ----------------------------
# Season-wise cross-validation of the MLP ranker trained with the ListMLE loss.
listmle_train_accs, listmle_test_accs = run_kfold_for_loss(
    folds, listmle_loss, "ListMLE", n_epochs=200, hidden_dim=64, lr=1e-3
)



===== ListMLE | Fold 1 =====
Train seasons: [np.int64(2001), np.int64(2002), np.int64(2004), np.int64(2005), np.int64(2006), np.int64(2007), np.int64(2008), np.int64(2010), np.int64(2011), np.int64(2013), np.int64(2014), np.int64(2015), np.int64(2016), np.int64(2018), np.int64(2019), np.int64(2020), np.int64(2021), np.int64(2022), np.int64(2023), np.int64(2025)]
Test  seasons: [np.int64(2000), np.int64(2009), np.int64(2012), np.int64(2017), np.int64(2024)]
[ListMLE Fold 1] Epoch   1 | train loss = 141.3167
[ListMLE Fold 1] Epoch  20 | train loss = 130.7051
[ListMLE Fold 1] Epoch  40 | train loss = 129.2607
[ListMLE Fold 1] Epoch  60 | train loss = 128.2911
[ListMLE Fold 1] Epoch  80 | train loss = 127.3813
[ListMLE Fold 1] Epoch 100 | train loss = 126.5648
[ListMLE Fold 1] Epoch 120 | train loss = 125.8590
[ListMLE Fold 1] Epoch 140 | train loss = 125.1991
[ListMLE Fold 1] Epoch 160 | train loss = 124.5510
[ListMLE Fold 1] Epoch 180 | train loss = 123.9752
[ListMLE Fold 1] Epoch 200 |

In [8]:
import lightgbm as lgb
from scipy.stats import spearmanr, kendalltau

def build_lgb_data_for_fold(df, feature_cols, train_seasons, test_seasons):
    """
    Build LightGBM ranking data (X, y, group) for a given fold.

    Each season is one query group, with rows sorted by OVERALL_PICK.
    Feature scaling (mean/std) is fit on the TRAIN seasons only.

    Picks are converted to "higher is better" relevance as
    max_train_pick - pick. Test picks larger than the largest training pick
    would give a negative relevance, which ranking objectives do not accept,
    so relevance is clipped at 0.

    Returns:
      X_train, y_train_rel, group_train,
      X_test,  y_test_rel,  group_test,
      feat_mean, feat_std

    Raises:
      ValueError: if none of `train_seasons` has any rows in `df`.
    """
    df_train = df[df["SEASON"].isin(train_seasons)]
    df_test = df[df["SEASON"].isin(test_seasons)]

    # ---- scaling (TRAIN only); zero std -> 1 to avoid division by zero ----
    train_feats = df_train[feature_cols]
    feat_mean = train_feats.mean()
    feat_std = train_feats.std().replace(0, 1.0)

    def build_X_y_group(df_subset, seasons_subset):
        """Concatenate per-season rows (best pick first) into X, y and group sizes."""
        frames = []
        group = []
        for season in seasons_subset:
            g = df_subset[df_subset["SEASON"] == season]
            if g.empty:
                continue
            g = g.sort_values("OVERALL_PICK")  # lower pick = better
            frames.append(g)
            group.append(len(g))
        if not frames:
            return np.empty((0, len(feature_cols))), np.array([]), []
        df_cat = pd.concat(frames, axis=0)
        X = ((df_cat[feature_cols] - feat_mean) / feat_std).values
        y = df_cat["OVERALL_PICK"].values.astype(float)
        return X, y, group

    X_train, y_train, group_train = build_X_y_group(df_train, train_seasons)
    X_test, y_test, group_test = build_X_y_group(df_test, test_seasons)

    if y_train.size == 0:
        raise ValueError("No training rows found for the given train_seasons.")

    # LightGBM expects "higher is better".
    max_y = y_train.max()
    y_train_rel = max_y - y_train  # pick 1 -> big number, worst train pick -> 0
    # Clip so a test pick beyond the training maximum maps to 0, not below it.
    y_test_rel = np.clip(max_y - y_test, 0.0, None)

    return (
        X_train, y_train_rel, group_train,
        X_test,  y_test_rel,  group_test,
        feat_mean, feat_std,
    )
import lightgbm as lgb
from scipy.stats import spearmanr, kendalltau

def run_lambdamart_cv(df, feature_cols, k_folds=5, random_state=42, num_boost_round=300):
    """
    LambdaMART (LightGBM lambdarank) with season-wise K-fold CV.

    Fold membership comes from `prepare_kfold_folds`, so the season split is
    the same as for the neural rankers given the same `k_folds` and
    `random_state`. For each fold the data is scaled on the training seasons
    only (via `build_lgb_data_for_fold`), a model is trained for
    `num_boost_round` rounds, and pairwise accuracy, mean Spearman and mean
    Kendall are computed on both the train and the test seasons.

    Returns a dict of per-fold metric lists with the keys
    "train_pair", "test_pair", "train_spear", "test_spear",
    "train_kend", "test_kend".
    """
    folds = prepare_kfold_folds(df, feature_cols, k_folds=k_folds, random_state=random_state)

    base_params = {
        "objective": "lambdarank",
        "metric": "ndcg",
        "ndcg_at": [5, 10, 20],
        "learning_rate": 0.05,
        "num_leaves": 31,
        "min_data_in_leaf": 20,
        "feature_fraction": 0.8,
        "bagging_fraction": 0.8,
        "bagging_freq": 1,
        "max_depth": -1,
        "verbose": -1,
    }

    train_pair_list = []
    test_pair_list = []
    train_spear_list = []
    test_spear_list = []
    train_kend_list = []
    test_kend_list = []

    for fold in folds:
        fold_id = fold["fold_id"]
        train_seasons = fold["train_seasons"]
        test_seasons = fold["test_seasons"]

        print(f"\n===== LambdaMART | Fold {fold_id} =====")
        print("Train seasons:", train_seasons)
        print("Test  seasons:", test_seasons)

        # ---- Scaled features, relevance labels and group sizes for this fold ----
        # Reuses the shared builder instead of repeating its scaling/grouping code.
        (
            X_train, y_train_rel, group_train,
            X_test,  y_test_rel,  group_test,
            feat_mean, feat_std,
        ) = build_lgb_data_for_fold(df, feature_cols, train_seasons, test_seasons)

        # ---- Non-negative integer relevance ----
        # Relevance is max_train_pick - pick. A test season can contain a pick
        # larger than any training pick, which would make its relevance
        # negative; clip at 0 so the validation labels are always valid.
        y_train_rel = np.clip(y_train_rel, 0, None).astype(int)
        y_test_rel = np.clip(y_test_rel, 0, None).astype(int)

        # ---- label_gain long enough to cover every label value ----
        max_label = int(max(y_train_rel.max(initial=0), y_test_rel.max(initial=0)))
        params = dict(base_params)
        params["label_gain"] = list(range(max_label + 1))

        train_set = lgb.Dataset(X_train, label=y_train_rel, group=group_train)

        # The held-out seasons are attached for metric tracking only.
        valid_sets = []
        valid_names = []
        if group_test:
            valid_sets.append(
                lgb.Dataset(X_test, label=y_test_rel, group=group_test, reference=train_set)
            )
            valid_names.append("valid")

        model = lgb.train(
            params,
            train_set,
            num_boost_round=num_boost_round,
            valid_sets=valid_sets or None,
            valid_names=valid_names or None,
        )

        # ---- Evaluate on TRAIN + TEST seasons ----
        train_pair, train_spear, train_kend = evaluate_lambdamart_fold(
            model, df, feature_cols, feat_mean, feat_std, train_seasons
        )
        test_pair, test_spear, test_kend = evaluate_lambdamart_fold(
            model, df, feature_cols, feat_mean, feat_std, test_seasons
        )

        print(f"[Fold {fold_id}]")
        print(f"  Train: Pairwise = {train_pair:.3f}, Spearman = {train_spear:.3f}, Kendall = {train_kend:.3f}")
        print(f"  Test : Pairwise = {test_pair:.3f}, Spearman = {test_spear:.3f}, Kendall = {test_kend:.3f}")

        train_pair_list.append(train_pair)
        test_pair_list.append(test_pair)
        train_spear_list.append(train_spear)
        test_spear_list.append(test_spear)
        train_kend_list.append(train_kend)
        test_kend_list.append(test_kend)

    print("\n=== LambdaMART K-fold CV (season-wise) ===")
    for i, (tr_p, te_p, tr_s, te_s, tr_k, te_k) in enumerate(
        zip(train_pair_list, test_pair_list,
            train_spear_list, test_spear_list,
            train_kend_list, test_kend_list),
        start=1,
    ):
        print(f"Fold {i}: "
              f"TrainPair = {tr_p:.3f}, TestPair = {te_p:.3f} | "
              f"TrainSpearman = {tr_s:.3f}, TestSpearman = {te_s:.3f} | "
              f"TrainKendall = {tr_k:.3f}, TestKendall = {te_k:.3f}")

    print("\nMean Train pairwise:", np.mean(train_pair_list))
    print("Mean Test  pairwise:", np.mean(test_pair_list))
    print("Mean Train Spearman:", np.mean(train_spear_list))
    print("Mean Test  Spearman:", np.mean(test_spear_list))
    print("Mean Train Kendall :", np.mean(train_kend_list))
    print("Mean Test  Kendall :", np.mean(test_kend_list))

    return {
        "train_pair":  train_pair_list,
        "test_pair":   test_pair_list,
        "train_spear": train_spear_list,
        "test_spear":  test_spear_list,
        "train_kend":  train_kend_list,
        "test_kend":   test_kend_list,
    }


def evaluate_lambdamart_fold(model, df, feature_cols, feat_mean, feat_std, seasons):
    """
    Evaluate a ranking model on the given seasons.

    model:     any object with a `predict(X)` method returning one score per
               row (higher = better).
    df:        frame with "SEASON", "OVERALL_PICK" and the feature columns.
    feat_mean, feat_std: scaling statistics applied to `feature_cols`.
    seasons:   seasons to score; seasons with no rows are skipped.

    Returns (pair_acc, mean_spear, mean_kend):
      - pair_acc: pairwise accuracy pooled over all seasons. A pair counts as
        correct when the smaller pick has the strictly larger score. Pairs
        with equal picks are excluded; pairs with equal scores count as
        incorrect. The result does not depend on row order.
      - mean_spear / mean_kend: mean per-season Spearman / Kendall correlation
        between -pick and score. Seasons where the correlation is undefined
        (fewer than two rows, constant picks or constant scores) are left out.
    Each value is 0.0 when nothing contributes to it.
    """
    total_correct = 0
    total_pairs = 0
    spear_list = []
    kend_list = []

    for s in seasons:
        g = df[df["SEASON"] == s]
        if g.empty:
            continue

        g = g.sort_values("OVERALL_PICK")
        X = ((g[feature_cols] - feat_mean) / feat_std).values
        true_pick = g["OVERALL_PICK"].values.astype(float)

        scores = np.asarray(model.predict(X), dtype=float)
        n = len(true_pick)

        # Pairwise accuracy over all unordered pairs (i < j), vectorised.
        first, second = np.triu_indices(n, k=1)
        pick_sign = np.sign(true_pick[first] - true_pick[second])
        score_sign = np.sign(scores[first] - scores[second])
        total_pairs += int(np.count_nonzero(pick_sign))
        # Concordant = opposite signs: smaller pick paired with larger score.
        total_correct += int(np.count_nonzero(pick_sign * score_sign < 0))

        # Rank correlations (negate picks so higher is better). They are only
        # defined when both sequences vary, so degenerate seasons are skipped
        # rather than fed to scipy, which would return NaN.
        if n >= 2 and true_pick.max() > true_pick.min() and scores.max() > scores.min():
            spear, _ = spearmanr(-true_pick, scores)
            kend, _ = kendalltau(-true_pick, scores)
            if np.isfinite(spear):
                spear_list.append(float(spear))
            if np.isfinite(kend):
                kend_list.append(float(kend))

    pair_acc = total_correct / total_pairs if total_pairs > 0 else 0.0
    mean_spear = float(np.mean(spear_list)) if spear_list else 0.0
    mean_kend = float(np.mean(kend_list)) if kend_list else 0.0

    return pair_acc, mean_spear, mean_kend



In [9]:
# Identifier and target columns are excluded; every remaining column is a feature.
drop_cols = ["player_name", "OVERALL_PICK", "SEASON"]
feature_cols = df.columns[~df.columns.isin(drop_cols)].tolist()

lambdamart_cv_results = run_lambdamart_cv(
    df, feature_cols, k_folds=5, random_state=42, num_boost_round=300
)



===== LambdaMART | Fold 1 =====
Train seasons: [np.int64(2001), np.int64(2002), np.int64(2004), np.int64(2005), np.int64(2006), np.int64(2007), np.int64(2008), np.int64(2010), np.int64(2011), np.int64(2013), np.int64(2014), np.int64(2015), np.int64(2016), np.int64(2018), np.int64(2019), np.int64(2020), np.int64(2021), np.int64(2022), np.int64(2023), np.int64(2025)]
Test  seasons: [np.int64(2000), np.int64(2009), np.int64(2012), np.int64(2017), np.int64(2024)]
[Fold 1]
  Train: Pairwise = 0.906, Spearman = 0.938, Kendall = 0.824
  Test : Pairwise = 0.687, Spearman = 0.544, Kendall = 0.379

===== LambdaMART | Fold 2 =====
Train seasons: [np.int64(2000), np.int64(2002), np.int64(2004), np.int64(2005), np.int64(2007), np.int64(2008), np.int64(2009), np.int64(2011), np.int64(2012), np.int64(2013), np.int64(2015), np.int64(2016), np.int64(2017), np.int64(2018), np.int64(2019), np.int64(2020), np.int64(2021), np.int64(2022), np.int64(2024), np.int64(2025)]
Test  seasons: [np.int64(2001), np.