In [15]:
import os, sys
import pandas as pd
sys.path.append(os.path.abspath(".."))

# Season that the filtering, training and prediction cells operate on.
SEASON = 2025

# Input files, given relative to the notebook's working directory.
# NOTE(review): the features file name says 2024 while SEASON is 2025 —
# confirm this is the intended feature set for this season.
GAMES = "../data/raw/games_2025_regular.csv"
FEATS = "../features/team_week_features_2024.parquet"

In [16]:
# Load the schedule CSV and map its camelCase columns to the short snake_case
# names used in the rest of this cell.
games = pd.read_csv(GAMES).rename(columns={
    "homeTeam": "home", "awayTeam": "away",
    "homePoints": "home_pts", "awayPoints": "away_pts",
    "neutralSite": "neutral", "seasonType": "season_type",
})

# Keep only completed regular-season games of the configured season
# (SEASON comes from the config cell, so this filter follows it automatically).
is_completed_regular = (
    games["season"].eq(SEASON)
    & games["season_type"].eq("regular")
    & games["completed"].eq(True)
)
g = games.loc[
    is_completed_regular,
    ["home", "away", "week", "neutral", "home_pts", "away_pts"],
].copy()

# Check ordering: for each home team, flag rows whose week is lower than the
# week of that team's previous row *in file order*.
# The frame must NOT be sorted by week first — after sorting, the per-team
# difference can never be negative and the check would always come back empty.
print("Games rows unsorted where week decreases within same team:")
mask = g.groupby("home", sort=False)["week"].diff().lt(0)
print(g.loc[mask, ["home", "week"]])

Games rows unsorted where week decreases within same team:
Empty DataFrame
Columns: [home, week]
Index: []


In [17]:
# Project imports for this cell, grouped ahead of the statements that use them.
from src.datasets import prepare_training_set
from src.train_predict import run_training_pipeline, run_prediction_pipeline

# Re-read the raw inputs: this rebinds `games` to a frame with the original
# CSV column names.
games = pd.read_csv(GAMES)
feats = pd.read_parquet(FEATS)

# Sanity check that both inputs cover comparable weeks. `.tolist()` converts
# the values to plain Python ints so the printout is not cluttered with
# `np.int64(...)` wrappers. Only the first 15 distinct weeks are shown.
print("Unique weeks in games:", sorted(games["week"].unique().tolist())[:15])
print("Unique weeks in feats:", sorted(feats["week"].unique().tolist())[:15])

# Train for the configured SEASON (2025 in the config cell), holding out
# val_weeks=2 weeks for validation; the recorded run reports validation on
# weeks 10 and 11.
models = run_training_pipeline(GAMES, FEATS, SEASON, val_weeks=2)


Unique weeks in games: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(16)]
Unique weeks in feats: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15)]
[LightGBM] [Info] Number of positive: 1490, number of negative: 1062
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000165 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 958
[LightGBM] [Info] Number of data points in the train set: 2552, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.583856 -> initscore=0.338622
[LightGBM] [Info] Start training from score 0.338622
Validation (weeks [np.int64(10), np.int64(11)]) — LogLoss: 0.73

In [18]:
# Regular-season games of the configured season that are not completed yet.
# Uses the raw CSV column names (`seasonType`, `homeTeam`, ...), so `games`
# must be the un-renamed frame read straight from GAMES.
# SEASON replaces the hard-coded year so this listing stays in step with the
# training and prediction calls, which already receive SEASON.
not_yet_played = (
    games["season"].eq(SEASON)
    & games["seasonType"].eq("regular")
    & games["completed"].eq(False)
)
unplayed = games.loc[not_yet_played, ["week", "homeTeam", "awayTeam"]]

print(unplayed)


      week                 homeTeam                         awayTeam
1314     5                  Catawba                        Limestone
2870    11            Curry College                        Biddeford
2871    11           Olivet College                           Albion
2887    11            Mass Maritime                     Worcester St
2888    11  Merchant Marine Academy  Worcester Polytechnic Institute
...    ...                      ...                              ...
3647    14                  Liberty                   Kennesaw State
3648    14         Washington State                     Oregon State
3649    14                   Nevada                             UNLV
3650    14                  Hawai'i                          Wyoming
3651    16                     Navy                             Army

[585 rows x 3 columns]


In [19]:
# Run the prediction pipeline for weeks 5 and 10 using the trained models.
# In the recorded run this saved a CSV and returned a frame with a
# `p_home_win` column.
# NOTE(review): the recorded output contains a single week-5 row and nothing
# for week 10 — confirm that `weeks` is meant as an explicit list here rather
# than a 5..10 range.
preds = run_prediction_pipeline(
    models,
    GAMES,
    FEATS,
    SEASON,
    weeks=[5, 10],
    out_stub="predictions_remaining",
)
print(preds)


✅ Saved predictions to C:\Users\palle\source\repos\cfp-predictor\data\processed\predictions_remaining_weeks_5-10.csv
      home       away  week  neutral  p_home_win
0  Catawba  Limestone     5        0    0.620625


In [20]:
# Re-read the schedule from disk and list the regular-season games of the
# configured season that are still not completed.
# SEASON replaces the hard-coded year so this check follows the config cell,
# like the training and prediction calls do.
games = pd.read_csv(GAMES)
mask = (
    games["season"].eq(SEASON)
    & games["seasonType"].eq("regular")
    & games["completed"].eq(False)
)
print(games.loc[mask, ["week", "homeTeam", "awayTeam"]])


      week                 homeTeam                         awayTeam
1314     5                  Catawba                        Limestone
2870    11            Curry College                        Biddeford
2871    11           Olivet College                           Albion
2887    11            Mass Maritime                     Worcester St
2888    11  Merchant Marine Academy  Worcester Polytechnic Institute
...    ...                      ...                              ...
3647    14                  Liberty                   Kennesaw State
3648    14         Washington State                     Oregon State
3649    14                   Nevada                             UNLV
3650    14                  Hawai'i                          Wyoming
3651    16                     Navy                             Army

[585 rows x 3 columns]


In [21]:
# Predict regular-season weeks 11, 12 and 13; edit the `weeks` list to match
# whichever weeks are still outstanding.
# NOTE(review): the unplayed-games listing in this notebook also shows games in
# weeks 14 and 16, which this call does not cover — confirm that is intended.
preds = run_prediction_pipeline(
    models,
    GAMES,
    FEATS,
    SEASON,
    weeks=[11, 12, 13],
    out_stub="predictions_regular",
)
preds.head(10)


✅ Saved predictions to C:\Users\palle\source\repos\cfp-predictor\data\processed\predictions_regular_weeks_11-12-13.csv


Unnamed: 0,home,away,week,neutral,p_home_win
0,Curry College,Biddeford,11,0,0.735999
1,Olivet College,Albion,11,0,0.504392
2,Mass Maritime,Worcester St,11,0,0.584484
3,Merchant Marine Academy,Worcester Polytechnic Institute,11,0,0.531932
4,Juniata College,Moravian,11,0,0.560576
5,Carnegie Mellon,Franklin & Marshall,11,0,0.683937
6,Framingham State,Dean College,11,0,0.630518
7,Amherst,Williams,11,0,0.707675
8,Bates,Hamilton,11,0,0.740215
9,Virginia Union,Virginia St,11,0,0.435305
