In [14]:
import pickle
import lightgbm as lgbm
import pandas as pd
from datetime import datetime
import pytz

# Mode switch; the cell that assembles the predictions table checks for "predict".
run_type = "predict"

# Columns removed from the frame before it is passed to model.predict().
# Built from small groups purely for readability; the result is one flat list.
exclude = (
    ["id", "year", "week", "season_type", "start_date", "completed"]
    + ["home_team", "home_points", "home_post_wp"]
    + ["away_team", "away_points", "away_post_wp"]
    + ["home_ml", "away_ml", "spread_open", "over_under_open"]
    + ["team_home", "team_away"]
)

In [15]:
# Load the prediction input table from its parquet file.
df = pd.read_parquet("./data/X_predict.parquet")

In [16]:
# Parse start_date into timezone-aware UTC timestamps so it can be compared
# against an aware "now" value.
df = df.assign(start_date=pd.to_datetime(df["start_date"], utc=True))

In [17]:
# Keep only games whose start time is still in the future (compared in UTC).
now_utc = datetime.now(pytz.utc)
is_upcoming = df["start_date"] > now_utc
df = df.loc[is_upcoming]

In [18]:
# Drop games that have no value in the spread column.
df = df[df["spread"].notna()]

In [19]:
import os

# Collect every entry in models/ whose file name contains "model-".
models = [fname for fname in os.listdir("models/") if "model-" in fname]

In [20]:
# Order the file names in descending (reverse lexicographic) order.
models = sorted(models, reverse=True)

In [21]:
# Fail with a clear message instead of a bare IndexError when no model file
# was found in models/.
if not models:
    raise FileNotFoundError("No 'model-*' file found in models/")

# `models` is sorted in descending order, so the first entry is the name that
# sorts last. Presumably the file names embed a sortable date/version, making
# this the newest model; confirm against the naming scheme.
latest = models[0]

In [23]:
# Deserialize the selected model. pickle.load can execute arbitrary code from
# the file, so models/ must only contain trusted artifacts.
with open(f"models/{latest}", mode="rb") as model_file:
    model = pickle.load(model_file)

In [24]:
# `df` is the result of boolean filtering at this point, so assigning columns
# into it directly can trigger pandas' SettingWithCopyWarning. Take an explicit
# copy first so the assignment unambiguously targets our own frame.
df = df.copy()

# Replace missing values in the two sp_st rating columns with 0.
rating_cols = ["sp_st_rating_home", "sp_st_rating_away"]
df[rating_cols] = df[rating_cols].fillna(0)

In [25]:
# Feature matrix: everything except the columns listed in `exclude`.
features = df.drop(columns=exclude)

# Run the model and wrap the raw output in a Series named "preds"
# (default 0..n-1 index).
y_preds = pd.Series(model.predict(features), name="preds")



In [26]:
if run_type == "predict":
    # Keep the identifying columns and renumber rows from 0 so they line up
    # with the default index of y_preds, then attach the predictions.
    preds_df = (
        df[["id", "home_team", "away_team"]]
        .reset_index(drop=True)
        .assign(predicted=y_preds)
    )

In [27]:
# Relabel the four columns positionally with shorter names.
preds_df = preds_df.set_axis(["id", "home", "away", "predicted"], axis="columns")

In [28]:
# Display the predictions table (columns: id, home, away, predicted) as the cell output.
preds_df

Unnamed: 0,id,home,away,predicted
0,401531894,Georgia Southern,Louisiana Monroe,-13.289279
1,401532429,Bowling Green,Akron,-7.666934
2,401520341,Missouri,South Carolina,-7.135139
3,401520342,Nebraska,Northwestern,-7.203763
4,401532426,Northern Illinois,Eastern Michigan,-7.953636
...,...,...,...,...
70,401520429,Florida,Florida State,5.349927
71,401520441,South Carolina,Clemson,5.090819
72,401520427,Auburn,Alabama,12.551769
73,401525562,NC State,North Carolina,-1.099447


In [29]:
# Copy the table to the system clipboard as comma-separated text, omitting the index.
preds_df.to_clipboard(index=False, sep=",")