In [1]:
import os
import pickle
from datetime import datetime

import lightgbm as lgbm
import pandas as pd
import pytz

# Execution mode; the predictions table is only built when this is "predict".
run_type = "predict"

# Columns removed from the frame before it is handed to model.predict();
# every column NOT listed here is passed to the model.
exclude = [
    "id", "year", "week", "season_type", "start_date", "completed",
    "home_team", "home_points", "home_post_wp",
    "away_team", "away_points", "away_post_wp",
    "home_ml", "away_ml", "spread_open", "over_under_open",
    "team_home", "team_away",
]

In [2]:
# Load the prediction input table from the parquet file next to the notebook.
predict_path = "./X_predict.parquet"
df = pd.read_parquet(predict_path)

In [3]:
# Parse start_date into timezone-aware UTC timestamps so it can be compared
# against a timezone-aware "now".
start_date_utc = pd.to_datetime(df["start_date"], utc=True)
df["start_date"] = start_date_utc

In [4]:
# Keep only upcoming games: rows whose start_date is strictly later than the
# current UTC time at the moment this cell runs.
now_utc = datetime.now(pytz.utc)
is_upcoming = df["start_date"] > now_utc
df = df[is_upcoming]

In [5]:
# Keep only games that have a spread (rows where `spread` is not null).
# The explicit .copy() makes `df` an independent frame rather than a slice of
# the previously filtered one, so later in-place column assignments on `df`
# do not raise pandas' SettingWithCopyWarning.
df = df.loc[df["spread"].notna(), :].copy()

In [6]:
# Collect every entry in the current working directory whose name contains
# "model-"; one of these is opened and unpickled as the model further down.
models = [entry for entry in os.listdir() if "model-" in entry]

In [7]:
# Order the candidate filenames in descending lexicographic order.
# NOTE(review): this puts the newest model first only if the filenames embed
# a sortable date/version after "model-" — confirm the naming scheme.
models = sorted(models, reverse=True)

In [8]:
# `models` is sorted in descending order, so its first entry is the greatest
# filename. Fail with an explicit message when no candidate file was found
# instead of an opaque "list index out of range".
if not models:
    raise FileNotFoundError(
        'No file containing "model-" was found in the current working directory.'
    )
latest = models[0]

In [9]:
# Deserialize the selected model file.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load model files that were produced by a trusted source.
with open(latest, "rb") as model_file:
    model = pickle.load(model_file)

In [10]:
# Replace missing values in the two sp_st_rating columns with 0; all other
# columns are left untouched.
rating_cols = ["sp_st_rating_home", "sp_st_rating_away"]
df[rating_cols] = df[rating_cols].fillna(0)

In [11]:
# Drop the columns listed in `exclude` and run the model on what remains.
features = df.drop(columns=exclude)
# No index is passed, so the Series is built with pandas' default index
# rather than df's index.
y_preds = pd.Series(model.predict(features), name="preds")



In [12]:
# Build the output table: game id and both team names, re-indexed from 0 so
# the rows line up with y_preds when the "predicted" column is attached.
# NOTE(review): preds_df is only defined when run_type == "predict"; the
# cells that follow use it unconditionally.
if run_type == "predict":
    preds_df = (
        df.loc[:, ["id", "home_team", "away_team"]]
        .reset_index(drop=True)
        .assign(predicted=y_preds)
    )

In [13]:
# Shorten the team column names. Renaming by name (instead of overwriting the
# whole header positionally) cannot mislabel a column if the column order or
# count of preds_df ever changes, and re-running the cell is a no-op.
preds_df = preds_df.rename(columns={"home_team": "home", "away_team": "away"})

In [14]:
# Show the predictions table as this cell's output (a bare last expression is
# rendered by the notebook).
preds_df

Unnamed: 0,id,home,away,predicted
0,401520307,Ohio State,Maryland,-16.245862
1,401520304,Mississippi State,Western Michigan,-19.023920
2,401520311,Wisconsin,Rutgers,-8.701414
3,401520305,Missouri,LSU,3.515074
4,401520298,Army,Boston College,-3.732851
...,...,...,...,...
84,401520429,Florida,Florida State,5.987861
85,401520433,LSU,Texas A&M,-5.583962
86,401520441,South Carolina,Clemson,5.053704
87,401525562,NC State,North Carolina,-1.498479


In [15]:
# Copy the predictions to the system clipboard as comma-separated text,
# leaving out the row index.
preds_df.to_clipboard(
    index=False,
    sep=",",
)