In [1]:
import numpy as np
import pandas as pd

import cfbd

import requests
import json
import os
import sys

from datetime import datetime as dt

In [2]:
# Local Imports
from resources.helper import load_configs, authenticate_api
from preprocessing.data_gathering import get_game_stats, get_games

In [3]:
# Load configs
# The config file location can be overridden with the CFB_API_CONFIGS
# environment variable so the notebook is not tied to a single machine.
# The original absolute path stays as the default, so an existing setup
# behaves exactly as before when the variable is unset.
api_configs_path = os.environ.get(
    "CFB_API_CONFIGS",
    "/Users/taylorlast/PersonalProjects/cfb-predictions/configs/api_configs.yml",
)
api_configs = load_configs(config_path=api_configs_path)

# Auth API
# The key is read from the loaded config (never hard-coded in the notebook)
# and handed to the project helper, whose return value is reused by every
# API call further down.
api_configuration = authenticate_api(api_key=api_configs["API_KEY"])

In [16]:
# Season-level pulls through the project's data-gathering helpers, both for
# 2022 and both using the authenticated configuration built earlier.
# `games` is requested with only_fbs=True; `stats` is requested without it.
games = get_games(
    year=2022,
    only_fbs=True,
    configuration=api_configuration,
)
stats = get_game_stats(
    year=2022,
    configuration=api_configuration,
)

In [5]:
# Display the games frame; as the cell's last bare expression it is rendered
# with the DataFrame repr rather than printed as plain text.
games

Unnamed: 0,id,season,week,home_team,away_team,home_points,away_points,home_division,away_division,home_pregame_elo,away_pregame_elo,neutral_site,point_diff,winner
0,401405059,2022,1,Northwestern,Nebraska,31.0,28.0,fbs,fbs,1361.0,1636.0,True,3.0,Northwestern
1,401404146,2022,1,Utah State,Connecticut,31.0,20.0,fbs,fbs,1566.0,1117.0,False,11.0,Utah State
2,401405058,2022,1,Illinois,Wyoming,38.0,6.0,fbs,fbs,1496.0,1497.0,False,32.0,Illinois
3,401426530,2022,1,Florida Atlantic,Charlotte,43.0,13.0,fbs,fbs,1427.0,1272.0,False,30.0,Florida Atlantic
4,401426531,2022,1,UTEP,North Texas,13.0,31.0,fbs,fbs,1360.0,1426.0,False,-18.0,North Texas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
733,401437036,2022,14,Georgia,LSU,50.0,30.0,fbs,fbs,2132.0,1729.0,True,20.0,Georgia
734,401437034,2022,14,Boise State,Fresno State,16.0,28.0,fbs,fbs,1750.0,1676.0,False,-12.0,Fresno State
735,401437009,2022,14,North Carolina,Clemson,10.0,39.0,fbs,fbs,1599.0,1821.0,True,-29.0,Clemson
736,401437031,2022,14,Michigan,Purdue,43.0,22.0,fbs,fbs,2144.0,1622.0,True,21.0,Michigan


In [6]:
# Pull the betting lines for the 2022 season from the CFBD betting endpoint.
api_instance = cfbd.BettingApi(cfbd.ApiClient(api_configuration))
spreads = api_instance.get_lines(year=2022)

# One row per returned game. `from_records` is a classmethod, so it is called
# on the class itself instead of on a throwaway empty DataFrame instance.
spreads_df = pd.DataFrame.from_records([s.to_dict() for s in spreads])

# Keep only the "consensus" provider entry in each game's list of lines.
# `x or []` guards against a game whose `lines` value is None rather than a list.
spreads_df["lines"] = spreads_df["lines"].apply(
    lambda x: [book for book in (x or []) if book["provider"] == "consensus"]
)

# Drop games that ended up with no consensus line at all.
spreads_df = spreads_df[spreads_df["lines"].str.len() != 0].reset_index(drop=True)

# The spread arrives as a string (e.g. '-17.5'); cast it to float and negate it.
# The negated value is what later cells subtract from `point_diff`.
spreads_df["consensus_spread(reversed)"] = (
    spreads_df["lines"].apply(lambda x: x[0]["spread"]).astype(float) * -1
)

In [7]:
# Spot-check: consensus spreads for the games where Georgia is the home team.
spreads_df.loc[spreads_df["home_team"].eq("Georgia")]

Unnamed: 0,id,season,week,season_type,start_date,home_team,home_conference,home_score,away_team,away_conference,away_score,lines,consensus_spread(reversed)
112,401437036,2022,14,regular,2022-12-03T21:00:00.000Z,Georgia,SEC,50.0,LSU,SEC,30.0,"[{'provider': 'consensus', 'spread': '-17.5', ...",17.5
235,401403911,2022,6,regular,2022-10-08T19:30:00.000Z,Georgia,SEC,42.0,Auburn,SEC,10.0,"[{'provider': 'consensus', 'spread': '-28', 'f...",28.0
251,401403918,2022,7,regular,2022-10-15T19:30:00.000Z,Georgia,SEC,55.0,Vanderbilt,SEC,0.0,"[{'provider': 'consensus', 'spread': '-37.5', ...",37.5
298,401403858,2022,1,regular,2022-09-03T19:30:00.000Z,Georgia,SEC,49.0,Oregon,Pac-12,3.0,"[{'provider': 'consensus', 'spread': '-16', 'f...",16.0
576,401403872,2022,2,regular,2022-09-10T20:00:00.000Z,Georgia,SEC,33.0,Samford,Southern,0.0,"[{'provider': 'consensus', 'spread': '-53', 'f...",53.0
867,401403959,2022,13,regular,2022-11-26T17:00:00.000Z,Georgia,SEC,37.0,Georgia Tech,ACC,14.0,"[{'provider': 'consensus', 'spread': '-36.5', ...",36.5
1130,401403928,2022,9,regular,2022-10-29T19:30:00.000Z,Georgia,SEC,42.0,Florida,SEC,20.0,"[{'provider': 'consensus', 'spread': '-23', 'f...",23.0
1151,401403895,2022,4,regular,2022-09-24T16:00:00.000Z,Georgia,SEC,39.0,Kent State,Mid-American,22.0,"[{'provider': 'consensus', 'spread': '-45', 'f...",45.0
1190,401403933,2022,10,regular,2022-11-05T19:30:00.000Z,Georgia,SEC,27.0,Tennessee,SEC,13.0,"[{'provider': 'consensus', 'spread': '-10', 'f...",10.0


In [12]:
# Attach each game's negated consensus spread to the games frame.
# Inner join on (id, home_team): games without a matching spread row are dropped.
df = games.merge(
    spreads_df.loc[:, ["id", "home_team", "consensus_spread(reversed)"]],
    how="inner",
    on=["id", "home_team"],
)

In [13]:
# Margin against the spread: the actual home-minus-away margin (`point_diff`)
# less the negated consensus line.
#   diff > 0 -> the home team beat the spread (won by more, or lost by less,
#               than the line implied)
#   diff < 0 -> the away team beat the spread
df["diff"] = df["point_diff"].sub(df["consensus_spread(reversed)"])

In [21]:
# Columns to inspect: identifiers, pregame Elo for both sides, the
# neutral-site flag, the negated spread and the margin against it.
model_cols = [
    "id", "home_team", "away_team",
    "home_pregame_elo", "away_pregame_elo", "neutral_site",
    "consensus_spread(reversed)", "diff",
]

# Every merged game involving Georgia, whether at home or away.
df.loc[df[["home_team", "away_team"]].eq("Georgia").any(axis=1), model_cols]

Unnamed: 0,id,home_team,away_team,home_pregame_elo,away_pregame_elo,neutral_site,consensus_spread(reversed),diff
23,401403858,Georgia,Oregon,1980.0,1574.0,True,16.0,30.0
100,401403888,South Carolina,Georgia,1521.0,2070.0,False,-25.5,-15.5
153,401403895,Georgia,Kent State,2121.0,1274.0,False,45.0,-28.0
253,401403907,Missouri,Georgia,1411.0,2090.0,False,-30.5,26.5
289,401403911,Georgia,Auburn,2058.0,1558.0,False,28.0,4.0
342,401403918,Georgia,Vanderbilt,2087.0,1338.0,False,37.5,17.5
439,401403928,Georgia,Florida,2154.0,1553.0,True,23.0,-1.0
499,401403933,Georgia,Tennessee,2155.0,1970.0,False,10.0,4.0
574,401403944,Mississippi State,Georgia,1688.0,2168.0,False,-16.5,-9.5
623,401403949,Kentucky,Georgia,1583.0,2184.0,False,-22.5,12.5


In [19]:
# Georgia's per-game stat rows, ordered by week.
# NOTE(review): the displayed output has three week-1 rows for Georgia
# (Oregon, Ohio State, TCU) and an empty `season` column. Presumably the
# week number restarts for postseason games; confirm before using `week`
# as a join key or an ordering key.
stats.loc[stats["team"].eq("Georgia")].sort_values(by="week")

Unnamed: 0,id,season,week,team,opponent,plays_offense,drives_offense,ppa_offense,totalPPA_offense,successRate_offense,...,passingDowns.successRate_defense,passingDowns.explosiveness_defense,rushingPlays.ppa_defense,rushingPlays.totalPPA_defense,rushingPlays.successRate_defense,rushingPlays.explosiveness_defense,passingPlays.ppa_defense,passingPlays.totalPPA_defense,passingPlays.successRate_defense,passingPlays.explosiveness_defense
10,401403858,,1,Georgia,Oregon,53,9,0.771659,40.897905,0.641509,...,0.380952,1.021821,0.192477,5.581827,0.482759,0.725854,-0.141829,-4.964005,0.457143,0.684255
2704,401442015,,1,Georgia,Ohio State,60,14,0.4398,26.387984,0.466667,...,0.25,1.623531,0.215467,6.033064,0.464286,0.838008,0.436056,16.570126,0.552632,1.413922
2694,401442010,,1,Georgia,TCU,72,12,0.585582,42.161938,0.597222,...,0.043478,3.380716,-0.030176,-0.663871,0.5,0.559341,0.000961,0.026906,0.178571,1.800572
36,401403872,,2,Georgia,Samford,74,12,0.241488,17.870084,0.567568,...,0.0,,-0.432325,-6.48488,0.066667,0.604642,-0.126138,-3.405718,0.185185,0.970611
66,401403888,,3,Georgia,South Carolina,65,11,0.534859,34.765813,0.707692,...,0.321429,2.002657,-0.124745,-3.742343,0.3,0.890928,-0.245956,-9.59227,0.282051,1.944875
80,401403895,,4,Georgia,Kent State,76,10,0.35188,26.742847,0.592105,...,0.230769,1.702834,0.04623,1.248207,0.407407,0.563998,0.33452,8.362992,0.52,1.376614
102,401403907,,5,Georgia,Missouri,77,11,0.302467,23.289973,0.480519,...,0.2,2.889965,-0.081902,-1.55613,0.263158,1.600364,0.084572,2.875449,0.294118,1.792237
111,401403911,,6,Georgia,Auburn,71,14,0.327297,23.238116,0.507042,...,0.222222,2.860821,0.078446,1.882712,0.416667,0.819932,0.142061,5.398329,0.210526,2.296877
124,401403918,,7,Georgia,Vanderbilt,79,11,0.460861,36.407989,0.582278,...,0.222222,2.17614,-0.13489,-2.967575,0.272727,0.534573,-0.020606,-0.494552,0.25,1.880429
145,401403928,,9,Georgia,Florida,77,13,0.319529,24.603702,0.493506,...,0.235294,2.583696,0.075483,2.339968,0.322581,0.922595,-0.061927,-2.477063,0.225,2.24935
