# Imports

In [1]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf

pd.set_option("display.max_columns", None)

# Games Data

In [2]:
data_path = os.path.join("..", "data", "games.csv")
df = pd.read_csv(data_path, parse_dates=["Date", "Open"], date_format="%Y-%m-%d", index_col=0)

df = df[df["N"] == 0].drop(columns="N")
df = df[df["Season"] > 17]

display(df.head(), df.shape)

Unnamed: 0,Season,Date,HID,AID,POFF,Open,OddsH,OddsA,H,A,HSC,ASC,HFGM,AFGM,HFGA,AFGA,HFG3M,AFG3M,HFG3A,AFG3A,HFTM,AFTM,HFTA,AFTA,HORB,AORB,HDRB,ADRB,HRB,ARB,HAST,AAST,HSTL,ASTL,HBLK,ABLK,HTOV,ATOV,HPF,APF
19835,18,1992-11-03,13,0,0,1992-11-02,1.183476,5.088113,1,0,89,74,33.0,29.0,75.0,71.0,7.0,2.0,20.0,8.0,16.0,14.0,30.0,17.0,12.0,12.0,27.0,32.0,39.0,44.0,25.0,16.0,12.0,3.0,3.0,2.0,8.0,19.0,19.0,22.0
19836,18,1992-11-03,4,43,0,1992-11-02,1.195249,4.856962,1,0,83,82,31.0,30.0,73.0,83.0,1.0,5.0,10.0,10.0,20.0,17.0,26.0,21.0,7.0,17.0,31.0,26.0,38.0,43.0,20.0,14.0,8.0,7.0,11.0,5.0,20.0,18.0,24.0,32.0
19837,18,1992-11-03,17,20,0,1992-11-02,1.524799,2.470634,1,0,109,93,45.0,32.0,89.0,85.0,7.0,7.0,20.0,25.0,12.0,22.0,20.0,30.0,7.0,11.0,39.0,35.0,46.0,46.0,32.0,17.0,7.0,4.0,6.0,1.0,10.0,11.0,28.0,21.0
19838,18,1992-11-04,15,28,0,1992-11-03,1.388495,2.988044,0,1,87,89,31.0,36.0,79.0,79.0,2.0,7.0,6.0,14.0,23.0,10.0,31.0,15.0,16.0,12.0,24.0,27.0,40.0,39.0,15.0,18.0,11.0,8.0,12.0,9.0,15.0,19.0,17.0,28.0
19839,18,1992-11-04,5,22,0,1992-11-03,1.358132,3.154191,1,0,95,89,38.0,35.0,85.0,83.0,1.0,2.0,6.0,9.0,18.0,17.0,28.0,18.0,15.0,12.0,32.0,30.0,47.0,42.0,26.0,25.0,8.0,4.0,5.0,4.0,8.0,12.0,14.0,21.0


(9153, 40)

In [3]:
meta_columns = ["Season", "Date", "HID", "AID", "POFF", "Open", "H", "A", "OddsH", "OddsA"]
featuresH = ["HSC", "HFGM", "HFGA", "HFG3M", "HFG3A", "HFTM", "HFTA", "HORB", "HDRB", "HRB", "HAST", "HSTL", "HBLK", "HTOV", "HPF"]
featuresA = ["ASC", "AFGM", "AFGA", "AFG3M", "AFG3A", "AFTM", "AFTA", "AORB", "ADRB", "ARB", "AAST", "ASTL", "ABLK", "ATOV", "APF"]

In [4]:
df_bare = df[meta_columns].drop(columns=["Open"])
df_bare["Timestamp"] = df_bare["Date"].astype(int)

display(df_bare.head(), df_bare.shape)

Unnamed: 0,Season,Date,HID,AID,POFF,H,A,OddsH,OddsA,Timestamp
19835,18,1992-11-03,13,0,0,1,0,1.183476,5.088113,720748800000000000
19836,18,1992-11-03,4,43,0,1,0,1.195249,4.856962,720748800000000000
19837,18,1992-11-03,17,20,0,1,0,1.524799,2.470634,720748800000000000
19838,18,1992-11-04,15,28,0,0,1,1.388495,2.988044,720835200000000000
19839,18,1992-11-04,5,22,0,1,0,1.358132,3.154191,720835200000000000


(9153, 10)

In [5]:
rename_columnsH = {
    "HID": "TID", "AID": "OID", "H": "W", "OddsH": "OddsT", "OddsA": "OddsO", "POFF": "TPOFF",
    "HSC": "TSC", "HFGM": "TFGM", "HFGA": "TFGA", "HFG3M": "TFG3M", "HFG3A": "TFG3A", "HFTM": "TFTM", "HFTA": "TFTA", "HORB": "TORB", "HDRB": "TDRB", "HRB": "TRB", "HAST": "TAST", "HSTL": "TSTL", "HBLK": "TBLK", "HTOV": "TTOV", "HPF": "TPF",
    "ASC": "OSC", "AFGM": "OFGM", "AFGA": "OFGA", "AFG3M": "OFG3M", "AFG3A": "OFG3A", "AFTM": "OFTM", "AFTA": "OFTA", "AORB": "OORB", "ADRB": "ODRB", "ARB": "ORB", "AAST": "OAST", "ASTL": "OSTL", "ABLK": "OBLK", "ATOV": "OTOV", "APF": "OPF",
}


df_home_game_stats = df.copy().drop(columns=["A", "Open"]).rename(columns=rename_columnsH)

display(df_home_game_stats.head().set_index(["Season", "Date", "TID", "OID"]))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,TPOFF,OddsT,OddsO,W,TSC,OSC,TFGM,OFGM,TFGA,OFGA,TFG3M,OFG3M,TFG3A,OFG3A,TFTM,OFTM,TFTA,OFTA,TORB,OORB,TDRB,ODRB,TRB,ORB,TAST,OAST,TSTL,OSTL,TBLK,OBLK,TTOV,OTOV,TPF,OPF
Season,Date,TID,OID,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
18,1992-11-03,13,0,0,1.183476,5.088113,1,89,74,33.0,29.0,75.0,71.0,7.0,2.0,20.0,8.0,16.0,14.0,30.0,17.0,12.0,12.0,27.0,32.0,39.0,44.0,25.0,16.0,12.0,3.0,3.0,2.0,8.0,19.0,19.0,22.0
18,1992-11-03,4,43,0,1.195249,4.856962,1,83,82,31.0,30.0,73.0,83.0,1.0,5.0,10.0,10.0,20.0,17.0,26.0,21.0,7.0,17.0,31.0,26.0,38.0,43.0,20.0,14.0,8.0,7.0,11.0,5.0,20.0,18.0,24.0,32.0
18,1992-11-03,17,20,0,1.524799,2.470634,1,109,93,45.0,32.0,89.0,85.0,7.0,7.0,20.0,25.0,12.0,22.0,20.0,30.0,7.0,11.0,39.0,35.0,46.0,46.0,32.0,17.0,7.0,4.0,6.0,1.0,10.0,11.0,28.0,21.0
18,1992-11-04,15,28,0,1.388495,2.988044,0,87,89,31.0,36.0,79.0,79.0,2.0,7.0,6.0,14.0,23.0,10.0,31.0,15.0,16.0,12.0,24.0,27.0,40.0,39.0,15.0,18.0,11.0,8.0,12.0,9.0,15.0,19.0,17.0,28.0
18,1992-11-04,5,22,0,1.358132,3.154191,1,95,89,38.0,35.0,85.0,83.0,1.0,2.0,6.0,9.0,18.0,17.0,28.0,18.0,15.0,12.0,32.0,30.0,47.0,42.0,26.0,25.0,8.0,4.0,5.0,4.0,8.0,12.0,14.0,21.0


In [6]:
rename_columnsA = {
    "HID": "OID", "AID": "TID", "A": "W", "OddsH": "OddsO", "OddsA": "OddsT", "POFF": "TPOFF",
    "HSC": "OSC", "HFGM": "OFGM", "HFGA": "OFGA", "HFG3M": "OFG3M", "HFG3A": "OFG3A", "HFTM": "OFTM", "HFTA": "OFTA", "HORB": "OORB", "HDRB": "ODRB", "HRB": "ORB", "HAST": "OAST", "HSTL": "OSTL", "HBLK": "OBLK", "HTOV": "OTOV", "HPF": "OPF",
    "ASC": "TSC", "AFGM": "TFGM", "AFGA": "TFGA", "AFG3M": "TFG3M", "AFG3A": "TFG3A", "AFTM": "TFTM", "AFTA": "TFTA", "AORB": "TORB", "ADRB": "TDRB", "ARB": "TRB", "AAST": "TAST", "ASTL": "TSTL", "ABLK": "TBLK", "ATOV": "TTOV", "APF": "TPF",
}


df_away_game_stats = df.copy().drop(columns=["H", "Open"]).rename(columns=rename_columnsA)

display(df_away_game_stats.head().set_index(["Season", "Date", "TID", "OID"]))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,TPOFF,OddsO,OddsT,W,OSC,TSC,OFGM,TFGM,OFGA,TFGA,OFG3M,TFG3M,OFG3A,TFG3A,OFTM,TFTM,OFTA,TFTA,OORB,TORB,ODRB,TDRB,ORB,TRB,OAST,TAST,OSTL,TSTL,OBLK,TBLK,OTOV,TTOV,OPF,TPF
Season,Date,TID,OID,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
18,1992-11-03,0,13,0,1.183476,5.088113,0,89,74,33.0,29.0,75.0,71.0,7.0,2.0,20.0,8.0,16.0,14.0,30.0,17.0,12.0,12.0,27.0,32.0,39.0,44.0,25.0,16.0,12.0,3.0,3.0,2.0,8.0,19.0,19.0,22.0
18,1992-11-03,43,4,0,1.195249,4.856962,0,83,82,31.0,30.0,73.0,83.0,1.0,5.0,10.0,10.0,20.0,17.0,26.0,21.0,7.0,17.0,31.0,26.0,38.0,43.0,20.0,14.0,8.0,7.0,11.0,5.0,20.0,18.0,24.0,32.0
18,1992-11-03,20,17,0,1.524799,2.470634,0,109,93,45.0,32.0,89.0,85.0,7.0,7.0,20.0,25.0,12.0,22.0,20.0,30.0,7.0,11.0,39.0,35.0,46.0,46.0,32.0,17.0,7.0,4.0,6.0,1.0,10.0,11.0,28.0,21.0
18,1992-11-04,28,15,0,1.388495,2.988044,1,87,89,31.0,36.0,79.0,79.0,2.0,7.0,6.0,14.0,23.0,10.0,31.0,15.0,16.0,12.0,24.0,27.0,40.0,39.0,15.0,18.0,11.0,8.0,12.0,9.0,15.0,19.0,17.0,28.0
18,1992-11-04,22,5,0,1.358132,3.154191,0,95,89,38.0,35.0,85.0,83.0,1.0,2.0,6.0,9.0,18.0,17.0,28.0,18.0,15.0,12.0,32.0,30.0,47.0,42.0,26.0,25.0,8.0,4.0,5.0,4.0,8.0,12.0,14.0,21.0


In [7]:
features = [
    "TPOFF", "OddsT", "OddsO", "W", "TSC", "OSC", "TFGM", "OFGM", "TFGA", "OFGA", "TFG3M", "OFG3M",
    "TFG3A", "OFG3A", "TFTM", "OFTM", "TFTA", "OFTA", "TORB", "OORB", "TDRB", "ODRB", "TRB", "ORB",
    "TAST", "OAST", "TSTL", "OSTL", "TBLK", "OBLK", "TTOV", "OTOV", "TPF", "OPF"
]

In [8]:
rolling_seasons = 3


df_home_game_stats["H"] = 1
df_away_game_stats["H"] = 0

df_game_stats = pd.concat([df_home_game_stats, df_away_game_stats])

df_league_stats = df_game_stats \
    .groupby("Season")[["H"] + features] \
    .mean() \
    .drop(columns="H") \
    .rolling(rolling_seasons, closed="left") \
    .mean() \
    .reset_index()

display(df_league_stats.head(), df_league_stats.shape)

df_home_away_stats = df_game_stats \
    .groupby(["Season", "H"])[features] \
    .mean() \
    .reset_index() \
    .set_index("Season") \
    .groupby("H") \
    .rolling(rolling_seasons, closed="left") \
    .mean() \
    .reset_index()
    
display(df_home_away_stats.head(20), df_home_away_stats.shape)

df_home_away_advantage = pd.merge(df_home_away_stats, df_league_stats, on="Season", suffixes=("", "_season"))
features_advantage = []
features_season = []

for feature_name in features:
    feature_name_advantage = f"{feature_name}_advantage"
    feature_name_season = f"{feature_name}_season"

    features_advantage.append(feature_name_advantage)
    features_season.append(feature_name_season)

    df_home_away_advantage[feature_name_advantage] = df_home_away_advantage[feature_name] - df_home_away_advantage[feature_name_season]


display(df_home_away_advantage[["Season", "H"] + features_advantage].head(20), df_home_away_advantage.shape)


Unnamed: 0,Season,TPOFF,OddsT,OddsO,W,TSC,OSC,TFGM,OFGM,TFGA,OFGA,TFG3M,OFG3M,TFG3A,OFG3A,TFTM,OFTM,TFTA,OFTA,TORB,OORB,TDRB,ODRB,TRB,ORB,TAST,OAST,TSTL,OSTL,TBLK,OBLK,TTOV,OTOV,TPF,OPF
0,18,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,19,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,20,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,21,0.06534,2.407783,2.407783,0.5,95.75908,95.75908,35.522536,35.522536,79.582987,79.582987,5.516064,5.516064,15.621546,15.621546,19.197943,19.197943,25.581855,25.581855,11.720517,11.720517,29.923965,29.923965,41.644481,41.644481,20.92574,20.92574,7.503591,7.503591,4.87479,4.87479,14.591482,14.591482,22.362934,22.362934
4,22,0.063918,2.396547,2.396547,0.5,97.573575,97.573575,36.042076,36.042076,79.568351,79.568351,5.822732,5.822732,16.310828,16.310828,19.666692,19.666692,26.184681,26.184681,11.413623,11.413623,29.848001,29.848001,41.261624,41.261624,20.921883,20.921883,7.270949,7.270949,4.722888,4.722888,14.633983,14.633983,22.581149,22.581149


(7, 35)

Unnamed: 0,H,Season,TPOFF,OddsT,OddsO,W,TSC,OSC,TFGM,OFGM,TFGA,OFGA,TFG3M,OFG3M,TFG3A,OFG3A,TFTM,OFTM,TFTA,OFTA,TORB,OORB,TDRB,ODRB,TRB,ORB,TAST,OAST,TSTL,OSTL,TBLK,OBLK,TTOV,OTOV,TPF,OPF
0,0,18,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0,19,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0,20,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0,21,0.06534,3.14257,1.672997,0.389675,94.015426,97.502734,34.930284,36.114789,79.432436,79.733538,5.476842,5.555286,15.626693,15.616398,18.678016,19.717869,24.913225,26.250485,11.426381,12.014653,29.298898,30.549031,40.725279,42.563684,20.087077,21.764403,7.448746,7.558436,4.567886,5.181693,14.81303,14.369933,22.797647,21.92822
4,0,22,0.063918,3.121166,1.671927,0.398291,95.948236,99.198914,35.511743,36.572408,79.490517,79.646184,5.781067,5.864398,16.284908,16.336748,19.143683,20.1897,25.518384,26.850979,11.156701,11.670545,29.281999,30.414003,40.4387,42.084548,20.086023,21.757743,7.238573,7.303325,4.395435,5.050341,14.819579,14.448388,23.045929,22.116369
5,0,23,0.064409,3.15582,1.680022,0.395656,96.658858,100.082111,35.86322,36.997652,79.784497,80.018741,6.092969,6.149639,17.073826,17.035868,18.83945,19.937167,25.127638,26.51138,10.886288,11.403062,29.584342,30.699329,40.47063,42.102392,20.218526,21.958007,7.146203,7.233226,4.319245,5.023158,14.728676,14.25668,22.530071,21.589548
6,0,24,0.063463,3.243821,1.708001,0.394279,97.577318,100.94882,36.273335,37.35827,80.433671,80.571605,6.379207,6.474038,17.810362,17.720505,18.65144,19.758241,24.615938,26.002798,10.882256,11.304171,29.76166,30.825236,40.643916,42.129407,20.341648,22.090077,7.170398,7.305078,4.362922,5.06677,14.615922,14.116426,21.914542,21.078012
7,1,18,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,1,19,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,1,20,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


(14, 36)

Unnamed: 0,Season,H,TPOFF_advantage,OddsT_advantage,OddsO_advantage,W_advantage,TSC_advantage,OSC_advantage,TFGM_advantage,OFGM_advantage,TFGA_advantage,OFGA_advantage,TFG3M_advantage,OFG3M_advantage,TFG3A_advantage,OFG3A_advantage,TFTM_advantage,OFTM_advantage,TFTA_advantage,OFTA_advantage,TORB_advantage,OORB_advantage,TDRB_advantage,ODRB_advantage,TRB_advantage,ORB_advantage,TAST_advantage,OAST_advantage,TSTL_advantage,OSTL_advantage,TBLK_advantage,OBLK_advantage,TTOV_advantage,OTOV_advantage,TPF_advantage,OPF_advantage
0,18,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,18,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,19,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,19,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,20,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,20,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,21,0,0.0,0.734787,-0.734787,-0.110325,-1.743654,1.743654,-0.592253,0.592253,-0.150551,0.150551,-0.039222,0.039222,0.005147,-0.005147,-0.519927,0.519927,-0.66863,0.66863,-0.294136,0.294136,-0.625067,0.625067,-0.919203,0.919203,-0.838663,0.838663,-0.054845,0.054845,-0.306903,0.306903,0.221549,-0.221549,0.434713,-0.434713
7,21,1,0.0,-0.734787,0.734787,0.110325,1.743654,-1.743654,0.592253,-0.592253,0.150551,-0.150551,0.039222,-0.039222,-0.005147,0.005147,0.519927,-0.519927,0.66863,-0.66863,0.294136,-0.294136,0.625067,-0.625067,0.919203,-0.919203,0.838663,-0.838663,0.054845,-0.054845,0.306903,-0.306903,-0.221549,0.221549,-0.434713,0.434713
8,22,0,0.0,0.72462,-0.72462,-0.101709,-1.625339,1.625339,-0.530333,0.530333,-0.077834,0.077834,-0.041665,0.041665,-0.02592,0.02592,-0.523008,0.523008,-0.666297,0.666297,-0.256922,0.256922,-0.566002,0.566002,-0.822924,0.822924,-0.83586,0.83586,-0.032376,0.032376,-0.327453,0.327453,0.185596,-0.185596,0.46478,-0.46478
9,22,1,0.0,-0.72462,0.72462,0.101709,1.625339,-1.625339,0.530333,-0.530333,0.077834,-0.077834,0.041665,-0.041665,0.02592,-0.02592,0.523008,-0.523008,0.666297,-0.666297,0.256922,-0.256922,0.566002,-0.566002,0.822924,-0.822924,0.83586,-0.83586,0.032376,-0.032376,0.327453,-0.327453,-0.185596,0.185596,-0.46478,0.46478


(14, 104)

In [9]:
df_game_stats.head()

Unnamed: 0,Season,Date,TID,OID,TPOFF,OddsT,OddsO,W,TSC,OSC,TFGM,OFGM,TFGA,OFGA,TFG3M,OFG3M,TFG3A,OFG3A,TFTM,OFTM,TFTA,OFTA,TORB,OORB,TDRB,ODRB,TRB,ORB,TAST,OAST,TSTL,OSTL,TBLK,OBLK,TTOV,OTOV,TPF,OPF,H
19835,18,1992-11-03,13,0,0,1.183476,5.088113,1,89,74,33.0,29.0,75.0,71.0,7.0,2.0,20.0,8.0,16.0,14.0,30.0,17.0,12.0,12.0,27.0,32.0,39.0,44.0,25.0,16.0,12.0,3.0,3.0,2.0,8.0,19.0,19.0,22.0,1
19836,18,1992-11-03,4,43,0,1.195249,4.856962,1,83,82,31.0,30.0,73.0,83.0,1.0,5.0,10.0,10.0,20.0,17.0,26.0,21.0,7.0,17.0,31.0,26.0,38.0,43.0,20.0,14.0,8.0,7.0,11.0,5.0,20.0,18.0,24.0,32.0,1
19837,18,1992-11-03,17,20,0,1.524799,2.470634,1,109,93,45.0,32.0,89.0,85.0,7.0,7.0,20.0,25.0,12.0,22.0,20.0,30.0,7.0,11.0,39.0,35.0,46.0,46.0,32.0,17.0,7.0,4.0,6.0,1.0,10.0,11.0,28.0,21.0,1
19838,18,1992-11-04,15,28,0,1.388495,2.988044,0,87,89,31.0,36.0,79.0,79.0,2.0,7.0,6.0,14.0,23.0,10.0,31.0,15.0,16.0,12.0,24.0,27.0,40.0,39.0,15.0,18.0,11.0,8.0,12.0,9.0,15.0,19.0,17.0,28.0,1
19839,18,1992-11-04,5,22,0,1.358132,3.154191,1,95,89,38.0,35.0,85.0,83.0,1.0,2.0,6.0,9.0,18.0,17.0,28.0,18.0,15.0,12.0,32.0,30.0,47.0,42.0,26.0,25.0,8.0,4.0,5.0,4.0,8.0,12.0,14.0,21.0,1


In [10]:
df_game_stats_advantage = pd.merge(df_game_stats, df_home_away_advantage[["Season", "H"] + features_advantage + features_season], on=["Season", "H"])

features_norm_advantage = []
for feature_name, feature_name_advantage, feature_name_season in zip(features, features_advantage, features_season):
    feature_name_norm_advantage = f"{feature_name}_norm_advantage"
    features_norm_advantage.append(feature_name_norm_advantage)

    df_game_stats_advantage[feature_name_norm_advantage] = df_game_stats_advantage[feature_name] - df_game_stats_advantage[feature_name_advantage]


df_game_stats_norm_advantage = df_game_stats_advantage[["Season", "H", "Date", "TID", "OID"] + features_norm_advantage]
df_game_stats_norm_advantage = df_game_stats_norm_advantage.rename(columns={fna: f for f, fna  in zip(features, features_norm_advantage)})

display(df_game_stats_norm_advantage.head(), df_game_stats_norm_advantage.shape)


Unnamed: 0,Season,H,Date,TID,OID,TPOFF,OddsT,OddsO,W,TSC,OSC,TFGM,OFGM,TFGA,OFGA,TFG3M,OFG3M,TFG3A,OFG3A,TFTM,OFTM,TFTA,OFTA,TORB,OORB,TDRB,ODRB,TRB,ORB,TAST,OAST,TSTL,OSTL,TBLK,OBLK,TTOV,OTOV,TPF,OPF
0,18,1,1992-11-03,13,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,18,1,1992-11-03,4,43,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,18,1,1992-11-03,17,20,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,18,1,1992-11-04,15,28,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,18,1,1992-11-04,5,22,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


(18306, 39)

In [11]:
agg = {f: "mean" for f in features}
agg["Timestamp"] = "max"

df_game_stats_norm_advantage["Timestamp"] = df_game_stats_norm_advantage["Date"].astype(int)

df_team_stats_norm_advantage = df_game_stats_norm_advantage \
    .groupby(["Season", "TID"]) \
    .rolling(1_000_000, min_periods=1, closed="left") \
    .agg(agg) \
    .reset_index() \
    .drop(columns="level_2") \
    .dropna() \
    .sort_values(by="Timestamp")

df_team_stats_norm_advantage["Timestamp"] = df_team_stats_norm_advantage["Timestamp"].astype(int)


display(df_team_stats_norm_advantage.head(), df_team_stats_norm_advantage.shape)


Unnamed: 0,Season,TID,TPOFF,OddsT,OddsO,W,TSC,OSC,TFGM,OFGM,TFGA,OFGA,TFG3M,OFG3M,TFG3A,OFG3A,TFTM,OFTM,TFTA,OFTA,TORB,OORB,TDRB,ODRB,TRB,ORB,TAST,OAST,TSTL,OSTL,TBLK,OBLK,TTOV,OTOV,TPF,OPF,Timestamp
7805,21,0,0.0,2.008869,3.059532,-0.110325,64.256346,109.743654,24.407747,39.592253,64.849449,79.150551,2.960778,7.039222,17.005147,12.994853,12.480073,23.519927,21.33137,32.66863,3.705864,13.294136,24.374933,36.625067,28.080797,49.919203,12.161337,22.838663,5.945155,10.054845,2.693097,5.306903,23.221549,15.778451,24.434713,20.565287,815702400000000000
8880,21,17,0.0,2.332758,1.552162,0.889675,112.256346,107.743654,45.407747,40.592253,82.849449,77.150551,5.960778,13.039222,12.005147,29.994853,15.480073,13.519927,23.33137,17.66863,11.705864,4.294136,30.374933,25.625067,42.080797,29.919203,29.161337,29.838663,10.945155,7.054845,0.693097,5.306903,21.221549,20.778451,19.434713,24.565287,815702400000000000
8694,21,15,0.0,1.99224,3.232637,-0.110325,95.256346,106.743654,30.407747,43.592253,71.849449,93.150551,7.960778,2.039222,21.005147,13.994853,26.480073,17.519927,33.33137,25.66863,12.705864,11.294136,34.374933,27.625067,47.080797,38.919203,18.161337,26.838663,2.945155,12.054845,8.693097,0.306903,22.221549,7.778451,23.434713,24.565287,815788800000000000
8513,21,12,0.0,2.176735,2.013945,0.889675,105.256346,98.743654,38.407747,32.592253,73.849449,74.150551,2.960778,10.039222,10.005147,21.994853,25.480073,23.519927,30.33137,36.66863,10.705864,10.294136,35.374933,26.625067,46.080797,36.919203,18.161337,19.838663,5.945155,7.054845,2.693097,3.306903,16.221549,12.778451,25.434713,21.565287,815788800000000000
8328,21,8,0.0,2.596014,1.142956,-0.110325,96.256346,111.743654,33.407747,39.592253,81.849449,86.150551,2.960778,5.039222,19.005147,16.994853,26.480073,27.519927,41.33137,41.66863,11.705864,19.294136,33.374933,36.625067,45.080797,55.919203,17.161337,29.838663,5.945155,6.054845,3.693097,4.306903,10.221549,15.778451,31.434713,29.565287,815788800000000000


(10382, 37)

In [12]:
df_home_advantage = df_home_away_advantage[df_home_away_advantage["H"] == 1]
df_away_advantage = df_home_away_advantage[df_home_away_advantage["H"] == 0]

In [13]:
df_features_norm_advantage = pd.merge_asof(
    df_bare, df_team_stats_norm_advantage,
    on="Timestamp", left_by=["Season", "HID"], right_by=["Season", "TID"],
    allow_exact_matches=False
).drop(columns="TID").dropna()
df_features_norm_advantage = pd.merge_asof(
    df_features_norm_advantage, df_team_stats_norm_advantage,
    on="Timestamp", left_by=["Season", "AID"], right_by=["Season", "TID"],
    allow_exact_matches=False,  suffixes=("_H", "_A")
).drop(columns="TID").dropna()
df_features_norm_advantage = pd.merge(
    df_features_norm_advantage, 
    df_home_advantage[["Season"] + features_advantage],
    on="Season"
)
df_features_norm_advantage = pd.merge(
    df_features_norm_advantage, 
    df_away_advantage[["Season"] + features_advantage + features_season],
    on="Season", suffixes=("_H", "_A")
)

for feature_name, feature_name_advantage, feature_name_season in zip(features, features_advantage, features_season):
    df_features_norm_advantage[f"{feature_name}_H"] += df_features_norm_advantage[f"{feature_name_advantage}_H"]
    df_features_norm_advantage[f"{feature_name}_H"] /= df_features_norm_advantage[feature_name_season]

    df_features_norm_advantage[f"{feature_name}_A"] += df_features_norm_advantage[f"{feature_name_advantage}_A"]
    df_features_norm_advantage[f"{feature_name}_A"] /= df_features_norm_advantage[feature_name_season]


display(df_features_norm_advantage.head(), df_features_norm_advantage.shape)

Unnamed: 0,Season,Date,HID,AID,POFF,H,A,OddsH,OddsA,Timestamp,TPOFF_H,OddsT_H,OddsO_H,W_H,TSC_H,OSC_H,TFGM_H,OFGM_H,TFGA_H,OFGA_H,TFG3M_H,OFG3M_H,TFG3A_H,OFG3A_H,TFTM_H,OFTM_H,TFTA_H,OFTA_H,TORB_H,OORB_H,TDRB_H,ODRB_H,TRB_H,ORB_H,TAST_H,OAST_H,TSTL_H,OSTL_H,TBLK_H,OBLK_H,TTOV_H,OTOV_H,TPF_H,OPF_H,TPOFF_A,OddsT_A,OddsO_A,W_A,TSC_A,OSC_A,TFGM_A,OFGM_A,TFGA_A,OFGA_A,TFG3M_A,OFG3M_A,TFG3A_A,OFG3A_A,TFTM_A,OFTM_A,TFTA_A,OFTA_A,TORB_A,OORB_A,TDRB_A,ODRB_A,TRB_A,ORB_A,TAST_A,OAST_A,TSTL_A,OSTL_A,TBLK_A,OBLK_A,TTOV_A,OTOV_A,TPF_A,OPF_A,TPOFF_advantage_H,OddsT_advantage_H,OddsO_advantage_H,W_advantage_H,TSC_advantage_H,OSC_advantage_H,TFGM_advantage_H,OFGM_advantage_H,TFGA_advantage_H,OFGA_advantage_H,TFG3M_advantage_H,OFG3M_advantage_H,TFG3A_advantage_H,OFG3A_advantage_H,TFTM_advantage_H,OFTM_advantage_H,TFTA_advantage_H,OFTA_advantage_H,TORB_advantage_H,OORB_advantage_H,TDRB_advantage_H,ODRB_advantage_H,TRB_advantage_H,ORB_advantage_H,TAST_advantage_H,OAST_advantage_H,TSTL_advantage_H,OSTL_advantage_H,TBLK_advantage_H,OBLK_advantage_H,TTOV_advantage_H,OTOV_advantage_H,TPF_advantage_H,OPF_advantage_H,TPOFF_advantage_A,OddsT_advantage_A,OddsO_advantage_A,W_advantage_A,TSC_advantage_A,OSC_advantage_A,TFGM_advantage_A,OFGM_advantage_A,TFGA_advantage_A,OFGA_advantage_A,TFG3M_advantage_A,OFG3M_advantage_A,TFG3A_advantage_A,OFG3A_advantage_A,TFTM_advantage_A,OFTM_advantage_A,TFTA_advantage_A,OFTA_advantage_A,TORB_advantage_A,OORB_advantage_A,TDRB_advantage_A,ODRB_advantage_A,TRB_advantage_A,ORB_advantage_A,TAST_advantage_A,OAST_advantage_A,TSTL_advantage_A,OSTL_advantage_A,TBLK_advantage_A,OBLK_advantage_A,TTOV_advantage_A,OTOV_advantage_A,TPF_advantage_A,OPF_advantage_A,TPOFF_season,OddsT_season,OddsO_season,W_season,TSC_season,OSC_season,TFGM_season,OFGM_season,TFGA_season,OFGA_season,TFG3M_season,OFG3M_season,TFG3A_season,OFG3A_season,TFTM_season,OFTM_season,TFTA_season,OFTA_season,TORB_season,OORB_season,TDRB_season,ODRB_season,TRB_season,ORB_season,TAST_season,OAST_season,TSTL_season,OSTL_season,TBLK_season,OBLK_season,TTOV_season,OTOV_season,TPF_season,OPF_season
0,21,1995-11-10,43,12,0,0,1,1.290665,3.640357,815961600000000000,0.0,0.610855,1.09695,2.0,1.169602,1.086059,1.154197,1.069743,0.967543,1.118329,0.725155,0.725155,0.960212,0.640142,1.354312,1.250134,1.172706,1.133616,0.682564,1.450448,1.035959,0.902287,0.936499,1.056563,1.003549,0.573456,0.666348,0.666348,1.230822,0.615411,1.09653,0.959464,1.117921,1.162638,0.0,1.209212,0.53126,1.558698,1.08097,1.049376,1.064549,0.934182,0.926063,0.93363,0.529645,1.827108,0.640801,1.407652,1.300147,1.25221,1.159523,1.459521,0.888334,0.903396,1.161272,0.910646,1.084456,0.908606,0.827817,0.988129,0.784999,0.947505,0.489497,0.741326,1.126897,0.860564,1.156799,0.944893,0.0,-0.734787,0.734787,0.110325,1.743654,-1.743654,0.592253,-0.592253,0.150551,-0.150551,0.039222,-0.039222,-0.005147,0.005147,0.519927,-0.519927,0.66863,-0.66863,0.294136,-0.294136,0.625067,-0.625067,0.919203,-0.919203,0.838663,-0.838663,0.054845,-0.054845,0.306903,-0.306903,-0.221549,0.221549,-0.434713,0.434713,0.0,0.734787,-0.734787,-0.110325,-1.743654,1.743654,-0.592253,0.592253,-0.150551,0.150551,-0.039222,0.039222,0.005147,-0.005147,-0.519927,0.519927,-0.66863,0.66863,-0.294136,0.294136,-0.625067,0.625067,-0.919203,0.919203,-0.838663,0.838663,-0.054845,0.054845,-0.306903,0.306903,0.221549,-0.221549,0.434713,-0.434713,0.06534,2.407783,2.407783,0.5,95.75908,95.75908,35.522536,35.522536,79.582987,79.582987,5.516064,5.516064,15.621546,15.621546,19.197943,19.197943,25.581855,25.581855,11.720517,11.720517,29.923965,29.923965,41.644481,41.644481,20.92574,20.92574,7.503591,7.503591,4.87479,4.87479,14.591482,14.591482,22.362934,22.362934
1,21,1995-11-10,30,13,0,0,1,1.371997,3.075118,815961600000000000,0.0,0.711931,0.859833,2.0,1.138273,0.98163,0.98529,1.041592,0.741364,1.030371,1.087732,1.087732,0.512113,1.280283,1.718934,0.729245,1.641789,0.938165,0.511923,0.938525,1.002541,0.534689,0.86446,0.648345,1.003549,1.003549,0.932887,0.932887,1.435959,0.0,1.576262,1.027997,1.028488,1.386222,0.0,1.151172,0.861012,1.558698,0.882555,0.819633,0.83934,0.765275,0.926063,0.983892,0.529645,0.739376,0.576786,1.087582,1.14388,1.043854,0.924981,0.912258,1.229615,1.50064,1.0276,0.877228,1.084456,1.052682,0.827817,0.605825,1.318077,1.214044,1.104908,0.946463,1.19543,1.20323,1.022649,1.034326,0.0,-0.734787,0.734787,0.110325,1.743654,-1.743654,0.592253,-0.592253,0.150551,-0.150551,0.039222,-0.039222,-0.005147,0.005147,0.519927,-0.519927,0.66863,-0.66863,0.294136,-0.294136,0.625067,-0.625067,0.919203,-0.919203,0.838663,-0.838663,0.054845,-0.054845,0.306903,-0.306903,-0.221549,0.221549,-0.434713,0.434713,0.0,0.734787,-0.734787,-0.110325,-1.743654,1.743654,-0.592253,0.592253,-0.150551,0.150551,-0.039222,0.039222,0.005147,-0.005147,-0.519927,0.519927,-0.66863,0.66863,-0.294136,0.294136,-0.625067,0.625067,-0.919203,0.919203,-0.838663,0.838663,-0.054845,0.054845,-0.306903,0.306903,0.221549,-0.221549,0.434713,-0.434713,0.06534,2.407783,2.407783,0.5,95.75908,95.75908,35.522536,35.522536,79.582987,79.582987,5.516064,5.516064,15.621546,15.621546,19.197943,19.197943,25.581855,25.581855,11.720517,11.720517,29.923965,29.923965,41.644481,41.644481,20.92574,20.92574,7.503591,7.503591,4.87479,4.87479,14.591482,14.591482,22.362934,22.362934
2,21,1995-11-10,3,44,0,1,0,1.219087,4.460257,815961600000000000,0.0,0.485295,2.25335,0.0,1.221816,1.232259,1.182348,1.126046,1.319377,1.193722,1.087732,1.812887,1.344297,1.408311,1.406401,1.45849,1.759059,1.407247,1.962371,1.535768,1.169631,1.269885,1.392742,1.344716,0.669033,1.051337,0.799617,0.799617,1.641096,1.435959,0.959464,1.713328,1.430939,1.878108,0.0,1.34891,0.211006,-0.441302,0.997427,1.143362,1.008247,1.018635,0.976325,1.084416,0.710934,1.101953,0.640801,1.023567,1.039702,1.616832,1.198613,1.654972,0.888334,1.58596,1.127854,1.111154,1.060443,1.244785,1.018969,1.322645,1.051538,1.880392,1.310045,1.1516,1.538096,1.134696,1.335667,1.12376,0.0,-0.734787,0.734787,0.110325,1.743654,-1.743654,0.592253,-0.592253,0.150551,-0.150551,0.039222,-0.039222,-0.005147,0.005147,0.519927,-0.519927,0.66863,-0.66863,0.294136,-0.294136,0.625067,-0.625067,0.919203,-0.919203,0.838663,-0.838663,0.054845,-0.054845,0.306903,-0.306903,-0.221549,0.221549,-0.434713,0.434713,0.0,0.734787,-0.734787,-0.110325,-1.743654,1.743654,-0.592253,0.592253,-0.150551,0.150551,-0.039222,0.039222,0.005147,-0.005147,-0.519927,0.519927,-0.66863,0.66863,-0.294136,0.294136,-0.625067,0.625067,-0.919203,0.919203,-0.838663,0.838663,-0.054845,0.054845,-0.306903,0.306903,0.221549,-0.221549,0.434713,-0.434713,0.06534,2.407783,2.407783,0.5,95.75908,95.75908,35.522536,35.522536,79.582987,79.582987,5.516064,5.516064,15.621546,15.621546,19.197943,19.197943,25.581855,25.581855,11.720517,11.720517,29.923965,29.923965,41.644481,41.644481,20.92574,20.92574,7.503591,7.503591,4.87479,4.87479,14.591482,14.591482,22.362934,22.362934
3,21,1995-11-10,17,2,0,1,0,1.278218,3.7543,815961600000000000,0.0,0.663669,0.949815,2.0,1.190488,1.106945,1.294953,1.126046,1.042936,0.967543,1.087732,2.356753,0.76817,1.920425,0.833423,0.677156,0.938165,0.664534,1.023846,0.341282,1.035959,0.835451,1.03255,0.696371,1.433641,1.385853,1.465965,0.932887,0.205137,1.025685,1.439196,1.439196,0.84962,1.117921,0.0,1.101705,1.506161,-0.441302,1.070527,1.185134,1.120852,1.159391,1.127111,0.921065,1.073511,0.376798,1.216928,0.639483,0.883436,1.512654,0.72953,1.420431,1.229615,0.903396,0.593166,1.044318,0.772289,1.004657,1.018969,0.701401,1.184807,0.814235,0.694634,1.356737,0.921298,1.066163,1.156799,0.900176,0.0,-0.734787,0.734787,0.110325,1.743654,-1.743654,0.592253,-0.592253,0.150551,-0.150551,0.039222,-0.039222,-0.005147,0.005147,0.519927,-0.519927,0.66863,-0.66863,0.294136,-0.294136,0.625067,-0.625067,0.919203,-0.919203,0.838663,-0.838663,0.054845,-0.054845,0.306903,-0.306903,-0.221549,0.221549,-0.434713,0.434713,0.0,0.734787,-0.734787,-0.110325,-1.743654,1.743654,-0.592253,0.592253,-0.150551,0.150551,-0.039222,0.039222,0.005147,-0.005147,-0.519927,0.519927,-0.66863,0.66863,-0.294136,0.294136,-0.625067,0.625067,-0.919203,0.919203,-0.838663,0.838663,-0.054845,0.054845,-0.306903,0.306903,0.221549,-0.221549,0.434713,-0.434713,0.06534,2.407783,2.407783,0.5,95.75908,95.75908,35.522536,35.522536,79.582987,79.582987,5.516064,5.516064,15.621546,15.621546,19.197943,19.197943,25.581855,25.581855,11.720517,11.720517,29.923965,29.923965,41.644481,41.644481,20.92574,20.92574,7.503591,7.503591,4.87479,4.87479,14.591482,14.591482,22.362934,22.362934
4,21,1995-11-10,0,35,0,1,0,1.406762,2.899507,815961600000000000,0.0,0.529152,1.575855,0.0,0.68923,1.12783,0.703779,1.097895,0.816757,0.992674,0.543866,1.269021,1.088241,0.832184,0.677156,1.198045,0.859985,1.250887,0.341282,1.109166,0.835451,1.203049,0.696371,1.176626,0.621244,1.051337,0.799617,1.332695,0.615411,1.025685,1.576262,1.09653,1.073204,0.939054,0.0,1.121714,1.170199,1.558698,1.028756,0.997162,1.092701,1.074937,0.976325,1.18494,0.529645,0.739376,1.152914,1.279624,0.935525,0.78341,0.885891,0.834078,0.717693,0.647435,1.395198,1.077736,1.20452,0.956631,1.257909,1.083705,0.251921,1.080775,0.899771,1.1516,1.675162,0.99763,1.201516,1.168477,0.0,-0.734787,0.734787,0.110325,1.743654,-1.743654,0.592253,-0.592253,0.150551,-0.150551,0.039222,-0.039222,-0.005147,0.005147,0.519927,-0.519927,0.66863,-0.66863,0.294136,-0.294136,0.625067,-0.625067,0.919203,-0.919203,0.838663,-0.838663,0.054845,-0.054845,0.306903,-0.306903,-0.221549,0.221549,-0.434713,0.434713,0.0,0.734787,-0.734787,-0.110325,-1.743654,1.743654,-0.592253,0.592253,-0.150551,0.150551,-0.039222,0.039222,0.005147,-0.005147,-0.519927,0.519927,-0.66863,0.66863,-0.294136,0.294136,-0.625067,0.625067,-0.919203,0.919203,-0.838663,0.838663,-0.054845,0.054845,-0.306903,0.306903,0.221549,-0.221549,0.434713,-0.434713,0.06534,2.407783,2.407783,0.5,95.75908,95.75908,35.522536,35.522536,79.582987,79.582987,5.516064,5.516064,15.621546,15.621546,19.197943,19.197943,25.581855,25.581855,11.720517,11.720517,29.923965,29.923965,41.644481,41.644481,20.92574,20.92574,7.503591,7.503591,4.87479,4.87479,14.591482,14.591482,22.362934,22.362934


(5121, 180)

In [14]:
df_game_stats["Timestamp"] = df_game_stats["Date"].astype(int)


df_team_stats = df_game_stats \
    .groupby(["Season", "TID"]) \
    .rolling(1_000_000, min_periods=1, closed="left") \
    .agg(agg) \
    .reset_index() \
    .drop(columns="level_2") \
    .dropna() \
    .sort_values(by="Timestamp")

df_team_stats["Timestamp"] = df_team_stats["Timestamp"].astype(int)


display(df_team_stats.head(), df_team_stats.shape)

Unnamed: 0,Season,TID,TPOFF,OddsT,OddsO,W,TSC,OSC,TFGM,OFGM,TFGA,OFGA,TFG3M,OFG3M,TFG3A,OFG3A,TFTM,OFTM,TFTA,OFTA,TORB,OORB,TDRB,ODRB,TRB,ORB,TAST,OAST,TSTL,OSTL,TBLK,OBLK,TTOV,OTOV,TPF,OPF,Timestamp
349,18,4,0.0,1.195249,4.856962,1.0,83.0,82.0,31.0,30.0,73.0,83.0,1.0,5.0,10.0,10.0,20.0,17.0,26.0,21.0,7.0,17.0,31.0,26.0,38.0,43.0,20.0,14.0,8.0,7.0,11.0,5.0,20.0,18.0,24.0,32.0,720748800000000000
1056,18,17,0.0,1.524799,2.470634,1.0,109.0,93.0,45.0,32.0,89.0,85.0,7.0,7.0,20.0,25.0,12.0,22.0,20.0,30.0,7.0,11.0,39.0,35.0,46.0,46.0,32.0,17.0,7.0,4.0,6.0,1.0,10.0,11.0,28.0,21.0,720748800000000000
787,18,13,0.0,1.183476,5.088113,1.0,89.0,74.0,33.0,29.0,75.0,71.0,7.0,2.0,20.0,8.0,16.0,14.0,30.0,17.0,12.0,12.0,27.0,32.0,39.0,44.0,25.0,16.0,12.0,3.0,3.0,2.0,8.0,19.0,19.0,22.0,720748800000000000
2188,18,39,0.0,1.293894,3.612292,1.0,88.0,83.0,35.0,33.0,108.0,83.0,4.0,3.0,25.0,14.0,14.0,14.0,20.0,19.0,21.0,10.0,34.0,48.0,55.0,58.0,24.0,18.0,18.0,7.0,4.0,11.0,11.0,25.0,20.0,20.0,720835200000000000
623,18,11,0.0,1.563269,2.36833,0.0,74.0,99.0,26.0,37.0,82.0,76.0,3.0,5.0,13.0,12.0,19.0,20.0,31.0,30.0,18.0,11.0,29.0,37.0,47.0,48.0,16.0,25.0,9.0,12.0,8.0,5.0,18.0,18.0,23.0,22.0,720835200000000000


(18097, 37)

In [15]:
df_features = pd.merge_asof(
    df_bare, df_team_stats,
    on="Timestamp", left_by=["Season", "HID"], right_by=["Season", "TID"],
    allow_exact_matches=False
).drop(columns="TID").dropna()
df_features = pd.merge_asof(
    df_features, df_team_stats,
    on="Timestamp", left_by=["Season", "AID"], right_by=["Season", "TID"],
    allow_exact_matches=False,  suffixes=("_H", "_A")
).drop(columns="TID").dropna()


df_features = df_features[df_features["Season"] >= df_features_norm_advantage["Season"].min()]

display(df_features.head(), df_features.shape)

Unnamed: 0,Season,Date,HID,AID,POFF,H,A,OddsH,OddsA,Timestamp,TPOFF_H,OddsT_H,OddsO_H,W_H,TSC_H,OSC_H,TFGM_H,OFGM_H,TFGA_H,OFGA_H,TFG3M_H,OFG3M_H,TFG3A_H,OFG3A_H,TFTM_H,OFTM_H,TFTA_H,OFTA_H,TORB_H,OORB_H,TDRB_H,ODRB_H,TRB_H,ORB_H,TAST_H,OAST_H,TSTL_H,OSTL_H,TBLK_H,OBLK_H,TTOV_H,OTOV_H,TPF_H,OPF_H,TPOFF_A,OddsT_A,OddsO_A,W_A,TSC_A,OSC_A,TFGM_A,OFGM_A,TFGA_A,OFGA_A,TFG3M_A,OFG3M_A,TFG3A_A,OFG3A_A,TFTM_A,OFTM_A,TFTA_A,OFTA_A,TORB_A,OORB_A,TDRB_A,ODRB_A,TRB_A,ORB_A,TAST_A,OAST_A,TSTL_A,OSTL_A,TBLK_A,OBLK_A,TTOV_A,OTOV_A,TPF_A,OPF_A
3813,21,1995-11-10,43,12,0,0,1,1.290665,3.640357,815961600000000000,0.0,1.470806,2.641219,1.0,112.0,104.0,41.0,38.0,77.0,89.0,4.0,4.0,15.0,10.0,26.0,24.0,30.0,29.0,8.0,17.0,31.0,27.0,39.0,44.0,21.0,12.0,5.0,5.0,6.0,3.0,16.0,14.0,25.0,26.0,0.0,1.441949,2.748731,1.0,107.0,97.0,39.0,32.0,74.0,74.0,3.0,10.0,10.0,22.0,26.0,23.0,31.0,36.0,11.0,10.0,36.0,26.0,47.0,36.0,19.0,19.0,6.0,7.0,3.0,3.0,16.0,13.0,25.0,22.0
3814,21,1995-11-10,30,13,0,0,1,1.371997,3.075118,815961600000000000,0.0,1.714175,2.070291,1.0,109.0,94.0,35.0,37.0,59.0,82.0,6.0,6.0,8.0,20.0,33.0,14.0,42.0,24.0,6.0,11.0,30.0,16.0,36.0,27.0,21.0,21.0,7.0,7.0,7.0,0.0,23.0,15.0,23.0,31.0,0.0,1.302199,3.542703,1.0,88.0,75.0,31.0,26.0,74.0,78.0,3.0,4.0,9.0,17.0,23.0,19.0,25.0,22.0,15.0,17.0,32.0,25.0,47.0,42.0,19.0,11.0,10.0,9.0,6.0,4.0,17.0,18.0,22.0,24.0
3815,21,1995-11-10,3,44,0,1,0,1.219087,4.460257,815961600000000000,0.0,1.168486,5.425578,0.0,117.0,118.0,42.0,40.0,105.0,95.0,6.0,10.0,21.0,22.0,27.0,28.0,45.0,36.0,23.0,18.0,35.0,38.0,58.0,56.0,14.0,22.0,6.0,6.0,8.0,7.0,14.0,25.0,32.0,42.0,0.0,1.778309,1.977631,0.0,99.0,106.0,37.0,35.0,78.0,86.0,4.0,6.0,10.0,16.0,21.0,30.0,32.0,41.0,11.0,18.0,35.0,32.0,46.0,50.0,23.0,26.0,8.0,14.0,7.0,5.0,22.0,17.0,29.0,26.0
3816,21,1995-11-10,17,2,0,1,0,1.278218,3.7543,815961600000000000,0.0,1.597972,2.286949,1.0,114.0,106.0,46.0,40.0,83.0,77.0,6.0,13.0,12.0,30.0,16.0,13.0,24.0,17.0,12.0,4.0,31.0,25.0,43.0,29.0,30.0,29.0,11.0,7.0,1.0,5.0,21.0,21.0,19.0,25.0,0.0,1.183093,5.096083,0.0,106.0,110.0,41.0,40.0,90.0,73.0,6.0,2.0,19.0,10.0,18.0,28.0,20.0,35.0,15.0,10.0,19.0,30.0,34.0,40.0,23.0,13.0,9.0,6.0,4.0,6.0,13.0,16.0,25.0,21.0
3817,21,1995-11-10,0,35,0,1,0,1.406762,2.899507,815961600000000000,0.0,1.274083,3.794318,0.0,66.0,108.0,25.0,39.0,65.0,79.0,3.0,7.0,17.0,13.0,13.0,23.0,22.0,32.0,4.0,13.0,25.0,36.0,29.0,49.0,13.0,22.0,6.0,10.0,3.0,5.0,23.0,16.0,24.0,21.0,0.0,1.231271,4.287158,1.0,102.0,92.0,40.0,37.0,78.0,94.0,3.0,4.0,18.0,20.0,19.0,14.0,24.0,20.0,9.0,7.0,43.0,31.0,52.0,38.0,28.0,21.0,2.0,8.0,5.0,5.0,24.0,15.0,26.0,27.0


(5121, 78)

In [16]:
formula = "H ~ W_H + W_A"

print(formula)

model = smf.logit(formula=formula, data=df_features)
result = model.fit()

display(result.summary())

model_norm_advantage = smf.logit(formula=formula, data=df_features_norm_advantage)
result_norm_advantage = model_norm_advantage.fit()

display(result_norm_advantage.summary())


H ~ W_H + W_A
Optimization terminated successfully.
         Current function value: 0.626175
         Iterations 5


0,1,2,3
Dep. Variable:,H,No. Observations:,5121.0
Model:,Logit,Df Residuals:,5118.0
Method:,MLE,Df Model:,2.0
Date:,"Wed, 06 Nov 2024",Pseudo R-squ.:,0.06765
Time:,23:59:58,Log-Likelihood:,-3206.6
converged:,True,LL-Null:,-3439.3
Covariance Type:,nonrobust,LLR p-value:,8.965e-102

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.1914,0.124,1.546,0.122,-0.051,0.434
W_H,2.4479,0.147,16.646,0.000,2.160,2.736
W_A,-2.0193,0.152,-13.269,0.000,-2.318,-1.721


Optimization terminated successfully.
         Current function value: 0.626471
         Iterations 5


0,1,2,3
Dep. Variable:,H,No. Observations:,5121.0
Model:,Logit,Df Residuals:,5118.0
Method:,MLE,Df Model:,2.0
Date:,"Wed, 06 Nov 2024",Pseudo R-squ.:,0.06721
Time:,23:59:58,Log-Likelihood:,-3208.2
converged:,True,LL-Null:,-3439.3
Covariance Type:,nonrobust,LLR p-value:,4.084e-101

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.2378,0.105,-2.264,0.024,-0.444,-0.032
W_H,1.2240,0.074,16.652,0.000,1.080,1.368
W_A,-1.0033,0.076,-13.166,0.000,-1.153,-0.854


In [17]:
featuresT_remove_corr = [
    "TPOFF", "OddsT", "OddsO", "W", "TFGM", "OFGM", "TFGA", "OFGA", "TFG3M", "OFG3M",
    "TFG3A", "OFG3A", "TFTM", "OFTM", "TFTA", "OFTA", "TORB", "OORB", "TDRB", "ODRB",
    "TAST", "OAST", "TSTL", "OSTL", "TBLK", "OBLK", "TTOV", "OTOV", "TPF", "OPF"
]

formula = "H ~ POFF + " + " + ".join(map(lambda x: x + "_H", featuresT_remove_corr)) + " + " + " + ".join(map(lambda x: x + "_A", featuresT_remove_corr))

print(formula)

model = smf.logit(formula=formula, data=df_features)
result = model.fit()

display(result.summary())

model_norm_advantage = smf.logit(formula=formula, data=df_features_norm_advantage)
result_norm_advantage = model_norm_advantage.fit()

display(result_norm_advantage.summary())

H ~ POFF + TPOFF_H + OddsT_H + OddsO_H + W_H + TFGM_H + OFGM_H + TFGA_H + OFGA_H + TFG3M_H + OFG3M_H + TFG3A_H + OFG3A_H + TFTM_H + OFTM_H + TFTA_H + OFTA_H + TORB_H + OORB_H + TDRB_H + ODRB_H + TAST_H + OAST_H + TSTL_H + OSTL_H + TBLK_H + OBLK_H + TTOV_H + OTOV_H + TPF_H + OPF_H + TPOFF_A + OddsT_A + OddsO_A + W_A + TFGM_A + OFGM_A + TFGA_A + OFGA_A + TFG3M_A + OFG3M_A + TFG3A_A + OFG3A_A + TFTM_A + OFTM_A + TFTA_A + OFTA_A + TORB_A + OORB_A + TDRB_A + ODRB_A + TAST_A + OAST_A + TSTL_A + OSTL_A + TBLK_A + OBLK_A + TTOV_A + OTOV_A + TPF_A + OPF_A
Optimization terminated successfully.
         Current function value: 0.586239
         Iterations 6


0,1,2,3
Dep. Variable:,H,No. Observations:,5121.0
Model:,Logit,Df Residuals:,5059.0
Method:,MLE,Df Model:,61.0
Date:,"Wed, 06 Nov 2024",Pseudo R-squ.:,0.1271
Time:,23:59:59,Log-Likelihood:,-3002.1
converged:,True,LL-Null:,-3439.3
Covariance Type:,nonrobust,LLR p-value:,2.402e-144

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-2.5240,1.732,-1.457,0.145,-5.918,0.870
POFF,0.5280,0.226,2.337,0.019,0.085,0.971
TPOFF_H,9.0518,4.913,1.842,0.065,-0.577,18.681
OddsT_H,-0.3898,0.157,-2.479,0.013,-0.698,-0.082
OddsO_H,0.1999,0.051,3.933,0.000,0.100,0.299
W_H,-0.5308,0.365,-1.456,0.145,-1.245,0.184
TFGM_H,0.1378,0.055,2.514,0.012,0.030,0.245
OFGM_H,-0.0194,0.054,-0.357,0.721,-0.126,0.087
TFGA_H,-0.0649,0.050,-1.293,0.196,-0.163,0.033


Optimization terminated successfully.
         Current function value: 0.586106
         Iterations 6


0,1,2,3
Dep. Variable:,H,No. Observations:,5121.0
Model:,Logit,Df Residuals:,5059.0
Method:,MLE,Df Model:,61.0
Date:,"Wed, 06 Nov 2024",Pseudo R-squ.:,0.1273
Time:,23:59:59,Log-Likelihood:,-3001.4
converged:,True,LL-Null:,-3439.3
Covariance Type:,nonrobust,LLR p-value:,1.271e-144

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-3.9990,1.789,-2.236,0.025,-7.505,-0.493
POFF,0.5475,0.227,2.417,0.016,0.103,0.992
TPOFF_H,0.6885,0.320,2.154,0.031,0.062,1.315
OddsT_H,-0.9213,0.385,-2.390,0.017,-1.677,-0.166
OddsO_H,0.4928,0.125,3.947,0.000,0.248,0.738
W_H,-0.2469,0.183,-1.353,0.176,-0.605,0.111
TFGM_H,5.0789,1.984,2.559,0.010,1.189,8.968
OFGM_H,-0.8621,1.949,-0.442,0.658,-4.681,2.957
TFGA_H,-5.2566,4.027,-1.306,0.192,-13.148,2.635
