# Imports

In [1]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf

# Render every column of wide DataFrames (None removes the column cap).
pd.options.display.max_columns = None

# Games Data

In [2]:
# Location of the games table, relative to the notebook's working directory.
data_path = os.path.join("..", "data", "games.csv")

# Load the table (first CSV column becomes the index), then keep only rows
# with N == 0 -- dropping the now-constant N column -- and seasons after 20.
df = (
    pd.read_csv(
        data_path,
        index_col=0,
        parse_dates=["Date", "Open"],
        date_format="%Y-%m-%d",
    )
    .loc[lambda games: games["N"] == 0]
    .drop(columns="N")
    .loc[lambda games: games["Season"] > 20]
)

display(df.head(), df.shape)

Unnamed: 0,Season,Date,HID,AID,POFF,Open,OddsH,OddsA,H,A,HSC,ASC,HFGM,AFGM,HFGA,AFGA,HFG3M,AFG3M,HFG3A,AFG3A,HFTM,AFTM,HFTA,AFTA,HORB,AORB,HDRB,ADRB,HRB,ARB,HAST,AAST,HSTL,ASTL,HBLK,ABLK,HTOV,ATOV,HPF,APF
23739,21,1995-11-07,0,11,0,1995-11-06,1.274083,3.794318,0,1,66,108,25.0,39.0,65.0,79.0,3.0,7.0,17.0,13.0,13.0,23.0,22.0,32.0,4.0,13.0,25.0,36.0,29.0,49.0,13.0,22.0,6.0,10.0,3.0,5.0,23.0,16.0,24.0,21.0
23740,21,1995-11-07,17,43,0,1995-11-06,1.597972,2.286949,1,0,114,106,46.0,40.0,83.0,77.0,6.0,13.0,12.0,30.0,16.0,13.0,24.0,17.0,12.0,4.0,31.0,25.0,43.0,29.0,30.0,29.0,11.0,7.0,1.0,5.0,21.0,21.0,19.0,25.0
23741,21,1995-11-08,41,39,0,1995-11-07,1.471072,2.640288,0,1,87,91,30.0,36.0,75.0,84.0,3.0,1.0,16.0,11.0,24.0,18.0,39.0,33.0,10.0,10.0,42.0,36.0,52.0,46.0,18.0,17.0,5.0,6.0,6.0,4.0,19.0,15.0,31.0,31.0
23742,21,1995-11-08,15,22,0,1995-11-07,1.257454,3.967424,0,1,97,105,31.0,43.0,72.0,93.0,8.0,2.0,21.0,14.0,27.0,17.0,34.0,25.0,13.0,11.0,35.0,27.0,48.0,38.0,19.0,26.0,3.0,12.0,9.0,0.0,22.0,8.0,23.0,25.0
23743,21,1995-11-08,13,19,0,1995-11-07,1.302199,3.542703,1,0,88,75,31.0,26.0,74.0,78.0,3.0,4.0,9.0,17.0,23.0,19.0,25.0,22.0,15.0,17.0,32.0,25.0,47.0,42.0,19.0,11.0,10.0,9.0,6.0,4.0,17.0,18.0,22.0,24.0


(5251, 40)

In [3]:
# Game-level columns that are not box-score statistics.
meta_columns = [
    "Season", "Date", "HID", "AID", "POFF",
    "Open", "H", "A", "OddsH", "OddsA",
]

# The box-score columns share one set of stat suffixes and differ only in the
# "H" / "A" prefix, so both lists are generated from the same suffixes.
featuresH, featuresA = (
    [
        side + stat
        for stat in (
            "SC", "FGM", "FGA", "FG3M", "FG3A", "FTM", "FTA", "ORB",
            "DRB", "RB", "AST", "STL", "BLK", "TOV", "PF",
        )
    ]
    for side in ("H", "A")
)

In [4]:
# Metadata-only view of the games: the meta columns without "Open".
df_bare = df.loc[:, meta_columns].drop(columns="Open")

display(df_bare.head(), df_bare.shape)

Unnamed: 0,Season,Date,HID,AID,POFF,H,A,OddsH,OddsA
23739,21,1995-11-07,0,11,0,0,1,1.274083,3.794318
23740,21,1995-11-07,17,43,0,1,0,1.597972,2.286949
23741,21,1995-11-08,41,39,0,0,1,1.471072,2.640288
23742,21,1995-11-08,15,22,0,0,1,1.257454,3.967424
23743,21,1995-11-08,13,19,0,1,0,1.302199,3.542703


(5251, 9)

In [5]:
# Column mapping for the home side's view of a game: H-prefixed columns become
# T-prefixed and A-prefixed columns become O-prefixed
# (presumably T = team, O = opponent -- confirm against downstream usage).
rename_columnsH = {
    "HID": "TID",
    "AID": "OID",
    "H": "W",
    "OddsH": "OddsT",
    "OddsA": "OddsO",
    "POFF": "TPOFF",
    # Box-score stats: every H<stat> first, then every A<stat>.
    **{
        f"{source}{stat}": f"{target}{stat}"
        for source, target in (("H", "T"), ("A", "O"))
        for stat in (
            "SC", "FGM", "FGA", "FG3M", "FG3A", "FTM", "FTA", "ORB",
            "DRB", "RB", "AST", "STL", "BLK", "TOV", "PF",
        )
    },
}


# Home side's view of every game: drop the away-result flag "A" and "Open",
# then relabel columns via rename_columnsH (so "H" becomes "W").
# No explicit df.copy() is needed: rename() returns a new DataFrame, so `df`
# is left untouched and later writes to this frame do not reach it.
df_home_game_stats = df.drop(columns=["A", "Open"]).rename(columns=rename_columnsH)

# Preview with the identifying columns moved into the index.
display(df_home_game_stats.head().set_index(["Season", "Date", "TID", "OID"]))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,TPOFF,OddsT,OddsO,W,TSC,OSC,TFGM,OFGM,TFGA,OFGA,TFG3M,OFG3M,TFG3A,OFG3A,TFTM,OFTM,TFTA,OFTA,TORB,OORB,TDRB,ODRB,TRB,ORB,TAST,OAST,TSTL,OSTL,TBLK,OBLK,TTOV,OTOV,TPF,OPF
Season,Date,TID,OID,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
21,1995-11-07,0,11,0,1.274083,3.794318,0,66,108,25.0,39.0,65.0,79.0,3.0,7.0,17.0,13.0,13.0,23.0,22.0,32.0,4.0,13.0,25.0,36.0,29.0,49.0,13.0,22.0,6.0,10.0,3.0,5.0,23.0,16.0,24.0,21.0
21,1995-11-07,17,43,0,1.597972,2.286949,1,114,106,46.0,40.0,83.0,77.0,6.0,13.0,12.0,30.0,16.0,13.0,24.0,17.0,12.0,4.0,31.0,25.0,43.0,29.0,30.0,29.0,11.0,7.0,1.0,5.0,21.0,21.0,19.0,25.0
21,1995-11-08,41,39,0,1.471072,2.640288,0,87,91,30.0,36.0,75.0,84.0,3.0,1.0,16.0,11.0,24.0,18.0,39.0,33.0,10.0,10.0,42.0,36.0,52.0,46.0,18.0,17.0,5.0,6.0,6.0,4.0,19.0,15.0,31.0,31.0
21,1995-11-08,15,22,0,1.257454,3.967424,0,97,105,31.0,43.0,72.0,93.0,8.0,2.0,21.0,14.0,27.0,17.0,34.0,25.0,13.0,11.0,35.0,27.0,48.0,38.0,19.0,26.0,3.0,12.0,9.0,0.0,22.0,8.0,23.0,25.0
21,1995-11-08,13,19,0,1.302199,3.542703,1,88,75,31.0,26.0,74.0,78.0,3.0,4.0,9.0,17.0,23.0,19.0,25.0,22.0,15.0,17.0,32.0,25.0,47.0,42.0,19.0,11.0,10.0,9.0,6.0,4.0,17.0,18.0,22.0,24.0


In [6]:
# Column mapping for the away side's view of a game: A-prefixed columns become
# T-prefixed and H-prefixed columns become O-prefixed
# (presumably T = team, O = opponent -- confirm against downstream usage).
rename_columnsA = {
    "HID": "OID",
    "AID": "TID",
    "A": "W",
    "OddsH": "OddsO",
    "OddsA": "OddsT",
    "POFF": "TPOFF",
    # Box-score stats: every H<stat> first, then every A<stat>.
    **{
        f"{source}{stat}": f"{target}{stat}"
        for source, target in (("H", "O"), ("A", "T"))
        for stat in (
            "SC", "FGM", "FGA", "FG3M", "FG3A", "FTM", "FTA", "ORB",
            "DRB", "RB", "AST", "STL", "BLK", "TOV", "PF",
        )
    },
}


# Away side's view of every game: drop the home-result flag "H" and "Open",
# then relabel columns via rename_columnsA (so "A" becomes "W").
# No explicit df.copy() is needed: rename() returns a new DataFrame, so `df`
# is left untouched and later writes to this frame do not reach it.
df_away_game_stats = df.drop(columns=["H", "Open"]).rename(columns=rename_columnsA)

# Preview with the identifying columns moved into the index.
display(df_away_game_stats.head().set_index(["Season", "Date", "TID", "OID"]))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,TPOFF,OddsO,OddsT,W,OSC,TSC,OFGM,TFGM,OFGA,TFGA,OFG3M,TFG3M,OFG3A,TFG3A,OFTM,TFTM,OFTA,TFTA,OORB,TORB,ODRB,TDRB,ORB,TRB,OAST,TAST,OSTL,TSTL,OBLK,TBLK,OTOV,TTOV,OPF,TPF
Season,Date,TID,OID,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
21,1995-11-07,11,0,0,1.274083,3.794318,1,66,108,25.0,39.0,65.0,79.0,3.0,7.0,17.0,13.0,13.0,23.0,22.0,32.0,4.0,13.0,25.0,36.0,29.0,49.0,13.0,22.0,6.0,10.0,3.0,5.0,23.0,16.0,24.0,21.0
21,1995-11-07,43,17,0,1.597972,2.286949,0,114,106,46.0,40.0,83.0,77.0,6.0,13.0,12.0,30.0,16.0,13.0,24.0,17.0,12.0,4.0,31.0,25.0,43.0,29.0,30.0,29.0,11.0,7.0,1.0,5.0,21.0,21.0,19.0,25.0
21,1995-11-08,39,41,0,1.471072,2.640288,1,87,91,30.0,36.0,75.0,84.0,3.0,1.0,16.0,11.0,24.0,18.0,39.0,33.0,10.0,10.0,42.0,36.0,52.0,46.0,18.0,17.0,5.0,6.0,6.0,4.0,19.0,15.0,31.0,31.0
21,1995-11-08,22,15,0,1.257454,3.967424,1,97,105,31.0,43.0,72.0,93.0,8.0,2.0,21.0,14.0,27.0,17.0,34.0,25.0,13.0,11.0,35.0,27.0,48.0,38.0,19.0,26.0,3.0,12.0,9.0,0.0,22.0,8.0,23.0,25.0
21,1995-11-08,19,13,0,1.302199,3.542703,0,88,75,31.0,26.0,74.0,78.0,3.0,4.0,9.0,17.0,23.0,19.0,25.0,22.0,15.0,17.0,32.0,25.0,47.0,42.0,19.0,11.0,10.0,9.0,6.0,4.0,17.0,18.0,22.0,24.0


In [7]:
# Columns aggregated downstream: four game-level columns followed by each
# box-score stat as a T-prefixed / O-prefixed pair, in that order.
features = ["TPOFF", "OddsT", "OddsO", "W"] + [
    side + stat
    for stat in (
        "SC", "FGM", "FGA", "FG3M", "FG3A", "FTM", "FTA", "ORB",
        "DRB", "RB", "AST", "STL", "BLK", "TOV", "PF",
    )
    for side in ("T", "O")
]

In [8]:
# Tag each per-team view with a home indicator and stack them, so every game
# contributes one row per participating team.
df_home_game_stats["H"] = 1
df_away_game_stats["H"] = 0
df_game_stats = pd.concat([df_home_game_stats, df_away_game_stats])

# Season-wide mean of every feature over both views.
df_league_stats = df_game_stats.groupby("Season")[features].mean()
display(df_league_stats.head(), df_league_stats.shape)

# Mean of every feature per season, split by the home indicator.
df_home_away_stats = (
    df_game_stats.groupby(["Season", "H"])[features].mean().reset_index()
)
display(df_home_away_stats.head(8), df_home_away_stats.shape)

# Attach the season-wide means next to the split means as "<feature>_season".
df_home_away_advantage = df_home_away_stats.merge(
    df_league_stats, on="Season", suffixes=("", "_season")
)

# Derived column names, aligned position-by-position with `features`.
features_advantage = [f"{name}_advantage" for name in features]
features_season = [f"{name}_season" for name in features]

# Advantage = (home or away) split mean minus the season-wide mean.
for stat_col, advantage_col, season_col in zip(features, features_advantage, features_season):
    df_home_away_advantage[advantage_col] = (
        df_home_away_advantage[stat_col] - df_home_away_advantage[season_col]
    )

display(
    df_home_away_advantage[["Season", "H"] + features_advantage].head(8),
    df_home_away_advantage.shape,
)


Unnamed: 0_level_0,TPOFF,OddsT,OddsO,W,TSC,OSC,TFGM,OFGM,TFGA,OFGA,TFG3M,OFG3M,TFG3A,OFG3A,TFTM,OFTM,TFTA,OFTA,TORB,OORB,TDRB,ODRB,TRB,ORB,TAST,OAST,TSTL,OSTL,TBLK,OBLK,TTOV,OTOV,TPF,OPF
Season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1
21,0.060351,2.339891,2.339891,0.5,98.467914,98.467914,36.390374,36.390374,79.567991,79.567991,6.087853,6.087853,17.022536,17.022536,19.599312,19.599312,26.076012,26.076012,11.121085,11.121085,29.942322,29.942322,41.063407,41.063407,21.179526,21.179526,7.229183,7.229183,4.594729,4.594729,15.092437,15.092437,22.218869,22.218869
22,0.065399,2.500808,2.500808,0.5,99.582129,99.582129,37.08289,37.08289,81.256654,81.256654,6.530418,6.530418,18.076426,18.076426,18.885932,18.885932,25.009125,25.009125,11.192395,11.192395,30.704943,30.704943,41.897338,41.897338,21.618631,21.618631,7.226996,7.226996,4.749049,4.749049,14.025475,14.025475,21.119392,21.119392
23,0.064639,2.587033,2.587033,0.5,99.739163,99.739163,36.974144,36.974144,80.68327,80.68327,6.661597,6.661597,18.197338,18.197338,19.129278,19.129278,24.842966,24.842966,10.96616,10.96616,30.23308,30.23308,41.19924,41.19924,20.84943,20.84943,7.257034,7.257034,4.80076,4.80076,13.980608,13.980608,21.15057,21.15057
24,0.0625,2.572398,2.572398,0.5,100.263338,100.263338,37.541159,37.541159,81.45846,81.45846,6.444741,6.444741,18.193598,18.193598,18.73628,18.73628,24.704268,24.704268,10.934451,10.934451,30.701982,30.701982,41.636433,41.636433,21.130335,21.130335,7.184451,7.184451,4.863186,4.863186,14.15282,14.15282,21.009527,21.009527


(4, 34)

Unnamed: 0,Season,H,TPOFF,OddsT,OddsO,W,TSC,OSC,TFGM,OFGM,TFGA,OFGA,TFG3M,OFG3M,TFG3A,OFG3A,TFTM,OFTM,TFTA,OFTA,TORB,OORB,TDRB,ODRB,TRB,ORB,TAST,OAST,TSTL,OSTL,TBLK,OBLK,TTOV,OTOV,TPF,OPF
0,21,0,0.060351,3.037516,1.642266,0.404889,96.919786,100.016043,35.884645,36.896104,79.472116,79.663866,5.995416,6.18029,16.979374,17.065699,19.15508,20.043545,25.478992,26.673033,10.91673,11.325439,29.458365,30.42628,40.375095,41.751719,20.300229,22.058824,7.203209,7.255157,4.288006,4.901451,15.301757,14.883117,22.644767,21.792972
1,21,1,0.060351,1.642266,3.037516,0.595111,100.016043,96.919786,36.896104,35.884645,79.663866,79.472116,6.18029,5.995416,17.065699,16.979374,20.043545,19.15508,26.673033,25.478992,11.325439,10.91673,30.42628,29.458365,41.751719,40.375095,22.058824,20.300229,7.255157,7.203209,4.901451,4.288006,14.883117,15.301757,21.792972,22.644767
2,22,0,0.065399,3.281491,1.720126,0.390114,97.726236,101.438023,36.478327,37.687452,81.13308,81.380228,6.481369,6.579468,18.084411,18.068441,18.288213,19.48365,24.318631,25.69962,10.968821,11.41597,30.164259,31.245627,41.13308,42.661597,20.676046,22.561217,7.123194,7.330798,4.342966,5.155133,14.338403,13.712548,21.511027,20.727757
3,22,1,0.065399,1.720126,3.281491,0.609886,101.438023,97.726236,37.687452,36.478327,81.380228,81.13308,6.579468,6.481369,18.068441,18.084411,19.48365,18.288213,25.69962,24.318631,11.41597,10.968821,31.245627,30.164259,42.661597,41.13308,22.561217,20.676046,7.330798,7.123194,5.155133,4.342966,13.712548,14.338403,20.727757,21.511027
4,23,0,0.064639,3.412455,1.761611,0.387833,98.085932,101.392395,36.457034,37.491255,80.695817,80.670722,6.660837,6.662357,18.3673,18.027376,18.511027,19.747529,24.05019,25.635741,10.761217,11.171103,29.662357,30.803802,40.423574,41.974905,20.048669,21.65019,7.184791,7.329278,4.457795,5.143726,14.207605,13.753612,21.587833,20.713308
5,23,1,0.064639,1.761611,3.412455,0.612167,101.392395,98.085932,37.491255,36.457034,80.670722,80.695817,6.662357,6.660837,18.027376,18.3673,19.747529,18.511027,25.635741,24.05019,11.171103,10.761217,30.803802,29.662357,41.974905,40.423574,21.65019,20.048669,7.329278,7.184791,5.143726,4.457795,13.753612,14.207605,20.713308,21.587833
6,24,0,0.0625,3.386063,1.758733,0.400915,98.842226,101.684451,37.054878,38.027439,81.278201,81.63872,6.484756,6.404726,18.30564,18.081555,18.247713,19.224848,24.102896,25.30564,10.689787,11.179116,30.351372,31.052591,41.041159,42.231707,20.301829,21.958841,7.101372,7.26753,4.662348,5.064024,14.432165,13.873476,21.451982,20.567073
7,24,1,0.0625,1.758733,3.386063,0.599085,101.684451,98.842226,38.027439,37.054878,81.63872,81.278201,6.404726,6.484756,18.081555,18.30564,19.224848,18.247713,25.30564,24.102896,11.179116,10.689787,31.052591,30.351372,42.231707,41.041159,21.958841,20.301829,7.26753,7.101372,5.064024,4.662348,13.873476,14.432165,20.567073,21.451982


(8, 36)

Unnamed: 0,Season,H,TPOFF_advantage,OddsT_advantage,OddsO_advantage,W_advantage,TSC_advantage,OSC_advantage,TFGM_advantage,OFGM_advantage,TFGA_advantage,OFGA_advantage,TFG3M_advantage,OFG3M_advantage,TFG3A_advantage,OFG3A_advantage,TFTM_advantage,OFTM_advantage,TFTA_advantage,OFTA_advantage,TORB_advantage,OORB_advantage,TDRB_advantage,ODRB_advantage,TRB_advantage,ORB_advantage,TAST_advantage,OAST_advantage,TSTL_advantage,OSTL_advantage,TBLK_advantage,OBLK_advantage,TTOV_advantage,OTOV_advantage,TPF_advantage,OPF_advantage
0,21,0,0.0,0.697625,-0.697625,-0.095111,-1.548128,1.548128,-0.50573,0.50573,-0.095875,0.095875,-0.092437,0.092437,-0.043163,0.043163,-0.444232,0.444232,-0.597021,0.597021,-0.204354,0.204354,-0.483957,0.483957,-0.688312,0.688312,-0.879297,0.879297,-0.025974,0.025974,-0.306723,0.306723,0.20932,-0.20932,0.425898,-0.425898
1,21,1,0.0,-0.697625,0.697625,0.095111,1.548128,-1.548128,0.50573,-0.50573,0.095875,-0.095875,0.092437,-0.092437,0.043163,-0.043163,0.444232,-0.444232,0.597021,-0.597021,0.204354,-0.204354,0.483957,-0.483957,0.688312,-0.688312,0.879297,-0.879297,0.025974,-0.025974,0.306723,-0.306723,-0.20932,0.20932,-0.425898,0.425898
2,22,0,0.0,0.780682,-0.780682,-0.109886,-1.855894,1.855894,-0.604563,0.604563,-0.123574,0.123574,-0.049049,0.049049,0.007985,-0.007985,-0.597719,0.597719,-0.690494,0.690494,-0.223574,0.223574,-0.540684,0.540684,-0.764259,0.764259,-0.942586,0.942586,-0.103802,0.103802,-0.406084,0.406084,0.312928,-0.312928,0.391635,-0.391635
3,22,1,0.0,-0.780682,0.780682,0.109886,1.855894,-1.855894,0.604563,-0.604563,0.123574,-0.123574,0.049049,-0.049049,-0.007985,0.007985,0.597719,-0.597719,0.690494,-0.690494,0.223574,-0.223574,0.540684,-0.540684,0.764259,-0.764259,0.942586,-0.942586,0.103802,-0.103802,0.406084,-0.406084,-0.312928,0.312928,-0.391635,0.391635
4,23,0,0.0,0.825422,-0.825422,-0.112167,-1.653232,1.653232,-0.51711,0.51711,0.012548,-0.012548,-0.00076,0.00076,0.169962,-0.169962,-0.618251,0.618251,-0.792776,0.792776,-0.204943,0.204943,-0.570722,0.570722,-0.775665,0.775665,-0.80076,0.80076,-0.072243,0.072243,-0.342966,0.342966,0.226996,-0.226996,0.437262,-0.437262
5,23,1,0.0,-0.825422,0.825422,0.112167,1.653232,-1.653232,0.51711,-0.51711,-0.012548,0.012548,0.00076,-0.00076,-0.169962,0.169962,0.618251,-0.618251,0.792776,-0.792776,0.204943,-0.204943,0.570722,-0.570722,0.775665,-0.775665,0.80076,-0.80076,0.072243,-0.072243,0.342966,-0.342966,-0.226996,0.226996,-0.437262,0.437262
6,24,0,0.0,0.813665,-0.813665,-0.099085,-1.421113,1.421113,-0.48628,0.48628,-0.180259,0.180259,0.040015,-0.040015,0.112043,-0.112043,-0.488567,0.488567,-0.601372,0.601372,-0.244665,0.244665,-0.35061,0.35061,-0.595274,0.595274,-0.828506,0.828506,-0.083079,0.083079,-0.200838,0.200838,0.279345,-0.279345,0.442454,-0.442454
7,24,1,0.0,-0.813665,0.813665,0.099085,1.421113,-1.421113,0.48628,-0.48628,0.180259,-0.180259,-0.040015,0.040015,-0.112043,0.112043,0.488567,-0.488567,0.601372,-0.601372,0.244665,-0.244665,0.35061,-0.35061,0.595274,-0.595274,0.828506,-0.828506,0.083079,-0.083079,0.200838,-0.200838,-0.279345,0.279345,-0.442454,0.442454


(8, 104)

In [9]:
# Peek at the first five rows of the stacked home + away frame.
df_game_stats.head(n=5)

Unnamed: 0,Season,Date,TID,OID,TPOFF,OddsT,OddsO,W,TSC,OSC,TFGM,OFGM,TFGA,OFGA,TFG3M,OFG3M,TFG3A,OFG3A,TFTM,OFTM,TFTA,OFTA,TORB,OORB,TDRB,ODRB,TRB,ORB,TAST,OAST,TSTL,OSTL,TBLK,OBLK,TTOV,OTOV,TPF,OPF,H
23739,21,1995-11-07,0,11,0,1.274083,3.794318,0,66,108,25.0,39.0,65.0,79.0,3.0,7.0,17.0,13.0,13.0,23.0,22.0,32.0,4.0,13.0,25.0,36.0,29.0,49.0,13.0,22.0,6.0,10.0,3.0,5.0,23.0,16.0,24.0,21.0,1
23740,21,1995-11-07,17,43,0,1.597972,2.286949,1,114,106,46.0,40.0,83.0,77.0,6.0,13.0,12.0,30.0,16.0,13.0,24.0,17.0,12.0,4.0,31.0,25.0,43.0,29.0,30.0,29.0,11.0,7.0,1.0,5.0,21.0,21.0,19.0,25.0,1
23741,21,1995-11-08,41,39,0,1.471072,2.640288,0,87,91,30.0,36.0,75.0,84.0,3.0,1.0,16.0,11.0,24.0,18.0,39.0,33.0,10.0,10.0,42.0,36.0,52.0,46.0,18.0,17.0,5.0,6.0,6.0,4.0,19.0,15.0,31.0,31.0,1
23742,21,1995-11-08,15,22,0,1.257454,3.967424,0,97,105,31.0,43.0,72.0,93.0,8.0,2.0,21.0,14.0,27.0,17.0,34.0,25.0,13.0,11.0,35.0,27.0,48.0,38.0,19.0,26.0,3.0,12.0,9.0,0.0,22.0,8.0,23.0,25.0,1
23743,21,1995-11-08,13,19,0,1.302199,3.542703,1,88,75,31.0,26.0,74.0,78.0,3.0,4.0,9.0,17.0,23.0,19.0,25.0,22.0,15.0,17.0,32.0,25.0,47.0,42.0,19.0,11.0,10.0,9.0,6.0,4.0,17.0,18.0,22.0,24.0,1


In [10]:
# Give every game row the advantage and season-mean columns that match its
# season and home indicator.
df_game_stats_advantage = df_game_stats.merge(
    df_home_away_advantage[["Season", "H"] + features_advantage + features_season],
    on=["Season", "H"],
)

# Subtract the season-level home/away advantage from each game-level stat.
features_norm_advantage = [f"{name}_norm_advantage" for name in features]
for stat_col, advantage_col, norm_col in zip(features, features_advantage, features_norm_advantage):
    df_game_stats_advantage[norm_col] = (
        df_game_stats_advantage[stat_col] - df_game_stats_advantage[advantage_col]
    )

# Keep the identifiers plus the adjusted stats, and give the adjusted stats
# their original feature names back.
norm_to_plain_name = dict(zip(features_norm_advantage, features))
df_game_stats_norm_advantage = df_game_stats_advantage[
    ["Season", "H", "Date", "TID", "OID"] + features_norm_advantage
].rename(columns=norm_to_plain_name)

display(df_game_stats_norm_advantage.head(), df_game_stats_norm_advantage.shape)


Unnamed: 0,Season,H,Date,TID,OID,TPOFF,OddsT,OddsO,W,TSC,OSC,TFGM,OFGM,TFGA,OFGA,TFG3M,OFG3M,TFG3A,OFG3A,TFTM,OFTM,TFTA,OFTA,TORB,OORB,TDRB,ODRB,TRB,ORB,TAST,OAST,TSTL,OSTL,TBLK,OBLK,TTOV,OTOV,TPF,OPF
0,21,1,1995-11-07,0,11,0.0,1.971708,3.096693,-0.095111,64.451872,109.548128,24.49427,39.50573,64.904125,79.095875,2.907563,7.092437,16.956837,13.043163,12.555768,23.444232,21.402979,32.597021,3.795646,13.204354,24.516043,36.483957,28.311688,49.688312,12.120703,22.879297,5.974026,10.025974,2.693277,5.306723,23.20932,15.79068,24.425898,20.574102
1,21,1,1995-11-07,17,43,0.0,2.295597,1.589324,0.904889,112.451872,107.548128,45.49427,40.50573,82.904125,77.095875,5.907563,13.092437,11.956837,30.043163,15.555768,13.444232,23.402979,17.597021,11.795646,4.204354,30.516043,25.483957,42.311688,29.688312,29.120703,29.879297,10.974026,7.025974,0.693277,5.306723,21.20932,20.79068,19.425898,24.574102
2,21,1,1995-11-08,41,39,0.0,2.168697,1.942663,-0.095111,85.451872,92.548128,29.49427,36.50573,74.904125,84.095875,2.907563,1.092437,15.956837,11.043163,23.555768,18.444232,38.402979,33.597021,9.795646,10.204354,41.516043,36.483957,51.311688,46.688312,17.120703,17.879297,4.974026,6.025974,5.693277,4.306723,19.20932,14.79068,31.425898,30.574102
3,21,1,1995-11-08,15,22,0.0,1.955079,3.269799,-0.095111,95.451872,106.548128,30.49427,43.50573,71.904125,93.095875,7.907563,2.092437,20.956837,14.043163,26.555768,17.444232,33.402979,25.597021,12.795646,11.204354,34.516043,27.483957,47.311688,38.688312,18.120703,26.879297,2.974026,12.025974,8.693277,0.306723,22.20932,7.79068,23.425898,24.574102
4,21,1,1995-11-08,13,19,0.0,1.999824,2.845077,0.904889,86.451872,76.548128,30.49427,26.50573,73.904125,78.095875,2.907563,4.092437,8.956837,17.043163,22.555768,19.444232,24.402979,22.597021,14.795646,17.204354,31.516043,25.483957,46.311688,42.688312,18.120703,11.879297,9.974026,9.025974,5.693277,4.306723,17.20932,17.79068,22.425898,23.574102


(10502, 39)

In [11]:
# Per-team, per-season mean of the advantage-adjusted game stats.
df_team_stats_norm_advantage = (
    df_game_stats_norm_advantage
    .groupby(["Season", "TID"], as_index=False)[features]
    .mean()
)

display(df_team_stats_norm_advantage.head(), df_team_stats_norm_advantage.shape)


Unnamed: 0,Season,TID,TPOFF,OddsT,OddsO,W,TSC,OSC,TFGM,OFGM,TFGA,OFGA,TFG3M,OFG3M,TFG3A,OFG3A,TFTM,OFTM,TFTA,OFTA,TORB,OORB,TDRB,ODRB,TRB,ORB,TAST,OAST,TSTL,OSTL,TBLK,OBLK,TTOV,OTOV,TPF,OPF
0,21,0,0.046512,2.119491,2.342789,0.511628,94.348837,95.732558,35.488372,35.348837,76.593023,79.44186,6.395349,6.5,18.686047,18.197674,16.976744,18.534884,24.744186,24.872093,9.872093,11.162791,30.686047,30.209302,40.55814,41.372093,20.372093,20.337209,6.872093,6.895349,5.383721,3.848837,14.651163,14.453488,21.511628,21.906977
1,21,1,0.078652,1.83148,2.815255,0.616909,96.207324,92.017395,34.983082,34.185458,79.043867,79.371864,8.482107,5.596544,23.021987,16.101609,17.759054,18.049935,23.476438,24.119068,10.660625,9.822521,32.522652,31.050382,43.183277,40.872902,20.563154,19.650329,7.100832,6.966584,4.108913,4.351761,14.227071,14.008884,20.735122,20.11881
2,21,2,0.0,2.753803,1.871267,0.378049,99.146341,102.036585,37.158537,38.121951,80.829268,80.097561,6.402439,6.329268,17.695122,17.52439,18.426829,19.463415,23.304878,25.902439,11.317073,11.634146,28.317073,29.365854,39.634146,41.0,20.670732,22.804878,7.695122,7.195122,3.670732,4.158537,15.463415,15.036585,21.963415,19.939024
3,21,3,0.0,3.066587,1.743674,0.268293,101.597561,106.743902,36.560976,40.085366,78.634146,82.45122,6.097561,7.02439,16.609756,18.04878,22.378049,19.54878,29.390244,25.768293,10.560976,11.768293,28.963415,30.280488,39.52439,42.04878,20.487805,25.146341,6.878049,8.304878,4.963415,5.487805,16.378049,15.280488,22.280488,24.292683
4,21,4,0.196078,1.496543,3.954165,0.723625,97.930429,90.44212,36.303809,34.804034,77.086355,78.746978,7.311913,4.580244,19.156016,13.589082,18.010897,16.253808,24.164764,21.874451,9.515601,10.38636,31.157177,29.058509,40.672778,39.444869,21.659229,17.527045,7.14655,6.95149,5.160652,4.211897,13.876653,14.329229,19.596586,21.109296


(120, 36)

In [12]:
# Per-team, per-season mean of the raw (unadjusted) game stats.
df_team_stats = (
    df_game_stats
    .groupby(["Season", "TID"], as_index=False)[features]
    .mean()
)

display(df_team_stats.head(), df_team_stats.shape)

Unnamed: 0,Season,TID,TPOFF,OddsT,OddsO,W,TSC,OSC,TFGM,OFGM,TFGA,OFGA,TFG3M,OFG3M,TFG3A,OFG3A,TFTM,OFTM,TFTA,OFTA,TORB,OORB,TDRB,ODRB,TRB,ORB,TAST,OAST,TSTL,OSTL,TBLK,OBLK,TTOV,OTOV,TPF,OPF
0,21,0,0.046512,2.119491,2.342789,0.511628,94.348837,95.732558,35.488372,35.348837,76.593023,79.44186,6.395349,6.5,18.686047,18.197674,16.976744,18.534884,24.744186,24.872093,9.872093,11.162791,30.686047,30.209302,40.55814,41.372093,20.372093,20.337209,6.872093,6.895349,5.383721,3.848837,14.651163,14.453488,21.511628,21.906977
1,21,1,0.078652,1.823642,2.823093,0.617978,96.224719,92.0,34.988764,34.179775,79.044944,79.370787,8.483146,5.595506,23.022472,16.101124,17.764045,18.044944,23.483146,24.11236,10.662921,9.820225,32.52809,31.044944,43.191011,40.865169,20.573034,19.640449,7.101124,6.966292,4.11236,4.348315,14.224719,14.011236,20.730337,20.123596
2,21,2,0.0,2.753803,1.871267,0.378049,99.146341,102.036585,37.158537,38.121951,80.829268,80.097561,6.402439,6.329268,17.695122,17.52439,18.426829,19.463415,23.304878,25.902439,11.317073,11.634146,28.317073,29.365854,39.634146,41.0,20.670732,22.804878,7.695122,7.195122,3.670732,4.158537,15.463415,15.036585,21.963415,19.939024
3,21,3,0.0,3.066587,1.743674,0.268293,101.597561,106.743902,36.560976,40.085366,78.634146,82.45122,6.097561,7.02439,16.609756,18.04878,22.378049,19.54878,29.390244,25.768293,10.560976,11.768293,28.963415,30.280488,39.52439,42.04878,20.487805,25.146341,6.878049,8.304878,4.963415,5.487805,16.378049,15.280488,22.280488,24.292683
4,21,4,0.196078,1.482864,3.967844,0.72549,97.960784,90.411765,36.313725,34.794118,77.088235,78.745098,7.313725,4.578431,19.156863,13.588235,18.019608,16.245098,24.176471,21.862745,9.519608,10.382353,31.166667,29.04902,40.686275,39.431373,21.676471,17.509804,7.147059,6.95098,5.166667,4.205882,13.872549,14.333333,19.588235,21.117647


(120, 36)

In [13]:
# Attach season-level team averages to every game, once for the home team
# (matched on HID) and once for the away team (matched on AID).
# The first merge adds the stat columns unsuffixed; on the second merge those
# names collide, so the earlier (home) columns get "_H" and the new (away)
# columns get "_A".
df_features = (
    df_bare
    .merge(df_team_stats, left_on=["Season", "HID"], right_on=["Season", "TID"])
    .drop(columns="TID")
    .merge(
        df_team_stats,
        left_on=["Season", "AID"],
        right_on=["Season", "TID"],
        suffixes=("_H", "_A"),
    )
    .drop(columns="TID")
)

display(df_features.head(), df_features.shape)

Unnamed: 0,Season,Date,HID,AID,POFF,H,A,OddsH,OddsA,TPOFF_H,OddsT_H,OddsO_H,W_H,TSC_H,OSC_H,TFGM_H,OFGM_H,TFGA_H,OFGA_H,TFG3M_H,OFG3M_H,TFG3A_H,OFG3A_H,TFTM_H,OFTM_H,TFTA_H,OFTA_H,TORB_H,OORB_H,TDRB_H,ODRB_H,TRB_H,ORB_H,TAST_H,OAST_H,TSTL_H,OSTL_H,TBLK_H,OBLK_H,TTOV_H,OTOV_H,TPF_H,OPF_H,TPOFF_A,OddsT_A,OddsO_A,W_A,TSC_A,OSC_A,TFGM_A,OFGM_A,TFGA_A,OFGA_A,TFG3M_A,OFG3M_A,TFG3A_A,OFG3A_A,TFTM_A,OFTM_A,TFTA_A,OFTA_A,TORB_A,OORB_A,TDRB_A,ODRB_A,TRB_A,ORB_A,TAST_A,OAST_A,TSTL_A,OSTL_A,TBLK_A,OBLK_A,TTOV_A,OTOV_A,TPF_A,OPF_A
0,21,1995-11-07,0,11,0,0,1,1.274083,3.794318,0.046512,2.119491,2.342789,0.511628,94.348837,95.732558,35.488372,35.348837,76.593023,79.44186,6.395349,6.5,18.686047,18.197674,16.976744,18.534884,24.744186,24.872093,9.872093,11.162791,30.686047,30.209302,40.55814,41.372093,20.372093,20.337209,6.872093,6.895349,5.383721,3.848837,14.651163,14.453488,21.511628,21.906977,0.108696,1.787051,2.694662,0.597826,98.086957,93.5,36.641304,34.032609,80.641304,78.184783,5.858696,5.934783,15.25,17.119565,18.945652,19.5,25.847826,26.836957,11.956522,10.967391,31.521739,29.858696,43.478261,40.826087,22.152174,20.347826,7.706522,7.413043,5.293478,5.184783,15.902174,17.076087,23.141304,23.423913
1,21,1996-03-13,0,11,0,1,0,1.59506,2.293425,0.046512,2.119491,2.342789,0.511628,94.348837,95.732558,35.488372,35.348837,76.593023,79.44186,6.395349,6.5,18.686047,18.197674,16.976744,18.534884,24.744186,24.872093,9.872093,11.162791,30.686047,30.209302,40.55814,41.372093,20.372093,20.337209,6.872093,6.895349,5.383721,3.848837,14.651163,14.453488,21.511628,21.906977,0.108696,1.787051,2.694662,0.597826,98.086957,93.5,36.641304,34.032609,80.641304,78.184783,5.858696,5.934783,15.25,17.119565,18.945652,19.5,25.847826,26.836957,11.956522,10.967391,31.521739,29.858696,43.478261,40.826087,22.152174,20.347826,7.706522,7.413043,5.293478,5.184783,15.902174,17.076087,23.141304,23.423913
2,21,1996-05-03,0,11,1,0,1,2.043143,1.731877,0.046512,2.119491,2.342789,0.511628,94.348837,95.732558,35.488372,35.348837,76.593023,79.44186,6.395349,6.5,18.686047,18.197674,16.976744,18.534884,24.744186,24.872093,9.872093,11.162791,30.686047,30.209302,40.55814,41.372093,20.372093,20.337209,6.872093,6.895349,5.383721,3.848837,14.651163,14.453488,21.511628,21.906977,0.108696,1.787051,2.694662,0.597826,98.086957,93.5,36.641304,34.032609,80.641304,78.184783,5.858696,5.934783,15.25,17.119565,18.945652,19.5,25.847826,26.836957,11.956522,10.967391,31.521739,29.858696,43.478261,40.826087,22.152174,20.347826,7.706522,7.413043,5.293478,5.184783,15.902174,17.076087,23.141304,23.423913
3,21,1996-05-05,0,11,1,0,1,2.14769,1.667978,0.046512,2.119491,2.342789,0.511628,94.348837,95.732558,35.488372,35.348837,76.593023,79.44186,6.395349,6.5,18.686047,18.197674,16.976744,18.534884,24.744186,24.872093,9.872093,11.162791,30.686047,30.209302,40.55814,41.372093,20.372093,20.337209,6.872093,6.895349,5.383721,3.848837,14.651163,14.453488,21.511628,21.906977,0.108696,1.787051,2.694662,0.597826,98.086957,93.5,36.641304,34.032609,80.641304,78.184783,5.858696,5.934783,15.25,17.119565,18.945652,19.5,25.847826,26.836957,11.956522,10.967391,31.521739,29.858696,43.478261,40.826087,22.152174,20.347826,7.706522,7.413043,5.293478,5.184783,15.902174,17.076087,23.141304,23.423913
4,21,1995-11-26,17,11,0,1,0,1.499747,2.545381,0.057471,2.134118,2.35249,0.494253,103.034483,103.701149,37.850575,37.931034,81.54023,82.091954,7.356322,6.482759,20.954023,18.149425,19.977011,21.356322,26.770115,28.103448,11.0,11.54023,30.356322,30.609195,41.356322,42.149425,22.091954,22.218391,7.229885,7.896552,5.068966,4.931034,15.505747,14.597701,23.034483,23.091954,0.108696,1.787051,2.694662,0.597826,98.086957,93.5,36.641304,34.032609,80.641304,78.184783,5.858696,5.934783,15.25,17.119565,18.945652,19.5,25.847826,26.836957,11.956522,10.967391,31.521739,29.858696,43.478261,40.826087,22.152174,20.347826,7.706522,7.413043,5.293478,5.184783,15.902174,17.076087,23.141304,23.423913


(5251, 77)

In [14]:
# Split the per-season advantage table by the home indicator:
# H == 1 rows are the home side, H == 0 rows the away side.
df_home_advantage = df_home_away_advantage.loc[df_home_away_advantage["H"].eq(1)]
df_away_advantage = df_home_away_advantage.loc[df_home_away_advantage["H"].eq(0)]

In [15]:
# Build the per-game feature table from the advantage-adjusted team averages:
#   1. home-team averages (matched on HID), columns initially unsuffixed
#   2. away-team averages (matched on AID); the collision adds "_H" / "_A"
#   3. the season's home advantage, columns initially unsuffixed
#   4. the season's away advantage plus the season means; the collision on the
#      advantage columns again adds "_H" (home) / "_A" (away)
df_features_norm_advantage = (
    df_bare
    .merge(
        df_team_stats_norm_advantage,
        left_on=["Season", "HID"],
        right_on=["Season", "TID"],
    )
    .drop(columns="TID")
    .merge(
        df_team_stats_norm_advantage,
        left_on=["Season", "AID"],
        right_on=["Season", "TID"],
        suffixes=("_H", "_A"),
    )
    .drop(columns="TID")
    .merge(df_home_advantage[["Season"] + features_advantage], on="Season")
    .merge(
        df_away_advantage[["Season"] + features_advantage + features_season],
        on="Season",
        suffixes=("_H", "_A"),
    )
)

# For each side, add that side's season advantage back onto the adjusted team
# average, then express the result relative to the season-wide mean.
for stat_col, advantage_col, season_col in zip(features, features_advantage, features_season):
    for side in ("H", "A"):
        side_col = f"{stat_col}_{side}"
        df_features_norm_advantage[side_col] = (
            df_features_norm_advantage[side_col]
            + df_features_norm_advantage[f"{advantage_col}_{side}"]
        ) / df_features_norm_advantage[season_col]


display(df_features_norm_advantage.head(), df_features_norm_advantage.shape)

Unnamed: 0,Season,Date,HID,AID,POFF,H,A,OddsH,OddsA,TPOFF_H,OddsT_H,OddsO_H,W_H,TSC_H,OSC_H,TFGM_H,OFGM_H,TFGA_H,OFGA_H,TFG3M_H,OFG3M_H,TFG3A_H,OFG3A_H,TFTM_H,OFTM_H,TFTA_H,OFTA_H,TORB_H,OORB_H,TDRB_H,ODRB_H,TRB_H,ORB_H,TAST_H,OAST_H,TSTL_H,OSTL_H,TBLK_H,OBLK_H,TTOV_H,OTOV_H,TPF_H,OPF_H,TPOFF_A,OddsT_A,OddsO_A,W_A,TSC_A,OSC_A,TFGM_A,OFGM_A,TFGA_A,OFGA_A,TFG3M_A,OFG3M_A,TFG3A_A,OFG3A_A,TFTM_A,OFTM_A,TFTA_A,OFTA_A,TORB_A,OORB_A,TDRB_A,ODRB_A,TRB_A,ORB_A,TAST_A,OAST_A,TSTL_A,OSTL_A,TBLK_A,OBLK_A,TTOV_A,OTOV_A,TPF_A,OPF_A,TPOFF_advantage_H,OddsT_advantage_H,OddsO_advantage_H,W_advantage_H,TSC_advantage_H,OSC_advantage_H,TFGM_advantage_H,OFGM_advantage_H,TFGA_advantage_H,OFGA_advantage_H,TFG3M_advantage_H,OFG3M_advantage_H,TFG3A_advantage_H,OFG3A_advantage_H,TFTM_advantage_H,OFTM_advantage_H,TFTA_advantage_H,OFTA_advantage_H,TORB_advantage_H,OORB_advantage_H,TDRB_advantage_H,ODRB_advantage_H,TRB_advantage_H,ORB_advantage_H,TAST_advantage_H,OAST_advantage_H,TSTL_advantage_H,OSTL_advantage_H,TBLK_advantage_H,OBLK_advantage_H,TTOV_advantage_H,OTOV_advantage_H,TPF_advantage_H,OPF_advantage_H,TPOFF_advantage_A,OddsT_advantage_A,OddsO_advantage_A,W_advantage_A,TSC_advantage_A,OSC_advantage_A,TFGM_advantage_A,OFGM_advantage_A,TFGA_advantage_A,OFGA_advantage_A,TFG3M_advantage_A,OFG3M_advantage_A,TFG3A_advantage_A,OFG3A_advantage_A,TFTM_advantage_A,OFTM_advantage_A,TFTA_advantage_A,OFTA_advantage_A,TORB_advantage_A,OORB_advantage_A,TDRB_advantage_A,ODRB_advantage_A,TRB_advantage_A,ORB_advantage_A,TAST_advantage_A,OAST_advantage_A,TSTL_advantage_A,OSTL_advantage_A,TBLK_advantage_A,OBLK_advantage_A,TTOV_advantage_A,OTOV_advantage_A,TPF_advantage_A,OPF_advantage_A,TPOFF_season,OddsT_season,OddsO_season,W_season,TSC_season,OSC_season,TFGM_season,OFGM_season,TFGA_season,OFGA_season,TFG3M_season,OFG3M_season,TFG3A_season,OFG3A_season,TFTM_season,OFTM_season,TFTA_season,OFTA_season,TORB_season,OORB_season,TDRB_season,ODRB_season,TRB_season,ORB_season,TAST_season,OAST_season,TSTL_season,
OSTL_season,TBLK_season,OBLK_season,TTOV_season,OTOV_season,TPF_season,OPF_season
0,21,1995-11-07,0,11,0,0,1,1.274083,3.794318,0.77068,0.607663,1.299383,1.213477,0.97389,0.956499,0.989111,0.957481,0.963816,0.99721,1.065694,1.052516,1.10026,1.066499,0.888857,0.923025,0.971821,0.930935,0.906067,0.985375,1.041002,0.992753,1.004458,0.990755,1.003393,0.918713,0.954197,0.950228,1.238472,0.770908,0.956893,0.971534,0.949001,1.005131,1.801046,1.061877,0.853474,1.005431,0.980409,0.96527,0.992998,0.949106,1.012284,0.983821,0.947174,0.99004,0.893336,1.008236,0.943983,1.017599,0.968354,1.052077,1.056746,1.004555,1.036586,1.01337,1.042046,1.010983,1.004408,1.002247,1.062437,1.029026,1.085321,1.195175,1.067521,1.117564,1.060684,1.035067,0.0,-0.697625,0.697625,0.095111,1.548128,-1.548128,0.50573,-0.50573,0.095875,-0.095875,0.092437,-0.092437,0.043163,-0.043163,0.444232,-0.444232,0.597021,-0.597021,0.204354,-0.204354,0.483957,-0.483957,0.688312,-0.688312,0.879297,-0.879297,0.025974,-0.025974,0.306723,-0.306723,-0.20932,0.20932,-0.425898,0.425898,0.0,0.697625,-0.697625,-0.095111,-1.548128,1.548128,-0.50573,0.50573,-0.095875,0.095875,-0.092437,0.092437,-0.043163,0.043163,-0.444232,0.444232,-0.597021,0.597021,-0.204354,0.204354,-0.483957,0.483957,-0.688312,0.688312,-0.879297,0.879297,-0.025974,0.025974,-0.306723,0.306723,0.20932,-0.20932,0.425898,-0.425898,0.060351,2.339891,2.339891,0.5,98.467914,98.467914,36.390374,36.390374,79.567991,79.567991,6.087853,6.087853,17.022536,17.022536,19.599312,19.599312,26.076012,26.076012,11.121085,11.121085,29.942322,29.942322,41.063407,41.063407,21.179526,21.179526,7.229183,7.229183,4.594729,4.594729,15.092437,15.092437,22.218869,22.218869
1,21,1996-03-13,0,11,0,1,0,1.59506,2.293425,0.77068,0.607663,1.299383,1.213477,0.97389,0.956499,0.989111,0.957481,0.963816,0.99721,1.065694,1.052516,1.10026,1.066499,0.888857,0.923025,0.971821,0.930935,0.906067,0.985375,1.041002,0.992753,1.004458,0.990755,1.003393,0.918713,0.954197,0.950228,1.238472,0.770908,0.956893,0.971534,0.949001,1.005131,1.801046,1.061877,0.853474,1.005431,0.980409,0.96527,0.992998,0.949106,1.012284,0.983821,0.947174,0.99004,0.893336,1.008236,0.943983,1.017599,0.968354,1.052077,1.056746,1.004555,1.036586,1.01337,1.042046,1.010983,1.004408,1.002247,1.062437,1.029026,1.085321,1.195175,1.067521,1.117564,1.060684,1.035067,0.0,-0.697625,0.697625,0.095111,1.548128,-1.548128,0.50573,-0.50573,0.095875,-0.095875,0.092437,-0.092437,0.043163,-0.043163,0.444232,-0.444232,0.597021,-0.597021,0.204354,-0.204354,0.483957,-0.483957,0.688312,-0.688312,0.879297,-0.879297,0.025974,-0.025974,0.306723,-0.306723,-0.20932,0.20932,-0.425898,0.425898,0.0,0.697625,-0.697625,-0.095111,-1.548128,1.548128,-0.50573,0.50573,-0.095875,0.095875,-0.092437,0.092437,-0.043163,0.043163,-0.444232,0.444232,-0.597021,0.597021,-0.204354,0.204354,-0.483957,0.483957,-0.688312,0.688312,-0.879297,0.879297,-0.025974,0.025974,-0.306723,0.306723,0.20932,-0.20932,0.425898,-0.425898,0.060351,2.339891,2.339891,0.5,98.467914,98.467914,36.390374,36.390374,79.567991,79.567991,6.087853,6.087853,17.022536,17.022536,19.599312,19.599312,26.076012,26.076012,11.121085,11.121085,29.942322,29.942322,41.063407,41.063407,21.179526,21.179526,7.229183,7.229183,4.594729,4.594729,15.092437,15.092437,22.218869,22.218869
2,21,1996-05-03,0,11,1,0,1,2.043143,1.731877,0.77068,0.607663,1.299383,1.213477,0.97389,0.956499,0.989111,0.957481,0.963816,0.99721,1.065694,1.052516,1.10026,1.066499,0.888857,0.923025,0.971821,0.930935,0.906067,0.985375,1.041002,0.992753,1.004458,0.990755,1.003393,0.918713,0.954197,0.950228,1.238472,0.770908,0.956893,0.971534,0.949001,1.005131,1.801046,1.061877,0.853474,1.005431,0.980409,0.96527,0.992998,0.949106,1.012284,0.983821,0.947174,0.99004,0.893336,1.008236,0.943983,1.017599,0.968354,1.052077,1.056746,1.004555,1.036586,1.01337,1.042046,1.010983,1.004408,1.002247,1.062437,1.029026,1.085321,1.195175,1.067521,1.117564,1.060684,1.035067,0.0,-0.697625,0.697625,0.095111,1.548128,-1.548128,0.50573,-0.50573,0.095875,-0.095875,0.092437,-0.092437,0.043163,-0.043163,0.444232,-0.444232,0.597021,-0.597021,0.204354,-0.204354,0.483957,-0.483957,0.688312,-0.688312,0.879297,-0.879297,0.025974,-0.025974,0.306723,-0.306723,-0.20932,0.20932,-0.425898,0.425898,0.0,0.697625,-0.697625,-0.095111,-1.548128,1.548128,-0.50573,0.50573,-0.095875,0.095875,-0.092437,0.092437,-0.043163,0.043163,-0.444232,0.444232,-0.597021,0.597021,-0.204354,0.204354,-0.483957,0.483957,-0.688312,0.688312,-0.879297,0.879297,-0.025974,0.025974,-0.306723,0.306723,0.20932,-0.20932,0.425898,-0.425898,0.060351,2.339891,2.339891,0.5,98.467914,98.467914,36.390374,36.390374,79.567991,79.567991,6.087853,6.087853,17.022536,17.022536,19.599312,19.599312,26.076012,26.076012,11.121085,11.121085,29.942322,29.942322,41.063407,41.063407,21.179526,21.179526,7.229183,7.229183,4.594729,4.594729,15.092437,15.092437,22.218869,22.218869
3,21,1996-05-05,0,11,1,0,1,2.14769,1.667978,0.77068,0.607663,1.299383,1.213477,0.97389,0.956499,0.989111,0.957481,0.963816,0.99721,1.065694,1.052516,1.10026,1.066499,0.888857,0.923025,0.971821,0.930935,0.906067,0.985375,1.041002,0.992753,1.004458,0.990755,1.003393,0.918713,0.954197,0.950228,1.238472,0.770908,0.956893,0.971534,0.949001,1.005131,1.801046,1.061877,0.853474,1.005431,0.980409,0.96527,0.992998,0.949106,1.012284,0.983821,0.947174,0.99004,0.893336,1.008236,0.943983,1.017599,0.968354,1.052077,1.056746,1.004555,1.036586,1.01337,1.042046,1.010983,1.004408,1.002247,1.062437,1.029026,1.085321,1.195175,1.067521,1.117564,1.060684,1.035067,0.0,-0.697625,0.697625,0.095111,1.548128,-1.548128,0.50573,-0.50573,0.095875,-0.095875,0.092437,-0.092437,0.043163,-0.043163,0.444232,-0.444232,0.597021,-0.597021,0.204354,-0.204354,0.483957,-0.483957,0.688312,-0.688312,0.879297,-0.879297,0.025974,-0.025974,0.306723,-0.306723,-0.20932,0.20932,-0.425898,0.425898,0.0,0.697625,-0.697625,-0.095111,-1.548128,1.548128,-0.50573,0.50573,-0.095875,0.095875,-0.092437,0.092437,-0.043163,0.043163,-0.444232,0.444232,-0.597021,0.597021,-0.204354,0.204354,-0.483957,0.483957,-0.688312,0.688312,-0.879297,0.879297,-0.025974,0.025974,-0.306723,0.306723,0.20932,-0.20932,0.425898,-0.425898,0.060351,2.339891,2.339891,0.5,98.467914,98.467914,36.390374,36.390374,79.567991,79.567991,6.087853,6.087853,17.022536,17.022536,19.599312,19.599312,26.076012,26.076012,11.121085,11.121085,29.942322,29.942322,41.063407,41.063407,21.179526,21.179526,7.229183,7.229183,4.594729,4.594729,15.092437,15.092437,22.218869,22.218869
4,21,1995-11-26,17,11,0,1,0,1.499747,2.545381,0.952277,0.610487,1.306956,1.180914,1.062279,1.037244,1.054183,1.02828,1.026006,1.030502,1.223719,1.049509,1.233523,1.063635,1.042197,1.06672,1.049777,1.054592,1.007699,1.019103,1.030175,1.005923,1.024088,1.009493,1.085074,1.007057,1.003731,1.088682,1.170736,1.005671,1.013357,0.981248,1.01732,1.058683,1.801046,1.061877,0.853474,1.005431,0.980409,0.96527,0.992998,0.949106,1.012284,0.983821,0.947174,0.99004,0.893336,1.008236,0.943983,1.017599,0.968354,1.052077,1.056746,1.004555,1.036586,1.01337,1.042046,1.010983,1.004408,1.002247,1.062437,1.029026,1.085321,1.195175,1.067521,1.117564,1.060684,1.035067,0.0,-0.697625,0.697625,0.095111,1.548128,-1.548128,0.50573,-0.50573,0.095875,-0.095875,0.092437,-0.092437,0.043163,-0.043163,0.444232,-0.444232,0.597021,-0.597021,0.204354,-0.204354,0.483957,-0.483957,0.688312,-0.688312,0.879297,-0.879297,0.025974,-0.025974,0.306723,-0.306723,-0.20932,0.20932,-0.425898,0.425898,0.0,0.697625,-0.697625,-0.095111,-1.548128,1.548128,-0.50573,0.50573,-0.095875,0.095875,-0.092437,0.092437,-0.043163,0.043163,-0.444232,0.444232,-0.597021,0.597021,-0.204354,0.204354,-0.483957,0.483957,-0.688312,0.688312,-0.879297,0.879297,-0.025974,0.025974,-0.306723,0.306723,0.20932,-0.20932,0.425898,-0.425898,0.060351,2.339891,2.339891,0.5,98.467914,98.467914,36.390374,36.390374,79.567991,79.567991,6.087853,6.087853,17.022536,17.022536,19.599312,19.599312,26.076012,26.076012,11.121085,11.121085,29.942322,29.942322,41.063407,41.063407,21.179526,21.179526,7.229183,7.229183,4.594729,4.594729,15.092437,15.092437,22.218869,22.218869


(5251, 179)

In [16]:
# Baseline specification: the H outcome column regressed on the W feature
# of the home side (W_H) and of the away side (W_A).
formula = "H ~ W_H + W_A"
print(formula)

# Build both logit models from the same formula before fitting anything.
# NOTE(review): df_features / df_features_norm_advantage are created in earlier
# cells; judging by the names the second is a normalised "advantage" variant
# of the first -- confirm against the cell that creates them.
model = smf.logit(formula=formula, data=df_features)
model_norm_advantage = smf.logit(formula=formula, data=df_features_norm_advantage)

# Fit and show each model in turn, so the optimizer message and the summary
# table of the first model appear before those of the second.
result = model.fit()
display(result.summary())

result_norm_advantage = model_norm_advantage.fit()
display(result_norm_advantage.summary())


H ~ W_H + W_A
Optimization terminated successfully.
         Current function value: 0.565747
         Iterations 6


0,1,2,3
Dep. Variable:,H,No. Observations:,5251.0
Model:,Logit,Df Residuals:,5248.0
Method:,MLE,Df Model:,2.0
Date:,"Wed, 06 Nov 2024",Pseudo R-squ.:,0.1573
Time:,23:58:23,Log-Likelihood:,-2970.7
converged:,True,LL-Null:,-3525.1
Covariance Type:,nonrobust,LLR p-value:,1.7049999999999998e-241

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.1892,0.147,1.291,0.197,-0.098,0.476
W_H,5.4932,0.228,24.070,0.000,5.046,5.940
W_A,-4.8250,0.230,-20.978,0.000,-5.276,-4.374


Optimization terminated successfully.
         Current function value: 0.565541
         Iterations 6


0,1,2,3
Dep. Variable:,H,No. Observations:,5251.0
Model:,Logit,Df Residuals:,5248.0
Method:,MLE,Df Model:,2.0
Date:,"Wed, 06 Nov 2024",Pseudo R-squ.:,0.1576
Time:,23:58:23,Log-Likelihood:,-2969.7
converged:,True,LL-Null:,-3525.1
Covariance Type:,nonrobust,LLR p-value:,5.8009999999999995e-242

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.8815,0.150,-5.891,0.000,-1.175,-0.588
W_H,2.7498,0.114,24.088,0.000,2.526,2.974
W_A,-2.4183,0.115,-20.953,0.000,-2.644,-2.192


In [17]:
# Base stat names used by the wide model; every entry is included twice in the
# formula below, once with the "_H" suffix and once with the "_A" suffix.
# The TSC/OSC and TRB/ORB columns that exist in the feature table are left out
# here (presumably dropped as correlated, as the name suggests -- confirm).
featuresT_remove_corr = [
    "TPOFF", "OddsT", "OddsO", "W",
    "TFGM", "OFGM", "TFGA", "OFGA",
    "TFG3M", "OFG3M", "TFG3A", "OFG3A",
    "TFTM", "OFTM", "TFTA", "OFTA",
    "TORB", "OORB", "TDRB", "ODRB",
    "TAST", "OAST", "TSTL", "OSTL",
    "TBLK", "OBLK", "TTOV", "OTOV",
    "TPF", "OPF",
]

# Right-hand side order: POFF first, then all "_H" terms, then all "_A" terms.
formula = "H ~ " + " + ".join(
    ["POFF"]
    + [f"{stat}_H" for stat in featuresT_remove_corr]
    + [f"{stat}_A" for stat in featuresT_remove_corr]
)
print(formula)

# Build both logit models from the same formula before fitting anything.
model = smf.logit(formula=formula, data=df_features)
model_norm_advantage = smf.logit(formula=formula, data=df_features_norm_advantage)

# Fit and show each model in turn, so the optimizer message and the summary
# table of the first model appear before those of the second.
result = model.fit()
display(result.summary())

result_norm_advantage = model_norm_advantage.fit()
display(result_norm_advantage.summary())

H ~ POFF + TPOFF_H + OddsT_H + OddsO_H + W_H + TFGM_H + OFGM_H + TFGA_H + OFGA_H + TFG3M_H + OFG3M_H + TFG3A_H + OFG3A_H + TFTM_H + OFTM_H + TFTA_H + OFTA_H + TORB_H + OORB_H + TDRB_H + ODRB_H + TAST_H + OAST_H + TSTL_H + OSTL_H + TBLK_H + OBLK_H + TTOV_H + OTOV_H + TPF_H + OPF_H + TPOFF_A + OddsT_A + OddsO_A + W_A + TFGM_A + OFGM_A + TFGA_A + OFGA_A + TFG3M_A + OFG3M_A + TFG3A_A + OFG3A_A + TFTM_A + OFTM_A + TFTA_A + OFTA_A + TORB_A + OORB_A + TDRB_A + ODRB_A + TAST_A + OAST_A + TSTL_A + OSTL_A + TBLK_A + OBLK_A + TTOV_A + OTOV_A + TPF_A + OPF_A
Optimization terminated successfully.
         Current function value: 0.562262
         Iterations 6


0,1,2,3
Dep. Variable:,H,No. Observations:,5251.0
Model:,Logit,Df Residuals:,5189.0
Method:,MLE,Df Model:,61.0
Date:,"Wed, 06 Nov 2024",Pseudo R-squ.:,0.1625
Time:,23:58:23,Log-Likelihood:,-2952.4
converged:,True,LL-Null:,-3525.1
Covariance Type:,nonrobust,LLR p-value:,9.633e-200

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.0731,2.847,-0.377,0.706,-6.654,4.507
POFF,0.2710,0.140,1.931,0.053,-0.004,0.546
TPOFF_H,-0.1456,1.033,-0.141,0.888,-2.170,1.879
OddsT_H,0.0579,0.129,0.448,0.654,-0.195,0.311
OddsO_H,-0.0607,0.106,-0.570,0.568,-0.269,0.148
W_H,6.5287,1.361,4.796,0.000,3.861,9.197
TFGM_H,0.0547,0.129,0.425,0.671,-0.198,0.307
OFGM_H,0.0640,0.147,0.434,0.664,-0.225,0.353
TFGA_H,-0.0146,0.130,-0.112,0.911,-0.269,0.240


Optimization terminated successfully.
         Current function value: 0.562125
         Iterations 6


0,1,2,3
Dep. Variable:,H,No. Observations:,5251.0
Model:,Logit,Df Residuals:,5189.0
Method:,MLE,Df Model:,61.0
Date:,"Wed, 06 Nov 2024",Pseudo R-squ.:,0.1627
Time:,23:58:23,Log-Likelihood:,-2951.7
converged:,True,LL-Null:,-3525.1
Covariance Type:,nonrobust,LLR p-value:,4.854e-200

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-2.3389,2.960,-0.790,0.429,-8.140,3.462
POFF,0.2728,0.140,1.946,0.052,-0.002,0.548
TPOFF_H,-0.0152,0.066,-0.231,0.818,-0.144,0.114
OddsT_H,0.1722,0.340,0.506,0.613,-0.495,0.839
OddsO_H,-0.1576,0.272,-0.580,0.562,-0.690,0.375
W_H,3.3242,0.673,4.942,0.000,2.006,4.643
TFGM_H,2.8139,4.775,0.589,0.556,-6.545,12.173
OFGM_H,2.2556,5.505,0.410,0.682,-8.535,13.046
TFGA_H,-0.6646,10.189,-0.065,0.948,-20.635,19.305
