In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before each
# cell executes, so edits to imported project code are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
from src.process import processor
# Box-score columns that are cast to float64 on load, averaged per (SEASON, TEAM),
# and overwritten by the mean-fill step further down.
STATS_TO_ADJUST = [
    'PTS', 'PACE',
    'FGM', 'FGA',
    '3PT_FGM', '3PT_FGA',
    'FTM', 'FTA',
    'ORB', 'DRB', 'TRB',
    'AST', 'STL', 'BLK', 'TOV', 'PF',
    '3PAR', 'FTR',
    'ORTG', 'DRTG',
    '2PT_FGM', '2PT_FGA',
]

In [3]:
# One processor object per pipeline stage, constructed in the order the stages are
# later run by forward(). The same objects are reused for the backward() calls.
fit_seasonal_slopes, home_adjustment, rest_adjustment, opponent_adjustment = (
    stage_cls()
    for stage_cls in (
        processor.FitSeasonalSlopes,
        processor.HomeAdjustment,
        processor.RestAdjustment,
        processor.OpponentAdjustment,
    )
)

In [7]:
# Load the reshaped box-score table and cast every stat that will be adjusted to
# float64.
df = pd.read_csv("data/game/box_raw/box_reshaped_regular.csv")
df[STATS_TO_ADJUST] = df[STATS_TO_ADJUST].astype('float64')
# SEASON labels have the form '2020-21', so the string comparison below is
# lexicographic, which matches season order for this format.
# `.copy()` detaches the filtered rows from the full frame so that later column
# assignments on `df` act on an independent frame and cannot raise
# SettingWithCopyWarning.
df = df[df['SEASON'] >= '2020-21'].copy()

In [8]:
# Feed the frame through each stage's forward() in a fixed order:
# seasonal slopes -> home -> rest -> opponent.
# NOTE: `df` is overwritten with the result, so re-running this cell without
# re-running the load cell passes already-processed data through forward() again.
for stage in (fit_seasonal_slopes, home_adjustment, rest_adjustment, opponent_adjustment):
    df = stage.forward(df)
display(df.head())

fitting seasonal slopes for 2020-21
fitting seasonal slopes for 2021-22
fitting seasonal slopes for 2022-23
fitting seasonal slopes for 2023-24
calculating home adjustment for 2020-21
calculating home adjustment for 2021-22
calculating home adjustment for 2022-23
calculating home adjustment for 2023-24
calculating rest adjustment for 2020-21
calculating rest adjustment for 2021-22
calculating rest adjustment for 2022-23
calculating rest adjustment for 2023-24
calculating opponent adjustment for 2020-21
2020-21 opponent effect magnitude for iteration 0: 8.23010530241026
2020-21 opponent effect magnitude for iteration 1: 0.2988339970952884
2020-21 opponent effect magnitude for iteration 2: 0.012036908727914268
calculating opponent adjustment for 2021-22
2021-22 opponent effect magnitude for iteration 0: 7.810762289121206
2021-22 opponent effect magnitude for iteration 1: 0.2832868336890952
2021-22 opponent effect magnitude for iteration 2: 0.020447172419096767
calculating opponent adjust

Unnamed: 0,SEASON,DATE,GAME_NUMBER,TEAM,OPPONENT,PTS,PACE,FGM,FGA,3PT_FGM,...,TOV,PF,3PAR,FTR,ORTG,DRTG,REST,2PT_FGM,2PT_FGA,IS_HOME
48052,2020-21,2020-12-22,1,CLI,LAK,122.51414,103.460012,46.114886,95.434632,15.472733,...,14.160359,26.869122,0.445547,0.21308,118.580251,108.482291,1in3,30.750716,53.279249,0
48053,2020-21,2020-12-22,1,LAK,CLI,111.851909,105.594762,38.807508,83.420224,10.410682,...,19.517187,20.857526,0.378069,0.369391,105.49888,106.996157,1in3,28.410721,51.765846,1
48054,2020-21,2020-12-22,1,NET,WAR,125.079408,107.660064,43.301525,91.883668,15.542719,...,18.309639,21.559175,0.390723,0.306993,116.29914,93.360349,1in3,27.710928,56.087328,1
48055,2020-21,2020-12-22,1,WAR,NET,99.037505,109.587035,36.985105,95.566407,9.953093,...,19.552257,23.971487,0.33215,0.23998,90.411353,106.25999,1in3,27.033799,64.103638,0
48056,2020-21,2020-12-23,1,76E,WIZ,105.357485,101.320722,38.609826,83.041131,8.010591,...,17.16017,22.005526,0.344342,0.315867,104.254985,102.020906,1in3,30.587031,54.564841,1


In [9]:
# Run each stage's backward() in the exact reverse of the forward() order:
# opponent -> rest -> home -> seasonal slopes.
# NOTE: `df` is overwritten with the result, so this cell is not idempotent.
for stage in (opponent_adjustment, rest_adjustment, home_adjustment, fit_seasonal_slopes):
    df = stage.backward(df)
display(df.head())

Unnamed: 0,SEASON,DATE,GAME_NUMBER,TEAM,OPPONENT,PTS,PACE,FGM,FGA,3PT_FGM,...,TOV,PF,3PAR,FTR,ORTG,DRTG,REST,2PT_FGM,2PT_FGA,IS_HOME
48052,2020-21,2020-12-22,1,CLI,LAK,115.99139,103.47886,43.99735,92.99613,13.99801,...,16.00171,29.00284,0.42996,0.204,112.09158,105.29592,1in3,29.99934,53.00064,0
48053,2020-21,2020-12-22,1,LAK,CLI,108.9928,103.51053,37.99765,80.99614,8.9983,...,18.99874,19.99906,0.35797,0.383,105.29677,112.10574,1in3,28.99936,52.00019,1
48054,2020-21,2020-12-22,1,NET,WAR,124.99916,110.52526,41.99802,91.99976,14.99951,...,20.00115,22.00089,0.37999,0.34805,113.09555,89.59809,1in3,26.99852,57.00098,1
48055,2020-21,2020-12-22,1,WAR,NET,99.00085,110.49151,36.99957,99.00238,10.00142,...,17.9989,24.00008,0.33303,0.232,89.60024,113.10408,1in3,26.99815,65.9986,0
48056,2020-21,2020-12-23,1,76E,WIZ,113.00489,107.21691,41.00044,87.00263,8.00053,...,18.0014,25.00385,0.322,0.34504,105.39829,99.79482,1in3,32.99991,59.0014,1


In [16]:
# Average of every adjustable stat per (SEASON, TEAM) pair, ordered from the
# highest to the lowest mean PTS. The result is indexed by (SEASON, TEAM).
mean_df = (
    df.groupby(['SEASON', 'TEAM'])[STATS_TO_ADJUST]
    .mean()
    .sort_values(by='PTS', ascending=False)
)

In [18]:
# Work on an independent deep copy so that filling in team means leaves `df` untouched.
mean_filled_df = df.copy(deep=True)

In [19]:
def fill_mean(row):
    """Replace a row's adjustable stats with its team's season averages.

    Args:
        row: A single game row providing 'TEAM' and 'SEASON' entries (a Series
            when used with ``DataFrame.apply(..., axis=1)``).

    Returns:
        The same ``row`` object, with every column named in STATS_TO_ADJUST
        overwritten by the value stored in ``mean_df`` for the row's
        (SEASON, TEAM) pair.

    Raises:
        KeyError: If ``mean_df`` has no entry for the (SEASON, TEAM) pair or
            for one of the stats.
    """
    team, season = row['TEAM'], row['SEASON']
    # mean_df is indexed by (SEASON, TEAM), so the lookup key puts season first.
    lookup_key = (season, team)
    for stat_name in STATS_TO_ADJUST:
        row[stat_name] = mean_df.loc[lookup_key, stat_name]
    return row

In [20]:
# Replace every adjustable stat with the row's (SEASON, TEAM) average from mean_df.
# A single vectorized join replaces the row-by-row `apply(fill_mean, axis=1)`: it
# writes the same mean_df values but avoids one Python call and 22 `.loc` lookups
# per row, and it leaves the other columns' dtypes untouched.
team_means = mean_filled_df[['SEASON', 'TEAM']].join(
    mean_df[STATS_TO_ADJUST], on=['SEASON', 'TEAM']
)
# mean_df has one row per (SEASON, TEAM), so the left join must keep exactly one
# output row per input row. A (SEASON, TEAM) pair missing from mean_df would show
# up as NaN here, where the row-wise version raised KeyError.
assert len(team_means) == len(mean_filled_df)
# Assign positionally: the join preserves the left frame's row order, so this
# does not depend on the index labels being unique.
mean_filled_df[STATS_TO_ADJUST] = team_means[STATS_TO_ADJUST].to_numpy()
display(mean_filled_df.head())

Unnamed: 0,SEASON,DATE,GAME_NUMBER,TEAM,OPPONENT,PTS,PACE,FGM,FGA,3PT_FGM,...,TOV,PF,3PAR,FTR,ORTG,DRTG,REST,2PT_FGM,2PT_FGA,IS_HOME
48052,2020-21,2020-12-22,1,CLI,LAK,114.012075,96.938729,41.790581,86.693328,14.264026,...,13.194626,19.208982,0.40138,0.2243,117.618769,111.39243,1in3,27.526562,51.998875,0
48053,2020-21,2020-12-22,1,LAK,CLI,109.450312,99.653326,40.574256,86.135882,11.041236,...,15.150758,19.110086,0.362526,0.273913,109.811815,107.147706,1in3,29.533029,54.916694,1
48054,2020-21,2020-12-22,1,NET,WAR,118.567095,100.207254,43.137498,87.345775,14.166957,...,13.542019,19.042297,0.414111,0.262022,118.346332,113.848584,1in3,28.970549,51.23412,1
48055,2020-21,2020-12-22,1,WAR,NET,113.497832,102.310456,41.242118,88.160865,14.527081,...,15.148767,21.216807,0.438098,0.241249,111.016791,110.099631,1in3,26.715046,49.552941,0
48056,2020-21,2020-12-23,1,76E,WIZ,113.636875,100.366843,41.401527,86.901562,11.264255,...,14.444638,20.222742,0.347125,0.299076,113.349447,107.779413,1in3,30.13728,56.775841,1


In [21]:
# Spot check: all rows for team 'LAK'. The stat columns should be constant within a season.
mean_filled_df.loc[mean_filled_df['TEAM'].eq('LAK')]

Unnamed: 0,SEASON,DATE,GAME_NUMBER,TEAM,OPPONENT,PTS,PACE,FGM,FGA,3PT_FGM,...,TOV,PF,3PAR,FTR,ORTG,DRTG,REST,2PT_FGM,2PT_FGA,IS_HOME
48053,2020-21,2020-12-22,1,LAK,CLI,109.450312,99.653326,40.574256,86.135882,11.041236,...,15.150758,19.110086,0.362526,0.273913,109.811815,107.147706,1in3,29.533029,54.916694,1
48084,2020-21,2020-12-25,2,LAK,MAV,109.450312,99.653326,40.574256,86.135882,11.041236,...,15.150758,19.110086,0.362526,0.273913,109.811815,107.147706,1in3,29.533029,54.916694,1
48119,2020-21,2020-12-27,3,LAK,TIM,109.450312,99.653326,40.574256,86.135882,11.041236,...,15.150758,19.110086,0.362526,0.273913,109.811815,107.147706,2in3,29.533029,54.916694,1
48134,2020-21,2020-12-28,4,LAK,BLA,109.450312,99.653326,40.574256,86.135882,11.041236,...,15.150758,19.110086,0.362526,0.273913,109.811815,107.147706,2in2,29.533029,54.916694,1
48168,2020-21,2020-12-30,5,LAK,SPU,109.450312,99.653326,40.574256,86.135882,11.041236,...,15.150758,19.110086,0.362526,0.273913,109.811815,107.147706,2in3,29.533029,54.916694,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57502,2023-24,2024-04-07,80,LAK,TIM,117.991919,101.786398,43.590785,87.485149,11.724525,...,13.953756,15.716768,0.357640,0.280485,115.901201,115.238921,2in2,31.866296,56.295135,1
57529,2023-24,2024-04-09,81,LAK,WAR,117.991919,101.786398,43.590785,87.485149,11.724525,...,13.953756,15.716768,0.357640,0.280485,115.901201,115.238921,2in3,31.866296,56.295135,1
57584,2023-24,2024-04-12,82,LAK,GRI,117.991919,101.786398,43.590785,87.485149,11.724525,...,13.953756,15.716768,0.357640,0.280485,115.901201,115.238921,1in3,31.866296,56.295135,0
57614,2023-24,2024-04-14,83,LAK,PEL,117.991919,101.786398,43.590785,87.485149,11.724525,...,13.953756,15.716768,0.357640,0.280485,115.901201,115.238921,2in3,31.866296,56.295135,0


In [22]:
# Pass the mean-filled frame through each stage's backward() in the reverse of the
# forward() order: opponent -> rest -> home -> seasonal slopes.
processed_df = mean_filled_df
for stage in (opponent_adjustment, rest_adjustment, home_adjustment, fit_seasonal_slopes):
    processed_df = stage.backward(processed_df)
display(processed_df.head())

Unnamed: 0,SEASON,DATE,GAME_NUMBER,TEAM,OPPONENT,PTS,PACE,FGM,FGA,3PT_FGM,...,TOV,PF,3PAR,FTR,ORTG,DRTG,REST,2PT_FGM,2PT_FGA,IS_HOME
48052,2020-21,2020-12-22,1,CLI,LAK,107.48932,96.95757,39.67304,84.25482,12.7893,...,15.03597,21.3427,0.3858,0.21522,111.13009,108.20605,1in3,26.77519,51.72026,0
48053,2020-21,2020-12-22,1,LAK,CLI,106.5912,97.5691,39.7644,83.7118,9.62885,...,14.63232,18.25162,0.34243,0.28752,109.6097,112.25729,1in3,30.12166,55.15104,1
48054,2020-21,2020-12-22,1,NET,WAR,118.48685,103.07244,41.83399,87.46187,13.62375,...,15.23353,19.48402,0.40337,0.30308,115.14275,110.08633,1in3,28.25814,52.14777,1
48055,2020-21,2020-12-22,1,WAR,NET,113.46118,103.21493,41.25658,91.59684,14.57541,...,13.59541,21.2454,0.43898,0.23327,110.20567,116.94372,1in3,26.6794,51.4479,0
48056,2020-21,2020-12-23,1,76E,WIZ,121.28428,106.26303,43.79214,90.86306,11.2542,...,15.28586,23.22107,0.32479,0.32825,114.49275,105.55333,1in3,32.55016,61.2124,1


In [26]:
# Spot check: the 2023-24 rows where team 'LAK' faced opponent 'CEL'.
processed_df.loc[
    processed_df['TEAM'].eq('LAK')
    & processed_df['SEASON'].eq('2023-24')
    & processed_df['OPPONENT'].eq('CEL')
]


Unnamed: 0,SEASON,DATE,GAME_NUMBER,TEAM,OPPONENT,PTS,PACE,FGM,FGA,3PT_FGM,...,TOV,PF,3PAR,FTR,ORTG,DRTG,REST,2PT_FGM,2PT_FGA,IS_HOME
56035,2023-24,2023-12-25,32,LAK,CEL,112.22539,99.06758,42.94586,89.16034,11.97392,...,12.04891,12.54456,0.36606,0.21221,113.22842,120.83157,2in3,30.98627,56.52984,1
56598,2023-24,2024-02-01,51,LAK,CEL,110.11081,98.88783,42.44073,89.3613,12.10202,...,11.77578,11.7193,0.3774,0.19497,111.38173,122.71092,2in3,30.35304,55.64325,0
