In [56]:
import numpy as np
import pandas as pd

#### Import detailed regular season data. This includes every regular season game that every team played in a given season. It includes the scores, as well as basketball statistics such as three-pointers, free throws, turnovers, etc. of both the winning and losing teams.

In [57]:
# Read the detailed game results and remove the DayNum, WLoc and NumOT columns
# in a single expression.
season_stats = (
    pd.read_csv("data/MRegularSeasonDetailedResults.csv")
    .drop(columns=['DayNum', 'WLoc', 'NumOT'])
)

In [58]:
# Preview the game-level table after the column drop (one row per game,
# winner stats in W* columns and loser stats in L* columns).
season_stats

Unnamed: 0,Season,WTeamID,WScore,LTeamID,LScore,WFGM,WFGA,WFGM3,WFGA3,WFTM,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
0,2003,1104,68,1328,62,27,58,3,14,11,...,10,16,22,10,22,8,18,9,2,20
1,2003,1272,70,1393,63,26,62,8,20,10,...,24,9,20,20,25,7,12,8,6,16
2,2003,1266,73,1437,61,24,58,8,18,17,...,26,14,23,31,22,9,12,2,5,23
3,2003,1296,56,1457,50,18,38,3,9,17,...,22,8,15,17,20,9,19,4,3,23
4,2003,1400,77,1208,71,30,61,6,14,11,...,16,17,27,21,15,12,10,7,1,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100418,2022,1400,79,1242,76,28,67,3,20,20,...,13,15,23,5,24,10,15,3,5,21
100419,2022,1411,66,1126,63,24,59,2,20,16,...,21,15,24,5,23,10,19,13,2,23
100420,2022,1422,68,1441,49,23,56,13,32,9,...,24,8,11,10,18,5,16,8,2,12
100421,2022,1438,69,1181,68,31,65,2,12,5,...,17,18,22,11,25,14,14,3,9,11


### Calculate Games Played. This will be used to generate 'per game' basketball statistics

In [59]:
# Games played per (Season, TeamID): count the rows in which the team appears
# as the winner, count the rows in which it appears as the loser, then add.
# count() tallies non-null WFGM / LFGM entries, i.e. one per game row.
GP_W = (
    season_stats.groupby(['Season', 'WTeamID'])['WFGM']
    .count()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID', 'WFGM': 'GP_W'})
)
GP_L = (
    season_stats.groupby(['Season', 'LTeamID'])['LFGM']
    .count()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID', 'LFGM': 'GP_L'})
)
# The outer merge leaves NaN for a team with no wins (or no losses); those
# become 0 before the two counts are summed.
GP = (
    GP_W.merge(GP_L, how='outer', on=['Season', 'TeamID'])
    .replace(np.nan, 0)
    .assign(GP=lambda t: t['GP_W'] + t['GP_L'])
    .drop(columns=['GP_W', 'GP_L'])
)
GP

Unnamed: 0,Season,TeamID,GP
0,2003,1102,28.0
1,2003,1103,27.0
2,2003,1104,28.0
3,2003,1105,26.0
4,2003,1106,28.0
...,...,...,...
6887,2015,1363,28.0
6888,2021,1152,9.0
6889,2022,1175,16.0
6890,2022,1237,19.0


#### Sum each statistic per team per season. This is done by grouping each basketball statistic by Season and TeamID. This aggregation is done twice: once for the games a team won, and once for the games the same team lost.

#### A similar aggregation will be done to calculate 'allowed' or 'against' basketball statistics. These are denoted by "_Ag". For example, the feature "FGM_Ag" represents how many field goals a team allowed in a given season.

### Field Goals

Field Goals Made

In [114]:
# Field goals made per (Season, TeamID): totals from games won (WFGM) and
# games lost (LFGM), added together.
FGM_W = (
    season_stats.groupby(['Season', 'WTeamID'])['WFGM']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
FGM_L = (
    season_stats.groupby(['Season', 'LTeamID'])['LFGM']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
FGM_tot = (
    FGM_W.merge(FGM_L, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(FGM=lambda t: t['WFGM'] + t['LFGM'])
    .drop(columns=['WFGM', 'LFGM'])
)

In [61]:
# Disabled scratch cell: would join games played (GP) onto the FGM totals for
# inspection. Nothing below depends on it.
#FGM = pd.merge(left=GP, right=FGM_tot, how='outer', on=['Season', 'TeamID'])
#FGM

Field Goals Attempted

In [115]:
# Field goals attempted per (Season, TeamID): totals from games won (WFGA)
# and games lost (LFGA), added together.
FGA_W = (
    season_stats.groupby(['Season', 'WTeamID'])['WFGA']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
FGA_L = (
    season_stats.groupby(['Season', 'LTeamID'])['LFGA']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
FGA_tot = (
    FGA_W.merge(FGA_L, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(FGA=lambda t: t['WFGA'] + t['LFGA'])
    .drop(columns=['WFGA', 'LFGA'])
)

Field Goal Percentage

In [63]:
# Field-goal percentage: makes over attempts, scaled to 0-100 and rounded to
# four decimals.
FGP = FGM_tot.merge(FGA_tot, how='outer', on=['Season', 'TeamID'])
FGP['FG%'] = FGP['FGM'].div(FGP['FGA']).mul(100).round(4)

Field Goals Made Against

In [64]:
# Field goals made AGAINST each team. In games the team won, the opponent's
# makes are in LFGM; in games it lost, they are in WFGM.
FGM_W_Ag = (
    season_stats.groupby(['Season', 'WTeamID'])['LFGM']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
FGM_L_Ag = (
    season_stats.groupby(['Season', 'LTeamID'])['WFGM']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
FGM_tot_Ag = (
    FGM_W_Ag.merge(FGM_L_Ag, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(FGM_Ag=lambda t: t['WFGM'] + t['LFGM'])
    .drop(columns=['WFGM', 'LFGM'])
)

Field Goal Attempts Against

In [65]:
# Field goals attempted AGAINST each team. In games the team won, the
# opponent's attempts are in LFGA; in games it lost, they are in WFGA.
FGA_W_Ag = (
    season_stats.groupby(['Season', 'WTeamID'])['LFGA']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
FGA_L_Ag = (
    season_stats.groupby(['Season', 'LTeamID'])['WFGA']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
FGA_tot_Ag = (
    FGA_W_Ag.merge(FGA_L_Ag, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(FGA_Ag=lambda t: t['WFGA'] + t['LFGA'])
    .drop(columns=['WFGA', 'LFGA'])
)

Field Goals Against Percentage

In [116]:
# Opponent field-goal percentage: makes allowed over attempts allowed, scaled
# to 0-100 and rounded to four decimals.
FGP_Ag = FGM_tot_Ag.merge(FGA_tot_Ag, how='outer', on=['Season', 'TeamID'])
FGP_Ag['FG%_Ag'] = FGP_Ag['FGM_Ag'].div(FGP_Ag['FGA_Ag']).mul(100).round(4)

Field Goals Made / Against per game

In [67]:
# Join own and opponent field-goal figures, attach games played, derive the
# per-game rates (3 decimals), then discard the GP helper column.
df_FG = (
    pd.merge(FGP, FGP_Ag, how='outer', on=['Season', 'TeamID'])
    .merge(GP, how='outer', on=['Season', 'TeamID'])
    .assign(
        FGM_pg=lambda t: t['FGM'].div(t['GP']).round(3),
        FGM_Ag_pg=lambda t: t['FGM_Ag'].div(t['GP']).round(3),
    )
    .drop(columns='GP')
)
df_FG

Unnamed: 0,Season,TeamID,FGM,FGA,FG%,FGM_Ag,FGA_Ag,FG%_Ag,FGM_pg,FGM_Ag_pg
0,2003,1102,536.0,1114.0,48.1149,540.0,1188.0,45.4545,19.143,19.286
1,2003,1103,733.0,1508.0,48.6074,750.0,1539.0,48.7329,27.148,27.778
2,2003,1104,673.0,1601.0,42.0362,651.0,1554.0,41.8919,24.036,23.250
3,2003,1105,634.0,1602.0,39.5755,702.0,1533.0,45.7926,24.385,27.000
4,2003,1106,656.0,1548.0,42.3773,608.0,1495.0,40.6689,23.429,21.714
...,...,...,...,...,...,...,...,...,...,...
6887,2015,1363,503.0,1452.0,34.6419,687.0,1449.0,47.4120,17.964,24.536
6888,2021,1152,180.0,497.0,36.2173,295.0,558.0,52.8674,20.000,32.778
6889,2022,1175,326.0,863.0,37.7752,424.0,985.0,43.0457,20.375,26.500
6890,2022,1237,349.0,944.0,36.9703,428.0,957.0,44.7231,18.368,22.526


### 3-Pointers

3-Pointers Made

In [117]:
# 3-pointers made per (Season, TeamID): totals from games won (WFGM3) and
# games lost (LFGM3), added together.
FGM3_W = (
    season_stats.groupby(['Season', 'WTeamID'])['WFGM3']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
FGM3_L = (
    season_stats.groupby(['Season', 'LTeamID'])['LFGM3']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
FGM3_tot = (
    FGM3_W.merge(FGM3_L, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(FGM3=lambda t: t['WFGM3'] + t['LFGM3'])
    .drop(columns=['WFGM3', 'LFGM3'])
)

3-Pointers Attempted

In [118]:
# 3-pointers attempted per (Season, TeamID): totals from games won (WFGA3)
# and games lost (LFGA3), added together.
FGA3_W = (
    season_stats.groupby(['Season', 'WTeamID'])['WFGA3']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
FGA3_L = (
    season_stats.groupby(['Season', 'LTeamID'])['LFGA3']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
FGA3_tot = (
    FGA3_W.merge(FGA3_L, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(FGA3=lambda t: t['WFGA3'] + t['LFGA3'])
    .drop(columns=['WFGA3', 'LFGA3'])
)

3-Pointer Percentage

In [119]:
# 3-point percentage: makes over attempts, scaled to 0-100 and rounded to
# four decimals.
FG3P = FGM3_tot.merge(FGA3_tot, how='outer', on=['Season', 'TeamID'])
FG3P['FG3%'] = FG3P['FGM3'].div(FG3P['FGA3']).mul(100).round(4)

3-Pointers Made Against

In [71]:
# 3-pointers made AGAINST each team. In games the team won, the opponent's
# makes are in LFGM3; in games it lost, they are in WFGM3.
FGM3_W_Ag = (
    season_stats.groupby(['Season', 'WTeamID'])['LFGM3']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
FGM3_L_Ag = (
    season_stats.groupby(['Season', 'LTeamID'])['WFGM3']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
FGM3_tot_Ag = (
    FGM3_W_Ag.merge(FGM3_L_Ag, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(FGM3_Ag=lambda t: t['WFGM3'] + t['LFGM3'])
    .drop(columns=['WFGM3', 'LFGM3'])
)

3-Pointers Attempted Against

In [72]:
# 3-pointers attempted AGAINST each team. In games the team won, the
# opponent's attempts are in LFGA3; in games it lost, they are in WFGA3.
FGA3_W_Ag = (
    season_stats.groupby(['Season', 'WTeamID'])['LFGA3']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
FGA3_L_Ag = (
    season_stats.groupby(['Season', 'LTeamID'])['WFGA3']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
FGA3_tot_Ag = (
    FGA3_W_Ag.merge(FGA3_L_Ag, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(FGA3_Ag=lambda t: t['WFGA3'] + t['LFGA3'])
    .drop(columns=['WFGA3', 'LFGA3'])
)

3-Pointers Percentage Against

In [121]:
# Opponent 3-point percentage: makes allowed over attempts allowed, scaled to
# 0-100 and rounded to four decimals.
FG3P_Ag = FGM3_tot_Ag.merge(FGA3_tot_Ag, how='outer', on=['Season', 'TeamID'])
FG3P_Ag['FG3%_Ag'] = FG3P_Ag['FGM3_Ag'].div(FG3P_Ag['FGA3_Ag']).mul(100).round(4)

3-Pointers Made / Against per game

In [122]:
# Join own and opponent 3-point figures, attach games played, derive the
# per-game rates (3 decimals), then discard the GP helper column.
df_FG3 = (
    pd.merge(FG3P, FG3P_Ag, how='outer', on=['Season', 'TeamID'])
    .merge(GP, how='outer', on=['Season', 'TeamID'])
    .assign(
        FGM3_pg=lambda t: t['FGM3'].div(t['GP']).round(3),
        FGM3_Ag_pg=lambda t: t['FGM3_Ag'].div(t['GP']).round(3),
    )
    .drop(columns='GP')
)
df_FG3

Unnamed: 0,Season,TeamID,FGM3,FGA3,FG3%,FGM3_Ag,FGA3_Ag,FG3%_Ag,FGM3_pg,FGM3_Ag_pg
0,2003,1102,219.0,583.0,37.5643,133.0,348.0,38.2184,7.821,4.750
1,2003,1103,147.0,434.0,33.8710,180.0,496.0,36.2903,5.444,6.667
2,2003,1104,178.0,556.0,32.0144,178.0,536.0,33.2090,6.357,6.357
3,2003,1105,197.0,540.0,36.4815,163.0,456.0,35.7456,7.577,6.269
4,2003,1106,171.0,494.0,34.6154,134.0,426.0,31.4554,6.107,4.786
...,...,...,...,...,...,...,...,...,...,...
6887,2015,1363,186.0,633.0,29.3839,191.0,492.0,38.8211,6.643,6.821
6888,2021,1152,51.0,203.0,25.1232,81.0,205.0,39.5122,5.667,9.000
6889,2022,1175,76.0,268.0,28.3582,168.0,450.0,37.3333,4.750,10.500
6890,2022,1237,93.0,337.0,27.5964,151.0,419.0,36.0382,4.895,7.947


### Free Throws

Free Throws Made

In [134]:
# Free throws made per (Season, TeamID): totals from games won (WFTM) and
# games lost (LFTM), added together.
FTM_W = (
    season_stats.groupby(['Season', 'WTeamID'])['WFTM']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
FTM_L = (
    season_stats.groupby(['Season', 'LTeamID'])['LFTM']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
FTM_tot = (
    FTM_W.merge(FTM_L, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(FTM=lambda t: t['WFTM'] + t['LFTM'])
    .drop(columns=['WFTM', 'LFTM'])
)

Free Throws Attempted

In [135]:
# Free throws attempted per (Season, TeamID): totals from games won (WFTA)
# and games lost (LFTA), added together.
FTA_W = (
    season_stats.groupby(['Season', 'WTeamID'])['WFTA']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
FTA_L = (
    season_stats.groupby(['Season', 'LTeamID'])['LFTA']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
FTA_tot = (
    FTA_W.merge(FTA_L, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(FTA=lambda t: t['WFTA'] + t['LFTA'])
    .drop(columns=['WFTA', 'LFTA'])
)

Free Throw Percentage

In [126]:
# Free-throw percentage: makes over attempts, scaled to 0-100 and rounded to
# four decimals.
FTP = FTM_tot.merge(FTA_tot, how='outer', on=['Season', 'TeamID'])
FTP['FT%'] = FTP['FTM'].div(FTP['FTA']).mul(100).round(4)

Free Throws Made Against

In [127]:
# Free throws made AGAINST each team. In games the team won, the opponent's
# makes are in LFTM; in games it lost, they are in WFTM.
FTM_W_Ag = (
    season_stats.groupby(['Season', 'WTeamID'])['LFTM']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
FTM_L_Ag = (
    season_stats.groupby(['Season', 'LTeamID'])['WFTM']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
FTM_tot_Ag = (
    FTM_W_Ag.merge(FTM_L_Ag, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(FTM_Ag=lambda t: t['WFTM'] + t['LFTM'])
    .drop(columns=['WFTM', 'LFTM'])
)

Free Throws Attempted Against

In [128]:
# Free throws attempted AGAINST each team. In games the team won, the
# opponent's attempts are in LFTA; in games it lost, they are in WFTA.
FTA_W_Ag = (
    season_stats.groupby(['Season', 'WTeamID'])['LFTA']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
FTA_L_Ag = (
    season_stats.groupby(['Season', 'LTeamID'])['WFTA']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
FTA_tot_Ag = (
    FTA_W_Ag.merge(FTA_L_Ag, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(FTA_Ag=lambda t: t['WFTA'] + t['LFTA'])
    .drop(columns=['WFTA', 'LFTA'])
)

Free Throws Made / Against per game

In [129]:
# Pair the opponent free-throw totals, then join them onto the team's own
# free-throw table.
# NOTE(review): unlike the field-goal and 3-point sections, no opponent
# percentage (FT%_Ag) is derived here - confirm whether that is intentional.
FT_Ag_combined = FTM_tot_Ag.merge(FTA_tot_Ag, how='outer', on=['Season', 'TeamID'])
FT_combined = FTP.merge(FT_Ag_combined, how='outer', on=['Season', 'TeamID'])

In [130]:
# Attach games played, derive per-game free throws made / allowed
# (3 decimals), then discard the GP helper column.
df_FT = (
    pd.merge(FT_combined, GP, how='outer', on=['Season', 'TeamID'])
    .assign(
        FTM_pg=lambda t: t['FTM'].div(t['GP']).round(3),
        FTM_Ag_pg=lambda t: t['FTM_Ag'].div(t['GP']).round(3),
    )
    .drop(columns='GP')
)
df_FT

Unnamed: 0,Season,TeamID,FTM,FTA,FT%,FTM_Ag,FTA_Ag,FTM_pg,FTM_Ag_pg
0,2003,1102,312.0,479.0,65.1357,383.0,539.0,11.143,13.679
1,2003,1103,514.0,698.0,73.6390,430.0,598.0,19.037,15.926
2,2003,1104,416.0,586.0,70.9898,340.0,480.0,14.857,12.143
3,2003,1105,401.0,568.0,70.5986,426.0,637.0,15.423,16.385
4,2003,1106,298.0,461.0,64.6421,435.0,615.0,10.643,15.536
...,...,...,...,...,...,...,...,...,...
6887,2015,1363,279.0,422.0,66.1137,414.0,622.0,9.964,14.786
6888,2021,1152,88.0,126.0,69.8413,127.0,183.0,9.778,14.111
6889,2022,1175,185.0,279.0,66.3082,212.0,295.0,11.562,13.250
6890,2022,1237,158.0,239.0,66.1088,295.0,387.0,8.316,15.526


### Offensive Rebounds

In [82]:
# Offensive rebounds per (Season, TeamID): totals from games won (WOR) and
# games lost (LOR), added together.
OR_W = (
    season_stats.groupby(['Season', 'WTeamID'])['WOR']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
OR_L = (
    season_stats.groupby(['Season', 'LTeamID'])['LOR']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
OR_tot = (
    OR_W.merge(OR_L, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(OR=lambda t: t['WOR'] + t['LOR'])
    .drop(columns=['WOR', 'LOR'])
)

Offensive Rebounds Against

In [83]:
# Offensive rebounds AGAINST each team. In games the team won, the opponent's
# rebounds are in LOR; in games it lost, they are in WOR.
OR_W_Ag = (
    season_stats.groupby(['Season', 'WTeamID'])['LOR']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
OR_L_Ag = (
    season_stats.groupby(['Season', 'LTeamID'])['WOR']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
OR_tot_Ag = (
    OR_W_Ag.merge(OR_L_Ag, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(OR_Ag=lambda t: t['WOR'] + t['LOR'])
    .drop(columns=['WOR', 'LOR'])
)

Offensive Rebounds per game

In [84]:
# Own and opponent offensive-rebound totals side by side, keyed on (Season, TeamID).
OR_combined = OR_tot.merge(OR_tot_Ag, how='outer', on=['Season', 'TeamID'])

In [85]:
# Attach games played, derive per-game offensive rebounds for / against
# (3 decimals), then discard the GP helper column.
df_OR = (
    pd.merge(OR_combined, GP, how='outer', on=['Season', 'TeamID'])
    .assign(
        OR_pg=lambda t: t['OR'].div(t['GP']).round(3),
        OR_Ag_pg=lambda t: t['OR_Ag'].div(t['GP']).round(3),
    )
    .drop(columns='GP')
)
df_OR

Unnamed: 0,Season,TeamID,OR,OR_Ag,OR_pg,OR_Ag_pg
0,2003,1102,117.0,269.0,4.179,9.607
1,2003,1103,264.0,325.0,9.778,12.037
2,2003,1104,380.0,305.0,13.571,10.893
3,2003,1105,351.0,343.0,13.500,13.192
4,2003,1106,344.0,317.0,12.286,11.321
...,...,...,...,...,...,...
6887,2015,1363,245.0,275.0,8.750,9.821
6888,2021,1152,42.0,97.0,4.667,10.778
6889,2022,1175,132.0,184.0,8.250,11.500
6890,2022,1237,147.0,165.0,7.737,8.684


### Defensive Rebounds

In [86]:
# Defensive rebounds per (Season, TeamID): totals from games won (WDR) and
# games lost (LDR), added together.
DR_W = (
    season_stats.groupby(['Season', 'WTeamID'])['WDR']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
DR_L = (
    season_stats.groupby(['Season', 'LTeamID'])['LDR']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
DR_tot = (
    DR_W.merge(DR_L, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(DR=lambda t: t['WDR'] + t['LDR'])
    .drop(columns=['WDR', 'LDR'])
)

Defensive Rebounds Against

In [87]:
# Defensive rebounds AGAINST each team. In games the team won, the opponent's
# rebounds are in LDR; in games it lost, they are in WDR.
DR_W_Ag = (
    season_stats.groupby(['Season', 'WTeamID'])['LDR']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
DR_L_Ag = (
    season_stats.groupby(['Season', 'LTeamID'])['WDR']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
DR_tot_Ag = (
    DR_W_Ag.merge(DR_L_Ag, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(DR_Ag=lambda t: t['WDR'] + t['LDR'])
    .drop(columns=['WDR', 'LDR'])
)

Defensive Rebounds per game

In [88]:
# Own and opponent defensive-rebound totals side by side, keyed on (Season, TeamID).
DR_combined = DR_tot.merge(DR_tot_Ag, how='outer', on=['Season', 'TeamID'])

In [89]:
# Attach games played, derive per-game defensive rebounds for / against
# (3 decimals), then discard the GP helper column.
df_DR = (
    pd.merge(DR_combined, GP, how='outer', on=['Season', 'TeamID'])
    .assign(
        DR_pg=lambda t: t['DR'].div(t['GP']).round(3),
        DR_Ag_pg=lambda t: t['DR_Ag'].div(t['GP']).round(3),
    )
    .drop(columns='GP')
)
df_DR

Unnamed: 0,Season,TeamID,DR,DR_Ag,DR_pg,DR_Ag_pg
0,2003,1102,471.0,564.0,16.821,20.143
1,2003,1103,538.0,595.0,19.926,22.037
2,2003,1104,670.0,634.0,23.929,22.643
3,2003,1105,601.0,686.0,23.115,26.385
4,2003,1106,668.0,626.0,23.857,22.357
...,...,...,...,...,...,...
6887,2015,1363,590.0,774.0,21.071,27.643
6888,2021,1152,170.0,264.0,18.889,29.333
6889,2022,1175,345.0,395.0,21.562,24.688
6890,2022,1237,359.0,412.0,18.895,21.684


### Assists

In [90]:
# Assists per (Season, TeamID): totals from games won (WAst) and games lost
# (LAst), added together.
Ast_W = (
    season_stats.groupby(['Season', 'WTeamID'])['WAst']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
Ast_L = (
    season_stats.groupby(['Season', 'LTeamID'])['LAst']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
Ast_tot = (
    Ast_W.merge(Ast_L, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(Ast=lambda t: t['WAst'] + t['LAst'])
    .drop(columns=['WAst', 'LAst'])
)

Assists against

In [91]:
# Assists AGAINST each team. In games the team won, the opponent's assists
# are in LAst; in games it lost, they are in WAst.
Ast_W_Ag = (
    season_stats.groupby(['Season', 'WTeamID'])['LAst']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
Ast_L_Ag = (
    season_stats.groupby(['Season', 'LTeamID'])['WAst']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
Ast_tot_Ag = (
    Ast_W_Ag.merge(Ast_L_Ag, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(Ast_Ag=lambda t: t['WAst'] + t['LAst'])
    .drop(columns=['WAst', 'LAst'])
)

Assists per game

In [92]:
# Own and opponent assist totals side by side, keyed on (Season, TeamID).
Ast_combined = Ast_tot.merge(Ast_tot_Ag, how='outer', on=['Season', 'TeamID'])

In [93]:
# Attach games played, derive per-game assists for / against (3 decimals),
# then discard the GP helper column.
df_Ast = (
    pd.merge(Ast_combined, GP, how='outer', on=['Season', 'TeamID'])
    .assign(
        Ast_pg=lambda t: t['Ast'].div(t['GP']).round(3),
        Ast_Ag_pg=lambda t: t['Ast_Ag'].div(t['GP']).round(3),
    )
    .drop(columns='GP')
)
df_Ast

Unnamed: 0,Season,TeamID,Ast,Ast_Ag,Ast_pg,Ast_Ag_pg
0,2003,1102,364.0,256.0,13.000,9.143
1,2003,1103,411.0,418.0,15.222,15.481
2,2003,1104,339.0,327.0,12.107,11.679
3,2003,1105,378.0,411.0,14.538,15.808
4,2003,1106,327.0,330.0,11.679,11.786
...,...,...,...,...,...,...
6887,2015,1363,263.0,421.0,9.393,15.036
6888,2021,1152,89.0,174.0,9.889,19.333
6889,2022,1175,130.0,278.0,8.125,17.375
6890,2022,1237,183.0,238.0,9.632,12.526


### Turnovers

In [94]:
# Turnovers per (Season, TeamID): totals from games won (WTO) and games lost
# (LTO), added together.
TO_W = (
    season_stats.groupby(['Season', 'WTeamID'])['WTO']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
TO_L = (
    season_stats.groupby(['Season', 'LTeamID'])['LTO']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
TO_tot = (
    TO_W.merge(TO_L, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(TO=lambda t: t['WTO'] + t['LTO'])
    .drop(columns=['WTO', 'LTO'])
)

Turnovers Against

In [95]:
# Turnovers committed by opponents. In games the team won, the opponent's
# turnovers are in LTO; in games it lost, they are in WTO.
TO_W_Ag = (
    season_stats.groupby(['Season', 'WTeamID'])['LTO']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
TO_L_Ag = (
    season_stats.groupby(['Season', 'LTeamID'])['WTO']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
TO_tot_Ag = (
    TO_W_Ag.merge(TO_L_Ag, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(TO_Ag=lambda t: t['WTO'] + t['LTO'])
    .drop(columns=['WTO', 'LTO'])
)

Turnovers per game 

In [96]:
# Own and opponent turnover totals side by side, keyed on (Season, TeamID).
TO_combined = TO_tot.merge(TO_tot_Ag, how='outer', on=['Season', 'TeamID'])

In [97]:
# Attach games played, derive per-game turnovers for / against (3 decimals),
# then discard the GP helper column.
df_TO = (
    pd.merge(TO_combined, GP, how='outer', on=['Season', 'TeamID'])
    .assign(
        TO_pg=lambda t: t['TO'].div(t['GP']).round(3),
        TO_Ag_pg=lambda t: t['TO_Ag'].div(t['GP']).round(3),
    )
    .drop(columns='GP')
)
df_TO

Unnamed: 0,Season,TeamID,TO,TO_Ag,TO_pg,TO_Ag_pg
0,2003,1102,320.0,363.0,11.429,12.964
1,2003,1103,341.0,414.0,12.630,15.333
2,2003,1104,372.0,388.0,13.286,13.857
3,2003,1105,485.0,489.0,18.654,18.808
4,2003,1106,477.0,422.0,17.036,15.071
...,...,...,...,...,...,...
6887,2015,1363,386.0,318.0,13.786,11.357
6888,2021,1152,131.0,99.0,14.556,11.000
6889,2022,1175,260.0,199.0,16.250,12.438
6890,2022,1237,287.0,231.0,15.105,12.158


### Steals

In [98]:
# Steals per (Season, TeamID): totals from games won (WStl) and games lost
# (LStl), added together.
Stl_W = (
    season_stats.groupby(['Season', 'WTeamID'])['WStl']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
Stl_L = (
    season_stats.groupby(['Season', 'LTeamID'])['LStl']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
Stl_tot = (
    Stl_W.merge(Stl_L, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(Stl=lambda t: t['WStl'] + t['LStl'])
    .drop(columns=['WStl', 'LStl'])
)

Steals Against

In [99]:
# Steals AGAINST each team. In games the team won, the opponent's steals are
# in LStl; in games it lost, they are in WStl.
Stl_W_Ag = (
    season_stats.groupby(['Season', 'WTeamID'])['LStl']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
Stl_L_Ag = (
    season_stats.groupby(['Season', 'LTeamID'])['WStl']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
Stl_tot_Ag = (
    Stl_W_Ag.merge(Stl_L_Ag, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(Stl_Ag=lambda t: t['WStl'] + t['LStl'])
    .drop(columns=['WStl', 'LStl'])
)

Steals per game

In [100]:
# Own and opponent steal totals side by side, keyed on (Season, TeamID).
Stl_combined = Stl_tot.merge(Stl_tot_Ag, how='outer', on=['Season', 'TeamID'])

In [101]:
# Attach games played, derive per-game steals for / against (3 decimals),
# then discard the GP helper column.
df_Stl = (
    pd.merge(Stl_combined, GP, how='outer', on=['Season', 'TeamID'])
    .assign(
        Stl_pg=lambda t: t['Stl'].div(t['GP']).round(3),
        Stl_Ag_pg=lambda t: t['Stl_Ag'].div(t['GP']).round(3),
    )
    .drop(columns='GP')
)
df_Stl

Unnamed: 0,Season,TeamID,Stl,Stl_Ag,Stl_pg,Stl_Ag_pg
0,2003,1102,167.0,152.0,5.964,5.429
1,2003,1103,196.0,173.0,7.259,6.407
2,2003,1104,185.0,155.0,6.607,5.536
3,2003,1105,242.0,244.0,9.308,9.385
4,2003,1106,234.0,246.0,8.357,8.786
...,...,...,...,...,...,...
6887,2015,1363,136.0,195.0,4.857,6.964
6888,2021,1152,49.0,58.0,5.444,6.444
6889,2022,1175,95.0,130.0,5.938,8.125
6890,2022,1237,98.0,133.0,5.158,7.000


### Blocks

In [102]:
# Blocks per (Season, TeamID): totals from games won (WBlk) and games lost
# (LBlk), added together.
Blk_W = (
    season_stats.groupby(['Season', 'WTeamID'])['WBlk']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
Blk_L = (
    season_stats.groupby(['Season', 'LTeamID'])['LBlk']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
Blk_tot = (
    Blk_W.merge(Blk_L, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(Blk=lambda t: t['WBlk'] + t['LBlk'])
    .drop(columns=['WBlk', 'LBlk'])
)

Blocks Against

In [103]:
# Blocks AGAINST each team. In games the team won, the opponent's blocks are
# in LBlk; in games it lost, they are in WBlk.
Blk_W_Ag = (
    season_stats.groupby(['Season', 'WTeamID'])['LBlk']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
Blk_L_Ag = (
    season_stats.groupby(['Season', 'LTeamID'])['WBlk']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
Blk_tot_Ag = (
    Blk_W_Ag.merge(Blk_L_Ag, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(Blk_Ag=lambda t: t['WBlk'] + t['LBlk'])
    .drop(columns=['WBlk', 'LBlk'])
)

Blocks per game

In [104]:
# Own and opponent block totals side by side, keyed on (Season, TeamID).
Blk_combined = Blk_tot.merge(Blk_tot_Ag, how='outer', on=['Season', 'TeamID'])

In [105]:
# Attach games played, derive per-game blocks for / against (3 decimals),
# then discard the GP helper column.
df_Blk = (
    pd.merge(Blk_combined, GP, how='outer', on=['Season', 'TeamID'])
    .assign(
        Blk_pg=lambda t: t['Blk'].div(t['GP']).round(3),
        Blk_Ag_pg=lambda t: t['Blk_Ag'].div(t['GP']).round(3),
    )
    .drop(columns='GP')
)
df_Blk

Unnamed: 0,Season,TeamID,Blk,Blk_Ag,Blk_pg,Blk_Ag_pg
0,2003,1102,50.0,44.0,1.786,1.571
1,2003,1103,63.0,77.0,2.333,2.852
2,2003,1104,106.0,89.0,3.786,3.179
3,2003,1105,54.0,109.0,2.077,4.192
4,2003,1106,88.0,89.0,3.143,3.179
...,...,...,...,...,...,...
6887,2015,1363,69.0,85.0,2.464,3.036
6888,2021,1152,13.0,26.0,1.444,2.889
6889,2022,1175,70.0,57.0,4.375,3.562
6890,2022,1237,41.0,56.0,2.158,2.947


### Personal Fouls

In [106]:
# Personal fouls per (Season, TeamID): totals from games won (WPF) and games
# lost (LPF), added together.
PF_W = (
    season_stats.groupby(['Season', 'WTeamID'])['WPF']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
PF_L = (
    season_stats.groupby(['Season', 'LTeamID'])['LPF']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
PF_tot = (
    PF_W.merge(PF_L, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(PF=lambda t: t['WPF'] + t['LPF'])
    .drop(columns=['WPF', 'LPF'])
)

Personal Fouls Against

In [107]:
# Personal fouls committed by opponents. In games the team won, the
# opponent's fouls are in LPF; in games it lost, they are in WPF.
PF_W_Ag = (
    season_stats.groupby(['Season', 'WTeamID'])['LPF']
    .sum()
    .reset_index()
    .rename(columns={'WTeamID': 'TeamID'})
)
PF_L_Ag = (
    season_stats.groupby(['Season', 'LTeamID'])['WPF']
    .sum()
    .reset_index()
    .rename(columns={'LTeamID': 'TeamID'})
)
# Outer merge keeps teams present on only one side; the missing half becomes 0.
PF_tot_Ag = (
    PF_W_Ag.merge(PF_L_Ag, how='outer', on=['Season', 'TeamID'])
    .fillna(0)
    .assign(PF_Ag=lambda t: t['WPF'] + t['LPF'])
    .drop(columns=['WPF', 'LPF'])
)

Personal Fouls per game

In [108]:
# Own and opponent personal-foul totals side by side, keyed on (Season, TeamID).
PF_combined = PF_tot.merge(PF_tot_Ag, how='outer', on=['Season', 'TeamID'])

In [109]:
# Attach games played, derive per-game personal fouls for / against
# (3 decimals), then discard the GP helper column.
df_PF = (
    pd.merge(PF_combined, GP, how='outer', on=['Season', 'TeamID'])
    .assign(
        PF_pg=lambda t: t['PF'].div(t['GP']).round(3),
        PF_Ag_pg=lambda t: t['PF_Ag'].div(t['GP']).round(3),
    )
    .drop(columns='GP')
)
df_PF

Unnamed: 0,Season,TeamID,PF,PF_Ag,PF_pg,PF_Ag_pg
0,2003,1102,525.0,514.0,18.750,18.357
1,2003,1103,536.0,606.0,19.852,22.444
2,2003,1104,505.0,539.0,18.036,19.250
3,2003,1105,526.0,496.0,20.231,19.077
4,2003,1106,509.0,452.0,18.179,16.143
...,...,...,...,...,...,...
6887,2015,1363,513.0,437.0,18.321,15.607
6888,2021,1152,12.0,13.0,1.333,1.444
6889,2022,1175,273.0,288.0,17.062,18.000
6890,2022,1237,361.0,276.0,19.000,14.526


#### Combine all DataFrames to make a season statistics DataFrame

In [110]:
from functools import reduce

In [136]:
# Per-statistic tables to be joined, in the column order wanted in the result.
data_frames = [
    df_FG,
    df_FG3,
    df_FT,
    df_OR,
    df_DR,
    df_Ast,
    df_TO,
    df_Stl,
    df_Blk,
    df_PF,
]

In [137]:
# Fold the list left to right, outer-joining each table onto the running
# result by (Season, TeamID).
df_season_stats = reduce(
    lambda merged, nxt: merged.merge(nxt, how='outer', on=['Season', 'TeamID']),
    data_frames,
)



In [138]:
# Preview the combined table: one row per (Season, TeamID) with every
# statistic family joined in.
df_season_stats

Unnamed: 0,Season,TeamID,FGM,FGA,FG%,FGM_Ag,FGA_Ag,FG%_Ag,FGM_pg,FGM_Ag_pg,...,Stl_pg,Stl_Ag_pg,Blk,Blk_Ag,Blk_pg,Blk_Ag_pg,PF,PF_Ag,PF_pg,PF_Ag_pg
0,2003,1102,536.0,1114.0,48.1149,540.0,1188.0,45.4545,19.143,19.286,...,5.964,5.429,50.0,44.0,1.786,1.571,525.0,514.0,18.750,18.357
1,2003,1103,733.0,1508.0,48.6074,750.0,1539.0,48.7329,27.148,27.778,...,7.259,6.407,63.0,77.0,2.333,2.852,536.0,606.0,19.852,22.444
2,2003,1104,673.0,1601.0,42.0362,651.0,1554.0,41.8919,24.036,23.250,...,6.607,5.536,106.0,89.0,3.786,3.179,505.0,539.0,18.036,19.250
3,2003,1105,634.0,1602.0,39.5755,702.0,1533.0,45.7926,24.385,27.000,...,9.308,9.385,54.0,109.0,2.077,4.192,526.0,496.0,20.231,19.077
4,2003,1106,656.0,1548.0,42.3773,608.0,1495.0,40.6689,23.429,21.714,...,8.357,8.786,88.0,89.0,3.143,3.179,509.0,452.0,18.179,16.143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6887,2015,1363,503.0,1452.0,34.6419,687.0,1449.0,47.4120,17.964,24.536,...,4.857,6.964,69.0,85.0,2.464,3.036,513.0,437.0,18.321,15.607
6888,2021,1152,180.0,497.0,36.2173,295.0,558.0,52.8674,20.000,32.778,...,5.444,6.444,13.0,26.0,1.444,2.889,12.0,13.0,1.333,1.444
6889,2022,1175,326.0,863.0,37.7752,424.0,985.0,43.0457,20.375,26.500,...,5.938,8.125,70.0,57.0,4.375,3.562,273.0,288.0,17.062,18.000
6890,2022,1237,349.0,944.0,36.9703,428.0,957.0,44.7231,18.368,22.526,...,5.158,7.000,41.0,56.0,2.158,2.947,361.0,276.0,19.000,14.526


#### Export season statistics dataframe to csv. This will be combined with the team data dataframe in another notebook. More features will be added to this dataframe in another notebook.

In [142]:
# Write the combined season statistics to disk for the follow-up notebook.
# NOTE(review): to_csv writes the row index as an unnamed first column by
# default; pass index=False if the downstream reader does not expect it -
# confirm against that notebook before changing.
df_season_stats.to_csv('season_stats.csv')