In [1]:
import pandas as pd 

## Loading the team-level data set from the csv file in the working directory
nba = pd.read_csv(filepath_or_buffer = 'NBA_2006_2007.csv')

## Previewing the first five rows
nba.head(n = 5)

Unnamed: 0,Team,GP,W,L,Win_pct,min,EFG_pct,FTA_rate,TOV_pct,OREB_pct,OPP_EFG_pct,OPP_FTA_pct,OPP_TOV_pct,OPP_OREB_pct
0,Dallas Mavericks,82,67,15,0.817,3951,50.9,0.317,15.2,32.0,47.7,0.355,16.0,28.1
1,Phoenix Suns,82,61,21,0.744,3981,55.1,0.267,14.9,26.2,49.2,0.268,15.5,31.7
2,San Antonio Spurs,82,58,24,0.707,3956,52.1,0.313,15.3,27.9,47.1,0.272,15.8,28.5
3,Detroit Pistons,82,53,29,0.646,3976,48.8,0.307,13.7,30.7,47.7,0.317,16.4,32.2
4,Houston Rockets,82,52,30,0.634,3971,49.9,0.292,15.3,28.7,46.6,0.308,15.4,27.56


In [2]:
## Rescaling three of the predictors by a factor of 1/100 (e.g. 50.9 -> 0.509).
## Note: the columns are overwritten in place, so running this cell a second
## time without reloading the data divides them by 100 again.
nba[['EFG_pct', 'TOV_pct', 'OREB_pct']] = (
    nba[['EFG_pct', 'TOV_pct', 'OREB_pct']].div(100)
)
nba.head()

Unnamed: 0,Team,GP,W,L,Win_pct,min,EFG_pct,FTA_rate,TOV_pct,OREB_pct,OPP_EFG_pct,OPP_FTA_pct,OPP_TOV_pct,OPP_OREB_pct
0,Dallas Mavericks,82,67,15,0.817,3951,0.509,0.317,0.152,0.32,47.7,0.355,16.0,28.1
1,Phoenix Suns,82,61,21,0.744,3981,0.551,0.267,0.149,0.262,49.2,0.268,15.5,31.7
2,San Antonio Spurs,82,58,24,0.707,3956,0.521,0.313,0.153,0.279,47.1,0.272,15.8,28.5
3,Detroit Pistons,82,53,29,0.646,3976,0.488,0.307,0.137,0.307,47.7,0.317,16.4,32.2
4,Houston Rockets,82,52,30,0.634,3971,0.499,0.292,0.153,0.287,46.6,0.308,15.4,27.56


In [3]:
import statsmodels.formula.api as smf

## Fitting an ordinary least squares model of wins (W) on the four predictors
lm_md = smf.ols(
    data = nba,
    formula = 'W ~ EFG_pct + TOV_pct + OREB_pct + FTA_rate',
).fit()

## Displaying the regression summary table
lm_md.summary()

0,1,2,3
Dep. Variable:,W,R-squared:,0.613
Model:,OLS,Adj. R-squared:,0.551
Method:,Least Squares,F-statistic:,9.907
Date:,"Wed, 03 Nov 2021",Prob (F-statistic):,6.05e-05
Time:,20:11:49,Log-Likelihood:,-99.321
No. Observations:,30,AIC:,208.6
Df Residuals:,25,BIC:,215.6
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-129.8508,61.229,-2.121,0.044,-255.953,-3.748
EFG_pct,403.6129,95.137,4.242,0.000,207.675,599.551
TOV_pct,-631.7950,153.782,-4.108,0.000,-948.515,-315.075
OREB_pct,242.7052,75.088,3.232,0.003,88.058,397.352
FTA_rate,-4.0469,47.498,-0.085,0.933,-101.871,93.777

0,1,2,3
Omnibus:,3.049,Durbin-Watson:,1.567
Prob(Omnibus):,0.218,Jarque-Bera (JB):,2.095
Skew:,-0.644,Prob(JB):,0.351
Kurtosis:,3.134,Cond. No.,145.0


In [4]:
## Based on the absolute values of the estimated coefficients, the most and
## least important variables are TOV_pct and FTA_rate, respectively.

In [5]:
## Computing all the needed statistics: the standard deviation of the
## response (W) and of each predictor, as returned by pandas' .std()
S_W = nba['W'].std()
S_EFG = nba['EFG_pct'].std()
S_TOV = nba['TOV_pct'].std()
S_OREB = nba['OREB_pct'].std()
S_FTA = nba['FTA_rate'].std()

## Computing the standardized regression coefficients:
##     |b_j| * S_xj / S_W
## The slopes b_j are read from the fitted model (lm_md.params) rather than
## retyped from the summary table, so they keep full precision and stay in
## sync with the model if the data or the formula changes.
## The absolute value is taken because only the magnitude is compared here.
EFG_coef = abs(lm_md.params['EFG_pct'])*S_EFG / S_W
print('The standardized coefficients of EFG_pct is', EFG_coef)

TOV_coef = abs(lm_md.params['TOV_pct'])*S_TOV / S_W
print('The standardized coefficients of TOV_pct is', TOV_coef)

OREB_coef = abs(lm_md.params['OREB_pct'])*S_OREB / S_W
print('The standardized coefficients of OREB_pct is', OREB_coef)

FTA_coef = abs(lm_md.params['FTA_rate'])*S_FTA / S_W
print('The standardized coefficients of FTA_rate is', FTA_coef)

The standardized coefficients of EFG_pct is 0.6045375718410818
The standardized coefficients of TOV_pct is 0.6517372334182471
The standardized coefficients of OREB_pct is 0.5328092688638754
The standardized coefficients of FTA_rate is 0.01220907662647242


In [6]:
## Based on the standardized estimated coefficients, the most and least important
## variables are TOV_pct and FTA_rate, respectively.

In [7]:
## Computing all the needed statistics: the pairwise correlation between the
## response (W) and each predictor (off-diagonal entry of the 2x2 matrix)
corr_W_EFG = nba[['W', 'EFG_pct']].corr().iloc[0, 1]
corr_W_TOV = nba[['W', 'TOV_pct']].corr().iloc[0, 1]
corr_W_OREB = nba[['W', 'OREB_pct']].corr().iloc[0, 1]
corr_W_FTA = nba[['W', 'FTA_rate']].corr().iloc[0, 1]

## Computing the contribution of each predictor to R^2, in percent:
##     100 * b_j * (S_xj / S_W) * corr(W, x_j)
## The slopes b_j are read from the fitted model (lm_md.params) rather than
## retyped from the summary table, so they keep full precision and stay in
## sync with the model if the data or the formula changes.
## The sign of b_j is kept here (no abs), so a contribution can be negative
## when the slope and the correlation have opposite signs.
R_EFG = round(100*lm_md.params['EFG_pct']*S_EFG*corr_W_EFG / S_W, 2)
print('The contribution of EFG_pct to R^2 is', R_EFG)

R_TOV = round(100*lm_md.params['TOV_pct']*S_TOV*corr_W_TOV / S_W, 2)
print('The contribution of TOV_pct to R^2 is', R_TOV)

R_OREB = round(100*lm_md.params['OREB_pct']*S_OREB*corr_W_OREB / S_W, 2)
print('The contribution of OREB_pct to R^2 is', R_OREB)

R_FTA = round(100*lm_md.params['FTA_rate']*S_FTA*corr_W_FTA / S_W, 2)
print('The contribution of FTA_rate to R^2 is', R_FTA)

The contribution of EFG_pct to R^2 is 32.7
The contribution of TOV_pct to R^2 is 35.0
The contribution of OREB_pct to R^2 is -6.71
The contribution of FTA_rate to R^2 is 0.33


In [8]:
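## Based on the contribution to the R^2, the most and least important
## variables are TOV_pct and FTA_rate, respectively (ranking by the absolute
## size of the contribution; note that the contribution of OREB_pct is negative).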