In [1]:
from nba_api.stats.endpoints import teamestimatedmetrics, leaguedashteamstats
from nba_api.stats.static import teams
import pandas as pd
import pingouin as pg
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [2]:
"""
This is a short analysis to determine the correlation of two factors with a teams offensive rating:
(1) per-possession efficiency
(2) number of possessions (i.e. pace)

"""

'\nThis is a short analysis to determine the correlation of two factors with a teams offensive rating:\n(1) per-possession efficiency\n(2) number of possessions (i.e. pace)\n\n'

In [3]:
seasons = [
    '2000-01',
    '2001-02',
    '2002-03',
    '2003-04',
    '2004-05',
    '2005-06',
    '2006-07',
    '2007-08',
    '2008-09',
    '2009-10',
    '2010-11',
    '2011-12',
    '2012-13',
    '2013-14',
    '2014-15',
    '2015-16',
    '2016-17',
    '2017-18',
    '2018-19',
    '2019-20',
    '2020-21',
    '2021-22',
    '2022-23'
]

In [4]:
#Get list of all teams
all_teams = teams.get_teams()
team_ids = {}
for team in all_teams:
    team_ids[team['id']] = team['abbreviation']

In [5]:
#Get offensive ratings, pace, points-per-possession
offensive_ratings = []
paces = []
pts_per_poss = []
wins = []
for season in seasons:
    print("Collecting " + season + " data...")
    results = teamestimatedmetrics.TeamEstimatedMetrics(league_id='00', season=season).get_data_frames()[0]
    possession_results = leaguedashteamstats.LeagueDashTeamStats(season=season, per_mode_detailed="PerPossession")
    possession_results = possession_results.get_data_frames()[0]
    for id in team_ids.keys():
        offensive_ratings.append(results[results['TEAM_ID'] == id]['E_OFF_RATING'])
        paces.append(results[results['TEAM_ID'] == id]['E_PACE'])
        pts_per_poss.append(possession_results[possession_results['TEAM_ID'] == id]['PTS'])
        wins.append(possession_results[possession_results['TEAM_ID'] == id]['W'])

Collecting 2000-01 data...
Collecting 2001-02 data...
Collecting 2002-03 data...
Collecting 2003-04 data...
Collecting 2004-05 data...
Collecting 2005-06 data...
Collecting 2006-07 data...
Collecting 2007-08 data...
Collecting 2008-09 data...
Collecting 2009-10 data...
Collecting 2010-11 data...
Collecting 2011-12 data...
Collecting 2012-13 data...
Collecting 2013-14 data...
Collecting 2014-15 data...
Collecting 2015-16 data...
Collecting 2016-17 data...
Collecting 2017-18 data...
Collecting 2018-19 data...
Collecting 2019-20 data...
Collecting 2020-21 data...
Collecting 2021-22 data...
Collecting 2022-23 data...


In [6]:
for i in range(len(offensive_ratings)):
    if offensive_ratings[i].empty:
        offensive_ratings[i] = None
    else:
        offensive_ratings[i] = offensive_ratings[i].tolist()[0]

for i in range(len(paces)):
    if paces[i].empty:
        paces[i] = None
    else:
        paces[i] = paces[i].tolist()[0]

for i in range(len(pts_per_poss)):
    if pts_per_poss[i].empty:
        pts_per_poss[i] = None
    else:
        pts_per_poss[i] = pts_per_poss[i].tolist()[0]

for i in range(len(wins)):
    if wins[i].empty:
        wins[i] = None
    else:
        wins[i] = wins[i].tolist()[0]

In [7]:
#Calculate Pearson coefficient for linear correlation
df = pd.DataFrame({"OFF_RATING": offensive_ratings, "PACE": paces, "PPP": pts_per_poss, "WINS": wins}).dropna()
print("Linear correlation between pace and offensive rating: ")
print(df["OFF_RATING"].corr(df["PACE"]))
print("Linear correlation between PPP and offensive rating: ")
print(df["OFF_RATING"].corr(df["PPP"]))

Linear correlation between pace and offensive rating: 
0.525026458537889
Linear correlation between PPP and offensive rating: 
0.9950877749831571


In [8]:
#Let's also check the Spearman rank to deal with any nonlinear relationships
print("Spearman correlation between pace and offensive rating: ")
print(df["OFF_RATING"].corr(df["PACE"], method="spearman"))
print("Spearman correlation between PPP and offensive rating: ")
print(df["OFF_RATING"].corr(df["PPP"], method="spearman"))

Spearman correlation between pace and offensive rating: 
0.5178427583893976
Spearman correlation between PPP and offensive rating: 
0.994561509216428


In [9]:
"""
I think that a fast pace might be affecting offensive rating by pushing up per-possession efficiency (transition play, for instance),
so let's check the correlation and VIF between pace and PPP
"""

"\nI think that a fast pace might be affecting offensive rating by pushing up per-possession efficiency (transition play, for instance),\nso let's check the correlation and VIF between pace and PPP\n"

In [10]:
print("Linear correlation between pace and PPP: ")
print(df["PPP"].corr(df["PACE"]))
print("Spearman correlation between pace and PPP: ")
print(df["PPP"].corr(df["PACE"], method="spearman"))

Linear correlation between pace and PPP: 
0.5524181747835893
Spearman correlation between pace and PPP: 
0.5436513179675263


In [11]:
#Let's try a partial regression seeing the effect of pace on offensive rating, adjusting for PPP
print("Partial correlation of pace with offensive efficiency, adjusting for PPP: ")
print(pg.partial_corr(data=df, x="PACE", y="OFF_RATING", covar="PPP"))

Partial correlation of pace with offensive efficiency, adjusting for PPP: 
           n         r           CI95%         p-val
pearson  686 -0.299055  [-0.37, -0.23]  1.280696e-15


In [12]:
"""
The p-value is less than .05, suggesting that pace does have a statistically significant effect outside of PPP.
Let's try multiple regression as well. 
"""

"\nThe p-value is less than .05, suggesting that pace does have a statistically significant effect outside of PPP.\nLet's try multiple regression as well. \n"

In [13]:
#Multiple regression
X = df[["PACE", "PPP"]]
X = sm.add_constant(X)
model = sm.OLS(df["OFF_RATING"], X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:             OFF_RATING   R-squared:                       0.991
Model:                            OLS   Adj. R-squared:                  0.991
Method:                 Least Squares   F-statistic:                 3.793e+04
Date:                Sat, 30 Dec 2023   Prob (F-statistic):               0.00
Time:                        19:42:52   Log-Likelihood:                -381.90
No. Observations:                 686   AIC:                             769.8
Df Residuals:                     683   BIC:                             783.4
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.6756      0.440      3.812      0.0

In [14]:
#Now let's run the partial and multiple regressions again, checking correlation with wins. 
print("Partial correlation of pace with number of wins, adjusting for PPP: ")
print(pg.partial_corr(data=df, x="PACE", y="WINS", covar="PPP"))

Partial correlation of pace with number of wins, adjusting for PPP: 
           n         r           CI95%         p-val
pearson  686 -0.517856  [-0.57, -0.46]  2.909752e-48


In [15]:
#Multiple regression:
X = df[["PACE", "PPP"]]
X = sm.add_constant(X)
model = sm.OLS(df["WINS"], X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                   WINS   R-squared:                       0.459
Model:                            OLS   Adj. R-squared:                  0.457
Method:                 Least Squares   F-statistic:                     289.4
Date:                Sat, 30 Dec 2023   Prob (F-statistic):           9.50e-92
Time:                        19:42:52   Log-Likelihood:                -2476.7
No. Observations:                 686   AIC:                             4959.
Df Residuals:                     683   BIC:                             4973.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -32.6298      9.315     -3.503      0.0