# 2. Analysis of Player Changes

This notebook looks at which players had the largest change in their role and performance in the bubble.

We look at these changes from two angles:

1. Largest changes (increase or decrease) in key metrics
2. Anomaly detection based on changes in metrics

In [1]:
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt

## Read in and Manipulate Data

In [2]:
# Load the schedule, player info and per-game player performance tables.
# (The team performance file, data/team_perf_2020.csv, is not loaded here.)
schedule, player_info, player_perf = map(
    pd.read_csv,
    [
        'data/schedule_2020.csv',
        'data/players_2020.csv',
        'data/player_perf_2020.csv',
    ],
)

In [3]:
# Parse the 'Date' column of both tables into pandas datetimes so that the
# two tables can later be joined on it.
for frame in (schedule, player_perf):
    frame['Date'] = pd.to_datetime(frame['Date'])

In [4]:
# Build one row per (game, team). Every game is listed twice: once from the
# home side and once from the visiting side. In each copy the side being
# described becomes 'Team'/'Points' and the other side becomes
# 'Opposing Team'/'Opposing Points'.
schedule_h = schedule.rename(columns={'Visitor Team':'Opposing Team', 'Visitor Points':'Opposing Points', 'Home Team':'Team', 'Home Points':'Points'})
schedule_a = schedule.rename(columns={'Visitor Team':'Team', 'Visitor Points':'Points', 'Home Team':'Opposing Team', 'Home Points':'Opposing Points'})

# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0. Columns are aligned by name, so the result is the same.
schedule_sep = pd.concat([schedule_h, schedule_a]).reset_index(drop=True)

In [5]:
# Attach each player's game line to the matching (Date, Team) schedule row.
# validate='1:m' makes pandas raise if a (Date, Team) pair is duplicated on the
# schedule side; clashing column names from player_perf get a '_copy' suffix.
player_perf_team = schedule_sep.merge(
    player_perf,
    on=['Date', 'Team'],
    validate='1:m',
    suffixes=('', '_copy'),
)

In [6]:
# Preview the joined schedule/player table (pandas abbreviates the display).
player_perf_team

Unnamed: 0,Date,Time,Opposing Team,Opposing Points,Team,Points,Box Score,OT,Notes,bubble,...,Player,STL,STLP,TOV,TOVP,TRB,TRBP,TSP,Time_copy,USGP
0,2019-10-22,800,New Orleans Pelicans,122,Toronto Raptors,130,www.basketball-reference.com/boxscores/2019102...,OT,,0,...,Kyle Lowry,2,2.1,4,16.2,5,5.4,0.531,,21.5
1,2019-10-22,800,New Orleans Pelicans,122,Toronto Raptors,130,www.basketball-reference.com/boxscores/2019102...,OT,,0,...,Fred VanVleet,2,2.1,2,8.8,5,5.4,0.824,,19.9
2,2019-10-22,800,New Orleans Pelicans,122,Toronto Raptors,130,www.basketball-reference.com/boxscores/2019102...,OT,,0,...,Pascal Siakam,0,0.0,4,11.5,18,22.7,0.551,,35.7
3,2019-10-22,800,New Orleans Pelicans,122,Toronto Raptors,130,www.basketball-reference.com/boxscores/2019102...,OT,,0,...,OG Anunoby,0,0.0,1,7.4,7,9.4,0.442,,14.7
4,2019-10-22,800,New Orleans Pelicans,122,Toronto Raptors,130,www.basketball-reference.com/boxscores/2019102...,OT,,0,...,Marc Gasol,0,0.0,0,0.0,4,6.0,0.318,,11.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27752,2020-08-31,900,Oklahoma City Thunder,104,Houston Rockets,100,www.basketball-reference.com/boxscores/2020083...,,,1,...,Ben McLemore,0,0.0,0,,0,0.0,,,0.0
27753,2020-08-31,900,Oklahoma City Thunder,104,Houston Rockets,100,www.basketball-reference.com/boxscores/2020083...,,,1,...,Bruno Caboclo,0,0.0,0,0.0,0,0.0,0.000,,0.0
27754,2020-08-31,900,Oklahoma City Thunder,104,Houston Rockets,100,www.basketball-reference.com/boxscores/2020083...,,,1,...,Tyson Chandler,0,0.0,0,0.0,0,0.0,0.000,,0.0
27755,2020-08-31,900,Oklahoma City Thunder,104,Houston Rockets,100,www.basketball-reference.com/boxscores/2020083...,,,1,...,Michael Frazier,0,0.0,0,0.0,0,0.0,0.000,,0.0


## Analyze key metrics before and after the bubble

In [7]:
# List the available columns to pick the metrics to compare.
player_perf_team.columns

Index(['Date', 'Time', 'Opposing Team', 'Opposing Points', 'Team', 'Points',
       'Box Score', 'OT', 'Notes', 'bubble', 'playoff', '3P', '3PA', '3PAR',
       '3PP', 'AST', 'ASTP', 'BLK', 'BLKP', 'DRB', 'DRBP', 'DRTG', 'EFGP',
       'FD', 'FG', 'FGA', 'FGP', 'FT', 'FTA', 'FTP', 'FTR', 'Home', 'Minutes',
       'ORB', 'ORBP', 'ORTG', 'Opp Team', 'PF', 'PM', 'PTS', 'Player', 'STL',
       'STLP', 'TOV', 'TOVP', 'TRB', 'TRBP', 'TSP', 'Time_copy', 'USGP'],
      dtype='object')

In [8]:
# Per-player averages over the rows recorded before the bubble (bubble == 0).
# 'Time' is only counted (non-null values) and renamed to 'Count', giving the
# number of rows behind each average.
mean_cols = ['3P', '3PA', 'AST', 'BLK', 'DRB', 'ORB', 'DRTG', 'ORTG', 'FG',
             'FGA', 'FT', 'FTA', 'Minutes', 'PTS', 'STL', 'TOV', 'TRB']
agg_spec = {'Time': 'count', **{col: 'mean' for col in mean_cols}}

pre = player_perf_team[player_perf_team['bubble'] == 0]
pre_agg = (
    pre.groupby('Player')
    .agg(agg_spec)
    .reset_index()
    .rename(columns={'Time': 'Count'})
)
pre_agg.head()

Unnamed: 0,Player,Count,3P,3PA,AST,BLK,DRB,ORB,DRTG,ORTG,FG,FGA,FT,FTA,Minutes,PTS,STL,TOV,TRB
0,Aaron Gordon,58,1.172414,3.896552,3.706897,0.62069,5.827586,1.775862,110.275862,104.931034,5.413793,12.517241,2.362069,3.5,32.995402,14.362069,0.862069,1.62069,7.603448
1,Aaron Holiday,65,1.230769,3.123077,2.969231,0.2,1.707692,0.323077,99.538462,87.415385,3.107692,7.630769,0.953846,1.107692,21.049487,8.4,0.692308,1.153846,2.030769
2,Abdel Nader,57,0.684211,1.842105,0.614035,0.350877,1.333333,0.245614,94.087719,87.754386,1.754386,3.859649,0.842105,1.087719,13.26462,5.035088,0.350877,0.631579,1.578947
3,Adam Mokoka,18,0.333333,0.833333,0.222222,0.0,0.166667,0.388889,73.944444,62.833333,0.666667,1.555556,0.111111,0.222222,6.203704,1.777778,0.222222,0.111111,0.555556
4,Admiral Schofield,36,0.388889,1.305556,0.388889,0.083333,0.777778,0.166667,87.305556,39.055556,0.833333,2.083333,0.25,0.361111,8.129167,2.305556,0.222222,0.194444,0.944444


In [9]:
# Per-player averages over the rows recorded in the bubble (bubble == 1).
# 'Time' is only counted (non-null values) and renamed to 'Count', giving the
# number of rows behind each average.
mean_cols = ['3P', '3PA', 'AST', 'BLK', 'DRB', 'ORB', 'DRTG', 'ORTG', 'FG',
             'FGA', 'FT', 'FTA', 'Minutes', 'PTS', 'STL', 'TOV', 'TRB']
agg_spec = {'Time': 'count', **{col: 'mean' for col in mean_cols}}

post = player_perf_team[player_perf_team['bubble'] == 1]
post_agg = (
    post.groupby('Player')
    .agg(agg_spec)
    .reset_index()
    .rename(columns={'Time': 'Count'})
)
post_agg.head()

Unnamed: 0,Player,Count,3P,3PA,AST,BLK,DRB,ORB,DRTG,ORTG,FG,FGA,FT,FTA,Minutes,PTS,STL,TOV,TRB
0,Aaron Gordon,4,1.25,2.75,3.25,0.75,7.5,1.0,112.25,118.5,5.25,10.25,3.5,5.25,25.854167,15.25,0.25,1.5,8.5
1,Aaron Holiday,12,0.916667,2.25,3.5,0.25,2.25,0.166667,110.916667,107.166667,3.583333,7.333333,1.25,1.666667,26.748611,9.333333,1.166667,1.416667,2.416667
2,Abdel Nader,13,0.769231,2.153846,0.230769,0.153846,0.923077,0.076923,88.230769,70.307692,1.846154,3.846154,0.307692,0.461538,10.483333,4.769231,0.307692,0.538462,1.0
3,Admiral Schofield,8,0.625,1.75,0.125,0.125,1.5,0.125,89.25,73.625,0.625,2.125,0.125,0.25,9.458333,2.0,0.0,0.0,1.625
4,Al Horford,12,0.833333,1.916667,2.916667,0.916667,4.416667,1.666667,106.166667,108.333333,3.5,6.666667,0.833333,1.166667,25.506944,8.666667,0.083333,1.5,6.083333


In [10]:
# Put the two sets of averages side by side. The inner join keeps only players
# present in both tables; columns are suffixed '_pre' / '_post'.
pre_post_agg = pre_agg.merge(
    post_agg,
    on='Player',
    how='inner',
    suffixes=('_pre', '_post'),
)
pre_post_agg.head()

Unnamed: 0,Player,Count_pre,3P_pre,3PA_pre,AST_pre,BLK_pre,DRB_pre,ORB_pre,DRTG_pre,ORTG_pre,...,ORTG_post,FG_post,FGA_post,FT_post,FTA_post,Minutes_post,PTS_post,STL_post,TOV_post,TRB_post
0,Aaron Gordon,58,1.172414,3.896552,3.706897,0.62069,5.827586,1.775862,110.275862,104.931034,...,118.5,5.25,10.25,3.5,5.25,25.854167,15.25,0.25,1.5,8.5
1,Aaron Holiday,65,1.230769,3.123077,2.969231,0.2,1.707692,0.323077,99.538462,87.415385,...,107.166667,3.583333,7.333333,1.25,1.666667,26.748611,9.333333,1.166667,1.416667,2.416667
2,Abdel Nader,57,0.684211,1.842105,0.614035,0.350877,1.333333,0.245614,94.087719,87.754386,...,70.307692,1.846154,3.846154,0.307692,0.461538,10.483333,4.769231,0.307692,0.538462,1.0
3,Admiral Schofield,36,0.388889,1.305556,0.388889,0.083333,0.777778,0.166667,87.305556,39.055556,...,73.625,0.625,2.125,0.125,0.25,9.458333,2.0,0.0,0.0,1.625
4,Al Horford,61,1.459016,4.327869,4.0,0.901639,5.245902,1.508197,106.606557,113.557377,...,108.333333,3.5,6.666667,0.833333,1.166667,25.506944,8.666667,0.083333,1.5,6.083333


In [11]:
# For every averaged metric add the signed change (post minus pre) and its
# absolute value, which is what the ranking cells sort on.
metric_list = [col for col in pre_agg.columns if col not in ('Player', 'Count')]

for metric in metric_list:
    delta = pre_post_agg[metric + '_post'] - pre_post_agg[metric + '_pre']
    pre_post_agg[metric + '_diff'] = delta
    pre_post_agg[metric + '_diff_abs'] = delta.abs()

pre_post_agg.head()

Unnamed: 0,Player,Count_pre,3P_pre,3PA_pre,AST_pre,BLK_pre,DRB_pre,ORB_pre,DRTG_pre,ORTG_pre,...,Minutes_diff,Minutes_diff_abs,PTS_diff,PTS_diff_abs,STL_diff,STL_diff_abs,TOV_diff,TOV_diff_abs,TRB_diff,TRB_diff_abs
0,Aaron Gordon,58,1.172414,3.896552,3.706897,0.62069,5.827586,1.775862,110.275862,104.931034,...,-7.141236,7.141236,0.887931,0.887931,-0.612069,0.612069,-0.12069,0.12069,0.896552,0.896552
1,Aaron Holiday,65,1.230769,3.123077,2.969231,0.2,1.707692,0.323077,99.538462,87.415385,...,5.699124,5.699124,0.933333,0.933333,0.474359,0.474359,0.262821,0.262821,0.385897,0.385897
2,Abdel Nader,57,0.684211,1.842105,0.614035,0.350877,1.333333,0.245614,94.087719,87.754386,...,-2.781287,2.781287,-0.265857,0.265857,-0.043185,0.043185,-0.093117,0.093117,-0.578947,0.578947
3,Admiral Schofield,36,0.388889,1.305556,0.388889,0.083333,0.777778,0.166667,87.305556,39.055556,...,1.329167,1.329167,-0.305556,0.305556,-0.222222,0.222222,-0.194444,0.194444,0.680556,0.680556
4,Al Horford,61,1.459016,4.327869,4.0,0.901639,5.245902,1.508197,106.606557,113.557377,...,-4.786225,4.786225,-3.169399,3.169399,-0.769126,0.769126,0.434426,0.434426,-0.670765,0.670765


In [12]:
# Ten players whose average minutes moved the most, in either direction
metric = 'Minutes'
shown_cols = ['Player', metric + '_pre', metric + '_post', metric + '_diff']
(
    pre_post_agg
    .sort_values(metric + '_diff_abs', ascending=False)
    .loc[:, shown_cols]
    .head(10)
)


Unnamed: 0,Player,Minutes_pre,Minutes_post,Minutes_diff
173,Kelly Oubre,34.524107,0.0,-34.524107
167,Jusuf Nurkić,0.0,32.50119,32.50119
301,Tremont Waters,7.416667,29.65,22.233333
171,Keldon Johnson,3.839583,26.05,22.210417
220,Michael Porter,11.166667,30.630769,19.464103
155,Jonathan Isaac,29.65,10.361111,-19.288889
302,Trey Burke,6.705782,24.844048,18.138265
146,Jerome Robinson,11.505128,28.914583,17.409455
100,Gary Trent,16.816667,32.889286,16.072619
158,Josh Jackson,18.784211,2.725926,-16.058285


In [13]:
# Ten players whose average points moved the most, in either direction
metric = 'PTS'
shown_cols = ['Player', metric + '_pre', metric + '_post', metric + '_diff']
(
    pre_post_agg
    .sort_values(metric + '_diff_abs', ascending=False)
    .loc[:, shown_cols]
    .head(10)
)


Unnamed: 0,Player,PTS_pre,PTS_post,PTS_diff
173,Kelly Oubre,18.678571,0.0,-18.678571
167,Jusuf Nurkić,0.0,16.714286,16.714286
171,Keldon Johnson,1.708333,14.125,12.416667
146,Jerome Robinson,3.092308,14.75,11.657692
220,Michael Porter,5.966667,17.538462,11.571795
297,Timothé Luwawu-Cabarrot,5.369565,15.166667,9.797101
228,Montrezl Harrell,18.619048,9.0,-9.619048
302,Trey Burke,3.0,12.142857,9.142857
295,Thomas Bryant,9.583333,18.625,9.041667
308,Tyler Johnson,3.708333,12.583333,8.875


In [14]:
# Ten players whose average assists moved the most, in either direction
metric = 'AST'
shown_cols = ['Player', metric + '_pre', metric + '_post', metric + '_diff']
(
    pre_post_agg
    .sort_values(metric + '_diff_abs', ascending=False)
    .loc[:, shown_cols]
    .head(10)
)


Unnamed: 0,Player,AST_pre,AST_post,AST_diff
167,Jusuf Nurkić,0.0,4.0,4.0
21,Ben Simmons,8.185185,4.333333,-3.851852
301,Tremont Waters,1.0,4.0,3.0
121,Ja Morant,6.704918,9.666667,2.961749
39,Chris Chiozza,1.5,4.333333,2.833333
249,Patrick Beverley,3.729167,1.25,-2.479167
33,Caris LeVert,4.076923,6.5,2.423077
278,Shabazz Napier,4.147541,1.8,-2.347541
192,LeBron James,10.6,8.294118,-2.305882
280,Shake Milton,1.392157,3.666667,2.27451


In [15]:
# Ten players whose average total rebounds moved the most, in either direction
metric = 'TRB'
shown_cols = ['Player', metric + '_pre', metric + '_post', metric + '_diff']
(
    pre_post_agg
    .sort_values(metric + '_diff_abs', ascending=False)
    .loc[:, shown_cols]
    .head(10)
)



Unnamed: 0,Player,TRB_pre,TRB_post,TRB_diff
167,Jusuf Nurkić,0.0,11.071429,11.071429
114,Hassan Whiteside,13.578125,6.25,-7.328125
173,Kelly Oubre,6.446429,0.0,-6.446429
77,Drew Eubanks,1.296296,6.25,4.953704
226,Mohamed Bamba,4.983333,0.3,-4.683333
220,Michael Porter,3.316667,7.692308,4.375641
171,Keldon Johnson,0.708333,5.0,4.291667
216,Meyers Leonard,5.061224,0.8125,-4.248724
115,Ian Mahinmi,4.192308,0.0,-4.192308
155,Jonathan Isaac,6.9375,3.0,-3.9375


In [16]:
# Ten players whose average Offensive Rating moved the most, in either direction
metric = 'ORTG'
shown_cols = ['Player', metric + '_pre', metric + '_post', metric + '_diff']
(
    pre_post_agg
    .sort_values(metric + '_diff_abs', ascending=False)
    .loc[:, shown_cols]
    .head(10)
)



Unnamed: 0,Player,ORTG_pre,ORTG_post,ORTG_diff
137,Jarrod Uthoff,16.5,139.0,122.5
167,Jusuf Nurkić,0.0,111.5,111.5
173,Kelly Oubre,107.767857,0.0,-107.767857
313,Vincent Poirier,51.488372,157.0,105.511628
237,Nigel Williams-Goss,18.875,116.0,97.125
171,Keldon Johnson,37.583333,134.25,96.666667
226,Mohamed Bamba,106.466667,11.3,-95.166667
168,Juwan Morgan,34.914286,128.416667,93.502381
46,DaQuan Jeffries,31.7,120.75,89.05
115,Ian Mahinmi,78.884615,0.0,-78.884615


In [17]:
# Ten players whose average Defensive Rating moved the most, in either direction
metric = 'DRTG'
shown_cols = ['Player', metric + '_pre', metric + '_post', metric + '_diff']
(
    pre_post_agg
    .sort_values(metric + '_diff_abs', ascending=False)
    .loc[:, shown_cols]
    .head(10)
)




Unnamed: 0,Player,DRTG_pre,DRTG_post,DRTG_diff
167,Jusuf Nurkić,0.0,113.071429,113.071429
173,Kelly Oubre,111.482143,0.0,-111.482143
231,Nassir Little,83.348485,0.0,-83.348485
226,Mohamed Bamba,103.666667,20.5,-83.166667
115,Ian Mahinmi,82.115385,0.0,-82.115385
184,Kyle Alexander,0.0,81.666667,81.666667
216,Meyers Leonard,109.571429,29.4375,-80.133929
16,Aron Baynes,76.786885,0.0,-76.786885
84,Elie Okobo,96.476923,22.2,-74.276923
71,Dion Waiters,14.347826,86.538462,72.190635


## Find anomalies
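Method 1: For each player and each key metric, calculate the difference between the bubble mean and the pre-bubble mean, normalized by the standard error of that difference (computed from the player's standard deviation over all games). The absolute values are then summed across metrics.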

In [19]:
# Key metrics: minutes played plus five box-score stats expressed per minute.
key_metrics = ['Minutes', 'PTS_PM', 'AST_PM', 'TOV_PM', 'TRB_PM', 'STL_PM']

# Work on a copy so the joined table itself is left unchanged.
player_perf_team_metrics = player_perf_team.copy()

for stat in ('PTS', 'AST', 'TOV', 'TRB', 'STL'):
    player_perf_team_metrics[stat + '_PM'] = (
        player_perf_team_metrics[stat] / player_perf_team_metrics['Minutes']
    )

# Missing values (e.g. 0/0 for a row with zero minutes and zero of the stat)
# are treated as 0 for every key metric.
player_perf_team_metrics[key_metrics] = player_perf_team_metrics[key_metrics].fillna(0)

In [20]:
# Build the set of players to test. A player qualifies when they
#   1. have at least one row with minutes before the bubble (bubble == 0),
#   2. have at least one row with minutes in the bubble (bubble == 1), and
#   3. have a positive season total for every key metric.
has_minutes = player_perf_team_metrics['Minutes'] > 0
player_list_pre = player_perf_team_metrics.loc[has_minutes & (player_perf_team_metrics['bubble'] == 0), 'Player'].unique()
player_list_post = player_perf_team_metrics.loc[has_minutes & (player_perf_team_metrics['bubble'] == 1), 'Player'].unique()

# Sum only the key metrics. Summing every column also tries to add up the
# datetime 'Date' column, which raises a TypeError on pandas >= 2.0 (older
# versions silently dropped such columns).
player_list_played = player_perf_team_metrics.groupby('Player')[key_metrics].sum().reset_index()
player_list_played = player_list_played.loc[(player_list_played[key_metrics] > 0).all(axis=1)]

player_list = set(player_list_pre) & set(player_list_post) & set(player_list_played['Player'])

In [40]:
# For every eligible player, compare the bubble mean with the pre-bubble mean
# of each key metric. The difference is divided by
#     sqrt(std**2 * (1/pre_n + 1/post_n)),
# where std is the player's sample standard deviation over all of their rows.
# The absolute value is stored per metric ('<metric>_deviation') and summed
# across metrics ('test_stat_sum').
deviation_cols = [metric + '_deviation' for metric in key_metrics]
player_records = []

# sorted() gives a stable row order: iterating the set directly ordered the
# rows differently between kernel sessions (string hashes are randomised).
for player in sorted(player_list):
    perf = player_perf_team_metrics.loc[player_perf_team_metrics['Player']==player]
    pre = perf.loc[perf['bubble']==0]
    post = perf.loc[perf['bubble']==1]

    # group sizes do not depend on the metric, so compute them once per player
    pre_n = len(pre)
    post_n = len(post)

    record = {'Player': player}
    test_stat_sum = 0

    for metric in key_metrics:
        pre_mean = pre[metric].mean()
        post_mean = post[metric].mean()

        std = np.std(perf[metric], ddof=1)

        test_stat = (post_mean - pre_mean)/np.sqrt(std**2 * (1/pre_n + 1/post_n))
        test_stat_sum += abs(test_stat)
        record[metric+'_deviation'] = abs(test_stat)

    record['test_stat_sum'] = test_stat_sum
    player_records.append(record)

# Build the frame once from the collected records instead of growing it cell
# by cell; the explicit column list keeps the schema even with no players.
player_test_stats = pd.DataFrame(
    player_records,
    columns=['Player'] + deviation_cols + ['test_stat_sum'],
)

In [41]:
# Rank players by their summed deviation, largest first; later cells take the
# top rows of this ordering.
player_test_stats = player_test_stats.sort_values('test_stat_sum', ascending=False)
player_test_stats.head(10)

Unnamed: 0,Player,Minutes_deviation,PTS_PM_deviation,AST_PM_deviation,TOV_PM_deviation,TRB_PM_deviation,STL_PM_deviation,test_stat_sum
120,Mohamed Bamba,7.018038,4.412747,2.476134,2.458532,4.395572,0.12423,20.885252
38,Meyers Leonard,5.874607,3.116714,2.688943,2.228034,4.895224,0.420905,19.224427
172,Trey Burke,5.47896,3.104013,2.138469,1.794214,1.38444,4.384837,18.284935
53,JaKarr Sampson,3.007791,2.635254,4.063246,4.176418,2.195685,2.100189,18.178583
273,Khem Birch,1.738478,4.552328,3.625929,3.288138,3.233635,1.214957,17.653465
216,Keldon Johnson,4.774306,2.864954,1.283023,2.959357,1.911627,2.118678,15.911943
198,Dion Waiters,3.79914,2.579143,1.883952,2.332689,0.550946,2.525229,13.671098
262,Vincent Poirier,2.273132,0.532235,3.814382,0.943344,0.981633,4.317159,12.861886
11,Anfernee Simons,4.376883,3.894151,1.300349,2.091901,0.766448,0.035465,12.465196
296,Darius Bazley,3.514428,2.103722,1.572608,0.845448,2.641273,1.615338,12.292817


In [42]:
# Spot-check: points-per-minute value for the row labelled 0.
player_perf_team_metrics.loc[0, 'PTS_PM']

0.4890700259355317

In [56]:
# Mean deviation per key metric across all tested players.
# The column names are derived here because key_metrics_name was otherwise
# only defined in the radar-chart cell further down, so this cell raised a
# NameError on a fresh top-to-bottom run.
key_metrics_name = [x+'_deviation' for x in key_metrics]
player_test_stats[key_metrics_name].mean()

Minutes_deviation    1.756480
PTS_PM_deviation     1.114988
AST_PM_deviation     1.069899
TOV_PM_deviation     0.946386
TRB_PM_deviation     1.060501
STL_PM_deviation     0.859366
dtype: float64

In [57]:
import plotly.graph_objects as go

# Radar chart of the per-metric deviations: the first three rows of
# player_test_stats (sorted by test_stat_sum, largest first, in an earlier
# cell) plus the mean over all players for reference.
categories = key_metrics
key_metrics_name = [x+'_deviation' for x in key_metrics]

top_players = player_test_stats.head(3)
average_deviation = player_test_stats[key_metrics_name].mean()

# Size the radial axis from the plotted values. The previous fixed upper bound
# of 7 was smaller than the largest plotted deviation (about 7.02), so that
# point fell outside the axis. 7 is kept as the minimum so that smaller data
# is drawn on the same scale as before.
largest_plotted = np.nanmax([
    top_players[key_metrics_name].max().max(),
    average_deviation.max(),
])
radial_max = max(7, int(np.ceil(largest_plotted))) if np.isfinite(largest_plotted) else 7

fig = go.Figure()

# add top 3 players
for idx, row in top_players.iterrows():
    fig.add_trace(go.Scatterpolar(
          r=row[key_metrics_name].values,
          theta=categories,
          fill='toself',
          name=row['Player']
    ))

# add the average across all players (the top 3 are included in this mean)
fig.add_trace(go.Scatterpolar(
      r=average_deviation.values,
      theta=categories,
      fill='toself',
      name='Average Player'
))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True,
      range=[0, radial_max]
    )),
  showlegend=False
)

fig.show()

In [49]:
# Deviation values of the top-ranked player (first row after sorting).
# Selected explicitly instead of through the leftover loop variable `row`,
# which after the plotting loop refers to the last of the three plotted players.
player_test_stats.iloc[0][key_metrics_name].values

array([7.018037571815998, 4.412747022354652, 2.476134274445133,
       2.4585322420346998, 4.395571728569316, 0.12422955810287985],
      dtype=object)