### PCA

This Jupyter notebook will:
- use PCA to further reduce the dimensionality of the player statistics
- investigate the principal components to find insights for clustering


In [1]:
# importing libraries and modules needed for PCA
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the stored player dashboard (a pickled DataFrame) and renumber its rows.
# NOTE: unpickling can execute arbitrary code -- only read pickle files you trust.
# reset_index(drop=True) discards the old index directly, so no temporary
# 'index' column has to be created and dropped, and nothing is mutated in place.
players_dashboard = pd.read_pickle('players_dashboard.pkl').reset_index(drop=True)
players_dashboard


Unnamed: 0,PLAYER_NAME,TEAM,FG%,FG3%,OREB,DREB,AST,STL,BLK,PF,...,OFF_SCREEN_PPP,OFF_SCREEN_FG%,OFF_SCREEN_POSS,PUTBACK_PPP,PUTBACK_FG%,PUTBACK_POSS,PASSES_MADE,SECONDARY_AST,POTENTIAL_AST,AST_TO_PASS%
0,Aaron Gordon,DEN,0.463,0.335,1.5,4.1,3.2,0.7,0.7,1.8,...,0.000,0.000,0.0,1.182,0.647,0.9,32.9,0.6,5.7,0.098
1,Aaron Holiday,IND,0.390,0.368,0.2,1.1,1.9,0.7,0.2,1.4,...,0.905,0.412,0.3,0.000,0.000,0.0,15.5,0.3,3.3,0.121
2,Aaron Nesmith,BOS,0.438,0.370,0.6,2.2,0.5,0.3,0.2,1.9,...,1.471,0.563,0.4,1.200,0.818,0.3,11.7,0.1,1.0,0.043
3,Abdel Nader,PHX,0.491,0.419,0.3,2.3,0.8,0.4,0.4,1.4,...,0.000,0.000,0.0,0.000,0.000,0.0,9.4,0.0,1.1,0.084
4,Al Horford,BOS,0.450,0.368,1.0,5.7,3.4,0.9,0.9,1.7,...,0.000,0.000,0.0,0.000,0.000,0.0,38.5,0.1,5.2,0.087
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
391,Xavier Tillman,MEM,0.559,0.338,1.3,3.1,1.3,0.7,0.6,2.0,...,0.000,0.000,0.0,1.000,0.640,0.6,23.8,0.1,2.3,0.053
392,Yogi Ferrell,LAC,0.351,0.321,0.5,1.4,2.2,0.7,0.3,1.1,...,0.000,0.000,0.0,0.000,0.000,0.0,17.6,0.4,3.5,0.125
393,Yuta Watanabe,TOR,0.439,0.400,0.7,2.5,0.8,0.5,0.4,1.1,...,0.000,0.000,0.0,0.692,0.300,0.3,14.4,0.2,1.6,0.056
394,Zach LaVine,CHI,0.507,0.419,0.6,4.4,4.9,0.8,0.5,2.4,...,1.077,0.449,1.3,1.130,0.556,0.4,40.1,0.9,9.8,0.121


In [3]:
# Standardize the numeric stats with StandardScaler. The identifier columns are
# left out of the scaling and re-attached afterwards as PLAYER / TEAM / POSITION.
id_columns = ['PLAYER_NAME', 'TEAM', 'PLAYER_POSITION']
player_features = players_dashboard.drop(columns=id_columns)  # built once, reused below

players_stats_std = pd.DataFrame(
    StandardScaler().fit_transform(player_features),
    columns=player_features.columns.tolist(),
    # Keep the source index so the label columns assigned below align row-for-row
    # instead of relying on both frames happening to share a default index.
    index=player_features.index,
)
players_stats_std['PLAYER'] = players_dashboard['PLAYER_NAME']
players_stats_std['TEAM'] = players_dashboard['TEAM']
players_stats_std['POSITION'] = players_dashboard['PLAYER_POSITION']

players_stats_std

Unnamed: 0,FG%,FG3%,OREB,DREB,AST,STL,BLK,PF,PTS,PLUS_MINUS,...,PUTBACK_PPP,PUTBACK_FG%,PUTBACK_POSS,PASSES_MADE,SECONDARY_AST,POTENTIAL_AST,AST_TO_PASS%,PLAYER,TEAM,POSITION
0,-0.040703,0.077299,0.694923,0.403816,0.387215,-0.137444,0.487523,-0.174074,0.217126,0.443619,...,0.865386,1.026006,0.676724,0.359562,1.151339,0.328454,0.612185,Aaron Gordon,DEN,F
1,-0.995761,0.375892,-1.002166,-1.351519,-0.294749,-0.137444,-0.689666,-0.851550,-0.631433,0.066030,...,-1.235018,-1.226778,-0.802935,-0.981525,-0.060167,-0.347955,1.389080,Aaron Holiday,IND,G
2,-0.367777,0.393988,-0.479985,-0.707897,-1.029172,-1.307937,-0.689666,-0.004705,-1.039394,0.003099,...,0.897372,1.621410,-0.309715,-1.274406,-0.867838,-0.996181,-1.245609,Aaron Nesmith,BOS,G-F
3,0.325621,0.837354,-0.871620,-0.649385,-0.871796,-1.015314,-0.218791,-0.851550,-0.713025,0.443619,...,-1.235018,-1.226778,-0.802935,-1.451676,-1.271674,-0.967998,0.139292,Abdel Nader,PHX,F
4,-0.210781,0.375892,0.042196,1.339995,0.492132,0.447802,0.958398,-0.343443,0.510858,-1.066735,...,-1.235018,-1.226778,-0.802935,0.791176,-0.867838,0.187536,0.240626,Al Horford,BOS,C-F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
391,1.215264,0.104444,0.433832,-0.181296,-0.609502,-0.137444,0.252085,0.164664,-0.729344,0.254825,...,0.541973,1.001633,0.183504,-0.341811,-0.867838,-0.629793,-0.907828,Xavier Tillman,MEM,F
392,-1.505997,-0.049377,-0.610530,-1.175986,-0.137373,-0.137444,-0.454229,-1.359658,-0.892528,0.034565,...,-1.235018,-1.226778,-0.802935,-0.819670,0.343668,-0.291588,1.524192,Yogi Ferrell,LAC,G
393,-0.354694,0.665437,-0.349439,-0.532363,-0.871796,-0.722691,-0.218791,-1.359658,-1.088349,-0.185695,...,-0.005340,-0.182210,-0.309715,-1.066306,-0.464003,-0.827079,-0.806494,Yuta Watanabe,TOR,G-F
394,0.534949,0.837354,-0.479985,0.579349,1.279014,0.155179,0.016647,0.842141,2.664892,0.003099,...,0.772982,0.709154,-0.145309,0.914494,2.362846,1.483988,1.389080,Zach LaVine,CHI,G-F


In [4]:
# Fit PCA on the standardized stat columns (label columns excluded) and tabulate
# the variance captured by each principal component.
pca = PCA(random_state=0)
pca.fit(players_stats_std.drop(columns=['PLAYER', 'TEAM', 'POSITION']))

# Component labels 'PC1', 'PC2', ... -- one per fitted component.
pca_list = ['PC{}'.format(k) for k in range(1, pca.n_components_ + 1)]

pca_variance = pd.DataFrame(
    index=pca_list,
    data={
        'Variance explained': pca.explained_variance_,
        # Taken from explained_variance_ratio_, i.e. a fraction rather than a percent.
        'Percentage of variance explained': pca.explained_variance_ratio_,
    },
)
# Running total of the ratio column, component by component.
pca_variance['Cumulative Percentage of variance explained'] = (
    pca_variance['Percentage of variance explained'].cumsum()
)
pca_variance.head(10)


Unnamed: 0,Variance explained,Percentage of variance explained,Cumulative Percentage of variance explained
PC1,19.926538,0.231119,0.231119
PC2,17.214583,0.199664,0.430783
PC3,5.223597,0.060586,0.491369
PC4,3.293922,0.038205,0.529574
PC5,3.045287,0.035321,0.564895
PC6,2.494112,0.028928,0.593823
PC7,2.182078,0.025309,0.619132
PC8,1.83329,0.021263,0.640395
PC9,1.642621,0.019052,0.659447
PC10,1.590081,0.018443,0.67789


In [5]:
# Project the standardized stats onto the principal components.
# The already-fitted `pca` is reused rather than fitting a second PCA on the same
# data: this skips a redundant fit and ties the scores to the very object whose
# components_ and explained variance are inspected elsewhere in the notebook.
# (Compared with a fresh fit_transform the values can differ only by
# floating-point rounding.)
pca_data = pd.DataFrame(
    pca.transform(players_stats_std.drop(columns=['PLAYER', 'TEAM', 'POSITION'])),
    columns=pca_list,
)

# Re-attach the label columns so each row of scores can be traced to a player.
pca_data['PLAYER'] = players_stats_std['PLAYER']
pca_data['TEAM'] = players_stats_std['TEAM']
pca_data['POSITION'] = players_stats_std['POSITION']

# Persist the scores to disk.
pca_data.to_pickle('pca_data.pkl')
pca_data


Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,...,PC80,PC81,PC82,PC83,PC84,PC85,PC86,PLAYER,TEAM,POSITION
0,0.882081,1.494421,-0.103091,-0.276908,-2.034680,0.646940,-0.847650,0.499417,-0.639191,0.257360,...,-0.104025,-0.117018,-0.076665,-0.062760,0.041005,0.030177,-0.007113,Aaron Gordon,DEN,F
1,-3.105932,-3.972688,-0.099769,-0.275749,0.593826,-0.210098,-0.191335,-2.033733,0.496657,-0.443443,...,0.044854,-0.188527,0.070956,0.058327,-0.055097,-0.006387,-0.006745,Aaron Holiday,IND,G
2,0.288487,-4.568325,-1.555126,-1.268215,0.319178,0.871216,-0.080447,0.539424,0.887861,0.730629,...,-0.041681,0.133071,0.012921,0.084546,0.117756,0.003542,-0.005543,Aaron Nesmith,BOS,G-F
3,0.197796,-4.259287,-1.253244,-1.309263,-0.583111,-0.886630,0.408395,-0.683863,1.940139,0.725241,...,-0.210655,-0.195101,0.016869,0.272671,0.090357,-0.010020,0.004024,Abdel Nader,PHX,F
4,3.188852,0.733974,5.771234,-3.028903,7.864954,0.795206,2.981832,-0.731312,-0.325504,0.210913,...,0.047841,0.085120,0.118678,-0.125720,0.062269,0.002923,-0.010707,Al Horford,BOS,C-F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
391,5.263395,0.616630,-0.827514,-1.665981,-0.742167,-0.320279,-1.455114,0.976687,1.104713,-0.206271,...,-0.173927,-0.057355,0.083619,0.007112,0.005803,0.040638,0.005924,Xavier Tillman,MEM,F
392,-0.849503,-6.196595,6.894639,-2.518939,0.948903,-1.509507,2.918007,5.402452,1.097583,6.317787,...,-0.065467,0.091706,0.097708,0.093402,-0.051510,-0.122489,0.000292,Yogi Ferrell,LAC,G
393,0.115215,-4.537190,-1.805106,0.628896,-1.676585,0.339316,-0.824896,0.940127,2.916521,0.732374,...,0.007056,0.012365,-0.137255,-0.118495,-0.001385,-0.030773,-0.008395,Yuta Watanabe,TOR,G-F
394,-6.056749,5.995335,-1.423611,0.042179,-1.203111,0.666256,2.164512,-0.312392,-0.063088,-0.696407,...,0.007658,-0.010762,0.080301,-0.018896,-0.021949,-0.047654,0.006219,Zach LaVine,CHI,G-F


In [6]:
# Composition of the components: pca.components_ as a table with one row per
# component (row 0 is the first component) and one column per original stat.
stat_columns = players_stats_std.columns.drop(['PLAYER', 'TEAM', 'POSITION']).tolist()
pca_eigen = pd.DataFrame(data=pca.components_, columns=stat_columns)
pca_eigen


Unnamed: 0,FG%,FG3%,OREB,DREB,AST,STL,BLK,PF,PTS,PLUS_MINUS,...,OFF_SCREEN_PPP,OFF_SCREEN_FG%,OFF_SCREEN_POSS,PUTBACK_PPP,PUTBACK_FG%,PUTBACK_POSS,PASSES_MADE,SECONDARY_AST,POTENTIAL_AST,AST_TO_PASS%
0,0.132868,-0.126104,0.158507,0.054166,-0.132308,-0.092250,0.122940,0.039639,-0.098895,-0.022617,...,-0.116113,-0.114406,-0.107151,0.075937,0.082300,0.147825,-0.092022,-0.142942,-0.136964,-0.161203
1,0.109902,-0.003523,0.129870,0.195849,0.136542,0.123497,0.115311,0.145285,0.199246,0.087360,...,0.057766,0.063996,0.065506,0.136507,0.134848,0.137485,0.168043,0.124832,0.132328,0.060989
2,-0.034241,-0.159098,0.050776,0.021449,0.206002,0.100197,-0.008224,-0.011967,-0.018619,-0.064557,...,-0.208578,-0.195065,-0.173029,-0.123116,-0.112279,-0.020234,0.164734,0.122015,0.203186,0.175983
3,-0.109978,-0.156954,0.072401,0.046838,-0.019250,0.203138,0.109284,0.191785,-0.032748,-0.168380,...,-0.012800,-0.011083,-0.028064,0.082703,0.084518,0.047133,0.025226,-0.009401,-0.016738,-0.051085
4,-0.190210,0.140249,-0.002219,0.085107,-0.028139,0.009997,0.067937,0.112995,0.010545,-0.014542,...,0.063006,0.041833,0.037953,-0.130239,-0.142976,-0.047466,0.070089,-0.022909,-0.041235,-0.088921
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,-0.029857,0.005733,-0.199114,0.199486,0.009342,0.026698,0.003197,-0.032807,-0.094315,-0.009066,...,0.069174,-0.045083,-0.018063,0.059114,-0.054074,0.001884,-0.004800,0.012608,-0.167623,-0.028854
82,0.126968,0.005017,0.377099,-0.207857,-0.163478,0.011492,-0.014776,0.006319,-0.112898,0.011653,...,-0.055152,0.057378,0.015248,-0.196642,0.203992,-0.003707,-0.012033,0.011048,0.228620,0.008574
83,0.145282,-0.002637,0.128763,-0.096161,0.692418,-0.018873,-0.003542,0.006105,0.186172,-0.012913,...,0.076177,-0.064362,-0.009636,-0.051115,0.053120,-0.003003,-0.125596,-0.002377,-0.527357,-0.125999
84,0.209222,0.007967,-0.015400,-0.007162,-0.235629,0.027652,0.000907,-0.015654,0.765096,0.007925,...,0.020821,-0.007789,-0.024199,0.007366,-0.021080,0.025564,0.058707,-0.004714,0.075681,0.032867


In [7]:
# PC1: the 20 stats with the largest absolute loadings (signs kept), followed by
# the 10 players with the highest PC1 scores.
pc1_loadings = pca_eigen.iloc[0]
pc1_order = pc1_loadings.abs().sort_values(ascending=False).index
print(pc1_loadings[pc1_order].head(20))
print('\n')
pc1_top_players = pca_data.nlargest(10, 'PC1')
print(pc1_top_players[['PC1', 'PLAYER']])

OREB%                  0.190918
PNR_HANDLER_PPP       -0.172078
SCREEN_ASSISTS         0.170267
PNR_HANDLER_FG%       -0.167128
3PT_FREQ              -0.166189
PNR_HANDLER_POSS      -0.162927
AST_TO_PASS%          -0.161203
OREB                   0.158507
HANDOFF_FG%           -0.154675
HANDOFF_PPP           -0.153156
HANDOFF_POSS          -0.152183
CONTESTED_SHOTS_2PT    0.151777
BT25_29FT_FGM         -0.149828
PUTBACK_POSS           0.147825
PNR_ROLLMAN_POSS       0.147169
CUT_POSS               0.145109
BT20_24FT_FGM         -0.144609
DREB%                  0.143462
PNR_ROLLMAN_FG%        0.143120
SECONDARY_AST         -0.142942
Name: 0, dtype: float64


           PC1             PLAYER
335  12.873855        Rudy Gobert
54   11.996865       Clint Capela
121  11.349243       Enes Freedom
281  10.692030  Mitchell Robinson
82   10.270398     DeAndre Jordan
159  10.267981        Ivica Zubac
69    9.728899     Daniel Gafford
87    9.727703      Deandre Ayton
106   9.611996         Donta

In [8]:
# PC2: the 20 stats with the largest absolute loadings (signs kept), followed by
# the 10 players with the highest PC2 scores.
pc2_loadings = pca_eigen.iloc[1]
pc2_order = pc2_loadings.abs().sort_values(ascending=False).index
print(pc2_loadings[pc2_order].head(20))
print('\n')
pc2_top_players = pca_data.nlargest(10, 'PC2')
print(pc2_top_players[['PC2', 'PLAYER']])

PTS                          0.199246
PIE                          0.197299
DREB                         0.195849
LT_5FT_FGM                   0.194865
LT6FT_DFGM                   0.176635
OFF_LOOSE_BALLS_RECOVERED    0.172353
BT5_9FT_FGM                  0.169840
PASSES_MADE                  0.168043
USG%                         0.154592
POST_UP_POSS                 0.152815
BT16FT_3PT_DFGM              0.147127
PF                           0.145285
ISO_POSS                     0.144150
POST_UP_FG%                  0.143802
POST_UP_PPP                  0.143711
BT6_9FT_DFGM                 0.142565
BT10_15FT_DFGM               0.141681
3PT_DFGM                     0.138752
BT10_14FT_FGM                0.137622
PUTBACK_POSS                 0.137485
Name: 1, dtype: float64


           PC2                 PLAYER
299  14.356474           Nikola Jokic
196  12.008666            Joel Embiid
141  11.304216  Giannis Antetokounmpo
104  11.239786       Domantas Sabonis
220  10.655708     Karl-

In [9]:
# PC3: the 20 stats with the largest absolute loadings (signs kept), followed by
# the 10 players with the highest PC3 scores.
pc3_loadings = pca_eigen.iloc[2]
pc3_order = pc3_loadings.abs().sort_values(ascending=False).index
print(pc3_loadings[pc3_order].head(20))
print('\n')
pc3_top_players = pca_data.nlargest(10, 'PC3')
print(pc3_top_players[['PC3', 'PLAYER']])

AST%               0.262885
TRANSITION_PPP    -0.224155
SPOT_UP_POSS      -0.220048
OFF_SCREEN_PPP    -0.208578
AST                0.206002
POTENTIAL_AST      0.203186
OFF_SCREEN_FG%    -0.195065
SPOT_UP_PPP       -0.187985
BT20_24FT_FGM     -0.183993
CUT_PPP           -0.180213
CUT_FG%           -0.177778
AST_TO_PASS%       0.175983
OFF_SCREEN_POSS   -0.173029
SPOT_UP_FG%       -0.165801
PASSES_MADE        0.164734
AST_TO             0.164278
TS%               -0.160068
FG3%              -0.159098
TM_TOV%            0.154398
TRANSITION_FG%    -0.143226
Name: 2, dtype: float64


          PC3               PLAYER
269  7.781874  Matthew Dellavedova
360  7.173065       Thaddeus Young
337  6.904351    Russell Westbrook
392  6.894639         Yogi Ferrell
225  5.912303         Kemba Walker
4    5.771234           Al Horford
51   5.663023           Chris Paul
176  5.585118         James Harden
238  5.558224        Killian Hayes
199  5.328880            John Wall


In [10]:
# PC4: the 20 stats with the largest absolute loadings (signs kept), followed by
# the 10 players with the highest PC4 scores.
pc4_loadings = pca_eigen.iloc[3]
pc4_order = pc4_loadings.abs().sort_values(ascending=False).index
print(pc4_loadings[pc4_order].head(20))
print('\n')
pc4_top_players = pca_data.nlargest(10, 'PC4')
print(pc4_top_players[['PC4', 'PLAYER']])

3PT_DFGM                     0.235342
CONTESTED_SHOTS_3PT          0.234037
DEFLECTIONS                  0.229566
BT5_9FT_FG%                 -0.226953
BT10_15FT_FREQ              -0.203619
STL                          0.203138
BT10_14FT_FG%               -0.198230
PF                           0.191785
DEF_LOOSE_BALLS_RECOVERED    0.191544
BT6_9FT_FREQ                -0.181773
TS%                         -0.179039
PIE                         -0.177798
PLUS_MINUS                  -0.168380
BT25_29FT_FG%               -0.158535
FG3%                        -0.156954
BT10_14FT_FGM               -0.156444
BT20_24FT_FG%               -0.152994
LT6FT_DFGM                   0.143435
BT15_19FT_FGM               -0.139305
SPOT_UP_POSS                 0.130932
Name: 3, dtype: float64


          PC4             PLAYER
41   5.850233        Cam Reddish
130  4.910813  Freddie Gillespie
327  4.792714   Robert Covington
208  4.156879        Josh Okogie
154  4.028645        Isaac Okoro
255  4.018052   

In [11]:
# PC5: the 20 stats with the largest absolute loadings (signs kept), followed by
# the 10 players with the highest PC5 scores.
pc5_loadings = pca_eigen.iloc[4]
pc5_order = pc5_loadings.abs().sort_values(ascending=False).index
print(pc5_loadings[pc5_order].head(20))
print('\n')
pc5_top_players = pca_data.nlargest(10, 'PC5')
print(pc5_top_players[['PC5', 'PLAYER']])

BT10_15FT_DFGM         0.266591
BT16FT_3PT_DFGM        0.244504
TRANSITION_FG%        -0.238977
BT6_9FT_DFGM           0.237226
BT10_15FT_FREQ         0.201572
FG%                   -0.190210
BT20_24FT_FGM          0.187854
LT6FT_DFGM             0.183436
CONTESTED_SHOTS_2PT    0.169635
TRANSITION_PPP        -0.167486
BT16FT_3PT_FREQ        0.166731
BT25_29FT_FG%          0.163317
BT25_29FT_FGM          0.161226
CUT_FG%               -0.159986
CUT_PPP               -0.151867
3PT_DFGM               0.149510
LT_5FT_FG%            -0.146282
CONTESTED_SHOTS_3PT    0.145390
TM_TOV%               -0.144197
PUTBACK_FG%           -0.142976
Name: 4, dtype: float64


          PC5             PLAYER
4    7.864954         Al Horford
245  6.943303  LaMarcus Aldridge
35   5.466913        Brook Lopez
225  5.056500       Kemba Walker
300  4.559126     Nikola Vucevic
272  4.446643        Maxi Kleber
216  4.410811      Justin Patton
307  4.251908        P.J. Tucker
72   3.471365        Danny Green
97  

In [12]:
# PC6: the 20 stats with the largest absolute loadings (signs kept), followed by
# the 10 players with the highest PC6 scores.
pc6_loadings = pca_eigen.iloc[5]
pc6_order = pc6_loadings.abs().sort_values(ascending=False).index
print(pc6_loadings[pc6_order].head(20))
print('\n')
pc6_top_players = pca_data.nlargest(10, 'PC6')
print(pc6_top_players[['PC6', 'PLAYER']])

USG%                         0.249094
TRANSITION_FG%              -0.239501
BT6_9FT_FREQ                -0.238163
PLUS_MINUS                  -0.228004
AST_TO                      -0.221013
TRANSITION_PPP              -0.217828
BT15_19FT_FGM                0.211465
DEFLECTIONS                 -0.190139
POST_UP_PPP                  0.185737
TS%                         -0.183391
POST_UP_POSS                 0.176228
LT6FT_FREQ                   0.171487
POST_UP_FG%                  0.171385
BT10_15FT_FREQ              -0.169348
STL                         -0.168888
BT6_9FT_DFGM                -0.161365
BT10_14FT_FGM                0.154152
SPOT_UP_FG%                 -0.151380
OFF_SCREEN_POSS              0.149595
DEF_LOOSE_BALLS_RECOVERED   -0.144072
Name: 5, dtype: float64


          PC6              PLAYER
179  4.811652   Jaren Jackson Jr.
205  4.729569           Josh Hall
305  3.808730     Otto Porter Jr.
196  3.787251         Joel Embiid
160  3.748483           JJ Redick
7    3.739

In [13]:
# PC7: the 20 stats with the largest absolute loadings (signs kept), followed by
# the 10 players with the highest PC7 scores.
pc7_loadings = pca_eigen.iloc[6]
pc7_order = pc7_loadings.abs().sort_values(ascending=False).index
print(pc7_loadings[pc7_order].head(20))
print('\n')
pc7_top_players = pca_data.nlargest(10, 'PC7')
print(pc7_top_players[['PC7', 'PLAYER']])

SPOT_UP_FG%       -0.292292
SPOT_UP_PPP       -0.257323
OFF_SCREEN_POSS    0.252616
HANDOFF_POSS       0.245817
OFF_SCREEN_PPP     0.216160
OFF_SCREEN_FG%     0.209664
POST_UP_POSS      -0.202481
POST_UP_FG%       -0.198843
POST_UP_PPP       -0.197817
TS%                0.191781
FG3%              -0.181129
FG%                0.173070
LT_5FT_FG%         0.166794
TRANSITION_PPP    -0.163536
TRANSITION_FG%    -0.161116
BT25_29FT_FG%     -0.151558
HANDOFF_PPP        0.130928
BT6_9FT_FREQ       0.129270
BT25_29FT_FGM      0.128454
SPOT_UP_POSS      -0.126851
Name: 6, dtype: float64


          PC7               PLAYER
106  6.069927           Donta Hall
82   4.341050       DeAndre Jordan
281  4.317686    Mitchell Robinson
348  4.286849        Stephen Curry
102  4.180922       Dewayne Dedmon
286  3.739035          Moses Brown
328  3.540419  Robert Williams III
69   3.387298       Daniel Gafford
30   3.155734         Bradley Beal
360  2.985152       Thaddeus Young
