In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.manifold import TSNE
from sklearn.cluster import AffinityPropagation
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA

In [2]:
# Constants - The master stat (feature) list, and the corresponding player data.
stats_list = ['MP_x','FG','FGA','2P','2PA','3P','3PA','FT','FTA','ORB', 'DRB', 'TRB', 'AST', 'STL','BLK','TOV','PF','PTS','FG%','2P%','3P%', 'eFG%', 'FT%', 'TS%_x', 'PER', '3PAr','FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%', 'USG%','ORtg', 'DRtg', 'OWS', 'DWS', 'WS', 'WS/48', 'OBPM', 'DBPM', 'BPM','VORP', 'dist', 'spd', 'tchs', 'pass', 'sast', 'ftast', 'dfgm','dfga']
df = pd.read_csv('../data/final_stats.csv')
df.set_index('player_year',inplace=True)
df.fillna(value=0,inplace=True)

In [3]:
feature_space = ['MP_x','tchs','2P','3P','3PA','FTr','TS%_x','ORB','TRB','AST%','STL%','BLK%','3PAr','DWS','DBPM','dist','dfga']

In [6]:
# Optimizing Affinity Propagation
X = df[feature_space]

damping_list = np.linspace(0.5,0.98,20)
preference_list = np.arange(-300,300,25)
comp_list = [2,3,4,5]
max_score = [0,0,0,0]

for d in damping_list:
    print 'damping = {}'.format(d)
    for p in preference_list:
        for c in comp_list:
            pca = PCA(n_components=c,whiten=True,svd_solver='full')
            reduced = pca.fit_transform(X)
            
            aff = AffinityPropagation(damping=d,preference=p)
            aff.fit(reduced)
            try:
                score = silhouette_score(reduced,aff.labels_)
            except:
                continue
            n_clusters = len(np.unique(aff.labels_))
            if n_clusters in range(5,30):
                if score > max_score[0]:
                    max_score = [score,'damping:{}'.format(d),'preference:{}'.format(p),'{} pca components'.format(c)]
                    print 'new max score: {}'.format(max_score) + '{} clusters'.format(n_clusters)

damping = 0.5
new max score: [0.1746145220534368, 'damping:0.5', 'preference:-300', '5 pca components']5 clusters
new max score: [0.24662592585852194, 'damping:0.5', 'preference:-275', '4 pca components']5 clusters
new max score: [0.25518685450737078, 'damping:0.5', 'preference:-250', '4 pca components']5 clusters
new max score: [0.32241839889461948, 'damping:0.5', 'preference:-200', '3 pca components']5 clusters
damping = 0.525263157895
damping = 0.550526315789
new max score: [0.35023044489348742, 'damping:0.550526315789', 'preference:-25', '2 pca components']11 clusters
damping = 0.575789473684
damping = 0.601052631579
damping = 0.626315789474
new max score: [0.3809279943434567, 'damping:0.626315789474', 'preference:-100', '2 pca components']5 clusters
damping = 0.651578947368
damping = 0.676842105263
damping = 0.702105263158
damping = 0.727368421053
damping = 0.752631578947
damping = 0.777894736842
damping = 0.803157894737
new max score: [0.39355534892826055, 'damping:0.803157894737

In [7]:
# Optimizing Affinity Propagation, round 2
X = df[feature_space]

damping_list = np.linspace(0.5,0.6,20)
preference_list = np.arange(-300,300,25)
comp_list = [2,3,4,5]

for d in damping_list:
    max_score = [0,0,0,0] # reset high score with each damping value this time
    print 'damping = {}'.format(d)
    for p in preference_list:
        for c in comp_list:
            pca = PCA(n_components=c,whiten=True,svd_solver='full')
            reduced = pca.fit_transform(X)
            
            aff = AffinityPropagation(damping=d,preference=p)
            aff.fit(reduced)
            try:
                score = silhouette_score(reduced,aff.labels_)
            except:
                continue
            n_clusters = len(np.unique(aff.labels_))
            if n_clusters in range(5,30):
                if score > max_score[0]:
                    max_score = [score,'damping:{}'.format(d),'preference:{}'.format(p),'{} pca components'.format(c)]
                    print 'new max score: {}'.format(max_score) + '{} clusters'.format(n_clusters)

damping = 0.5
new max score: [0.1746145220534368, 'damping:0.5', 'preference:-300', '5 pca components']5 clusters
new max score: [0.24662592585852194, 'damping:0.5', 'preference:-275', '4 pca components']5 clusters
new max score: [0.25518685450737078, 'damping:0.5', 'preference:-250', '4 pca components']5 clusters
new max score: [0.32241839889461948, 'damping:0.5', 'preference:-200', '3 pca components']5 clusters
damping = 0.505263157895
new max score: [0.17993088517307038, 'damping:0.505263157895', 'preference:-300', '5 pca components']5 clusters
new max score: [0.23423736652086205, 'damping:0.505263157895', 'preference:-275', '4 pca components']5 clusters
new max score: [0.23969614680902226, 'damping:0.505263157895', 'preference:-250', '4 pca components']5 clusters
new max score: [0.32652811036707857, 'damping:0.505263157895', 'preference:-200', '3 pca components']5 clusters
damping = 0.510526315789
new max score: [0.19439026338429122, 'damping:0.510526315789', 'preference:-300', '5 

new max score: [0.23166454607357356, 'damping:0.584210526316', 'preference:-300', '4 pca components']5 clusters
new max score: [0.24438429846294332, 'damping:0.584210526316', 'preference:-275', '4 pca components']5 clusters
new max score: [0.27608640126654965, 'damping:0.584210526316', 'preference:-225', '3 pca components']5 clusters
new max score: [0.27618731216109266, 'damping:0.584210526316', 'preference:-200', '3 pca components']5 clusters
new max score: [0.31308721036831921, 'damping:0.584210526316', 'preference:-150', '3 pca components']5 clusters
new max score: [0.31668159882376218, 'damping:0.584210526316', 'preference:-125', '3 pca components']6 clusters
new max score: [0.34460594186009891, 'damping:0.584210526316', 'preference:-100', '2 pca components']6 clusters
damping = 0.589473684211
new max score: [0.18563286065616461, 'damping:0.589473684211', 'preference:-300', '5 pca components']5 clusters
new max score: [0.25051043907127429, 'damping:0.589473684211', 'preference:-275

In [197]:
X = df[feature_space]

In [198]:
pca = PCA(n_components=2,whiten=True,svd_solver='full')
reduced = pca.fit_transform(X)

aff = AffinityPropagation(damping=0.68,preference=-1500)
aff.fit(X)

AffinityPropagation(affinity='euclidean', convergence_iter=15, copy=True,
          damping=0.68, max_iter=200, preference=-1500, verbose=False)

In [199]:
silhouette_score(X,aff.labels_)

0.19431199682417732

In [200]:
X['label'] = aff.labels_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [201]:
X.loc['Nikola Jokic/2016-17']
X['label'].value_counts()

5     81
14    73
7     73
9     70
0     63
8     61
3     53
10    52
4     47
16    43
12    42
17    42
13    40
6     35
15    34
1     33
11    32
2     31
18     6
Name: label, dtype: int64

In [204]:
X[X['label'] == 11]

Unnamed: 0_level_0,MP_x,tchs,2P,3P,3PA,FTr,TS%_x,ORB,TRB,AST%,STL%,BLK%,3PAr,DWS,DBPM,dist,dfga,label
player_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Jeff Teague/2015-16,28.5,58.495327,5.2,1.8,4.4,0.316,0.551,0.5,3.4,34.4,2.1,0.8,0.278,3.0,-0.9,1.559346,0.82243,11
Rajon Rondo/2015-16,35.2,61.964286,4.2,0.9,2.4,0.192,0.506,1.1,6.2,48.0,2.7,0.3,0.217,2.3,0.2,1.567857,0.9375,11
Mo Williams/2014-15,29.1,60.833333,4.0,2.2,6.3,0.217,0.512,0.4,3.2,37.4,1.2,0.6,0.405,0.6,-3.5,1.593214,0.964286,11
Rajon Rondo/2016-17,26.7,64.628205,3.5,1.0,2.6,0.089,0.461,1.4,6.9,37.0,2.7,0.5,0.237,2.7,1.6,1.639103,1.025641,11
Chris Paul/2015-16,32.7,67.470588,5.8,1.8,4.9,0.294,0.575,0.6,4.6,52.7,3.1,0.4,0.295,3.5,0.5,1.649706,1.245098,11
Russell Westbrook/2015-16,34.4,63.610294,7.3,1.3,4.5,0.397,0.554,1.9,8.2,49.6,2.9,0.6,0.236,4.0,2.4,1.700074,1.455882,11
James Harden/2015-16,38.1,59.968,5.5,2.7,7.6,0.518,0.598,0.7,5.8,35.4,2.2,1.4,0.406,2.6,-0.4,1.7004,2.32,11
Isaiah Thomas/2014-15,25.8,61.028169,4.3,2.7,7.2,0.437,0.579,0.7,3.2,27.2,1.6,0.2,0.434,1.1,-2.8,1.730423,0.802817,11
Ish Smith/2015-16,29.1,65.979381,5.5,0.9,2.7,0.182,0.465,0.7,4.9,38.3,1.9,0.8,0.176,1.3,-1.1,1.760103,1.938144,11
Chris Paul/2016-17,31.5,74.358974,4.7,2.3,5.7,0.331,0.614,0.8,5.7,46.8,3.1,0.3,0.385,2.6,1.0,1.784872,1.5,11
