In [1]:
import pandas as pd
import numpy as np
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns 

In [3]:
# Read the exported match CSV and remove the 'Player Name' column in one pass.
# errors="ignore" keeps this a no-op when the column is not present.
Asbury_game = (
    pd.read_csv("Game 10 Ashbury.xlsx - Asbury.csv")
    .drop(columns="Player Name", errors="ignore")
)

In [4]:
# Keep only the rows whose 'Split Name' equals 'game'; the original row labels
# of Asbury_game are preserved on the result.
Asbury_full = Asbury_game.loc[Asbury_game['Split Name'].eq('game')]
Asbury_full

Unnamed: 0,Session Title,Player Code,Minutes Played,Split Name,Distance (miles),Sprint Distance (yards),Power Plays,Energy (kcal),Impacts,Hr Load,...,Accelerations Zone Count: 1 - 2 m/s/s,Accelerations Zone Count: 2 - 3 m/s/s,Accelerations Zone Count: 3 - 4 m/s/s,Accelerations Zone Count: > 4 m/s/s,Deceleration Zone Count: 0 - 1 m/s/s,Deceleration Zone Count: 1 - 2 m/s/s,Deceleration Zone Count: 2 - 3 m/s/s,Deceleration Zone Count: 3 - 4 m/s/s,Deceleration Zone Count: > 4 m/s/s,Top Decelerations
0,Asbury,M,90.0,game,4.762,282,30,732.7249,0,0,...,173,102,27,4,0,190,87,17,13,30
1,Asbury,V,53.0,game,4.3442,50,12,777.6657,0,0,...,261,127,24,11,0,260,100,37,10,47
2,Asbury,E,63.0,game,2.0922,34,7,414.4585,0,0,...,136,63,7,4,0,117,59,17,5,22
3,Asbury,C,40.0,game,3.147,91,19,566.0646,2,0,...,185,86,30,7,0,173,82,28,10,38
4,Asbury,N,13.0,game,0.9707,49,6,199.8937,1,0,...,46,31,0,2,0,49,24,7,2,9
5,Asbury,K,90.0,game,5.1294,152,28,920.7575,1,0,...,230,129,30,7,0,221,107,28,13,41
6,Asbury,R,85.0,game,5.7178,293,34,962.9745,1,0,...,270,98,38,3,0,216,111,50,31,81
7,Asbury,H,41.0,game,3.2067,169,28,522.5151,2,0,...,138,84,17,5,0,125,55,31,20,51
8,Asbury,P,49.0,game,2.7868,182,23,508.0111,1,0,...,121,65,26,6,0,93,58,33,18,51
9,Asbury,G,0.0,game,0.1627,0,0,22.2751,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Subset of columns carried into the clustering analysis: the player
# identifier followed by the metrics selected for it.
Asbury_volume_intensity = Asbury_full.loc[:, [
    "Player Code",
    "Minutes Played",
    "Distance (miles)",
    "Sprint Distance (yards)",
    "Power Plays",
    "Energy (kcal)",
    "Impacts",
    "Top Speed (mph)",
    "Distance Per Min (yd/min)",
    "Power Score (w/kg)",
    "Player Load",
    "Work Ratio",
]]

In [None]:
# Feature matrix for clustering: everything except the 'Player Code' identifier.
# errors="ignore" keeps this a no-op when the column is not present.
Asbury_cluster = Asbury_volume_intensity.drop(columns='Player Code', errors='ignore')

In [None]:
# Elbow scan: fit K-Means for each candidate k and record the inertia
# (within-cluster sum of squared distances) of every fit.
#
# K-Means is distance based, so the features are min-max scaled first;
# otherwise columns with large magnitudes (e.g. energy in kcal) dominate the
# inertia. This also matches the scaling applied before the final clustering.
Asbury_elbow_features = preprocessing.MinMaxScaler().fit_transform(Asbury_cluster)

Asbury_distortions = []
# KMeans requires n_clusters <= n_samples, so cap the scan at the row count
# (still 1..9 whenever at least 9 rows are available).
K = range(1, min(10, len(Asbury_cluster) + 1))
for k in K:
    # Fixed random_state makes the curve reproducible across runs; explicit
    # n_init keeps the number of restarts the same across scikit-learn versions.
    kmeanModel = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeanModel.fit(Asbury_elbow_features)
    Asbury_distortions.append(kmeanModel.inertia_)


In [None]:
# Plot inertia against k; the "elbow" of the curve suggests a reasonable k.
fig, elbow_ax = plt.subplots(figsize=(16, 8))
elbow_ax.plot(K, Asbury_distortions, 'bx-')
elbow_ax.set_xlabel('k')
elbow_ax.set_ylabel('Distortion')
# Title typo fixed ('shwoing' -> 'showing').
elbow_ax.set_title('The Elbow Method showing the optimal k')
plt.show()

In [None]:
# Min-max scale each clustering feature (MinMaxScaler's default range is [0, 1]).
x_Asbury = Asbury_cluster.values  # raw feature matrix as a numpy array
scaler = preprocessing.MinMaxScaler()
scaler.fit(x_Asbury)
x_scaled = scaler.transform(x_Asbury)
# Wrap the scaled array in a DataFrame; columns and index are default integers.
X_norm = pd.DataFrame(data=x_scaled)


In [None]:
# Project the scaled features onto two principal components so the players
# can be drawn on a 2-D scatter plot.
pca_Asbury = PCA(n_components=2)
Asbury_components = pca_Asbury.fit_transform(X_norm)
reduced_Asbury = pd.DataFrame(Asbury_components)

In [None]:
# Cluster the players in the 2-D PCA space.
#
# Only the first two (component) columns are used, so this cell still works
# when re-run after extra label columns have been added to reduced_Asbury.
Asbury_components_2d = reduced_Asbury.iloc[:, :2]

# Fixed random_state keeps the cluster numbering stable between runs, which the
# per-cluster tables (cluster == 0 / 1 / 2) rely on; explicit n_init keeps the
# number of restarts the same across scikit-learn versions.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
# fit the input data
kmeans = kmeans.fit(Asbury_components_2d)
# get the cluster labels
labels = kmeans.predict(Asbury_components_2d)
# centroid values
centroid = kmeans.cluster_centers_
# cluster labels as a plain Python list
clusters = kmeans.labels_.tolist()
# player codes, still carrying the row labels of Asbury_full
Asbury_names = Asbury_full['Player Code']

In [None]:
# Attach the cluster label and player code to each PCA point.
reduced_Asbury['cluster'] = clusters
# reduced_Asbury was built from a numpy array and has a fresh 0..n-1 index,
# while Asbury_names keeps the row labels of the filtered source frame.
# Assigning the Series directly would align on index and could yield NaN or
# mismatched names, so assign the values positionally instead.
reduced_Asbury['names'] = Asbury_names.to_numpy()
reduced_Asbury.columns = ['x', 'y', 'cluster', 'names']
reduced_Asbury.head()

In [None]:
%matplotlib inline
sns.set(style="white")
# lmplot returns a FacetGrid; with fit_reg=False it draws a plain scatter of
# the two components, coloured by cluster.
ax = sns.lmplot(
    data=reduced_Asbury,
    x="x",
    y="y",
    hue='cluster',
    fit_reg=False,
    legend=True,
    height=15,
    scatter_kws={"s": 250},
)
# Annotate every point with the player's code.
texts = [
    plt.text(px, py, code)
    for px, py, code in zip(
        reduced_Asbury["x"], reduced_Asbury["y"], reduced_Asbury["names"]
    )
]
ax.set(ylim=(-2, 2))
plt.tick_params(labelsize=15)
plt.xlabel(" Asbury PC 1", fontsize=20)
plt.ylabel(" Asbury PC 2", fontsize=20)
plt.show()

In [None]:
# Players assigned to cluster 0.
Asbury_group_0 = reduced_Asbury.loc[reduced_Asbury['cluster'].eq(0)]
Asbury_group_0

In [None]:
# Players assigned to cluster 1.
Asbury_group_1 = reduced_Asbury.loc[reduced_Asbury['cluster'].eq(1)]
Asbury_group_1

In [None]:
# Players assigned to cluster 2.
Asbury_group_2 = reduced_Asbury.loc[reduced_Asbury['cluster'].eq(2)]
Asbury_group_2