In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import datetime as dt

import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import dendrogram
from scipy.cluster.hierarchy import cut_tree

In [6]:
# Read the player statistics CSV into a DataFrame, decoding the file as latin-1.
data = pd.read_csv(
    'Cricket.csv',
    encoding="latin-1",
)

In [7]:
# Display the loaded frame as the cell output (pandas elides the middle rows
# of a long frame, shown as a "..." row in the rendered table).
data

Unnamed: 0,Player,Span,Mat,Inns,NO,Runs,HS,Ave,BF,SR,100,50,0
0,SR Tendulkar (INDIA),1989-2012,463,452,41,18426,200*,44.83,21367,86.23,49,96,20
1,KC Sangakkara (Asia/ICC/SL),2000-2015,404,380,41,14234,169,41.98,18048,78.86,25,93,15
2,RT Ponting (AUS/ICC),1995-2012,375,365,39,13704,164,42.03,17046,80.39,30,82,20
3,ST Jayasuriya (Asia/SL),1989-2011,445,433,18,13430,189,32.36,14725,91.20,28,68,34
4,DPMD Jayawardene (Asia/SL),1998-2015,448,418,39,12650,144,33.37,16020,78.96,19,77,28
...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,CG Greenidge (WI),1975-1991,128,127,13,5134,133*,45.03,7908,64.92,11,31,3
75,Misbah-ul-Haq (PAK),2002-2015,162,149,31,5122,96*,43.40,6945,73.75,0,42,6
76,PD Collingwood (ENG),2001-2011,197,181,37,5092,120*,35.36,6614,76.98,5,26,7
77,A Symonds (AUS),1998-2009,198,161,33,5088,156,39.75,5504,92.44,6,30,15


In [12]:
# Keep only the player name and the two clustering features (strike rate and
# average). The explicit copy means later column assignments on df_cluster
# do not write back into `data`.
df_cluster = data.loc[:, ['Player', 'SR', 'Ave']].copy()

# Preview the first rows of the working frame.
df_cluster.head()

Unnamed: 0,Player,SR,Ave
0,SR Tendulkar (INDIA),86.23,44.83
1,KC Sangakkara (Asia/ICC/SL),78.86,41.98
2,RT Ponting (AUS/ICC),80.39,42.03
3,ST Jayasuriya (Asia/SL),91.2,32.36
4,DPMD Jayawardene (Asia/SL),78.96,33.37


In [13]:
# Standardise SR and Ave so both features contribute on the same scale.
# The scaler is fitted first and then applied; the scaled values overwrite
# the original 'SR' and 'Ave' columns of df_cluster.
scaler = StandardScaler().fit(df_cluster[['SR', 'Ave']])
df_cluster[['SR', 'Ave']] = scaler.transform(df_cluster[['SR', 'Ave']])

In [14]:
# Fit k-means on the SR and Ave columns with an arbitrary first choice of k = 4.
# random_state is fixed so the cluster assignment is repeatable between runs.
kmeans = KMeans(
    n_clusters=4,
    max_iter=50,
    random_state=100,
).fit(df_cluster[['SR', 'Ave']])

# Show the fitted estimator as the cell output.
kmeans

KMeans(max_iter=50, n_clusters=4, random_state=100)

In [15]:
# Attach each player's k-means label as a new 'Cluster' column; labels_ is in
# the same row order as the frame that was passed to fit().
df_cluster['Cluster'] = pd.Series(kmeans.labels_, index=df_cluster.index)

In [20]:
# List the distinct cluster labels, in order of first appearance.
df_cluster.loc[:, 'Cluster'].unique()

array([3, 1, 0, 2], dtype=int32)

In [23]:
# Look up which cluster a given player landed in.
# regex=False matches the name as a literal substring rather than a regular
# expression, and na=False treats a missing Player value as "no match" so the
# boolean mask never contains NaN (which .loc would reject).
df_cluster.loc[df_cluster['Player'].str.contains('Guptill', regex=False, na=False)]

Unnamed: 0,Player,SR,Ave,Cluster
63,MJ Guptill (NZ),0.855246,0.812157,3


In [21]:
# Players assigned to cluster 0.
df_cluster.query('Cluster == 0')

Unnamed: 0,Player,SR,Ave,Cluster
3,ST Jayasuriya (Asia/SL),1.207091,-1.047909,0
10,TM Dilshan (SL),0.703152,0.126959,0
12,AC Gilchrist (AUS/ICC),1.789106,-0.447724,0
17,CH Gayle (ICC/WI),0.589588,-0.202889,0
20,Yuvraj Singh (Asia/INDIA),0.849162,-0.335508,0
24,V Sehwag (Asia/ICC/INDIA),2.538425,-0.590544,0
27,Shahid Afridi (Asia/ICC/PAK),3.823117,-2.542423,0
50,BB McCullum (NZ),1.73131,-1.379457,0
57,EJG Morgan (ENG/IRE),0.945489,0.035146,0
59,SR Watson (AUS),1.13003,0.34289,0


In [18]:
# Players assigned to cluster 1.
df_cluster.query('Cluster == 1')

Unnamed: 0,Player,SR,Ave,Cluster
1,KC Sangakkara (Asia/ICC/SL),-0.044139,0.587725,1
2,RT Ponting (AUS/ICC),0.110997,0.596226,1
5,Inzamam-ul-Haq (Asia/PAK),-0.512591,0.169465,1
6,JH Kallis (Afr/ICC/SA),-0.649476,0.992383,1
7,SC Ganguly (Asia/INDIA),-0.567345,0.424501,1
8,R Dravid (Asia/ICC/INDIA),-0.81678,0.108256,1
9,BC Lara (ICC/WI),0.021768,0.332688,1
11,Mohammad Yousuf (Asia/PAK),-0.42539,0.541818,1
18,Saeed Anwar (PAK),0.139388,0.116757,1
19,S Chanderpaul (WI),-0.867478,0.523115,1


In [19]:
# Players assigned to cluster 2.
df_cluster.query('Cluster == 2')

Unnamed: 0,Player,SR,Ave,Cluster
4,DPMD Jayawardene (Asia/SL),-0.034,-0.876185,2
14,M Azharuddin (INDIA),-0.534898,-0.272599,2
16,PA de Silva (SL),0.18603,-0.616048,2
22,MS Atapattu (SL),-1.173695,-0.162083,2
26,HH Gibbs (SA),0.402004,-0.406918,2
28,SP Fleming (ICC/NZ),-0.791431,-1.041108,2
30,SR Waugh (AUS),-0.343259,-0.956096,2
31,A Ranatunga (SL),-0.14148,-0.456225,2
33,Younis Khan (PAK),-0.406124,-1.238337,2
35,Saleem Malik (PAK),-0.292561,-0.959497,2


In [17]:
# Players assigned to cluster 3.
df_cluster.query('Cluster == 3')

Unnamed: 0,Player,SR,Ave,Cluster
0,SR Tendulkar (INDIA),0.703152,1.072294,3
13,MS Dhoni (Asia/INDIA),0.952587,2.175752,3
15,AB de Villiers (Afr/SA),2.124728,2.554906,3
25,V Kohli (INDIA),1.185798,2.621216,3
34,HM Amla (SA),0.989089,1.993826,3
38,MG Bevan (AUS),-0.520702,2.560007,3
42,IVA Richards (WI),1.105695,1.441247,3
63,MJ Guptill (NZ),0.855246,0.812157,3
64,MEK Hussey (AUS),0.79745,1.636775,3
