In [None]:
import numpy as np
import pandas as pd
import random
import math
import sklearn.datasets as ds
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

%matplotlib inline

In [None]:
# Load the NBA dataset from the CSV next to the notebook and preview its first three rows.
nba = pd.read_csv(filepath_or_buffer="nba.csv")
nba.head(n=3)

In [None]:
# Build the clustering feature matrix: keep numeric (and boolean) columns only,
# then drop every column that contains at least one missing value.
# Only columns are dropped, so the rows stay aligned with `nba`.
# `select_dtypes` is the public API; the private `_get_numeric_data()` may change without notice.
numeric_cols = nba.select_dtypes(include=["number", "bool"]).dropna(axis=1)

In [None]:
# Elbow method: fit K-means once per candidate k (1..14) and record the inertia
# (within-cluster sum of squared distances) of each fitted model.
Ks = range(1, 15)
# `n_jobs` is intentionally not passed: it was deprecated in scikit-learn 0.23 and
# removed in 1.0, where passing it raises TypeError.
# A fixed seed keeps the curve reproducible between runs (same seed as the k=3 fit below).
score = [KMeans(n_clusters=i, random_state=1).fit(numeric_cols).inertia_ for i in Ks]

In [None]:
# Elbow curve: inertia as a function of the number of clusters.
# Labelled so the figure can be read on its own; the trailing ';' hides the artist repr.
plt.plot(Ks, score)
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.title('K-means elbow curve');

In [None]:
# Fit K-means with k=3; the fixed seed makes the cluster assignment reproducible.
kmeans = KMeans(n_clusters=3, random_state=1)
kmeans.fit(numeric_cols)


# Visualizing using PCA: project the features onto their first two principal
# components and colour every row by its K-means cluster label.
pca = PCA(n_components=2)
res = pca.fit_transform(numeric_cols)
plt.figure(figsize=(12,8))
plt.scatter(res[:,0], res[:,1], c=kmeans.labels_, s=50, cmap='viridis')
plt.title('PCA')

# Visualizing using 2 features: Total points vs. Total assists
# (`nba` and `numeric_cols` share the same rows, so the labels line up.)
plt.figure(figsize=(12,8))
plt.scatter(nba['pts'], nba['ast'], c=kmeans.labels_, s=50, cmap='viridis')
plt.xlabel('Total points')
plt.ylabel('Total assists')

# Visualizing using 2 features: Age vs. Minutes played
plt.figure(figsize=(12,8))
plt.scatter(nba['age'], nba['mp'], c=kmeans.labels_, s=50, cmap='viridis')
plt.xlabel('Age')
plt.ylabel('Minutes played');

In [None]:
from sklearn.metrics import silhouette_score

# Silhouette coefficient of the k=3 K-means labelling, computed on the same features it was fitted on.
silhouette_score(X=numeric_cols, labels=kmeans.labels_)

In [None]:
from sklearn.cluster import AffinityPropagation

# Cluster the same features with affinity propagation (default settings),
# keep the per-row labels in `pred`, and score them with the silhouette coefficient.
clst = AffinityPropagation().fit(numeric_cols)
pred = clst.labels_
silhouette_score(X=numeric_cols, labels=pred)

In [None]:
# Visualizing using PCA: project the features onto their first two principal
# components and colour every row by the cluster label currently held in `pred`.
pca = PCA(n_components=2)
res = pca.fit_transform(numeric_cols)
plt.figure(figsize=(12,8))
plt.scatter(res[:,0], res[:,1], c=pred, s=50, cmap='viridis')
plt.title('PCA')

# Visualizing using 2 features: Total points vs. Total assists
plt.figure(figsize=(12,8))
plt.scatter(nba['pts'], nba['ast'], c=pred, s=50, cmap='viridis')
plt.xlabel('Total points')
plt.ylabel('Total assists')

# Visualizing using 2 features: Age vs. Minutes played
plt.figure(figsize=(12,8))
plt.scatter(nba['age'], nba['mp'], c=pred, s=50, cmap='viridis')
plt.xlabel('Age')
plt.ylabel('Minutes played');

In [None]:
from sklearn.cluster import DBSCAN

# DBSCAN with default hyper-parameters; rows it cannot assign to any cluster get label -1.
clst = DBSCAN()
pred = clst.fit_predict(numeric_cols)

# silhouette_score raises ValueError unless 2 <= n_labels <= n_samples - 1.
# DBSCAN can return a single label (for example when every row is flagged as noise),
# so show NaN in that case instead of aborting a "Run All".
# The noise label -1 is still counted as a label, exactly as in a direct call.
silhouette_score(numeric_cols, pred) if 2 <= np.unique(pred).size <= len(pred) - 1 else np.nan

In [None]:
# `make_blobs` is imported from the public `sklearn.datasets` namespace; the
# `sklearn.datasets.samples_generator` module was removed in scikit-learn 0.24.
from sklearn.datasets import make_blobs

# Synthetic 2-D data set: 3000 points drawn around two centres with std 0.5.
# Seeding the global NumPy RNG makes the generated sample reproducible.
np.random.seed(0)
centers = [[3, 3], [-1, -1]]
n_clusters = len(centers)
X, labels_true = make_blobs(n_samples=3000, 
                            centers=centers, 
                            cluster_std=0.5)

In [None]:
# Split the sample matrix into two coordinate lists: first column -> a, second column -> b.
a = list(X[:, 0])
b = list(X[:, 1])

In [None]:
# Raw view of the synthetic points, before any clustering is applied.
plt.figure(figsize=(12, 8))
plt.scatter(x=a, y=b)

In [None]:
from sklearn.cluster import DBSCAN

# Run DBSCAN (default settings) on the synthetic points, keep the per-point
# labels in `pred`, and score the labelling with the silhouette coefficient.
clst = DBSCAN().fit(X)
pred = clst.labels_
silhouette_score(X=X, labels=pred)

In [None]:
# Synthetic points coloured by their DBSCAN label.
# The title names what is actually plotted; no PCA projection is involved here.
plt.figure(figsize=(12,8))

plt.scatter(a, b, c=pred, s=50, cmap='viridis')
plt.title('DBSCAN clusters')