## Imports

In [40]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import random


from sklearn.preprocessing import StandardScaler
from matplotlib import rcParams
from helper_funtions import top_similar_players_in_cluster

rcParams['axes.titlepad'] = 20

plt.style.use('ggplot')

ImportError: cannot import name 'top_similar_players_in_cluster' from 'helper_funtions' (/Users/olive/github/fantasynba/helper_funtions.py)

In [41]:
# Single seed value shared by the two global RNGs seeded below.
# NOTE: the KMeans models later in this notebook pass random_state=0
# explicitly, so they are not driven by this value.
SEED = 7

# Seed NumPy's global RNG and Python's `random` module.
np.random.seed(SEED)
random.seed(SEED)

## Load Data

In [None]:
# Absolute location of the rankings workbook on the author's machine.
DATA_PATH = '/Users/olive/github/fantasynba/data/rankings.xlsx'

# Read the workbook into a DataFrame.
nba = pd.read_excel(DATA_PATH)

# Preview the first ten rows.
nba.head(10)

## Data pre-processing

In [None]:
# Remove the five columns that the rest of the notebook does not use.
# A single call validates all labels before anything is removed, so a
# missing column cannot leave the frame partially trimmed.
nba.drop(columns=['TOTAL', 'R#', 'GP', 'TEAM', 'POS'], inplace=True)

In [None]:
# True if at least one cell anywhere in the frame is missing.
nba.isnull().any().any()

In [None]:
# Clean the player-name and percentage columns element by element.
# NOTE(review): `is_name` and `remove_text_inside_brackets` are neither
# defined nor imported in the visible notebook (only
# `top_similar_players_in_cluster` is imported from helper_funtions) —
# confirm where they come from, otherwise this cell raises NameError.
nba['PLAYER'] = nba['PLAYER'].apply(is_name)
nba['FG%'] = nba['FG%'].apply(remove_text_inside_brackets)
nba['FT%'] = nba['FT%'].apply(remove_text_inside_brackets)

nba.head()

In [None]:
nba.shape

In [None]:
# Column means of the numeric stats only. `PLAYER` holds names, and since
# pandas 2.0 DataFrame.mean() raises TypeError on non-numeric columns
# unless they are excluded explicitly.
nba.mean(numeric_only=True)

# Exploratory data analysis (EDA)

In [None]:
#sns.pairplot(nba[["MPG", "FG%", "FT%", "3PM", "PTS", "AST", "STL", "BLK", "TO"]])

In [None]:
# Pairwise correlation between the stat columns, drawn as an annotated heatmap.
stat_columns = ["FG%", "FT%", "3PM", "PTS", "AST", "STL", "BLK", "TO"]
correlation = nba[stat_columns].corr()
sns.heatmap(correlation, annot=True)

# KMeans Clustering

In [None]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

In [None]:
# Feature matrix: the numeric columns of `nba`, minus any column that
# contains a missing value.
numeric_part = nba._get_numeric_data()
X = numeric_part.dropna(axis=1)

# Standardise the features before projecting them.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Project the standardised features onto two principal components.
pca_2 = PCA(2)
pca_2.fit(X_scaled)
X_pca = pca_2.transform(X_scaled)

# Elbow method: fit KMeans for k = 1..9 and record the inertia of each fit.
cluster_counts = range(1, 10)
distortions = []

for k in cluster_counts:
    km = KMeans(
        n_clusters=k,
        init='random',
        n_init=10,
        max_iter=300,
        tol=1e-04,
        random_state=0,
    )
    distortions.append(km.fit(X_pca).inertia_)

plt.plot(cluster_counts, distortions, marker='o')
plt.xlabel('Number of clusters')
plt.ylabel('Distortion')

In [None]:
# Final model: nine clusters fitted on the two PCA components.
kmeans_model = KMeans(
    n_clusters=9,
    init='random',
    n_init=10,
    max_iter=300,
    tol=1e-04,
    random_state=0,
)
kmeans_model.fit(X_pca)

labels = kmeans_model.labels_
centers = kmeans_model.cluster_centers_

# Players coloured by cluster id, with the cluster centres overlaid as
# large translucent black markers.
pc1, pc2 = X_pca[:, 0], X_pca[:, 1]
plt.scatter(x=pc1, y=pc2, c=labels)
plt.scatter(x=centers[:, 0], y=centers[:, 1], c='black', s=200, alpha=0.5)

In [None]:
# Store each row's KMeans cluster id alongside its stats, then preview
# the first twelve rows.
nba['Cluster'] = kmeans_model.labels_
nba.head(12)

In [None]:
# Column means of the numeric columns only. `PLAYER` holds names, and since
# pandas 2.0 DataFrame.mean() raises TypeError on non-numeric columns
# unless they are excluded explicitly.
nba.mean(numeric_only=True)

In [None]:


# NOTE(review): `get_cluster` is neither defined nor imported in the visible
# notebook — confirm its source, otherwise this cell raises NameError.
print(get_cluster(7))

## Organize players into clusters

In [None]:
# Plotting helper: one subplot per stat category for a set of similar players

        



def _bar_ylim_padding(peak):
    """Return the headroom to leave above the tallest bar of height *peak*."""
    if peak > 5:
        return 5
    if peak > 2:
        return 2
    if peak > 1:
        return 0.3
    return 0.2


def visualize_data_for_similar_players(player,
                                       plt_type,
                                       categories=('FG%', 'FT%', '3PM', 'PTS', 'TREB', 'AST', 'STL', 'BLK', 'TO')
                                       ):
    """Plot each stat category of *player* on its own subplot of a 3x3 grid.

    Args:
        player: DataFrame with a 'PLAYER' column and one column per entry
            in *categories*; every row is drawn as one bar / stem.
        plt_type: 'vlines' for vertical stems topped with a marker, or
            'bar' for bars annotated with their value. Any other value
            leaves the subplots empty apart from their titles.
        categories: Column names to plot, one per subplot. The grid is
            fixed at 3x3, so only the first nine entries are drawn.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Calling the cycler yields an endless iterator over the style colours.
    colors = plt.rcParams['axes.prop_cycle']()

    fig, axes = plt.subplots(3, 3, figsize=(12, 12))

    for ax, cat in zip(axes.flatten(), categories):
        c = next(colors)['color']

        # Compare by value: `is` tests object identity and only happens to
        # work for literals that the interpreter interns.
        if plt_type == 'vlines':
            ax.vlines(x=player['PLAYER'], ymin=0, ymax=player[cat], color=c, alpha=0.7, linewidth=2)
            ax.scatter(x=player['PLAYER'], y=player[cat], color=c, alpha=0.7)
        elif plt_type == 'bar':
            names = player['PLAYER']
            values = player[cat]

            ax.bar(names, values, color=c, width=.5)
            ax.set_xticklabels(names, rotation=60, horizontalalignment='right')

            # Write each bar's value just above it.
            for i, val in enumerate(values.values):
                ax.text(i, val, str(float(val)),
                        horizontalalignment='center',
                        verticalalignment='bottom',
                        fontdict={'fontweight': 500, 'size': 12})

            # The y-limit depends only on the tallest bar, so set it once
            # per subplot; skip it when there is nothing to measure.
            if len(values):
                peak = values.max()
                if pd.notna(peak):
                    ax.set_ylim([0, peak + _bar_ylim_padding(peak)])

        ax.title.set_text(cat)

    plt.tight_layout()
    plt.show()

## Centers (defensive and well-rounded bigs)

In [None]:
# Rows whose KMeans cluster id is 0.
in_cluster_0 = nba['Cluster'] == 0
db = nba.loc[in_cluster_0]

# The same rows, indexed by player name.
DEFENSIVE_BIGS = db.set_index(['PLAYER'])

In [None]:
# Build the recommender table for this cluster and preview it.
# NOTE(review): `create_recommender` is neither defined nor imported in the
# visible notebook — confirm its source and what it returns.
db_recommender = create_recommender(DEFENSIVE_BIGS)
db_recommender.head()

In [None]:
# Players in the cluster-0 group most similar to Rudy Gobert.
# The recommender built for this cluster is named `db_recommender`;
# the former `db_recommender_df` was never defined and raised NameError.
GOBERT = top_similar_players_in_cluster(
    search='Rudy Gobert',
    cluster_category=db,
    recommender_df=db_recommender,
    max_range=5
)

In [None]:
# Look up the stats of the players found above (identified by GOBERT's index).
# NOTE(review): `show_data_for_similar_players` is neither defined nor
# imported in the visible notebook — confirm its source.
GOBERT_SIMILAR = show_data_for_similar_players(
    cluster = 0,
    players = GOBERT.index.values
)

In [None]:
# Draw one annotated bar chart per stat category for the rows in GOBERT_SIMILAR.
visualize_data_for_similar_players(
    player = GOBERT_SIMILAR, 
    plt_type='bar'
)

In [None]:
# Rows whose KMeans cluster id is 1.
in_cluster_1 = nba['Cluster'] == 1
wrb = nba.loc[in_cluster_1]

# The same rows, indexed by player name.
WELL_ROUNDED_BIGS = wrb.set_index(['PLAYER'])

In [None]:
wrb_recommender = create_recommender(WELL_ROUNDED_BIGS)
wrb_recommender.head()

In [None]:
# Players in the cluster-1 group most similar to Richaun Holmes.
# NOTE(review): presumably returns a frame indexed by player, since
# `HOLMES.index.values` is passed on as the player list below — confirm
# against helper_funtions.
HOLMES = top_similar_players_in_cluster(
    search='Richaun Holmes',
    cluster_category=wrb,
    recommender_df=wrb_recommender,
    max_range=5
)

HOLMES

In [None]:
HOLMES_SIMILAR = show_data_for_similar_players(
    cluster=1,
    players=HOLMES.index.values
)

In [None]:
visualize_data_for_similar_players(HOLMES_SIMILAR, 'bar')

## All around threats

In [None]:
# Rows whose KMeans cluster id is 2.
in_cluster_2 = nba['Cluster'] == 2
at = nba.loc[in_cluster_2]

# The same rows, indexed by player name.
ALL_AROUND_THREATS = at.set_index(['PLAYER'])

In [None]:
at_recommender = create_recommender(ALL_AROUND_THREATS)
at_recommender.head()

In [None]:
# Similar-player lookups for three members of the cluster-2 group. All three
# share the same cluster frame and recommender table.
HOLIDAY = top_similar_players_in_cluster(
    search='Jrue Holiday',
    cluster_category=at,
    recommender_df=at_recommender,
    max_range=5
)

LAVINE = top_similar_players_in_cluster(
    search='Zach LaVine',
    cluster_category=at,
    recommender_df=at_recommender,
    max_range=5
)

DONCIC = top_similar_players_in_cluster(
    search = 'Luka Doncic',
    cluster_category=at,
    recommender_df=at_recommender,
    max_range=5
)

In [37]:
HOLIDAY_SIMILAR = show_data_for_similar_players(
    cluster=2,
    players = HOLIDAY.index.values
)

NameError: name 'HOLIDAY' is not defined

In [None]:
LAVINE_SIMILAR = show_data_for_similar_players(
    cluster = 2,
    players = LAVINE.index.values
)

In [None]:
DONCIC_SIMILAR = show_data_for_similar_players(
    cluster = 2,
    players = DONCIC.index.values
)

DONCIC_SIMILAR

In [None]:
visualize_data_for_similar_players(
    player = DONCIC_SIMILAR,
    plt_type='bar'
)

## Athletic bigs

In [None]:
# Rows whose KMeans cluster id is 3.
in_cluster_3 = nba['Cluster'] == 3
ab = nba.loc[in_cluster_3]

# The same rows, indexed by player name.
ATHLETIC_BIGS = ab.set_index(['PLAYER'])

In [None]:
ab_recommender = create_recommender(ATHLETIC_BIGS)
ab_recommender.head()

In [None]:
SIMMONS = top_similar_players_in_cluster(
    search='Ben Simmons',
    cluster_category=ab,
    recommender_df=ab_recommender,
    max_range=4
)

In [None]:
SIMMONS_SIMILAR = show_data_for_similar_players(
    cluster = 3,
    players = SIMMONS.index.values
)

In [None]:
visualize_data_for_similar_players(
    player = SIMMONS_SIMILAR,
    plt_type='bar'
)

## Shooters

In [None]:
# Rows whose KMeans cluster id is 5.
in_cluster_5 = nba['Cluster'] == 5
sh = nba.loc[in_cluster_5]

# The same rows, indexed by player name.
SHOOTERS = sh.set_index(['PLAYER'])

In [None]:
sh_recommender = create_recommender(SHOOTERS)
sh_recommender.head()

In [None]:
MARKKANEN = top_similar_players_in_cluster(
    search='Lauri Markkanen',
    cluster_category=sh,
    recommender_df=sh_recommender,
    max_range=5
)

MARKKANEN

In [None]:
MARKKANEN_SIMILAR = show_data_for_similar_players(
    cluster=5,
    players = MARKKANEN.index.values
)

In [None]:
visualize_data_for_similar_players(MARKKANEN_SIMILAR, 'bar')

## Guards

In [None]:
# Rows whose KMeans cluster id is 7.
in_cluster_7 = nba['Cluster'] == 7
g = nba.loc[in_cluster_7]

# The same rows, indexed by player name.
GUARDS = g.set_index(['PLAYER'])

In [None]:
g_recommender = create_recommender(GUARDS)
g_recommender.head()

In [None]:
# Similar-player lookups for two members of the cluster-7 group. Both share
# the same cluster frame and recommender table; note the different
# `max_range` values (4 vs 5).
DEVONTE = top_similar_players_in_cluster(
    search="Devonte' Graham",
    cluster_category=g,
    recommender_df=g_recommender,
    max_range=4
)

CURRY = top_similar_players_in_cluster(
    search="Stephen Curry",
    cluster_category=g,
    recommender_df=g_recommender,
    max_range=5
)

In [None]:
DEVONTE_SIMILAR = show_data_for_similar_players(
    cluster=7,
    players = DEVONTE.index.values
)

In [None]:
CURRY_SIMILAR = show_data_for_similar_players(
    cluster=7,
    players = CURRY.index.values
)

In [None]:
visualize_data_for_similar_players(
    player=CURRY_SIMILAR,
    plt_type='bar'
)