# K-Nearest Neighbors

Find similar players for a given NBA player.

In [1]:
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
import warnings
import os

warnings.filterwarnings("ignore")

In [2]:
# The CSV is expected in a `data` folder that sits one level above the current
# working directory.
project_root = os.path.dirname(os.getcwd())
path = os.path.join(project_root, "data", "nba_player_stats_2023.csv")

# Read the file, lower-case the column headers and index the rows by player name.
df = (
    pd.read_csv(path)
    .rename(columns=str.lower)
    .set_index("pname")
)
df.head()

Unnamed: 0_level_0,pos,team,age,gp,w,l,min,pts,fgm,fga,...,reb,ast,tov,stl,blk,pf,fp,dd2,td3,+/-
pname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Jayson Tatum,SF,BOS,25,74,52,22,2732.2,2225,727,1559,...,649,342,213,78,51,160,3691,31,1,470
Joel Embiid,C,PHI,29,66,43,23,2284.1,2183,728,1328,...,670,274,226,66,112,205,3706,39,1,424
Luka Doncic,PG,DAL,24,66,33,33,2390.5,2138,719,1449,...,569,529,236,90,33,166,3747,36,10,128
Shai Gilgeous-Alexander,PG,OKC,24,68,33,35,2416.0,2135,704,1381,...,329,371,192,112,65,192,3425,3,0,149
Giannis Antetokounmpo,PF,MIL,28,63,47,16,2023.6,1959,707,1278,...,742,359,246,52,51,197,3451,46,6,341


In [3]:
# Keep only the stat columns: the first two columns (pos, team) are text labels
# and cannot take part in a numeric distance computation.
stats_df = df.iloc[:, 2:]

# Min-max scale every stat so that columns with large raw magnitudes (minutes,
# points, ...) do not dominate the Euclidean distance used further down.
scaler = MinMaxScaler()

# Build a brand-new frame from the scaled values instead of assigning into the
# `iloc` slice. Writing into a slice of `df` is the chained-assignment pattern
# that raises pandas' SettingWithCopyWarning, because pandas cannot guarantee
# whether such a write lands on a view of `df` or on a copy.
subset_df = pd.DataFrame(
    scaler.fit_transform(stats_df),
    index=stats_df.index,
    columns=stats_df.columns,
)
subset_df.head()

Unnamed: 0_level_0,age,gp,w,l,min,pts,fgm,fga,fg%,3pm,...,reb,ast,tov,stl,blk,pf,fp,dd2,td3,+/-
pname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Jayson Tatum,0.26087,0.890244,0.912281,0.366667,0.922017,1.0,0.998626,1.0,0.466,0.797342,...,0.667009,0.461538,0.71,0.609375,0.264249,0.573477,0.960708,0.476923,0.034483,0.867395
Joel Embiid,0.434783,0.792683,0.754386,0.383333,0.770745,0.981124,1.0,0.851828,0.548,0.219269,...,0.688592,0.369771,0.753333,0.515625,0.580311,0.734767,0.964611,0.6,0.034483,0.831513
Luka Doncic,0.217391,0.792683,0.578947,0.55,0.806664,0.960899,0.987637,0.929442,0.496,0.614618,...,0.584789,0.7139,0.786667,0.703125,0.170984,0.594982,0.97528,0.553846,0.344828,0.600624
Shai Gilgeous-Alexander,0.217391,0.817073,0.578947,0.583333,0.815272,0.959551,0.967033,0.885824,0.51,0.192691,...,0.338129,0.500675,0.64,0.875,0.336788,0.688172,0.891491,0.046154,0.0,0.617005
Giannis Antetokounmpo,0.391304,0.756098,0.824561,0.266667,0.682803,0.880449,0.971154,0.819756,0.553,0.156146,...,0.76259,0.48448,0.82,0.40625,0.264249,0.706093,0.898257,0.707692,0.206897,0.766771


In [4]:
# Number of neighbours returned per query. When the query is itself one of the
# fitted rows it is counted among these k results.
k = 10

# Unsupervised nearest-neighbour index over the scaled stats, using plain
# Euclidean distance between the feature vectors.
model = NearestNeighbors(metric="euclidean", n_neighbors=k)
model.fit(subset_df)

In [5]:
# Player whose closest statistical matches we want to list.
name = "Anthony Davis"

# Positional row of the player; raises KeyError if the name is not in the index.
idx = subset_df.index.get_loc(name)

# Query with a one-row DataFrame (double brackets keep it 2-D) so the feature
# names match the frame the model was fitted on.
query = subset_df.iloc[[idx]]
score, idxs = model.kneighbors(query)

# `score` holds Euclidean distances, so smaller values mean more similar players.
# Skip the query player by position rather than assuming it is always the first
# result: a tie at distance 0 could otherwise drop the wrong row.
# The loop variables are deliberately not called `k`, so the notebook-level
# neighbour count `k` is not overwritten.
for dist, i in zip(score[0], idxs[0]):
    if i == idx:
        continue
    print(f"Score: {dist} \t Name: {subset_df.index[i]}")

Score: 0.7314231533271681 	 Name: Deandre Ayton
Score: 0.7326462756929811 	 Name: Bam Adebayo
Score: 0.7609872924150798 	 Name: Jarrett Allen
Score: 0.7653829748360935 	 Name: Rudy Gobert
Score: 0.8147714971690828 	 Name: Evan Mobley
Score: 0.8304238839874488 	 Name: Kristaps Porzingis
Score: 0.8867318718361652 	 Name: Jonas Valanciunas
Score: 0.9430740717985733 	 Name: Myles Turner
Score: 0.9590219394136233 	 Name: Aaron Gordon
