<a href="https://colab.research.google.com/github/mnocerino23/NBA-Player-Classifier/blob/main/clustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [638]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.cluster import AgglomerativeClustering
import scipy.cluster.hierarchy as sch
from google.colab import drive
# Mount Google Drive at /content/drive (Colab-only); the CSV read in the next
# cell lives under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [639]:
# Load the 2022-23 player table from the mounted Drive folder.
complete = pd.read_csv(
    '/content/drive/MyDrive/Data_Science_Projects/NBA/complete22-23.csv'
)

In [640]:
# Collapse multi-position labels (anything other than C, PG, SG, SF or PF) to
# the first position listed.
#
# Splitting on '-' before truncating keeps a one-letter primary position
# intact: a plain [:2] slice would turn 'C-PF' into 'C-', which matches none of
# the position filters used later. Assumes '-' is the separator used in the
# CSV -- TODO confirm against the data.
# The trailing .str[:2] keeps the previous two-character cap for any other
# over-long label.
# The vectorised .str operations replace the row-by-row iterrows() loop and
# pass missing values through instead of raising on len(NaN).
complete['Pos'] = complete['Pos'].str.split('-').str[0].str[:2]

# Cluster all players based on some of the most important features

In [641]:
# Alias (not a copy) of the full table; `players` is rebound to a feature-only
# frame in a later cell.
players = complete

In [642]:
# Capture the names now: the feature-selection cell below drops the 'Player'
# column, so this must run before it.
player_names = players['Player'].tolist()

In [643]:
# Keep only the columns used as clustering features for the all-player run.
players = players.loc[:, [
    '3P%',
    'TS%',
    'TRB',
    'AST',
    'TOV%',
    'two_way',
    'DWS_per_G',
    'OWS_per_G',
    'USG%',
    'OWS_div_USG',
    'PTs_per_36',
]]

In [644]:
# Ward-linkage agglomerative clustering of every player into 18 groups.
# NOTE(review): no scaling is applied in this notebook before fitting; if the
# CSV columns are on their raw scales, Euclidean distance will be dominated by
# the larger-magnitude features -- confirm whether standardising is wanted.
model = AgglomerativeClustering(
    n_clusters=18,
    metric='euclidean',
    linkage='ward',
).fit(players)
player_labels = model.labels_

In [645]:
# Name -> label lookup (a repeated name keeps only its last label here).
p = dict(zip(player_names, player_labels))

# Group names by cluster in a single pass. Walking the (name, label) pairs
# directly, rather than the dict above, keeps every row even when two rows
# share a player name, and avoids rescanning all players once per cluster.
members = {}
for name, label in zip(player_names, player_labels):
  members.setdefault(int(label), []).append(name)

# Derive the cluster ids from the labels instead of hard-coding 0..17, so this
# cell cannot drift out of sync with n_clusters in the fitting cell.
clusters = sorted(members)
for cluster in clusters:
  print(cluster,": ")
  for name in members[cluster]:
    print(name)

0 : 
Ochai Agbaji
Nickeil Alexander-Walker
Will Barton
Keita Bates-Diop
MarJon Beauchamp
Dāvis Bertāns
James Bouknight
Malaki Branham
Oshae Brissett
Greg Brown III
Vlatko Čančar
Julian Champagnie
Kennedy Chandler
Josh Christopher
Johnny Davis
Moussa Diabaté
Mamadi Diakite
Ousmane Dieng
PJ Dozier
Goran Dragić
Chris Duarte
David Duke Jr.
Keon Ellis
Bruno Fernando
Malachi Flynn
Simone Fontecchio
Evan Fournier
Rudy Gay
Taj Gibson
Eric Gordon
A.J. Green
Jeff Green
Josh Green
AJ Griffin
R.J. Hampton
Joe Harris
Kevon Harris
Killian Hayes
Danuel House Jr.
Serge Ibaka
Reggie Jackson
Keon Johnson
Stanley Johnson
Johnny Juzang
Corey Kispert
Nathan Knight
Kevin Knox
Furkan Korkmaz
Nassir Little
Sandro Mamukelashvili
Tre Mann
Boban Marjanović
Kenyon Martin Jr.
Tyrese Martin
Garrison Mathews
Doug McDermott
Rodney McGruder
Patty Mills
Markieff Morris
Svi Mykhailiuk
Andrew Nembhard
Aaron Nesmith
Zeke Nnaji
Eugene Omoruyi
Payton Pritchard
Duncan Robinson
Isaiah Roby
Matt Ryan
Shaedon Sharpe
Jeremy Soch

# Divide the dataset into the various positions then cluster each of these individually

In [646]:
# Split the league into three positional pools that are clustered separately.
# NOTE(review): 'SG' is in both the guard and the wing mask, so shooting guards
# are clustered in both groups -- confirm the overlap is intended.
guards = complete.loc[complete['Pos'].isin(['PG', 'SG'])]
wing_players = complete.loc[complete['Pos'].isin(['SG', 'SF'])]
bigman = complete.loc[complete['Pos'].isin(['PF', 'C'])]

# Guard Clustering

In [647]:
# Display the available columns to choose clustering features from.
guards.columns

Index(['Player', 'Pos', 'Age', 'Tm', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%', '3P',
       '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%', 'ORB',
       'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PER', 'TS%',
       '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%',
       'USG%', 'OWS', 'DWS', 'WS', 'WS/48', 'OBPM', 'DBPM', 'BPM', 'VORP',
       'Salary', 'WS_per_G', 'OWS_per_G', 'DWS_per_G', 'AST_TOV',
       'OWS_div_USG', 'TS_times_USG', 'bigmanD', 'foul_rate', 'three_rate',
       'PTs_per_36', 'PF_per_dplay', 'FG_TOV', 'two_way'],
      dtype='object')

In [648]:
# Capture the names now: the next cell rebinds `guards` to a frame without the
# 'Player' column.
guard_names = guards['Player'].tolist()

In [649]:
# Keep only the columns used as clustering features for guards.
guards = guards.loc[:, [
    'FG%',
    '3P%',
    'TS%',
    'TRB',
    'AST',
    'TOV',
    'two_way',
    'DWS_per_G',
    'OWS_per_G',
    'USG%',
    'OWS_div_USG',
]]

In [650]:
# Ward-linkage agglomerative clustering of the guard pool into 10 groups.
model = AgglomerativeClustering(
    n_clusters=10,
    metric='euclidean',
    linkage='ward',
).fit(guards)
guard_labels = model.labels_

In [651]:
# Display the raw label array: one cluster id per guard row, in row order.
guard_labels

array([7, 0, 6, 1, 6, 7, 3, 4, 0, 1, 0, 3, 0, 8, 8, 4, 2, 2, 0, 3, 6, 3,
       0, 6, 8, 6, 2, 7, 2, 0, 7, 1, 8, 0, 4, 0, 2, 0, 7, 0, 1, 6, 5, 1,
       8, 7, 2, 2, 2, 3, 4, 7, 2, 7, 7, 2, 3, 3, 4, 4, 3, 5, 8, 2, 0, 2,
       2, 6, 0, 8, 3, 2, 0, 9, 2, 0, 0, 7, 0, 3, 0, 7, 4, 7, 1, 1, 1, 4,
       2, 0, 0, 8, 2, 0, 6, 7, 8, 6, 2, 7, 7, 4, 0, 0, 3, 0, 1, 6, 0, 8,
       0, 7, 7, 7, 6, 7, 3, 8, 7, 7, 0, 3, 2, 0, 7, 5, 1, 7, 7, 9, 8, 3,
       3, 2, 0, 7, 1, 7, 2, 8, 1, 4, 1, 0, 7, 1, 1, 1, 0, 6, 1, 7, 0, 7,
       2, 0, 1, 8, 6, 8, 0, 0, 2, 3, 2, 3, 8, 2, 0, 1, 0, 7, 2, 0, 1, 0,
       6, 4, 0, 0, 0, 1, 2, 7, 0, 2, 7, 3, 8, 3, 0, 3, 3, 2, 1])

In [652]:
# Name -> label lookup (a repeated name keeps only its last label here).
g = dict(zip(guard_names, guard_labels))

# Group names by cluster in a single pass. Walking the (name, label) pairs
# directly, rather than the dict above, keeps every row even when two rows
# share a player name, and avoids rescanning all guards once per cluster.
members = {}
for name, label in zip(guard_names, guard_labels):
  members.setdefault(int(label), []).append(name)

# Derive the cluster ids from the labels instead of hard-coding 0..9, so this
# cell cannot drift out of sync with n_clusters in the fitting cell.
clusters = sorted(members)
for cluster in clusters:
  print(cluster,": ")
  for name in members[cluster]:
    print(name)

0 : 
Nickeil Alexander-Walker
Dalano Banton
Will Barton
Malik Beasley
Christian Braun
Alec Burks
Jordan Clarkson
Seth Curry
Dyson Daniels
Terence Davis
Hamidou Diallo
Devonte' Graham
Josh Green
Tim Hardaway Jr.
Gary Harris
Joe Harris
Killian Hayes
George Hill
Reggie Jackson
Ty Jerome
Tre Jones
Damion Lee
Saben Lee
Kira Lewis Jr.
Théo Maledon
Tre Mann
Jordan McLaughlin
Shake Milton
Raul Neto
Gary Payton II
Payton Pritchard
Josh Richardson
Terrence Ross
Collin Sexton
Landry Shamet
Jaden Springer
Edmond Sumner
Cam Thomas
Matisse Thybulle
Devin Vassell
Gabe Vincent
Lonnie Walker IV
Lindy Waters III
Aaron Wiggins
1 : 
Jose Alvarado
RJ Barrett
Mike Conley
Spencer Dinwiddie
Luguentz Dort
Talen Horton-Tucker
Kevin Huerter
Bones Hyland
Damian Lillard
Malik Monk
Jaylen Nowell
Victor Oladipo
Cameron Payne
Jordan Poole
Kevin Porter Jr.
Norman Powell
Austin Reaves
Terry Rozier
Jalen Suggs
Klay Thompson
John Wall
Trae Young
2 : 
Brandon Boston Jr.
James Bouknight
Kennedy Chandler
Josh Christopher
Jo

# Wing Player Clustering

In [653]:
# Capture the names now: the next cell rebinds `wing_players` to a frame
# without the 'Player' column.
wing_names = wing_players['Player'].tolist()

In [654]:
# Keep only the columns used as clustering features for wings.
wing_players = wing_players.loc[:, [
    'FG%',
    '3P%',
    'TS%',
    'TRB',
    'AST',
    'TOV',
    'two_way',
    'DWS_per_G',
    'OWS_per_G',
    'USG%',
    'OWS_div_USG',
]]

In [655]:
# Ward-linkage agglomerative clustering of the wing pool into 13 groups.
model = AgglomerativeClustering(
    n_clusters=13,
    metric='euclidean',
    linkage='ward',
).fit(wing_players)
wing_labels = model.labels_

In [656]:
# Name -> label lookup (a repeated name keeps only its last label here).
w = dict(zip(wing_names, wing_labels))

# Group names by cluster in a single pass. Walking the (name, label) pairs
# directly, rather than the dict above, keeps every row even when two rows
# share a player name, and avoids rescanning all wings once per cluster.
members = {}
for name, label in zip(wing_names, wing_labels):
  members.setdefault(int(label), []).append(name)

# Derive the cluster ids from the labels instead of hard-coding 0..12, so this
# cell cannot drift out of sync with n_clusters in the fitting cell.
clusters = sorted(members)
for cluster in clusters:
  print(cluster,": ")
  for name in members[cluster]:
    print(name)

0 : 
Deni Avdija
RJ Barrett
Bogdan Bogdanović
Dillon Brooks
Troy Brown Jr.
Jamal Cain
Pat Connaughton
Luguentz Dort
Ayo Dosunmu
Quentin Grimes
Gary Harris
Gordon Hayward
Buddy Hield
Kevin Huerter
Joe Ingles
Isaiah Joe
Jalen Johnson
Derrick Jones Jr.
Luke Kennard
Terance Mann
Naji Marshall
Jalen McDaniels
Keegan Murray
Royce O'Neale
Josh Okogie
Isaac Okoro
Cedi Osman
Austin Reaves
Max Strus
Coby White
1 : 
Malik Beasley
Saddiq Bey
Alec Burks
Jordan Clarkson
Seth Curry
Terence Davis
Hamidou Diallo
Tim Hardaway Jr.
Talen Horton-Tucker
De'Andre Hunter
Damion Lee
Shake Milton
Malik Monk
Jaylen Nowell
Jordan Nwora
Taurean Prince
Cam Reddish
Josh Richardson
Terrence Ross
Collin Sexton
Landry Shamet
Jaden Springer
Edmond Sumner
Cam Thomas
Devin Vassell
Lonnie Walker IV
2 : 
OG Anunoby
Mikal Bridges
Josh Giddey
Tyler Herro
Herbert Jones
Caris LeVert
Lauri Markkanen
CJ McCollum
Khris Middleton
Trey Murphy III
Dejounte Murray
Derrick White
Jalen Williams
3 : 
Bradley Beal
Tyrese Maxey
Victor Olad

# Bigman Clustering

In [657]:
# Names of the PF/C pool, in row order, for pairing with cluster labels later.
big_names = bigman['Player'].tolist()

In [658]:
# Feature-only view of the big-man pool; `bigman` itself is left intact.
bigs = bigman.loc[:, [
    'FG%',
    '3P%',
    'TS%',
    'TRB',
    'AST',
    'TOV',
    'two_way',
    'DWS_per_G',
    'OWS_per_G',
    'USG%',
    'OWS_div_USG',
]]

In [659]:
# Ward-linkage agglomerative clustering of the big-man pool into 15 groups.
model = AgglomerativeClustering(
    n_clusters=15,
    metric='euclidean',
    linkage='ward',
).fit(bigs)
big_labels = model.labels_

In [660]:
# Name -> label lookup (a repeated name keeps only its last label here).
b = dict(zip(big_names, big_labels))

# Group names by cluster in a single pass. Walking the (name, label) pairs
# directly, rather than the dict above, keeps every row even when two rows
# share a player name, and avoids rescanning all bigs once per cluster.
members = {}
for name, label in zip(big_names, big_labels):
  members.setdefault(int(label), []).append(name)

# Derive the cluster ids from the labels instead of hard-coding 0..14, so this
# cell cannot drift out of sync with n_clusters in the fitting cell.
clusters = sorted(members)
for cluster in clusters:
  print(cluster,": ")
  for name in members[cluster]:
    print(name)

0 : 
Marvin Bagley III
Harrison Barnes
Nicolas Batum
Darius Bazley
Goga Bitadze
Moses Brown
Zach Collins
Dewayne Dedmon
Dorian Finney-Smith
Wenyen Gabriel
Jerami Grant
JaMychal Green
Blake Griffin
Rui Hachimura
Montrezl Harrell
Sam Hauser
Jaxson Hayes
Luke Kornet
Jonathan Kuminga
Jock Landale
Trey Lyles
Marcus Morris
Mike Muscala
Dwight Powell
Orlando Robinson
Jeremiah Robinson-Earl
Dario Šarić
Day'Ron Sharpe
Jericho Sims
Jalen Smith
Lamar Stevens
Dean Wade
T.J. Warren
Jaylin Williams
James Wiseman
Thaddeus Young
1 : 
Bojan Bogdanović
Bruno Fernando
Luka Garza
Jeff Green
Kenneth Lofton Jr.
Sandro Mamukelashvili
JaVale McGee
Chimezie Metu
Georges Niang
Zeke Nnaji
David Roddy
Jabari Smith Jr.
Jeremy Sochan
Isaiah Stewart
Obi Toppin
Trendon Watford
2 : 
Wendell Carter Jr.
Brandon Clarke
Andre Drummond
Aaron Gordon
Jakob Poeltl
Bobby Portis
Pascal Siakam
Karl-Anthony Towns
Myles Turner
Jonas Valančiūnas
Christian Wood
Ivica Zubac
3 : 
Dāvis Bertāns
Khem Birch
Greg Brown III
John Butler
Mam

# Star Player Clustering (Players averaging over 20 points a game)

In [661]:
# Star pool: rows whose 'PTS' value is strictly above 20.
stars = complete[complete['PTS'] > 20]

In [662]:
# Capture the names now: the next cell rebinds `stars` to a frame without the
# 'Player' column.
star_names = stars['Player'].tolist()

In [663]:
# Keep only the columns used as clustering features for the star pool.
stars = stars.loc[:, [
    'FG%',
    '3P%',
    'TS%',
    'TRB',
    'AST',
    'TOV',
    'two_way',
    'DWS_per_G',
    'OWS_per_G',
    'USG%',
    'OWS_div_USG',
]]

In [664]:
# Cluster the star pool into 10 groups with Ward linkage.
model = AgglomerativeClustering(n_clusters=10, metric='euclidean', linkage='ward')
model.fit(stars)
star_labels = model.labels_

print("Star clustering - ward linkage")
# Name -> label lookup (a repeated name keeps only its last label here).
s = dict(zip(star_names, star_labels))

# Group names by cluster in a single pass. Walking the (name, label) pairs
# directly, rather than the dict above, keeps every row even when two rows
# share a player name, and avoids rescanning all stars once per cluster.
members = {}
for name, label in zip(star_names, star_labels):
  members.setdefault(int(label), []).append(name)

# Derive the cluster ids from the labels instead of hard-coding 0..9, so the
# listing cannot drift out of sync with n_clusters above.
clusters = sorted(members)
for cluster in clusters:
  print(cluster,": ")
  for name in members[cluster]:
    print(name)

Star clustering - ward linkage
0 : 
Bojan Bogdanović
Jordan Clarkson
Jerami Grant
Jalen Green
Keldon Johnson
Anfernee Simons
1 : 
Bradley Beal
Kyle Kuzma
Tyrese Maxey
Kelly Oubre Jr.
Jordan Poole
Terry Rozier
Klay Thompson
2 : 
Giannis Antetokounmpo
Anthony Davis
Joel Embiid
Nikola Jokić
3 : 
Devin Booker
Stephen Curry
DeMar DeRozan
Anthony Edwards
Darius Garland
Paul George
Julius Randle
4 : 
Luka Dončić
Jayson Tatum
5 : 
Bam Adebayo
Jaylen Brown
James Harden
LeBron James
Kawhi Leonard
Ja Morant
Kristaps Porziņģis
Zion Williamson
6 : 
LaMelo Ball
Mikal Bridges
Jalen Brunson
De'Aaron Fox
Tyrese Haliburton
Tyler Herro
Lauri Markkanen
CJ McCollum
Dejounte Murray
7 : 
Desmond Bane
Brandon Ingram
Kyrie Irving
Zach LaVine
Pascal Siakam
Karl-Anthony Towns
8 : 
Jimmy Butler
Kevin Durant
Shai Gilgeous-Alexander
Donovan Mitchell
9 : 
Damian Lillard
Trae Young


In [665]:
# Cluster the star pool into 10 groups with average linkage, for comparison
# with the Ward result.
model = AgglomerativeClustering(n_clusters=10, metric='euclidean', linkage='average')
model.fit(stars)
star_labels = model.labels_

print("Star clustering - average linkage")
# Name -> label lookup (a repeated name keeps only its last label here).
s = dict(zip(star_names, star_labels))

# Group names by cluster in a single pass. Walking the (name, label) pairs
# directly, rather than the dict above, keeps every row even when two rows
# share a player name, and avoids rescanning all stars once per cluster.
members = {}
for name, label in zip(star_names, star_labels):
  members.setdefault(int(label), []).append(name)

# Derive the cluster ids from the labels instead of hard-coding 0..9, so the
# listing cannot drift out of sync with n_clusters above.
clusters = sorted(members)
for cluster in clusters:
  print(cluster,": ")
  for name in members[cluster]:
    print(name)

Star clustering - average linkage
0 : 
Giannis Antetokounmpo
Anthony Davis
Nikola Jokić
1 : 
Bojan Bogdanović
Jordan Clarkson
Jerami Grant
Jalen Green
Keldon Johnson
Anfernee Simons
2 : 
Jimmy Butler
Luka Dončić
Kevin Durant
Shai Gilgeous-Alexander
Donovan Mitchell
3 : 
Desmond Bane
Devin Booker
Stephen Curry
DeMar DeRozan
Anthony Edwards
Darius Garland
Paul George
Brandon Ingram
Kyrie Irving
Zach LaVine
Julius Randle
Pascal Siakam
Karl-Anthony Towns
4 : 
Bradley Beal
Kyle Kuzma
Tyrese Maxey
Kelly Oubre Jr.
Jordan Poole
Terry Rozier
Klay Thompson
5 : 
Bam Adebayo
Jaylen Brown
James Harden
LeBron James
Kawhi Leonard
Ja Morant
Kristaps Porziņģis
Zion Williamson
6 : 
Jayson Tatum
7 : 
LaMelo Ball
Mikal Bridges
Jalen Brunson
De'Aaron Fox
Tyrese Haliburton
Tyler Herro
Lauri Markkanen
CJ McCollum
Dejounte Murray
8 : 
Joel Embiid
9 : 
Damian Lillard
Trae Young


In [666]:
# Cluster the star pool into 10 groups with complete linkage, for comparison
# with the Ward and average-linkage results.
model = AgglomerativeClustering(n_clusters=10, metric='euclidean', linkage='complete')
model.fit(stars)
star_labels = model.labels_

print("Star clustering - complete linkage")
# Name -> label lookup (a repeated name keeps only its last label here).
s = dict(zip(star_names, star_labels))

# Group names by cluster in a single pass. Walking the (name, label) pairs
# directly, rather than the dict above, keeps every row even when two rows
# share a player name, and avoids rescanning all stars once per cluster.
members = {}
for name, label in zip(star_names, star_labels):
  members.setdefault(int(label), []).append(name)

# Derive the cluster ids from the labels instead of hard-coding 0..9, so the
# listing cannot drift out of sync with n_clusters above.
clusters = sorted(members)
for cluster in clusters:
  print(cluster,": ")
  for name in members[cluster]:
    print(name)

Star clustering - complete linkage
0 : 
Bojan Bogdanović
Jordan Clarkson
Jerami Grant
Jalen Green
Keldon Johnson
Anfernee Simons
1 : 
Desmond Bane
Devin Booker
Stephen Curry
DeMar DeRozan
Anthony Edwards
Darius Garland
Paul George
Brandon Ingram
Kyrie Irving
Zach LaVine
Julius Randle
Pascal Siakam
Karl-Anthony Towns
2 : 
LaMelo Ball
Mikal Bridges
Jalen Brunson
De'Aaron Fox
Tyrese Haliburton
Tyler Herro
Lauri Markkanen
CJ McCollum
Dejounte Murray
3 : 
Giannis Antetokounmpo
Anthony Davis
Nikola Jokić
4 : 
Jimmy Butler
Luka Dončić
Kevin Durant
Shai Gilgeous-Alexander
Donovan Mitchell
5 : 
Bam Adebayo
Jaylen Brown
James Harden
LeBron James
Kawhi Leonard
Ja Morant
Kristaps Porziņģis
Zion Williamson
6 : 
Bradley Beal
Kyle Kuzma
Tyrese Maxey
Kelly Oubre Jr.
Jordan Poole
Terry Rozier
Klay Thompson
7 : 
Jayson Tatum
8 : 
Damian Lillard
Trae Young
9 : 
Joel Embiid
