# K-Means Clustering: Comparing Different Years

In [None]:
import os, sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath("__file__")))))
from nbafuns import *
import seaborn.objects as snso
from sklearn.cluster import KMeans

# Directory the Synergy parquet files are read from. File names are appended
# with plain string concatenation, so the trailing slash is required.
export_DIR = "../../fdata/"

In [None]:
def get_playtypes(year, poss=200):
    """Build a player x play-type table of offensive possession shares.

    Reads ``NBA_Synergy_Player_PG_{year}.parquet`` (source of ``poss_pct``)
    and ``NBA_Synergy_Player_Tot_{year}.parquet`` (source of ``poss``) from
    ``export_DIR``, and prints how many players exist before and after the
    possession filter.

    Args:
        year: Value interpolated into the two parquet file names.
        poss: Minimum total possessions (inclusive) a player must have, summed
            over all of their rows in the totals file, to be kept.

    Returns:
        DataFrame indexed by ``player_name`` with the nine offensive play-type
        columns (mean ``poss_pct`` per player and play type, 0 where a player
        has no rows for a play type), restricted to qualifying players.
    """
    # Fixed column order; "Misc" and "Transition" are intentionally left out.
    play_types = [
        "PRBallHandler", "Isolation", "Spotup", "OffScreen", "Handoff",
        "Cut", "PRRollMan", "Postup", "OffRebound",
    ]

    df = pd.read_parquet(export_DIR + f"NBA_Synergy_Player_PG_{year}.parquet")
    df.columns = [col.lower() for col in df.columns]
    dft = pd.read_parquet(export_DIR + f"NBA_Synergy_Player_Tot_{year}.parquet")
    dft.columns = [col.lower() for col in dft.columns]

    offense = df.loc[
        df["type_grouping"] == "Offensive",
        ["player_name", "play_type", "poss_pct"],
    ].fillna(0)
    # pivot_table already returns sorted rows/columns, so no pre-sort is needed.
    shares = offense.pivot_table(
        index="player_name", columns="play_type", values="poss_pct", aggfunc="mean"
    ).fillna(0)
    # reindex (rather than drop + select) keeps the function working for a
    # season in which one of the play types has no rows at all.
    shares = shares.reindex(columns=play_types, fill_value=0.0)

    total_poss = dft.groupby("player_name")["poss"].sum()
    qualifies = total_poss >= poss
    print(f"unfiltered length: {len(total_poss)}")
    # Report with the same inclusive threshold that the filter applies.
    print(f"filtered   length: {int(qualifies.sum())}")

    # Align the mask to the pivot's own index before filtering: the totals
    # file and the offensive pivot are not guaranteed to list the same
    # players. Players without any totals are treated as not qualifying.
    keep = qualifies.reindex(shares.index, fill_value=False)

    # Return an independent copy so callers can add columns without
    # chained-assignment warnings.
    return shares.loc[keep].copy()

In [None]:
def perform_kmeans_multi(features, dfs, n=12):
    """Cluster players with randomly initialised k-means.

    Args:
        features: 2-D array of play-type shares, one row per row of ``dfs``.
        dfs: Player frame whose first two columns are identifiers and whose
            remaining columns are the play-type features, in the same order
            as the columns of ``features`` (the centers table takes its
            column names from ``dfs`` by position).
        n: Number of clusters.

    Returns:
        Tuple ``(dfc, dfcf)``:

        * ``dfc`` - copy of ``dfs`` with the role totals ``Ball``, ``Shoot``
          and ``Big`` appended and a 1-based ``label`` column at position 2,
          so that label ``i`` corresponds to row ``c{i}`` of ``dfcf``.
        * ``dfcf`` - cluster centers rounded to 3 decimals, indexed
          ``c1..c{n}``. It holds only the feature columns, so
          ``dfcf.to_numpy()`` can be fed back to k-means as initial centers.
    """
    kmeans = KMeans(
        init="random",
        n_clusters=n,
        n_init=10,
        max_iter=300,
        random_state=42,
    )
    kmeans.fit(features)
    centers = np.round(kmeans.cluster_centers_, 3)

    dfc = dfs.copy()
    dfc["Ball"] = dfc["PRBallHandler"] + dfc["Isolation"]
    dfc["Shoot"] = dfc["Spotup"] + dfc["OffScreen"] + dfc["Handoff"]
    # Fourth term is OffRebound (the original added Postup twice), so that
    # Ball + Shoot + Big covers each of the nine play types exactly once.
    dfc["Big"] = dfc["Cut"] + dfc["PRRollMan"] + dfc["Postup"] + dfc["OffRebound"]
    # 1-based to match the c1..cn index of the centers table.
    dfc["label"] = kmeans.labels_ + 1
    dfc.insert(2, "label", dfc.pop("label"))

    # Columns 3..-3 skip the two id columns, the label, and the three role
    # totals, leaving exactly the feature columns.
    dfcf = pd.DataFrame(
        centers,
        columns=dfc.columns[3:-3],
        index=[f"c{i}" for i in range(1, n + 1)],
    )
    dfcf.index.names = ["player_type"]

    return dfc, dfcf

In [None]:
# Collect one play-type table per season file. Each table is tagged with
# Season = file year + 1 (files 2012..2022 become Season 2013..2023) and
# player_name is moved from the index into a regular column.
dfa = []
for season in range(2012, 2023):
    # assign() builds a new frame instead of writing a column into the
    # filtered frame returned by get_playtypes.
    dfs1 = get_playtypes(season, poss=200).assign(Season=season + 1).reset_index()
    dfa.append(dfs1)

In [None]:
# Stack the per-season tables under a fresh 0..N-1 index, then move Season to
# position 1 so that every column from position 2 onward is a play-type share.
dfs = pd.concat(dfa, ignore_index=True)
dfs.insert(loc=1, column="Season", value=dfs.pop("Season"))

In [None]:
# Feature matrix for clustering: every column after the first two.
features = dfs.loc[:, dfs.columns[2:]].to_numpy()

In [None]:
# First pass: randomly initialised k-means with 12 clusters.
dfc, dfcf = perform_kmeans_multi(features=features, dfs=dfs, n=12)

In [None]:
# Show the cluster centers, highest PRBallHandler share first.
dfcf.sort_values("PRBallHandler", ascending=False)

In [None]:
# Initial centers for the second pass, ordered by descending PRBallHandler share.
init_clusters = dfcf.sort_values("PRBallHandler", ascending=False).to_numpy()

In [None]:
def perform_kmeans_multi_winit(features, dfs, init_clusters, n=12):
    """Cluster players with k-means started from explicit initial centers.

    Args:
        features: 2-D array of play-type shares, one row per row of ``dfs``.
        dfs: Player frame whose first two columns are identifiers and whose
            remaining columns are the play-type features, in the same order
            as the columns of ``features`` (the centers table takes its
            column names from ``dfs`` by position).
        init_clusters: Array of initial cluster centers passed to ``KMeans``
            as ``init``; one row per cluster, one column per feature.
        n: Number of clusters.

    Returns:
        Tuple ``(dfc, dfcf)``:

        * ``dfc`` - copy of ``dfs`` with the role totals ``Ball``, ``Shoot``
          and ``Big`` appended and a 1-based ``label`` column at position 2,
          so that label ``i`` corresponds to row ``c{i}`` of ``dfcf``.
        * ``dfcf`` - cluster centers rounded to 3 decimals, indexed
          ``c1..c{n}``, with the same three role totals appended.
    """
    kmeans = KMeans(
        init=init_clusters,
        n_clusters=n,
        n_init="auto",
        max_iter=300,
        random_state=42,
    )
    kmeans.fit(features)
    centers = np.round(kmeans.cluster_centers_, 3)

    dfc = dfs.copy()
    dfc["Ball"] = dfc["PRBallHandler"] + dfc["Isolation"]
    dfc["Shoot"] = dfc["Spotup"] + dfc["OffScreen"] + dfc["Handoff"]
    # Fourth term is OffRebound (the original added Postup twice), so that
    # Ball + Shoot + Big covers each of the nine play types exactly once.
    dfc["Big"] = dfc["Cut"] + dfc["PRRollMan"] + dfc["Postup"] + dfc["OffRebound"]
    # 1-based to match the c1..cn index of the centers table.
    dfc["label"] = kmeans.labels_ + 1
    dfc.insert(2, "label", dfc.pop("label"))

    # Columns 3..-3 skip the two id columns, the label, and the three role
    # totals, leaving exactly the feature columns.
    dfcf = pd.DataFrame(
        centers,
        columns=dfc.columns[3:-3],
        index=[f"c{i}" for i in range(1, n + 1)],
    )
    dfcf.index.names = ["player_type"]
    # Same role totals for the centers, computed from the rounded values.
    dfcf["Ball"] = dfcf["PRBallHandler"] + dfcf["Isolation"]
    dfcf["Shoot"] = dfcf["Spotup"] + dfcf["OffScreen"] + dfcf["Handoff"]
    dfcf["Big"] = dfcf["Cut"] + dfcf["PRRollMan"] + dfcf["Postup"] + dfcf["OffRebound"]

    return dfc, dfcf

In [None]:
# Second pass: k-means started from the ordered first-pass centers.
dfc1, dfcf1 = perform_kmeans_multi_winit(
    features=features, dfs=dfs, init_clusters=init_clusters, n=12
)

In [None]:
# Display the second-pass cluster centers together with their role totals.
dfcf1

## 2012-23
- c1: Ball Handler
- c2: Ball Handler + Spot Up: Shooting Ball Handler
- c3: Ball Handler + ISO + Spot Up + Tall: Play-makers
- c4: Secondary Ball Handler + Spot Up: Versatile Shooter
- c5: Spot Up + Off Screen + Handoff: Movement Shooter
- c6: Spot Up + ISO + Post Up: Big Wing
- c7: Spot Up + little bit of versatility: Utility Standstill Shooter
- c8: Spot Up: Wing Shooter
- c9: Spot Up + Cut + Roll: Shooting Big
- c10: Cut + Roll + Post Up: Post-Up Big
- c11: Cut + Roll + OffReb: PnR Big with Shooting
- c12: Cut + Roll + OffReb: Finishing Big

In [None]:
# List the players assigned to cluster i in Season 2023.
i = 6
dfc1[(dfc1["label"] == i) & (dfc1["Season"] == 2023)]

In [None]:
# Show every season row (and cluster label) for one player.
# A boolean mask is used instead of interpolating the name into a query
# string, which breaks for names containing an apostrophe.
player = "Jaren Jackson Jr."
dfc1[dfc1["player_name"] == player]