In [1]:
import pandas as pd

import tensorflow as tf
from tensorflow import keras

!pip install -q -U keras-tuner
import keras_tuner as kt

from sklearn.feature_selection import RFECV 
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.cluster import KMeans
import plotly.express as px
import matplotlib.pyplot as plt


In [2]:
# Read both source tables in one pass: MVP voting history first, then the
# per-game NBA box-score data.
mvp, nba_df = (
    pd.read_csv(csv_name) for csv_name in ("mvp_votings.csv", "nba_info.csv")
)
# Preview the award-share column of the voting table.
mvp.loc[:, "award_share"]

0      0.658
1      0.613
2      0.414
3      0.261
4      0.120
       ...  
632    0.006
633    0.005
634    0.005
635    0.004
636    0.002
Name: award_share, Length: 637, dtype: float64

In [3]:
# List the column names of each loaded table, voting data first.
for frame in (mvp, nba_df):
    print(frame.columns)

Index(['Unnamed: 0', 'fga', 'fg3a', 'fta', 'per', 'ts_pct', 'usg_pct', 'bpm',
       'season', 'player', 'win_pct', 'votes_first', 'points_won',
       'points_max', 'award_share', 'g', 'mp_per_g', 'pts_per_g', 'trb_per_g',
       'ast_per_g', 'stl_per_g', 'blk_per_g', 'fg_pct', 'fg3_pct', 'ft_pct',
       'ws', 'ws_per_48'],
      dtype='object')
Index(['GAME_DATE_EST', 'GAME_ID', 'HOME_TEAM_ID', 'VISITOR_TEAM_ID', 'SEASON',
       'TEAM_ID_home', 'HOME_TEAM_WINS', 'TEAM_ID', 'CONFERENCE', 'TEAM',
       'PLAYER_ID', 'PLAYER_NAME', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M',
       'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'REB', 'AST', 'STL', 'BLK',
       'TO', 'PTS'],
      dtype='object')


In [4]:
# Index the voting table by (season, player); rows sharing a key are averaged.
# The result has the same number of rows as `mvp` (637), so in this data the
# mean leaves the values unchanged and only the index differs.
mvp_groupedII = (
    mvp
    .groupby(by=["season", "player"])
    .mean()
)
mvp_groupedII.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,fga,fg3a,fta,per,ts_pct,usg_pct,bpm,win_pct,votes_first,...,pts_per_g,trb_per_g,ast_per_g,stl_per_g,blk_per_g,fg_pct,fg3_pct,ft_pct,ws,ws_per_48
season,player,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1980-81,Adrian Dantley,11.0,20.3,0.1,9.8,24.3,0.622,28.4,4.6,0.341463,1.0,...,30.7,6.4,4.0,1.4,0.2,0.559,0.286,0.806,13.6,0.191
1980-81,Artis Gilmore,21.0,10.0,0.0,6.5,21.7,0.699,18.5,4.9,0.54878,0.0,...,17.9,10.1,2.1,0.6,2.4,0.67,0.0,0.705,12.3,0.208
1980-81,Bernard King,13.0,15.4,0.1,5.4,19.9,0.617,23.2,3.3,0.47561,0.0,...,21.9,6.8,3.5,0.9,0.4,0.588,0.333,0.703,9.1,0.15
1980-81,Bob Lanier,22.0,10.7,0.0,4.1,19.7,0.573,21.8,3.4,0.731707,0.0,...,14.3,6.2,2.7,1.1,1.2,0.525,1.0,0.751,6.8,0.185
1980-81,Bobby Jones,26.0,9.3,0.0,4.3,20.1,0.604,20.8,5.0,0.756098,0.0,...,13.5,5.4,2.8,1.2,0.9,0.539,0.0,0.813,9.2,0.217


In [5]:
# Remove the team-level columns from the box-score table, leaving the game,
# season and per-player columns.
nba_cleaning = nba_df.drop(
    [
        'HOME_TEAM_ID',
        'VISITOR_TEAM_ID',
        'TEAM_ID_home',
        'HOME_TEAM_WINS',
        'TEAM_ID',
        'CONFERENCE',
        'TEAM',
    ],
    axis="columns",
)
nba_cleaning.head(n=5)

Unnamed: 0,GAME_DATE_EST,GAME_ID,SEASON,PLAYER_ID,PLAYER_NAME,MIN,FGM,FGA,FG_PCT,FG3M,...,FG3_PCT,FTM,FTA,FT_PCT,REB,AST,STL,BLK,TO,PTS
0,2022-12-22,22200477,2022-01-01,1629641,Romeo Langford,18,1,1,1.0,0,...,0.0,0,0,0.0,2,0,1,0,2,2
1,2022-12-22,22200477,2022-01-01,1631110,Jeremy Sochan,31,7,14,0.5,2,...,0.5,7,10,0.7,9,6,1,0,2,23
2,2022-12-22,22200477,2022-01-01,1627751,Jakob Poeltl,21,6,9,0.667,0,...,0.0,1,1,1.0,4,1,1,0,2,13
3,2022-12-22,22200477,2022-01-01,1630170,Devin Vassell,30,4,13,0.308,1,...,0.167,1,1,1.0,9,5,3,0,2,10
4,2022-12-22,22200477,2022-01-01,1630200,Tre Jones,27,7,12,0.583,1,...,0.333,4,4,1.0,2,3,0,0,2,19


In [6]:
# Show the award share for every (season, player) pair.
mvp_groupedII.loc[:, "award_share"]

season   player           
1980-81  Adrian Dantley       0.022
         Artis Gilmore        0.006
         Bernard King         0.017
         Bob Lanier           0.006
         Bobby Jones          0.004
                              ...  
2017-18  LaMarcus Aldridge    0.006
         LeBron James         0.731
         Russell Westbrook    0.075
         Stephen Curry        0.005
         Victor Oladipo       0.002
Name: award_share, Length: 637, dtype: float64

In [7]:
# Collect the names of any object-dtype (categorical/text) columns.
mvp_cat = [
    column
    for column, dtype in mvp_groupedII.dtypes.items()
    if dtype == "object"
]
# Display the dtype of every column in the grouped table.
mvp_groupedII.dtypes

Unnamed: 0     float64
fga            float64
fg3a           float64
fta            float64
per            float64
ts_pct         float64
usg_pct        float64
bpm            float64
win_pct        float64
votes_first    float64
points_won     float64
points_max     float64
award_share    float64
g              float64
mp_per_g       float64
pts_per_g      float64
trb_per_g      float64
ast_per_g      float64
stl_per_g      float64
blk_per_g      float64
fg_pct         float64
fg3_pct        float64
ft_pct         float64
ws             float64
ws_per_48      float64
dtype: object

In [8]:
def get_clusters(k, data, random_state=0):
  """Cluster the rows of ``data`` with K-Means and return a labelled copy.

  Parameters
  ----------
  k : int
      Number of clusters passed to ``KMeans(n_clusters=...)``.
  data : pandas.DataFrame
      Feature table to cluster. It is not modified. Every column is handed
      to ``KMeans.fit`` as-is (no scaling or column selection is done here).
  random_state : int, default 0
      Seed forwarded to ``KMeans`` so repeated runs use the same seed.

  Returns
  -------
  pandas.DataFrame
      A copy of ``data`` with an extra ``"class"`` column holding the
      cluster label assigned to each row.
  """
  # Work on a copy so the caller's DataFrame never gains the "class" column.
  labelled = data.copy()
  model = KMeans(n_clusters=k, random_state=random_state)
  # Fit before the label column is attached, so "class" is never a feature.
  model.fit(labelled)
  # labels_ already holds the assignment for every fitted row; a separate
  # predict() pass over the same data is not needed.
  labelled["class"] = model.labels_
  return labelled

In [12]:
# Cluster the per-(season, player) table into six K-Means groups.
# NOTE: the variable keeps the name `two_clusters` because the plotting cell
# further down refers to it, even though six clusters are requested here.
two_clusters = get_clusters(6, mvp_groupedII)
print(len(two_clusters))
# A bare expression is only rendered when it is the last statement of a cell,
# so the preview goes last instead of being silently discarded mid-cell.
two_clusters.head()



637


In [10]:
!pip install panel==0.12.6 hvplot==0.7.3
import hvplot.pandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


ImportError: ignored

In [None]:
# Scatter the clustered players, coloured by their K-Means label.
# The clustered frame's columns are the lowercase stat names (e.g. "per",
# "award_share") plus "class"; it has no "MVP Chance" or "Award Share" column,
# so the original axis names could not be resolved.
# TODO(review): "per" is a stand-in for the x axis because no "MVP Chance"
# column is visible in this notebook — confirm the intended feature.
two_clusters.hvplot.scatter(x="per", y="award_share", by="class")