### Data Import

In [41]:
import pandas as pd

# Load the cleaned per-season table. "WS/48" is renamed because stat names are
# later embedded in output file paths, where "/" would act as a path separator.
nba_data = (
    pd.read_csv(r"seasons_clean.csv")
    .rename(columns={"WS/48": "WS48"})
)
nba_data.head(2)

Unnamed: 0,Season,Player,Pos,Age,Tm,G,GS,MPG,PPG,RPG,...,TS%,USG%,OWS,DWS,WS,WS48,OBPM,DBPM,BPM,VORP
0,2009,Arron Afflalo,SG,24.0,DEN,82.0,75.0,27.085366,8.829268,3.073171,...,0.576,14.0,2.8,1.4,4.3,0.092,0.8,-0.5,0.3,1.3
1,2009,Alexis Ajinca,C,21.0,CHA,6.0,0.0,5.0,1.666667,0.666667,...,0.479,19.3,-0.1,0.0,0.0,-0.013,-8.5,-2.0,-10.4,-0.1


### Selecting columns/stats to keep in dataset 

In [42]:
# Positional indices into nba_data.columns (used with .iloc further down).
# Both lists carry 'Season' (position 0) so the 2018 rows can be split off later.
stat_list = [
    7, 0, 1, 3, 5, 6, 8, 9, 10, 11, 12,
    13, 15, 17, 18, 20, 21, 23, 24, 26, 27, 28,
]
output_list = [0, 25, 30, 31, 34, 35]

# Echo the selected column names so the positions can be checked by eye
stat_columns = nba_data.columns[stat_list]
output_columns = nba_data.columns[output_list]

print(f'Stat list \n*******\n{stat_columns}')
print(f'\n\nOutput list \n*******\n{output_columns}')

Stat list 
*******
Index(['MPG', 'Season', 'Player', 'Age', 'G', 'GS', 'PPG', 'RPG', 'APG', 'SPG',
       'BPG', 'TOPG', 'FG', 'FG%', '3P', '3P%', 'FT', 'FT%', 'PFPG', 'TS%',
       'USG%', 'OWS'],
      dtype='object')


Output list 
*******
Index(['Season', 'PER', 'WS', 'WS48', 'BPM', 'VORP'], dtype='object')


### Defining X and y datasets

In [43]:
# Feature table for training/testing:
#   - only players averaging more than 6 MPG (half a quarter of play)
#   - every season except 2018, which is held back
#   - identifier columns removed once the row filtering is done
plays_enough_X = nba_data['MPG'] > 6
not_2018_X = nba_data['Season'] != 2018

nba_X = (
    nba_data.loc[plays_enough_X & not_2018_X]
    .iloc[:, stat_list]
    .drop(columns=["Season", "Player"])
)

In [129]:
# Reserving the 2018 season for "production"
# (same > 6 MPG filter as the training data; 'Player' is kept for name lookups)
nba_2018_prod = nba_data.loc[nba_data['MPG'] > 6].iloc[:,stat_list]
nba_2018_prod = nba_2018_prod[nba_2018_prod['Season'] == 2018].drop(columns=["Season"])

# League-average stat line for 2018.
# numeric_only=True is required: the frame still holds the text 'Player'
# column, and pandas >= 2.0 raises TypeError when asked to average it.
nba_mean = nba_2018_prod.mean(numeric_only=True)
nba_mean_df = pd.DataFrame(nba_mean).T

# Appending the 2018 league mean stats to Production dataset.
# The label is inserted at the same position 'Player' occupies in the
# production frame so both frames share one column order before concat.
idx = nba_2018_prod.columns.get_loc('Player')
NBA_Mean = "NBA Mean"
nba_mean_df.insert(loc=idx, column='Player', value=NBA_Mean)
nba_2018_prod = pd.concat([nba_2018_prod, nba_mean_df])

In [8]:
# Target table for y_train/y_test.
# Uses the same row filters as the feature table (more than 6 MPG, 2018
# excluded) so the two stay aligned row-for-row; 'Season' is only needed
# for the filter and is dropped afterwards.
plays_enough_y = nba_data['MPG'] > 6
not_2018_y = nba_data['Season'] != 2018

nba_y = (
    nba_data.loc[plays_enough_y & not_2018_y]
    .iloc[:, output_list]
    .drop(columns=["Season"])
)

### Build Regressor Function 

In [174]:
import matplotlib.pyplot as plt

from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.inspection import permutation_importance

from joblib import dump, load

@ignore_warnings(category=ConvergenceWarning)
def Regressor(X, y, output_name=None, seed=None):
    """Fit an MLP on one target stat, report its fit, and plot prediction vs. MPG.

    Pipeline: train/test split -> standardize features -> fit ``MLPRegressor``
    -> print train/test scores and the top permutation importance -> save the
    model to ``Model/model_NBA_<output_name>.joblib`` -> for one player sampled
    from the test split, predict the stat at every whole MPG value from 7 up
    to the largest MPG in ``X`` and save the scatter plot to
    ``Figures/<output_name>_test``.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table. Must contain an "MPG" column. Its first column is the
        one reported on the second "Importance" line. Its index labels must
        be row labels of the notebook-level ``nba_data`` frame, which is used
        to look up the sampled player's season, name and position.
    y : array-like
        Target values aligned row-for-row with ``X``.
    output_name : str, optional
        Name of the target stat, used in messages and file names. Defaults to
        the notebook-level ``output`` variable.
    seed : int, optional
        Seed for the train/test split and the MLP initialisation. Defaults to
        the notebook-level ``rand`` variable.

    Returns
    -------
    None
        Results are printed and written to disk.
    """
    from pathlib import Path
    from sklearn.neural_network import MLPRegressor

    # Fall back to the notebook globals so the existing `Regressor(X, y)`
    # call keeps working unchanged.
    if output_name is None:
        output_name = output
    if seed is None:
        seed = rand

    ### Starting data-modeling process

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)

    ### Select Player

    # Fixed sampling seed: for a given split the same test-set player is used
    # for every output stat.
    X_player = X_test.sample(n=1, random_state=42)

    # X keeps nba_data's index labels, so look the row up by label; the first
    # three columns of nba_data are unpacked as season, player name, position.
    season, player, pos = nba_data.loc[X_player.index[0]].iloc[0:3]
    print(f'The {output_name} for {player} ({pos}, {season})')

    ### Scaling data

    # Fit the StandardScaler on the training split only, then apply it to both
    X_scaler = StandardScaler().fit(X_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)

    ### Model fit and score

    # Seed the network as well, so a given `seed` reproduces the same model
    model = MLPRegressor(random_state=seed)
    model.fit(X_train_scaled, y_train)

    training_score = model.score(X_train_scaled, y_train)
    testing_score = model.score(X_test_scaled, y_test)

    # Built-in round() works whether score() hands back a numpy or a plain float
    print(f"Training Score: {round(training_score, 3)}")
    print(f"Testing Score: {round(testing_score, 3)}")

    ### Permutation Importance

    r = permutation_importance(model, X_test_scaled, y_test,
                               n_repeats=30,
                               random_state=0)

    # Only the single most important feature is reported, and only when its
    # mean importance is more than two standard deviations above zero.
    top = r.importances_mean.argmax()
    if r.importances_mean[top] - 2 * r.importances_std[top] > 0:
        print(f"Importance (Highest): {X_test.columns[top]}({r.importances_mean[top]:.3f})\n"
              f"Importance ({X_test.columns[0]}): ({r.importances_mean[0]:.3f})")

    ### SAVE MODEL

    # Create the output folders on first use so a fresh checkout can run
    Path("Model").mkdir(parents=True, exist_ok=True)
    Path("Figures").mkdir(parents=True, exist_ok=True)

    dump(model, f'Model/model_NBA_{output_name}.joblib')

    ### Running the Loop

    # Sweep range comes from the data passed in, not from a notebook global
    max_mpg = int(X["MPG"].max())
    mpg_values = range(7, max_mpg + 1)
    PER_list = list()

    # Building predictions based on each minute of MPG; .assign() returns a
    # modified copy, so the sampled player's row is never overwritten.
    for mpg in mpg_values:
        X_player_scaled = X_scaler.transform(X_player.assign(MPG=mpg))
        y_player = model.predict(X_player_scaled)
        PER_list.append(y_player[0])

    ### PLOT THE RESULTS

    fig, ax = plt.subplots()
    ax.scatter(mpg_values, PER_list, c="blue", label=f"{output_name} by Minute")
    ax.set_xlabel("MPG")
    ax.set_ylabel(output_name)
    ax.legend()
    ax.set_title(f"Test Data - MPG vs. {output_name} for {player}")
    fig.savefig(f"Figures/{output_name}_test", facecolor="white")

    # Close the figure so repeated calls neither stack plots nor leak memory
    plt.close(fig)


### Evaluate MPG impact on Selected Output Stats, and Plot 

In [175]:
from random import randrange
# One seed per run, shared by every target so all models see the same split.
# Kept as a notebook-level name: Regressor and the production section read it.
rand = randrange(1, 1000)

# Train one model per output stat (one column of nba_y each).
# `output`, `X` and `y` stay notebook-level names because other cells use them.
for output, output_data in nba_y.items():

    # Inputs are the same feature table for every target
    X = nba_X

    # Target as a flat 1-D array
    y = output_data.to_numpy().ravel()

    print("*********************************")
    Regressor(X,y)
    print("*********************************")

*********************************
The PER for DeMarcus Cousins (C, 2012)
Training Score: 0.983
Testing Score: 0.977
Importance (Highest): USG%(0.758)
Importance (MPG): (0.078)
*********************************
*********************************
The WS for DeMarcus Cousins (C, 2012)
Training Score: 0.981
Testing Score: 0.973
Importance (Highest): OWS(0.862)
Importance (MPG): (0.096)
*********************************
*********************************
The WS48 for DeMarcus Cousins (C, 2012)
Training Score: 0.602
Testing Score: 0.344
Importance (Highest): OWS(1.164)
Importance (MPG): (0.611)
*********************************
*********************************
The BPM for DeMarcus Cousins (C, 2012)
Training Score: 0.937
Testing Score: 0.896
Importance (Highest): APG(0.523)
Importance (MPG): (0.080)
*********************************
*********************************
The VORP for DeMarcus Cousins (C, 2012)
Training Score: 0.974
Testing Score: 0.951
Importance (Highest): PPG(1.170)
Importance (M

<Figure size 432x288 with 0 Axes>

# Production 

#### Enter player name  

In [116]:
# Must match a value in the 'Player' column of nba_2018_prod exactly.
# "NBA Mean" selects the league-average row appended to that frame.
player = "Stephen Curry"

### Setting dataset for Production 

In [117]:
# Feature row(s) of the chosen player for 2018; the name column is dropped
# so only the stat columns remain.
is_chosen_player = nba_2018_prod['Player'] == player
player_prod = nba_2018_prod.loc[is_chosen_player].drop(columns=["Player"])
player_prod

Unnamed: 0,MPG,Age,G,GS,PPG,RPG,APG,SPG,BPG,TOPG,FG,FG%,3P,3P%,FT,FT%,PFPG,TS%,USG%,OWS
5585,33.8,30.0,69.0,69.0,27.3,5.3,5.2,1.3,0.4,2.8,9.2,0.472,5.1,0.437,3.8,0.916,2.4,0.641,30.4,7.2


In [125]:
# The player's actual 2018 MPG (first matching row); drawn later as the
# vertical reference line on each plot.
v = player_prod['MPG'].iloc[0]
v

33.8

### Evaluate MPG impact on Selected Output Stats, and Plot 

In [127]:
### Scaling data

# Rebuild the scaler the saved models were trained with: same feature table
# and same split seed give the same training rows. It does not depend on the
# output stat, so it is fitted once here rather than once per iteration.
X_train, _ = train_test_split(nba_X, random_state=rand)
X_scaler = StandardScaler().fit(X_train)

### MPG values to evaluate (whole minutes, 7 up to the training maximum)

max_mpg = int(nba_X["MPG"].max())
mpg_values = range(7, max_mpg + 1)

for output in nba_y.columns:

    ### LOAD Model

    model_NBA_load = load(f'Model/model_NBA_{output}.joblib')

    print(f'Evaluated {output} for {player} (2018)')

    ### Running the MPG Loop

    # Building predictions based on each minute of MPG.
    # .assign() works on a copy, so player_prod keeps the player's real MPG
    # and this cell (and the cell computing `v`) can be re-run safely.
    PER_list = list()
    for mpg in mpg_values:
        X_player_scaled_2018 = X_scaler.transform(player_prod.assign(MPG=mpg))
        y_player_2018 = model_NBA_load.predict(X_player_scaled_2018)
        PER_list.append(y_player_2018[0])

    ### PLOT THE RESULTS

    fig, ax = plt.subplots()
    ax.scatter(mpg_values, PER_list, c="blue", label=f"{output} by Minute")
    # Vertical marker at the player's actual MPG
    ax.vlines(v, min(PER_list), max(PER_list))
    ax.set_xlabel("MPG")
    ax.set_ylabel(output)
    ax.legend()
    ax.set_title(f"2018 - MPG vs. {output} for {player}")
    fig.savefig(f"Figures/{output}_for_{player}", facecolor="lightsteelblue")

    # Close the figure so iterations neither stack plots nor leak memory
    plt.close(fig)

Evaluated PER for Stephen Curry (2018)
Evaluated WS for Stephen Curry (2018)
Evaluated WS48 for Stephen Curry (2018)
Evaluated BPM for Stephen Curry (2018)
Evaluated VORP for Stephen Curry (2018)


<Figure size 432x288 with 0 Axes>