### Data Import

In [None]:
import pandas as pd

# Load the cleaned season table and expose the "WS/48" column as "WS48".
nba_data = pd.read_csv(r"seasons_clean.csv").rename(columns={"WS/48": "WS48"})
nba_data.head(2)

### Selecting columns (stats) to keep in dataset 

In [None]:
# Positional indices into nba_data.columns.
# stat_list   -> model inputs (it also carries "Season" and "Player", which
#                later cells use for filtering and then drop).
# output_list -> "Season" plus the columns the models are trained to predict.
stat_list = [
    7, 0, 1, 3, 5, 6, 8, 9, 10, 11, 12,
    13, 15, 17, 18, 20, 21, 23, 24, 26, 27, 28,
]
output_list = [0, 25, 30, 31, 34, 35]

# Show the column labels behind both index lists.
print(
    f'Stat list \n*******\n{nba_data.columns[stat_list]}',
    f'\n\nOutput list \n*******\n{nba_data.columns[output_list]}',
    sep="\n",
)

### Defining the X and y datasets (for testing/training)

In [None]:
# Feature table for training/testing:
#   * keep only rows with MPG > 6 (more than half a quarter of play),
#   * keep the columns selected by stat_list,
#   * leave out the 2018 season, then drop the "Season" column itself.
nba_X = (
    nba_data.loc[nba_data['MPG'] > 6]
    .iloc[:, stat_list]
    .loc[lambda frame: frame['Season'] != 2018]
    .drop(columns=["Season"])
)

In [None]:
# Target table for training/testing, built with the same row filters as the
# feature table: MPG > 6, columns from output_list, every season except 2018,
# with the "Season" column dropped at the end.
nba_y = (
    nba_data.loc[nba_data['MPG'] > 6]
    .iloc[:, output_list]
    .loc[lambda frame: frame['Season'] != 2018]
    .drop(columns=["Season"])
)

### Defining the X and y datasets (2018 only, for Production)

In [None]:
# "Production" feature table: same MPG > 6 filter and stat_list columns as the
# training features, but restricted to the 2018 season ("Season" is dropped
# once it has been used for the filter).
nba_2018_X = (
    nba_data.loc[nba_data['MPG'] > 6]
    .iloc[:, stat_list]
    .loc[lambda frame: frame['Season'] == 2018]
    .drop(columns=["Season"])
)

In [None]:
# "Production" target table: the output_list columns for the 2018 season only
# (rows with MPG > 6), with the "Season" column dropped after filtering.
nba_2018_y = (
    nba_data.loc[nba_data['MPG'] > 6]
    .iloc[:, output_list]
    .loc[lambda frame: frame['Season'] == 2018]
    .drop(columns=["Season"])
)

### Defining the function to Create the Model

In [None]:
import matplotlib.pyplot as plt

from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.inspection import permutation_importance

from joblib import dump, load

@ignore_warnings(category=ConvergenceWarning)
def Create_Model(nba_X, nba_y):
    """Train, score and save one MLPRegressor per target column of ``nba_y``.

    Parameters
    ----------
    nba_X : pandas.DataFrame
        Feature table. Must contain a "Player" column, which is dropped
        before fitting.
    nba_y : pandas.DataFrame
        Target table with the same number of rows as ``nba_X``; every column
        is modelled independently.

    Returns
    -------
    None

    Side effects
    ------------
    Prints the training and testing score of each model and writes each
    fitted model to ``Model/model_NBA_<target>.joblib``. The ``Model``
    directory is created when it does not exist yet.
    """
    # Local imports: neither name is imported at the top of the notebook.
    import os
    from sklearn.neural_network import MLPRegressor

    os.makedirs("Model", exist_ok=True)

    # The inputs are identical for every target, so split and scale them once.
    # Splitting X together with the whole target frame keeps all target
    # columns row-aligned with the feature split.
    X = nba_X.drop(columns=["Player"])
    X_train, X_test, Y_train, Y_test = train_test_split(X, nba_y, random_state=42)

    # Fit the scaler on the training rows only, then apply it to both splits.
    X_scaler = StandardScaler().fit(X_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)

    # Iterate over the columns that were actually passed in instead of
    # relying on the length of the notebook-level output_list.
    for j, output in enumerate(nba_y.columns):

        y_train = Y_train.iloc[:, j].values.reshape(-1)
        y_test = Y_test.iloc[:, j].values.reshape(-1)

        print("*********************************")

        # Seeded so that re-running the notebook reproduces the saved models.
        model = MLPRegressor(random_state=42)
        model.fit(X_train_scaled, y_train)

        training_score = model.score(X_train_scaled, y_train)
        testing_score = model.score(X_test_scaled, y_test)

        # score() is documented to return a float; the built-in round() works
        # for a plain Python float as well as for a NumPy scalar.
        print(f"{output} Training Score: {round(training_score, 3)}")
        print(f"{output} Testing Score: {round(testing_score, 3)}")

        # Persist first, report afterwards, so the message is only printed
        # once the file has really been written.
        dump(model, f'Model/model_NBA_{output}.joblib')
        print(f'Saved: model_NBA_{output}.joblib')

        print("*********************************")


### Create and Save the Model 

In [None]:
# Fit and save one model per target using the training/testing tables.
Create_Model(nba_X, nba_y)

### Defining the function for Permutation Importance (and apply model)

In [None]:
@ignore_warnings(category=ConvergenceWarning)
def Permutation_Importance(nba_X, nba_y):
    """Report the most important input feature of every saved model.

    For each target column of ``nba_y`` the model stored at
    ``Model/model_NBA_<target>.joblib`` is loaded and scikit-learn's
    ``permutation_importance`` is evaluated on the scaled test split.

    Parameters
    ----------
    nba_X : pandas.DataFrame
        Feature table. Must contain a "Player" column, which is dropped.
    nba_y : pandas.DataFrame
        Target table with the same number of rows as ``nba_X``.

    Returns
    -------
    None
        Results are printed. For each target, the highest-ranked feature whose
        mean importance exceeds two standard deviations is reported, together
        with the mean importance of the first feature column.
    """
    # The feature split and the scaler do not depend on the target, so they
    # are computed once. Splitting the whole target frame alongside X keeps
    # every target column aligned with the feature rows.
    X = nba_X.drop(columns=["Player"])
    X_train, X_test, Y_train, Y_test = train_test_split(X, nba_y, random_state=42)

    X_scaler = StandardScaler().fit(X_train)
    X_test_scaled = X_scaler.transform(X_test)
    feature_names = X_test.columns

    # Iterate over the columns actually passed in rather than over the
    # notebook-level output_list.
    for j, output in enumerate(nba_y.columns):

        y_test = Y_test.iloc[:, j].values.reshape(-1)

        print("*********************************")
        print(f'{output}\n')

        ### LOAD Model
        model_NBA_load = load(f'Model/model_NBA_{output}.joblib')
        print(f'Load: model_NBA_{output}.joblib')

        # Permutation Importance
        r = permutation_importance(model_NBA_load, X_test_scaled, y_test,
                                   n_repeats=30,
                                   random_state=0)

        # Walk the features from highest to lowest mean importance and report
        # the first one that clears the mean - 2*std > 0 check. The break
        # lives inside the `if`, so a non-significant leader no longer hides
        # a significant runner-up.
        for i in r.importances_mean.argsort()[::-1]:
            if r.importances_mean[i] - 2 * r.importances_std[i] > 0:
                print(f"Importance (Highest): {feature_names[i]}({r.importances_mean[i]:.3f})\n"
                      f"Importance ({feature_names[0]}): ({r.importances_mean[0]:.3f})")
                break

        print("*********************************")


### Run Permutation Importance

In [None]:
# Print the leading feature for every saved model (training/testing tables).
Permutation_Importance(nba_X, nba_y)

### Defining the function for Mean Efficiency Rating

In [None]:
def MER(nba_2018_X, nba_2018_y):
    """Compare each saved model's prediction for the mean row with the mean target.

    The column-wise mean of the features is scaled and fed to every saved
    model (``Model/model_NBA_<target>.joblib``). The prediction is compared
    with the mean of the corresponding target column, and the relative
    difference is reported in percent.

    Parameters
    ----------
    nba_2018_X : pandas.DataFrame
        Feature table. Must contain a "Player" column, which is dropped.
        Despite the name, any feature table can be passed.
    nba_2018_y : pandas.DataFrame
        Target table with the same number of rows as ``nba_2018_X``.

    Returns
    -------
    pandas.DataFrame
        One row per target with the columns
        'Output', 'Predicted', 'Actual' and '% Diff'.
    """
    # Drop the text column *before* averaging: DataFrame.mean() on a frame
    # that still holds strings raises TypeError on pandas >= 2.0.
    X = nba_2018_X.drop(columns=["Player"])
    mean_data = pd.DataFrame(X.mean()).T
    mean_output = pd.DataFrame(nba_2018_y.mean()).T

    # The scaler and the scaled mean row are the same for every target, so
    # they are computed once. Only the training part of the split is needed.
    # NOTE(review): the scaler is fit on the data passed in. When that is not
    # the data the saved models were trained on, the scaling differs from the
    # one seen at training time - confirm that this is intended.
    X_train, _ = train_test_split(X, random_state=42)
    X_scaler = StandardScaler().fit(X_train)
    mean_data_scaled = X_scaler.transform(mean_data)

    rows = []
    # Iterate over the target columns actually passed in rather than over the
    # notebook-level output_list.
    for output in nba_2018_y.columns:

        ### LOAD Model
        model_NBA_load = load(f'Model/model_NBA_{output}.joblib')

        # Prediction for the "average" row
        y_mean_value = model_NBA_load.predict(mean_data_scaled)[0]

        print("*****************")
        print(f"{output}")
        print(f"Predicted = {y_mean_value:.2f}")

        y_actual = mean_output.iloc[0][output]
        print(f"Actual = {y_actual:.2f}")

        # Relative difference between prediction and actual mean, in percent
        dif = ((y_mean_value - y_actual)/y_actual)*100
        print(f"MER: {dif:.2f}%")

        print("*****************")

        rows.append([output, round(y_mean_value, 2), round(y_actual, 2), round(dif, 1)])

    df = pd.DataFrame(rows, columns=['Output','Predicted','Actual','% Diff'])
    return df

### Run Mean Efficiency Rating on 2018 data

In [None]:
# Mean-row comparison on the held-out 2018 tables; the returned frame is the
# cell output.
MER(nba_2018_X, nba_2018_y)

### Run Mean Efficiency Rating on Train/Test data

In [None]:
# Same comparison on the training/testing tables (every season except 2018).
MER(nba_X, nba_y)

# 
# Production 

### Define Roster to Evaluate 

In [None]:
# Players to evaluate, one name per line; each name is compared against the
# "Player" column when a player is selected.
raptors = [
    "OG Anunoby",
    "Marc Gasol",
    "Danny Green",
    "Serge Ibaka",
    "Kawhi Leonard",
    "Jeremy Lin",
    "Kyle Lowry",
    "Norman Powell",
    "Pascal Siakam",
    "Fred VanVleet",
    "Delon Wright",
]

### Function to Select Player to Evaluate with Model

In [None]:
def SelectPlayer(x):
    """Look up one player in ``nba_2018_X`` and run ``Production`` on the result.

    Parameters
    ----------
    x : str
        Player name, matched exactly against the "Player" column.

    Raises
    ------
    ValueError
        If ``nba_2018_X`` has no row for the requested player.
    """
    # Production() takes the name used in its messages, plot title and file
    # name from the notebook-level variable `player`. Publish the requested
    # name there so that a direct call such as SelectPlayer("Some Player") is
    # labelled correctly instead of reusing a stale (or undefined) global.
    # TODO: hand the name to Production() explicitly and drop the global.
    global player
    player = x

    player_prod = nba_2018_X[nba_2018_X['Player'] == player].drop(columns=["Player"])

    # Fail early with a readable message rather than with an IndexError
    # raised from inside Production().
    if player_prod.empty:
        raise ValueError(f"No rows found in nba_2018_X for player {x!r}")

    Production(player_prod)

### Defining function to evaluate players in Production environment with Saved Model 

In [None]:
import numpy as np
def Production(player_prod, player_name=None):
    """Plot how each saved model's prediction for one player varies with MPG.

    For every target column of ``nba_2018_y`` the saved model
    (``Model/model_NBA_<target>.joblib``) is evaluated on the player's row
    with "MPG" replaced by each integer from 7 up to the largest MPG found in
    ``nba_X``. Each curve is divided by its own maximum, all curves are drawn
    into one scatter plot with a vertical line at the player's actual MPG, and
    the figure is saved to ``Figures/AllStats_for_<player>``.

    Parameters
    ----------
    player_prod : pandas.DataFrame
        Feature row(s) of one player without the "Player" column. Only the
        first row's predictions are used. The frame is not modified.
    player_name : str, optional
        Name used in messages, the plot title and the file name. When omitted,
        the notebook-level variable ``player`` is used.

    Raises
    ------
    ValueError
        If ``player_prod`` has no rows.
    """
    import os

    if len(player_prod) == 0:
        raise ValueError("player_prod is empty: there is no player row to evaluate")

    # An explicit name wins; otherwise fall back to the notebook-level `player`.
    label = player if player_name is None else player_name

    # Setting Players actual MPG value
    v = player_prod['MPG'].values[0]
    print(f"Actual MPG: {v}")

    # Work on a copy so the caller's frame keeps the player's real MPG.
    player_row = player_prod.copy()

    ### Scaling data
    # The saved models were fitted on features standardised with the training
    # split of nba_X (same random_state), so the identical scaler is rebuilt
    # here; fitting it on the 2018 rows would feed the models differently
    # scaled inputs. It does not depend on the target, so it is built once.
    training_X = nba_X.drop(columns=["Player"])
    X_train, _ = train_test_split(training_X, random_state=42)
    X_scaler = StandardScaler().fit(X_train)

    # MPG values to simulate: 7 up to the largest MPG in the training table.
    max_mpg = int(nba_X["MPG"].max())
    mpg_values = range(7, max_mpg + 1)

    colors = ["b","g","r","c","m"]
    total_max = 0
    total_min = 0

    for j, output in enumerate(nba_2018_y.columns):

        ### LOAD Model
        model_NBA_load = load(f'Model/model_NBA_{output}.joblib')
        print(f'Evaluated {output} for {label} (2018)')

        ### Running the MPG Loop
        # Building predictions based on each minute of MPG
        PER_list = []
        for mpg in mpg_values:
            player_row["MPG"] = mpg
            X_player_scaled_2018 = X_scaler.transform(player_row)
            y_player_2018 = model_NBA_load.predict(X_player_scaled_2018)
            PER_list.append(y_player_2018[0])

        # Scale results relative to the curve's own maximum
        peak = max(PER_list)
        PER_list = [value / peak for value in PER_list]

        # Grabbing max and min values to set for vertical line
        total_max = max(total_max, max(PER_list))
        total_min = min(total_min, min(PER_list))

        ### PLOT THE RESULTS
        # Cycle through the palette so more than five targets do not overflow it.
        plt.scatter(mpg_values, PER_list, c=colors[j % len(colors)], label=f"{output}")

    plt.title(f"2018 - MPG Impact for {label}")
    plt.vlines(v, total_min, total_max, label=f'MPG')
    plt.legend(loc="lower right")

    # Make sure the output directory exists before saving.
    os.makedirs("Figures", exist_ok=True)
    plt.savefig(fname=f"Figures/AllStats_for_{label}",facecolor="lightsteelblue")

    ### Clear the figure for loop
    plt.clf()
    print("********************")

### Generating plots of MPG impact on Player Stats

In [None]:
# Evaluate every player on the roster.
# Note: Production() reads the module-level name `player` (bound here by the
# loop variable) for its printed messages, plot title and output file name.
for player in raptors:  
    SelectPlayer(player)

In [None]:
# Evaluate a single player outside the roster.
# Note: Production() reads the module-level name `player` assigned here for
# its printed messages, plot title and output file name.
player = "Stephen Curry"

SelectPlayer(player)