### Data Import

In [None]:
import pandas as pd

# Load the cleaned season stats; "WS/48" is renamed to "WS48" so the column
# name contains no slash.
nba_data = pd.read_csv(r"seasons_clean.csv").rename(columns={"WS/48": "WS48"})
nba_data.head(2)

### Selecting columns/stats to keep in dataset 

In [None]:
# Positional indices into nba_data.columns.
# stat_list selects the columns used to build the model inputs.
stat_list = [
    7, 0, 1, 3, 5, 6, 8, 9, 10, 11, 12,
    13, 15, 17, 18, 20, 21, 23, 24, 26, 27, 28,
]
# output_list selects the columns used to build the model outputs.
output_list = [0, 25, 30, 31, 34, 35]

# Show the column names behind each index list as a sanity check.
stat_columns = nba_data.columns[stat_list]
output_columns = nba_data.columns[output_list]

print(f'Stat list \n*******\n{stat_columns}')
print(f'\n\nOutput list \n*******\n{output_columns}')

### Defining X y datasets 

In [None]:
# Model inputs: the chosen stat columns for every player-season with more
# than 6 MPG (half a quarter), excluding the 2018 season. "Season" and
# "Player" are only needed for filtering/identification, so they are dropped.
nba_X = (
    nba_data
    .loc[nba_data['MPG'] > 6]
    .iloc[:, stat_list]
    .loc[lambda stats: stats['Season'] != 2018]
    .drop(columns=["Season", "Player"])
)

In [None]:
# "Production" data: the same stat columns and MPG filter as the training
# inputs, but keeping only the 2018 season. "Player" is kept so a single
# player can be looked up by name later.
nba_2018_prod = (
    nba_data
    .loc[nba_data['MPG'] > 6]
    .iloc[:, stat_list]
    .loc[lambda stats: stats['Season'] == 2018]
    .drop(columns=["Season"])
)

In [None]:
# Model outputs (source of y_train/y_test): the chosen output columns for
# every player-season with more than 6 MPG (half a quarter), excluding 2018.
# "Season" is only used for the filter and is dropped afterwards.
nba_y = (
    nba_data
    .loc[nba_data['MPG'] > 6]
    .iloc[:, output_list]
    .loc[lambda outputs: outputs['Season'] != 2018]
    .drop(columns=["Season"])
)

### Build Regressor Function 

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt

from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from joblib import dump, load

@ignore_warnings(category=ConvergenceWarning)
def Regressor(X,y):
    """Train, score, save and plot an MLP regressor for one output stat.

    The data is split into train/test sets, the features are standardized
    with a scaler fitted on the training split, and an ``MLPRegressor`` with
    default settings is fitted. One player-season is then sampled from the
    test split and the model's prediction is plotted for every whole-minute
    MPG value from 7 up to the largest MPG found in ``X``.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table. Must contain an "MPG" column, and its index labels
        must be labels of ``nba_data`` (they are used to look up the sampled
        player's season, name and position).
    y : array-like
        Target values, one per row of ``X``.

    Notes
    -----
    Reads the notebook globals ``rand`` (train/test split seed), ``output``
    (name of the stat being modelled, used in messages and file names) and
    ``nba_data`` (source table for the player lookup).

    Side effects: prints the sampled player and the train/test scores, writes
    ``model_NBA_{output}.joblib`` and saves the plot as
    ``Figures/{output}_test``. Only the model is saved; the fitted scaler is
    not persisted.

    Returns
    -------
    None
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rand)

    # Pick one player-season from the test split (fixed seed for the draw).
    X_player = X_test.sample(n=1, random_state=42)

    # X_player.index holds an index *label* of nba_data, so the row is looked
    # up by label; the first three columns are unpacked as season/player/pos.
    season, player, pos = nba_data.iloc[:, :3].loc[X_player.index[0]]
    print(f'The {output} for {player} ({pos}, {season})')

    # Create a StandardScaler and fit it to the training data only, then
    # apply the same transformation to both splits.
    X_scaler = StandardScaler().fit(X_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)

    # Model fit and score
    from sklearn.neural_network import MLPRegressor
    model = MLPRegressor()

    model.fit(X_train_scaled, y_train)
    training_score = model.score(X_train_scaled, y_train)
    testing_score = model.score(X_test_scaled, y_test)

    # round() works whether score() returns a NumPy scalar or a plain Python
    # float (which has no .round method).
    print(f"Training Score: {round(float(training_score), 3)}")
    print(f"Testing Score: {round(float(testing_score), 3)}")

    # Save the fitted model
    dump(model, f'model_NBA_{output}.joblib')

    # Build one row per whole-minute MPG value (all other stats held at the
    # sampled player's values) and predict them in a single batch instead of
    # mutating X_player inside a loop.
    max_mpg = int(X["MPG"].max())
    mpg_values = list(range(7, max_mpg + 1))
    mpg_sweep = X_player.iloc[[0] * len(mpg_values)].assign(MPG=mpg_values)
    predictions = list(model.predict(X_scaler.transform(mpg_sweep)))

    # Plot on an explicit figure so nothing leaks into pyplot's global state.
    fig, ax = plt.subplots()
    ax.scatter(mpg_values, predictions, c="blue", label=f"{output} by Minute")
    ax.legend()
    ax.set_title(f"Test Data - MPG vs. {output} for {player}")
    ax.set_xlabel("MPG")
    ax.set_ylabel(output)

    # savefig does not create missing directories.
    Path("Figures").mkdir(exist_ok=True)
    fig.savefig(f"Figures/{output}_test", facecolor="white")

    # Release the figure so repeated calls in a loop do not accumulate figures.
    plt.close(fig)


### Evaluate MPG impact on Selected Output Stats, and Plot 

In [None]:
from random import randrange
# Seed for the train/test split. Regressor reads it as a global, and the
# production cell reuses it to rebuild the same scaler.
# NOTE(review): the seed changes on every kernel session, so models saved in
# one session are paired with a differently-fitted scaler if the production
# cell is run in another session without retraining — consider fixing it.
rand = randrange(1, 1000)

# Train one model per output column. Iterating over nba_y's own columns keeps
# the loop correct even if output_list or the dropped columns change.
# `output`, `X` and `y` are intentionally left as globals: Regressor reads
# `output`, and later cells reuse `X` and `y`.
for j, output in enumerate(nba_y.columns):

    # Defining inputs
    X = nba_X

    # Defining outputs (1-D array of the current output stat)
    y = nba_y.iloc[:, j].to_numpy()

    print("*********************************")
    Regressor(X,y)
    print("*********************************")

# Production 

#### Enter player name  

In [None]:
# Name of the player to evaluate. It is compared for exact equality with the
# 'Player' column of the 2018 data when the production dataset is built.
player = "Steven Adams"

### Setting dataset for Production 

In [None]:
# 2018 stat row(s) of the selected player; the name column is dropped so the
# remaining columns are the model inputs.
player_prod = (
    nba_2018_prod
    .loc[nba_2018_prod['Player'] == player]
    .drop(columns=["Player"])
)
player_prod

### Evaluate MPG impact on Selected Output Stats for the chosen player (2018), and Plot

In [None]:
# Fail early with a clear message when the requested player has no 2018 rows.
if player_prod.empty:
    raise ValueError(f"No 2018 data found for player {player!r}")

# Rebuild the scaler used at training time. The split depends only on the
# number of rows and the seed, so splitting nba_X alone with the same `rand`
# reproduces the training split without relying on X/y left over from the
# training cell. It is identical for every output, so it is fitted once.
X_train, X_test = train_test_split(nba_X, random_state=rand)
X_scaler = StandardScaler().fit(X_train)

# One row per whole-minute MPG value, all other stats held at the player's
# 2018 values (first matching row). Built once, without mutating player_prod.
max_mpg = int(nba_X["MPG"].max())
mpg_values = list(range(7, max_mpg + 1))
mpg_sweep = player_prod.iloc[[0] * len(mpg_values)].assign(MPG=mpg_values)
X_player_scaled_2018 = X_scaler.transform(mpg_sweep)

# savefig does not create missing directories.
Path("Figures").mkdir(exist_ok=True)

for output in nba_y.columns:

    # Load the model saved for this output.
    # NOTE(review): joblib.load unpickles the file; only load model files
    # produced by this notebook or another trusted source.
    model_NBA_load = load(f'model_NBA_{output}.joblib')

    print(f'Evaluated {output} for {player} (2018)')

    # Predict the output for every MPG value in a single batch.
    predictions = list(model_NBA_load.predict(X_player_scaled_2018))

    # Plot on an explicit figure and close it so figures do not accumulate.
    fig, ax = plt.subplots()
    ax.scatter(mpg_values, predictions, c="blue", label=f"{output} by Minute")
    ax.legend()
    ax.set_title(f"2018 - MPG vs. {output} for {player}")
    ax.set_xlabel("MPG")
    ax.set_ylabel(output)
    fig.savefig(f"Figures/{output}", facecolor="lightsteelblue")
    plt.close(fig)