### Setting up the data 

In [71]:
import pandas as pd

nba_data = pd.read_csv(r"seasons_clean.csv")
nba_data.rename(columns={"WS/48":"WS48"}, inplace=True)
nba_data.head(2)

Unnamed: 0,Season,Player,Pos,Age,Tm,G,GS,MPG,PPG,RPG,...,TS%,USG%,OWS,DWS,WS,WS48,OBPM,DBPM,BPM,VORP
0,2009,Arron Afflalo,SG,24.0,DEN,82.0,75.0,27.085366,8.829268,3.073171,...,0.576,14.0,2.8,1.4,4.3,0.092,0.8,-0.5,0.3,1.3
1,2009,Alexis Ajinca,C,21.0,CHA,6.0,0.0,5.0,1.666667,0.666667,...,0.479,19.3,-0.1,0.0,0.0,-0.013,-8.5,-2.0,-10.4,-0.1


In [72]:
# Selecting columns to keep/drop for data set
stat_list = [7,0,1,3,5,6,8,9,10,11,12,13,15,17,18,20,21,23,24,26,27,28]
output_list = [0,25,30,31,34,35]

print(f'Stat list \n*******\n{nba_data.columns[stat_list]}')
print(f'\n\nOutput list \n*******\n{nba_data.columns[output_list]}')

Stat list 
*******
Index(['MPG', 'Season', 'Player', 'Age', 'G', 'GS', 'PPG', 'RPG', 'APG', 'SPG',
       'BPG', 'TOPG', 'FG', 'FG%', '3P', '3P%', 'FT', 'FT%', 'PFPG', 'TS%',
       'USG%', 'OWS'],
      dtype='object')


Output list 
*******
Index(['Season', 'PER', 'WS', 'WS48', 'BPM', 'VORP'], dtype='object')


In [49]:
# Defining dataset of chosen stats
nba_data2 = nba_data.loc[nba_data['MPG'] > 6]
nba_data2 = nba_data2.iloc[:,stat_list]


nba_data3 = nba_data.loc[nba_data['MPG'] > 6]
output_data = nba_data3.iloc[:,output_list]
output_data = output_data[output_data['Season'] != 2018].drop(columns=["Season"])


for j in range(0,len(output_list)-1):

#     output_data = nba_data3.iloc[:,output_list]
#     output_data = output_data[output_data['Season'] != 2018].drop(columns=["Season"])

    output = output_data.columns[j]
    output_data = output_data.iloc[:,j]

    # Reserving the 2018 season for "production"
    # ***Player name included
    nba_data_prod = nba_data2[nba_data2['Season'] == 2018]
    nba_data_prod = nba_data_prod.drop(columns=['Season'])

    # Setting the remainder of data for test and train
    nba_data_train_test = nba_data2[nba_data2['Season'] != 2018].drop(columns=["Player","Season"])

    # renaming for ease
    nba_tt = nba_data_train_test
    nba_tt

    ### BUilding X and y dataframe 

    # Defining inputs
    X = nba_tt
    X

    # Defining outputs
    y = output_data.values.reshape(-1,1)
    y

    print(X.shape)
    print(y.shape)
    y.max()

    X

    ### Starting data-modeling process 

    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    ### Select Player 

    # Selecting a random player from test data set
    X_player = X_test.sample(n=1, random_state=42)
    
    season,player,pos = nba_data.iloc[X_player.index.values[0],0:3]
    print(f'The {output} for {player} ({pos}, {season})')

    ### Scaling data 

    from sklearn.preprocessing import StandardScaler

    # Create a StandardScater model and fit it to the training data
    X_scaler = StandardScaler().fit(X_train)

    # Transform the training and testing data using the X_scaler
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)

    # from sklearn.linear_model import LinearRegression
    # model = LinearRegression()

    from sklearn.neural_network import MLPRegressor
    model = MLPRegressor()

    ### Model fit and score 

    model.fit(X_train_scaled, y_train)
    training_score = model.score(X_train_scaled, y_train)
    testing_score = model.score(X_test_scaled, y_test)

    print(f"Training Score: {training_score}")
    print(f"Testing Score: {testing_score}")



    ### Running the Loop

    max_mpg = int(nba_tt["MPG"].max())
    PER_list = list()

    # Building predictions based on each minute of MPG
    for i in range(7,max_mpg+1):
        X_player["MPG"] = i
        X_player_scaled = X_scaler.transform(X_player)
        y_player = model.predict(X_player_scaled)

        PER_list.append(y_player[0])

    # PER_list

    # ### Summarizing Weights for each Stat

    # importance = model.coefs_
    # importance
    # # consider permutation feature importance 
    # # https://scikit-learn.org/stable/modules/permutation_importance.html


    # list1 = list()
    # list2 = list()

    # for i in range(0,len(X.columns)):
    #     list1.append(importance[0][i].round(4))
    #     list2.append(X.columns[i])

    # df = pd.DataFrame(list(zip(list1, list2)),
    #                columns =['Weight', 'Stat']).set_index('Stat').sort_values(by='Weight',ascending=False)
    # df.T

    ###  Summarizing PER results for each potential MPG

    import matplotlib.pyplot as plt

    plt.scatter(range(7,max_mpg+1), PER_list, c="blue", label=f"{output} by Minute")
    plt.legend()
    plt.title(f"MPG vs. {output}")
    plt.savefig(fname=f"Figures/{output}")

    plt.clf()



(5001, 20)
(5001, 1)
            MPG   Age     G    GS       PPG       RPG       APG      SPG  \
1144  24.597015  26.0  67.0  17.0  9.791045  2.776119  1.970149  0.80597   

           BPG      TOPG     FG    FG%    3P   3P%    FT    FT%      PFPG  \
1144  0.074627  0.985075  239.0  0.402  97.0  0.37  81.0  0.844  2.029851   

        TS%  USG%  OWS  
1144  0.515  19.8  1.1  


  return f(*args, **kwargs)


Training Score: 0.9823204917818248
Testing Score: 0.9775476868685499
(5001, 20)
(5001, 1)
            MPG   Age     G    GS       PPG       RPG       APG      SPG  \
1144  24.597015  26.0  67.0  17.0  9.791045  2.776119  1.970149  0.80597   

           BPG      TOPG     FG    FG%    3P   3P%    FT    FT%      PFPG  \
1144  0.074627  0.985075  239.0  0.402  97.0  0.37  81.0  0.844  2.029851   

        TS%  USG%  OWS  
1144  0.515  19.8  1.1  


  return f(*args, **kwargs)


Training Score: 0.9829281159476689
Testing Score: 0.9696900569753981
(5001, 20)
(5001, 1)
            MPG   Age     G    GS       PPG       RPG       APG      SPG  \
1144  24.597015  26.0  67.0  17.0  9.791045  2.776119  1.970149  0.80597   

           BPG      TOPG     FG    FG%    3P   3P%    FT    FT%      PFPG  \
1144  0.074627  0.985075  239.0  0.402  97.0  0.37  81.0  0.844  2.029851   

        TS%  USG%  OWS  
1144  0.515  19.8  1.1  


  return f(*args, **kwargs)


Training Score: 0.6657010900142037
Testing Score: 0.5480911072016019
(5001, 20)
(5001, 1)
            MPG   Age     G    GS       PPG       RPG       APG      SPG  \
1144  24.597015  26.0  67.0  17.0  9.791045  2.776119  1.970149  0.80597   

           BPG      TOPG     FG    FG%    3P   3P%    FT    FT%      PFPG  \
1144  0.074627  0.985075  239.0  0.402  97.0  0.37  81.0  0.844  2.029851   

        TS%  USG%  OWS  
1144  0.515  19.8  1.1  


  return f(*args, **kwargs)


Training Score: 0.9335887861688936
Testing Score: 0.8960984599792441
(5001, 20)
(5001, 1)
            MPG   Age     G    GS       PPG       RPG       APG      SPG  \
1144  24.597015  26.0  67.0  17.0  9.791045  2.776119  1.970149  0.80597   

           BPG      TOPG     FG    FG%    3P   3P%    FT    FT%      PFPG  \
1144  0.074627  0.985075  239.0  0.402  97.0  0.37  81.0  0.844  2.029851   

        TS%  USG%  OWS  
1144  0.515  19.8  1.1  


  return f(*args, **kwargs)


Training Score: 0.9765103174964725
Testing Score: 0.9445052047082203


<Figure size 432x288 with 0 Axes>

In [60]:
# X_player.index.values[0]
season,player,pos = nba_data.iloc[X_player.index.values[0],0:3]



'SG'