In [27]:
import json
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeCV

In [28]:
# Load the player lookup from JSON. The context manager closes the file handle
# (previously it was left open for the life of the kernel). A forward slash is
# used because "\w" is an invalid escape sequence and a backslash separator
# only resolves on Windows; "/" works on Windows and POSIX alike.
with open("data/wnba_players.json") as f:
    data = json.load(f)
player_id = []  # NOTE(review): not referenced by any visible cell - confirm before removing
# JSON object keys are always strings; convert them to int so the lookup can be
# keyed by integer player id (it is later mapped over the integer 'playerId' column).
player_dict = {int(k): v for k, v in data.items()}

In [29]:
# Select the year to calculate RAPM for
year = 2021
# Forward slash instead of a backslash: "\W" is an invalid escape sequence and a
# backslash separator only resolves on Windows; "/" works on every platform.
possessions = pd.read_csv("data/WNBA_rapm_possessions_{0}.csv".format(year))

In [30]:
def build_player_list(posessions, columns=None):
    """Return the sorted list of distinct values found in the lineup columns.

    Args:
        posessions: DataFrame holding one lineup per row. It must contain every
            column named in ``columns``.
        columns: Optional iterable of column names to scan. Defaults to the ten
            lineup slots ``off1``..``off5`` and ``def1``..``def5``.

    Returns:
        A list of the unique values across all scanned columns, sorted ascending.
    """
    if columns is None:
        columns = (['off{0}'.format(slot) for slot in range(1, 6)] +
                   ['def{0}'.format(slot) for slot in range(1, 6)])

    players = set()
    for column in columns:
        # De-duplicate per column first so only the distinct ids are fed to the
        # set (the original skipped this step for 'off3' only).
        players.update(posessions[column].unique())

    return sorted(players)

In [31]:
# Every distinct player id that appears in any lineup slot, sorted
player_list = build_player_list(possessions)
# Regression target: the 'Points' value of each row multiplied by 100
possessions['PointsPerPossession'] = possessions['Points'] * 100
# possessions.head()

In [32]:
def map_players(row_in, players):
    """Encode one row of ten player ids as a signed indicator vector.

    Args:
        row_in: Indexable with at least ten entries; positions 0-4 are treated
            as the offensive players and positions 5-9 as the defensive players.
        players: List of all player ids; a player's position in this list
            determines which output slot is set.

    Returns:
        A 1-D float array of length ``2 * len(players)``. The first half has a
        ``1`` at the index of each offensive player; the second half has a
        ``-1`` at the index of each defensive player. All other entries are 0.
    """
    # Read all ten slots before doing anything else
    lineup = [row_in[slot] for slot in range(10)]
    roster_size = len(players)

    encoded = np.zeros([roster_size * 2])

    # Offensive players occupy the first block of columns
    for attacker in lineup[:5]:
        encoded[players.index(attacker)] = 1

    # Defensive players occupy the second block, shifted by the roster size
    for defender in lineup[5:]:
        encoded[players.index(defender) + roster_size] = -1

    return encoded

In [33]:
def convert_to_matricies(possessions, name, players):
    """Split the possessions frame into a design matrix and a target vector.

    No weights are produced; the original note on this function says they are
    unnecessary because every row represents a single possession.

    Args:
        possessions: DataFrame containing the ten lineup columns
            (``off1``..``off5``, ``def1``..``def5``) and the column ``name``.
        name: Name of the column holding the target values.
        players: List of all player ids, forwarded to ``map_players``.

    Returns:
        Tuple ``(x, y)``: ``x`` has one encoded row per input row (built by
        ``map_players``), ``y`` is the ``name`` column as a numpy array.
    """
    lineup_columns = ['off1', 'off2', 'off3', 'off4', 'off5',
                      'def1', 'def2', 'def3', 'def4', 'def5']

    # Raw player ids, one row per possession
    lineup_ids = possessions[lineup_columns].to_numpy()

    # Encode each row of ids with the mapping function
    design_matrix = np.apply_along_axis(map_players, 1, lineup_ids, players)

    # Target values as a plain numpy array
    target = possessions[name].to_numpy()

    return design_matrix, target

In [34]:
train_x, train_y = convert_to_matricies(possessions, 'PointsPerPossession', player_list)

In [35]:
def lambda_to_alpha(lambda_value, samples):
    """Convert a lambda value into the alpha used for RidgeCV.

    Computes ``(lambda_value * samples) / 2``.
    """
    scaled = lambda_value * samples
    return scaled / 2.0

def alpha_to_lambda(alpha_value, samples):
    """Convert a RidgeCV alpha back into a lambda value.

    Computes ``(alpha_value * 2) / samples``.
    """
    doubled = alpha_value * 2.0
    return doubled / samples

In [36]:
def calculate_rapm(train_x, train_y, lambdas, name, players):
    """Fit a cross-validated ridge regression and tabulate per-player coefficients.

    Args:
        train_x: 2-D design matrix with one row per sample. The first
            ``len(players)`` columns are read as offensive coefficients and the
            remaining columns as defensive coefficients.
        train_y: Target value for each row of ``train_x``.
        lambdas: Iterable of lambda values; each is converted to a RidgeCV alpha
            with ``lambda_to_alpha`` using the number of rows in ``train_x``.
        name: Label used to build the output column names (e.g. ``'RAPM'``
            yields ``ORAPM``, ``DRAPM``, ``RAPM`` and their ``... Rank`` columns).
        players: List of player ids, in the same order as the design-matrix
            columns.

    Returns:
        Tuple ``(players_coef, intercept)`` where ``players_coef`` is a DataFrame
        with columns ``playerId``, ``O<name>``, ``D<name>``, ``<name>``,
        ``<name> Rank``, ``O<name> Rank``, ``D<name> Rank`` and ``intercept`` is
        the fitted model intercept.
    """
    n_players = len(players)
    offense_col = 'O{0}'.format(name)
    defense_col = 'D{0}'.format(name)

    # convert our lambdas to alphas
    alphas = [lambda_to_alpha(l, train_x.shape[0]) for l in lambdas]

    # 5-fold RidgeCV. The target data is not centered at 0, so fit an intercept.
    # `normalize` is intentionally not passed: it was removed in scikit-learn 1.2
    # (passing it raises TypeError there) and False was its default, so omitting
    # it gives the same model on older versions too.
    clf = RidgeCV(alphas=alphas, cv=5, fit_intercept=True)

    # fit our training data
    model = clf.fit(train_x, train_y)

    # Stack ids, offensive and defensive coefficients side by side (m x 3).
    # Going through a single numpy matrix keeps the same dtype promotion as
    # before (ids share the coefficients' dtype until the caller casts them).
    player_id_with_coef = np.column_stack([
        np.asarray(players),
        model.coef_[:n_players],
        model.coef_[n_players:],
    ])
    players_coef = pd.DataFrame(
        player_id_with_coef, columns=['playerId', offense_col, defense_col])

    # Add the offensive and defensive components together (ideally this would be
    # weighted by the number of offensive and defensive possessions played, as
    # they are often not equal).
    players_coef[name] = players_coef[offense_col] + players_coef[defense_col]

    # rank the values, largest value = rank 1
    players_coef['{0} Rank'.format(name)] = players_coef[name].rank(ascending=False)
    players_coef['{0} Rank'.format(offense_col)] = players_coef[offense_col].rank(ascending=False)
    players_coef['{0} Rank'.format(defense_col)] = players_coef[defense_col].rank(ascending=False)

    return players_coef, model.intercept_

In [37]:
# Candidate lambda values; converted to RidgeCV alphas inside calculate_rapm
lambdas_rapm = [0.01, 0.05, 0.1]

In [38]:
# Fit the model, then shape the coefficient table into its final presentation form
results, intercept = calculate_rapm(train_x, train_y, lambdas_rapm, 'RAPM', player_list)

integer_columns = ['playerId', 'RAPM Rank', 'ORAPM Rank', 'DRAPM Rank']
output_columns = ['Player Name', 'RAPM', 'RAPM Rank', 'ORAPM', 'ORAPM Rank',
                  'DRAPM', 'DRAPM Rank', 'Year']

# Round first, then cast ids and ranks to integers
results = np.round(results, decimals=2)
results = results.astype({column: 'int' for column in integer_columns})

# Attach the lookup value for each id and tag every row with the year
results['Player Name'] = results['playerId'].map(player_dict)
results['Year'] = year

# Highest RAPM first, fresh 0..n-1 index
results = results.sort_values(by=['RAPM'], ascending=False).reset_index(drop=True)

# Keep only the presentation columns, in display order (this drops 'playerId')
results = results[output_columns]

In [39]:
# Write this year's table to disk. A forward slash is used because "\W" is an
# invalid escape sequence and a backslash separator only resolves on Windows.
results.to_csv('data/WNBA_RAPM_{0}.csv'.format(year), index=False)
results.head()

Unnamed: 0,Player Name,RAPM,RAPM Rank,ORAPM,ORAPM Rank,DRAPM,DRAPM Rank,Year
0,Jonquel Jones,6.04,1,2.36,9,3.68,2,2021
1,Breanna Stewart,5.91,2,3.66,2,2.26,7,2021
2,Jackie Young,5.83,3,4.19,1,1.64,13,2021
3,Dearica Hamby,4.51,4,2.75,8,1.76,12,2021
4,Skylar Diggins-Smith,4.42,5,3.43,4,0.99,34,2021


In [40]:
# Combine the per-year RAPM tables into one frame.
# - `rapm_2021` was never defined in this notebook, so a fresh kernel raised
#   NameError; it is now read from its CSV like the other years (the file is
#   written by the export cell above when `year` is 2021).
# - DataFrame.append was removed in pandas 2.0; pd.concat produces the same
#   row order and keeps each frame's original index, as the append chain did.
# - Forward slashes replace the invalid "\W" escape / Windows-only separator.
rapm_2021 = pd.read_csv("data/WNBA_RAPM_2021.csv")
rapm_2020 = pd.read_csv("data/WNBA_RAPM_2020.csv")
rapm_2019 = pd.read_csv("data/WNBA_RAPM_2019.csv")
rapm_2018 = pd.read_csv("data/WNBA_RAPM_2018.csv")
rapm = pd.concat([rapm_2021, rapm_2020, rapm_2019, rapm_2018])

In [41]:
# Write the combined multi-year table, without the index column
rapm.to_csv(path_or_buf='WNBA_RAPM.csv', index=False)