In [2]:
import datetime

import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeCV

In [3]:
# Widen pandas' console rendering so wide result tables print without being truncated
pd.options.display.max_columns = 500
pd.options.display.width = 1000

In [4]:
# Translate a lambda penalty into the alpha value handed to RidgeCV
def lambda_to_alpha(lambda_value, samples):
    """Return ``lambda_value * samples / 2``.

    Args:
        lambda_value: ridge penalty expressed on the lambda scale.
        samples: number of training rows.

    Returns:
        The corresponding alpha as a float. ``alpha_to_lambda`` applies the
        inverse conversion for the same ``samples``.
    """
    scaled_penalty = lambda_value * samples
    return scaled_penalty / 2.0


# Translate a RidgeCV alpha back onto the lambda scale
def alpha_to_lambda(alpha_value, samples):
    """Return ``alpha_value * 2 / samples``.

    Args:
        alpha_value: ridge penalty on RidgeCV's alpha scale.
        samples: number of training rows; must be non-zero.

    Returns:
        The corresponding lambda as a float. ``lambda_to_alpha`` applies the
        inverse conversion for the same ``samples``.
    """
    doubled_alpha = alpha_value * 2.0
    return doubled_alpha / samples

In [5]:
# Encode one possession's ten player ids as a signed indicator row over `players`:
# [o_id1, ..., o_id5, d_id1, ..., d_id5] -> e.g. [0 1 1 0 0 0 1 1 1 -1 0 -1 -1 0 -1 -1 0]
def map_players(row_in, players):
    """Build the training-matrix row for a single possession.

    Args:
        row_in: indexable holding ten player ids; positions 0-4 are the
            offensive players and positions 5-9 the defensive players.
        players: list of all player ids. A player's position in this list is
            the column that player occupies in the output row.

    Returns:
        1-D float numpy array of length ``len(players)`` holding +1 in each
        offensive player's column, -1 in each defensive player's column and
        0 everywhere else.

    Raises:
        IndexError: if ``row_in`` has fewer than ten entries.
        ValueError: (from ``list.index``) if an id is not present in ``players``.
    """
    # Read all ten ids up front so a short row fails before any lookup happens.
    lineup = [row_in[slot] for slot in range(10)]

    encoded = np.zeros([len(players)])
    for slot, player_id in enumerate(lineup):
        # First five slots are offense (+1), last five are defense (-1).
        encoded[players.index(player_id)] = 1 if slot < 5 else -1

    return encoded

In [6]:
# Split the possession dataframe into the n x m training matrix, the n x 1 prior-adjusted
# target column and the per-row possession counts
def convert_to_matricies(possessions_df, name, players, prior):
    """Turn possession rows into the arrays consumed by the ridge regression.

    Args:
        possessions_df: dataframe holding the ten ``offensePlayerNId`` /
            ``defensePlayerNId`` columns, the target column ``name`` and a
            ``possessions`` column.
        name: label of the target column to extract.
        players: list of player ids; defines the column order of the matrix.
        prior: one prior value per entry of ``players``, in the same order.

    Returns:
        Tuple ``(x, y_adj, possessions)``:
        ``x`` is the signed indicator matrix built row by row with
        ``map_players``; ``y_adj`` is the target column minus ``x.dot(prior)``,
        i.e. the part of the target the prior does not already account for;
        ``possessions`` is the ``possessions`` column as a pandas Series.
    """
    id_columns = [
        'offensePlayer1Id', 'offensePlayer2Id', 'offensePlayer3Id',
        'offensePlayer4Id', 'offensePlayer5Id',
        'defensePlayer1Id', 'defensePlayer2Id', 'defensePlayer3Id',
        'defensePlayer4Id', 'defensePlayer5Id',
    ]
    lineup_ids = possessions_df[id_columns].to_numpy()

    # One signed indicator row per possession
    design = np.apply_along_axis(map_players, 1, lineup_ids, players)

    # Double brackets keep the target two-dimensional (n x 1)
    target = possessions_df[[name]].to_numpy()

    # Column vector of priors so the product below is also n x 1
    prior_column = np.array(prior).reshape(len(prior), 1)

    # Remove what the prior alone predicts for each row
    target_minus_prior = target - design.dot(prior_column)

    return design, target_minus_prior, possessions_df['possessions']

In [7]:
# Build the sorted list of unique player ids in the possessions data
def build_player_list(posessions_df):
    """Collect every distinct player id that appears in any lineup slot.

    Args:
        posessions_df: dataframe holding the ten ``offensePlayerNId`` /
            ``defensePlayerNId`` columns.

    Returns:
        A list of the distinct ids found across all ten columns, sorted in
        ascending order.
    """
    id_columns = [
        'offensePlayer1Id', 'offensePlayer2Id', 'offensePlayer3Id',
        'offensePlayer4Id', 'offensePlayer5Id',
        'defensePlayer1Id', 'defensePlayer2Id', 'defensePlayer3Id',
        'defensePlayer4Id', 'defensePlayer5Id',
    ]

    unique_ids = set()
    for column in id_columns:
        # De-duplicate every column the same way before merging it in, so no
        # column is scanned in full and all collected ids share one scalar type.
        unique_ids.update(posessions_df[column].unique())

    return sorted(unique_ids)

In [8]:
# Take in our nxm training matrix, our nx1 target matrix, the per-row possession weights, a list of
# lambdas, the name we want to give to the value we are getting from the coefficients, the list of
# players and the prior that was subtracted from the target.
def calculate_rapm(train_x, train_y, possessions, lambdas, name, players, prior):
    """Fit a cross-validated ridge regression and tabulate one coefficient per player.

    Args:
        train_x: n x m training matrix (one column per entry of ``players``).
        train_y: target values for the n rows (already prior-adjusted by the caller).
        possessions: per-row weights passed to the fit as ``sample_weight``.
        lambdas: candidate penalties on the lambda scale; converted to alphas
            with ``lambda_to_alpha`` using the number of rows of ``train_x``.
        name: column label for the final (coefficient + prior) value.
        players: list of player ids, in the column order of ``train_x``.
        prior: one prior value per entry of ``players``, in the same order.

    Returns:
        Tuple ``(players_coef, intercept)``. ``players_coef`` is a dataframe
        with columns ``PLAYER_ID``, ``<name>_PRE_PRIOR`` (raw coefficient),
        ``PRIOR``, ``<name>`` (coefficient + prior) and ``<name>_Rank``
        (descending rank of ``<name>``). ``intercept`` is the fitted model's
        ``intercept_``.
    """
    n_samples = train_x.shape[0]

    # convert our lambdas to alphas
    alphas = [lambda_to_alpha(lam, n_samples) for lam in lambdas]

    # create a 5 fold CV ridgeCV model. Our target data is not centered at 0, so we want to fit to an intercept.
    # `normalize` is deliberately not passed: False was already its default, and the keyword was
    # removed in scikit-learn 1.2, where passing it raises a TypeError.
    clf = RidgeCV(alphas=alphas, cv=5, fit_intercept=True)

    # fit our training data
    model = clf.fit(train_x, train_y, sample_weight=possessions)

    # convert our list of players into a mx1 matrix
    player_arr = np.array(players).reshape(len(players), 1)

    # coefficients as an mx1 column; reshape copes with both a (1, m) result (2-D target)
    # and an (m,) result (1-D target)
    coef_array = np.asarray(model.coef_).reshape(-1, 1)

    prior_arr = np.array(prior).reshape(len(prior), 1)

    # add the prior back so the final value is on the original scale
    coef_array_plus_prior = coef_array + prior_arr

    # concatenate the player ids with the raw coefficient, the prior and their sum into a mx4 matrix
    # (ids are stored alongside float columns in one array, so PLAYER_ID takes that array's dtype)
    player_id_with_coef = np.concatenate([player_arr, coef_array, prior_arr, coef_array_plus_prior], axis=1)
    # build a dataframe from our matrix
    players_coef = pd.DataFrame(player_id_with_coef)
    intercept = model.intercept_

    # report the selected penalty on both the alpha and the lambda scale
    print("Model Lambda: {0} -> {1}".format(model.alpha_, alpha_to_lambda(model.alpha_, n_samples)))

    # apply new column names
    players_coef.columns = ['PLAYER_ID', '{}_PRE_PRIOR'.format(name), 'PRIOR', name]

    # rank the values (largest value gets rank 1)
    players_coef['{0}_Rank'.format(name)] = players_coef[name].rank(ascending=False)

    return players_coef, intercept

In [12]:

# Read possession data CSV
possessions = pd.read_csv('data/possessions_19_20.csv')

# Read player name CSV
player_names = pd.read_csv('data/player_names.csv')

# Read Prior
prior = pd.read_csv('data/prior.csv')

# Filter out 0 possession possessions. The explicit copy makes the filtered result an
# independent frame, so adding a column to it later does not raise SettingWithCopyWarning.
possessions = possessions[possessions['possessions'] > 0].copy()


In [13]:
# Build the sorted list of unique player ids; a player's position in this list is the
# column index used for that player in the training matrix
player_list = build_player_list(possessions)

# Align the prior table to player_list: start from the ids, then attach the prior columns
prior_frame = pd.DataFrame()
prior_frame['PLAYER_ID'] = player_list
# Left merge keeps every id from player_list; ids missing from the prior file get NaN
# NOTE(review): assumes PLAYER_ID is unique in the prior file, otherwise rows are duplicated - confirm
prior_frame=prior_frame.merge(prior, on='PLAYER_ID', how='left')
# Print the sizes so a merge that added or dropped rows is visible
print(len(player_list))
print(prior.shape)
print(prior_frame.shape)
# Players without a prior value fall back to 0.0 (applies to every column of the frame)
prior_frame=prior_frame.fillna(0.0)
# All-zero prior, used to fit the model without any prior information
prior_frame['Raw'] = 0.0
# NOTE(review): `prior` is rebound here from the loaded DataFrame to a single column (Series),
# so this cell cannot be re-run without first re-running the cell that reads data/prior.csv
prior = prior_frame['Stable SPR']
prior_raw = prior_frame['Raw']
# Persist the aligned prior table (the dataframe index is written as the first CSV column)
prior_frame.to_csv('data/stable_prior.csv')

473
(473, 4)
(473, 4)


In [14]:

# Candidate penalties (lambda scale) for the cross validation
lambdas_rapm = [.01, .05]

# Points scored per 100 possessions for every row of the possession data
possessions['PointsPerPossession'] = 100 * (
    possessions['points'].values / possessions['possessions'].values
)

# Training matrix, prior-adjusted target and possession weights, using the 'Stable SPR' prior
train_x, train_y, possessions_raw = convert_to_matricies(
    possessions,
    'PointsPerPossession',
    player_list,
    prior,
)

# Fit the ridge model and add the prior back to get 'Stable RAPM'
results, intercept = calculate_rapm(
    train_x,
    train_y,
    possessions_raw,
    lambdas_rapm,
    'Stable RAPM',
    player_list,
    prior,
)

Model Lambda: 2430.4500000000003 -> 0.05


In [15]:
# The prior-adjusted matrices (train_x, train_y) built for 'Stable RAPM' are not rebuilt here:
# nothing in this cell reads them, and rebuilding them repeats an expensive row-by-row conversion.

# a list of lambdas for cross validation
lambdas_rapm = [.01, .05]

# extract the training data against the all-zero prior, i.e. with no prior adjustment of the target
train_x_raw, train_y_raw, possessions_raw = convert_to_matricies(possessions, 'PointsPerPossession', player_list, prior_raw)

# calculate the RAPM without a prior
# NOTE: `intercept` is overwritten with the intercept of this (no-prior) model
results_raw, intercept = calculate_rapm(train_x_raw, train_y_raw, possessions_raw, lambdas_rapm, 'RAPM', player_list, prior_raw)

Model Lambda: 2430.4500000000003 -> 0.05


In [16]:
# Combine the with-prior and no-prior results, order the columns alphabetically,
# then attach player names and the 'Stable SPR' prior value
results = (
    results
    .merge(results_raw, on='PLAYER_ID')
    .pipe(lambda frame: frame.reindex(sorted(frame.columns), axis=1))
    .merge(player_names, how='left', on='PLAYER_ID')
    .merge(prior_frame[['PLAYER_ID', 'Stable SPR']], on='PLAYER_ID')
)
print(results)

# save as CSV
results.to_csv('data/rapm_with_prior.csv')

     PLAYER_ID  PRIOR_x  PRIOR_y      RAPM  RAPM_PRE_PRIOR  RAPM_Rank  Stable RAPM  Stable RAPM_PRE_PRIOR  Stable RAPM_Rank  primaryKey       playerName  Stable SPR
0       1713.0     -1.7      0.0  0.346471        0.346471      155.0    -0.533261               1.166739             197.0        1713     Vince Carter        -1.7
1       2199.0     -1.1      0.0  0.501025        0.501025      130.0    -0.637625               0.462375             209.0        2199   Tyson Chandler        -1.1
2       2544.0      4.5      0.0  2.752204        2.752204        6.0     5.179204               0.679204               6.0        2544     LeBron James         4.5
3       2546.0     -1.5      0.0  1.013975        1.013975       62.0    -0.126367               1.373633             159.0        2546  Carmelo Anthony        -1.5
4       2594.0      0.1      0.0 -0.328059       -0.328059      315.0    -1.312537              -1.412537             290.0        2594      Kyle Korver         0.1
..        

In [21]:
# Show the combined table ordered from highest to lowest 'Stable RAPM'
results.sort_values(by='Stable RAPM', ascending=False)

Unnamed: 0,PLAYER_ID,PRIOR_x,PRIOR_y,RAPM,RAPM_PRE_PRIOR,RAPM_Rank,Stable RAPM,Stable RAPM_PRE_PRIOR,Stable RAPM_Rank,primaryKey,playerName,Stable SPR
49,201935.0,6.8,0.0,2.743309,2.743309,7.0,6.971243,0.171243,1.0,201935,James Harden,6.8
144,203507.0,7.6,0.0,2.261198,2.261198,14.0,5.564281,-2.035719,2.0,203507,Giannis Antetokounmpo,7.6
92,202710.0,4.6,0.0,2.798222,2.798222,5.0,5.293724,0.693724,3.0,202710,Jimmy Butler,4.6
197,1626157.0,5.5,0.0,1.202301,1.202301,50.0,5.287426,-0.212574,4.0,1626157,Karl-Anthony Towns,5.5
86,202695.0,3.9,0.0,3.428835,3.428835,3.0,5.219807,1.319807,5.0,202695,Kawhi Leonard,3.9
...,...,...,...,...,...,...,...,...,...,...,...,...
208,1626172.0,-2.3,0.0,-1.666686,-1.666686,450.0,-3.636374,-1.336374,469.0,1626172,Kevon Looney,-2.3
248,1627777.0,-1.1,0.0,-2.729996,-2.729996,471.0,-3.641893,-2.541893,470.0,1627777,Georges Niang,-1.1
36,201569.0,-2.0,0.0,-2.256115,-2.256115,466.0,-3.756372,-1.756372,471.0,201569,Eric Gordon,-2.0
365,1629014.0,-2.4,0.0,-2.889632,-2.889632,472.0,-4.362905,-1.962905,472.0,1629014,Anfernee Simons,-2.4
