In [1]:
import sys, os
sys.path.append("../..")
sys.path.append("..")
sys.path.append(os.getcwd())

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import copy

from mrsc.src.model.SVDmodel import SVDmodel
from mrsc.src.model.Target import Target
from mrsc.src.model.Donor import Donor
from mrsc.src.synthcontrol.mRSC import mRSC
from mrsc.src.importData import *
import mrsc.src.utils as utils

In [2]:
def getActivePlayers(stats, year):
    # list of name of the players who were active in this and last year
    thisYear = stats[stats.Year == year].copy()
    lastYear = stats[stats.Year == (year-1)].copy()
    return list(set(thisYear.Player.unique()) & set(lastYear.Player.unique()))

def topPlayers(stats, year, metric, n):
    stats = stats[stats.Year == year]
    stats = stats.groupby('Player').mean().reset_index()
    stats_sorted = stats[stats.Year == year].sort_values(metric, ascending = False).reset_index(drop=True)
    return stats_sorted[["Player","player_id"]][:n]

def removeDuplicated(players, stats):
    """
    players: "../data/nba-players-stats/player_data.csv"
    stats: "../data/nba-players-stats/Seasons_Stats.csv"
    """
    # players with the same name
    names = players.name.unique()
    duplicated = np.array([])

    for name in names:
        numrows = len(players[players.name == name])
        if numrows != 1:
            duplicated = np.append(duplicated, name)

    duplicated = np.sort(duplicated)

    start_year = players.copy()
    start_year = start_year.rename(columns={"name":"Player"})

    # for non-duplicated players
    stats_not_duplicated = stats[~stats.Player.isin(duplicated)]
    stats_not_duplicated = pd.merge(stats_not_duplicated, start_year, on="Player", how="left")

    # only take the values that make sense
    stats_not_duplicated = stats_not_duplicated[(stats_not_duplicated.Year >= stats_not_duplicated.year_start) & (stats_not_duplicated.Year <= stats_not_duplicated.year_end )]
    stats_not_duplicated["year_count"] = stats_not_duplicated.Year - stats_not_duplicated.year_start

    return stats_not_duplicated

# Clean Data

In [3]:
"""
import data
"""
players = pd.read_csv("../data/nba-players-stats/player_data.csv")
players = players[players.year_start >= 1980] # only choose players who started after 1980
players["player_id"] = range(0,len(players.name)) # assign id

stats = pd.read_csv("../data/nba-players-stats/Seasons_Stats.csv")
stats = stats[stats.Player.isin(players.name)]

# only after 1980
stats = stats[stats.Year >= 1980]

# without duplicated names --> to do: how to distinguish multiple player with the same name
stats = removeDuplicated(players, stats)
stats.Year = stats.Year.astype(int)
stats.year_count = stats.year_count.astype(int)

# transform stats to a dictionary composed of df's for each stat
# the stats are re-calculated to get one stat for each year

metricsPerGameColNames = ["PTS","AST","TOV","TRB","STL","BLK"]
metricsPerGameDict = getMetricsPerGameDict(stats, metricsPerGameColNames)

metricsPerCentColNames = ["FG","FT","3P"]
metricsPerCentDict = getMetricsPerCentDict(stats, metricsPerCentColNames)

metricsWeightedColNames = ["PER"]
metricsWeightedDict = getMetricsWeightedDict(stats, metricsWeightedColNames)

allMetricsDict = {**metricsPerGameDict, **metricsPerCentDict, **metricsWeightedDict}
allPivotedTableDict = getPivotedTableDict(allMetricsDict)

# this matrix will be used to mask the table
df_year = pd.pivot_table(stats, values="Year", index="Player", columns = "year_count")

In [None]:
activePlayers = getActivePlayers(stats, 2017)
activePlayers.sort()

for playerName in activePlayers:

    offMetrics = ["PTS_G","AST_G","TOV_G","PER_w", "FG%","FT%","3P%"]

    target = Target(playerName, allPivotedTableDict, df_year)
    donor = Donor(allPivotedTableDict, df_year)

    mrsc = mRSC(donor, target, probObservation=1)

    expSetup = ["sliding", "SVD", "pre", "pinv", False]

    mrsc.fit(offMetrics, 2017, pred_length =1, singvals=8, setup = expSetup)
    pred = mrsc.predict()
    true = mrsc.getTrue()
    mask = (true !=0 )
    mape = np.abs(pred - true) / true[mask]

    print(mrsc.target.key)
    print(mape.astype(float).round(3))

Aaron Brooks
        2017
PTS_G  0.777
AST_G  0.405
TOV_G  0.263
PER_w  0.231
FG%    0.089
FT%    0.211
3P%    0.351
Aaron Gordon
        2017
PTS_G  0.324
AST_G  0.155
TOV_G  0.039
PER_w  0.036
FG%    0.002
FT%    0.009
3P%    0.213
Aaron Harrison
        2017
PTS_G  5.492
AST_G  0.632
TOV_G    NaN
PER_w -3.223
FG%      NaN
FT%    0.147
3P%      NaN
Adreian Payne
        2017
PTS_G  0.157
AST_G  0.349
TOV_G  0.594
PER_w  0.508
FG%    0.143
FT%    0.157
3P%    0.048
Al Horford
        2017
PTS_G  0.047
AST_G  0.449
TOV_G  0.018
PER_w  0.002
FG%    0.146
FT%    0.101
3P%    0.205
Al Jefferson
        2017
PTS_G  0.544
AST_G  0.810
TOV_G  1.872
PER_w  0.093
FG%    0.087
FT%    0.135
3P%      NaN
Al-Farouq Aminu
        2017
PTS_G  0.095
AST_G  0.048
TOV_G  0.286
PER_w  0.065
FG%    0.010
FT%    0.129
3P%    0.477
Alan Anderson
        2017
PTS_G  0.025
AST_G  0.531
TOV_G  1.259
PER_w  0.023
FG%    0.330
FT%    0.404
3P%    0.685
Alan Williams
        2017
PTS_G  0.416
AST_G  0.727
TOV_G 

D'Angelo Russell
        2017
PTS_G  0.199
AST_G  0.343
TOV_G  0.251
PER_w  0.071
FG%    0.040
FT%    0.059
3P%    0.220
D.J. Augustin
        2017
PTS_G  0.122
AST_G  0.186
TOV_G  0.104
PER_w  0.031
FG%    0.004
FT%    0.179
3P%    0.298
Dahntay Jones
        2017
PTS_G  0.019
AST_G  0.675
TOV_G  0.048
PER_w  0.019
FG%    0.093
FT%    0.485
3P%      NaN
Damian Lillard
        2017
PTS_G  0.193
AST_G  0.045
TOV_G  0.131
PER_w  0.158
FG%    0.028
FT%    0.006
3P%    0.097
Damjan Rudez
        2017
PTS_G  0.373
AST_G  0.078
TOV_G  0.855
PER_w  0.167
FG%    0.172
FT%      NaN
3P%    0.065
Danilo Gallinari
        2017
PTS_G  0.011
AST_G  0.264
TOV_G  0.564
PER_w  0.022
FG%    0.004
FT%    0.186
3P%    0.286
Danny Green
        2017
PTS_G  0.049
AST_G  0.294
TOV_G  0.082
PER_w  0.184
FG%    0.136
FT%    0.209
3P%    0.542
Dante Cunningham
        2017
PTS_G  0.429
AST_G  0.167
TOV_G  0.486
PER_w  0.062
FG%    0.171
FT%    0.011
3P%    0.665
Darrell Arthur
        2017
PTS_G  0.182
AST_G  0

JaVale McGee
        2017
PTS_G  0.196
AST_G  0.485
TOV_G  0.522
PER_w  0.436
FG%    0.087
FT%    0.657
3P%      NaN
Jabari Parker
        2017
PTS_G  0.364
AST_G  0.394
TOV_G  0.110
PER_w  0.239
FG%    0.081
FT%    0.005
3P%    0.339
Jae Crowder
        2017
PTS_G  0.168
AST_G  0.048
TOV_G  0.362
PER_w  0.061
FG%    0.119
FT%    0.216
3P%    0.523
Jahlil Okafor
        2017
PTS_G  0.362
AST_G  0.114
TOV_G  0.135
PER_w  0.145
FG%    0.036
FT%    0.070
3P%      NaN
Jamal Crawford
        2017
PTS_G  0.016
AST_G  0.173
TOV_G  0.061
PER_w  0.024
FG%    0.361
FT%    0.419
3P%    0.260
Jameer Nelson
        2017
PTS_G  0.383
AST_G  0.378
TOV_G  0.293
PER_w  0.073
FG%    0.166
FT%    0.041
3P%    0.308
James Ennis
        2017
PTS_G  0.053
AST_G  0.109
TOV_G  0.042
PER_w  0.309
FG%    0.115
FT%    0.052
3P%    0.312
James Harden
        2017
PTS_G  0.131
AST_G  0.487
TOV_G  0.451
PER_w  0.114
FG%    0.211
FT%    0.152
3P%    0.209
James Johnson
        2017
PTS_G  0.573
AST_G  0.578
TOV_G  0

Kevin Love
        2017
PTS_G  0.258
AST_G  0.054
TOV_G  0.144
PER_w  0.127
FG%    0.416
FT%    0.108
3P%    0.152
Kevin Seraphin
        2017
PTS_G  0.009
AST_G  0.265
TOV_G  0.230
PER_w  0.294
FG%    0.266
FT%    0.071
3P%      NaN
Kevon Looney
        2017
PTS_G  0.026
AST_G  0.537
TOV_G  0.566
PER_w  0.235
FG%    0.248
FT%    0.502
3P%    0.666
Khris Middleton
        2017
PTS_G  0.070
AST_G  0.077
TOV_G  0.112
PER_w  0.032
FG%    0.155
FT%    0.265
3P%    0.452
Klay Thompson
        2017
PTS_G  0.113
AST_G  0.425
TOV_G  0.373
PER_w  0.004
FG%    0.155
FT%    0.190
3P%    0.332
Kosta Koufos
        2017
PTS_G  0.201
AST_G  0.200
TOV_G  0.015
PER_w  0.067
FG%    0.076
FT%    0.187
3P%      NaN
Kris Humphries
        2017
PTS_G  0.110
AST_G  0.742
TOV_G  0.501
PER_w  0.088
FG%    0.146
FT%    0.181
3P%    0.713
Kristaps Porzingis
        2017
PTS_G  0.252
AST_G  0.003
TOV_G  0.096
PER_w  0.103
FG%    0.054
FT%    0.017
3P%    0.191
Kyle Anderson
        2017
PTS_G  0.322
AST_G  0.202

Omer Asik
        2017
PTS_G  0.674
AST_G  0.421
TOV_G  0.745
PER_w  0.165
FG%    0.011
FT%    0.154
3P%      NaN
Omri Casspi
        2017
PTS_G  0.686
AST_G  0.465
TOV_G  0.409
PER_w  0.266
FG%    0.147
FT%    0.063
3P%    0.505
Otto Porter
        2017
PTS_G  0.251
AST_G  0.022
TOV_G  1.335
PER_w  0.308
FG%    0.347
FT%    0.371
3P%    0.647
P.J. Tucker
        2017
PTS_G  0.086
AST_G  0.136
TOV_G  0.151
PER_w  0.117
FG%    0.146
FT%    0.260
3P%    0.587
Pat Connaughton
        2017
PTS_G  0.184
AST_G  0.283
TOV_G  0.229
PER_w  0.385
FG%    0.337
FT%    0.008
3P%    0.389
Patrick Beverley
        2017
PTS_G  0.025
AST_G  0.272
TOV_G  0.008
PER_w  0.051
FG%    0.100
FT%    0.167
3P%    0.416
Patrick Patterson
        2017
PTS_G  0.147
AST_G  0.222
TOV_G  0.526
PER_w  0.073
FG%    0.129
FT%    0.052
3P%    0.525
Patty Mills
        2017
PTS_G  0.318
AST_G  0.510
TOV_G  0.127
PER_w  0.106
FG%    0.173
FT%    0.067
3P%    0.534
Pau Gasol
        2017
PTS_G  0.016
AST_G  0.210
TOV_G  0.4