In [1]:
from IPython.display import HTML
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import animation
from matplotlib.patches import Circle, Rectangle, Arc
from plotting_helper import draw_court

import seaborn as sns
import scipy.interpolate
import pystan

import numpy as np
from sklearn.decomposition import NMF
# Notebook-wide pandas display settings: floats render with thousands
# separators and four decimals; up to 1000 rows and 10 columns are shown
# before pandas truncates a rendered frame.
pd.set_option("display.float_format", '{:,.4f}'.format)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_columns", 10)

%matplotlib inline

In [2]:
# Plot/grid extents for shot locations as [min, max] pairs.
# NOTE(review): presumably court coordinates in feet (50 ft wide, 47 ft deep
# half court, with the origin 4.75 ft in from the baseline) -- confirm against
# plotting_helper.draw_court.
shot_x_range = [-25.0, 25.0]
shot_y_range = [-4.75, 47 - 4.75]
# Player lookup table; the loading loop below reads its PLAYER_NAME and
# PLAYER_ID columns to locate each player's shot file.
player_index = pd.read_csv("players_index.csv")
# Roster table, read from the working directory.
# NOTE(review): not referenced in the cells visible here -- confirm it is
# still needed further down the notebook.
rosters = pd.read_csv("roster.csv")

In [3]:
# Read in data for each player.
#
# For every row of `player_index`, load that player's 2019-20 shot log, keep
# only shots whose ACTION_TYPE contains "Floating", and retain the player if
# more than MIN_FLOATER_ATTEMPTS such shots remain.
#
# Results (module-level, consumed by later cells):
#   player_names    -- names of retained players, in load order
#   player_name_map -- player name -> position in player_names / player_dfs
#   player_dfs      -- one filtered shot DataFrame per retained player
#   load_failures   -- (player name, error repr) for players that failed to load
#
# NOTE(review): player_name_map is keyed by name, so two players sharing a
# name would collapse onto the later index -- confirm names are unique.
MIN_FLOATER_ATTEMPTS = 10  # a player needs strictly more floaters than this

player_names = []
player_name_map = {}
player_dfs = []
load_failures = []
for _, row in player_index.iterrows():
    try:
        pn = row['PLAYER_NAME']
        df = pd.read_csv("players_data/%s/2019-20_shots.csv" % row['PLAYER_ID'])
        # na=False treats a missing ACTION_TYPE as "not a floater" instead of
        # producing a NaN mask, which would make the boolean indexing fail and
        # drop the whole player.
        is_floater = df["ACTION_TYPE"].str.contains("Floating", na=False)
        # Copy the filtered rows so the column assignments below write to an
        # independent frame rather than a slice of the full shot log.
        df = df[is_floater].copy()
        if df.shape[0] > MIN_FLOATER_ATTEMPTS:
            # Rescale shot coordinates by 1/10.
            # NOTE(review): presumably tenths of feet -> feet, matching the
            # +/-25 shot_x_range -- confirm against the data source.
            df["LOC_X"] = df["LOC_X"] / 10
            df["LOC_Y"] = df["LOC_Y"] / 10
            player_dfs.append(df)
            player_name_map[pn] = len(player_names)
            player_names.append(pn)
    except Exception as exc:
        # Loading stays best-effort (one bad or missing file must not abort
        # the run), but the failure is recorded instead of silently discarded.
        # Catching Exception rather than using a bare `except` lets
        # KeyboardInterrupt / SystemExit propagate.
        load_failures.append((row.get('PLAYER_NAME'), repr(exc)))

if load_failures:
    print("Skipped %d player(s) whose shot data could not be loaded; "
          "see load_failures for details." % len(load_failures))

## Estimate Shot Probabilities with Uncertainties

In [4]:
# Stan program for per-player make probabilities.
#   data:       N entries, with made and attempted shot counts for each.
#   parameters: one make probability theta[i] per entry, bounded to [0, 1].
#               No prior statement is given for theta, so Stan's default
#               (flat over the declared support) applies.
#   model:      shots_made[i] ~ binomial(total_shots[i], theta[i]),
#               independently for each i.
#   generated quantities: ev[i] = 2.0 * theta[i].
#               NOTE(review): 2.0 is presumably the point value of a made
#               floater -- confirm none of the filtered shots are 3-pointers.
binomial_model_code = """
data {
    int<lower = 1> N;
    
    int shots_made[N];
    int total_shots[N];
}

parameters {
    real<lower = 0, upper = 1> theta[N];
}

model {
    for(i in 1:N) {
        shots_made[i] ~ binomial(total_shots[i], theta[i]);
    }
}

generated quantities {
    real ev[N];
    for(i in 1:N) {
        ev[i] = theta[i] * 2.0;
    }
}
"""
# Compile the Stan program (PyStan builds C++ code for it, as the log output
# of this cell shows), so this step can be slow.
binomial_model = pystan.StanModel(model_code = binomial_model_code)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_dc8cd41a661c2273bf5a57993c28ae32 NOW.


In [5]:
# Per-player tallies for the binomial model, in the same order as
# player_names / player_dfs:
#   shots_made  -- sum of SHOT_MADE_FLAG
#   shots_total -- number of rows (attempts)
#   ml_ev       -- (name, 2 * made / attempts), the raw expected value
shots_made, shots_total, ml_ev = [], [], []
for name, shots in zip(player_names, player_dfs):
    flags = shots["SHOT_MADE_FLAG"]
    n_made = sum(flags)
    n_taken = flags.shape[0]
    shots_made.append(n_made)
    shots_total.append(n_taken)
    ml_ev.append((name, (n_made / n_taken) * 2))

In [7]:
# Fixed seed so the posterior draws (and every table derived from them) are
# reproducible on Restart & Run All; change it to check sensitivity.
SAMPLING_SEED = 12345

# Inputs matching the Stan program's data block: one entry per retained player.
data = {
    'N' : len(shots_made),
    'shots_made' : shots_made,
    'total_shots' : shots_total
}
# init='0' asks PyStan to start every chain from zero on the unconstrained
# scale rather than from random initial values.
fit = binomial_model.sampling(data = data, init = '0', seed = SAMPLING_SEED)

In [9]:
# Posterior draws of the generated quantity `ev`; indexed as
# ev[:, player_position], i.e. one column per player.
ev = fit.extract()['ev']

# Order players by their maximum-likelihood EV, highest first (in place).
ml_ev.sort(key = lambda pair: pair[1], reverse = True)

# One summary row per player: posterior median, 10th/90th percentiles and
# standard deviation of the EV draws.
rows = []
for name, _ in ml_ev:
    draws = ev[:, player_name_map[name]]
    p10, p50, p90 = np.percentile(draws, [10, 50, 90])
    rows.append([name, p50, p10, p90, np.std(draws)])

df = pd.DataFrame(
    rows,
    columns=["Name", "Median EV", "10th Percentile EV", "90th Percentile EV", "EV Std"],
).set_index("Name")

In [10]:
# Show the posterior summary table. Rows follow the ml_ev ordering (descending
# maximum-likelihood EV), which is not necessarily descending "Median EV".
# With display.max_rows set to 1000 the whole table is rendered.
df

Unnamed: 0_level_0,Median EV,10th Percentile EV,90th Percentile EV,EV Std
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aron Baynes,1.5686,1.214,1.8093,0.2291
T.J. McConnell,1.5192,1.2044,1.7589,0.2132
Wilson Chandler,1.4523,1.1136,1.7176,0.23
Otto Porter Jr.,1.4034,1.0534,1.686,0.2461
Steven Adams,1.3757,1.0851,1.618,0.2022
Gordon Hayward,1.3159,1.0607,1.5369,0.1836
Nikola Jokic,1.2741,1.1216,1.4205,0.1163
Hamidou Diallo,1.2383,0.8855,1.5609,0.2564
Frank Kaminsky,1.2348,0.9811,1.4701,0.1866
Karl-Anthony Towns,1.2315,0.9341,1.5013,0.2197
