# Imports

In [1]:
# -- Imports.
import os
import sqlite3
import numpy as np
import pandas as pd
import plotly.tools as tls
import plotly.graph_objs as go
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib import colors
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
plt.style.use("ggplot")
%matplotlib inline

# Functions

In [2]:
def clighten(color, percent):
    """Blend an RGB color toward white.

    Args:
        color (sequence of 3 numbers) - RGB channels on the 0-255 scale.
        percent (float) - fraction of the distance to white to travel
            (0 returns the color unchanged, 1 returns pure white).
    Return:
        (np.ndarray) - the lightened RGB channels.
    """
    rgb = np.array(color)
    # Gap between every channel and full white.
    to_white = np.array([255, 255, 255]) - rgb
    # Move each channel the requested fraction of its gap.
    return rgb + to_white * percent

# Load Data

In [3]:
# -- Load all data tables.
# -- NOTE: using a sqlite3 connection as a context manager only wraps a
# -- transaction (commit / rollback); it does not close the connection. Close it
# -- explicitly so the database handle is released once the tables are read.
conn = sqlite3.connect("../data/l2m.db")
try:
    calls = pd.read_sql("SELECT * FROM calls", conn)
    urls = pd.read_sql("SELECT * FROM urls", conn)
    refs = pd.read_sql("SELECT * FROM refs", conn)
finally:
    conn.close()

In [4]:
# -- Did the home team win?
urls["h_win"] = urls["home_score"] > urls["away_score"]
# -- What's the pdf? (last path component of the L2M url)
urls["pdf"] = urls["l2m_url"].apply(os.path.basename)
# -- For each row, find the winning team's abbr.: the home team when it
# -- outscored the away team, otherwise the away team. Vectorized with np.where
# -- instead of looping over iterrows(); it reuses the h_win comparison above.
winner = np.where(urls["h_win"], urls["home"], urls["away"])
urls["winner"] = winner

urls.head()

Unnamed: 0,gameid,season,date,away,away_score,home,home_score,l2m_url,h_win,pdf,winner
0,21500476,2015,2015-12-30 00:00:00,LAL,112,BOS,104,http://official.nba.com/wp-content/uploads/sit...,False,L2M-LAL-BOS-12-30-15.pdf,LAL
1,21601046,2016,2017-03-20 00:00:00,UTA,100,IND,107,http://official.nba.com/wp-content/uploads/sit...,True,L2M-UTA-IND-03-20-17.pdf,IND
2,21600996,2016,2017-03-13 00:00:00,LAC,108,UTA,114,http://official.nba.com/wp-content/uploads/sit...,True,L2M-LAC-UTA-03-13-17.pdf,UTA
3,21600377,2016,2016-12-14 00:00:00,IND,89,MIA,95,http://official.nba.com/wp-content/uploads/sit...,True,L2M-IND-MIA-12-14-16.pdf,MIA
4,41600164,2016,2017-04-23 00:00:00,HOU,113,OKC,109,http://official.nba.com/wp-content/uploads/sit...,False,L2M-HOU-OKC-04-23-17.pdf,HOU


In [5]:
# -- Preview the first rows of the refs table.
refs.head()

Unnamed: 0,index,gameid,refid,first_name,last_name,jersey_num
0,0,21500613,1662,Bill,Kennedy,55
1,1,21500613,2714,David,Guthrie,16
2,2,21500613,202035,Brett,Nansel,67
3,3,21500758,1168,David,Jones,36
4,4,21500758,1176,Monty,McCutchen,13


In [6]:
# -- Preview the first five rows of the calls table.
calls.head(5)

Unnamed: 0,index,pdf,period,time,call_type,committing_player,committing_team,disadvantaged_player,disadvantaged_team,review_decision
0,0,L2M-LAL-BOS-12-30-15.pdf,Q4,01:45.0,Foul: Personal,David Lee,BOS,Brandon Bass,LAL,CNC
1,1,L2M-LAL-BOS-12-30-15.pdf,Q4,01:38.0,Foul: Offensive,David Lee,BOS,Lou Williams,LAL,INC
2,2,L2M-LAL-BOS-12-30-15.pdf,Q4,01:34.0,Foul: Loose Ball,Jae Crowder,BOS,Kobe Bryant,LAL,CNC
3,3,L2M-LAL-BOS-12-30-15.pdf,Q4,01:13.0,Foul: Shooting,Isaiah Thomas,BOS,Brandon Bass,LAL,CC
4,4,L2M-LAL-BOS-12-30-15.pdf,Q4,01:09.0,Foul: Personal,Jordan Clarkson,LAL,Isaiah Thomas,BOS,INC


# Team Calls

In [7]:
# -- Count the number of L2Ms for each team (home & away).
team_l2ms = pd.concat(
    [urls.groupby("away").size(), urls.groupby("home").size()],
    axis=1,
    keys=["away", "home"],
)
# -- Get the total number of L2Ms. fill_value=0 keeps the total defined for a
# -- team that only appears on one side (home or away) instead of yielding NaN.
team_l2ms["tot"] = team_l2ms["away"].add(team_l2ms["home"], fill_value=0)

# -- Call type by team: one row per disadvantaged team, one column per review
# -- decision. The first decision column is dropped by position and missing
# -- combinations count as zero.
team_decisions = (
    calls.groupby(["disadvantaged_team", "review_decision"]).size()
    .unstack(level=1)
    .iloc[:, 1:]
    .fillna(0)
)
# -- Replace column names with asterisk.
# -- NOTE: positional rename; expects exactly eight remaining columns in this order.
team_decisions.columns = ['CC1', 'CC2', 'CNC1', 'CNC2', 'IC1', 'IC2', 'INC1', 'INC2']

# -- Sum across the numbered column pairs. The totals are written into a fresh
# -- frame rather than into a column slice of the pivot table, so no assignment
# -- ever targets a view of another frame.
team_calls = pd.DataFrame(index=team_decisions.index)
for decision in ["CC", "CNC", "IC", "INC"]:
    team_calls[decision] = team_decisions[decision + "1"] + team_decisions[decision + "2"]
# -- Sum correct and incorrect calls.
team_calls["Correct"] = team_calls["CC"] + team_calls["CNC"]
team_calls["Incorrect"] = team_calls["IC"] + team_calls["INC"]

# -- Merge team L2Ms with call type.
team_calls = team_l2ms.merge(team_calls, left_index=True, right_index=True)
# -- Call type per game (the first three columns are away / home / tot).
for col in team_calls.columns[3:]:
    col_name = col + "_PG"
    team_calls[col_name] = team_calls[col].astype(float) / team_calls["tot"]

# -- Fewest incorrect calls per L2M first.
team_calls = team_calls.sort_values("Incorrect_PG")
team_calls.head()

Unnamed: 0,away,home,tot,CC,CNC,IC,INC,Correct,Incorrect,CC_PG,CNC_PG,IC_PG,INC_PG,Correct_PG,Incorrect_PG
LAL,28,30,58,95.0,206.0,3.0,20.0,301.0,23.0,1.637931,3.551724,0.051724,0.344828,5.189655,0.396552
DET,37,28,65,134.0,316.0,3.0,23.0,450.0,26.0,2.061538,4.861538,0.046154,0.353846,6.923077,0.4
SAS,41,25,66,135.0,293.0,5.0,23.0,428.0,28.0,2.045455,4.439394,0.075758,0.348485,6.484848,0.424242
SAC,40,37,77,147.0,326.0,6.0,28.0,473.0,34.0,1.909091,4.233766,0.077922,0.363636,6.142857,0.441558
OKC,42,38,80,142.0,323.0,2.0,34.0,465.0,36.0,1.775,4.0375,0.025,0.425,5.8125,0.45


In [8]:
# -- Bar color: the base blue moved a quarter of the way toward white.
r, g, b = clighten([21, 64, 139], 0.25)
# -- Hover label for every team.
text = ["Inc/PG: {:.4f}".format(val) for val in team_calls.Incorrect_PG]

trace = go.Bar(
    x=team_calls.index,
    y=team_calls.Incorrect_PG,
    marker=dict(color="rgb({}, {}, {})".format(r, g, b)),
    text=text,
    hoverinfo="text+x",
)

data = [trace]
layout = go.Layout(
    title="Incorrect Calls Per L2M By Team",
    yaxis=dict(title="Incorrect Calls Per L2M")
)

# -- The figure is built directly from the trace and layout. An earlier
# -- tls.make_subplots(1, 1) call was dropped: its figure was overwritten here
# -- before being used and it only printed the plot-grid message.
fig = go.Figure(data=data, layout=layout)
iplot(fig)

This is the format of your plot grid:
[ (1,1) x1,y1 ]



# Ref Calls

In [9]:
# -- Call type by game: one row per gameid, one column per review decision.
# -- The first decision column is dropped by position and missing combinations
# -- count as zero.
game_decisions = (
    urls.merge(calls, on="pdf")
    .groupby(["gameid", "review_decision"]).size()
    .unstack(level=1)
    .iloc[:, 1:]
    .fillna(0)
)
# -- Replace column names with asterisk.
# -- NOTE: positional rename; expects exactly eight remaining columns in this order.
game_decisions.columns = ['CC1', 'CC2', 'CNC1', 'CNC2', 'IC1', 'IC2', 'INC1', 'INC2']

# -- Sum across the numbered column pairs. The totals are written into a fresh
# -- frame rather than into a column slice of the pivot table.
game_calls = pd.DataFrame(index=game_decisions.index)
for decision in ["CC", "CNC", "IC", "INC"]:
    game_calls[decision] = game_decisions[decision + "1"] + game_decisions[decision + "2"]
# -- Sum correct and incorrect calls.
game_calls["Correct"] = game_calls["CC"] + game_calls["CNC"]
game_calls["Incorrect"] = game_calls["IC"] + game_calls["INC"]

# -- Merge game calls with refs and total the call counts per referee.
# -- The call columns are selected by name, so the result does not depend on the
# -- position of the refs columns or on how sum() treats non-numeric columns.
call_cols = ["CC", "CNC", "IC", "INC", "Correct", "Incorrect"]
ref_calls = (
    refs.merge(game_calls, left_on="gameid", right_index=True)
    .groupby(["first_name", "last_name"])[call_cols]
    .sum()
)
# -- Number of rows in refs per referee name.
ref_games = refs.groupby(["first_name", "last_name"]).size().to_frame("gms")

# -- Ref games and calls.
ref_data = ref_games.merge(ref_calls, left_index=True, right_index=True)
# -- Call type per game.
for col in call_cols:
    col_name = col + "_PG"
    ref_data[col_name] = ref_data[col].astype(float) / ref_data["gms"]

# -- Set index, sort, and select refs with more than 20 refed games with l2ms.
ref_data = ref_data.reset_index()
# -- The label keeps its trailing space, exactly as before.
ref_data.index = ref_data["first_name"] + " " + ref_data["last_name"] + " "
ref_data = ref_data.sort_values("Incorrect_PG", ascending=False)
ref_data = ref_data[ref_data["gms"] > 20]

ref_data.head()

Unnamed: 0,first_name,last_name,gms,CC,CNC,IC,INC,Correct,Incorrect,CC_PG,CNC_PG,IC_PG,INC_PG,Correct_PG,Incorrect_PG
Tre Maddox,Tre,Maddox,50,262.0,466.0,12.0,99.0,728.0,111.0,5.24,9.32,0.24,1.98,14.56,2.22
David Jones,David,Jones,28,169.0,182.0,6.0,49.0,351.0,55.0,6.035714,6.5,0.214286,1.75,12.535714,1.964286
Leroy Richardson,Leroy,Richardson,61,310.0,577.0,14.0,105.0,887.0,119.0,5.081967,9.459016,0.229508,1.721311,14.540984,1.95082
Derrick Collins,Derrick,Collins,56,297.0,542.0,17.0,91.0,839.0,108.0,5.303571,9.678571,0.303571,1.625,14.982143,1.928571
Josh Tiven,Josh,Tiven,69,355.0,660.0,15.0,118.0,1015.0,133.0,5.144928,9.565217,0.217391,1.710145,14.710145,1.927536


In [10]:
# -- Three-stop colormap running #15418c -> #dfdfe5 -> #cb0729.
cmap = colors.LinearSegmentedColormap.from_list(
    "test",
    ["#15418c", "#dfdfe5", "#cb0729"],
)
# -- Scale the colors around the median incorrect-call rate: the range spans
# -- 1.5 standard deviations below the median to 1.7 above it.
nmedian = ref_data["Incorrect_PG"].median()
nstd = ref_data["Incorrect_PG"].std()
norm = colors.Normalize(
    vmin=nmedian - 1.5 * nstd,
    vmax=nmedian + 1.7 * nstd,
)
# -- Maps a rate to an RGBA color through the normalization and colormap above.
smap = cm.ScalarMappable(norm=norm, cmap=cmap)

In [11]:
# -- Lightened blue and red as rgb(...) strings.
r, g, b = clighten([21, 64, 139], 0.25)
blue_rgb = "rgb({}, {}, {})".format(r, g, b)
r, g, b = clighten([203, 7, 41], 0.3)
red_rgb = "rgb({}, {}, {})".format(r, g, b)

# -- Hover label for every referee: rate plus the number of L2Ms behind it.
text = ["Inc/PG: {:.4f}<br>L2Ms: {}".format(val, gm) for val, gm in zip(ref_data.Incorrect_PG, ref_data.gms)]
# -- One hex color per bar, taken from the scalar mappable (alpha channel dropped).
bar_colors = [colors.rgb2hex(smap.to_rgba(val)[:-1]) for val in ref_data.Incorrect_PG]

trace = go.Bar(
    x=ref_data.Incorrect_PG,
    y=ref_data.index,
    marker=dict(color=bar_colors),
    text=text,
    hoverinfo="text+y",
    orientation="h"
)

data = [trace]
layout = go.Layout(
    title="""Incorrect Calls Per L2M By Referee""",
    height=1400,
    # -- Plain dict instead of the deprecated go.Margin helper; same left margin.
    margin=dict(l=140),
    xaxis=dict(title="Incorrect Calls Per L2M"),
    # -- NOTE(review): the image and annotation coordinates below are hard-coded;
    # -- they presumably match the current number of bars and the current median,
    # -- so re-check them whenever the data changes.
    images=[dict(
        source= "https://cdn.nba.net/nba-drupal-prod/styles/landscape/s3/2017-07/NBA%20Primary%20Logo.jpg?itok=h_1XnifQ",
        xref= "paper",
        yref= "y",
        x=0.75,
        y=60,
        sizex=20,
        sizey=20,
        opacity= 0.8,
        layer= "above"
    )],
    annotations=[dict(
        x=1.515,
        y=31,
        xref="x",
        yref="y",
        xanchor="right",
        yanchor="center",
        text="Median",
        showarrow=False,
        font=dict(color="rgb(105, 105, 105)")
    )]
)

# -- The figure is built directly from the trace and layout. An earlier
# -- tls.make_subplots(1, 1) call was dropped: its figure was overwritten here
# -- before being used and it only printed the plot-grid message.
fig = go.Figure(data=data, layout=layout)
iplot(fig)

This is the format of your plot grid:
[ (1,1) x1,y1 ]

