In [1]:
import numpy as np
import pandas as pd
import pdb
# Show every column when a DataFrame is displayed (None removes the column cap).
pd.options.display.max_columns = None

### Read in throw-by-throw raw data

In [2]:
%%bash
# List the working directory to see which per-team stats CSVs sit next to this notebook.
ls

DCBreeze2015-stats.csv
explore_sjs.ipynb
MadisonRadicals2015-stats.csv
NewYorkEmpire2015-stats.csv
SanJoseSpiders2015-stats.csv


In [3]:
# Load the San Jose Spiders' 2015 throw-by-throw data into a DataFrame.
sjs = pd.read_csv(filepath_or_buffer="SanJoseSpiders2015-stats.csv")

In [4]:
# Peek at the first three rows to see the available columns.
sjs.iloc[:3]

Unnamed: 0,Date/Time,Tournamemnt,Opponent,Point Elapsed Seconds,Line,Our Score - End of Point,Their Score - End of Point,Event Type,Action,Passer,Receiver,Defender,Hang Time (secs),Player 0,Player 1,Player 2,Player 3,Player 4,Player 5,Player 6,Player 7,Player 8,Player 9,Player 10,Player 11,Player 12,Player 13,Player 14,Player 15,Player 16,Player 17,Player 18,Player 19,Player 20,Player 21,Player 22,Player 23,Player 24,Player 25,Player 26,Player 27,Elapsed Time (secs)
0,2015-05-03 13:36,AUDL,Los Angeles Aviators,51,O,1,0,Offense,OpponentPull,Anonymous,Anonymous,,,Rasmus C,Sanche M,Wynne R,Kittre B,Higgin S,Smith K,Johnso C,,,,,,,,,,,,,,,,,,,,,,0
1,2015-05-03 13:36,AUDL,Los Angeles Aviators,51,O,1,0,Offense,Catch,Rasmus C,Wynne R,,,Rasmus C,Sanche M,Wynne R,Kittre B,Higgin S,Smith K,Johnso C,,,,,,,,,,,,,,,,,,,,,,9
2,2015-05-03 13:36,AUDL,Los Angeles Aviators,51,O,1,0,Offense,Catch,Wynne R,Rasmus C,,,Rasmus C,Sanche M,Wynne R,Kittre B,Higgin S,Smith K,Johnso C,,,,,,,,,,,,,,,,,,,,,,13


### Get all players who played a point

In [59]:
# Keep only the rows that have a value in the "Player 1" lineup slot.
has_lineup = sjs["Player 1"].notnull()
cleaned = sjs[has_lineup]

# Flatten every lineup slot (Player 0 .. Player 27) into one array and
# de-duplicate it, preserving first-seen order.
lineup_cells = cleaned.loc[:, "Player 0":"Player 27"].values.ravel()
unique_names = pd.unique(lineup_cells)

# Empty lineup slots come through as NaN; drop them from the roster.
players = [name for name in unique_names if str(name) != "nan"]
players

['Rasmus C',
 'Sanche M',
 'Wynne R',
 'Kittre B',
 'Higgin S',
 'Smith K',
 'Johnso C',
 'Jaffe M',
 'Peters B',
 'Falat E',
 'Cocks K',
 'Dillow L',
 'Marcy J',
 'Farina C',
 'Crawfo M',
 'Cohen G',
 'Natali M',
 'Ham S',
 'Norden J',
 'Cao C',
 'Zaccar S',
 'Grant T',
 'Klevec A',
 'Brown A',
 'Guerre X',
 'Joye A',
 'Reinha S',
 'Van Ar K',
 'Roeder S']

### Looking at point-by-point summary statistics

In [60]:
# throws where a goal was scored
goals = sjs[sjs["Action"] == "Goal"]
scores = goals[goals["Event Type"] == "Offense"]
goals.head(3)

Unnamed: 0,Date/Time,Tournamemnt,Opponent,Point Elapsed Seconds,Line,Our Score - End of Point,Their Score - End of Point,Event Type,Action,Passer,Receiver,Defender,Hang Time (secs),Player 0,Player 1,Player 2,Player 3,Player 4,Player 5,Player 6,Player 7,Player 8,Player 9,Player 10,Player 11,Player 12,Player 13,Player 14,Player 15,Player 16,Player 17,Player 18,Player 19,Player 20,Player 21,Player 22,Player 23,Player 24,Player 25,Player 26,Player 27,Elapsed Time (secs)
11,2015-05-03 13:36,AUDL,Los Angeles Aviators,51,O,1,0,Offense,Goal,Higgin S,Wynne R,,,Rasmus C,Sanche M,Wynne R,Kittre B,Higgin S,Smith K,Johnso C,,,,,,,,,,,,,,,,,,,,,,49
20,2015-05-03 13:36,AUDL,Los Angeles Aviators,58,D,2,0,Offense,Goal,Johnso C,Kittre B,,,Rasmus C,Jaffe M,Peters B,Falat E,Kittre B,Cocks K,Johnso C,,,,,,,,,,,,,,,,,,,,,,165
30,2015-05-03 13:36,AUDL,Los Angeles Aviators,85,D,3,0,Offense,Goal,Dillow L,Cohen G,,,Dillow L,Marcy J,Farina C,Crawfo M,Cohen G,Natali M,Ham S,,,,,,,,,,,,,,,,,,,,,,305


About 68% of the Spiders' goals come on points where they started on the O line. That means almost a third of their goals come from breaks.

In [61]:
# Fraction of the rows in `scores` that were played with the O line on the field.
float((scores["Line"] == "O").sum()) / len(scores)

0.6813725490196079

Let's look at their O and D line conversion rates.

In [62]:
# Goal rows from points played with the O line on the field.
o_line = goals[goals["Line"] == "O"]

# Goals logged with Event Type "Defense" on those points: the O line was broken.
broken = o_line.loc[o_line["Event Type"] == "Defense", "Event Type"]

# Single-argument print(...) is valid as both the Python 2 statement and the
# Python 3 function, so this cell no longer raises SyntaxError on Python 3.
print("Broken {0} points out of {1}".format(broken.shape[0], o_line.shape[0]))
print(broken.shape[0] / float(o_line.shape[0]))

Broken 93 points out of 371
0.250673854447


Interestingly, they are broken on about 25% of their O line points. I would have expected a much lower rate.

In [63]:
# Goal rows from points played with the D line on the field.
d_line = goals[goals["Line"] == "D"]

# Goals logged with Event Type "Offense" on those points: the D line scored a break.
breaks = d_line.loc[d_line["Event Type"] == "Offense", "Event Type"]

# Single-argument print(...) is valid as both the Python 2 statement and the
# Python 3 function, so this cell no longer raises SyntaxError on Python 3.
print("The Spiders broke on {0} out of {1}".format(breaks.shape[0], d_line.shape[0]))
print(breaks.shape[0] / float(d_line.shape[0]))

The Spiders broke on 130 out of 410
0.317073170732


They broke on about 32% of their D points, and also had ~40 more D points than O points. A break percentage of roughly 30% seems pretty good to me.

Let's get some player-specific summary statistics

In [64]:
# Per-player counters: every player found in a lineup starts at zero.
# dict(stat_dict) gives each player an independent copy of the template.
stat_dict = {"goals": 0, "assists": 0, "ds": 0, "throwaways": 0, "drops": 0}
sjs_dict = {player: dict(stat_dict) for player in players}

# Columns are read by name instead of by position in `.values`, so a
# reordered or extra column in the CSV cannot silently shift which field
# gets counted.

# Goals logged on offense: the passer gets the assist, the receiver the goal.
o_goals = goals[goals["Event Type"] == "Offense"]
for passer, receiver in zip(o_goals["Passer"], o_goals["Receiver"]):
    sjs_dict[passer]["assists"] += 1
    sjs_dict[receiver]["goals"] += 1

# "D" actions are credited to the player named in the Defender column.
ds = sjs[sjs["Action"] == "D"]
for defender in ds["Defender"]:
    sjs_dict[defender]["ds"] += 1

# Turnovers logged on offense: a throwaway is charged to the passer,
# a drop to the receiver.
turns = sjs[sjs["Action"].isin(["Throwaway", "Drop"]) & (sjs["Event Type"] == "Offense")]
for action, passer, receiver in zip(turns["Action"], turns["Passer"], turns["Receiver"]):
    if action == "Throwaway":
        sjs_dict[passer]["throwaways"] += 1
    else:
        sjs_dict[receiver]["drops"] += 1

In [65]:
# One row per player, one column per counter; players with the most D's first.
player_stats = pd.DataFrame.from_dict(sjs_dict, orient="index")
player_stats.sort_values(by="ds", ascending=False)

Unnamed: 0,drops,throwaways,goals,assists,ds
Kittre B,3,23,68,37,26
Sanche M,4,25,38,46,17
Wynne R,0,9,14,9,15
Cohen G,0,9,26,15,14
Joye A,3,29,15,45,11
Peters B,0,12,10,4,10
Grant T,1,15,9,15,9
Smith K,0,27,4,35,9
Higgin S,3,24,46,38,8
Johnso C,4,20,29,29,8


In [66]:
# Every distinct value that appears in the Action column.
sjs["Action"].unique()

array(['OpponentPull', 'Catch', 'Goal', 'Pull', 'Throwaway', 'PullOb', 'D',
       'Drop', 'EndOfFirstQuarter', 'OpponentPullOb', 'Halftime',
       'MiscPenalty', 'EndOfThirdQuarter', 'Stall', 'GameOver',
       'EndOfFourthQuarter'], dtype=object)