In [1]:
import pandas as pd
pd.options.display.max_colwidth = 1000
import numpy as np
import matplotlib.pyplot as plt
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools, offline
# Never commit the Plotly API key: read it from the environment instead.
# Export PLOTLY_API_KEY (and optionally PLOTLY_USERNAME) before starting the kernel.
# The key that was previously hardcoded here should be treated as leaked and rotated.
import os
plotly.tools.set_credentials_file(username=os.environ.get('PLOTLY_USERNAME', 'mtodisco10'),
                                  api_key=os.environ['PLOTLY_API_KEY'])
import re
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
from plotly.offline import init_notebook_mode, iplot
from IPython.display import display, HTML

In [2]:
%run progress_bar.ipynb
%run field_layout.ipynb

In [3]:
#Read in Data
game_data = pd.read_csv('game_data.csv')
play_info = pd.read_csv('play_information.csv')
play_player_role = pd.read_csv('play_player_role_data.csv')
player_punt = pd.read_csv('player_punt_data.csv')
video_footage_control = pd.read_csv('video_footage-control.csv')
video_footage_injury = pd.read_csv('video_footage-injury.csv')
video_review = pd.read_csv('video_review.csv')

In [4]:
def read_NGS_data(file_lst):
    """Read every NGS tracking CSV in ``file_lst`` and stack them into one DataFrame.

    Parameters
    ----------
    file_lst : iterable of str
        Paths of the CSV files to read, in the order they should be stacked.

    Returns
    -------
    pandas.DataFrame
        Rows of all files, in file order. Each file keeps its own row index
        (the index is not reset), as before.

    Raises
    ------
    ValueError
        If ``file_lst`` contains no files.
    """
    frames = []
    for file in file_lst:
        print('Reading in {}'.format(file))
        frames.append(pd.read_csv(file))
    if not frames:
        raise ValueError('file_lst must contain at least one NGS file')
    # Concatenate once: concatenating inside the loop re-copies everything read so far.
    return pd.concat(frames)

In [5]:
file_lst = ['NGS-2016-pre.csv','NGS-2016-reg-wk1-6.csv','NGS-2016-reg-wk7-12.csv','NGS-2016-reg-wk13-17.csv',\
            'NGS-2016-post.csv'#,'NGS-2017-pre.csv','NGS-2017-reg-wk1-6.csv','NGS-2017-reg-wk7-12.csv',\
#            'NGS-2017-reg-wk13-17.csv','NGS-2017-post.csv'
           ]

In [6]:
NGS_df = read_NGS_data(file_lst)

Reading in NGS-2016-pre.csv
Reading in NGS-2016-reg-wk1-6.csv
Reading in NGS-2016-reg-wk7-12.csv
Reading in NGS-2016-reg-wk13-17.csv
Reading in NGS-2016-post.csv


### Injury Plays EDA

In [7]:
'There are concussion injuries on ' + str(round(len(video_review) / float(len(play_info)) * 100, 2)) + '% of ' + 'punt plays'

'There are concussion injuries on 0.55% of punt plays'

##### It is good for the NFL and its players that concussions occur on fewer than 1% of punt plays, but having so few injury plays makes the analysis harder

#### How are players being injured?

In [8]:
def _injury_count_bar(column):
    """Build a bar trace counting reviewed injury plays per distinct value of ``column``."""
    counts = video_review.groupby([column], as_index=False)['PlayID'].count()
    return go.Bar(x=counts[column], y=counts['PlayID'])

# One bar chart per categorical column of the video review, laid out on a 2x2 grid.
trace1 = _injury_count_bar('Player_Activity_Derived')
trace2 = _injury_count_bar('Primary_Impact_Type')
trace3 = _injury_count_bar('Friendly_Fire')
trace4 = _injury_count_bar('Primary_Partner_Activity_Derived')

fig = tools.make_subplots(rows=2, cols=2, subplot_titles=('Player Activity Derived', 'Primary Impact Type',
                                                          'Friendly Fire', 'Primary Partner Activity Derived'))

# Fill the grid row by row: (1,1), (1,2), (2,1), (2,2).
for position, trace in enumerate([trace1, trace2, trace3, trace4]):
    fig.append_trace(trace, position // 2 + 1, position % 2 + 1)

fig['layout'].update(showlegend=False)

py.iplot(fig, filename='make-subplots-multiple-with-titles')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]
[ (2,1) x3,y3 ]  [ (2,2) x4,y4 ]



### Quick Conclusions
    - No player activity stands out
    - Helmet to player is primary impact type
    - Friendly fire is only clear 16% of the time
    - No Partner Activity Stands Out

#### Who's getting hurt?

In [9]:
# Categorize player punt roles into the sides of the ball.
# These lists drive the 'return' / 'coverage' labels assigned with .isin() in the cells below;
# a role that appears in neither list is labelled with an empty string there.

# Roles labelled 'return' (presumably the receiving team's alignment codes -- confirm against the data dictionary).
return_roles = ['PDL1','PDL2','PDL3','PDL4','PDL5','PDL6','PDM','PDR1','PDR2','PDR3','PDR4','PDR5','PDR6'
                ,'PFB','PLL','PLL1','PLL2','PLL3','PLM','PLM1','PLR','PLR1','PLR2','PLR3','PR','VL','VLi'
                ,'VLo','VR','VRi','VRo']

# Roles labelled 'coverage' (presumably the punting team's alignment codes -- confirm against the data dictionary).
coverage_roles = ['GL','GLi','GLo','GR','GRi','GRo','P','PC','PLG','PLS','PLT','PLW','PPL','PPLi','PPLo'
                 ,'PPR','PPRi','PPRo','PRG','PRT','PRW']

# The six V* roles; every entry here is also listed in return_roles above.
gunner_blockers = ['VL','VLi','VLo','VR','VRi','VRo']

In [10]:
inj_players = video_review.merge(play_player_role, how='inner', on=['Season_Year', 'GameKey', 'PlayID', 'GSISID'])
inj_players.rename(columns={'Role':'inj_role'}, inplace=True)

In [11]:
inj_players['inj_side_of_ball'] = np.where(inj_players.inj_role.isin(return_roles), 'return',
                                          np.where(inj_players.inj_role.isin(coverage_roles), 'coverage', ''))

In [12]:
# Look up the punt role of each injured player's primary contact partner.
# Work on an explicit copy so the column assignment below never writes into a view of video_review.
partner_players = video_review[['Season_Year', 'GameKey', 'PlayID', 'GSISID', 'Primary_Partner_GSISID']].copy()
# 'Unclear' and missing partners become id 0, which matches no row of play_player_role.
partner_players['Primary_Partner_GSISID'] = partner_players['Primary_Partner_GSISID'] \
                                                .replace('Unclear','0').fillna(0).astype(int)
# Rename the role table so the partner id is the join key; GSISID keeps identifying the injured player.
partner_roles = play_player_role.rename(columns={'GSISID': 'Primary_Partner_GSISID', 'Role': 'partner_role'})
partner_players = partner_players.merge(partner_roles, how='left',
                                        on=['Season_Year', 'GameKey', 'PlayID', 'Primary_Partner_GSISID'])
partner_players['partner_side_of_ball'] = np.where(partner_players.partner_role.isin(return_roles), 'return',
                                          np.where(partner_players.partner_role.isin(coverage_roles), 'coverage', ''))
# Join on the play + injured-player keys rather than concatenating side by side: inj_players and
# partner_players come from independent merges, so their row positions are not guaranteed to line up.
inj_partner_df = inj_players.merge(
    partner_players[['Season_Year', 'GameKey', 'PlayID', 'GSISID', 'partner_role', 'partner_side_of_ball']],
    how='left', on=['Season_Year', 'GameKey', 'PlayID', 'GSISID'])

In [13]:
inj_partner_df.head()

Unnamed: 0,Season_Year,GameKey,PlayID,GSISID,Player_Activity_Derived,Turnover_Related,Primary_Impact_Type,Primary_Partner_GSISID,Primary_Partner_Activity_Derived,Friendly_Fire,inj_role,inj_side_of_ball,partner_role,partner_side_of_ball
0,2016,5,3129,31057,Tackling,No,Helmet-to-body,32482,Tackled,No,PLW,coverage,PR,return
1,2016,21,2587,29343,Blocked,No,Helmet-to-helmet,31059,Blocking,No,GL,coverage,PLL1,return
2,2016,29,538,31023,Tackling,No,Helmet-to-body,31941,Tackled,No,GR,coverage,PR,return
3,2016,45,1212,33121,Tackling,No,Helmet-to-body,28249,Tackled,No,PRT,coverage,PR,return
4,2016,54,1045,32444,Blocked,No,Helmet-to-body,31756,Blocked,Yes,PRT,coverage,GR,coverage


In [14]:
partner_players.partner_side_of_ball.value_counts()

return      18
coverage    15
             4
Name: partner_side_of_ball, dtype: int64

In [15]:
ex_play = NGS_df.loc[(NGS_df.Season_Year==2016) & (NGS_df.GameKey== 234) \
                     & (NGS_df.PlayID== 3278) & (NGS_df.GSISID== 28620)
                    ].sort_values('Time')

In [16]:
ball_snap_time = ex_play.loc[ex_play.Event == 'ball_snap'].Time.min()

In [17]:
ex_play = ex_play.loc[ex_play.Time >= ball_snap_time].reset_index(drop=True)

In [18]:
# Carry the most recent event label forward so every tracking sample has one.
# (.ffill() replaces the deprecated fillna(method='ffill') spelling.)
ex_play['Event'] = ex_play['Event'].ffill()

In [19]:
ret_play = NGS_df.loc[(NGS_df.Season_Year==2016) & (NGS_df.GameKey== 234) \
                     & (NGS_df.PlayID== 3278) & (NGS_df.GSISID== 27860)].sort_values('Time')
ret_play = ret_play.loc[ret_play.Time >= ball_snap_time]

In [20]:
x = np.array(ex_play.x)
y = np.array(ex_play.y)
xx = np.array(ex_play.x)
yy = np.array(ex_play.y)

In [21]:
x1 = np.array(ret_play.x)
y1 = np.array(ret_play.y)
xx1 = np.array(ret_play.x)
yy1 = np.array(ret_play.y)

In [42]:
from plotly.offline import init_notebook_mode, iplot
from IPython.display import display, HTML

init_notebook_mode(connected=True)

# ex_play and ret_play are filtered independently, so only animate samples both tracks have;
# indexing the partner arrays with len(x) alone can run past their end.
N = min(len(x), len(x1))

# Separation between the two players at each sample, rounded to whole yards (computed once).
separation = np.round(np.sqrt((xx1[:N] - xx[:N])**2 + (yy1[:N] - yy[:N])**2), 0)

# Base traces. Frame traces are applied to these BY POSITION, so every frame below must list
# its traces in this same order (Distance, Injured Player, Partner Player) with matching colours.
data=[dict(x=x, y=y, 
            name='Distance',
            mode='lines',
            textposition='bottom center',
            line=dict(width=2, color=None)
          ),
      dict(x=x, y=y, 
            name='Injured Player',
            mode='markers',
            marker=dict(color='red', size=15)
          ),
      dict(x=x1, y=y1, 
           name = 'Partner Player',
           mode='markers',
           marker=dict(color='orange', size=15)
         )
    ]

layout = load_field()
layout['hovermode'] = 'closest'
layout['updatemenus'] = [{'type': 'buttons',
                           'buttons': [{'label': 'Play',
                                        'method': 'animate',
                                        'args': [None]}]}]

# One frame every 5th tracking sample.
frames=[dict(data=[dict(x=[xx[k], xx1[k], None, xx[k], xx1[k]], 
                        y=[yy[k], yy1[k], None, yy[k], yy1[k]], 
                        mode='lines', 
                        text='Distance: {}'.format(separation[k]),
                        textposition='bottom center',
                        line=dict(color='#2c3539', width=2),
                        name='Distance'
                       ),
                   dict(x=[x[k]], 
                        y=[y[k]], 
                        mode='markers', 
                        marker=dict(color='red', size=15),
                        name='Injured Player'
                        ), 
                   dict(x=[x1[k]], 
                        y=[y1[k]], 
                        mode='markers', 
                        marker=dict(color='orange', size=15),
                        name='Partner Player'
                        )
                  ], layout=dict(title=ex_play.Event[k],
                                 annotations=[
                                     dict(x=100,
                                          y=5,
                                          showarrow=False,
                                          font=dict(
                                              family='Courier New, monospace',
                                              size=14,
                                              color='#ffffff'),
                                          align='center',
                                          bordercolor='#c7c7c7',
                                          borderwidth=2,
                                          borderpad=4,
                                          bgcolor='#2c3539',
                                          opacity=0.8,
                                          text='{} Yds'.format(separation[k]),
                                          )
                                 ]
                                )
            ) for k in range(0, N, 5)]
          
figure1=dict(data=data, layout=layout, frames=frames)
iplot(figure1)

#### On what types of plays are players getting hurt?

In [None]:
#Create a column in the play_info dataframe to determine the type of play
# Ordered (pattern, label, is_regex) rules: the first rule whose pattern is found in the
# play description decides the outcome, so earlier rules take priority over later ones.
outcome_rules = [
    ('aborted|Fumbled snap|FUMBLES, and recovers', 'aborted', True),
    ('fake|pass|right end|left end|up the middle|Direct snap|right guard', 'fake', True),
    ('muffs', 'muff', True),
    ('fair catch by', 'fair_catch', True),
    ('touchback', 'touchback', True),
    ('blocked|deflected', 'blocked', True),
    ('out of bounds.', 'oob', False),  # literal match, trailing period included
    ('downed', 'downed', True),
    ('safety', 'safety', True),
    ('[0-9]+ for [-+]?[0-9]+ yards?|for no gain|touchdown|(to [A-Z]+ [0-9]+ for [-+]?[0-9]+ yards?)|(to [0-9]+ for [-+]?[0-9]+ yards?)', 'return', True),
    ('- no play|delay of game|false start, declined|penalty enforced', 'no_play', True),
]

# na=False: a missing description matches no rule. Without it the NaN mask entries count as
# truthy and such plays are labelled with the first rule ('aborted').
outcome_masks = [
    np.asarray(play_info['PlayDescription'].str.contains(pattern, flags=re.IGNORECASE, regex=is_regex, na=False),
               dtype=bool)
    for pattern, _, is_regex in outcome_rules
]

# np.select picks the label of the first matching mask; plays matching nothing get ' '.
play_info['outcome'] = np.select(outcome_masks, [label for _, label, _ in outcome_rules], default=' ')

In [None]:
# Every punt play, flagged 1 when the video review lists an injured player on it, else 0.
pi = play_info.merge(
    video_review[['Season_Year', 'GameKey', 'PlayID','GSISID']],
    how='left',
    on =['Season_Year', 'GameKey', 'PlayID'],
)
pi['injury'] = pi['GSISID'].notnull().astype(int)
pi = pi.drop('GSISID', axis = 1)

# Injury plays only, carrying both the play information and the review columns.
vr = play_info.merge(
    video_review,
    how='inner',
    on =['Season_Year', 'GameKey', 'PlayID'],
)

In [None]:
vr.outcome.value_counts()

#### Conclusion: players are getting hurt on returns 

In [None]:
#Is it a higher proportion compared to all plays?

In [None]:
# Plays and injuries per outcome, most frequent outcome first.
# List aggregation + rename is used instead of a renaming dict ({'total_plays': 'count', ...}):
# the dict form raises SpecificationError on a single column in pandas >= 1.0.
pi_inj_grouped = pi.groupby('outcome')['injury'] \
    .agg(['count', 'sum']) \
    .rename(columns={'count': 'total_plays', 'sum': 'injuries'}) \
    .reset_index() \
    .sort_values('total_plays', ascending = False) \
    .reset_index(drop=True)

pi_inj_grouped['injury_percentage'] = (pi_inj_grouped['injuries'] / pi_inj_grouped['total_plays'] * 100) \
    .round(1).astype(str) + '%'
pi_inj_grouped

#### Conclusion: over 1% of punt plays with a return have an injury
    - **This is 10x higher than when a fair catch is called**

#### Plot Player Paths on a Given Play

In [None]:
# Loading and plotting functions

def load_plays_for_game(GameKey):
    """
    Read 'play_information.csv' from the working directory and return only
    the rows whose GameKey column equals ``GameKey`` (original row index kept).
    """
    all_plays = pd.read_csv('play_information.csv')
    is_requested_game = all_plays['GameKey'] == GameKey
    return all_plays[is_requested_game]


def load_game_and_ngs(ngs_file=None, GameKey=None):
    """
    Returns a dataframe of player movements (NGS data) for a given game,
    joined to that game's play information.

    ngs_file: path of the NGS tracking CSV to read.
    GameKey:  game identifier used to select the plays and tracking rows.

    Prints a hint and returns None when either argument is missing.
    """
    if ngs_file is None:
        print("Specify an NGS file.")
        return None
    if GameKey is None:
        print('Specify a GameKey')
        return None
    # Merge play data with NGS data
    plays = load_plays_for_game(GameKey)
    ngs = pd.read_csv(ngs_file, low_memory=False)
    # Narrow the tracking rows to this game first; the inner merge would discard the
    # other games anyway, and this keeps the merge input small.
    ngs = ngs[ngs['GameKey'] == GameKey]
    merged = pd.merge(ngs, plays, how="inner", on=["GameKey", "PlayID", "Season_Year"])
    return merged


def plot_play(game_df, PlayID, player1=None, player2=None, custom_layout=False):
    """
    Plots player movements on the field for a given game, play, and two players.

    game_df:       tracking rows joined with play information (needs PlayID, GSISID, x, y,
                   Home_Team_Visit_Team, YardLine and PlayDescription columns).
    PlayID:        play to plot.
    player1/2:     GSISIDs of the two players to draw; both are required.
    custom_layout: when True the football-field layout from load_field() is skipped and
                   plotly's default layout is used; any other value draws the field.

    Prints the play description and returns the result of py.iplot, or prints a hint
    and returns None when a player id is missing.
    """
    # Validate first: the look-ups below index rows by player1 and would raise
    # IndexError before this message could ever be shown.
    if player1 is None or player2 is None:
        print("Specify GSISIDs for player1 and player2")
        return None

    game_df = game_df[game_df.PlayID==PlayID]

    matchup = pd.unique(game_df.Home_Team_Visit_Team)[0].split("-")
    HomeTeam = matchup[0]
    VisitingTeam = matchup[1]
    player1_rows = game_df[game_df.GSISID==player1]
    YardLine = player1_rows['YardLine'].iloc[0]
    play_description = player1_rows.iloc[0]["PlayDescription"]

    traces=[]
    game_df = game_df[ (game_df['GSISID']==player1) | (game_df['GSISID']==player2)]
    for player in pd.unique(game_df.GSISID):
        player = int(player)
        trace = go.Scatter(
            x = game_df[game_df.GSISID==player].x,
            y = game_df[game_df.GSISID==player].y,
            name='GSISID '+str(player),
            mode='markers'
        )
        traces.append(trace)

    if custom_layout is not True:
        layout = load_field()
        layout['title'] =  HomeTeam + \
        ' vs. ' + VisitingTeam + \
        '<br>Possession: ' + \
        YardLine.split(" ")[0] +'@'+YardLine.split(" ")[1]
    else:
        # Previously `layout` was left unbound on this path (UnboundLocalError below).
        layout = {}
    data = traces
    fig = dict(data=data, layout=layout)
    print("\n\n\t",play_description)
    return py.iplot(fig, filename='jupyter-table1')
    #return offline.iplot(fig)

In [None]:
#game280 = load_game_and_ngs('NGS-2016-reg-wk7-12.csv',GameKey=3278)

In [None]:
#plot_play(game_df=game280, PlayID=2918, player1=32120, player2=32725)

#### Calculate hang time of punts

In [None]:
def get_hang_time(ngs_df, start_event='punt', *stop_events):
    """Seconds between the first ``start_event`` and the first of ``stop_events`` on each play.

    ngs_df:       tracking rows with Season_Year, GameKey, PlayID, Event and Time columns;
                  Time is parsed with the format '%Y-%m-%d %H:%M:%S.%f'.
    start_event:  Event value marking the start of the interval.
    stop_events:  Event values, any of which ends the interval.

    Returns one row per play that has both a start and a stop event, with columns
    Season_Year, GameKey, PlayID, punt_time, receiving_time and hang_time.
    """
    play_keys = ['Season_Year', 'GameKey','PlayID']
    timestamp_format = '%Y-%m-%d %H:%M:%S.%f'

    def earliest_per_play(event_rows, column_name):
        # Smallest Time per play, renamed and parsed into real timestamps.
        earliest = event_rows.groupby(play_keys, as_index = False)['Time'].min()
        earliest = earliest.rename(columns = {'Time': column_name})
        earliest[column_name] = pd.to_datetime(earliest[column_name], format=timestamp_format)
        return earliest

    started = earliest_per_play(ngs_df.loc[ngs_df.Event==start_event], 'punt_time')
    stopped = earliest_per_play(ngs_df.loc[ngs_df.Event.isin(stop_events)], 'receiving_time')

    # Plays lacking either event drop out of the inner join.
    punt_df = started.merge(stopped, how='inner', on = play_keys).reset_index(drop=True)
    elapsed = punt_df['receiving_time'] - punt_df['punt_time']
    punt_df['hang_time'] = elapsed.dt.total_seconds()

    return punt_df

In [None]:
punt_df = get_hang_time(NGS_df, 'punt', 'punt_received', 'fair_catch')

In [None]:
data = [go.Histogram(x=punt_df.hang_time)]

py.iplot(data, filename='basic histogram')

In [None]:
round(punt_df['hang_time'].mean(), 1)

In [None]:
round(punt_df['hang_time'].median(), 1)

In [None]:
str(round(len(punt_df.loc[punt_df.hang_time < 5.5]) / len(punt_df) * 100, 1)) + '% of hang times are less than 5 1/2 seconds'

### Calculate distance between returner and closest coverage man

In [None]:
#Create a function to calculate the space between the returner and the closest coverage player for every second between the punt and the catch
def coverage_returner_space(play_df, ngs_df):
    """Distance from every coverage player to the punt returner at whole seconds after the punt.

    For each play in ``play_df`` whose outcome is 'fair_catch' or 'return', the play's
    tracking rows are taken from ``ngs_df``, joined to the notebook-level
    ``play_player_role`` table, and limited to the window between the first 'punt' event
    and the first 'punt_received' / 'fair_catch' event. Each coverage-role row is paired
    with the 'PR' row that has the same timestamp, and only samples exactly 1 to 6 whole
    seconds after the punt are kept.

    Relies on notebook globals: play_player_role, coverage_roles, gunner_blockers,
    log_progress and convert_to_mph.

    play_df: plays with Season_Year, GameKey, PlayID, outcome and injury columns.
    ngs_df:  tracking rows with Season_Year, GameKey, PlayID, GSISID, Time, Event, x, y,
             dis, o and dir columns.

    Returns a single DataFrame with cov_* / ret_* columns plus dis_from_ret,
    time_since_punt, gunner_blockers, outcome, injury, cov_speed and ret_speed.
    pd.concat raises ValueError if no play produces any rows.
    """
    cov_ret_lst = []
    play_df = play_df.loc[(play_df.outcome == 'fair_catch') | (play_df.outcome == 'return')].reset_index(drop=True)
    for i in log_progress(range(0, len(play_df)), every=25):
        season_key = play_df['Season_Year'][i]
        game_key = play_df['GameKey'][i]
        play_id = play_df['PlayID'][i]
        outcome = play_df['outcome'][i]
        injury = play_df['injury'][i]

        # Select the play by value. The old `game_key in ngs_df.GameKey` guard tested the
        # Series *index*, not its values, so it did not check what it appeared to check;
        # an empty selection is skipped below instead. Season_Year is part of the play key
        # and is now included in the filter as well.
        filtered_play = ngs_df.loc[(ngs_df.Season_Year == season_key) \
                                   & (ngs_df.GameKey == game_key) \
                                   & (ngs_df.PlayID == play_id)].sort_values('Time').reset_index(drop=True)
        if len(filtered_play) == 0:
            continue
        filtered_play = filtered_play.merge(play_player_role, \
                                            how='inner', on = ['Season_Year','GameKey','PlayID','GSISID'])
        if len(filtered_play) == 0:
            continue

        filtered_play['Time'] = pd.to_datetime(filtered_play['Time'], \
                                               format='%Y-%m-%d %H:%M:%S.%f')
        punt_event_time = filtered_play.loc[filtered_play.Event == 'punt'].Time.min()
        receiving_event_time = filtered_play.loc[(filtered_play.Event == 'punt_received') | \
                                                 (filtered_play.Event == 'fair_catch')].Time.min()
        # Number of distinct gunner-blocker roles present on the play (counted before the time window is applied).
        gunner_blocker_count = len(filtered_play.loc[filtered_play['Role'].isin(gunner_blockers)]['Role'].unique())
        filtered_play = filtered_play.loc[(filtered_play.Time >= punt_event_time) & \
                                          (filtered_play.Time <= receiving_event_time)]

        coverage_df = filtered_play.loc[filtered_play['Role'].isin(coverage_roles)].sort_values('Time') \
            .rename(columns={'x':'cov_x',
                             'y': 'cov_y',
                             'GSISID': 'cov_GSISID',
                             'dis': 'cov_dis',
                             'o': 'cov_o',
                             'dir': 'cov_dir',
                             'Role': 'cov_Role'
                            })

        returner_df = filtered_play.loc[filtered_play['Role'] == 'PR'].sort_values('Time') \
            .rename(columns={'x':'ret_x',
                             'y': 'ret_y',
                             'GSISID': 'ret_GSISID',
                             'dis': 'ret_dis',
                             'o': 'ret_o',
                             'dir': 'ret_dir',
                             'Role': 'ret_Role'
                            })
        # Event is already carried by the coverage rows; drop it to avoid a suffixed duplicate.
        returner_df = returner_df.drop('Event', axis = 1)

        # Pair every coverage sample with the returner sample taken at the same instant.
        cov_ret_df = coverage_df.merge(returner_df, how ='inner', on = ['Season_Year','GameKey','PlayID','Time'])
        cov_ret_df['dis_from_ret'] = ((cov_ret_df['cov_x'] -  cov_ret_df['ret_x']) ** 2 \
                                   + (cov_ret_df['cov_y'] -  cov_ret_df['ret_y']) ** 2).apply(np.sqrt)
        cov_ret_df['time_since_punt'] = cov_ret_df['Time'] - punt_event_time
        times_to_capture = [punt_event_time + pd.Timedelta(seconds=second) for second in range(1, 7)]
        # .copy() so the columns added below land on an independent frame, not a slice.
        cov_ret_df = cov_ret_df.loc[cov_ret_df['Time'].isin(times_to_capture)].copy()
        cov_ret_df['gunner_blockers'] = gunner_blocker_count
        cov_ret_df['outcome'] = outcome
        cov_ret_df['injury'] = injury
        # NOTE(review): 20.455 presumably converts yards per 0.1 s sample into mph -- confirm the sampling rate.
        cov_ret_df['cov_speed'] = convert_to_mph(cov_ret_df.cov_dis, 20.455)
        cov_ret_df['ret_speed'] = convert_to_mph(cov_ret_df.ret_dis, 20.455)
        if len(cov_ret_df) > 0:
            cov_ret_lst.append(cov_ret_df)

    cov_ret_df = pd.concat(cov_ret_lst).reset_index(drop=True)
    return cov_ret_df

In [None]:
def convert_to_mph(dis_vector, converter):
    """Return ``dis_vector`` multiplied by the conversion factor ``converter``.

    The caller chooses the factor; the result is in mph only if that factor
    maps the input's distance-per-sample unit to miles per hour.
    """
    return dis_vector * converter

In [None]:
cov_ret_df = coverage_returner_space(pi, NGS_df)

In [None]:
cov_ret_df.gunner_blockers.value_counts(normalize=True)

In [None]:
#Speed vs Number of Gunner Blockers at each second.

pd.pivot_table(cov_ret_df.loc[(cov_ret_df.cov_Role == 'GR') | (cov_ret_df.cov_Role == 'GL')], 
               index=['gunner_blockers'], values='cov_speed',
               columns=['time_since_punt'], aggfunc=[np.mean, np.median])

In [None]:
# Average distance for all player at the X second mark.  Return vs Fair Catch
pd.pivot_table(cov_ret_df, values='dis_from_ret', index=['time_since_punt'], columns=['outcome'], aggfunc=[np.mean, np.median])

In [None]:
pd.pivot_table(cov_ret_df, values='dis_from_ret', index=['time_since_punt'], columns=['outcome','gunner_blockers'], aggfunc=np.mean)

In [None]:
#### At each second of each play... Who is the closest to the returner?  How far away are they?  Return vs Fair Catch

In [None]:
min_dist_from_ret = cov_ret_df.groupby(['Season_Year', 'GameKey','PlayID','time_since_punt'], as_index = False)['dis_from_ret'].min()

In [None]:
min_dist_from_ret.head()

In [None]:
# Keep, for every second of every play, the coverage row(s) at the minimum distance.
# time_since_punt must be part of the join: without it, a minimum found at one second
# also matches any row of the same play that has that distance at a different second.
closest_df = cov_ret_df.merge(min_dist_from_ret, how = 'inner',
                              on=['Season_Year', 'GameKey', 'PlayID', 'time_since_punt', 'dis_from_ret'])

In [None]:
ex_cr = cov_ret_df.loc[(cov_ret_df.Season_Year==2016) & (cov_ret_df.GameKey== 289) \
                      & (cov_ret_df.PlayID== 2341) & (cov_ret_df.cov_GSISID== 32007)].sort_values('Time')

In [None]:
ex_cr.head()

In [None]:
#video_review.head(20)

In [None]:
x1 = np.array(ex_cr.cov_x)
y1 = np.array(ex_cr.cov_y)

x2 = np.array(ex_cr.ret_x)
y2 = np.array(ex_cr.ret_y)

In [None]:
y2

In [None]:
from plotly.offline import init_notebook_mode, iplot
from IPython.display import display, HTML

data=[dict(x=x1, y=y1, 
           mode='lines', 
           line=dict(width=2, color='blue')
          ),
      dict(x=x2, y=y2, 
           mode='lines', 
           line=dict(width=2, color='orange')
          )
    ]

layout = load_field()
layout['hovermode'] = 'closest'
layout['updatemenus'] = [{'type': 'buttons',
                           'buttons': [{'label': 'Play',
                                        'method': 'animate',
                                        'args': [None]}]}]

N = len(x1)
frames=[dict(data=[dict(x=[x1[k]], 
                        y=[y1[k]], 
                        mode='markers', 
                        marker=dict(color='red', size=10)
                        )
                  ]) for k in range(N)]
          
figure1=dict(data=data, layout=layout, frames=frames)
py.iplot(figure1)

In [None]:
#closest_df.groupby(['time_since_punt','cov_Role'], as_index=False)['dis_from_ret'].agg({'# of times closest': 'count','avg distance':np.mean}).sort_values('# of times closest', ascending =False)

In [None]:
pos_grouped_df = cov_ret_df.groupby(['cov_Role','time_since_punt', 'outcome'], as_index=False)['dis_from_ret'].mean()

In [None]:
trace1 = go.Bar(
        x=pos_grouped_df.loc[pos_grouped_df.outcome == 'fair_catch']['cov_Role'],
        y=pos_grouped_df.loc[pos_grouped_df.outcome == 'fair_catch']['dis_from_ret'],
        name='fair catch'
    )

trace2 = go.Bar(
        x=pos_grouped_df.loc[pos_grouped_df.outcome == 'return']['cov_Role'],
        y=pos_grouped_df.loc[pos_grouped_df.outcome == 'return']['dis_from_ret'],
        name='return'
    )

data = [trace1, trace2]
layout = go.Layout(
    barmode='group',
    xaxis=dict(title='Position'),
    yaxis= dict(title='Avg Distance From Returner (Yds)')
)

fig=go.Figure(data=data, layout=layout)
py.iplot(fig, filename='grouped-bar')

#### Punts Landing inside the 20

In [None]:
def parse_play_description(df, outcome_lst):
    """Extract punt landing spot, punt distance and return distance from play descriptions.

    df:          plays with Season_Year, GameKey, PlayID, PlayDescription and outcome columns.
    outcome_lst: outcomes to keep; plays with any other outcome are dropped.

    Returns a new DataFrame (one row per kept play) with three added columns:
      punt_to     - yard-line number the punt reached (0 for a touchback)
      punt_dist   - punt length in yards
      return_dist - return length in yards (20 for a touchback; 0 for downed,
                    fair catch, out of bounds or "no gain")
    A value that cannot be parsed is reported with a printed message and stored as NaN,
    so the three columns always stay aligned with the rows.
    """
    # Use the frame that was passed in (the previous version read the global play_info).
    parsed_df = df.loc[df.outcome.isin(outcome_lst),\
                       ['Season_Year','GameKey','PlayID',\
                        'PlayDescription','outcome']].reset_index(drop=True)
    punt_to_lst = []
    punt_dist_lst = []
    return_dist_lst = []
    punt_regex = '(punts [0-9]+ yards? to [A-Z]* [-+]?[0-9]+)| (punts [0-9]+ yards? to [-+]?[0-9]+)'
    return_regex = '(to [A-Z]* [0-9]+ for [-+]?[0-9]+ yards?)|(to [0-9]+ for [-+]?[0-9]+ yards?)|(ob at [A-Z]* [-+]?[0-9]+ for [-+]?[0-9]+ yards?)|(ob at [0-9]+ for [-+]?[0-9]+ yards?)|(for [-+]?[0-9]+ yards?, TOUCHDOWN)'
    missing = float('nan')

    for i in range(0, len(parsed_df)):
        description = parsed_df.PlayDescription[i]
        outcome = parsed_df.outcome[i]
        punt_search = re.search(punt_regex, description)
        return_search = re.search(return_regex, description)

        # Exactly one value is appended to each list per row.
        punt_to = missing
        punt_dist = missing
        if punt_search:
            # First number is the punt length, last number the yard line reached.
            punt_snip = re.findall(r'-?\d+', punt_search.group(0))
            if outcome in ['downed','fair_catch', 'oob', 'return']:
                punt_to = int(punt_snip[-1])
                punt_dist = int(punt_snip[0])
            elif outcome == 'touchback':
                punt_to = 0
                punt_dist = int(punt_snip[0])
            else:
                print('Missing Punt Outcome at Row {}'.format(i))
        else:
            if outcome == 'touchback':
                # Touchback with no parsable punt text: the landing spot is known, the
                # distance is not (previously a stale or undefined punt_snip was read here).
                punt_to = 0
            else:
                print('Missing Punt Outcome at Row {}'.format(i))
        punt_to_lst.append(punt_to)
        punt_dist_lst.append(punt_dist)

        return_dist = missing
        if return_search:
            return_snip = re.findall(r'-?\d+', return_search.group(0))
            return_dist = int(return_snip[-1])
        else:
            if outcome == 'touchback':
                return_dist = 20
            elif outcome in ['downed','fair_catch', 'oob']:
                return_dist = 0
            elif 'no gain' in description:
                return_dist = 0
            else:
                print('Missing Return Outcome at Row {}'.format(i))
        return_dist_lst.append(return_dist)

    parsed_df['punt_to'] = punt_to_lst
    parsed_df['punt_dist'] = punt_dist_lst
    parsed_df['return_dist'] = return_dist_lst

    return parsed_df

In [None]:
punt_detail_df = parse_play_description(play_info, ['touchback', 'fair_catch','oob', 'downed', 'return'])

In [None]:
#Fearing the extra 5-yard penalty, punters will be even more careful to avoid a touchback.
#They will punt the ball shorter and with more hang time, or angle the ball out of bounds.

#From a returner's perspective, if the ball is heading close to the goal line, they will be more likely to let it bounce 
#and take the touchback than return it because of the extra 5-yard bonus.

In [None]:
punt_detail_df.head()

In [None]:
punt_to_lst = []
punt_dist_lst = []
return_dist_lst = []

In [None]:
string = 'T.Morstead punts 47 yards to 17, Center-C.Highland. T.Ervin to HST 16 for -1 yards '

In [None]:
snip = re.search('(to [A-Z]* [0-9]+ for [-+]?[0-9]+ yards?)', string).group(0)