In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import re
import glob
import datetime

# Analyzing Occurrence of Concussions

Before being able to propose modifications to the rules, I needed to first analyze the occurrence of concussions on punt returns.

* How many punt plays in the last 2 seasons resulted in concussions?
* What activity led to the concussions?
* Where during the play did the concussions occur?  Before or after the kick? Before or after the punt is caught?
* Do any other characteristics of the play affect the likelihood of a concussion?

### Load Injury Data

In [None]:
# Load the video-review table; the following cell reports its row count as the total number of concussions.
df_injury = pd.read_csv('../input/video_review.csv')

In [None]:
# One row of df_injury is counted as one concussion.
print('Total Concussions:', df_injury.shape[0])

In [None]:
# Tally concussions by the injured player's activity, chart the tally,
# then leave it as the cell's last expression so the notebook displays it.
activity_counts = df_injury['Player_Activity_Derived'].value_counts()
activity_counts.plot(kind="bar", title='Player Activity That Causes Concussions')
activity_counts

### Load Play Specific Data 

In [None]:
# Load the per-play table; later cells read its PlayDescription and Poss_Team columns
# and join it to other tables on GameKey / PlayID.
df_play = pd.read_csv('../input/play_information.csv')

In [None]:
# Flag plays whose description mentions a fair catch (case-insensitive): 1 = yes, 0 = no.
mentions_fair_catch = df_play['PlayDescription'].str.contains('fair catch', case=False)
df_play['FairCatch'] = 0
df_play.loc[mentions_fair_catch, 'FairCatch'] = 1

In [None]:
# Flag plays whose description mentions a penalty (case-insensitive): 1 = yes, 0 = no.
mentions_penalty = df_play['PlayDescription'].str.contains('penalty', case=False)
df_play['is_penalty'] = 0
df_play.loc[mentions_penalty, 'is_penalty'] = 1

#### Identify the Punt/Return Type
Use the Play Description field to determine the type of play.  Options are:
* Return - The punt returner caught and returned the ball.
* Fair Catch - Returner called a fair catch. No Return.
* Downed - The ball was downed by the kicking team.  No Return.
* Touchback - The ball goes into the opposing team's endzone.  No Return.
* Out of Bounds - The ball is punted out of bounds.  No Return.
* Block - The punt is blocked behind the LOS by the return team
* No Punt - Play that does not result in an actual punt.  Either a fake or dropped snap. 
* Penalty - Penalty before the snap or on the return team that nullifies the play.  No official play.

In [None]:
# Classify every play from its description text. Each play starts as 'return' and is
# relabelled by the rules below; rules are applied in order, so a later rule overrides
# an earlier one when a description matches several of them.
# The patterns are regular expressions (str.contains default), written as raw strings so
# that the escapes \. and \( reach the regex engine without invalid-escape warnings.
descriptions = df_play['PlayDescription']
df_play['punt_play_type'] = 'return'

for pattern, label in [
    (r'fair catch', 'fair_catch'),
    (r'downed', 'downed'),
    (r'touchback', 'touchback'),
    (r'out of bounds\.', 'out_of_bounds'),
]:
    df_play.loc[descriptions.str.contains(pattern, case=False), 'punt_play_type'] = label

# Descriptions that never mention "punts" are treated as plays without a punt.
df_play.loc[~descriptions.str.contains(r'punts', case=False), 'punt_play_type'] = 'no punt'

# These two take precedence over everything above.
df_play.loc[descriptions.str.contains(r'blocked', case=False), 'punt_play_type'] = 'block'
df_play.loc[descriptions.str.contains(r'\(punt formation\) penalty', case=False), 'punt_play_type'] = 'pre-snap penalty'

In [None]:
# Attach play-level information to each concussion record (only plays present in both tables),
# then chart and display how many concussions fall under each punt play type.
df_concussion_plays = pd.merge(df_injury, df_play, how='inner', on=['GameKey', 'PlayID'])
punt_type_counts = df_concussion_plays['punt_play_type'].value_counts()
punt_type_counts.plot(kind="bar", title='Punt Play Type for Concussions')
punt_type_counts

#### Takeaways
1. The majority of concussions occur on plays that have a return.  Where do injuries occur? Particular Position?
2. There is still a chance of concussions on plays that do not have a return.
3. After further investigation, the play with no punt is actually a fake punt.  This play is truly a running play so should not be included in the analysis

**What is it about return plays that leads to concussions?**

### Does Position lead to more concussions? - Add Role Data

In [None]:
# Load each player's role on each play and attach it to the concussed player's record.
# A left join keeps concussion rows even when no role row matches.
df_player_role = pd.read_csv('../input/play_player_role_data.csv')
df_concussion_plays = pd.merge(
    df_concussion_plays,
    df_player_role,
    how='left',
    on=['GameKey', 'PlayID', 'GSISID'],
)

# Exclude the play classified as 'no punt' from the rest of the analysis.
was_punted = df_concussion_plays['punt_play_type'] != 'no punt'
df_concussion_plays = df_concussion_plays[was_punted]

# Chart and display the number of concussions per role.
role_counts = df_concussion_plays['Role'].value_counts()
role_counts.plot(kind="bar", title='Position of Concussed Player')
role_counts

#### Takeaways
* The punt returner is the most likely to be injured, which was somewhat expected as this is the ball carrier. 
* Majority of positions are impacted

### What is happening on the plays with injuries? - Add Player Tracking Data
To first answer this question, I watched the video clips.  What I realized is that the majority of the concussions were occurring after the ball was caught.  A player was injured on the tackle or a downfield block during the return. 

Other injuries occurred during blocking at the line of scrimmage (LOS) or before the punt was caught on downfield blocking.  

While these three categories existed, I wanted to confirm this with the tracking data.

In [None]:
# Load every NGS tracking file into one frame. glob.glob returns matches in arbitrary
# order, so the list is sorted to make the concatenated row order reproducible.
ngs_files = sorted(glob.glob('../input/NGS*.csv'))
df_ngs = pd.concat([pd.read_csv(f) for f in ngs_files], ignore_index=True)

# Limit to just plays with injuries
df_ngs_injuries = df_ngs.merge(df_injury[['GameKey','PlayID']], how = 'inner', on = ['GameKey','PlayID'])

# Partner ids that are not numeric (the data uses the text 'Unclear') become NaN.
# errors='coerce' also keeps this cell re-runnable: once the column is numeric, the
# previous .str.replace(...) call would fail because .str only works on string data.
df_concussion_plays['Primary_Partner_GSISID'] = pd.to_numeric(df_concussion_plays['Primary_Partner_GSISID'], errors = 'coerce')

# Tracking rows of the injured player for each concussion play ...
df_injury_location = df_concussion_plays.merge(df_ngs_injuries, how = 'left', on = ['GameKey','PlayID','GSISID'])
# ... paired with the primary partner's tracking row at the same Time. Columns present on
# both sides (e.g. x, y, GSISID) get the '_injured' / '_partner' suffixes.
df_injury_location = df_injury_location.merge(df_ngs_injuries, how = 'left', left_on = ['GameKey','PlayID','Time','Primary_Partner_GSISID'],right_on = ['GameKey','PlayID','Time','GSISID'],  suffixes = ['_injured','_partner'])

In [None]:
# Per-axis and straight-line distance between the injured player and the primary partner
# at each tracking instant (NaN wherever the partner has no matching tracking row).
gap_y = (df_injury_location['y_partner'] - df_injury_location['y_injured']).abs()
gap_x = (df_injury_location['x_partner'] - df_injury_location['x_injured']).abs()
df_injury_location['y_separation'] = gap_y
df_injury_location['x_separation'] = gap_x
df_injury_location['separation'] = np.sqrt(gap_y**2 + gap_x**2)

In [None]:
# For every play keep only the tracking instant with the smallest separation between the
# two players; that instant is used from here on as the moment of the injury.
closest_first = df_injury_location.sort_values('separation')
df_concussion = closest_first.drop_duplicates(subset=['GameKey', 'PlayID'], keep='first')

In [None]:
# Distinct timestamps, per play, of the tracking events that mark the end of the punt's
# flight (received, fair catch or downed).
punt_end_events = ['punt_received', 'kick_received', 'fair_catch', 'punt_downed']
df_punt_received = (
    df_ngs.loc[df_ngs['Event'].isin(punt_end_events), ['PlayID', 'GameKey', 'Time']]
    .drop_duplicates()
    .rename(columns={'Time': 'punt_caught_time'})
)

In [None]:
# Distinct timestamps, per play, of the 'punt' tracking event (the moment the ball is kicked).
df_punt_time = (
    df_ngs.loc[df_ngs['Event'] == 'punt', ['PlayID', 'GameKey', 'Time']]
    .drop_duplicates()
    .rename(columns={'Time': 'ball_kicked_time'})
)

In [None]:
# Build, per play, the snap time and the x position of the player whose Role is 'PLS'
# at the snap; that x is used as the line of scrimmage (x_LOS).
snap_rows = df_ngs[df_ngs['Event'] == 'ball_snap']
pls_roles = df_player_role[df_player_role['Role'] == 'PLS']

# Step 1: plays that have a ball_snap event, joined to the play's PLS player id.
df_snap = snap_rows[['PlayID', 'GameKey', 'Time']].drop_duplicates()
df_snap = df_snap.merge(pls_roles, how='inner', on=['PlayID', 'GameKey'])
df_snap = df_snap[['PlayID', 'GameKey', 'Time', 'GSISID']].rename(columns={'Time': 'snap_time'})

# Step 2: that player's own ball_snap tracking row supplies the final Time and x.
df_snap = df_snap.merge(snap_rows, how='inner', on=['PlayID', 'GameKey', 'GSISID'])
df_snap = df_snap[['PlayID', 'GameKey', 'Time', 'x']].rename(
    columns={'Time': 'snap_time', 'x': 'x_LOS'}
)

In [None]:
# Attach the punt-caught, ball-kicked and snap information to each concussion row.
# Left joins keep concussion rows for plays that lack one of the events.
for event_frame, right_suffix in [
    (df_punt_received, '_punt_received'),
    (df_punt_time, '_ball_kicked'),
    (df_snap, '_ball_snapped'),
]:
    df_concussion = df_concussion.merge(
        event_frame,
        how='left',
        on=['GameKey', 'PlayID'],
        suffixes=['_injury', right_suffix],
    )

In [None]:
# Elapsed time from the snap to the injury instant, to the punt, and to the catch,
# plus the injured player's absolute x distance from the line of scrimmage.
snap_at = pd.to_datetime(df_concussion['snap_time'])
df_concussion['Time_to_injury'] = pd.to_datetime(df_concussion['Time']) - snap_at
df_concussion['Time_to_punt'] = pd.to_datetime(df_concussion['ball_kicked_time']) - snap_at
df_concussion['Time_to_punt_caught'] = pd.to_datetime(df_concussion['punt_caught_time']) - snap_at
los_offset = df_concussion['x_injured'] - df_concussion['x_LOS']
df_concussion['delta_from_LOS'] = los_offset.abs()

In [None]:
# Keep only concussions whose primary partner id is known (non-null).
partner_known = df_concussion['Primary_Partner_GSISID'].notnull()
df_concussion = df_concussion[partner_known]

In [None]:
# Label each concussion. Rules run in order and later rules win, so a tackling/tackled
# activity overrides the before/after-catch blocking labels; rows matching no rule stay 'NA'.
at_or_after_catch = df_concussion['Time'] >= df_concussion['punt_caught_time']
before_catch = df_concussion['Time'] < df_concussion['punt_caught_time']
in_tackle = df_concussion['Player_Activity_Derived'].isin(['Tackling','Tackled'])

df_concussion['Injury_Event_Type'] = 'NA'
for rule_mask, event_label in [
    (at_or_after_catch, 'Blocking on return'),
    (before_catch, 'Blocking before return'),
    (in_tackle, 'Tackle'),
]:
    df_concussion.loc[rule_mask, 'Injury_Event_Type'] = event_label

df_concussion['Injury_Event_Type'].value_counts()

In [None]:
# Scatter of every concussion: seconds from snap to injury (x axis) against the injured
# player's distance from the line of scrimmage (y axis), coloured by injury event type.
colors = {'Tackle':'magenta', 'Blocking on return':'blue', 'Blocking before return':'cyan','NA':'red'}
labels = {'Tackle':'Tackle', 'Blocking on return':'Blocking on return', 'Blocking before return':'Blocking before return','NA':'NA'}

# create data 
y = df_concussion['delta_from_LOS']
x = df_concussion['Time_to_injury'].dt.total_seconds()
 
# plot one series per event type so each gets its own legend entry
for g in df_concussion['Injury_Event_Type'].unique():
    ix = df_concussion['Injury_Event_Type'] == g
    plt.scatter(x[ix], y[ix], c = colors[g], label = g)
plt.gcf().autofmt_xdate()

# Reference lines. The legend labels were previously swapped: Time_to_punt_caught is the
# time until the punt is received, Time_to_punt is the time until the ball is kicked.
plt.axvline(x=df_concussion['Time_to_punt_caught'].mean().total_seconds(), color='k', linestyle='--', label = 'avg. time to receive punt')
plt.axvline(x=df_concussion['Time_to_punt'].mean().total_seconds(), color='r', linestyle='--', label = 'avg. time to punt')
plt.legend(bbox_to_anchor=(1.1, 1.05))
plt.show()

## What if the punt was eliminated?

In [None]:
# Parse each punt's play-by-play description into one record per play:
#   index          - the play's index label in df_play
#   punt_distance  - captured punt distance text (NaN when the pattern has none)
#   return_yard    - yard line where the play ended, on the scale produced by _yard_line
#   return_length  - return yards (0 when there was no return, NaN for some muffs)
#   muff           - 1 when the description matched a muff pattern, else 0
#   turnover       - only on muff records: True when the recovering team equals Poss_Team
#
# The description is tried against an ordered list of patterns; the first pattern that
# matches AND whose captured yardages are numeric produces the record. Plays matching
# no pattern are skipped. This replaces a chain of copy-pasted try/except blocks and
# fixes the following defects of that chain:
#   * bare `except: pass` swallowed every error; only the expected failures are handled now
#   * the "muffs catch, recovered by" case could record a stale return_yard left over
#     from an earlier play (or silently fall through when none existed); it is now NaN
#   * the "muffs ... recovers at" case did not stop after appending its record
#   * return_length / return_yard were stored as text in some cases; they are now numeric

# Play types whose descriptions are parsed (substring test, as before).
_PARSED_PLAY_TYPES = ['return', 'fair_catch', 'downed', 'out_of_bounds', 'touchback']


def _int_or_nan(text):
    """Return int(text), or NaN when the captured text is not a valid integer."""
    try:
        return int(text)
    except ValueError:
        return np.nan


def _yard_line(side, yard, team):
    """Return the captured yard line, plus 50 when it is on ``team``'s side of the field.

    Raises ValueError when ``yard`` is not numeric, which makes the caller try the
    next pattern.
    """
    # NOTE(review): the +50 offset is kept exactly as in the original analysis - confirm
    # that this is the intended field-position scale.
    spot = int(yard)
    return spot + 50 if side == team else spot


def _fields(punt_distance, return_yard, return_length, muff=0, **extra):
    """Assemble the attribute dict for one play (without its index)."""
    record = {
        "punt_distance": punt_distance,
        "return_yard": return_yard,
        "return_length": return_length,
        "muff": muff,
    }
    record.update(extra)
    return record


def _touchback(g, team):
    """Touchback: no punt distance captured, ball placed at the 20, no return."""
    return _fields(np.nan, 20, 0)


def _returned(g, team):
    """Return that ended at '<side> <yard>' for a (possibly negative) number of yards."""
    end_spot = _yard_line(g[3], g[4], team)
    return _fields(g[0], end_spot, int(g[5]))


def _dead_at_landing_spot(g, team):
    """Out of bounds / fair catch: the play ends where the punt came down."""
    return _fields(g[0], _yard_line(g[1], g[2], team), 0)


def _muffed_catch(g, team):
    """'muffs catch, recovered by': only the recovering team is extracted."""
    return _fields(np.nan, np.nan, np.nan, muff=1, turnover=(g[0] == team))


def _returned_no_gain(g, team):
    """Return for no gain that ended at '<side> <yard>'."""
    return _fields(g[0], _yard_line(g[3], g[4], team), 0)


def _downed(g, team):
    """Downed punt."""
    # NOTE(review): unlike the other no-return cases this ignores the captured yard line
    # and records 50 or 0; kept as in the original - confirm whether that is intended.
    return _fields(g[0], 50 if g[1] == team else 0, 0)


def _returned_to_midfield(g, team):
    """Return that ended at the 50."""
    return _fields(g[0], 50, _int_or_nan(g[3]))


def _scored(g, team):
    """Fallback for returns with no parsable end spot; recorded at yard 100."""
    return _fields(g[0], 100, _int_or_nan(g[3]))


def _midfield_punt_returned(g, team):
    """Punt landed at the 50 and was returned for a number of yards."""
    gained = int(g[1])
    return _fields(g[0], 50 + gained, gained)


def _midfield_punt_dead(g, team):
    """Punt landed at the 50 with no return yardage."""
    return _fields(g[0], 50, 0)


def _muff_recovered_by(g, team):
    """Muff 'recovered by <team>-...': a turnover when Poss_Team recovers."""
    if g[3] == team:
        return _fields(g[0], np.nan, np.nan, muff=1, turnover=True)
    return _fields(g[0], _int_or_nan(g[5]), 0, muff=1, turnover=False)


def _muff_self_recovered(g, team):
    """Muff followed by 'recovers at': never a turnover."""
    return _fields(g[0], _int_or_nan(g[4]), 0, muff=1, turnover=False)


# Ordered (pattern, builder) pairs; order is significant because the first usable match
# wins. Patterns are applied to the lower-cased description and compiled once here.
_PUNT_PARSERS = [
    (re.compile(pattern), builder)
    for pattern, builder in [
        (r'.* touchback.', _touchback),
        (r'.* punts (\d*) yards to (\w*) (\d*|-|\d), .* to (\w*) (\d*) for (\d*|-\d*) yard.*', _returned),
        (r'.* punts (\d*) yards to (\w*) (\d*), .* out of bounds.', _dead_at_landing_spot),
        (r'.* punts (\d*) yards to (\w*) (\d*), .* fair catch .*', _dead_at_landing_spot),
        (r'.* muffs catch, recovered by (\w*)-.* at .* to (\w*) (\d*) for .*', _muffed_catch),
        (r'.* punts (\d*) yards to (\w*) (\d*), .* to (\w*) (\d*) for no gain .*', _returned_no_gain),
        (r'.* punts (\d*) yards to (\w*) (\d*), .* downed .*', _downed),
        (r'.* punts (\d*) yards to (\w*) (\d*), .* to 50 for (\d*) yard.*', _returned_to_midfield),
        (r'.* punts (\d*) yards to (\w*) (\d*), .* at (\w*) (\d*) for (\d*|-\d*) yard.*', _returned),
        (r'.* punts (\d*) yards to (\w*) (\d*), .* at (\w*) (\d*) for no gain.*', _returned_no_gain),
        (r'.* punts (\d*) yards to (\w*) (\d*), .* at 50 for (\d*) yard.*', _returned_to_midfield),
        (r'.* punts (\d*) yards to (\w*) (\d*), .* for (\d*) yard.*', _scored),
        (r'.* punts (\d*) yards to 50, .* for (\d*) yard.*', _midfield_punt_returned),
        (r'.* punts (\d*) yards to 50, .* for no gain .*', _midfield_punt_dead),
        (r'.* punts (\d*) yards to 50, .* fair catch.*', _midfield_punt_dead),
        (r'.* punts (\d*) yards to (\w*) (\d*), .* muffs .* recovered by (\w*)-.* at (\w*) (\d*).*', _muff_recovered_by),
        (r'.* punts (\d*) yards to (\w*) (\d*) .* muffs .* recovers at (\w*) (\d*).*', _muff_self_recovered),
        (r'.* punts (\d*) yards to 50, .*', _midfield_punt_dead),
    ]
]

all_punt_atts = []
for play_idx, play_type, description, poss_team in zip(
    df_play.index,
    df_play['punt_play_type'],
    df_play['PlayDescription'],
    df_play['Poss_Team'],
):
    if not any(s in play_type for s in _PARSED_PLAY_TYPES):
        continue

    # Missing text (NaN) cannot be parsed; such plays produced no record before either.
    if not isinstance(description, str) or not isinstance(poss_team, str):
        continue

    play = description.lower()
    team = poss_team.lower()

    for pattern, build in _PUNT_PARSERS:
        found = pattern.search(play)
        if found is None:
            continue
        try:
            fields = build(found.groups(), team)
        except ValueError:
            # A captured yardage was empty or otherwise not numeric: try the next pattern.
            continue
        punt_atts = {"index": play_idx}
        punt_atts.update(fields)
        all_punt_atts.append(punt_atts)
        break

In [None]:
# One row per parsed punt. Missing return lengths count as 0 yards and the column is
# stored as integers before averaging over all parsed punts.
df_punt_atts = pd.DataFrame(all_punt_atts)
return_lengths = df_punt_atts['return_length'].fillna(0).astype(int)
df_punt_atts['return_length'] = return_lengths
print('Average Return Length:', return_lengths.mean())

In [None]:
# Average return length over plays classified as an actual return. Previously this cell
# repeated the all-punts average, so fair catches, downed punts, touchbacks and
# out-of-bounds punts (all 0 yards) were still included in the mean.
df_punt_atts['return_length'] = df_punt_atts['return_length'].fillna(0).astype(int)
# df_punt_atts['index'] holds df_play index labels, so it can look up each play's type.
parsed_play_types = df_play.loc[df_punt_atts['index'], 'punt_play_type'].values
returns_only = df_punt_atts.loc[parsed_play_types == 'return', 'return_length']
print('Average Return on returns only:', returns_only.mean())