# Parsing Cover
This notebook parses each play's `playDescription` to recover the full names of the defensive players mentioned in it.

In [None]:
import re
import math
import pandas as pd
import numpy as np
import matplotlib.animation as animation
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from tqdm import tqdm

# Show wide frames and long play descriptions without truncation.
for display_option in ('display.max_columns', 'display.max_colwidth', 'display.max_rows'):
    pd.set_option(display_option, 1000)

# Animations render through the "jshtml" HTML representation, figures use a
# higher DPI, and interactive mode is switched off.
plt.rcParams.update({"animation.html": "jshtml", 'figure.dpi': 150})
plt.ioff()

In [None]:
dataDir = "/kaggle/input/nfl-big-data-bowl-2021/"

# Season-level tables.
games, players, plays = (
    pd.read_csv(dataDir + table_name)
    for table_name in ("games.csv", "players.csv", "plays.csv")
)

# Weekly tracking files: weeks[k] is read from week{k+1}.csv (week1 ... week17).
weeks = [pd.read_csv(dataDir + "week{}.csv".format(week_no)) for week_no in range(1, 18)]

# Some functions that I need
Mostly visualization helpers.

In [None]:
def posession(plays_row):
    """Return 'away' when the visiting team has the ball on this play, else 'home'.

    plays_row: a row of `plays`; its `gameId` and `possessionTeam` are read.
    The game is looked up in the module-level `games` frame.
    """
    game = games.query("gameId=={}".format(plays_row.gameId)).iloc[0]
    return 'away' if game.visitorTeamAbbr == plays_row.possessionTeam else 'home'
    
def get_week(gameID, playID):
    """Return the game's week number minus one (usable as an index into `weeks`).

    The game is looked up in the module-level `games` frame; `playID` is not
    used by the lookup.
    """
    game_row = games.query("gameId=={}".format(gameID)).iloc[0]
    week_index = game_row.week - 1
    return week_index
    
def get_week_df(gameID, playID):
    """Return `(tracking_rows, week_index)` for one play.

    `tracking_rows` are the rows of the matching weekly frame whose `gameId`
    and `playId` both match; `week_index` is the value from `get_week`.
    """
    week_index = get_week(gameID, playID)
    tracking = weeks[week_index]
    same_play = tracking.playId == playID
    same_game = tracking.gameId == gameID
    return tracking[same_play & same_game], week_index

# this function requires 'plays' dataframe
def animate_play(gameID, playID):
    """Build an animation of one play from the weekly tracking data.

    Parameters
    ----------
    gameID, playID
        Identifiers used to look the play up in `games`, `plays` and the
        weekly tracking frames (through `get_week_df`).

    Returns
    -------
    matplotlib.animation.FuncAnimation
        One frame per `frameId` from 1 to the play's maximum. Home, away and
        football positions are drawn as markers on a field that also shows the
        line of scrimmage (dark blue) and the first-down line (gold).

    Raises
    ------
    ValueError
        If there are no tracking rows for the requested play.
    """
    plt.close('all')
    fig, ax = plt.subplots()
    fig.set_figheight(7)
    fig.set_figwidth(14)

    information, wk = get_week_df(gameID, playID)
    if information.empty:
        # Without this guard max_frame becomes NaN and the failure only shows
        # up later as a TypeError raised by range().
        raise ValueError("No tracking data for gameId={} playId={}".format(gameID, playID))
    max_frame = int(information.frameId.max())
    # wk is the zero-based index into `weeks`; print the actual week number.
    print("GAME: {}\tPLAY: {}\tWEEK: {}\tFRAME: {}".format(gameID, playID, wk + 1, max_frame))

    # Moving artists: one marker set per side plus the ball.
    home, = ax.plot([], [], linestyle='None', marker='o', markersize=12, c='C0')
    away, = ax.plot([], [], linestyle='None', marker='o', markersize=12, c='C1')
    foot, = ax.plot([], [], linestyle='None', marker='o', markersize=12, c='C2')
    lines = [home, away, foot]

    def draw_field():
        """Add every static artist (field markings, scrimmage and first-down lines)."""
        ax.set_xlim(0, 120)
        ax.set_ylim(-5, 58.3)
        # Endzones
        ax.add_patch(Rectangle((  0, 0), width=10, height=53.3, alpha=0.5, color='grey'))
        ax.add_patch(Rectangle((110, 0), width=10, height=53.3, alpha=0.5, color='grey'))
        # Sidelines
        ax.plot([0, 120], [0, 0], c='grey', linewidth=2)
        ax.plot([0, 120], [53.3, 53.3], c='grey', linewidth=2)
        for yard_x in range(10, 120, 10):
            # 10 Yard lines
            ax.plot([yard_x, yard_x], [0, 53.3], c='grey', linewidth=2)
            if abs(60 - yard_x) <= 40:
                # Numbers
                yard_label = str(50 - abs(60 - yard_x))
                ax.text(yard_x - 2.5, 12, yard_label, color="grey", fontsize=24)
                ax.text(yard_x - 2.5, 41.3, yard_label, color="grey", fontsize=24, rotation=180)
        # Hash marks
        for hash_x in range(10, 110):
            ax.plot([hash_x, hash_x], [0, 1], c='grey')
            ax.plot([hash_x, hash_x], [23, 24], c='grey')
            ax.plot([hash_x, hash_x], [29.3, 30.3], c='grey')
            ax.plot([hash_x, hash_x], [52.3, 53.3], c='grey')

        # Line of scrimmage: the football's x position on the first frame.
        scrimmage = information.query("frameId==1 and displayName=='Football'").iloc[0].x
        ax.plot([scrimmage, scrimmage], [0, 53.3], c='darkblue', linewidth=2)

        # Play description as the title.
        play_information = plays.query("playId=={} and gameId=={}".format(playID, gameID)).iloc[0]
        ax.set_title(play_information.playDescription)

        # First-down line: decide which way the offense is heading by checking
        # on which side of the ball one of its players stands on frame 1.
        home_team = games[games.gameId == gameID].iloc[0].homeTeamAbbr
        poss_team = play_information.possessionTeam
        team_query_term = "home" if home_team == poss_team else "away"
        offense_x = information.query("frameId==1 and team=='{}'".format(team_query_term)).iloc[0].x
        if offense_x < scrimmage:
            first_down_line = scrimmage + play_information.yardsToGo
        else:
            first_down_line = scrimmage - play_information.yardsToGo
        ax.plot([first_down_line, first_down_line], [0, 53.3], c='gold', linewidth=2)

    def init_animation():
        """Initial state for blitting: just hand back the (still empty) moving artists."""
        return lines

    def plot_players(t):
        """Move the markers to their positions on frame `t`."""
        frame_rows = information[information.frameId == t]
        for artist, team_label in ((home, "home"), (away, "away"), (foot, "football")):
            team_rows = frame_rows[frame_rows.team == team_label]
            artist.set_data(team_rows.x.array, team_rows.y.array)

        # Lightweight progress indicator while frames are being rendered.
        if t % 10 == 0:
            print(t, end=',')
        return lines

    # Static artists are added exactly once here rather than inside init_func,
    # so re-initialising the animation (for example rendering it a second time)
    # cannot stack duplicate patches and lines on the axes.
    draw_field()

    return animation.FuncAnimation(fig, plot_players, frames=range(1, max_frame + 1),
                                   init_func=init_animation, blit=True)

# Parsing for Full Names of Defensive Players
By "defensive player" I mean the player who is covering the target. For how the full names of the targeted players were obtained, see my [previous notebook](https://www.kaggle.com/beomjunbae/parsing-targets).

This notebook follows a very similar routine, but this time it parses for the full names of the covering players. Here is the routine I used:

In [None]:
cover_ones = []
cover_twos = []
# Raw strings keep the regex escapes (\., \s, \[ ...) intact instead of relying
# on Python passing unknown string escapes through unchanged.
# A run of name characters (letters, periods, whitespace, hyphens, apostrophes,
# ',' / ';' separators) that does not start with one of the listed keywords.
initial_name_finding_re = r"(?!(?:No Huddle|Shotgun|sack|Pass|Aborted|Punt|Field|Testaverde|Hamstring|including))([a-zA-Z\.\s\-',;])+"
# Names are looked for in [brackets], in (parentheses), after "Coverage" or
# after "INTERCEPTED by".
initial_re = (
    r"(\[" + initial_name_finding_re + r"\]"
    r"|\(" + initial_name_finding_re + r"\)"
    r"|Coverage [0-9a-zA-Z\-'\s]+(\.|\,)"
    r"|INTERCEPTED by [A-Z]{1}\.[A-Z]{1}[-'a-zA-Z]+\s)"
)

for i in tqdm(range(len(plays))):
    current_play = plays.iloc[i]

    # Start every play with an empty result. Previously this list was only
    # assigned inside the `if` below, so a play without a match silently reused
    # the names found for the previous play.
    potentials = []

    description = current_play.playDescription
    # A missing description (NaN) cannot be searched; treat it as "no match".
    matchesFromPlayDescription = re.search(initial_re, description) if isinstance(description, str) else None
    # Plays with passResult 'S' (presumably sacks) are skipped.
    if matchesFromPlayDescription and current_play.passResult != 'S':
        # Strip whitespace, brackets, hyphens, digits, the lead-in words and
        # Sr./Jr. suffixes, leaving abbreviated names separated by ',' or ';'.
        cleaned = re.sub(r"([\s\(\)\]\[\-0-9]|INTERCEPTED by|Coverage by|Coverage |Sr\.|Jr\.)", '', matchesFromPlayDescription.group(0)).strip()
        # Empty fragments (from doubled or trailing separators) are dropped.
        abbreviated_names = [name for name in re.split("[,;]", cleaned) if name]

        # Get the appropriate week\d.csv
        play_df, _ = get_week_df(current_play.gameId, current_play.playId)

        # filter by the qb's team, if there is a qb on the field
        qb_df = play_df.query("position=='QB'")
        if len(qb_df) > 0:
            qb_team = qb_df.team.iloc[0]
            play_df = play_df[play_df.team != qb_team]

        # Resolve each abbreviated name ("J.Smith" or "Smith") to a full displayName.
        for index, abbreviated in enumerate(abbreviated_names):
            name_parts = abbreviated.split(".")
            if len(name_parts) == 1:
                # Last name only: any first name followed by that last name.
                spec_name_re = r"^[-'\.a-zA-Z]+ {}$".format(re.escape(name_parts[0]))
            elif len(name_parts) == 2:
                # Initial(s) + last name: first name must start with the initial(s).
                spec_name_re = r"^{}[-'\.a-zA-Z]+ {}$".format(re.escape(name_parts[0]), re.escape(name_parts[1]))
            else:
                print(index, abbreviated_names)
                print(current_play.playDescription)
                print("Uexpected number of periods#####################################################")
                # No usable pattern; an empty pattern would match every player.
                continue

            # Search from play_df (week\d.csv)
            possible_fullnames_from_playdf = play_df[play_df.displayName.str.contains(spec_name_re, regex=True, na=False)].displayName.unique().tolist()
            # Keep the name only when it resolves to exactly one player.
            if len(possible_fullnames_from_playdf) == 1:
                potentials.append(possible_fullnames_from_playdf[0])

    # One or two resolved names fill the cover columns; anything else is left blank.
    if len(potentials) == 1:
        cover_ones.append(potentials[0])
        cover_twos.append('')
    elif len(potentials) == 2:
        cover_ones.append(potentials[0])
        cover_twos.append(potentials[1])
    else:
        cover_ones.append('')
        cover_twos.append('')

plays = plays.assign(coverOne=cover_ones, coverTwo=cover_twos)

# Simple Analysis

In [None]:
def donut_chart(series, top_k=10):
    """Draw a donut chart of the most frequent non-blank labels in `series`.

    Parameters
    ----------
    series : list-like
        Labels to count (e.g. cover names). The empty string '' is treated as
        the "no name" placeholder and is never drawn.
    top_k : int, default 10
        Size of the ranking window. One rank is reserved for the blank
        placeholder, so at most `top_k - 1` named slices are drawn.

    Returns
    -------
    None. The chart is displayed with `plt.show()`; when there is nothing to
    plot a short note is printed instead.
    """
    plt.close('all')

    label_counts = pd.Series(series).value_counts()
    # Remove the blank placeholder by label rather than by position: dropping
    # "the first entry" discards the top real name whenever '' is not the most
    # frequent label.
    named_counts = label_counts[label_counts.index != ''].iloc[:max(top_k - 1, 0)]
    if named_counts.empty:
        print("donut_chart: no non-blank labels to plot")
        return

    fig, ax = plt.subplots()
    named_counts.plot.pie(ax=ax, textprops={'fontsize': 5})

    # A white disc over the centre turns the pie into a donut.
    ax.add_artist(plt.Circle((0, 0), 0.8, color='white'))

    plt.show()

In [None]:
# Pool both cover columns so a player is counted whichever slot they occupy.
cover_list = [*plays.coverOne.tolist(), *plays.coverTwo.tolist()]
donut_chart(cover_list)
# Ten most frequent labels; the blank '' placeholder is included in this table.
pd.Series(cover_list).value_counts().head(10)

# How about incomplete passes?
The charts above considered every play in which a cover was identified. What about only the plays that ended in an incomplete pass?

In [None]:
# NOTE: despite its name, `completions` ends up holding the cover names on
# plays with passResult 'I' (the incomplete passes this section is about).
incomplete_plays = plays.query("passResult=='I'")
completions = pd.Series(incomplete_plays.coverOne.tolist() + incomplete_plays.coverTwo.tolist())
donut_chart(completions)
completions.value_counts().head(10)

# How about Intercepted plays?

In [None]:
# Plays with passResult 'IN' (the intercepted plays this section is about).
completions = plays.query("passResult=='IN'")
intercepted_covers = completions.coverOne.tolist() + completions.coverTwo.tolist()
completionSeries = pd.Series(intercepted_covers)
donut_chart(completionSeries)
completionSeries.value_counts().head(10)

# Conclusion
Using this notebook and the [notebook before](https://www.kaggle.com/beomjunbae/parsing-targets), we now have the full names of both the covers and the targets. A more insightful analysis using both of these columns is coming soon.