# "Play Type Recommendation System & Result Prediction"

> American football is a very fast-paced sport: during a match, coaches often have to make strategic judgements within a very short period of time.

> We are trying to build a recommendation system that helps coaches make faster and sounder judgements. Given some parameters of the current situation (e.g. quarter, scores, opponent team), the system recommends the best play types in descending order, together with a success rate and a suggested strategy (including kick type and direction). The expected result is also predicted, such as the kick length of a punt.
Coaches have a huge amount of historical data available to study, but a human mind cannot process that much information, especially not during a match. With a tool like this, strategic decisions can be generated in the blink of an eye and are backed by big data.

> Besides a team's own strategy, synergy between players is a very important factor in strategy planning, because American football is a team sport. Here we try to identify the key and support players of the opponent team that our own team needs to be aware of when running certain types of play.

## Import libs

In [None]:
# !pip install --upgrade pandas

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime, timedelta, date
import re

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Step 1 - Importing files 

In [None]:
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Echo the full path of every file found beneath the Kaggle input mount.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
%cd "/kaggle/input"

In [None]:
data_path = 'nfl-big-data-bowl-2022/'

In [None]:
# Load the four play-level tables in one pass; the tuple order below fixes
# which file ends up in which variable.
plays, players, PFFScouting, games = (
    pd.read_csv(data_path + csv_name)
    for csv_name in ('plays.csv', 'players.csv', 'PFFScoutingData.csv', 'games.csv')
)

In [None]:
players_ori = players.copy()

In [None]:
# One tracking table per season; the ksm_* helpers look these up by name.
tracking2018, tracking2019, tracking2020 = (
    pd.read_csv(data_path + csv_name)
    for csv_name in ('tracking2018.csv', 'tracking2019.csv', 'tracking2020.csv')
)

In [None]:
# Raw (un-cleaned) join: plays -> games -> scouting data -> kicker's player row.
playgames = plays.merge(games, on='gameId', how='left')
playgamesScout = playgames.merge(PFFScouting, on=['gameId', 'playId'], how='left')
rawDf = playgamesScout.merge(players, left_on=['kickerId'], right_on=['nflId'], how='left')

# Step 2 - Preprocessing


## Compress size for saving memory space

In [None]:
def downcast(df, verbose=True):
    """Shrink the numeric columns of *df* to the smallest dtype that fits.

    Object columns are left alone, bool columns become ``int8``, columns that
    are integer-typed or hold only whole numbers are downcast as integers,
    and every other column is downcast as a float.

    The frame is modified in place and also returned. When *verbose* is true
    the percentage of memory saved is printed.
    """
    def _megabytes():
        return df.memory_usage().sum() / 1024**2

    before = _megabytes()
    for name in df.columns:
        kind = df[name].dtype.name
        if kind == 'object':
            continue
        if kind == 'bool':
            df[name] = df[name].astype('int8')
            continue
        # The whole-number test only runs for columns that are not already
        # integer-typed (short-circuit `or`).
        is_integral = kind.startswith('int') or (df[name].round() == df[name]).all()
        target = 'integer' if is_integral else 'float'
        df[name] = pd.to_numeric(df[name], downcast=target)
    after = _megabytes()

    if verbose:
        print('{:.1f}% Compressed'.format(100 * (before - after) / before))

    return df

In [None]:
def resumetable(df):
    """Print the shape of *df* and return a one-row-per-column summary.

    The summary lists each column's name, dtype, null count, distinct-value
    count and the values found in the rows labelled 0, 1 and 2.
    """
    print(f'Shape : {df.shape}')
    summary = (
        pd.DataFrame(df.dtypes, columns=['Data Type'])
        .reset_index()
        .rename(columns={'index': 'Feature'})
    )
    summary['Num of null'] = df.isnull().sum().values
    summary['Num of unique'] = df.nunique().values
    # Rows are fetched by index label (0, 1, 2), not by position.
    for row_label, header in enumerate(('First value', 'Second value', 'Third value')):
        summary[header] = df.loc[row_label].values
    return summary

In [None]:
# Compress every play-level table; downcast returns the frame it was given.
games, players, plays, PFFScouting = (
    downcast(table) for table in (games, players, plays, PFFScouting)
)

In [None]:
# Check data: print each table's shape and build its per-column summary.
# NOTE(review): a notebook cell only renders the value of its last expression,
# so presumably just the PFFScouting summary is displayed here — confirm.
resumetable(players)
resumetable(games)
resumetable(plays)
resumetable(PFFScouting)

## Clean and combine data

### Before combining

In [None]:
def cleanPlayers(players):
    """Normalise the players table in place and return it.

    * ``height`` strings are split on "-"; a two-part value is read as
      feet-inches, a single value as inches, and the result is converted to
      centimetres.
    * ``weight`` is converted from pounds to kilograms, rounded to 2 decimals.
    * Known missing birth dates / colleges of three players are filled in.
    * ``birthDate`` is parsed to datetime.
    """
    def _total_inches(parts):
        if len(parts) == 2:
            return int(parts[0]) * 12 + int(parts[1])
        return int(parts[0])

    split_heights = players["height"].apply(lambda raw: raw.split("-"))
    players["height"] = split_heights.apply(_total_inches) * 2.54

    players["weight"] = round(players.weight * 0.453592, 2)

    # Manual patches for rows with missing values, applied in this order.
    manual_fixes = (
        ('Hunter Niswander', 'birthDate', '1994-11-26'),
        ('Taylor Russolino', 'birthDate', '1989-05-23'),
        ('Brandon Wright', 'collegeName', 'North Carolina State'),
        ('Hunter Niswander', 'collegeName', 'Northwestern'),
        ('Taylor Russolino', 'collegeName', 'Mississippi'),
    )
    for display_name, column, value in manual_fixes:
        players.loc[players['displayName'] == display_name, [column]] = value

    players['birthDate'] = pd.to_datetime(players['birthDate'])

    return players


In [None]:
def cleanBeforeCombine(players, plays, games):
    """Prepare the three tables before they are merged.

    * ``players`` goes through ``cleanPlayers``.
    * ``games['gameDate']`` is parsed to datetime (in place).
    * ``plays`` is reduced to field goals, punts, and any play whose result
      is 'Non-Special Teams Result'.

    Returns the tuple ``(players, plays, games)``.
    """
    players = cleanPlayers(players)

    # `infer_datetime_format` is deprecated in pandas 2.x (format inference is
    # the default there), so it is no longer passed.
    games['gameDate'] = pd.to_datetime(games['gameDate'])

    keep = (
        plays['specialTeamsPlayType'].isin(['Field Goal', 'Punt'])
        | (plays['specialTeamsResult'] == 'Non-Special Teams Result')
    )
    plays = plays.loc[keep]

    return players, plays, games

In [None]:
players, plays, games = cleanBeforeCombine(players, plays, games)

### Combine data 

In [None]:
# Cleaned join: plays -> games -> scouting data -> kicker's player row.
playgames = plays.merge(games, on='gameId', how='left')
playgamesScout = playgames.merge(PFFScouting, on=['gameId', 'playId'], how='left')
alldata = playgamesScout.merge(players, left_on=['kickerId'], right_on=['nflId'], how='left')

### Clean data

In [None]:
typeCols = ['quarter', 'down', 'yardsToGo', 'possessionTeam',
            'specialTeamsPlayType', 'specialTeamsResult', 'yardlineSide',
            'yardlineNumber', 'gameClock', 'preSnapHomeScore',
            'preSnapVisitorScore', 'absoluteYardlineNumber', 'homeTeamAbbr', 'visitorTeamAbbr']

In [None]:
puntCols = ['quarter', 'down', 'yardsToGo', 'possessionTeam', 
            'yardlineSide', 'yardlineNumber', 'gameClock', 'preSnapHomeScore',
            'preSnapVisitorScore', 'kickLength', 'absoluteYardlineNumber',
            'homeTeamAbbr', 'visitorTeamAbbr', 'kickType', 'direction',
            'nflId', 'height', 'weight', 'Position', 'age']

In [None]:
fgCols = ['quarter', 'down', 'yardsToGo', 'possessionTeam', 'specialTeamsResult',
        'yardlineSide', 'yardlineNumber', 'gameClock', 'preSnapHomeScore',
        'preSnapVisitorScore', 'absoluteYardlineNumber', 'homeTeamAbbr', 'visitorTeamAbbr',
        'nflId', 'height', 'weight', 'Position', 'age']

In [None]:
# Union of the three column lists, de-duplicated while keeping first-seen
# order. A plain set union would give a column order that can change from
# run to run because string hashing is randomised.
allCols = list(dict.fromkeys([*puntCols, *fgCols, *typeCols]))

In [None]:
# Integer codes for the categorical columns encoded in cleanAll.
# pos_map: player position code ('K' -> 1, 'P' -> 0).
# kick_map / dir_map: the non-zero codes are the ones decoded again by
# re_kick_map / re_dir_map further down.
# NOTE(review): the `0: 0` entries only match a literal 0 in the data; cleanAll
# fills missing values with 0 for kickLength only, so missing kickType /
# kickDirectionActual values presumably stay NaN after mapping — confirm.
pos_map = {'K':1,"P":0 }
kick_map = {'N':1,"A":2,"R":3,0:0}
dir_map = {'C':1,"L":2,"R":3,0:0 }

In [None]:
def cleanAll(alldata):
    """Encode the merged play table numerically.

    Returns ``(alldata, team_map)`` where ``alldata`` holds only the columns
    listed in ``allCols`` and ``team_map`` maps each team abbreviation to the
    integer code used for it.

    Note: the ``age`` column is written onto the frame that was passed in
    before a new frame is created by dropping the date columns.
    """
    # Age is the difference between the game year and the birth year.
    alldata["age"] = alldata["gameDate"].dt.year - alldata["birthDate"].dt.year
    alldata = alldata.drop(columns=["birthDate", "gameDate"])

    # Game clock string -> timedelta -> seconds.
    alldata['gameClock'] = pd.to_timedelta(alldata['gameClock']).dt.total_seconds()

    # Team codes follow the case-insensitive alphabetical order of the
    # abbreviations that appear as a home team.
    ordered_teams = (
        alldata['homeTeamAbbr']
        .value_counts()
        .sort_index(key=lambda x: x.str.lower())
        .index
    )
    team_map = {abbr: code for code, abbr in enumerate(ordered_teams)}

    for team_col in ('homeTeamAbbr', 'visitorTeamAbbr', 'possessionTeam', 'yardlineSide'):
        alldata[team_col] = alldata[team_col].map(team_map)

    alldata['kickLength'] = alldata['kickLength'].fillna(0)

    # Categorical columns -> integer codes.
    alldata['Position'] = alldata['Position'].map(pos_map)
    alldata['kickType'] = alldata['kickType'].map(kick_map)
    alldata['direction'] = alldata['kickDirectionActual'].map(dir_map)

    return alldata[allCols], team_map


In [None]:
alldata, team_map = cleanAll(alldata)

In [None]:
alldata.head()

# Step 3 - Exploratory Data Analysis

In [None]:
alldata.isna().sum()

In [None]:
punt = alldata[alldata['specialTeamsPlayType']=='Punt']
fg = alldata[alldata['specialTeamsPlayType']=='Field Goal']
countpunt=punt['specialTeamsResult'].value_counts().reset_index()
countfg=fg['specialTeamsResult'].value_counts().reset_index()

In [None]:


# Punt results: a bar chart of result counts next to a letter-value plot of
# kick length per result.
# NOTE(review): `ticker` and `palette` are not used in this cell (the
# `palette=` arguments are commented out).
from matplotlib import gridspec, ticker
color_counts = (punt['specialTeamsResult'].value_counts().reset_index())
color_counts.columns = ['specialTeamsResult', 'count']

# Results ordered by frequency; the same order is reused for the right plot.
order = color_counts['specialTeamsResult']
palette = color_counts['specialTeamsResult'].replace('other', None) # "other" is not a color name

# One row of three grid columns: the left plot takes the first column, the
# right plot spans the remaining two.
fig = plt.figure(figsize=(15, 8))
gs = gridspec.GridSpec(1, 3, figure=fig)

# Left plot
ax = fig.add_subplot(gs[0])
sns.barplot(data = color_counts, x = 'count', y = 'specialTeamsResult',
            #palette = palette, 
            ax = ax)
ax.set(title = 'Punt Result')
# The x-range is hard-coded rather than derived from the counts.
ax.set_xlim((0, 5600))
for p in ax.patches:
    # Write each bar's width (the count) at the end of the bar.
    ax.annotate(f"{int(p.get_width())}", xy = (p.get_width(), p.get_y() + 0.5),
                horizontalalignment = 'left')
    clr = p.get_facecolor()
    if clr == (1, 1, 1, 1):
        # If facecolor is white
        p.set_edgecolor('magenta')
ax.set_ylabel('')


# Right plot
ax = fig.add_subplot(gs[1:3])
sns.boxenplot(data = punt, x = 'kickLength', y = 'specialTeamsResult',
              order = order, 
              #palette = palette, 
              ax = ax)
# NOTE(review): cleanAll fills missing kickLength with 0, and a log-scaled axis
# has no position for 0 — confirm how those rows should be shown.
ax.set(title = 'Punt Results - Kick Lengths', xscale = 'log')
ax.yaxis.tick_right()
ax.set_ylabel('')

plt.suptitle("How Punt Results Compared when it comes to kick lengths", fontsize = 15)
plt.tight_layout()
plt.show()

In [None]:
# Field-goal results: a bar chart of result counts next to a letter-value plot
# of kick length per result (same layout as the punt figure).
# NOTE(review): `ticker` and `palette` are not used in this cell (the
# `palette=` arguments are commented out).
from matplotlib import gridspec, ticker
color_counts = (fg['specialTeamsResult'].value_counts().reset_index())
color_counts.columns = ['specialTeamsResult', 'count']

# Results ordered by frequency; the same order is reused for the right plot.
order = color_counts['specialTeamsResult']
palette = color_counts['specialTeamsResult'].replace('other', None) # "other" is not a color name

# One row of three grid columns: the left plot takes the first column, the
# right plot spans the remaining two.
fig = plt.figure(figsize=(15, 8))
gs = gridspec.GridSpec(1, 3, figure=fig)

# Left plot
ax = fig.add_subplot(gs[0])
sns.barplot(data = color_counts, x = 'count', y = 'specialTeamsResult',
            #palette = palette, 
            ax = ax)
ax.set(title = 'Field Goal Result')
# The x-range is hard-coded rather than derived from the counts.
ax.set_xlim((0, 5600))
for p in ax.patches:
    # Write each bar's width (the count) at the end of the bar.
    ax.annotate(f"{int(p.get_width())}", xy = (p.get_width(), p.get_y() + 0.5),
                horizontalalignment = 'left')
    clr = p.get_facecolor()
    if clr == (1, 1, 1, 1):
        # If facecolor is white
        p.set_edgecolor('magenta')
ax.set_ylabel('')


# Right plot
ax = fig.add_subplot(gs[1:3])
sns.boxenplot(data = fg, x = 'kickLength', y = 'specialTeamsResult',
              order = order, 
              #palette = palette, 
              ax = ax)
# NOTE(review): cleanAll fills missing kickLength with 0, and a log-scaled axis
# has no position for 0 — confirm how those rows should be shown.
ax.set(title = 'Field Goal Results - Kick Lengths', xscale = 'log')
ax.yaxis.tick_right()
ax.set_ylabel('')

plt.suptitle("How Field Goal Results Compared when it comes to kick lengths", fontsize = 15)
plt.tight_layout()
plt.show()

# Step 4 - Building whole model

## Some shared functions

In [None]:
def fillna(df):
    """Forward-fill missing values column by column, in place.

    Every column containing at least one NaN is replaced by its
    forward-filled version; leading NaNs (nothing above to copy from) stay
    NaN. The same frame is returned for convenience.
    """
    for c in df.columns:
        if df[c].isna().any():
            # Series.ffill() replaces fillna(method='ffill'), which is
            # deprecated in pandas 2.1 and removed in pandas 3.
            df[c] = df[c].ffill()
    return df

## Play Type Classifier

### Clean train data

In [None]:
def cleanTypeData(alldata):
    """Build the training table for the play-type classifier.

    Extra-point rows are dropped, gaps are forward-filled, and every row
    whose result is 'Non-Special Teams Result' gets the play type
    'Non-Special Teams'. Only the columns in ``typeCols`` are returned.
    """
    extra_point_rows = alldata[alldata.specialTeamsPlayType == 'Extra Point'].index
    classplaytype = fillna(alldata.drop(extra_point_rows))

    # Write through .loc on the frame itself. The previous chained
    # `df[col].mask(..., inplace=True)` only changes a temporary Series when
    # pandas Copy-on-Write is active, so the relabel was silently lost.
    non_special = classplaytype['specialTeamsResult'] == 'Non-Special Teams Result'
    classplaytype.loc[non_special, 'specialTeamsPlayType'] = 'Non-Special Teams'

    return classplaytype[typeCols]
    

In [None]:
classplaytype = cleanTypeData(alldata)

### Train model

In [None]:
# Features are every column except the two label columns; the target is the
# play type.
typeInputCol = classplaytype.columns.drop(["specialTeamsPlayType", "specialTeamsResult"])
typeX = classplaytype[typeInputCol].to_numpy()
typeY = classplaytype['specialTeamsPlayType']
print(typeX.shape, typeY.shape)

In [None]:
classplaytype['specialTeamsPlayType']

In [None]:
typeX_train, typeX_test, typeY_train, typeY_test = train_test_split(typeX, typeY, random_state=20, train_size=0.8)

In [None]:
# Fit the play-type classifier, then report accuracy on both splits.
typeModel = GaussianNB()
typeModel.fit(typeX_train, typeY_train)

type_train_acc = typeModel.score(typeX_train, typeY_train)
print('Accuracy of type classifier on training set: {:.2f}'.format(type_train_acc))
type_test_acc = typeModel.score(typeX_test, typeY_test)
print('Accuracy of type classifier on test set: {:.2f}'.format(type_test_acc))

In [None]:
typeModel.predict([typeX_test[2]])

## Punt Model

### Prepare data

In [None]:
def cleanPuntData(alldata):
    """Return the punt training table.

    Keeps punt plays whose result is not 'Non-Special Teams Result',
    forward-fills gaps and selects the columns in ``puntCols``.
    """
    is_punt = alldata['specialTeamsPlayType'] == 'Punt'
    is_special = alldata['specialTeamsResult'] != 'Non-Special Teams Result'

    # reset_index() adds an 'index' column; the puntCols selection below
    # leaves it out again.
    punt = fillna(alldata.loc[is_punt & is_special].reset_index())
    return punt[puntCols]

In [None]:
punt = cleanPuntData(alldata)
punt.info()

Create the y (target) data by bucketing the kick length into distance classes.

In [None]:
def create_puntY(df):
    """Bucket each row's ``kickLength`` into a distance class.

    Classes: 0 for <= 30 (punt failed), 1 for (30, 45], 2 for (45, 60],
    3 for anything else — which covers > 60 and also NaN, since every
    comparison with NaN is false.

    Returns a list of ints, one per row of *df*.
    """
    def _bucket(kickLength):
        if kickLength <= 30:
            # punt failed
            return 0
        if kickLength <= 45:
            return 1
        if kickLength <= 60:
            return 2
        return 3

    # Iterate the column directly: iterrows() builds a Series per row and is
    # needlessly slow when only one value is read.
    return [_bucket(length) for length in df['kickLength']]

In [None]:
# Target is the distance class; features are every column except kickLength.
puntY = create_puntY(punt)
puntInputCols = punt.columns.drop(['kickLength'])
puntX = punt[puntInputCols].to_numpy()

In [None]:
puntX_train, puntX_test, puntY_train, puntY_test = train_test_split(puntX, puntY, random_state=234, train_size=0.9)


### Train Model

In [None]:
# Fit on the training split only. Fitting on the full puntX / puntY (as
# before) lets the model see the held-out rows, so the test accuracy was
# inflated and the "training" score was computed on all the data.
puntModel = LinearDiscriminantAnalysis()
puntModel.fit(puntX_train, puntY_train)
print('Accuracy of LDA classifier on training set: {:.2f}'
     .format(puntModel.score(puntX_train, puntY_train)))
print('Accuracy of LDA classifier on test set: {:.2f}'
     .format(puntModel.score(puntX_test, puntY_test)))

In [None]:
puntModel.predict_proba([puntX_test[0]])

## Field Goal Model

### Prepare data

In [None]:
def cleanFgData(alldata):
    """Return the field-goal training table.

    Keeps field-goal plays whose result is not 'Non-Special Teams Result',
    forward-fills gaps and selects the columns in ``fgCols``.
    """
    is_fg = alldata['specialTeamsPlayType'] == 'Field Goal'
    is_special = alldata['specialTeamsResult'] != 'Non-Special Teams Result'

    # reset_index() adds an 'index' column; the fgCols selection below leaves
    # it out again.
    FG = fillna(alldata.loc[is_fg & is_special].reset_index())
    return FG[fgCols]

In [None]:
FG = cleanFgData(alldata)

In [None]:
FG.info()

In [None]:
# Binary target: True only for a made kick. Comparing against the single
# success label keeps the target strictly boolean; the previous dict lookup
# turned any result outside its five keys into NaN, which the classifier
# cannot be fitted on.
fgY = FG['specialTeamsResult'].eq('Kick Attempt Good')
fgX = FG.drop(columns=['specialTeamsResult'])
fgInputCols = fgX.columns
fgX = fgX.to_numpy()

In [None]:
len(fgInputCols)

In [None]:
fgX_train, fgX_test, fgY_train, fgY_test = train_test_split(fgX, fgY, random_state=2, train_size=0.8)


### Train model

In [None]:
# Fit the field-goal success classifier, then report accuracy on both splits.
fgModel = LinearDiscriminantAnalysis()
fgModel.fit(fgX_train, fgY_train)

fg_train_acc = fgModel.score(fgX_train, fgY_train)
print('Accuracy of LDA classifier on training set: {:.2f}'.format(fg_train_acc))
fg_test_acc = fgModel.score(fgX_test, fgY_test)
print('Accuracy of LDA classifier on test set: {:.2f}'.format(fg_test_acc))

In [None]:
fgModel.predict_proba([fgX_test[0]])

## Synergy Matrix
### (Effective combination of Key + Support player)

In [None]:
# Plays joined with their scouting row, then with the game they belong to.
merged_df = (
    plays.merge(PFFScouting, how='left', on=['gameId', 'playId'])
         .merge(games, how='left', on=['gameId'])
)

In [None]:
def idReplace(df):
    """Print the key and support players of one synergy row by display name.

    *df* is a single row exposing ``playResult``, ``key`` (one nflId) and
    ``support`` (an iterable of nflIds). Names are looked up in
    ``players_ori``; each lookup must match exactly one player because
    ``.item()`` is used.
    """
    def _display_name(nfl_id):
        return players_ori[players_ori.nflId == nfl_id].displayName.item()

    print(f"Possible {df.playResult}:")
    print("- Key player: ", _display_name(df['key']))

    supportNames = [str(_display_name(i)) for i in df['support']]
    print("- Support player: ", supportNames, "\n")

## (a) Blocked Punt - kickBlockerId & puntRushers

In [None]:
new_df = merged_df[(merged_df.specialTeamsResult == "Blocked Punt")]

new_df = new_df.reset_index(drop=True)

In [None]:
# Check for missing values
new_df.puntRushers.isna().sum()

In [None]:
# Module-level accumulators filled by ksm_blockedpunt, one entry per play.
team = []
teamSide = []
keyId = []
supportId = []


def ksm_blockedpunt(new_df):
    """Record the key/support players of one blocked-punt play.

    *new_df* is a single play row. Appends to the module-level lists:
    ``team`` (the team that does not have possession), ``keyId`` (the kick
    blocker's nflId), ``teamSide`` ('home'/'away' of the first listed punt
    rusher) and ``supportId`` (the nflIds of all listed punt rushers).

    Raises IndexError when a rusher's jersey number has no tracking row for
    the game.
    """
    home = new_df.homeTeamAbbr

    ## Team
    team.append(new_df.visitorTeamAbbr if new_df.possessionTeam == home else home)

    ## Key
    keyId.append(int(new_df.kickBlockerId))

    ## Support
    # Split on "; " or whitespace; the loop below reads the result as
    # alternating team abbreviation / jersey number.
    JerseyArr = re.split(r'; |\s+', new_df.puntRushers)
    teamSide.append('home' if JerseyArr[0] == home else 'away')

    ## Retrieve the tracking data based on the year prefix of the game id
    game = str(new_df.gameId)
    if game.startswith('2018'):
        year = '2018'
    elif game.startswith('2019'):
        year = '2019'
    else:
        year = '2020'
    trackingData = globals()['tracking' + year]

    ## Resolve every (team, jersey) pair to an nflId via the tracking rows
    supportId_temp = []
    for j in range(0, len(JerseyArr), 2):
        side = 'home' if JerseyArr[j] == home else 'away'
        rows = trackingData.loc[(trackingData.gameId == new_df.gameId) & (trackingData.team == side) & (trackingData.jerseyNumber == int(JerseyArr[j+1]))]
        supportId_temp.append(int(rows.iloc[0].nflId))

    ## Append this play's support list to "supportId"
    supportId.append(supportId_temp)

In [None]:
new_df.apply(lambda x: ksm_blockedpunt(x), axis=1)

In [None]:
df_BlockedPunt_players = pd.DataFrame({"playResult": 'punt block', "team": [team], "side": [teamSide], "key": [keyId], "support": [supportId]})

df_BlockedPunt_players = df_BlockedPunt_players.explode(['team','side','key','support']).reset_index(drop=True)

In [None]:
df_BlockedPunt_players

## (b) Blocked Kick attempt (Field Goal/Extra Point) - kickBlockerId & specialTeamsSafeties

In [None]:
new_df = merged_df[(merged_df.specialTeamsResult == "Blocked Kick Attempt")]

new_df = new_df.reset_index(drop=True)

In [None]:
# Check for missing values
new_df.specialTeamsSafeties.isna().sum()

In [None]:
# Removing records with missing values
new_df = new_df.dropna(subset=['specialTeamsSafeties'])

In [None]:
# Module-level accumulators filled by ksm_blockedkick, one entry per play.
team = []
teamSide = []
keyId = []
supportId = []


def ksm_blockedkick(new_df):
    """Record the key/support players of one blocked kick attempt.

    *new_df* is a single play row. Appends to the module-level lists:
    ``team`` (the team that does not have possession), ``keyId`` (the kick
    blocker's nflId), ``teamSide`` and ``supportId`` (nflIds of the listed
    special-teams safeties; empty when that field is missing).

    Raises IndexError when a jersey number has no tracking row for the game.
    """
    home = new_df.homeTeamAbbr
    possession_is_home = new_df.possessionTeam == home

    ## Team
    team.append(new_df.visitorTeamAbbr if possession_is_home else home)

    ## Key
    keyId.append(int(new_df.kickBlockerId))

    # NOTE(review): `teamSide` is the side of the team in possession while
    # `team` is its opponent — confirm which team "side" should describe.
    teamSide.append('home' if possession_is_home else 'away')

    ## Support
    supportId_temp = []
    if not pd.isna(new_df.specialTeamsSafeties):
        # Split on "; " or whitespace; the loop below reads the result as
        # alternating team abbreviation / jersey number.
        JerseyArr = re.split(r'; |\s+', new_df.specialTeamsSafeties)

        ## Retrieve the tracking data based on the year prefix of the game id
        game = str(new_df.gameId)
        if game.startswith('2018'):
            year = '2018'
        elif game.startswith('2019'):
            year = '2019'
        else:
            year = '2020'
        trackingData = globals()['tracking' + year]

        ## Resolve every (team, jersey) pair to an nflId via the tracking rows
        for j in range(0, len(JerseyArr), 2):
            side = 'home' if JerseyArr[j] == home else 'away'
            rows = trackingData.loc[(trackingData.gameId == new_df.gameId) & (trackingData.team == side) & (trackingData.jerseyNumber == int(JerseyArr[j+1]))]
            supportId_temp.append(int(rows.iloc[0].nflId))

    ## Append this play's support list to "supportId"
    supportId.append(supportId_temp)

In [None]:
new_df.apply(lambda x: ksm_blockedkick(x), axis=1)

In [None]:
df_BlockedKickAttempt_players = pd.DataFrame({"playResult": 'kick block attempt', "team": [team], "side": [teamSide], "key": [keyId], "support": [supportId]})

df_BlockedKickAttempt_players = df_BlockedKickAttempt_players.explode(['team','side','key','support']).reset_index(drop=True)

In [None]:
df_BlockedKickAttempt_players = df_BlockedKickAttempt_players[~df_BlockedKickAttempt_players.support.str.len().eq(0)].reset_index(drop=True)

## (c) Tackle on return - tacklers & assistTacklers

In [None]:
new_df = merged_df[(merged_df.specialTeamsResult == "Return")]

new_df = new_df.reset_index(drop=True)

In [None]:
# Checking for missing values
new_df.assistTackler.isna().sum()

In [None]:
# Removing records with missing values
new_df = new_df.dropna(subset=['assistTackler']).reset_index(drop=True)

In [None]:
import re

# Module-level accumulators filled by ksm_returntackled, one entry per play.
team = []
teamSide = []

keyArr = []

keyId = []
supportId = []


def ksm_returntackled(new_df):
    """Record the tackler and assisting tackler(s) of one returned play.

    *new_df* is a single play row. Appends to the module-level lists:
    ``team`` (the team that does not have possession), ``teamSide``,
    ``keyId`` (nflId of the tackler) and ``supportId`` (nflIds of the assist
    tacklers; empty when that field is missing).

    Raises IndexError when a jersey number has no tracking row for the game.
    """
    home = new_df.homeTeamAbbr
    possession_is_home = new_df.possessionTeam == home

    ## Team
    team.append(new_df.visitorTeamAbbr if possession_is_home else home)

    # NOTE(review): `teamSide` is the side of the team in possession while
    # `team` is its opponent — confirm which team "side" should describe.
    teamSide.append('home' if possession_is_home else 'away')

    ## Retrieve the tracking data based on the year prefix of the game id
    game = str(new_df.gameId)
    if game.startswith('2018'):
        year = '2018'
    elif game.startswith('2019'):
        year = '2019'
    else:
        year = '2020'
    trackingData = globals()['tracking' + year]

    def _first_tracked_id(abbr, jersey):
        # nflId of the first tracking row for this game / side / jersey.
        side = 'home' if abbr == home else 'away'
        rows = trackingData.loc[(trackingData.gameId == new_df.gameId) & (trackingData.team == side) & (trackingData.jerseyNumber == int(jersey))]
        return int(rows.iloc[0].nflId)

    ## Key: only the first (team, jersey) pair of the tackler field is used
    keyArr = re.split(r'; |\s+', new_df.tackler)
    keyId.append(_first_tracked_id(keyArr[0], keyArr[1]))

    ## Support
    supportId_temp = []
    if not pd.isna(new_df.assistTackler):
        JerseyArr = re.split(r'; |\s+', new_df.assistTackler)
        for j in range(0, len(JerseyArr), 2):
            supportId_temp.append(_first_tracked_id(JerseyArr[j], JerseyArr[j+1]))

    ## Append this play's support list to "supportId"
    supportId.append(supportId_temp)

In [None]:
new_df.apply(lambda x: ksm_returntackled(x), axis=1)

In [None]:
# The label column is named "playResult", matching the other synergy tables
# and the `df.playResult` attribute that idReplace reads; it was "playType"
# before, which idReplace could not print.
df_ReturnTackled_players = pd.DataFrame({"playResult": 'tackle on return', "team": [team], "side": [teamSide], "key": [keyId], "support": [supportId]})

# One row per play: the four list-valued cells are exploded in lockstep.
df_ReturnTackled_players = df_ReturnTackled_players.explode(['team','side','key','support']).reset_index(drop=True)

In [None]:
df_ReturnTackled_players

## (d) Return (returnerId & vises)

In [None]:
new_df = merged_df[(merged_df.specialTeamsPlayType == "Punt") & (merged_df.specialTeamsResult == "Return")]

new_df = new_df.reset_index(drop=True)

In [None]:
# Checking missing values
print(new_df.returnerId.isna().sum())
print(new_df.vises.isna().sum())

In [None]:
new_df = new_df.dropna(subset=['returnerId','vises']).reset_index(drop=True)

In [None]:
import re

# Module-level accumulators filled by ksm_return, one entry per play.
team = []
teamSide = []

keyArr = []

keyId = []
supportId = []


def ksm_return(new_df):
    """Record the returner and the listed vises of one punt return.

    *new_df* is a single play row. Appends to the module-level lists:
    ``team`` (the team that does not have possession), ``keyId`` (the row's
    ``returnerId`` as stored), ``teamSide`` and ``supportId`` (nflIds of the
    players listed in ``vises``). Jersey numbers with no tracking row for the
    game are skipped.
    """
    home = new_df.homeTeamAbbr
    possession_is_home = new_df.possessionTeam == home

    ## Team
    team.append(new_df.visitorTeamAbbr if possession_is_home else home)

    ## Key
    keyId.append(new_df.returnerId)

    # NOTE(review): `teamSide` is the side of the team in possession while
    # `team` is its opponent — confirm which team "side" should describe.
    teamSide.append('home' if possession_is_home else 'away')

    ## Retrieve the tracking data based on the year prefix of the game id
    game = str(new_df.gameId)
    if game.startswith('2018'):
        year = '2018'
    elif game.startswith('2019'):
        year = '2019'
    else:
        year = '2020'
    trackingData = globals()['tracking' + year]

    ## Support
    # Split on "; " or whitespace; the loop below reads the result as
    # alternating team abbreviation / jersey number.
    JerseyArr = re.split(r'; |\s+', new_df.vises)

    ## Retrieve nflId of SUPPORT, skipping players without tracking rows
    supportId_temp = []
    for j in range(0, len(JerseyArr), 2):
        side = 'home' if JerseyArr[j] == home else 'away'
        rows = trackingData.loc[(trackingData.gameId == new_df.gameId) & (trackingData.team == side) & (trackingData.jerseyNumber == int(JerseyArr[j+1]))]
        if len(rows) > 0:
            supportId_temp.append(int(rows.iloc[0].nflId))

    ## Append this play's support list to "supportId"
    supportId.append(supportId_temp)

In [None]:
new_df.apply(lambda x: ksm_return(x), axis=1)

In [None]:
df_Return_players = pd.DataFrame({"playResult": 'punt return', "team": [team], "side": [teamSide], "key": [keyId], "support": [supportId]})

df_Return_players = df_Return_players.explode(['team','side','key','support']).reset_index(drop=True)

In [None]:
df_Return_players

# Step 5 - Recommendation System / Prediction

In [None]:
alldata.columns

In [None]:
inputCols = ['gameClock','down', 'quarter', 'yardsToGo',
             'yardlineSide','yardlineNumber', 'absoluteYardlineNumber',
             'preSnapVisitorScore', 'preSnapHomeScore',
             'possessionTeam', 'homeTeamAbbr', 'visitorTeamAbbr', 'nflId', 'gameDate']
strategyCols = ['direction', 'kickType']
goalCols = ['kickLength','specialTeamsResult','specialTeamsPlayType']
print(len(inputCols) + len(strategyCols) + len(goalCols))

In [None]:
def cleanInput(inputs, team_map):
    """Turn one raw situation row into the numeric feature vector.

    Parameters
    ----------
    inputs : pandas.Series
        Must contain 'nflId', 'gameDate' (``%m/%d/%Y`` string), 'gameClock',
        'homeTeamAbbr', 'visitorTeamAbbr', 'possessionTeam' and
        'yardlineSide'. The caller's Series is not modified.
    team_map : dict
        Team abbreviation -> integer code, as returned by ``cleanAll``.

    Returns
    -------
    pandas.Series of float64 with the kicker's Position / height / weight /
    age added and the date column removed.

    Raises
    ------
    IndexError if the nflId is not in ``players_ori``; KeyError for a team
    abbreviation missing from *team_map* or a position missing from
    ``pos_map``.
    """
    # Work on a copy so the caller's row is left untouched.
    inputs = inputs.copy()

    # Look the kicker up in the raw players table. The mask is built from
    # players_ori itself, and 'records' is spelled out because the short
    # alias 'r' is rejected by pandas >= 2.
    nflid = inputs['nflId']
    player = players_ori.loc[players_ori.nflId == nflid].to_dict('records')[0]

    # Height: "feet-inches" or plain inches -> centimetres. Both forms get
    # the 2.54 factor, matching cleanPlayers (the feet-inches form used to be
    # left in inches).
    height_parts = player["height"].split("-")
    if len(height_parts) == 2:
        total_inches = int(height_parts[0]) * 12 + int(height_parts[1])
    else:
        total_inches = int(height_parts[0])

    inputs['Position'] = player['Position']
    inputs['height'] = total_inches * 2.54
    # Weight: pounds -> kilograms.
    inputs['weight'] = round(player['weight'] * 0.453592, 2)

    # Age is the difference between the game year and the birth year.
    # pd.to_datetime is used for the birth date, as in cleanPlayers.
    # NOTE(review): players_ori does not carry the manual birth-date patches
    # from cleanPlayers, so those kickers get a NaN age — confirm.
    birth_date = pd.to_datetime(player['birthDate'])
    game_date = datetime.strptime(inputs["gameDate"], "%m/%d/%Y")
    inputs["age"] = game_date.year - birth_date.year
    inputs = inputs.drop(labels=["gameDate"])

    # Game clock string -> seconds.
    inputs['gameClock'] = pd.to_timedelta(inputs['gameClock']).total_seconds()

    # Team abbreviations -> integer codes.
    for team_col in ('homeTeamAbbr', 'visitorTeamAbbr', 'possessionTeam', 'yardlineSide'):
        inputs[team_col] = team_map[inputs[team_col]]

    # Categorical position -> integer code.
    inputs['Position'] = pos_map[inputs['Position']]

    return inputs.astype('float64')


In [None]:
def getPuntRes(inputs):
    """Search all kick-type / direction pairs for the best predicted punt.

    Each of the 3 x 3 combinations (codes 1..3) is scored with ``puntModel``.
    The winner is the combination with the highest predicted distance class;
    ties are broken by the higher probability of that class.

    Parameters
    ----------
    inputs : pandas.Series
        Cleaned feature row. It is not modified.

    Returns
    -------
    (bestK, bestD, bestRes, bestProb): kick-type code, direction code,
    predicted distance class and the model's probability for that class.
    """
    # Copy once so the trial values never leak into the caller's row.
    candidate = inputs.copy()

    best = None  # (class, probability, kick type, direction)
    for k in range(1, 4):
        for d in range(1, 4):
            candidate['kickType'] = k
            candidate['direction'] = d
            features = candidate[puntInputCols].astype('float64').to_numpy()

            # One model call: the predicted class is the most probable one.
            # Going through classes_ avoids assuming that class labels equal
            # their column positions in predict_proba.
            probs = puntModel.predict_proba([features])[0]
            top = int(np.argmax(probs))
            curRes = puntModel.classes_[top]
            curProb = probs[top]

            # Lexicographic comparison: a class-0 prediction can still win
            # when nothing better exists, and the outcome no longer depends
            # on the order in which combinations are visited.
            if best is None or (curRes, curProb) > best[:2]:
                best = (curRes, curProb, k, d)

    bestRes, bestProb, bestK, bestD = best
    return bestK, bestD, int(bestRes), float(bestProb)

In [None]:
re_kick_map ={
    1: 'Normal - standard punt style',
    3: 'Rugby style punt',
    2: 'Nose down or Aussie-style punts' }

In [None]:
re_dir_map ={
    2: 'Left', 3: 'Right', 1: 'Center'}

In [None]:
punt_res_map = {
    0:'less than 30',
    1:"between 30 and 45",
    2:"between 45 and 60",
    3: 'over 60'
}

In [None]:
# Short labels for the play-type classes; predict() looks them up by the
# column position of a class in typeModel.predict_proba.
# NOTE(review): presumably relies on the classifier ordering its classes as
# 'Field Goal', 'Non-Special Teams', 'Punt' — confirm against typeModel.classes_.
type_map ={0:'FG',1:'nonSP',2:'Punt'}

In [None]:
def predict(inputs):
    """Print the two most likely play types for a situation, with details.

    For a field goal the predicted success rate is printed; for a punt the
    suggested kick type / direction and predicted distance class. In both
    cases the opponent's known blockers are listed when the matching synergy
    table has rows for that team.

    Parameters
    ----------
    inputs : pandas.Series
        Raw situation row holding the columns of ``inputCols``.

    Returns
    -------
    None. Everything is printed.
    """
    # Resolve the opponent from the raw abbreviations, before cleanInput
    # replaces them with integer codes.
    if inputs.possessionTeam == inputs.homeTeamAbbr:
        opponentTeam = inputs.visitorTeamAbbr
    else:
        opponentTeam = inputs.homeTeamAbbr

    inputs = cleanInput(inputs, team_map)

    # predict play type
    typeInputs = inputs[typeInputCol].to_numpy()
    typeRes = typeModel.predict_proba([typeInputs])[0]
    # Column positions of the two highest probabilities. A stable sort on the
    # negated values keeps the lower index first on ties and never fails,
    # unlike picking the second-largest *unique* value.
    Type = np.argsort(-typeRes, kind='stable')[:2]

    for idx, t in enumerate(Type):
        print(f'No. {idx+1} recommended play type: {type_map.get(t)}')

        # FG
        if t == 0:
            fgInputs = inputs[fgInputCols].to_numpy()
            fgSuccessRate = fgModel.predict_proba([fgInputs])[0,1]
            print(f'    Success rate of Field Goal is {round(fgSuccessRate*100,2)}%\n')

            # Opponent players known to have blocked kick attempts.
            kickBlockers = df_BlockedKickAttempt_players[df_BlockedKickAttempt_players.team == opponentTeam]
            if len(kickBlockers) > 0:
                kickBlockers.apply(idReplace, axis=1)

            print("--------------------------------------\n")

        elif t == 1:
            print('Non-special team result')

        # Punt
        else:
            k, d, res, prob = getPuntRes(inputs)
            resKick = re_kick_map.get(k)
            resDir = re_dir_map.get(d)
            puntRes = punt_res_map.get(res)
            puntProb = round(prob*100)
            print(f'    Suggested strategy for Punt: \n    Direction: {resDir}, KicktType: {resKick}')
            print(f'    Prediction: {puntProb}% of change kick to the distance {puntRes}\n')

            # Opponent players known to have blocked punts.
            puntBlockers = df_BlockedPunt_players[df_BlockedPunt_players.team == opponentTeam]
            if len(puntBlockers) > 0:
                puntBlockers.apply(idReplace, axis=1)

            print("--------------------------------------\n")
        



In [None]:
try_inputs = rawDf.loc[120]
try_inputs = try_inputs[inputCols]
predict(try_inputs)