In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import plotly.express as px
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# What is a good kickoff?

![2007 Hawaii Bowl - Boise State University vs East Carolina University - Second Half Kickoff](https://upload.wikimedia.org/wikipedia/commons/thumb/7/7b/2007_Hawaii_Bowl_-_Boise_State_University_vs_East_Carolina_University_-_Second_Half_Kickoff.jpg/800px-2007_Hawaii_Bowl_-_Boise_State_University_vs_East_Carolina_University_-_Second_Half_Kickoff.jpg)

2007 Hawaii Bowl - Boise State University vs East Carolina University - Second Half Kickoff

by **Mark Ramelb**

This notebook is to analyze data on **kickoff** plays, in order to provide insights on the strategy.

If kickoff is a new term to you, NFL has provided a [definition](https://operations.nfl.com/learn-the-game/nfl-basics/terms-glossary/)

> A kick that puts the ball in play at the start of each half, at the start of overtime, after each Try, and after a successful field goal.

Other than that, I personally find this Youtube [video](https://www.youtube.com/watch?v=9HwNDNeDpFw) is quite helpful as a brief intro.

From the data source in this competition, we can see four types of special team plays - 

* Kickoffs
* Punt
* Extra point
* Field goal

The type having the highest number is Kickoffs, which shows its importance in special team performance.


# Kickoff is the most common play type for the special team

In [None]:
# Load the competition's play table (plays.csv) into a DataFrame.
play_data = pd.read_csv(
    filepath_or_buffer="/kaggle/input/nfl-big-data-bowl-2022/plays.csv"
)

In [None]:
# How many plays of each special-teams type are in the data?
# Count rows rather than distinct playId values: playId is only unique within a
# game, so nunique() merges plays from different games that share an id.
# (Assumes plays.csv holds one row per play - TODO confirm.)
# The count column keeps the name 'playId' so the chart spec below is unchanged.
play_data_aggr = (
    play_data
    .groupby('specialTeamsPlayType')
    .size()
    .reset_index(name='playId')
)
fig = px.bar(play_data_aggr,
             x='specialTeamsPlayType',
             y='playId',
             labels={'specialTeamsPlayType':'Type of Special team plays',
                   'playId':'Number of plays'}
            )
fig.update_layout(title_text='Number of special team plays per type<br><sup>Kickoff has the highest number of play in all four special team types</sup>')

fig.show()

# Most usual starting positions for a kickoff

The starting position of the ball for a kickoff is usually either the **35**-yard line or the **65**-yard line, depending on whether the kicking team is heading to the right or to the left.

In [None]:
# Select the plays that are kickoffs.
# Take an explicit copy: later cells add/overwrite columns on kickoff_play, and
# doing that on a filtered slice of play_data triggers SettingWithCopyWarning.
kickoff_play = play_data.loc[play_data["specialTeamsPlayType"] == "Kickoff"].copy()

In [None]:
# The x coordinate starts and ends at the outside edge of the end zones, so
# subtract 10 from absoluteYardlineNumber to get the yard-line number itself.
# The value is derived from the untouched play_data column (rows aligned on the
# shared index) so re-running this cell never subtracts 10 a second time.
kickoff_play = kickoff_play.assign(
    absoluteYardlineNumber=play_data.loc[kickoff_play.index, 'absoluteYardlineNumber'] - 10
)

# Count rows rather than distinct playId values: playId is only unique within a
# game, so nunique() under-counts kickoffs. The count column keeps the name
# 'playId' so plotting code that reads it keeps working.
absoluteYardlineNumber_aggr = (
    kickoff_play
    .groupby('absoluteYardlineNumber')
    .size()
    .reset_index(name='playId')
)

In [None]:
# Bar chart of kickoff counts by the ball's starting yard line.
fig = px.bar(
    data_frame=absoluteYardlineNumber_aggr,
    x='absoluteYardlineNumber',
    y='playId',
    labels=dict(
        absoluteYardlineNumber='Location of ball downfield',
        playId='Number of kickoffs',
    ),
)
fig.update_layout(title_text='Number of kickoffs per Location of ball downfield').show()

Among the 3,454 Kickoffs, **Touchback** and **Return** are two of the most common results, followed by Out of bounds, Muffed, Kick Team Recovery, Fair catch and Downed.

In [None]:
# Number of kickoffs per specialTeamsResult.
# Count rows rather than distinct playId values: playId is only unique within a
# game, so nunique() under-counts plays. The count column keeps the name
# 'playId' so the chart spec below is unchanged.
kickoff_play_result = (
    kickoff_play
    .groupby('specialTeamsResult')
    .size()
    .reset_index(name='playId')
)

fig = px.bar(kickoff_play_result,
             x='specialTeamsResult',
             y='playId',
             labels={'specialTeamsResult':'Result of kickoffs',
                   'playId':'Number of plays'}
            )
fig.update_layout(title_text='Number of plays per kickoff result')

fig.show()

In [None]:
# Load the competition's game table (games.csv) into a DataFrame.
game_data = pd.read_csv(
    filepath_or_buffer="/kaggle/input/nfl-big-data-bowl-2022/games.csv"
)

# Touchback tends to become more popular for special plays over the years since 2018
The reason is that touchbacks became easier to get in 2018, when the NFL approved a new rule that if a ball gets to the end zone and touches the ground, it’s an automatic touchback. There’s no need for a player to pick it up and kneel, or even catch a ball if it’s headed for the end zone and they don’t intend to return it. This is a small time saver, but the goal is to blow a play dead earlier so that unnecessary collisions don’t happen. Under the previous rules, a player could take their time gathering a ball and kneeling while the coverage team and return team blockers still careened toward each other for no reason. [source](https://www.sbnation.com/2018/5/22/17369774/nfl-kickoff-rule-change-explained)


In [None]:
# Attach game-level columns (the code below uses `season`) to every kickoff.
# Only non-default merge options are spelled out.
game_play_data = pd.merge(
    kickoff_play,
    game_data,
    how="inner",
    on="gameId",
    sort=True,
)

# Work on an explicit copy so adding a column does not raise SettingWithCopyWarning.
game_play_data_type = game_play_data[['gameId','playId','season','specialTeamsResult']].copy()

# gameId + playId identifies a play; playId alone is reused across games.
game_play_data_type['unique_id'] = (
    game_play_data_type['gameId'].astype(str) + game_play_data_type['playId'].astype(str)
)
game_play_data_type_aggr = game_play_data_type.groupby(['season','specialTeamsResult']).count().reset_index()

# Share of each kickoff result within its season, in percent (one decimal).
game_play_data_type_aggr['year_sum'] = (
    game_play_data_type_aggr.groupby('season')['unique_id'].transform('sum')
)
game_play_data_type_aggr['Percentage_of_total_plays'] = (
    game_play_data_type_aggr['unique_id'] * 100 / game_play_data_type_aggr['year_sum']
).round(1)

# Copy again before editing: touchback_share is a filtered slice of the aggregate.
touchback_share = game_play_data_type_aggr.loc[
    game_play_data_type_aggr['specialTeamsResult'] == 'Touchback'
].copy()
# Season as a string so the x axis is categorical (no fractional years).
touchback_share['season'] = touchback_share['season'].astype(str)

fig = px.line(touchback_share, x="season", y="Percentage_of_total_plays", title='Percentage of touchback is played by the special team during the season')
fig.show()

# Gained yards per kickoff outcome

Return and touchback are the two most common outcomes of a kickoff. The two have a very similar median for the yards gained by the kickoff team, which is ~40 yards. However, the return has a much wider range of possible outcomes, from 65 to -35 yards. This indicates that a touchback is safer, but also less uncertain and less exciting, than a return.

In [None]:
# Composite play key: gameId and playId rendered as strings and concatenated.
kickoff_play['uniqueId'] = kickoff_play['gameId'].astype(str).str.cat(
    kickoff_play['playId'].astype(str)
)

# Distribution of playResult for each kickoff outcome.
yards_gained_outcome = kickoff_play.loc[:, ['uniqueId', 'playResult', 'specialTeamsResult']]
fig = px.box(
    data_frame=yards_gained_outcome,
    x="specialTeamsResult",
    y="playResult",
)
fig.show()

# Does point and time left correlate with the kickoff outcome?

In [None]:
"""
game_play_data_point_time = game_play_data[['gameId','playId','quarter','yardsToGo','specialTeamsResult',
                'gameClock','preSnapHomeScore','preSnapVisitorScore','possessionTeam','homeTeamAbbr','visitorTeamAbbr']]
game_play_data_point_time['Score_diff'] = np.where(game_play_data_point_time['possessionTeam'] == game_play_data_point_time['homeTeamAbbr'], 
                                                            game_play_data_point_time['preSnapHomeScore'] - game_play_data_point_time['preSnapVisitorScore'], 
                                                            -(game_play_data_point_time['preSnapHomeScore'] - game_play_data_point_time['preSnapVisitorScore']))

game_play_data_point_time['uniqueId'] = game_play_data_point_time['gameId'].astype(str) + game_play_data_point_time['playId'].astype(str)
fig = px.box(game_play_data_point_time, 
             x="specialTeamsResult", 
             y="Score_diff")
fig.show()
"""

Taking 2020 tracking data as an example, I plotted the routes of ball per special team play type.

In [None]:
# Load the 2020 tracking data and shift x by -10.
tracking_2020 = pd.read_csv(
    filepath_or_buffer='/kaggle/input/nfl-big-data-bowl-2022/tracking2020.csv'
)
tracking_2020['x'] = tracking_2020['x'].sub(10)

In [None]:
# Inner-join kickoff plays to their 2020 tracking rows on (gameId, playId),
# with the result sorted by the join keys.
kickoff_tracking_2020 = kickoff_play.merge(
    tracking_2020,
    how="inner",
    on=["gameId", "playId"],
    sort=True,
)

# Keep only the rows that track the football.
kickoff_ball = kickoff_tracking_2020[kickoff_tracking_2020['displayName'] == 'football']

In [None]:
# Ball positions, faceted by kickoff result (rows) and play direction (columns).
fig = px.scatter(
    data_frame=kickoff_ball,
    x="x",
    y="y",
    facet_row="specialTeamsResult",
    facet_col="playDirection",
    width=1200,
    height=800,
)
fig.update_traces(marker=dict(size=2, opacity=0.5))

# Facet titles arrive as "column=value"; keep only the text after the last "=".
fig.for_each_annotation(lambda note: note.update(text=note.text.rpartition("=")[2]))

fig.update_layout(
    title_text='Ball route of kickoff per type',
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
)

fig.show()

Let's create a visualization to illustrate how players and ball are moving in the field.

In [None]:
# Select the tracking rows of one kickoff: playId 3509 of gameId 2020091310.
# NOTE: the variable name still carries an older example's ids (game 2020091000,
# play 39); it is kept unchanged because the plotting cell refers to it.
play_2020091000_39 = kickoff_tracking_2020.loc[
    kickoff_tracking_2020['playId'].eq(3509)
    & kickoff_tracking_2020['gameId'].eq(2020091310)
]

In [None]:
# draw a field board
import plotly.graph_objects as go

# Animate the selected play: one frame per tracking timestamp, markers matched
# across frames by nflId and coloured by team.
# Axis ranges are pinned so markers stay in view in every frame (Plotly's
# animation guide recommends fixing the ranges). x was shifted by -10 when the
# tracking data was loaded, so the 120-yard field spans -10..110; 53.3 is the
# field width used by the disabled field-drawing code in this cell.
fig = px.scatter(play_2020091000_39,
                 x="x",
                 y="y",
                 animation_frame="time",
                 animation_group="nflId",
                 color="team",
                 hover_name="displayName",
                 range_x=[-10, 110],
                 range_y=[0, 53.3]
                )
"""
fig.add_shape(type="rect",
    x0=0, 
    y0=53.3, 
    x1=120, 
    y1=0,
    line=dict(color="white"),
)




x_start = 10
while x_start <= 110:
    fig.add_shape(type="line",
    x0=x_start, y0=0, x1=x_start, y1=53.3,
    line=dict(color="white",width=1))
    fig.add_annotation(text=str(x_start),x=x_start, y=1, showarrow=False)
    fig.add_annotation(text=str(x_start),x=x_start, y=52, showarrow=False)
    x_start += 5


# Set axes properties
fig.update_xaxes(range=[0, 120], 
                 showgrid=False)
fig.update_yaxes(range=[0, 53.3], showgrid = False)

fig.update_shapes(dict(xref='x', 
                       yref='y'))



fig.update_layout(xaxis_visible=False, xaxis_showticklabels=False)
fig.update_layout(xaxis_visible=False, xaxis_showticklabels=False,
                  yaxis_visible=False, yaxis_showticklabels=False,
                 paper_bgcolor='rgba(0,0,0,0)',
                  plot_bgcolor='green')
"""
fig.show()




TBA:

What are the outcomes of each result?

# Returning yards could be a good metric

Once it catches the ball, the kickoff return team tries to advance as far as possible towards the end zone of the kickoff team. Return yards are measured from the point where possession of the ball changes. How many yards do return teams gain in general? The data is close to a normal distribution with an average of ~20 yards.

Another interesting observation is that X% of kickoff return teams gained no yards at all. Since the goal of the kickoff team is to limit the yards the opposing team gains once it possesses the ball, I'd first look at return yards as a metric for evaluating how good a kickoff is.

In [None]:
# Number of kickoffs per return yardage (rows with a null yardage are excluded
# by groupby).
# Count rows rather than distinct playId values: playId is only unique within a
# game, so nunique() under-counts plays. The count column keeps the name
# 'playId' so the chart spec below is unchanged.
kickReturn_aggr = (
    kickoff_play
    .groupby('kickReturnYardage')
    .size()
    .reset_index(name='playId')
)

fig = px.bar(kickReturn_aggr,
             x='kickReturnYardage',
             y='playId',
             labels={'kickReturnYardage':'Kick Return Yard age',
                   'playId':'Number of kickoffs'}
            )
fig.update_layout(title_text='Number of kickoffs per Return Yard age')

fig.show()

But one thing we need to keep in mind is that if the ball is kicked beyond the end zone of the opposing team, a **touchback** occurs.

In that case, according to the current NFL rule, the kickoff team gains nothing and the receiving team starts its drive at its own 25-yard line.

And in our dataset, the kickReturnYardage value will be null if the play result is a touchback.

In [None]:
# Count touchback kickoffs that nevertheless carry a kickReturnYardage value.
print(
    "Not null values in kickReturnYardage data once a touchback result occured is: "
    + str(
        kickoff_play.loc[
            kickoff_play['specialTeamsResult'] == 'Touchback', 'kickReturnYardage'
        ].notna().sum()
    )
)

# A ball is often kicked ~75 yards in length...

In [None]:
# Number of kickoffs per kick length.
# Count rows rather than distinct playId values: playId is only unique within a
# game, so nunique() under-counts plays. The count column keeps the name
# 'playId' so plotting code that reads it keeps working.
kickLength_aggr = (
    kickoff_play
    .groupby('kickLength')
    .size()
    .reset_index(name='playId')
)

**Kick length** indicates the distance of a kicked ball in air.
The most common length is 75 yards, which occurs almost three times as often as the second most common length, 70 yards. Another interesting finding is on the left side of the chart below: a small but noticeable number of kicks are only around 10 yards long.

In [None]:
# Bar chart of kickoff counts by kick length.
fig = px.bar(
    data_frame=kickLength_aggr,
    x='kickLength',
    y='playId',
    labels=dict(kickLength='Kick length', playId='Number of kickoffs'),
)
fig.update_layout(title_text='Number of kickoffs per kick length').show()

# Is Kick length significantly correlated with the returning yards?

Yes. They are positively correlated.

In [None]:
# Correlation between kick length and return yardage (Series.corr defaults).
correlation = kickoff_play["kickLength"].corr(other=kickoff_play["kickReturnYardage"])
print(f"The correlation between Kick length and Kick return is: {round(correlation, 3)}.")

The chart below shows an **upward** trend in the relationship between how far the ball is kicked and how far the returning team advances the ball towards the end zone.

In [None]:
import plotly.express as px

# Scatter of return yardage against kick length, with an OLS trend line.
fig = px.scatter(
    data_frame=kickoff_play,
    x="kickLength",
    y="kickReturnYardage",
    trendline="ols",
    title="The correlation between Kick length and Kick return",
    labels={
        "kickLength": "Kick length (yards)",
        "kickReturnYardage": "Kick return (yards)",
    },
)
fig.show()

The hypothesis is that the further the ball is kicked, the more distance the kicking team has to cover - and the more time it needs - to tackle the returning player who holds the ball.

Therefore, the returning player could have much space and time to move forward.

In [None]:
# Distribution of kick length for each possessionTeam.
fig = px.box(
    data_frame=kickoff_play,
    y="kickLength",
    x="possessionTeam",
)
fig.show()



# Kickoff teams usually get 40 yards as the result of their kickoff

The reason is that out of 3,454 plays, around 2,675 ended in a touchback, in which case the ball is placed at the receiving team's 25-yard line (the 25/75 mark on the field). Since the ball is kicked from the 35/65 line, the kickoff team gets 40 yards as the result in most cases.

In [None]:
# Number of kickoffs per play result.
# Count rows rather than distinct playId values: playId is only unique within a
# game, so nunique() under-counts plays. The count column keeps the name
# 'playId' so plotting code that reads it keeps working.
playResult_aggr = (
    kickoff_play
    .groupby('playResult')
    .size()
    .reset_index(name='playId')
)

In [None]:
# Bar chart of kickoff counts by play result.
fig = px.bar(
    data_frame=playResult_aggr,
    x='playResult',
    y='playId',
    labels=dict(playResult='Play result', playId='Number of kickoffs'),
)
fig.update_layout(title_text='Number of kickoffs per play result').show()

Let's explore what the PFFScoutingData reveals to us.

In [None]:
# Load the PFF scouting table (PFFScoutingData.csv) into a DataFrame.
pff = pd.read_csv(
    filepath_or_buffer="/kaggle/input/nfl-big-data-bowl-2022/PFFScoutingData.csv"
)

In [None]:
# Inner-join kickoff plays to the PFF scouting rows on (gameId, playId),
# with the result sorted by the join keys.
kickoff_pff = kickoff_play.merge(
    pff,
    how="inner",
    on=["gameId", "playId"],
    sort=True,
)

Possible values for kickoff plays:

* D: Deep - your normal deep kick with decent hang time
* F: Flat - different than a Squib in that it will have some hang time and no roll but has a lower trajectory and hang time than a Deep kick off
* K: Free Kick - Kick after a safety
* O: Obvious Onside - score and situation dictates the need to regain possession. Also the hands team is on for the returning team
* P: Pooch kick - high for hangtime but not a lot of distance - usually targeting an upman
* Q: Squib - low-line drive kick that bounces or rolls considerably, with virtually no hang time
* S: Surprise Onside - accounting for score and situation an onsides kick that the returning team doesn’t expect. Hands teams probably aren't on the field
* B: Deep Direct OOB - Kickoff that is aimed deep (regular kickoff) that goes OOB directly (doesn't bounce)

In [None]:
# Number of kickoffs per PFF kick type.
# Count rows rather than distinct playId values: playId is only unique within a
# game, so nunique() under-counts plays. (Assumes kickoff_pff holds one row per
# play - TODO confirm.) The count column keeps the name 'playId' so the chart
# spec below is unchanged.
kickType_aggr = kickoff_pff.groupby('kickType').size().reset_index(name='playId')
fig = px.bar(kickType_aggr,
             x='kickType',
             y='playId',
             labels={'kickType':'Kick type',
                   'playId':'Number of kickoffs'}
            )
fig.update_layout(title_text='Number of kickoffs per Kick type')

fig.show()

# The ball usually hangs in the air for ~4 seconds after a kickoff.

In [None]:
# Number of kickoffs per hang time.
# Count rows rather than distinct playId values: playId is only unique within a
# game, so nunique() under-counts plays. (Assumes kickoff_pff holds one row per
# play - TODO confirm.) The count column keeps the name 'playId' so the chart
# spec below is unchanged.
kickHangTime_aggr = kickoff_pff.groupby('hangTime').size().reset_index(name='playId')
fig = px.bar(kickHangTime_aggr,
             x='hangTime',
             y='playId',
             labels={'hangTime':'Hang time',
                   'playId':'Number of kickoffs'}
            )
fig.update_layout(title_text='Number of kickoffs per hang time')

fig.show()

# In most kickoffs, the direction of the ball goes to the center, either intended or actual.

In [None]:
# Number of kickoffs per intended kick direction.
# Count rows rather than distinct playId values: playId is only unique within a
# game, so nunique() under-counts plays. (Assumes kickoff_pff holds one row per
# play - TODO confirm.) The count column keeps the name 'playId' so the chart
# spec below is unchanged.
kickDirectionIntended = (
    kickoff_pff
    .groupby('kickDirectionIntended')
    .size()
    .reset_index(name='playId')
)
fig = px.bar(kickDirectionIntended,
             x='kickDirectionIntended',
             y='playId',
             labels={'kickDirectionIntended':'kick Direction Intended',
                   'playId':'Number of kickoffs'}
            )
fig.update_layout(title_text='Number of kickoffs per kick Direction Intended')

fig.show()

In [None]:
# Number of kickoffs per actual kick direction.
# Count rows rather than distinct playId values: playId is only unique within a
# game, so nunique() under-counts plays. (Assumes kickoff_pff holds one row per
# play - TODO confirm.) The count column keeps the name 'playId' so the chart
# spec below is unchanged.
kickDirectionActual = (
    kickoff_pff
    .groupby('kickDirectionActual')
    .size()
    .reset_index(name='playId')
)
fig = px.bar(kickDirectionActual,
             x='kickDirectionActual',
             y='playId',
             labels={'kickDirectionActual':'kick Direction Actual',
                   'playId':'Number of kickoffs'}
            )
fig.update_layout(title_text='Number of kickoffs per kick Direction Actual')

fig.show()

In [None]:
# Kickoffs whose actual direction differs from the intended one.
# (A bare column-selection expression that used to open this cell had no effect
# and was removed.)
kickoff_pff_direction_actual = kickoff_pff.loc[
    kickoff_pff['kickDirectionIntended'] != kickoff_pff['kickDirectionActual'],
    ['kickDirectionIntended', 'kickDirectionActual', 'playId'],
]

# Count rows per (intended, actual) pair rather than distinct playId values:
# playId is only unique within a game, so nunique() under-counts plays.
# Pairs with a null direction are dropped by groupby. The count column keeps
# the name 'playId' so the chart spec below is unchanged.
kickoff_pff_direction_actual_aggr = (
    kickoff_pff_direction_actual
    .groupby(['kickDirectionIntended', 'kickDirectionActual'])
    .size()
    .reset_index(name='playId')
)

# Axis label: intended direction code followed by actual direction code.
kickoff_pff_direction_actual_aggr['IntendedActuall'] = kickoff_pff_direction_actual_aggr['kickDirectionIntended'] + kickoff_pff_direction_actual_aggr['kickDirectionActual']

fig = px.bar(kickoff_pff_direction_actual_aggr,
             x='IntendedActuall',
             y='playId',
             labels={'IntendedActuall':'kick Direction against Actual',
                   'playId':'Number of kickoffs'}
            )
fig.update_layout(title_text='Number of kickoffs per kick Direction vs. Actual')

fig.show()

# Does a long approach run result in a long kick length?

In [None]:
# Event labels that this analysis treats as the moment of the kick.
kickoff_events = [
    'kickoff',
    'onside_kick',
    'autoevent_kickoff',
    'free_kick',
    'kickoff_play',
]

# Composite play key: gameId and playId rendered as strings and concatenated.
kickoff_tracking_2020['uniqueId'] = (
    kickoff_tracking_2020['gameId'].astype(str) + kickoff_tracking_2020['playId'].astype(str)
)

# Columns needed for the approach-run analysis.
kickoff_tracking_distance_2020 = kickoff_tracking_2020[[
    'gameId', 'playId', 'time', 'kickerId', 'kickLength',
    'x', 'y', 's', 'a', 'dis', 'o', 'dir',
    'event', 'nflId', 'displayName', 'playDirection', 'uniqueId',
]]

# Keep the kicker's own rows (not the football) on plays with a known kick length.
kickoff_tracking_distance_2020 = kickoff_tracking_distance_2020[
    (kickoff_tracking_distance_2020['displayName'] != 'football')
    & (kickoff_tracking_distance_2020['kickerId'] == kickoff_tracking_distance_2020['nflId'])
    & kickoff_tracking_distance_2020['kickLength'].notna()
]

# Rows tagged with a kick event, and the plays they belong to.
kickoff_tracking_2020_kicks = kickoff_tracking_distance_2020[
    kickoff_tracking_distance_2020['event'].isin(kickoff_events)
]
unique_id_kicks = kickoff_tracking_2020_kicks['uniqueId'].unique()

# free_kick and kickoff_play have high correlations between distance of approach run and the length of a kickoff length
# what is the difference between a free kick and ki

In [None]:
# Non-null value counts of every column, per kick event label.
kickoff_tracking_2020_kicks.groupby('event').count()

In [None]:
# Same per-event tally as the preceding cell (this cell repeats it).
kickoff_tracking_2020_kicks.groupby(by='event').count()

In [None]:
# shorter approach run but larger kick space
# Summed `dis` divided by the number of rows with a uniqueId, per event label.
(
    kickoff_tracking_2020_kicks.groupby('event')['dis'].sum()
    / kickoff_tracking_2020_kicks.groupby('event')['uniqueId'].count()
)

In [None]:
from scipy import stats

distance_list = []      # approach-run distance per play (sum of `dis` up to the kick)
kick_length_list = []   # kick length of the same play
data_list = []          # the kicker's pre-kick rows of every play, reused by later cells

# Group the kicker rows once instead of re-filtering the whole frame for every
# play; unique_id_kicks still drives the iteration order.
kicker_rows_by_play = kickoff_tracking_distance_2020.groupby('uniqueId', sort=False)

for uniqueId in unique_id_kicks:
    dataset = kicker_rows_by_play.get_group(uniqueId)

    # Timestamp of the first row tagged with a kick event in this play.
    kickoff_time = dataset.loc[dataset['event'].isin(kickoff_events), 'time'].iloc[0]

    # The kicker's rows up to and including the kick, in chronological order.
    kicker_ball_data_pre_kickoff = (
        dataset.loc[dataset['time'] <= kickoff_time].sort_values(by='time')
    )
    data_list.append(kicker_ball_data_pre_kickoff)

    # `dis` is summed as the distance covered before the kick.
    distance_list.append(kicker_ball_data_pre_kickoff['dis'].sum())
    kick_length_list.append(kicker_ball_data_pre_kickoff['kickLength'].iloc[0])

# Pearson correlation between approach-run distance and kick length.
correlation, p_value = stats.pearsonr(distance_list, kick_length_list)

print(correlation, p_value)

In [None]:
# Kick length against approach-run distance, with an OLS trend line.
fig = px.scatter(
    x=distance_list,
    y=kick_length_list,
    trendline="ols",
)
fig.show()

# distance between the player and the ball when it is kicked

In [None]:
# Keep the football's rows and the kicker's own rows (kickerId equals nflId).
kickoff_tracking_kicked_distance_2020 = kickoff_tracking_2020[
    kickoff_tracking_2020['displayName'].eq('football')
    | kickoff_tracking_2020['kickerId'].eq(kickoff_tracking_2020['nflId'])
]

In [None]:
# Composite play key (gameId + playId as strings). assign() builds a new frame,
# so no column is written into a filtered slice (avoids SettingWithCopyWarning).
kickoff_tracking_kicked_distance_2020 = kickoff_tracking_kicked_distance_2020.assign(
    uniqueId=kickoff_tracking_kicked_distance_2020['gameId'].astype(str)
    + kickoff_tracking_kicked_distance_2020['playId'].astype(str)
)

In [None]:
# Event labels that this analysis treats as the moment of the kick.
kickoff_events = [
    'kickoff',
    'onside_kick',
    'autoevent_kickoff',
    'free_kick',
    'kickoff_play',
]

# Narrow the frame to the columns used for the kicker-to-ball distance.
kickoff_tracking_kicked_distance_2020 = kickoff_tracking_kicked_distance_2020[[
    'uniqueId',
    'event',
    'time',
    'kickerId',
    'displayName',
    'x',
    'y',
    'kickLength',
]]

In [None]:
# Rows tagged with a kick event. Copy before adding columns so they are not
# written into a filtered slice (avoids SettingWithCopyWarning).
kickoff_tracking_kicked_distance_event_2020 = kickoff_tracking_kicked_distance_2020.loc[
    kickoff_tracking_kicked_distance_2020['event'].isin(kickoff_events)
].copy()

# Pair every row with the next row of the same play: x2/y2 hold that row's
# position (NaN on the last row of each play).
# NOTE(review): presumably each play contributes exactly two rows here (kicker
# and football), making x2/y2 the other entity's position - confirm.
kickoff_tracking_kicked_distance_event_2020['x2'] = kickoff_tracking_kicked_distance_event_2020.groupby(['uniqueId'])['x'].shift(-1)
kickoff_tracking_kicked_distance_event_2020['y2'] = kickoff_tracking_kicked_distance_event_2020.groupby(['uniqueId'])['y'].shift(-1)

In [None]:
# Drop every row that has a null in any column, then add the straight-line
# distance between (x, y) and (x2, y2).
kickoff_tracking_kicked_distance_event_2020 = (
    kickoff_tracking_kicked_distance_event_2020
    .dropna()
    .assign(
        distance=lambda rows: (
            rows['x2'].sub(rows['x']).pow(2) + rows['y2'].sub(rows['y']).pow(2)
        ).pow(0.5)
    )
)
kickoff_tracking_kicked_distance_event_2020

In [None]:
# Pearson correlation between the computed distance and the kick length.
correlation, p_value = stats.pearsonr(
    kickoff_tracking_kicked_distance_event_2020['distance'],
    kickoff_tracking_kicked_distance_event_2020['kickLength'],
)
print(correlation, p_value)

In [None]:
# Kick length against the computed distance, with an OLS trend line.
fig = px.scatter(
    x=kickoff_tracking_kicked_distance_event_2020['distance'],
    y=kickoff_tracking_kicked_distance_event_2020['kickLength'],
    trendline="ols",
)
fig.show()

In [None]:
# Distribution of the computed distance for each kick event label.
fig = px.box(
    data_frame=kickoff_tracking_kicked_distance_event_2020,
    y="distance",
    x="event",
)
fig.show()

In [None]:
# Split the rows into regular kickoffs and onside kicks by event label.
kickoff_distance_ball = kickoff_tracking_kicked_distance_event_2020[
    kickoff_tracking_kicked_distance_event_2020['event'].isin(['kickoff', 'autoevent_kickoff'])
]
onsidekick_distance_ball = kickoff_tracking_kicked_distance_event_2020[
    kickoff_tracking_kicked_distance_event_2020['event'].isin(['onside_kick'])
]

In [None]:
# Show the distance values of the onside kicks.
onsidekick_distance_ball.loc[:, 'distance']

In [None]:
# Two-sample t-test (scipy defaults) comparing the two groups' distances.
stats.ttest_ind(
    a=kickoff_distance_ball['distance'],
    b=onsidekick_distance_ball['distance'],
)

In [None]:
# Mean distance for regular kickoffs.
kickoff_distance_ball.loc[:, 'distance'].mean()

In [None]:
# Mean distance for onside kicks.
onsidekick_distance_ball.loc[:, 'distance'].mean()

# Onside kick has much smaller approach run than kickoffs, which results in a shorter kick length.  

In [None]:
# Stack the kicker's pre-kick rows of every play into one frame.
merged_dataset = pd.concat(data_list)

# One row per play: total `dis`, mean kickLength and the maximum event label.
# Only the needed columns are aggregated; calling mean()/sum() on the whole
# frame also hits the string columns, which raises TypeError for mean() on
# pandas >= 2.0.
event_aggr = (
    merged_dataset
    .groupby('uniqueId')
    .agg(
        dis=('dis', 'sum'),
        kickLength=('kickLength', 'mean'),
        event=('event', 'max'),
    )
    .reset_index()
)
event_aggr.groupby(['event']).count()

In [None]:
# Median of the numeric columns per event label. The columns are selected
# explicitly because median() over the string uniqueId column raises TypeError
# on pandas >= 2.0.
event_aggr.groupby(['event'])[['dis', 'kickLength']].median()

In [None]:
# 5 yardline should be a determining line
# 90th percentile of the numeric columns per event label. The columns are
# selected explicitly because quantile() cannot be computed on the string
# uniqueId column.
event_aggr.groupby(['event'])[['dis', 'kickLength']].quantile(.9)

# Does the direction of a kicker's body tell us more about which way the ball will fly?

In [None]:
# Tracking columns needed to compare the kicker's orientation/direction with
# the kick direction recorded by PFF.
kickoff_tracking_2020_dir = kickoff_tracking_2020[['uniqueId','time', 'kickerId', 'o', 'dir','event', 'nflId','playDirection']]

# PFF scouting data supplies kickDirectionIntended and kickDirectionActual.
# Build the same gameId + playId string key on it for the join.
pff['uniqueId'] = pff['gameId'].astype(str) + pff['playId'].astype(str)

pff_dir = pff[['uniqueId','kickDirectionIntended','kickDirectionActual']]

# Only non-default merge options are spelled out.
kickoff_pff_dir = pd.merge(
    kickoff_tracking_2020_dir,
    pff_dir,
    how="inner",
    on="uniqueId",
    sort=True,
)

# Keep the kicker's own row at the kick event.
kickoff_pff_dir_kick_event = kickoff_pff_dir.loc[(kickoff_pff_dir['kickerId'] == kickoff_pff_dir['nflId']) &
                   (kickoff_pff_dir['event'].isin(kickoff_events))]

# Median orientation (o) and direction (dir) per play direction and actual
# kick direction. `o` and `dir` are selected before aggregating because
# median() over the string columns raises TypeError on pandas >= 2.0.
(
    kickoff_pff_dir_kick_event
    .groupby(['playDirection', 'kickDirectionActual'])[['o', 'dir']]
    .median()
    .reset_index()
)

In [None]:
# why f

In [None]:
# time
# entity: player + ball
# x
# y
# dir
# o
# acc
# event - kick the ball, ball is catched, ball is out of bound/end, ball touches the ground

In [None]:
# playId 39
# gameId 2020091000

"""
play_2020091000_39 = kickoff_tracking_2020.loc[(kickoff_tracking_2020['playId'] == 4182) & 
                                    (kickoff_tracking_2020['gameId'] == 2021010315)]

play_2020091000_39 = play_2020091000_39[['gameId',
                   'playId',
                   'kickerId',
                    'nflId',
                    'displayName',
                    'event',                     
                   'time',
                   'x',
                   'y',
                   's',
                   'a',
                   'dis',
                   'o',
                   'dir',
                   'playDirection']]

play_2020091000_39 = play_2020091000_39.loc[(play_2020091000_39['nflId'] == play_2020091000_39['kickerId']) | (play_2020091000_39['displayName'] == 'football')]

kickoffTime = list(play_2020091000_39.loc[(play_2020091000_39['event'].isin(['kickoff','onside_kick'])) & (play_2020091000_39['displayName'] == 'football')][['time']].iloc[0])[0]

play_2020091000_39 = play_2020091000_39.loc[play_2020091000_39['time'] <= kickoffTime]



player_2020091000_39 = play_2020091000_39.loc[play_2020091000_39['displayName'] != 'football']
ball_2020091000_39 = play_2020091000_39.loc[(play_2020091000_39['displayName'] == 'football') & (play_2020091000_39['time'] == kickoffTime)]

fig = go.Figure()

# Add traces

fig.add_trace(go.Scatter(x=player_2020091000_39['x'], 
                         y=player_2020091000_39['y'],
                    mode='lines+markers',
                         text = player_2020091000_39['displayName']
                        #marker=dict(size=player_2020091000_39['s'] * 20)
                        ))

fig.add_trace(go.Scatter(x=ball_2020091000_39['x'], 
                         y=ball_2020091000_39['y'],
                    mode='markers',
                    #name='lines+markers',
                        text = ball_2020091000_39['displayName']))



fig.show()
"""