In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.express as px

## Import data
Retrieve play-by-play data from nflverse.  
Load the 2023 season file.  
Keep only regular-season plays.

In [3]:
# Season to load; the value is interpolated into the download URL below.
year = 2023
data_source = f'https://github.com/nflverse/nflverse-data/releases/download/pbp/play_by_play_{year}.csv.gz'

# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, which avoids mixed-dtype inference on individual columns.
df = pd.read_csv(
    data_source,
    compression='gzip',
    low_memory=False,
)

# Keep regular-season plays only. The explicit .copy() makes `df` an
# independent frame, so adding columns to it in later cells does not trigger
# SettingWithCopyWarning.
df = df[df.season_type == 'REG'].copy()

df.head()

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe
0,1,2023_01_ARI_WAS,2023091007,WAS,ARI,REG,1,,,,...,0,1,0.0,,,,,,,
1,39,2023_01_ARI_WAS,2023091007,WAS,ARI,REG,1,WAS,home,ARI,...,0,1,0.0,,,,,,,
2,55,2023_01_ARI_WAS,2023091007,WAS,ARI,REG,1,WAS,home,ARI,...,0,1,-0.336103,,,,,,0.515058,-51.505846
3,77,2023_01_ARI_WAS,2023091007,WAS,ARI,REG,1,WAS,home,ARI,...,0,1,0.703308,0.340652,3.328642,1.0,0.996628,0.583928,0.661106,33.889407
4,102,2023_01_ARI_WAS,2023091007,WAS,ARI,REG,1,WAS,home,ARI,...,0,1,0.469799,,,,,,0.196065,-19.606467


In [4]:
# Example of an f-string format spec: ".0%" multiplies the value by 100,
# rounds it to zero decimal places and appends a percent sign.
win_rate = 0.514
message = f"win rate: {win_rate:.0%}"
print(message)

win rate: 51%


In [5]:
#define a function to return if the Patriots ended up winning or losing the game
def process_row(row):
    """Return whether the Patriots (NE) won the game this play belongs to.

    Parameters
    ----------
    row : mapping or pandas.Series
        One play. Must provide 'home_team', 'away_team' and 'result'.
        'result' is read as the home team's margin: a positive value is
        treated as a home win, a negative value as an away win.

    Returns
    -------
    bool or None
        True if NE won, False if NE lost, and None when NE did not play in
        the game or the game has no decisive result (a tie, or a missing
        'result').
    """
    # Express the margin from NE's point of view so one comparison covers
    # both the home and the away case.
    if row['home_team'] == 'NE':
        ne_margin = row['result']
    elif row['away_team'] == 'NE':
        ne_margin = -row['result']
    else:
        return None  # NE not involved in game

    if ne_margin > 0:
        return True
    if ne_margin < 0:
        return False
    # Zero margin (tie) or NaN (no result recorded): neither a win nor a loss.
    return None


# Add a per-play 'pats_win' column by calling process_row once for every row
# (axis='columns' passes each row, rather than each column, to the function).
df['pats_win'] = df.apply(process_row, axis='columns')

# Hypothesis: When the Patriots spend more time on defense, they lose

In [6]:
# Count the distinct plays per game on which NE is the defending team, keeping
# the game outcome alongside so the histogram can be split by win/loss.
# Only play_id is aggregated: it is the only column the chart uses, so there
# is no need to compute distinct counts for every column of the frame.
_df = (
    df
    .query('defteam == "NE"')
    .groupby(['game_id', 'pats_win'], as_index=False)
    .agg({'play_id': 'nunique'})
)

px.histogram(
    _df,
    x='play_id',
    nbins=18,
    color='pats_win',
    title="Frequency of Defensive Plays per Game and Outcome",
    # After the aggregation play_id holds a per-game count, so give the axis
    # and the legend readable names.
    labels={'play_id': 'Defensive plays in game', 'pats_win': 'Patriots won'},
)

# Average yards per play type
Compare the Patriots' average yards gained per run play and per pass play with the league average.

In [34]:
def _average_yards_by_play_type(plays, team_label):
    """Average yards gained per run play and per pass play in `plays`.

    Parameters
    ----------
    plays : pandas.DataFrame
        Play-level rows with 'play_type' and 'yards_gained' columns.
    team_label : str
        Value written to the 'team' column of every returned row; used to
        tell the summaries apart once they are concatenated.

    Returns
    -------
    pandas.DataFrame
        One row per play type with columns 'play_type',
        'average_yards_gained' and 'team'.
    """
    return (
        plays[plays['play_type'].isin(['run', 'pass'])]
        .groupby('play_type', as_index=False)
        .agg({'yards_gained': 'mean'})
        .rename(columns={'yards_gained': 'average_yards_gained'})
        .assign(team=team_label)
    )


# League-wide averages (all teams' plays, NE included)
df_average_league = _average_yards_by_play_type(df, 'league')

# Patriots averages: only plays whose posteam is NE
df_average_patriots = _average_yards_by_play_type(df[df['posteam'] == 'NE'], 'NE')

# Stack the two summaries; ignore_index avoids duplicate row labels
df_NE_vs_League = pd.concat(
    [df_average_league, df_average_patriots],
    ignore_index=True,
)

fig = px.bar(
    df_NE_vs_League,
    x='play_type',
    y='average_yards_gained',
    color='team',
    barmode='group',
    title='Average Yards per Play',
    labels={
        'play_type': 'Play type',
        'average_yards_gained': 'Average yards gained',
        'team': 'Team',
    },
)
fig.show()


The Patriots were *below* the league average in yards per play for both passing and running plays.

# Now, let's look at 3rd down conversions  
Since `third_down_converted` is a binary variable, taking its average over third-down plays gives the share of third downs that were converted.

In [63]:
# Team whose third-down conversion rate is compared with the league's;
# change the abbreviation to look at a different team.
team = "NE"

# Only third-down plays are kept, because every other play carries a 0 in
# third_down_converted.
# NOTE(review): rows with down == 3 may include plays that are not official
# third-down attempts (e.g. penalties) - confirm against the nflverse field docs.
is_third_down = df['down'] == 3
df_third_down_plays = df[is_third_down]

# The mean of a 0/1 flag is the share of rows where the flag is 1.
converted = df_third_down_plays['third_down_converted']

league_third_down_conversion_rate = converted.mean()
print(league_third_down_conversion_rate)

team_third_down_conversion_rate = converted[df_third_down_plays['posteam'] == team].mean()
print(team_third_down_conversion_rate)

# Two-row frame (league vs. selected team) for the bar chart
df_mini = pd.DataFrame([
    {"team": 'league_avg', "conversion_rate": league_third_down_conversion_rate},
    {"team": team, "conversion_rate": team_third_down_conversion_rate},
])

fig2 = px.bar(
    df_mini,
    x='team',
    y='conversion_rate',
    title='Third Down Conversion Rate',
)
fig2.show()


0.3538969281543539
0.2815126050420168


In [75]:
# Instead, let's look at every team's average and highlight the selected team.
# groupby(..., as_index=False) already returns posteam as a regular column, so
# a follow-up reset_index() would only add a stray "index" column.
df_third_down_conversions_per_team = (
    df_third_down_plays
    .groupby(['posteam'], as_index=False)
    .agg({'third_down_converted': 'mean'})
    .rename(columns={"third_down_converted": "third_down_conversion_rate", "posteam": "Team"})
    .sort_values('third_down_conversion_rate', ascending=False)
    .reset_index(drop=True)
)

# League-wide rate for the reference line
league_third_down_conversion_rate = df_third_down_plays['third_down_converted'].mean()

fig3 = px.bar(
    df_third_down_conversions_per_team,
    x='Team',
    y='third_down_conversion_rate',
    title='Third Down Conversion Rate by Team',
    labels={'third_down_conversion_rate': 'Third down conversion rate'},
)

# One colour per bar, in the same (sorted) row order as the plotted frame, so
# the selected team's bar stands out without changing the bar order.
bar_colors = [
    'crimson' if abbreviation == team else 'lightslategray'
    for abbreviation in df_third_down_conversions_per_team['Team']
]
fig3.update_traces(marker_color=bar_colors)

fig3.add_hline(y=league_third_down_conversion_rate, line_dash="dot",
               annotation_text="League Average")
fig3.show()


3.0
