# Team Tendency Analysis - Oct 15, 2024

Goal here is to get longer-term data on teams (and later players) to bake in to 

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nfl-big-data-bowl-2025/players.csv
/kaggle/input/nfl-big-data-bowl-2025/tracking_week_7.csv
/kaggle/input/nfl-big-data-bowl-2025/tracking_week_9.csv
/kaggle/input/nfl-big-data-bowl-2025/tracking_week_6.csv
/kaggle/input/nfl-big-data-bowl-2025/games.csv
/kaggle/input/nfl-big-data-bowl-2025/tracking_week_8.csv
/kaggle/input/nfl-big-data-bowl-2025/player_play.csv
/kaggle/input/nfl-big-data-bowl-2025/tracking_week_4.csv
/kaggle/input/nfl-big-data-bowl-2025/tracking_week_3.csv
/kaggle/input/nfl-big-data-bowl-2025/tracking_week_5.csv
/kaggle/input/nfl-big-data-bowl-2025/tracking_week_1.csv
/kaggle/input/nfl-big-data-bowl-2025/plays.csv
/kaggle/input/nfl-big-data-bowl-2025/tracking_week_2.csv
/kaggle/input/nfl-stats-1999-2022/yearly_team_data.csv
/kaggle/input/nfl-stats-1999-2022/2024_player_predictions.csv
/kaggle/input/nfl-stats-1999-2022/yearly_player_data.csv
/kaggle/input/nfl-stats-1999-2022/weekly_team_data.csv
/kaggle/input/nfl-stats-1999-2022/weekly_player_data.csv


## Import data

We load all relevant data to understand team pass rates

In [2]:
df_play = pd.read_csv('/kaggle/input/nfl-big-data-bowl-2025/plays.csv')
df_player_play = pd.read_csv('/kaggle/input/nfl-big-data-bowl-2025/player_play.csv')
df_games = pd.read_csv('/kaggle/input/nfl-big-data-bowl-2025/games.csv')
df_players = pd.read_csv('/kaggle/input/nfl-big-data-bowl-2025/players.csv')

In [3]:
# Add in week # for each play
df_weeks = df_play[['gameId','isDropback','possessionTeam','defensiveTeam']].merge(df_games[['gameId','week']],how='left',on=['gameId'])

# Aggregate team snap data

We want to know both the raw number of snaps each team took, and how many of them were passes, for each week


In [4]:
df_working = df_weeks.drop(columns=['gameId'])
df_working.head(1)

Unnamed: 0,isDropback,possessionTeam,defensiveTeam,week
0,True,CIN,ATL,7


In [5]:
# Group by team/week, get pass & snap count for each team/week combo

off_df = df_working.groupby(['possessionTeam','week'])\
                        .agg(off_passes=('isDropback', 'sum'), off_snaps=('isDropback', 'count')).reset_index()

def_df = df_working.groupby(['defensiveTeam','week'])\
                        .agg(def_passes=('isDropback', 'sum'), def_snaps=('isDropback', 'count')).reset_index()


In [6]:
# Merge offensive, defensive snap stats
df_full = off_df.merge(def_df,how='inner',left_on=['possessionTeam','week'],right_on=['defensiveTeam','week']).drop(columns=['defensiveTeam'])

# Get running snap/pass stats over the season for the current week
df_cs = pd.concat([df_full[['possessionTeam','week']],df_full.groupby('possessionTeam').cumsum().drop(columns='week')],axis=1)

# Calc pass rate for offense & defense
df_cs['pass_rate_off'] = df_cs['off_passes']/df_cs['off_snaps']
df_cs['pass_rate_def'] = df_cs['def_passes']/df_cs['def_snaps']

In [7]:
df_cs.head(3)

Unnamed: 0,possessionTeam,week,off_passes,off_snaps,def_passes,def_snaps,pass_rate_off,pass_rate_def
0,ARI,1,39,57,40,64,0.684211,0.625
1,ARI,2,88,130,81,124,0.676923,0.653226
2,ARI,3,146,206,108,169,0.708738,0.639053


# Calculate player-weighted pass tendencies

I.e., given the snap history of players on the field, does that help us predict a pass?

In [8]:
# Player-play data, flagged for if it's a dropback
db_flagged = df_player_play.merge(df_play[['playId','gameId','isDropback']],how='left',
                     on=['playId','gameId'])

# Add in week info to each play
db_weeks_id = db_flagged[['gameId','playId','nflId','isDropback','teamAbbr']].merge(df_weeks[['gameId','week']].drop_duplicates('gameId'),how='left',on=['gameId'])

# Get weekly pass counts for each player
pass_cts = db_weeks_id.groupby(['nflId','week','teamAbbr']).agg(pass_ct=('isDropback','sum'),
                                                     snap_ct=('isDropback','count')).reset_index()

In [9]:
pass_cts['team_pass_ratio'] = pass_cts['pass_ct']/pass_cts['snap_ct']

In [10]:
pass_cts.head(3)

Unnamed: 0,nflId,week,teamAbbr,pass_ct,snap_ct,team_pass_ratio
0,25511,1,TB,26,58,0.448276
1,25511,2,TB,35,63,0.555556
2,25511,3,TB,42,56,0.75


## Qualitatively explore player pass counts

Just to get a feel for who exactly is in on pass/run downs, we look at some examples of players who tell us a pass (or run) is likely

In [11]:
pass_snap_df = pass_cts.merge(df_players[['nflId','displayName','position']],how='left',on='nflId')
pass_snap_df = pass_snap_df.sort_values(by='pass_ct',ascending=False)
pass_snap_df['pass_ratio'] = pass_snap_df['pass_ct']/pass_snap_df['snap_ct']

Below we can see that a bevy of blocking tight ends, nose tackles, and run-first RB Derrick Henry dominate low-pass rate scenarios:

In [12]:
pass_snap_df[pass_snap_df['snap_ct'] > 30].sort_values(by=['pass_ratio']).head(10)

Unnamed: 0,nflId,week,teamAbbr,pass_ct,snap_ct,team_pass_ratio,displayName,position,pass_ratio
1644,42589,8,TEN,4,38,0.105263,Geoff Swaim,TE,0.105263
9525,54594,5,DAL,5,33,0.151515,Jake Ferguson,TE,0.151515
1946,43334,8,TEN,8,42,0.190476,Derrick Henry,RB,0.190476
1643,42589,7,TEN,6,31,0.193548,Geoff Swaim,TE,0.193548
146,35562,2,SEA,7,34,0.205882,Al Woods,NT,0.205882
6146,48723,6,ATL,8,38,0.210526,Parker Hesse,TE,0.210526
7086,52598,2,SF,7,32,0.21875,Charlie Woerner,TE,0.21875
9370,54560,8,CIN,11,49,0.22449,Zach Carter,DT,0.22449
5234,47852,7,BAL,7,31,0.225806,Josh Oliver,TE,0.225806
1126,42302,6,JAX,8,35,0.228571,Chris Manhertz,TE,0.228571


## Merge in players to week data

We want to merge in our player snap ratio, looking back a week

In [13]:
db_weeks_id.head(1)

Unnamed: 0,gameId,playId,nflId,isDropback,teamAbbr,week
0,2022090800,56,35472,True,BUF,1


In [14]:
pass_snap_df.head(1)

Unnamed: 0,nflId,week,teamAbbr,pass_ct,snap_ct,team_pass_ratio,displayName,position,pass_ratio
2017,43353,9,TEN,78,90,0.866667,Kevin Byard,FS,0.866667


In [15]:
# downshift week
pass_snap_df['week'] += 1

# merge shifted player pass data back in to week data
pass_merged = db_weeks_id.merge(pass_snap_df[['nflId','week','pass_ct','snap_ct']],how='left',on=['nflId','week'])

In [16]:
pass_merged = pass_merged.fillna(0)
pass_merged = pass_merged.merge(pass_snap_df[['nflId','position']],how='left',on=['nflId'])
pass_merged.sample(3)

Unnamed: 0,gameId,playId,nflId,isDropback,teamAbbr,week,pass_ct,snap_ct,position
166561,2022091106,3437,35466,False,NE,1,0.0,0.0,FS
556,2022090800,122,53522,True,BUF,1,0.0,0.0,T
1926412,2022102302,1316,53543,False,ATL,7,16.0,54.0,C


We can excise OL members and QB's since they're on the field most every snap, but let's keep them in for now:

In [17]:
#pass_merged = pass_merged[pass_merged.position.isin(['QB','G','T','C'])]

## Expected pass rate by teams

Excluding players on the field at each snap, we essentially consider the run/pass likelihood based on the relative snaps taken by the players on the field. 

Lingering Q: do we sum pass over sum snaps, or just an average (potentially weighted) of pass snap ratios. The latter potentially gives up-weight to run-only players.


In [18]:
team_xpass_df = pass_merged.groupby(['week','gameId','playId','teamAbbr']).apply(lambda x: x['pass_ct'].sum() /
                                   x['snap_ct'].sum().astype(float), include_groups=False).reset_index().rename(columns={0:'personnel_xpass_ratio'})

team_xpass_df = team_xpass_df.dropna()

  team_xpass_df = pass_merged.groupby(['week','gameId','playId','teamAbbr']).apply(lambda x: x['pass_ct'].sum() /


In [19]:
team_xpass_df.isna().sum()

week                     0
gameId                   0
playId                   0
teamAbbr                 0
personnel_xpass_ratio    0
dtype: int64

In [20]:
min(team_xpass_df['week'])

2

In [21]:
team_xpass_df.head(3)

Unnamed: 0,week,gameId,playId,teamAbbr,personnel_xpass_ratio
3904,2,2022091500,55,KC,0.680626
3905,2,2022091500,55,LAC,0.758184
3906,2,2022091500,76,KC,0.658697


In [22]:
team_xpass_df['week'].min()

2

In [23]:
df_cs.head(1)

Unnamed: 0,possessionTeam,week,off_passes,off_snaps,def_passes,def_snaps,pass_rate_off,pass_rate_def
0,ARI,1,39,57,40,64,0.684211,0.625


In [24]:
tp_df = df_cs[['possessionTeam','week','pass_rate_off','pass_rate_def']].copy()
tp_df['week'] = tp_df['week'] +1

In [25]:
tp_df = tp_df.rename(columns={'possessionTeam':'team'})

In [26]:
tp_df.head(1)

Unnamed: 0,team,week,pass_rate_off,pass_rate_def
0,ARI,2,0.684211,0.625


In [27]:
play_trunc = df_play[['gameId','playId','possessionTeam','defensiveTeam']]

In [28]:
play_trunc.head(1)

Unnamed: 0,gameId,playId,possessionTeam,defensiveTeam
0,2022102302,2655,CIN,ATL


In [29]:
play_trunc = play_trunc.merge(db_weeks_id[['gameId','playId','week']],how='left',on=['gameId','playId'])

In [30]:
play_trunc.head(1)

Unnamed: 0,gameId,playId,possessionTeam,defensiveTeam,week
0,2022102302,2655,CIN,ATL,7


In [31]:
play_out = play_trunc.merge(tp_df[['team','pass_rate_off','week']], right_on=['team','week'],
                                   how='left',left_on=['possessionTeam','week']).drop(columns=['possessionTeam','team'])

In [32]:
play_out.head(1)

Unnamed: 0,gameId,playId,defensiveTeam,week,pass_rate_off
0,2022102302,2655,ATL,7,0.654054


In [33]:
play_out = play_out.merge(tp_df[['team','pass_rate_def','week']], right_on=['team','week'],
                                   how='left',left_on=['defensiveTeam','week']).drop(columns=['defensiveTeam','team'])

In [34]:
prate_out = play_out.drop(columns=['week'])
prate_out.head(1)

Unnamed: 0,gameId,playId,pass_rate_off,pass_rate_def
0,2022102302,2655,0.654054,0.648794


# Breakout xpass for each team 

In [35]:
team_xpass_df = team_xpass_df.drop(columns=['week'])
team_xpass_df.head(3)

Unnamed: 0,gameId,playId,teamAbbr,personnel_xpass_ratio
3904,2022091500,55,KC,0.680626
3905,2022091500,55,LAC,0.758184
3906,2022091500,76,KC,0.658697


In [36]:
play_trunc = play_trunc.drop(columns=['week'])
play_trunc.head()

Unnamed: 0,gameId,playId,possessionTeam,defensiveTeam
0,2022102302,2655,CIN,ATL
1,2022102302,2655,CIN,ATL
2,2022102302,2655,CIN,ATL
3,2022102302,2655,CIN,ATL
4,2022102302,2655,CIN,ATL


In [37]:
play_trunc = play_trunc.drop_duplicates()

In [38]:
play_trunc.head()

Unnamed: 0,gameId,playId,possessionTeam,defensiveTeam
0,2022102302,2655,CIN,ATL
22,2022091809,3698,CIN,DAL
44,2022103004,3146,HOU,TEN
66,2022110610,348,KC,TEN
88,2022102700,2799,BAL,TB


In [39]:
xpass_out = play_trunc.merge(team_xpass_df,left_on=['gameId','playId','possessionTeam'],
                right_on=['gameId','playId','teamAbbr'],how='left')

In [40]:
xpass_out = xpass_out.drop(columns=['teamAbbr']).rename(columns={'personnel_xpass_ratio':'off_xpass'})

In [41]:
xpass_out.head()

Unnamed: 0,gameId,playId,possessionTeam,defensiveTeam,off_xpass
0,2022102302,2655,CIN,ATL,0.779215
1,2022091809,3698,CIN,DAL,0.685163
2,2022103004,3146,HOU,TEN,0.628021
3,2022110610,348,KC,TEN,
4,2022102700,2799,BAL,TB,0.339726


In [42]:
xpass_out = xpass_out.merge(team_xpass_df,left_on=['gameId','playId','defensiveTeam'],
                right_on=['gameId','playId','teamAbbr'],how='left').drop(columns=['teamAbbr',
                                                                                 'possessionTeam',
                                                                                 'defensiveTeam']).rename(columns={'personnel_xpass_ratio':'def_xpass'})

In [43]:
xpass_out.head(1)

Unnamed: 0,gameId,playId,off_xpass,def_xpass
0,2022102302,2655,0.779215,0.723996


In [44]:
len(xpass_out)/2

8062.0

In [45]:
xpass_out.head()

Unnamed: 0,gameId,playId,off_xpass,def_xpass
0,2022102302,2655,0.779215,0.723996
1,2022091809,3698,0.685163,0.425815
2,2022103004,3146,0.628021,0.757653
3,2022110610,348,,0.744
4,2022102700,2799,0.339726,0.427465


In [46]:
xpass_out.drop_duplicates(inplace=True)
prate_out.drop_duplicates(inplace=True)

prate_out.duplicated().sum()

0

In [47]:
xpass_out.duplicated().sum()

0

In [48]:
len(xpass_out)

16124

In [49]:
len(prate_out)

16124

In [50]:
prate_out.to_csv('/kaggle/working/team_pass_rates.csv')

In [51]:
xpass_out.to_csv('/kaggle/working/team_xpass_ratios.csv')