In [14]:
# Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Lift the column display cap so wide DataFrames are shown without truncation
pd.options.display.max_columns = None


In [2]:
# Load the player table from CSV, then preview its first five rows
player_data = pd.read_csv(filepath_or_buffer="data/players.csv")
player_data.head(n=5)

Unnamed: 0,nflId,height,weight,birthDate,collegeName,officialPosition,displayName
0,25511,6-4,225,1977-08-03,Michigan,QB,Tom Brady
1,28963,6-5,240,1982-03-02,"Miami, O.",QB,Ben Roethlisberger
2,29550,6-4,328,1982-01-22,Arkansas,T,Jason Peters
3,29851,6-2,225,1983-12-02,California,QB,Aaron Rodgers
4,30078,6-2,228,1982-11-24,Harvard,QB,Ryan Fitzpatrick


In [4]:
# Tally how many players are listed under each officialPosition value
player_data.loc[:, "officialPosition"].value_counts()

WR     210
CB     192
DE     142
OLB    136
RB     126
T      122
TE     121
DT     114
G       99
ILB     81
FS      72
SS      69
QB      60
C       55
NT      34
MLB     27
FB      15
LB       2
DB       2
Name: officialPosition, dtype: int64

In [8]:
# Keep only the players whose officialPosition is one of the offensive
# lineman codes (C, G, T), then preview the first five matches
df_offensive_lineman = player_data[player_data.officialPosition.isin(["C", "G", "T"])]
df_offensive_lineman.head(n=5)

Unnamed: 0,nflId,height,weight,birthDate,collegeName,officialPosition,displayName
2,29550,6-4,328,1982-01-22,Arkansas,T,Jason Peters
6,30869,6-7,330,1981-12-12,Louisiana State,T,Andrew Whitworth
8,33107,6-4,315,1985-08-30,Virginia Tech,T,Duane Brown
15,34457,6-4,330,1987-01-25,Alabama,T,Andre Smith
17,34472,6-4,311,1985-11-19,California,C,Alex Mack


In [9]:
# Load the week 1 data file from CSV, then preview its first five rows
week1_data = pd.read_csv(filepath_or_buffer="data/week1.csv")
week1_data.head(n=5)

Unnamed: 0,gameId,playId,nflId,frameId,time,jerseyNumber,team,playDirection,x,y,s,a,dis,o,dir,event
0,2021090900,97,25511.0,1,2021-09-10T00:26:31.100,12.0,TB,right,37.77,24.22,0.29,0.3,0.03,165.16,84.99,
1,2021090900,97,25511.0,2,2021-09-10T00:26:31.200,12.0,TB,right,37.78,24.22,0.23,0.11,0.02,164.33,92.87,
2,2021090900,97,25511.0,3,2021-09-10T00:26:31.300,12.0,TB,right,37.78,24.24,0.16,0.1,0.01,160.24,68.55,
3,2021090900,97,25511.0,4,2021-09-10T00:26:31.400,12.0,TB,right,37.73,24.25,0.15,0.24,0.06,152.13,296.85,
4,2021090900,97,25511.0,5,2021-09-10T00:26:31.500,12.0,TB,right,37.69,24.26,0.25,0.18,0.04,148.33,287.55,


In [12]:
# Merge the week 1 data with the offensive lineman player data.
# The inner join on nflId keeps only rows whose player appears in
# df_offensive_lineman and appends that player's columns to each row.
#
# This cell rebinds week1_data, so it has to be safe to run more than once:
# player columns left over from a previous run are dropped before merging,
# otherwise a second run would silently produce suffixed duplicates
# (height_x / height_y, ...) instead of the original column names.
player_columns = df_offensive_lineman.columns.drop("nflId")
week1_data = pd.merge(
    week1_data.drop(columns=player_columns, errors="ignore"),
    df_offensive_lineman,
    on="nflId",
    how="inner",
    # Fail loudly if an nflId occurs more than once in the player table,
    # which would otherwise duplicate the matching week 1 rows.
    validate="many_to_one",
)
week1_data.head()

Unnamed: 0,gameId,playId,nflId,frameId,time,jerseyNumber,team,playDirection,x,y,...,dis,o,dir,event,height,weight,birthDate,collegeName,officialPosition,displayName
0,2021090900,97,40151.0,1,2021-09-10T00:26:31.100,66.0,TB,right,42.18,23.97,...,0.01,45.36,206.92,,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen
1,2021090900,97,40151.0,2,2021-09-10T00:26:31.200,66.0,TB,right,42.16,23.97,...,0.02,45.36,216.2,,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen
2,2021090900,97,40151.0,3,2021-09-10T00:26:31.300,66.0,TB,right,42.15,23.97,...,0.01,46.28,275.73,,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen
3,2021090900,97,40151.0,4,2021-09-10T00:26:31.400,66.0,TB,right,42.15,23.98,...,0.01,46.28,308.29,,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen
4,2021090900,97,40151.0,5,2021-09-10T00:26:31.500,66.0,TB,right,42.14,23.99,...,0.02,47.5,317.31,,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen


In [15]:
# Filter dataset to one play and one player to get an idea of the distribution of the data.
# gameId is pinned together with playId: the data carries both columns, and
# playId on its own is presumably not unique across games (confirm against
# the dataset's data dictionary), so playId == 97 alone may not select a
# single play.
df_iso = week1_data.loc[
    (week1_data["gameId"] == 2021090900)
    & (week1_data["playId"] == 97)
    & (week1_data["nflId"] == 40151)
]
df_iso

Unnamed: 0,gameId,playId,nflId,frameId,time,jerseyNumber,team,playDirection,x,y,s,a,dis,o,dir,event,height,weight,birthDate,collegeName,officialPosition,displayName
0,2021090900,97,40151.0,1,2021-09-10T00:26:31.100,66.0,TB,right,42.18,23.97,0.0,0.0,0.01,45.36,206.92,,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen
1,2021090900,97,40151.0,2,2021-09-10T00:26:31.200,66.0,TB,right,42.16,23.97,0.0,0.0,0.02,45.36,216.2,,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen
2,2021090900,97,40151.0,3,2021-09-10T00:26:31.300,66.0,TB,right,42.15,23.97,0.01,0.21,0.01,46.28,275.73,,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen
3,2021090900,97,40151.0,4,2021-09-10T00:26:31.400,66.0,TB,right,42.15,23.98,0.06,0.63,0.01,46.28,308.29,,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen
4,2021090900,97,40151.0,5,2021-09-10T00:26:31.500,66.0,TB,right,42.14,23.99,0.18,1.02,0.02,47.5,317.31,,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen
5,2021090900,97,40151.0,6,2021-09-10T00:26:31.600,66.0,TB,right,42.1,24.02,0.51,1.98,0.06,48.28,308.34,ball_snap,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen
6,2021090900,97,40151.0,7,2021-09-10T00:26:31.700,66.0,TB,right,42.03,24.06,0.79,2.13,0.07,53.21,305.04,,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen
7,2021090900,97,40151.0,8,2021-09-10T00:26:31.800,66.0,TB,right,41.95,24.1,1.0,1.84,0.09,57.07,300.02,,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen
8,2021090900,97,40151.0,9,2021-09-10T00:26:31.900,66.0,TB,right,41.86,24.16,1.17,1.35,0.11,59.92,301.28,,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen
9,2021090900,97,40151.0,10,2021-09-10T00:26:32.000,66.0,TB,right,41.74,24.24,1.39,1.09,0.14,62.96,301.82,,6-4,319,1991-05-27,Colorado State-Pueblo,C,Ryan Jensen
