In [2]:
#imports
import os
import gc
import pandas as pd
import numpy as np
import datetime as dt 

import matplotlib.pyplot as plt
import matplotlib.cm as cm 
import seaborn as sns

import plotly.graph_objects as go 
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.offline 

from colorama import Fore, Style, init 
from pprint import pprint

import warnings
warnings.filterwarnings('ignore')

In [3]:
#Color printing and first impression data summary
def PrintColor(text: str, color=Fore.BLUE, style=Style.BRIGHT):
    "Prints color outputs using colorama using a text F-string"
    print(style + color + text + Style.RESET_ALL)

def summarize_dataframe(df):
    summary_df = pd.DataFrame(df.dtypes, columns=['dtypes'])
    summary_df['missing#'] = df.isna().sum().values*100
    summary_df['missing%'] = (df.isna().sum().values*100)/len(df)
    summary_df['uniques'] = df.nunique().values
    summary_df['first_value'] = df.iloc[0].values
    summary_df['last_value'] = df.iloc[len(df)-1].values
    summary_df['count'] = df.count().values

    desc = pd.DataFrame(df.describe().T)
    summary_df['min'] = desc['min']
    summary_df['max'] = desc['max']
    summary_df['mean'] = desc['mean']
    return summary_df

# <div style="color:white;background-color:#000000;padding:3%;border-radius:50px 50px;font-size:1em;text-align:center">Games Data</div>

In [24]:
#games data
games = pd.read_csv('../data/games.csv')
games.head()

Unnamed: 0,gameId,season,week,gameDate,gameTimeEastern,homeTeamAbbr,visitorTeamAbbr,homeFinalScore,visitorFinalScore
0,2022090800,2022,1,9/8/2022,20:20:00,LA,BUF,10,31
1,2022091100,2022,1,9/11/2022,13:00:00,ATL,NO,26,27
2,2022091101,2022,1,9/11/2022,13:00:00,CAR,CLE,24,26
3,2022091102,2022,1,9/11/2022,13:00:00,CHI,SF,19,10
4,2022091103,2022,1,9/11/2022,13:00:00,CIN,PIT,20,23


In [25]:
#check missing values in games data
for col in games.columns:
    msg = 'column: {:>10}\t Percent of NaN value: {:.2f}%'.format(col,100*(games[col].isnull().sum()/games[col].shape[0]))
    PrintColor(f"\n---> {msg}");

[1m[34m
---> column:     gameId	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column:     season	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column:       week	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column:   gameDate	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column: gameTimeEastern	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column: homeTeamAbbr	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column: visitorTeamAbbr	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column: homeFinalScore	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column: visitorFinalScore	 Percent of NaN value: 0.00%[0m


# <div style="color:white;background-color:#000000;padding:3%;border-radius:50px 50px;font-size:1em;text-align:center">Players Data</div>

In [26]:
#players data
players = pd.read_csv('../data/players.csv')
players.head()

Unnamed: 0,nflId,height,weight,birthDate,collegeName,position,displayName
0,25511,6-4,225,1977-08-03,Michigan,QB,Tom Brady
1,29550,6-4,328,1982-01-22,Arkansas,T,Jason Peters
2,29851,6-2,225,1983-12-02,California,QB,Aaron Rodgers
3,30842,6-6,267,1984-05-19,UCLA,TE,Marcedes Lewis
4,33084,6-4,217,1985-05-17,Boston College,QB,Matt Ryan


In [29]:
#check player data structure
players.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1697 entries, 0 to 1696
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   nflId        1697 non-null   int64 
 1   height       1697 non-null   object
 2   weight       1697 non-null   int64 
 3   birthDate    1210 non-null   object
 4   collegeName  1697 non-null   object
 5   position     1697 non-null   object
 6   displayName  1697 non-null   object
dtypes: int64(2), object(5)
memory usage: 92.9+ KB


In [30]:
# call summarize_dataframe for high level overview of missing values, unique values, etc
summarize_dataframe(players).style.background_gradient(cmap='Purples')

Unnamed: 0,dtypes,missing#,missing%,uniques,first_value,last_value,count,min,max,mean
nflId,int64,0,0.0,1697,25511,55241,1697,25511.0,55241.0,48237.157336
height,object,0,0.0,16,6-4,6-2,1697,,,
weight,int64,0,0.0,179,225,280,1697,153.0,380.0,245.774308
birthDate,object,48700,28.697702,989,1977-08-03,,1210,,,
collegeName,object,0,0.0,225,Michigan,Coastal Carolina,1697,,,
position,object,0,0.0,19,QB,DT,1697,,,
displayName,object,0,0.0,1687,Tom Brady,C.J. Brewer,1697,,,


# <div style="color:white;background-color:#000000;padding:3%;border-radius:50px 50px;font-size:1em;text-align:center">Plays Data</div>

In [11]:
#players data
plays = pd.read_csv('../data/plays.csv')
plays.head()

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,defensiveTeam,yardlineSide,yardlineNumber,...,yardsGained,homeTeamWinProbabilityAdded,visitorTeamWinProbilityAdded,expectedPointsAdded,isDropback,pff_runConceptPrimary,pff_runConceptSecondary,pff_runPassOption,pff_passCoverage,pff_manZone
0,2022102302,2655,(1:54) (Shotgun) J.Burrow pass short middle to...,3,1,10,CIN,ATL,CIN,21,...,9,0.004634,-0.004634,0.702717,True,,,0,Cover-3,Zone
1,2022091809,3698,(2:13) (Shotgun) J.Burrow pass short right to ...,4,1,10,CIN,DAL,CIN,8,...,4,0.002847,-0.002847,-0.240509,True,,,0,Quarters,Zone
2,2022103004,3146,(2:00) (Shotgun) D.Mills pass short right to D...,4,3,12,HOU,TEN,HOU,20,...,6,0.000205,-0.000205,-0.21848,True,,,0,Quarters,Zone
3,2022110610,348,(9:28) (Shotgun) P.Mahomes pass short left to ...,1,2,10,KC,TEN,TEN,23,...,4,-0.001308,0.001308,-0.427749,True,,,0,Quarters,Zone
4,2022102700,2799,(2:16) (Shotgun) L.Jackson up the middle to TB...,3,2,8,BAL,TB,TB,27,...,-1,0.027141,-0.027141,-0.638912,False,MAN,READ OPTION,0,Cover-1,Man


In [14]:
unique_values = plays['pff_passCoverage'].value_counts()

print(unique_values)

pff_passCoverage
Cover-3                 4956
Cover-1                 3300
Quarters                2073
Cover-2                 1852
Cover 6-Left             692
Cover-6 Right            690
Cover-3 Seam             636
Cover-0                  605
Red Zone                 537
2-Man                    186
Goal Line                146
Bracket                   75
Cover-1 Double            54
Prevent                   46
Cover-3 Cloud Right       31
Cover-3 Cloud Left        30
Miscellaneous             14
Cover-3 Double Cloud       9
Name: count, dtype: int64


In [33]:
plays.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16124 entries, 0 to 16123
Data columns (total 50 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   gameId                            16124 non-null  int64  
 1   playId                            16124 non-null  int64  
 2   playDescription                   16124 non-null  object 
 3   quarter                           16124 non-null  int64  
 4   down                              16124 non-null  int64  
 5   yardsToGo                         16124 non-null  int64  
 6   possessionTeam                    16124 non-null  object 
 7   defensiveTeam                     16124 non-null  object 
 8   yardlineSide                      15900 non-null  object 
 9   yardlineNumber                    16124 non-null  int64  
 10  gameClock                         16124 non-null  object 
 11  preSnapHomeScore                  16124 non-null  int64  
 12  preS

In [32]:
# call summarize_dataframe for high level overview of missing values, unique values, etc
summarize_dataframe(plays).style.background_gradient(cmap='Greens')

Unnamed: 0,dtypes,missing#,missing%,uniques,first_value,last_value,count,min,max,mean
gameId,int64,0,0.0,136,2022102302,2022101602,16124,2022090800.0,2022110700.0,2022098903.691206
playId,int64,0,0.0,4207,2655,3046,16124,54.0,5120.0,2023.830563
playDescription,object,0,0.0,16122,(1:54) (Shotgun) J.Burrow pass short middle to T.Boyd to CIN 30 for 9 yards (J.Hawkins).,"(15:00) Br.Hall up the middle for 34 yards, TOUCHDOWN.",16124,,,
quarter,int64,0,0.0,5,3,4,16124,1.0,5.0,2.572253
down,int64,0,0.0,4,1,1,16124,1.0,4.0,1.800794
yardsToGo,int64,0,0.0,32,10,10,16124,1.0,38.0,8.4613
possessionTeam,object,0,0.0,32,CIN,NYJ,16124,,,
defensiveTeam,object,0,0.0,32,ATL,GB,16124,,,
yardlineSide,object,22400,1.389233,32,CIN,GB,15900,,,
yardlineNumber,int64,0,0.0,50,21,34,16124,1.0,50.0,29.226185


# <div style="color:white;background-color:#000000;padding:3%;border-radius:50px 50px;font-size:1em;text-align:center">Weeks Data</div>

In [39]:
tracking_week1 = pd.read_csv('../data/tracking_week_1.csv')
tracking_week1.head()

tracking_week2 = pd.read_csv('../data/tracking_week_2.csv')
tracking_week2.head()

tracking_week3 = pd.read_csv('../data/tracking_week_3.csv')
tracking_week3.head()

tracking_week4 = pd.read_csv('../data/tracking_week_4.csv')
tracking_week4.head()

tracking_week5 = pd.read_csv('../data/tracking_week_5.csv')
tracking_week5.head()

tracking_week6 = pd.read_csv('../data/tracking_week_6.csv')
tracking_week6.head()

tracking_week7 = pd.read_csv('../data/tracking_week_7.csv')
tracking_week7.head()

tracking_week8 = pd.read_csv('../data/tracking_week_8.csv')
tracking_week8.head()

tracking_week9 = pd.read_csv('../data/tracking_week_9.csv')
tracking_week9.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022110700,56,33131.0,Calais Campbell,1,BEFORE_SNAP,2022-11-08 01:16:09.2,93.0,BAL,left,83.0,28.44,0.0,0.0,0.01,197.06,23.13,huddle_break_offense
1,2022110700,56,33131.0,Calais Campbell,2,BEFORE_SNAP,2022-11-08 01:16:09.3,93.0,BAL,left,83.0,28.45,0.0,0.0,0.01,196.46,38.7,
2,2022110700,56,33131.0,Calais Campbell,3,BEFORE_SNAP,2022-11-08 01:16:09.4,93.0,BAL,left,83.01,28.45,0.0,0.0,0.01,196.46,47.59,
3,2022110700,56,33131.0,Calais Campbell,4,BEFORE_SNAP,2022-11-08 01:16:09.5,93.0,BAL,left,83.02,28.45,0.0,0.0,0.01,195.5,52.81,
4,2022110700,56,33131.0,Calais Campbell,5,BEFORE_SNAP,2022-11-08 01:16:09.6,93.0,BAL,left,83.02,28.46,0.0,0.0,0.0,194.24,51.82,


In [47]:
# call summarize_dataframe for high level overview of missing values, unique values, etc
summarize_dataframe(tracking_week1).style.background_gradient(cmap='viridis')

Unnamed: 0,dtypes,missing#,missing%,uniques,first_value,last_value,count,min,max,mean
gameId,int64,0,0.0,16,2022091200,2022090800,7104700,2022090800.0,2022091200.0,2022091094.030593
playId,int64,0,0.0,1574,64,3696,7104700,55.0,5120.0,2024.279479
nflId,float64,30890000,4.347826,1183,35459.000000,,6795800,25511.0,55173.0,47186.246824
displayName,object,0,0.0,1178,Kareem Jackson,football,7104700,,,
frameId,int64,0,0.0,697,1,175,7104700,1.0,697.0,86.931638
frameType,object,0,0.0,3,BEFORE_SNAP,AFTER_SNAP,7104700,,,
time,object,0,0.0,203263,2022-09-13 00:16:03.5,2022-09-09 03:07:38.1,7104700,,,
jerseyNumber,float64,30890000,4.347826,99,22.000000,,6795800,1.0,99.0,48.085401
club,object,0,0.0,33,DEN,football,7104700,,,
playDirection,object,0,0.0,2,right,left,7104700,,,


In [48]:
# call summarize_dataframe for high level overview of missing values, unique values, etc
summarize_dataframe(tracking_week2).style.background_gradient(cmap='plasma')

Unnamed: 0,dtypes,missing#,missing%,uniques,first_value,last_value,count,min,max,mean
gameId,int64,0,0.0,16,2022091901,2022091500,6704339,2022091500.0,2022091901.0,2022091796.124569
playId,int64,0,0.0,1474,64,4076,6704339,55.0,4519.0,2005.656129
nflId,float64,29149300,4.347826,1150,37078.000000,,6412846,25511.0,55241.0,47293.233689
displayName,object,0,0.0,1146,Patrick Peterson,football,6704339,,,
frameId,int64,0,0.0,489,1,94,6704339,1.0,489.0,87.406891
frameType,object,0,0.0,3,BEFORE_SNAP,AFTER_SNAP,6704339,,,
time,object,0,0.0,216513,2022-09-20 00:32:03.3,2022-09-16 03:13:01.4,6704339,,,
jerseyNumber,float64,29149300,4.347826,99,7.000000,,6412846,1.0,99.0,48.269581
club,object,0,0.0,33,MIN,football,6704339,,,
playDirection,object,0,0.0,2,right,right,6704339,,,


In [49]:
# call summarize_dataframe for high level overview of missing values, unique values, etc
summarize_dataframe(tracking_week3).style.background_gradient(cmap='inferno')

Unnamed: 0,dtypes,missing#,missing%,uniques,first_value,last_value,count,min,max,mean
gameId,int64,0,0.0,16,2022092600,2022092200,7129195,2022092200.0,2022092600.0,2022092491.980362
playId,int64,0,0.0,1590,57,4183,7129195,54.0,4364.0,2012.302295
nflId,float64,30996500,4.347826,1184,40171.000000,,6819230,25511.0,55240.0,47351.730347
displayName,object,0,0.0,1182,Nicholas Williams,football,7129195,,,
frameId,int64,0,0.0,461,1,212,7129195,1.0,461.0,86.092019
frameType,object,0,0.0,3,BEFORE_SNAP,AFTER_SNAP,7129195,,,
time,object,0,0.0,204919,2022-09-27 00:16:08.5,2022-09-23 03:25:08.6,7129195,,,
jerseyNumber,float64,30996500,4.347826,99,93.000000,,6819230,1.0,99.0,48.647565
club,object,0,0.0,33,NYG,football,7129195,,,
playDirection,object,0,0.0,2,left,left,7129195,,,


In [52]:
# call summarize_dataframe for high level overview of missing values, unique values, etc
summarize_dataframe(tracking_week4).style.background_gradient(cmap='cividis')

Unnamed: 0,dtypes,missing#,missing%,uniques,first_value,last_value,count,min,max,mean
gameId,int64,0,0.0,16,2022100300,2022092900,6747556,2022092900.0,2022100300.0,2022099692.230537
playId,int64,0,0.0,1491,59,3882,6747556,54.0,4569.0,1932.443856
nflId,float64,29337200,4.347826,1164,34452.000000,,6454184,25511.0,55239.0,47439.515738
displayName,object,0,0.0,1161,Matthew Stafford,football,6747556,,,
frameId,int64,0,0.0,521,1,64,6747556,1.0,521.0,86.598036
frameType,object,0,0.0,3,BEFORE_SNAP,AFTER_SNAP,6747556,,,
time,object,0,0.0,213599,2022-10-04 00:16:05.9,2022-09-30 03:20:40.8,6747556,,,
jerseyNumber,float64,29337200,4.347826,99,9.000000,,6454184,1.0,99.0,48.596129
club,object,0,0.0,33,LA,football,6747556,,,
playDirection,object,0,0.0,2,right,left,6747556,,,


In [53]:
# call summarize_dataframe for high level overview of missing values, unique values, etc
summarize_dataframe(tracking_week5).style.background_gradient(cmap='turbo')

Unnamed: 0,dtypes,missing#,missing%,uniques,first_value,last_value,count,min,max,mean
gameId,int64,0,0.0,16,2022101000,2022100600,7103067,2022100600.0,2022101000.0,2022100891.038562
playId,int64,0,0.0,1535,57,4724,7103067,55.0,4724.0,1992.843917
nflId,float64,30882900,4.347826,1197,41265.000000,,6794238,25511.0,55240.0,47559.140576
displayName,object,0,0.0,1192,Derek Carr,football,7103067,,,
frameId,int64,0,0.0,700,1,166,7103067,1.0,700.0,87.897487
frameType,object,0,0.0,3,BEFORE_SNAP,AFTER_SNAP,7103067,,,
time,object,0,0.0,219181,2022-10-11 00:15:50.4,2022-10-07 03:44:50.8,7103067,,,
jerseyNumber,float64,30882900,4.347826,99,4.000000,,6794238,1.0,99.0,48.558437
club,object,0,0.0,33,LV,football,7103067,,,
playDirection,object,0,0.0,2,right,left,7103067,,,


In [54]:
# call summarize_dataframe for high level overview of missing values, unique values, etc
summarize_dataframe(tracking_week6).style.background_gradient(cmap='magma')

Unnamed: 0,dtypes,missing#,missing%,uniques,first_value,last_value,count,min,max,mean
gameId,int64,0,0.0,14,2022101700,2022101300,6239486,2022101300.0,2022101700.0,2022101591.846149
playId,int64,0,0.0,1431,90,3894,6239486,54.0,4539.0,2006.896934
nflId,float64,27128200,4.347826,1032,35459.000000,,5968204,25511.0,55200.0,47426.821742
displayName,object,0,0.0,1028,Kareem Jackson,football,6239486,,,
frameId,int64,0,0.0,620,1,84,6239486,1.0,620.0,86.487334
frameType,object,0,0.0,3,BEFORE_SNAP,AFTER_SNAP,6239486,,,
time,object,0,0.0,192259,2022-10-18 00:17:11.6,2022-10-14 03:05:49.4,6239486,,,
jerseyNumber,float64,27128200,4.347826,99,22.000000,,5968204,1.0,99.0,48.673111
club,object,0,0.0,29,DEN,football,6239486,,,
playDirection,object,0,0.0,2,left,left,6239486,,,


In [55]:
# call summarize_dataframe for high level overview of missing values, unique values, etc
summarize_dataframe(tracking_week6).style.background_gradient(cmap='twilight')

Unnamed: 0,dtypes,missing#,missing%,uniques,first_value,last_value,count,min,max,mean
gameId,int64,0,0.0,14,2022101700,2022101300,6239486,2022101300.0,2022101700.0,2022101591.846149
playId,int64,0,0.0,1431,90,3894,6239486,54.0,4539.0,2006.896934
nflId,float64,27128200,4.347826,1032,35459.000000,,5968204,25511.0,55200.0,47426.821742
displayName,object,0,0.0,1028,Kareem Jackson,football,6239486,,,
frameId,int64,0,0.0,620,1,84,6239486,1.0,620.0,86.487334
frameType,object,0,0.0,3,BEFORE_SNAP,AFTER_SNAP,6239486,,,
time,object,0,0.0,192259,2022-10-18 00:17:11.6,2022-10-14 03:05:49.4,6239486,,,
jerseyNumber,float64,27128200,4.347826,99,22.000000,,5968204,1.0,99.0,48.673111
club,object,0,0.0,29,DEN,football,6239486,,,
playDirection,object,0,0.0,2,left,left,6239486,,,


In [56]:
# call summarize_dataframe for high level overview of missing values, unique values, etc
summarize_dataframe(tracking_week6).style.background_gradient(cmap='coolwarm')

Unnamed: 0,dtypes,missing#,missing%,uniques,first_value,last_value,count,min,max,mean
gameId,int64,0,0.0,14,2022101700,2022101300,6239486,2022101300.0,2022101700.0,2022101591.846149
playId,int64,0,0.0,1431,90,3894,6239486,54.0,4539.0,2006.896934
nflId,float64,27128200,4.347826,1032,35459.000000,,5968204,25511.0,55200.0,47426.821742
displayName,object,0,0.0,1028,Kareem Jackson,football,6239486,,,
frameId,int64,0,0.0,620,1,84,6239486,1.0,620.0,86.487334
frameType,object,0,0.0,3,BEFORE_SNAP,AFTER_SNAP,6239486,,,
time,object,0,0.0,192259,2022-10-18 00:17:11.6,2022-10-14 03:05:49.4,6239486,,,
jerseyNumber,float64,27128200,4.347826,99,22.000000,,5968204,1.0,99.0,48.673111
club,object,0,0.0,29,DEN,football,6239486,,,
playDirection,object,0,0.0,2,left,left,6239486,,,


In [5]:
# call summarize_dataframe for high level overview of missing values, unique values, etc
summarize_dataframe(tracking_week6).style.background_gradient(cmap='Spectral')

NameError: name 'tracking_week6' is not defined

In [9]:
finalweek1 = pd.read_csv('../data/processed/final_tracking_week_1.csv')

In [10]:
summarize_dataframe(finalweek1)

Unnamed: 0,dtypes,missing#,missing%,uniques,first_value,last_value,count,min,max,mean
gameId,int64,0,0.0,16,2022091200,2022090800,7104700,2022091000.0,2022091000.0,2022091000.0
playId,int64,0,0.0,1574,64,3696,7104700,55.0,5120.0,2024.279
nflId,float64,30890000,4.347826,1183,35459.0,,6795800,25511.0,55173.0,47186.25
displayName,object,0,0.0,1178,Kareem Jackson,football,7104700,,,
frameId,int64,0,0.0,697,1,175,7104700,1.0,697.0,86.93164
frameType,object,0,0.0,3,BEFORE_SNAP,AFTER_SNAP,7104700,,,
time,object,0,0.0,203263,2022-09-13 00:16:03.5,2022-09-09 03:07:38.1,7104700,,,
jerseyNumber,float64,30890000,4.347826,99,22.0,,6795800,1.0,99.0,48.0854
club,object,0,0.0,33,DEN,football,7104700,,,
playDirection,object,0,0.0,2,right,left,7104700,,,


In [5]:
#player_plays data
player_plays = pd.read_csv('../data/player_play.csv')
player_plays.head()

Unnamed: 0,gameId,playId,nflId,teamAbbr,hadRushAttempt,rushingYards,hadDropback,passingYards,sackYardsAsOffense,hadPassReception,...,wasRunningRoute,routeRan,blockedPlayerNFLId1,blockedPlayerNFLId2,blockedPlayerNFLId3,pressureAllowedAsBlocker,timeToPressureAllowedAsBlocker,pff_defensiveCoverageAssignment,pff_primaryDefensiveCoverageMatchupNflId,pff_secondaryDefensiveCoverageMatchupNflId
0,2022090800,56,35472,BUF,0,0,0,0,0,0,...,,,47917.0,,,0.0,,,,
1,2022090800,56,42392,BUF,0,0,0,0,0,0,...,,,47917.0,,,0.0,,,,
2,2022090800,56,42489,BUF,0,0,0,0,0,1,...,1.0,IN,,,,,,,,
3,2022090800,56,44875,BUF,0,0,0,0,0,0,...,,,43335.0,,,0.0,,,,
4,2022090800,56,44985,BUF,0,0,0,0,0,0,...,1.0,OUT,,,,,,,,


In [8]:
#check missing values in player_play data
for col in player_plays.columns:
    msg = 'column: {:>10}\t Percent of NaN value: {:.2f}%'.format(col,100*(player_plays[col].isnull().sum()/player_plays[col].shape[0]))
    PrintColor(f"\n---> {msg}");

[1m[34m
---> column:     gameId	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column:     playId	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column:      nflId	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column:   teamAbbr	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column: hadRushAttempt	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column: rushingYards	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column: hadDropback	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column: passingYards	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column: sackYardsAsOffense	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column: hadPassReception	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column: receivingYards	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column: wasTargettedReceiver	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column: yardageGainedAfterTheCatch	 Percent of NaN value: 0.00%[0m
[1m[34m
---> column:    fumbles	 Percent of NaN value: 0.00%[0m
[1m[34m

In [9]:
# call summarize_dataframe for high level overview of missing values, unique values, etc
summarize_dataframe(player_plays).style.background_gradient(cmap='twilight')

Unnamed: 0,dtypes,missing#,missing%,uniques,first_value,last_value,count,min,max,mean
gameId,int64,0,0.0,136,2022090800,2022110700,354727,2022090800.0,2022110700.0,2022098903.683585
playId,int64,0,0.0,4207,56,3787,354727,54.0,5120.0,2023.833091
nflId,int64,0,0.0,1697,35472,55125,354727,25511.0,55241.0,47437.201033
teamAbbr,object,0,0.0,32,BUF,NO,354727,,,
hadRushAttempt,int64,0,0.0,2,0,0,354727,0.0,1.0,0.019136
rushingYards,int64,0,0.0,74,0,0,354727,-10.0,75.0,0.087304
hadDropback,int64,0,0.0,2,0,0,354727,0.0,1.0,0.017571
passingYards,int64,0,0.0,80,0,0,354727,-10.0,98.0,0.173268
sackYardsAsOffense,int64,0,0.0,19,0,0,354727,-18.0,0.0,-0.011471
hadPassReception,int64,0,0.0,2,0,0,354727,0.0,1.0,0.015857
