In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
import seaborn as sns
from sklearn.preprocessing import StandardScaler
import plotly.express as px
import plotly.graph_objects as go

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
import sys
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides pandas deprecation and chained-assignment warnings raised by later cells.
import warnings
warnings.filterwarnings('ignore')

# Initialise plotly's offline notebook mode (this sets up plotly for the notebook; it does not disable errors).
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)

In [None]:
# Load the four competition tables used below from the Kaggle input directory:
# players, games, plays and the PFF scouting data.
players = pd.read_csv('../input/nfl-big-data-bowl-2022/players.csv')
games = pd.read_csv('../input/nfl-big-data-bowl-2022/games.csv')
plays = pd.read_csv('../input/nfl-big-data-bowl-2022/plays.csv')
scouting = pd.read_csv('../input/nfl-big-data-bowl-2022/PFFScoutingData.csv')

In [None]:
# Print a labelled column/dtype summary for the players and games tables.
for label, frame in (("Players dataset", players), ("Games dataset", games)):
    print(label)
    frame.info()

In [None]:
# Preview the first three rows of the plays table.
plays.head(3)

In [None]:
# Split the raw "height" text on its first "-" into a leading part ("Feet") and a trailing part ("Inch"),
# then store both as numeric columns. No unit conversion happens in this cell.
temp = players["height"].str.split("-", n = 1, expand = True)
temp.columns = ['Feet', 'Inch']
players["height_feet"] = pd.to_numeric(temp["Feet"]) #converting string to numeric values
players["height_inch"] = pd.to_numeric(temp["Inch"])
print("The shape of the dataset after feature engineering is : ", players.shape)
print(players.sample(2))

#Thanks to @Sanjay V for the following cleanup codes, and additional data

In [None]:
# Build one numeric height in feet per player.
# The parts are re-derived from the untouched "height" text so that re-running this
# cell gives the same result instead of rescaling values that were already converted.
height_parts = (
    players["height"].str.split("-", n=1, expand=True).reindex(columns=[0, 1])
)
whole_part = pd.to_numeric(height_parts[0])
inch_part = pd.to_numeric(height_parts[1])

# Column - height_feet
# A leading number above 7 is treated as a height given entirely in inches and is
# converted to feet (exact division by 12 instead of the truncated 0.0833333 factor).
whole_part = whole_part.mask(whole_part > 7, whole_part / 12)

# Column - height_inch
# Inches as a fraction of a foot; rows with no inch part contribute 0.
players["height_inch"] = (inch_part / 12).fillna(0)

#bring it all together
players["height_feet"] = whole_part + players["height_inch"]

print(players["height_feet"])

In [None]:
#Replacing the position abbreviations with full format
POSITION_NAMES = {"WR": "Wide Receiver", "CB": "Corner Back", "DE": "Defensive End", "OLB": "Outside Linebacker",
                  "TE": "Tight End", "T": "Tackle", "RB": "Running Back", "G": "Guard", "DT": "Defensive Tackle",
                  "ILB": "Inside Linebacker", "FS": "Free Safety", "SS": "Strong Safety", "C": "Center", "K": "Kicker",
                  "P": "Punter", "NT": "Nose Tackle", "LS": "Long Snapper", "LB": "Linebacker", "DB": "Defensive Back",
                  "MLB" : "Middle Linebacker", "FB": "Full Back", "OT": "Offensive Tackle", "QB": "Quarter Back",
                  "OG": "Offensive guard", "S": "Safety", "HB": "Half Back"}

# Assign the result back to the column. Calling replace(inplace=True) on players["Position"]
# acts on a selected column object and is not guaranteed to update the frame itself.
players["Position"] = players["Position"].replace(POSITION_NAMES)
print(players["Position"])

In [None]:
# Replace the home-team abbreviations with the full team names.
TEAM_NAMES = {"ARI": "Arizona Cardinals", "KC": "Kansas City Chiefs", "TEN": "Tennessee Titans", "TB": "Tampa Bay Buccaneers",
              "SF": "San Francisco 49ers", "SEA": "Seattle Seahawks", "PIT": "Pittsburgh Steelers", "PHI": "Philadelphia Eagles",
              "NYJ": "New York Jets", "NYG": "New York Giants", "NO": "New Orleans Saints", "NE": "New England Patriots",
              "MIA": "Miami Dolphins", "ATL": "Atlanta Falcons", "LA": "Los Angeles Rams", "JAX": "Jacksonville Jaguars",
              "IND": "Indianapolis Colts", "BAL": "Baltimore Ravens", "BUF": "Buffalo Bills", "CAR": "Carolina Panthers",
              "CIN": "Cincinnati Bengals", "CLE": "Cleveland Browns", "DAL": "Dallas Cowboys", "DET": "Detroit Lions",
              "GB": "Green Bay Packers", "HOU": "Houston Texans", "WAS": "Washington Football Team", "DEN": "Denver Broncos",
              "MIN": "Minnesota Vikings", "CHI": "Chicago Bears", "LAC": "Los Angeles Chargers", "OAK": "Oakland Raiders",
              "LV": "Las Vegas Raiders"}

# Assign the result back to the column. Calling replace(inplace=True) on games["homeTeamAbbr"]
# acts on a selected column object and is not guaranteed to update the frame itself.
games["homeTeamAbbr"] = games["homeTeamAbbr"].replace(TEAM_NAMES)
print(games["homeTeamAbbr"])

### Objective of this Competition

The objective is to develop new methods that produce additional statistics for special teams plays.

The resulting methods can be adopted by the NFL for on-air distribution, and they will help improve America's most popular sports league.

Where to start? The competition's evaluation metric is comprehensive and leaves room for creativity.

* Quantify special teams strategy. Which strategies yield the best results? What are other strategies that could be adopted?

* Rank special teams players. How do they stack up with respect to one another?

* Create a new special teams metric. 

Before embarking on the new, check what exists already....
https://nextgenstats.nfl.com/glossary

In [None]:
# Summary table of the headline findings: one row per feature, with its ranking and a remark.
header = ['Sl_no','Features','Rankings','Remarks']
sl_no = list(range(1, 7))
Features = ['Best Team','Best Strategy','Strategies to be adopted','Team Players Ranking',
            'Special metric','Penalty Losing Team']
Rankings = ['WAS : Washington Football Team','Punting','Return,Touchback and Muffed',
            'Tress Way','Yet to be finalised','Las Vegas Raiders']
Remarks = ['Washington Football Team had the best yards gained record in the seasons covered till 2020',
           'Punting gains the maximum yards, while Kick-off comes close in next best strategy',
           'Each strategy had multiple results based on the oponents.The most used were Return, Touchback & Muffed',
           'Tress Way was best ranking player in terms of the yards gained in the games',
           'More analysis is required by bringing in other NFL related datasets',
           'Las Vegas Raiders have lost more yards due to penalties']

# Header and cell styling are spelled out as nested dicts.
results_trace = go.Table(
    header=dict(values=header, font=dict(color='black', size=30), height=40),
    cells=dict(values=[sl_no, Features, Rankings, Remarks],
               font=dict(size=15), height=40, align='center'),
)
submit_table = go.Figure(data=[results_trace])
submit_table.update_layout(title="Results Submission Table", height=800, width=1000)
submit_table.show()

How the above results were reached is shown below. Only the tables and charts that matter have been kept for viewing; the other charts are hidden to meet the competition guidelines.

In [None]:
from plotly.subplots import make_subplots

# Side-by-side box plots of player height and weight.
# Height uses the cleaned numeric "height_feet" column: the raw "height" column is text
# that is split on "-" and converted earlier, so it cannot be plotted as a number directly.
fig = make_subplots(rows = 1, cols= 2)

fig.add_trace(go.Box(x=players.height_feet,name='Height (ft)'),row=1,col=1)
fig.add_trace(go.Box(x=players.weight,name='Weight'),row=1,col=2)

fig.update_layout(title='Players Height & Weights')

In [None]:
# Table of the number of games recorded in each season.
team_table = go.Figure()
header = ['Sl_no','Seasons','Games']

# Take the season labels and the counts from the same grouped result so they always
# line up (unique() keeps file order, while groupby sorts its keys).
games_per_season = games.groupby('season')['gameId'].count()
seasons = games_per_season.index.values
game_played = games_per_season.values
# Serial numbers follow the number of seasons present instead of a fixed [1, 2, 3].
sl_no = list(range(1, len(seasons) + 1))

team_table.add_trace(go.Table(header_values=header,header_font_color='black',
                              header_font_size=30,header_height=40,
                              cells_values=[sl_no,seasons,game_played],
                              cells_font_size=20,cells_height=40))
team_table.update_layout(height = 500,width = 800,title="Games & Seasons")
team_table.show()

### Which team has the highest yards gained?

In [None]:
#Scores by each team in a game
# Mean play result per (game, possession team), computed in a single grouped pass.
# This replaces a loop that re-filtered `plays` for every game and grew the frame with
# DataFrame.append, which is quadratic and no longer exists in pandas 2.x.
# Rows come out sorted by gameId; the downstream aggregation does not depend on row order.
play_result = (
    plays.groupby(['gameId', 'possessionTeam'])['playResult']
    .mean()
    .reset_index()
    .reindex(columns=['possessionTeam', 'playResult', 'gameId'])
    # The season is the gameId with its last six digits removed.
    .assign(season=lambda frame: frame['gameId'] // 1000000)
)
play_result.head()

In [None]:
# Stacked horizontal bars: each team's mean of its per-game play-result averages, coloured by season.
team_res = (
    play_result.groupby(['possessionTeam','season'])['playResult']
    .mean()
    .reset_index()
    .astype({'season': 'category'})
)
p_res = px.bar(team_res, x='playResult', y='possessionTeam', color='season')
p_res.update_layout(
    title='Play results of the Teams',
    barmode='stack',
    height=800,
    width=800,
    yaxis_categoryorder='total ascending',
)
p_res.show()

### Which strategy has the highest yards gained for the teams on average?

In [None]:
# Tag every play with its season (gameId with the last six digits removed), then chart the
# mean play result per play type and outcome, one facet per season.
plays['season'] = plays['gameId'].map(lambda game_id: int(game_id / 1000000))
play_tidy = (
    plays.groupby(['season','specialTeamsPlayType','specialTeamsResult'])['playResult']
    .mean()
    .reset_index()
)
play_type = px.bar(play_tidy, x='playResult', y='specialTeamsPlayType',
                   color='specialTeamsResult', facet_col='season', facet_col_wrap=2)
play_type.update_layout(title='Play types & its results', barmode='group',
                        width=1000, height=1000)
play_type.show()

### Which teams are leading in the various play types?
This chart shows how the teams implemented the strategies and which of them had the overall advantage.

In [None]:
# Mean play result per (outcome, team) with one column per play type; missing combinations become 0.
play_team = (
    plays.pivot_table(values='playResult',
                      index=['specialTeamsResult','possessionTeam'],
                      columns='specialTeamsPlayType',
                      aggfunc='mean')
    .reset_index()
    .fillna(0)
)

# Stacked bars of the Punt column per team, coloured by outcome.
team_play = px.bar(play_team, x='Punt', y='possessionTeam', color='specialTeamsResult')
team_play.update_layout(title='Yard gain distribution when using the Punt Playtype',
                        barmode='stack', height=1200,
                        yaxis_categoryorder='total ascending')
team_play.show()

In [None]:
# Stacked bars of the Field Goal column per team, coloured by outcome.
field_goal = px.bar(play_team, x='Field Goal', y='possessionTeam', color='specialTeamsResult')
field_goal.update_layout(title='Yard gain distribution when using the Field Goals Type',
                         barmode='stack', height=1200,
                         yaxis_categoryorder='total ascending')
field_goal.show()

In [None]:
# Stacked bars of the Kickoff column per team, coloured by outcome.
Kickoff = px.bar(play_team, x='Kickoff', y='possessionTeam', color='specialTeamsResult')
Kickoff.update_layout(title='Yard gain distribution when using the Kickoff Playtype',
                      barmode='stack', height=1200,
                      yaxis_categoryorder='total ascending')
Kickoff.show()

## The players in the game types, and their performance

In [None]:
player_id = players[['displayName','nflId']] #Just the name

# Attach the kicker's display name to every play (kickerId -> nflId).
# The guard keeps the cell re-runnable: merging a second time would produce
# displayName_x / displayName_y and break the later cells that read "displayName".
if 'displayName' not in plays.columns:
    plays = pd.merge(left=player_id, right=plays, how='right',
                     right_on='kickerId', left_on='nflId')  # keep every play
    plays = plays.drop(columns='nflId')  # duplicate of kickerId after the merge
#plays.info()

### Which player had the best yards gained in the seasons to date?
These results also include the 2021 season.

In [None]:
# One row per (player, season) with the mean play result for each special-teams outcome,
# plus "total_result" = the sum of those per-outcome means, sorted best first.
play_act = (
    plays.pivot_table(values='playResult',
                      index=['displayName','season'],
                      columns='specialTeamsResult',
                      aggfunc='mean')
    .reset_index()
    .fillna(0)
)
outcome_means = play_act.drop(columns=['displayName','season'])
play_act['total_result'] = outcome_means.sum(axis=1)
play_act = play_act.sort_values(by='total_result', ascending=False)
play_act.head(10)

In [None]:
# Names of the players whose 2020 total_result is above 200, then a preview of their plays.
kicker = play_act.loc[
    (play_act.season == 2020) & (play_act.total_result > 200), 'displayName'
].values
plays.loc[plays.displayName.isin(kicker)].head()

### Which player has gained the most yards on average in every season?

In [None]:
# Plays of the selected kickers. The explicit copy means the season column is changed on
# an independent frame rather than on a filtered view of `plays`.
kick_act = plays[plays.displayName.isin(kicker)].copy()
kick_act['season'] = kick_act['season'].astype('str')  # text labels give discrete colours

# Mean play result per player and season.
kick_act_grp = kick_act.groupby(['displayName','season'])['playResult'].mean().reset_index()
result_fig = px.bar(data_frame=kick_act_grp,y='displayName',x='playResult',color='season')
result_fig.update_layout(yaxis={'categoryorder':'total descending'},
                         title='Yard gain distribution by the player')
result_fig.show()

### Which types of penalty have led to yards gained or lost?

In [None]:
#Lets check the penalties, which team has the most and its impact

# Plays without a penalty count as 0 penalty yards. The result is assigned back because
# fillna(inplace=True) on plays.penaltyYards acts on a selected column object and is not
# guaranteed to update `plays` itself.
plays['penaltyYards'] = plays['penaltyYards'].fillna(0)

# Mean penalty yards for each penalty code.
penalty_imp = plays.groupby('penaltyCodes')['penaltyYards'].mean().reset_index()
penal = px.bar(data_frame=penalty_imp,x='penaltyCodes',y='penaltyYards')
penal.update_layout(xaxis={'categoryorder' : 'total ascending'},
                    title = 'Yard lost/gained distribution based on penalty types')
penal.show()

In [None]:
# Break the ';'-separated penaltyJerseyNumbers text into one column per entry.
team_jer_no = plays['penaltyJerseyNumbers'].str.split(pat=';', expand=True)

In [None]:
# Split penaltyJerseyNumbers on spaces (at most two splits): the first piece becomes "Team",
# the second "Id"; the remainder is discarded.
jer_no = plays.penaltyJerseyNumbers.str.split(' ', n=2, expand=True).drop(columns=2)
jer_no.columns = ['Team','Id']

# Add the penalty details and the first ';'-separated entry, then keep only complete rows.
jer_no = jer_no.assign(
    penaltyCodes=plays.penaltyCodes,
    penaltyYards=plays.penaltyYards,
    teamjerseyCodes=team_jer_no[0],  #taking the 1st penalty jersey code
).dropna()

# Keep only the part of "Id" that precedes any ';'.
jer_no['Id'] = jer_no['Id'].str.split(';', expand=True)[0]
jer_no.head()

### Which teams had the most penalty yards?

In [None]:
# Mean penalty yards per penalised team, bars ordered from smallest to largest.
penalty_team = jer_no.groupby('Team')['penaltyYards'].mean().reset_index()
penal_team = px.bar(penalty_team, x='Team', y='penaltyYards')
penal_team.update_layout(title='Yard gain/lost distribution based on team',
                         xaxis_categoryorder='total ascending')
penal_team.show()

### Next to see, who was the reason behind the penalty yards?

In [None]:
# Total penalty yards per team/jersey code, bars ordered from smallest to largest.
penalty_id = jer_no.groupby('teamjerseyCodes')['penaltyYards'].sum().reset_index()
penal_id = px.bar(penalty_id, x='penaltyYards', y='teamjerseyCodes')
penal_id.update_layout(title='Yard gain/lost distribution based on player',
                       height=1000,
                       yaxis_categoryorder='total ascending')
penal_id.show()

### How did the snap turn out for each play?

In [None]:
# Scouting rows that have a snap detail, enriched with the kicker name, play result and
# season of the matching play.
# The match is made explicitly on (gameId, playId). Copying the columns across by row
# index is only correct if both tables happen to share the same row order, and the
# per-game DataFrame.append loop no longer exists in pandas 2.x.
play_columns = plays[['gameId', 'playId', 'displayName', 'playResult', 'season']]
scout_snap = scouting[scouting.snapDetail.notna()].merge(
    play_columns, on=['gameId', 'playId'], how='left'  # keep every scouting row
)
scout_snap.head()

In [None]:
#Recovering memory given to dataframes
# Delete the intermediate frames built for the charts above, then run a garbage-collection pass.
# NOTE(review): the del raises NameError if this cell is run a second time in the same session.
import gc
del play_team,play_act,kick_act_grp,penalty_imp,penalty_team,penalty_id
gc.collect()

In [None]:
# Box plot of snapTime for each season.
snap_time = px.box(scout_snap, x='season', y='snapTime')
snap_time.update_layout(height=600, title='Snap Times over seasons')

In [None]:
# Box plot of hangTime for each season.
hang_time = px.box(scout_snap, x='season', y='hangTime')
hang_time.update_layout(height=600, title='Hang Times over seasons')

In [None]:
# Box plot of operationTime for each season.
oper_time = px.box(scout_snap, x='season', y='operationTime')
oper_time.update_layout(height=500, title='operation Times over seasons')

### How did the snapDetail impact the yards gained?
The spread of the play result shows that "OK" snaps do not always gain yards. Yards can be lost even with a good snap.

In [None]:
# Distribution of the play result, one box per snap detail.
scout_box = px.box(scout_snap, x='playResult', color='snapDetail')
scout_box.show()

### How did the players impact the Yards gained per snap in each season

In [None]:
# Mean play result per player and season, animated one season per frame.
play_box = scout_snap.groupby(['displayName','season'])['playResult'].mean().reset_index()

# category_orders only takes effect when its entries equal the values in the column.
# The season values are numbers, so a list of strings such as '2018' never matches;
# the frame order is therefore built from the data itself.
season_order = sorted(play_box['season'].unique().tolist())
play_box['season'] = play_box['season'].astype('category')

scout_bar = px.bar(data_frame=play_box,x='playResult',y='displayName',animation_frame='season',
                   category_orders={'season': season_order})
scout_bar.update_layout(title='Impact of Players on the Snap results',height =1000,
                       yaxis = {'categoryorder' : 'total ascending'})
scout_bar.show()

In [None]:
# Drop the frame first and collect afterwards; collecting before the del cannot reclaim it.
del play_box
gc.collect()

### How is the track record of the players on the field? 

Three tracking datasets, covering 2018 to 2020, are provided for analysis.

All three datasets have the same columns, so every parameter can be built as
a time series over the 3 seasons, and could even be used to predict what to expect from each player in the coming season.
The time-series data was cleaned using the helper functions, and the datasets below were created from it.

#### The entire code can create condensed version of the tracking file
perf_frames = []

play_frames = []

for reference in trac.gameId.unique():

    print('Processing {}'.format(reference))

    perf_frames.append(performance(reference))

    play_frames.append(play_perform(reference))

    print('completed {}'.format(reference))

# Concatenate once at the end: DataFrame.append copies the whole frame on every call and was removed in pandas 2.0.

perf_trac = pd.concat(perf_frames) if perf_frames else pd.DataFrame()

play_trac = pd.concat(play_frames) if play_frames else pd.DataFrame()
    
#### The above code takes more than 1.5 hours to complete, due to multiple groupby operations on huge data 

So I ran the code once and saved the results as CSV files.

Below, I load those CSV files to continue the visualisation.

In [None]:
# Load the two pre-computed tracking summaries from CSV instead of re-running the slow helper code.
# index_col=False tells pandas not to use the first column as the index.
play_trac = pd.read_csv('../input/nfl-transformed-data/play_trac.csv',index_col=False)
perf_trac = pd.read_csv('../input/nfl-transformed-data/perf_trac.csv',index_col=False)

In [None]:
# Preview of play_trac: player tracking performance for each gameId and season.
# Distance, speed and acceleration are collected.
play_trac.head()

In [None]:
# Preview of perf_trac: the performance track of each individual player on its own.
perf_trac.head()

In [None]:
#Distance plot
# Bind the name to the figure that is actually drawn; the empty go.Figure() was never used.
box_perf = px.box(data_frame=perf_trac,x='dis',color='season')
box_perf

In [None]:
# Acceleration: distribution of the 'a' column, one box per season.
px.box(perf_trac, x='a', color='season')

In [None]:
# Speed: distribution of the 's' column, one box per season.
px.box(perf_trac, x='s', color='season')

In [None]:
#Player Performance data
# Attach each player's display name to the play-level tracking summary, keeping every
# tracking row. The guard keeps the cell re-runnable: merging a second time would produce
# displayName_x / displayName_y and break the later groupby on "displayName".
if 'displayName' not in play_trac.columns:
    play_trac = pd.merge(left=players[['nflId','displayName']], right=play_trac,
                         on='nflId', how='right')
play_trac.head()

In [None]:
# Store game ids as text and seasons as categories in both tracking summaries.
tracking_dtypes = {'gameId': 'str', 'season': 'category'}
play_trac = play_trac.astype(tracking_dtypes)
perf_trac = perf_trac.astype(tracking_dtypes)


Before we start searching the haystack, let's decide what to search for.

* Highest average Distance covered by the player in a Game

* Highest Average speed and acceleration of the player in a Game

* Number of events in a fixed time duration, say 1 minute(not calculated)

* Average distance travelled between events(not calculated)

* Does the Distance, Speed and Acceleration change, depending on being in Home or Away?(not calculated)

* Do the players and their performance remain same across seasons? 

In [None]:
# Per-season, per-player totals and averages from the play-level tracking summary.
# season is a categorical column, so observed=True restricts the result to the
# season/player pairs that actually occur. Without it, groupby also emits a row for
# every unobserved pair (0 for the sum, NaN for the means).
season_player = play_trac.groupby(['season','displayName'], observed=True)
dis_all = season_player['dis'].sum().reset_index()   # total distance
acc_all = season_player['a'].mean().reset_index()    # mean acceleration
spd_all = season_player['s'].mean().reset_index()    # mean speed
dis_all.head()

In [None]:
# Five largest total distances.
dis_all = dis_all.sort_values(by='dis', ascending=False)
dis_all[['displayName','dis']].head(5)

In [None]:
# Five highest mean speeds.
spd_all = spd_all.sort_values(by='s', ascending=False)
spd_all[['displayName','s']].head(5)

In [None]:
# Five highest mean accelerations.
acc_all = acc_all.sort_values(by='a', ascending=False)
acc_all[['displayName','a']].head(5)

In [None]:
#Average speed of one player (nflId 39470) in each of his games
# observed=True keeps only the season each game really belongs to. season is categorical,
# so the default would add a row for every game under every season.
velo_trac = (
    play_trac[play_trac.nflId == 39470]
    .groupby(['gameId','season'], observed=True)['s']
    .mean()
    .reset_index()
)
velo_fig = px.strip(data_frame=velo_trac,x='season',y='s',color='gameId')
velo_fig.update_layout(title='Average speed of player',yaxis={'categoryorder':'total ascending'})
velo_fig.update_yaxes(type = 'log')
velo_fig.show()

In [None]:
#Average acceleration of one player (nflId 39470) in each of his games
# observed=True keeps only the season each game really belongs to. season is categorical,
# so the default would add a row for every game under every season.
acl_trac = (
    play_trac[play_trac.nflId == 39470]
    .groupby(['gameId','season'], observed=True)['a']
    .mean()
    .reset_index()
)
acl_fig = px.strip(data_frame=acl_trac,x='season',y='a',color='gameId')
acl_fig.update_layout(title='Average acceleration of player')
acl_fig.update_yaxes(type = 'log')
acl_fig.show()

In [None]:
#Distance covered by a particular player (nflId 39470) in multiple games
# observed=True keeps only the season each game really belongs to. season is categorical,
# so the default would add a zero-distance row for every game under every other season.
dis_trac = (
    play_trac[play_trac.nflId == 39470]
    .groupby(['gameId','season'], observed=True)['dis']
    .sum()
    .reset_index()
)
dis_fig = px.strip(data_frame=dis_trac,x='season',y='dis',color='gameId')
dis_fig.update_layout(title='Distance run by NFLId player',yaxis={'categoryorder':'total ascending'})
dis_fig.update_yaxes(type = 'log')
dis_fig.show()

### End of the Notebook.... The following markdown cells contain the helper functions written to build the player tracking datasets

# Load the raw tracking files for the 2018, 2019 and 2020 seasons.
trac2018 = pd.read_csv('../input/nfl-big-data-bowl-2022/tracking2018.csv')
trac2019 = pd.read_csv('../input/nfl-big-data-bowl-2022/tracking2019.csv')
trac2020 = pd.read_csv('../input/nfl-big-data-bowl-2022/tracking2020.csv')

* Distance moved the entire game by the player
* Average speed and speed distribution of the player
* Time & Distance Interval between the events
* Yards gained or lost as the play result
* Variation of the above parameters from one game to other
* Variation of the above parameters over multiple seasons


from dateutil import parser
def time_parser(session):
    """Parse a date/time string with dateutil and return it as a ``pandas.Timestamp``."""
    return pd.Timestamp(parser.parse(session))
    

#bringing all the dataset into one variable
print(trac2018.shape[0],trac2019.shape[0],trac2020.shape[0])
# One concat instead of chained DataFrame.append calls: append was removed in pandas 2.0
# and each call copied the accumulated frame again.
trac = pd.concat([trac2018, trac2019, trac2020])
print(trac.shape)

#Saving memory from the RAM
# Delete the per-season frames first, then collect; collecting before the del cannot reclaim them.
del trac2018,trac2019,trac2020
gc.collect()

#Attaching seasons to the data
# The season is the gameId with its last six digits removed (vectorised integer division).
trac['season'] = trac.gameId // 1000000

### Let's get some facts about these colossal datasets before we delve deeper for insights

# Three stacked table panels in one column, plus the labels shared by all three tables.
trac_fig = make_subplots(
    rows=3,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    specs=[[{"type": "table"}] for _ in range(3)],
)
header = ['Sl_no','Features','values']
sl_no = list(range(1, 4))
Features = ['Games Covered','Players Covered','Data points per player']

def data_point(season):
    """Return coverage figures for one season of the module-level ``trac`` frame.

    Parameters
    ----------
    season : value compared against ``trac.season``.

    Returns
    -------
    tuple of (games covered, players covered, rows per player), where rows per
    player is the season's row count divided by the player count, rounded down
    (0 when the season has no player ids).
    """
    # Filter once instead of three times.
    season_rows = trac[trac.season == season]
    game_covered = season_rows.gameId.nunique()
    # nunique() ignores missing ids; len(unique()) would count a missing id as one more player.
    id_covered = season_rows.nflId.nunique()
    data_points = season_rows.shape[0] // id_covered if id_covered else 0
    return game_covered, id_covered, data_points

# Coverage figures for every season, in the order the seasons appear in `trac`.
value_list = [data_point(season) for season in trac.season.unique()]

# One table per panel: rows 1-3 show the first three entries of value_list.
for row_no in (1, 2, 3):
    season_table = go.Table(
        header=dict(values=header, height=40, font=dict(size=24)),
        cells=dict(values=[sl_no, Features, value_list[row_no - 1]], font=dict(size=18)),
    )
    trac_fig.add_trace(season_table, row=row_no, col=1)

trac_fig.update_layout(title="Games, players & data_points", height=1000, width=800)
trac_fig.show()

### Couple of insights

* The 2020 season has fewer games covered and even fewer players

* The 2020 season also has fewer data points per player

#Analyse a single player across all 3 seasons, then use that pipeline on the remaining
#players to get the necessary output from the helper functions

#Helper Function to get the NFL_Id related data
def performance (ref_id):
    """Summarise one game's tracking rows into a single row per player.

    Reads the module-level ``trac`` frame, keeps the rows whose ``gameId`` equals
    ``ref_id``, aggregates them per one-minute bin and player, and then collapses
    the bins per player.

    Parameters
    ----------
    ref_id : game id to summarise; also used to derive the season.

    Returns
    -------
    DataFrame with one row per ``nflId`` and the columns ``s`` (max), ``a`` (max),
    ``dis`` (sum), ``event_count`` (sum of the per-minute counts of distinct
    events), ``gameId`` and ``season``.
    """
    # Work on a copy so the column assignment below never targets a slice of `trac`.
    trac_perf = trac[trac.gameId == ref_id].copy()
    # Parse the whole column in one vectorised call instead of one Python call per row.
    trac_perf['time'] = pd.to_datetime(trac_perf['time'])
    trac_perf = trac_perf.set_index('time')
    # nflId is a grouping key, so it is not aggregated as a column as well.
    agg_rules = {'x':'last', 'y':'last', 's':'max', 'a':'max', 'dis':'sum',
                 'event':'unique', 'displayName':'last'}
    grouper = pd.Grouper(freq='1min')  # same 60-second bins as '60S'
    trac_min = trac_perf.groupby([grouper,'nflId']).agg(agg_rules).reset_index()
    # Number of distinct event labels seen in each minute.
    trac_min['event_count'] = trac_min['event'].apply(len)
    agg_rul2 = {'s':'max', 'a':'max', 'dis':'sum','event_count':'sum'}
    # Columns are selected with a list: tuple-style selection on a GroupBy is no longer supported.
    trac_agg = trac_min.groupby('nflId')[['dis','s','a','event_count']].agg(agg_rul2).reset_index()
    trac_agg['gameId'] = ref_id
    # The season is the gameId with its last six digits removed.
    trac_agg['season'] = int(ref_id // 1000000)
    return trac_agg

#Helper Function to get the play_Id related data
def play_perform(ref_id):
    """Summarise one game's tracking rows into a single row per play and player.

    Reads the module-level ``trac`` frame, keeps the rows whose ``gameId`` equals
    ``ref_id``, aggregates them per one-minute bin, play and player, and then
    collapses the bins per (play, player).

    Parameters
    ----------
    ref_id : game id to summarise; also used to derive the season.

    Returns
    -------
    DataFrame with one row per (``playId``, ``nflId``) and the columns ``s`` (max),
    ``a`` (max), ``dis`` (sum), ``event_count`` (sum of the per-minute counts of
    distinct events), ``gameId`` and ``season``.
    """
    # Work on a copy so the column assignment below never targets a slice of `trac`.
    trac_perf = trac[trac.gameId == ref_id].copy()
    # Parse the whole column in one vectorised call instead of one Python call per row.
    trac_perf['time'] = pd.to_datetime(trac_perf['time'])
    trac_perf = trac_perf.set_index('time')
    # playId and nflId are grouping keys, so they are not aggregated as columns as well.
    agg_rules = {'x':'last', 'y':'last', 's':'max', 'a':'max', 'dis':'sum',
                 'event':'unique', 'displayName':'last'}
    grouper = pd.Grouper(freq='1min')  # same 60-second bins as '60S'
    trac_min = trac_perf.groupby([grouper,'playId','nflId']).agg(agg_rules).reset_index()
    # Number of distinct event labels seen in each minute.
    trac_min['event_count'] = trac_min['event'].apply(len)
    agg_rul2 = {'s':'max', 'a':'max', 'dis':'sum','event_count':'sum'}
    # Columns are selected with a list: tuple-style selection on a GroupBy is no longer supported.
    play_agg = trac_min.groupby(['playId','nflId'])[['dis','s','a','event_count']].agg(agg_rul2).reset_index()
    play_agg['gameId'] = ref_id
    # The season is the gameId with its last six digits removed.
    play_agg['season'] = int(ref_id // 1000000)
    return play_agg