In [1]:
import numpy as np
import pandas as pd
import pickle

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 125)

In [2]:
data = pd.read_csv("data/Sample_PFF_Data.csv")

In [3]:
dataset = data.copy(deep=True)

In [4]:
# should Shotgun and Pistol be NaN for run plays and 0 for passes where no shotgun? Or just zero fine?
dataset.SHOTGUN.where(dataset.SHOTGUN.isna(),1, inplace=True)
dataset.SHOTGUN.fillna(0, inplace=True)

dataset.PISTOL.where(dataset.PISTOL.isna(),1, inplace=True)
dataset.PISTOL.fillna(0, inplace=True)

dataset['MOFO_PLAYED'] = dataset.MOFOCPLAYED.replace({'O': 1, 'C': 0})
dataset['MOFO_SHOWN'] = dataset.MOFOCPLAYED.replace({'O': 1, 'C': 0})


In [5]:
dataset.SHOTGUN.unique()

array([0, 1])

In [6]:
# convert string time (2:00) into seconds as int (120)
def convert_time(time_str):
    return int(time_str[0:2])*60 + int(time_str[3:])
# remove * and +Q to simplify features
# decided to group all 3-RB sets together since had similar Run vs Pass rates (and all had low sample sizes)
def convert_off_personnel(personnel_str):
    new_alignment = personnel_str[0:2]
    if new_alignment[0] == '3':
        new_alignment = '3+'
    if new_alignment == 'Un':
        new_alignment = np.nan
    return new_alignment

In [7]:
dataset['CLOCK_INT'] = dataset['CLOCK'].apply(convert_time)

In [8]:
dataset.QUARTER = dataset.QUARTER.astype("category")
dataset.DOWN = dataset.DOWN.astype("category")
dataset.OFFTIMEOUTSREMAINING = dataset.OFFTIMEOUTSREMAINING.astype("category")
dataset.DEFTIMEOUTSREMAINING = dataset.DEFTIMEOUTSREMAINING.astype("category")
dataset.HASH = dataset.HASH.astype("category")

In [9]:
dataset = dataset.loc[(dataset.RUNPASS == 'P') | (dataset.RUNPASS == 'R') ].reset_index(drop=True)
dataset.RUNPASS = dataset.RUNPASS.astype("category")

In [10]:
dataset['OFFPERSONNEL_SIMPLIFIED'] = dataset['OFFPERSONNELBASIC'].apply(convert_off_personnel)

In [11]:
col_names_for_numeric_previous = [
'FORCEDFUMBLE',
'HIT',
'HURRY',
'GAINLOSSNET',
'INTERCEPTION',
'NOHUDDLE',
'PENALTY',
'PASSDEPTH',
'PASSBREAKUP',

'DROPBACKDEPTH',

'MOFO_PLAYED',
'MOFO_SHOWN',

'PISTOL',
'PLAYACTION',
'SACK',
'SCREEN',
'SHIFTMOTION',
'SHOTGUN',

'QBMOVEDOFFSPOT',
'QBPRESSURE',
'TIMETOPRESSURE',
'TIMETOTHROW',
'YARDSAFTERCATCH',
'YARDSAFTERCONTACT',
]

In [12]:
prev_names = ['prev_' + x for x in col_names_for_numeric_previous]
game_prev_names = ['game_prev_' + x for x in col_names_for_numeric_previous] # instead of just previous, get cumulative mean from all previous plays in game
historical_prev_names = ['historical_prev_' + x for x in col_names_for_numeric_previous] # instead of just previous, get cumulative mean from all previous plays in game


In [13]:
# can't sort by ["GAMEID", "DRIVE", "DRIVEPLAY"] bc each team has drive 1, drive 2, etc. Sort by playid, looks like it aligns with driveplay

# need to sort? think so
dataset = dataset.sort_values(["GAMEID", "OffTeam", "PLAYID"]).reset_index(drop=True)
dataset[prev_names] = dataset.groupby(["GAMEID", "OffTeam", "DRIVE"])[col_names_for_numeric_previous].shift(1)
dataset[game_prev_names] = dataset.groupby(["GAMEID", "OffTeam"])[col_names_for_numeric_previous].expanding(min_periods=10).mean().reset_index(drop=True)


In [19]:
col_names_for_categorical_previous = [
    'OFFPERSONNEL_SIMPLIFIED', 
    'CENTERPASSBLOCKDIRECTION'
]
prev_categorical_names = ['prev_' + x for x in col_names_for_categorical_previous]
# create previous play categorical
dataset[prev_categorical_names] = dataset.groupby(["GAMEID", "OffTeam", "DRIVE"])[col_names_for_categorical_previous].shift(1)

In [20]:
# create rolling game

pd.get_dummies(dataset, prefix_sep="_", dummy_na=True, columns=col_names_for_categorical_previous, drop_first=True).head()

Unnamed: 0,2MINUTE,CLOCK,DEFENSIVELINESHIFT,DEFPERSONNEL,DEFSCORE,DEFSUBSTITUTIONS,DefTeam,DEFTIMEOUTSREMAINING,DISTANCE,DOWN,DRIVE,DRIVEENDEVENT,DRIVEENDFIELDPOSITION,DRIVEENDPLAYNUMBER,DRIVEPLAY,DRIVESTARTEVENT,DRIVESTARTFIELDPOSITION,DROPBACKDEPTH,DROPBACKTYPE,FIELDPOSITION,FORCEDFUMBLE,FUMBLE,FUMBLELOST,GAINLOSS,GAINLOSSNET,GAMEID,GARBAGETIME,HASH,HASHDEF,HIT,HURRY,INTERCEPTION,KICKYARDS,MOFOCPLAYED,MOFOCSHOWN,NEXTPLAYID,NOHUDDLE,NOPLAY,OFFFORMATIONUNBALANCED,OFFPERSONNELBASIC,OFFSCORE,OFFSUBSTITUTIONS,OffTeam,OFFTIMEOUTSREMAINING,OPERATIONTIME,OPTION,PASSBREAKUP,PASSDEPTH,PASSDIRECTION,PASSRECEIVERPOSITIONTARGET,PASSRESULT,PASSRUSHRESULT,PASSWIDTH,PENALTY,PENALTYYARDS,PISTOL,PLAYACTION,PLAYACTIONFAKE,PLAYCLOCK,PLAYENDFIELDPOSITION,PLAYID,POAACTUAL,POAINTENDED,PREVIOUSPFFPLAYID,PUMPFAKE,QBMOVEDOFFSPOT,QBPRESSURE,QBRESET,QUARTER,RBDIRECTION,RBSINBACKFIELD,RETURNYARDS,RUNPASS,RUNPASSOPTION,SACK,SCORE,SCOREDIFFERENTIAL,SCREEN,SHIFTMOTION,SHOTGUN,SNAPTIME,SORTORDER,SPOTLEFT,STUNT,TACKLE,TEALIGNMENT,TEMPO,TIMETOPRESSURE,TIMETOTHROW,TOUCHDOWN,TRICKLOOK,TRICKPLAY,WEEK,YARDSAFTERCATCH,YARDSAFTERCONTACT,MOFO_PLAYED,MOFO_SHOWN,CLOCK_INT,prev_FORCEDFUMBLE,prev_HIT,prev_HURRY,prev_GAINLOSSNET,prev_INTERCEPTION,prev_NOHUDDLE,prev_PENALTY,prev_PASSDEPTH,prev_PASSBREAKUP,prev_DROPBACKDEPTH,prev_MOFO_PLAYED,prev_MOFO_SHOWN,prev_PISTOL,prev_PLAYACTION,prev_SACK,prev_SCREEN,prev_SHIFTMOTION,prev_SHOTGUN,prev_QBMOVEDOFFSPOT,prev_QBPRESSURE,prev_TIMETOPRESSURE,prev_TIMETOTHROW,prev_YARDSAFTERCATCH,prev_YARDSAFTERCONTACT,game_prev_FORCEDFUMBLE,game_prev_HIT,game_prev_HURRY,game_prev_GAINLOSSNET,game_prev_INTERCEPTION,game_prev_NOHUDDLE,game_prev_PENALTY,game_prev_PASSDEPTH,game_prev_PASSBREAKUP,game_prev_DROPBACKDEPTH,game_prev_MOFO_PLAYED,game_prev_MOFO_SHOWN,game_prev_PISTOL,game_prev_PLAYACTION,game_prev_SACK,game_prev_SCREEN,game_prev_SHIFTMOTION,game_prev_SHOTGUN,game_prev_QBMOVEDOFFSPOT,game_prev_QBPRESSURE,game_prev_TIMETOPRESSURE,game_prev_TIMETOTHROW,game_prev_YARDSAFTERCATCH,game_prev_YARDSAFTERCONTACT,prev_OFFPERSONNEL_SIMPLIFIED,prev_CENTERPASSBLOCKDIRECTION,OFFPERSONNEL_SIMPLIFIED_01,OFFPERSONNEL_SIMPLIFIED_02,OFFPERSONNEL_SIMPLIFIED_03,OFFPERSONNEL_SIMPLIFIED_10,OFFPERSONNEL_SIMPLIFIED_11,OFFPERSONNEL_SIMPLIFIED_12,OFFPERSONNEL_SIMPLIFIED_13,OFFPERSONNEL_SIMPLIFIED_14,OFFPERSONNEL_SIMPLIFIED_20,OFFPERSONNEL_SIMPLIFIED_21,OFFPERSONNEL_SIMPLIFIED_22,OFFPERSONNEL_SIMPLIFIED_23,OFFPERSONNEL_SIMPLIFIED_3+,OFFPERSONNEL_SIMPLIFIED_nan,CENTERPASSBLOCKDIRECTION_L,CENTERPASSBLOCKDIRECTION_R,CENTERPASSBLOCKDIRECTION_nan
0,0,14:54,0,4-2-5,0,1,Team_3,3,10,1,1.0,FIELD GOAL,20.0,14.0,1.0,KICKOFF - RETURN,-39.0,8.0,SD,-39,0,0,0,,0,18548,0,R,L,0,0,0,,C,C,3528156.0,0,0,0,11,0,1,Team_23,3,,0,0,33.0,L,LWR,INCOMPLETE,,1.0,0,,0,1,1,13.0,-39,3528152,,,3528149.0,0,0,0,0,1,,1.0,,P,0,0,0.0,0,0,1,1,,2,29,0,0,R,0,,2.7,0,0,0,2,,,0.0,0.0,894,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False
1,0,14:49,0,4-2-5,0,0,Team_3,3,10,2,1.0,FIELD GOAL,20.0,14.0,2.0,KICKOFF - RETURN,-39.0,7.0,SD,-39,0,0,0,10.0,10,18548,0,R,L,0,0,0,,C,C,3528157.0,0,0,0,11,0,1,Team_23,3,,0,0,5.0,M,SRWR,COMPLETE,,32.0,0,,0,0,0,22.0,-49,3528156,,,3528152.0,0,0,0,0,1,,,,P,0,0,0.0,0,0,0,1,,3,29,1,1,,0,,1.7,0,0,0,2,5.0,4.0,0.0,0.0,889,0.0,0.0,0.0,0.0,0.0,0.0,0.0,33.0,0.0,8.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,,2.7,,,,,,,,,,,,,,,,,,,,,,,,,,,11.0,L,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False
2,0,14:09,0,4-2-5,0,0,Team_3,3,10,1,1.0,FIELD GOAL,20.0,14.0,3.0,KICKOFF - RETURN,-39.0,,,-49,0,0,0,2.0,2,18548,0,C,C,0,0,0,,C,O,3528160.0,0,0,0,11,0,1,Team_23,3,,0,0,,,,,,,0,,0,0,0,6.0,49,3528157,ML,ML,3528156.0,0,0,0,0,1,L,1.0,,R,0,0,0.0,0,0,0,1,,4,28,0,1,R,0,,,0,0,0,2,,2.0,0.0,0.0,849,0.0,0.0,0.0,10.0,0.0,0.0,0.0,5.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,1.7,5.0,4.0,,,,,,,,,,,,,,,,,,,,,,,,,11.0,L,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True
3,0,13:37,0,4-2-5,0,0,Team_3,3,8,2,1.0,FIELD GOAL,20.0,14.0,4.0,KICKOFF - RETURN,-39.0,8.0,SD,49,0,0,0,3.0,3,18548,0,L,R,0,1,0,,O,O,3528162.0,0,0,0,12,0,1,Team_23,3,,0,0,3.0,M,RWR,COMPLETE,HURRY,33.0,0,,0,0,0,,46,3528160,,,3528157.0,0,0,1,0,1,,1.0,,P,0,0,0.0,0,0,0,1,,5,24,0,1,L;L,0,2.6,2.6,0,0,0,2,0.0,0.0,1.0,1.0,817,0.0,0.0,0.0,2.0,0.0,0.0,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,,,2.0,,,,,,,,,,,,,,,,,,,,,,,,,11.0,,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False
4,0,12:56,0,4-2-5,0,1,Team_3,3,5,3,1.0,FIELD GOAL,20.0,14.0,5.0,KICKOFF - RETURN,-39.0,8.0,SD,46,0,0,0,7.0,7,18548,0,R,L,0,1,0,,C,C,3528164.0,0,0,0,11,0,1,Team_23,3,,0,0,0.0,X,,RUN,HURRY,,0,,0,0,0,5.0,39,3528162,QB SCRAMBLE,QB SCRAMBLE,3528160.0,0,1,1,0,1,L,1.0,,P,0,0,0.0,0,0,1,1,,6,29,0,1,,0,2.2,4.0,0,0,0,2,,2.0,0.0,0.0,776,0.0,0.0,1.0,3.0,0.0,0.0,0.0,3.0,0.0,8.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,2.6,2.6,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,12.0,R,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False


In [58]:
pd.get_dummies(dataset[['OFFPERSONNEL_SIMPLIFIED']]).astype(int)

Unnamed: 0,OFFPERSONNEL_SIMPLIFIED_00,OFFPERSONNEL_SIMPLIFIED_01,OFFPERSONNEL_SIMPLIFIED_02,OFFPERSONNEL_SIMPLIFIED_03,OFFPERSONNEL_SIMPLIFIED_10,OFFPERSONNEL_SIMPLIFIED_11,OFFPERSONNEL_SIMPLIFIED_12,OFFPERSONNEL_SIMPLIFIED_13,OFFPERSONNEL_SIMPLIFIED_14,OFFPERSONNEL_SIMPLIFIED_20,OFFPERSONNEL_SIMPLIFIED_21,OFFPERSONNEL_SIMPLIFIED_22,OFFPERSONNEL_SIMPLIFIED_23,OFFPERSONNEL_SIMPLIFIED_3+
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,0,0,0,0,0,1,0,0,0,0,0,0,0,0
2,0,0,0,0,0,1,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,1,0,0,0,0,0,0,0
4,0,0,0,0,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19829,0,0,0,0,0,1,0,0,0,0,0,0,0,0
19830,0,0,0,0,0,1,0,0,0,0,0,0,0,0
19831,0,0,0,0,0,1,0,0,0,0,0,0,0,0
19832,0,0,0,0,0,1,0,0,0,0,0,0,0,0


In [56]:
pd.get_dummies(dataset.groupby(["GAMEID", "OffTeam"])[['OFFPERSONNEL_SIMPLIFIED']]).expanding(min_periods=10).mean()

TypeError: unhashable type: 'DataFrame'

In [None]:
base_feature_names = [
    'WEEK',
    'QUARTER',
    'SCOREDIFFERENTIAL',
    'SCORE',
    'DISTANCE',
    'DOWN',
    'FIELDPOSITION',
    'DRIVE',
    'DRIVEPLAY',
    'OFFTIMEOUTSREMAINING',
    'DEFTIMEOUTSREMAINING',
    'HASH',
    '2MINUTE', 
    'CLOCK_INT']
engineered_features_names = prev_names + game_prev_names
target_names = ['RUNPASS']

In [None]:
# dataset = dataset[['GAMEID', 'PLAYID'] + base_feature_names + target_names].reset_index(drop=True)
# dataset.to_pickle("data/dataset.pkl")   # use pickle to keep data types

advanced_dataset = dataset[['GAMEID', 'PLAYID'] + base_feature_names + engineered_features_names + target_names].copy(deep=True).reset_index(drop=True)
advanced_dataset = advanced_dataset.sort_values(["GAMEID", "PLAYID"]).reset_index(drop=True)
advanced_dataset.to_pickle("data/advanced_dataset2.pkl")   # use pickle to keep data types

In [54]:
pd.get_dummies(dataset.groupby(["GAMEID", "OffTeam"]).get_group((18548, "Team_3"))[['OFFPERSONNEL_SIMPLIFIED']]).expanding(min_periods=10).mean()

Unnamed: 0,OFFPERSONNEL_SIMPLIFIED_11,OFFPERSONNEL_SIMPLIFIED_12,OFFPERSONNEL_SIMPLIFIED_13,OFFPERSONNEL_SIMPLIFIED_21,OFFPERSONNEL_SIMPLIFIED_22
92,,,,,
93,,,,,
94,,,,,
95,,,,,
96,,,,,
97,,,,,
98,,,,,
99,,,,,
100,,,,,
101,0.4,0.2,0.0,0.2,0.2


In [10]:
# for tendencies:
# DROPBACKTYPE, CENTERPASSBLOCKDIRECTION, DEFPERSONNEL, OFFPERSONNELBASIC, TEALIGNMENT, PASSRESULT

In [32]:
dataset.DROPBACKTYPE.unique()

array(['SD', nan, 'RR', 'SR', 'SL', 'RL', 'RSR', 'STP', 'RSL', 'FF',
       'RRL', 'WRP', 'RLR', 'BFP', 'RBP'], dtype=object)

In [30]:
dataset.PASSRESULT.unique()

array(['INCOMPLETE', 'COMPLETE', nan, 'RUN', 'SACK', 'THROWN AWAY',
       'SPIKE', 'HIT AS THREW', 'INTERCEPTION', 'BATTED PASS', 'LATERAL'],
      dtype=object)

In [28]:
''' 
Want to make sure the indices are aligned, so check that rows match (e.g. gameid switches at 150)
'''
dataset.groupby(["GAMEID", "OffTeam"])[col_names_for_numeric_previous].expanding(min_periods=10).mean().iloc[145:155]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,FORCEDFUMBLE,HIT,HURRY,GAINLOSSNET,INTERCEPTION,NOHUDDLE,PENALTY,PASSDEPTH,PASSBREAKUP,DROPBACKDEPTH,MOFO_PLAYED,MOFO_SHOWN,PISTOL,PLAYACTION,SACK,SCREEN,SHIFTMOTION,SHOTGUN,QBMOVEDOFFSPOT,QBPRESSURE,TIMETOPRESSURE,TIMETOTHROW,YARDSAFTERCATCH,YARDSAFTERCONTACT
GAMEID,OffTeam,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
18548,Team_3,145,0.0,0.0,0.148148,7.314815,0.018519,0.018519,0.074074,8.958333,0.0,6.708333,0.377358,0.377358,0.0,0.185185,0.0,0.018519,0.703704,0.333333,0.166667,0.148148,2.836364,3.383333,4.75,3.659574
18548,Team_3,146,0.0,0.0,0.145455,7.2,0.018182,0.018182,0.072727,8.958333,0.0,6.708333,0.37037,0.37037,0.0,0.181818,0.0,0.018182,0.709091,0.327273,0.163636,0.145455,2.836364,3.383333,4.75,3.583333
18548,Team_3,147,0.0,0.0,0.142857,7.25,0.017857,0.017857,0.071429,8.958333,0.0,6.708333,0.363636,0.363636,0.0,0.178571,0.0,0.017857,0.696429,0.321429,0.160714,0.142857,2.836364,3.383333,4.75,3.714286
18548,Team_3,148,0.0,0.0,0.140351,7.140351,0.017544,0.017544,0.070175,8.958333,0.0,6.708333,0.375,0.375,0.0,0.175439,0.0,0.017544,0.684211,0.315789,0.157895,0.140351,2.836364,3.383333,4.75,3.66
18548,Team_3,149,0.0,0.0,0.137931,6.982759,0.017241,0.017241,0.068966,8.958333,0.0,6.708333,0.375,0.375,0.0,0.172414,0.0,0.017241,0.672414,0.310345,0.155172,0.137931,2.836364,3.383333,4.75,3.588235
18549,Team_12,150,,,,,,,,,,,,,,,,,,,,,,,,
18549,Team_12,151,,,,,,,,,,,,,,,,,,,,,,,,
18549,Team_12,152,,,,,,,,,,,,,,,,,,,,,,,,
18549,Team_12,153,,,,,,,,,,,,,,,,,,,,,,,,
18549,Team_12,154,,,,,,,,,,,,,,,,,,,,,,,,


In [29]:
dataset.iloc[145:155]

Unnamed: 0,2MINUTE,CENTERPASSBLOCKDIRECTION,CLOCK,DEFENSIVELINESHIFT,DEFPERSONNEL,DEFSCORE,DEFSUBSTITUTIONS,DefTeam,DEFTIMEOUTSREMAINING,DISTANCE,DOWN,DRIVE,DRIVEENDEVENT,DRIVEENDFIELDPOSITION,DRIVEENDPLAYNUMBER,DRIVEPLAY,DRIVESTARTEVENT,DRIVESTARTFIELDPOSITION,DROPBACKDEPTH,DROPBACKTYPE,FIELDPOSITION,FORCEDFUMBLE,FUMBLE,FUMBLELOST,GAINLOSS,GAINLOSSNET,GAMEID,GARBAGETIME,HASH,HASHDEF,HIT,HURRY,INTERCEPTION,KICKYARDS,MOFOCPLAYED,MOFOCSHOWN,NEXTPLAYID,NOHUDDLE,NOPLAY,OFFFORMATIONUNBALANCED,OFFPERSONNELBASIC,OFFSCORE,OFFSUBSTITUTIONS,OffTeam,OFFTIMEOUTSREMAINING,OPERATIONTIME,OPTION,PASSBREAKUP,PASSDEPTH,PASSDIRECTION,PASSRECEIVERPOSITIONTARGET,PASSRESULT,PASSRUSHRESULT,PASSWIDTH,PENALTY,PENALTYYARDS,PISTOL,PLAYACTION,PLAYACTIONFAKE,PLAYCLOCK,PLAYENDFIELDPOSITION,PLAYID,POAACTUAL,POAINTENDED,PREVIOUSPFFPLAYID,PUMPFAKE,QBMOVEDOFFSPOT,QBPRESSURE,QBRESET,QUARTER,RBDIRECTION,RBSINBACKFIELD,RETURNYARDS,RUNPASS,RUNPASSOPTION,SACK,SCORE,SCOREDIFFERENTIAL,SCREEN,SHIFTMOTION,SHOTGUN,SNAPTIME,SORTORDER,SPOTLEFT,STUNT,TACKLE,TEALIGNMENT,TEMPO,TIMETOPRESSURE,TIMETOTHROW,TOUCHDOWN,TRICKLOOK,TRICKPLAY,WEEK,YARDSAFTERCATCH,YARDSAFTERCONTACT,MOFO_PLAYED,MOFO_SHOWN,CLOCK_INT,prev_FORCEDFUMBLE,prev_HIT,prev_HURRY,prev_GAINLOSSNET,prev_INTERCEPTION,prev_NOHUDDLE,prev_PENALTY,prev_PASSDEPTH,prev_PASSBREAKUP,prev_DROPBACKDEPTH,prev_MOFO_PLAYED,prev_MOFO_SHOWN,prev_PISTOL,prev_PLAYACTION,prev_SACK,prev_SCREEN,prev_SHIFTMOTION,prev_SHOTGUN,prev_QBMOVEDOFFSPOT,prev_QBPRESSURE,prev_TIMETOPRESSURE,prev_TIMETOTHROW,prev_YARDSAFTERCATCH,prev_YARDSAFTERCONTACT,game_prev_FORCEDFUMBLE,game_prev_HIT,game_prev_HURRY,game_prev_GAINLOSSNET,game_prev_INTERCEPTION,game_prev_NOHUDDLE,game_prev_PENALTY,game_prev_PASSDEPTH,game_prev_PASSBREAKUP,game_prev_DROPBACKDEPTH,game_prev_MOFO_PLAYED,game_prev_MOFO_SHOWN,game_prev_PISTOL,game_prev_PLAYACTION,game_prev_SACK,game_prev_SCREEN,game_prev_SHIFTMOTION,game_prev_SHOTGUN,game_prev_QBMOVEDOFFSPOT,game_prev_QBPRESSURE,game_prev_TIMETOPRESSURE,game_prev_TIMETOTHROW,game_prev_YARDSAFTERCATCH,game_prev_YARDSAFTERCONTACT
145,4,,04:23,0,6-2-3,23,1,Team_23,3,10,1,9.0,TOUCHDOWN,0.0,6.0,3.0,KICKOFF -,-25.0,,,45,0,0,0,33.0,33,18548,0,L,R,0,0,0,,C,C,3528452.0,0,0,0,22,28,1,Team_3,2,,0,0,,,,,,,0,,0,0,0,1.0,12,3528449,RT,RT,3528448.0,0,0,0,0,4,R,2.0,,R,0,0,28.23,5,0,1,0,,155,24,0,1,L;R,0,,,0,0,0,2,,28.0,0.0,0.0,263,0.0,0.0,0.0,26.0,0.0,0.0,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,,,15.0,0.0,0.0,0.148148,7.314815,0.018519,0.018519,0.074074,8.958333,0.0,6.708333,0.377358,0.377358,0.0,0.185185,0.0,0.018519,0.703704,0.333333,0.166667,0.148148,2.836364,3.383333,4.75,3.659574
146,4,,04:10,0,5-2-4,23,1,Team_23,2,10,1,9.0,TOUCHDOWN,0.0,6.0,4.0,KICKOFF -,-25.0,,,12,0,0,0,1.0,1,18548,0,R,L,0,0,0,,C,C,3528454.0,0,0,0,12,28,1,Team_3,2,,0,0,,,,,,,0,,0,0,0,15.0,11,3528452,LT,LT,3528449.0,0,0,0,0,4,L,1.0,,R,0,0,28.23,5,0,1,0,,156,29,0,1,L;L;L,0,,,0,0,0,2,,0.0,0.0,0.0,250,0.0,0.0,0.0,33.0,0.0,0.0,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,,,28.0,0.0,0.0,0.145455,7.2,0.018182,0.018182,0.072727,8.958333,0.0,6.708333,0.37037,0.37037,0.0,0.181818,0.0,0.018182,0.709091,0.327273,0.163636,0.145455,2.836364,3.383333,4.75,3.583333
147,4,,04:06,0,4-2-5,23,1,Team_23,1,9,2,9.0,TOUCHDOWN,0.0,6.0,5.0,KICKOFF -,-25.0,,,11,0,0,0,10.0,10,18548,0,R,L,0,0,0,,C,C,3528458.0,0,0,0,11,28,1,Team_3,2,,0,0,,,,,,,0,,0,0,0,,1,3528454,RT,RT,3528452.0,0,0,0,0,4,R,1.0,,R,0,0,28.23,5,0,0,0,,157,29,0,1,R,0,,,0,0,0,2,,10.0,0.0,0.0,246,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,,,0.0,0.0,0.0,0.142857,7.25,0.017857,0.017857,0.071429,8.958333,0.0,6.708333,0.363636,0.363636,0.0,0.178571,0.0,0.017857,0.696429,0.321429,0.160714,0.142857,2.836364,3.383333,4.75,3.714286
148,4,,03:59,0,4-2-5,23,0,Team_23,0,1,1,9.0,TOUCHDOWN,0.0,6.0,6.0,KICKOFF -,-25.0,,,1,0,0,0,1.0,1,18548,0,R,L,0,0,0,,O,O,3528459.0,0,0,0,11,28,0,Team_3,2,,0,0,,,,,,,0,,0,0,0,,0,3528458,RE,RE,3528454.0,0,0,0,0,4,R,1.0,,R,0,0,28.23,5,0,0,0,,158,29,0,0,R,0,,,1,0,0,2,,1.0,1.0,1.0,239,0.0,0.0,0.0,10.0,0.0,0.0,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,10.0,0.0,0.0,0.140351,7.140351,0.017544,0.017544,0.070175,8.958333,0.0,6.708333,0.375,0.375,0.0,0.175439,0.0,0.017544,0.684211,0.315789,0.157895,0.140351,2.836364,3.383333,4.75,3.66
149,4,,00:42,0,5-2-4,30,1,Team_23,0,10,1,10.0,END OF GAME,48.0,1.0,1.0,KICKOFF - ONSIDE,46.0,,,46,0,0,0,-2.0,-2,18548,0,R,L,0,0,0,,,,,0,0,0,22,35,1,Team_3,1,,0,0,,,,,,,0,,0,0,0,13.0,48,3528495,QB KNEEL,QB KNEEL,3528494.0,0,0,0,0,4,U,3.0,,R,0,0,35.3,5,0,0,0,,179,29,0,0,L;R,0,,,0,0,0,2,,0.0,,,42,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.137931,6.982759,0.017241,0.017241,0.068966,8.958333,0.0,6.708333,0.375,0.375,0.0,0.172414,0.0,0.017241,0.672414,0.310345,0.155172,0.137931,2.836364,3.383333,4.75,3.588235
150,0,,14:53,0,3-3-5,0,1,Team_29,3,10,1,1.0,TOUCHDOWN,0.0,13.0,1.0,KICKOFF - RETURN,-18.0,,,-18,0,0,0,3.0,3,18549,0,L,R,0,0,0,,C,C,3535741.0,0,0,0,12,0,1,Team_12,3,,0,0,,,,,,,0,,0,0,0,12.0,-21,3535738,ML,ML,3535723.0,0,0,0,0,1,R,1.0,,R,0,0,0.0,0,0,1,0,,2,24,0,1,L;R,0,,,0,1,0,2,,2.0,0.0,0.0,893,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
151,0,,14:16,0,3-3-5,0,0,Team_29,3,7,2,1.0,TOUCHDOWN,0.0,13.0,2.0,KICKOFF - RETURN,-18.0,,,-21,0,0,0,2.0,2,18549,0,C,C,0,0,0,,O,O,3535744.0,0,0,0,11,0,1,Team_12,3,,0,0,,,,,,,0,,0,0,0,6.0,-23,3535741,ML,ML,3535738.0,0,0,0,0,1,L,1.0,,R,0,0,0.0,0,0,1,0,,3,28,0,1,,0,,,0,0,0,2,,2.0,1.0,1.0,856,0.0,0.0,0.0,3.0,0.0,0.0,0.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,,,2.0,,,,,,,,,,,,,,,,,,,,,,,,
152,0,C,13:35,0,0-3-8,0,1,Team_29,3,5,3,1.0,TOUCHDOWN,0.0,13.0,3.0,KICKOFF - RETURN,-18.0,8.0,SD,-23,0,0,0,17.0,17,18549,0,L,R,0,0,0,,O,C,3535747.0,0,0,0,11,0,1,Team_12,3,,0,0,16.0,M,RWR,COMPLETE,,31.0,0,,0,0,0,9.0,-40,3535744,,,3535741.0,0,0,0,0,1,L,1.0,,P,0,0,0.0,0,0,0,1,,4,24,1,1,,0,,3.2,0,0,0,2,1.0,0.0,1.0,1.0,815,0.0,0.0,0.0,2.0,0.0,0.0,0.0,,0.0,,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,,,2.0,,,,,,,,,,,,,,,,,,,,,,,,
153,0,R,12:55,0,3-3-5,0,1,Team_29,3,10,1,1.0,TOUCHDOWN,0.0,13.0,4.0,KICKOFF - RETURN,-18.0,9.0,SD,-40,0,0,0,-6.0,-6,18549,0,R,L,0,1,0,,C,C,3535749.0,0,0,0,21,0,1,Team_12,3,,0,0,0.0,X,,SACK,SACK,,0,,0,1,1,9.0,-34,3535747,,,3535744.0,0,0,1,0,1,L,1.0,,P,0,1,0.0,0,0,1,0,,5,29,0,0,R,0,3.1,4.0,0,1,0,2,,,0.0,0.0,775,0.0,0.0,0.0,17.0,0.0,0.0,0.0,16.0,0.0,8.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,3.2,1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,
154,0,L,12:10,0,3-3-5,0,0,Team_29,3,16,2,1.0,TOUCHDOWN,0.0,13.0,5.0,KICKOFF - RETURN,-18.0,6.0,SD,-34,0,0,0,15.0,15,18549,0,R,L,0,0,0,,C,C,3535753.0,0,0,0,11,0,1,Team_12,3,,0,0,-4.0,M,SLiWR,COMPLETE,,15.0,0,,0,0,0,3.0,-49,3535749,,,3535747.0,0,0,0,0,1,,,,P,0,0,0.0,0,1,1,1,,6,29,0,1,,0,,1.2,0,0,0,2,19.0,0.0,0.0,0.0,730,0.0,0.0,1.0,-6.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,3.1,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,


In [17]:
#dataset[game_prev_names] = dataset.groupby(["OffTeam"])[col_names_for_previous].expanding(min_periods=10).mean().reset_index(drop=True)

In [18]:
#groups = dataset.groupby(["GAMEID", "OffTeam", "DRIVE"])
#ex_group = groups.get_group((18548, 'Team_23', 1))

# checking that shifts and expanding mean do what we want

In [19]:
dataset.groupby(["GAMEID", "OffTeam"])[['FORCEDFUMBLE', 'PASSDEPTH', 'DRIVEPLAY']].expanding().mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,FORCEDFUMBLE,PASSDEPTH,DRIVEPLAY
GAMEID,OffTeam,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
18548,Team_23,0,0.000000,33.000000,1.000000
18548,Team_23,1,0.000000,19.000000,1.500000
18548,Team_23,2,0.000000,19.000000,2.000000
18548,Team_23,3,0.000000,13.666667,2.500000
18548,Team_23,4,0.000000,10.250000,3.000000
...,...,...,...,...,...
19807,Team_7,19829,0.020000,6.564103,3.604167
19807,Team_7,19830,0.019608,6.325000,3.795918
19807,Team_7,19831,0.019231,6.170732,4.000000
19807,Team_7,19832,0.018868,6.523810,4.000000


In [20]:
dataset.query("GAMEID == 18548 and OffTeam == 'Team_23'").head(20)[['GAMEID', 'PLAYID', 'OffTeam', 'DOWN', 'DRIVE', 'DRIVEPLAY', 'FORCEDFUMBLE', 'PASSDEPTH', 'game_prev_PASSDEPTH']]

Unnamed: 0,GAMEID,PLAYID,OffTeam,DOWN,DRIVE,DRIVEPLAY,FORCEDFUMBLE,PASSDEPTH,game_prev_PASSDEPTH
0,18548,3528152,Team_23,1,1.0,1.0,0,33.0,
1,18548,3528156,Team_23,2,1.0,2.0,0,5.0,
2,18548,3528157,Team_23,1,1.0,3.0,0,,
3,18548,3528160,Team_23,2,1.0,4.0,0,3.0,
4,18548,3528162,Team_23,3,1.0,5.0,0,0.0,
5,18548,3528164,Team_23,1,1.0,6.0,0,1.0,
6,18548,3528165,Team_23,2,1.0,7.0,0,,
7,18548,3528167,Team_23,3,1.0,8.0,0,,
8,18548,3528169,Team_23,4,1.0,9.0,0,6.0,
9,18548,3528171,Team_23,1,1.0,10.0,0,,


In [21]:
''' 
Inspect section to make sure shifting was done properly
'''
dataset.loc[147:170, ['GAMEID', 'PLAYID', 'OffTeam', 'DOWN', 'DRIVE', 'DRIVEPLAY', 'FORCEDFUMBLE', 'prev_FORCEDFUMBLE', 'game_prev_FORCEDFUMBLE', 'SHOTGUN', 'prev_SHOTGUN', 'game_prev_SHOTGUN']]

Unnamed: 0,GAMEID,PLAYID,OffTeam,DOWN,DRIVE,DRIVEPLAY,FORCEDFUMBLE,prev_FORCEDFUMBLE,game_prev_FORCEDFUMBLE,SHOTGUN,prev_SHOTGUN,game_prev_SHOTGUN
147,18548,3528454,Team_3,2,9.0,5.0,0,0.0,0.0,0,0.0,0.321429
148,18548,3528458,Team_3,1,9.0,6.0,0,0.0,0.0,0,0.0,0.315789
149,18548,3528495,Team_3,1,10.0,1.0,0,,0.0,0,,0.310345
150,18549,3535738,Team_12,1,1.0,1.0,0,,,0,,
151,18549,3535741,Team_12,2,1.0,2.0,0,0.0,,0,0.0,
152,18549,3535744,Team_12,3,1.0,3.0,0,0.0,,1,0.0,
153,18549,3535747,Team_12,1,1.0,4.0,0,0.0,,0,1.0,
154,18549,3535749,Team_12,2,1.0,5.0,0,0.0,,1,0.0,
155,18549,3535753,Team_12,3,1.0,6.0,0,0.0,,1,1.0,
156,18549,3535760,Team_12,1,1.0,8.0,0,0.0,,1,1.0,


In [22]:
dataset.loc[250:291, ['GAMEID', 'PLAYID', 'OffTeam', 'DOWN', 'DRIVE', 'DRIVEPLAY', 'FORCEDFUMBLE', 'prev_FORCEDFUMBLE', 'game_prev_FORCEDFUMBLE', 'SHOTGUN', 'prev_SHOTGUN', 'game_prev_SHOTGUN']]

Unnamed: 0,GAMEID,PLAYID,OffTeam,DOWN,DRIVE,DRIVEPLAY,FORCEDFUMBLE,prev_FORCEDFUMBLE,game_prev_FORCEDFUMBLE,SHOTGUN,prev_SHOTGUN,game_prev_SHOTGUN
250,18549,3537611,Team_29,1,8.0,7.0,0,0.0,0.027778,1,1.0,0.527778
251,18549,3537632,Team_29,1,8.0,8.0,0,0.0,0.027027,1,1.0,0.540541
252,18549,3537645,Team_29,1,8.0,9.0,0,0.0,0.026316,1,1.0,0.552632
253,18549,3537659,Team_29,2,8.0,10.0,0,0.0,0.025641,1,1.0,0.564103
254,18549,3537676,Team_29,3,8.0,11.0,0,0.0,0.025,0,1.0,0.55
255,18549,3537688,Team_29,4,8.0,12.0,0,0.0,0.02439,0,0.0,0.536585
256,18549,3537861,Team_29,1,9.0,1.0,0,,0.02381,0,,0.52381
257,18549,3537875,Team_29,1,9.0,2.0,0,0.0,0.023256,0,0.0,0.511628
258,18549,3537878,Team_29,2,9.0,3.0,0,0.0,0.022727,1,0.0,0.522727
259,18549,3537886,Team_29,3,9.0,4.0,0,0.0,0.022222,1,1.0,0.533333


In [55]:
pd.get_dummies(dataset.groupby(["GAMEID", "OffTeam"]).get_group((18548, "Team_3"))[['OFFPERSONNEL_SIMPLIFIED']]).expanding(min_periods=10).mean()

Unnamed: 0,OFFPERSONNEL_SIMPLIFIED_11,OFFPERSONNEL_SIMPLIFIED_12,OFFPERSONNEL_SIMPLIFIED_13,OFFPERSONNEL_SIMPLIFIED_21,OFFPERSONNEL_SIMPLIFIED_22
92,,,,,
93,,,,,
94,,,,,
95,,,,,
96,,,,,
97,,,,,
98,,,,,
99,,,,,
100,,,,,
101,0.4,0.2,0.0,0.2,0.2
