In [241]:
import pandas as pd
import numpy as np

In [242]:
# Read the play-by-play table and keep only rows whose passResult is one of
# 'C', 'I' or 'IN' (presumably complete / incomplete / intercepted -- confirm
# against the data dictionary).
plays_df = pd.read_csv("./data2023/plays.csv").loc[
    lambda plays: plays['passResult'].isin(['C', 'I', 'IN'])
]

# Player attribute table. 'Unnamed: 0' is discarded (it looks like an index
# column written out by an earlier to_csv -- TODO confirm).
player_stats_df = pd.read_csv("merged_df.csv").drop(columns='Unnamed: 0')

In [243]:
# Sanity check: only the three pass-result codes kept by the filter should remain
pd.unique(plays_df['passResult'])

array(['I', 'C', 'IN'], dtype=object)

In [244]:
def _abbreviate(display_name):
    """Return "<first character>.<everything after the first space, spaces removed>".

    "Tom Brady" becomes "T.Brady". The value is passed through ``str`` first, so
    a missing name (NaN) turns into "n." rather than raising.
    """
    full_name = str(display_name)
    words_after_first = full_name.split(" ")[1:]
    return full_name[0] + "." + "".join(words_after_first)


# Short-form name used later to match players against play descriptions
player_stats_df["abrev_name"] = player_stats_df["displayName"].map(_abbreviate)
player_stats_df["abrev_name"]

0                T.Brady
1       B.Roethlisberger
2              A.Rodgers
3          R.Fitzpatrick
4                M.Lewis
              ...       
1409         J.Jefferson
1410             D.Milne
1411            G.Stuard
1412            J.Jacobs
1413            J.Heflin
Name: abrev_name, Length: 1414, dtype: object

In [245]:
def _strip_clock_and_formation(description):
    """Return a play description without its leading clock and formation tags.

    The first whitespace-separated token (the game clock, e.g. "(10:34)") is
    always dropped. If what remains starts with "(", the text up to and
    including the FIRST ")" is dropped as well (e.g. "(Shotgun)").

    Only that first parenthesised group is removed: any ")" appearing later in
    the description is kept, and the space that followed the formation tag is
    stripped so the result starts directly with the first word of the play.
    """
    # Remove time from play description (also collapses runs of whitespace)
    text = " ".join(str(description).split()[1:])
    # Remove formation from play description
    if text.startswith("("):
        # partition splits on the first ")" only; the remainder is "" when
        # the closing bracket is missing
        _, _, remainder = text.partition(")")
        text = remainder.lstrip()
    return text


playDescText = plays_df.playDescription.apply(_strip_clock_and_formation)
playDescText

0         T.Brady pass incomplete deep right to C.Godwin.
1        D.Prescott pass deep left to A.Cooper pushed ...
2        D.Prescott pass short middle to D.Schultz to ...
3         D.Prescott pass incomplete deep left to C.Lamb.
4        D.Prescott pass incomplete short left to C.La...
                              ...                        
8549     P.Mahomes pass incomplete short right [O.Ximi...
8550     P.Mahomes pass short right intended for B.Pri...
8551     P.Mahomes pass short right to T.Kelce to KC 3...
8553     D.Jones pass short right to E.Engram pushed o...
8555     D.Jones pass incomplete short right to E.Engram.
Name: playDescription, Length: 7565, dtype: object

In [246]:
# Create columns of names of players who carried out play
def _description_tokens(description):
    """Lower-case a play description and split it on single spaces."""
    return str(description).lower().split(" ")


def _passer_from_description(description):
    """Return the token immediately before the first "pass" token, or NaN.

    NaN is returned when there is no "pass" token, or when "pass" is the very
    first token (index -1 would otherwise wrap round to the last token).
    """
    tokens = _description_tokens(description)
    if "pass" not in tokens:
        return np.nan
    position = tokens.index("pass")
    return tokens[position - 1] if position > 0 else np.nan


def _receiver_from_description(description):
    """Return the token immediately after the first "to" token, or NaN.

    NaN is returned when:
      * the description has no "pass" or no "to" token,
      * the description contains "intercept" anywhere,
      * "to" is the final token (nothing follows it),
      * the candidate contains no "." once a single trailing full stop has
        been removed (this also discards the "no" placeholder values).

    NOTE: only one token is taken, so a surname written with a space
    (e.g. "A.St. Brown") is truncated to "a.st".
    """
    lowered = str(description).lower()
    tokens = lowered.split(" ")
    if "pass" not in tokens or "to" not in tokens or "intercept" in lowered:
        return np.nan
    position = tokens.index("to") + 1
    if position >= len(tokens):
        return np.nan
    candidate = tokens[position]
    # Remove full stop from some names at end
    if candidate.endswith("."):
        candidate = candidate[:-1]
    # Anything without a "." is not in "f.lastname" form, so it is not a player name
    return candidate if "." in candidate else np.nan


# People who pass the ball
passers = playDescText.map(_passer_from_description)

# People who received the ball
receivers = playDescText.map(_receiver_from_description)

In [247]:
# One row per play: its identifiers plus the parsed passer and receiver.
# Plays with a missing value in any of the four columns are discarded.
key_players_df = (
    plays_df[['gameId', 'playId']]
    .assign(passer=passers, receiver=receivers)
    .dropna()
)
key_players_df['receiver'].unique()

array(['c.godwin', 'a.cooper', 'd.schultz', 'c.lamb', 'm.evans',
       'r.gronkowski', 'a.brown', 'm.gallup', 'b.jarwin', 'l.fournette',
       't.pollard', 'c.wilson', 'g.bernard', 't.johnson', 'e.elliott',
       'c.ridley', 'c.patterson', 'k.pitts', 'h.hurst', 'r.gage',
       'm.davis', 'd.smith', 'k.smith', 'd.goedert', 'z.ertz',
       'm.sanders', 'k.gainwell', 'j.reagor', 'c.beasley', 's.diggs',
       'd.johnson', 'd.knox', 'e.sanders', 'j.smith-schuster',
       'c.claypool', 'n.harris', 'd.singletary', 'g.davis', 'e.ebron',
       'p.freiermuth', 'r.griffin', 't.marshall', 'c.davis', 'dj.moore',
       'd.arnold', 'e.moore', 'c.mccaffrey', 'i.thomas', 'j.smith',
       't.kroft', 'mi.carter', 'r.anderson', 'b.berrios', 'c.hubbard',
       'd.mims', 't.conklin', 'a.thielen', 'j.jefferson', 'd.westbrook',
       't.boyd', 's.perine', 'd.cook', 't.higgins', 'a.mattison',
       'k.osborn', 'j.mixon', 'j.chase', 'a.abdullah', 'm.thomas',
       'c.herndon', 'c.ham', 'c.uzomah',

In [248]:
# Pull up the raw description of a single play by its (gameId, playId) pair
is_target_play = (plays_df['gameId'] == 2021091204) & (plays_df['playId'] == 282)
plays_df.loc[is_target_play, 'playDescription'].values

array(['(10:34) (Shotgun) J.Goff pass incomplete deep left to A.St. Brown.'],
      dtype=object)

In [249]:
def stats_from_name(name, stats, columns):
    """Look up one player's attribute row by abbreviated name.

    Parameters
    ----------
    name : str
        Abbreviated player name such as "t.brady". The comparison ignores
        case on both sides, so "T.Brady" matches as well.
    stats : pandas.DataFrame
        Player table; must contain an 'abrev_name' column.
    columns : list-like of str
        Column names to keep. Names absent from ``stats`` are ignored.

    Returns
    -------
    pandas.DataFrame
        At most one row (the first match when several players share the same
        abbreviated name) restricted to the requested columns. The columns
        come back in the order they have in ``stats``, not the order of
        ``columns``. The frame is empty when no player matches.
    """
    # Normalise the query the same way as the table side so the match is
    # case-insensitive; non-string values (e.g. NaN) are compared as-is.
    wanted = name.lower() if isinstance(name, str) else name
    is_match = stats['abrev_name'].str.lower() == wanted
    keep_column = stats.columns.isin(columns)
    return stats.loc[is_match, keep_column].head(1)

def yards_from_play(gameId, playId, plays):
    """Return 'prePenaltyPlayResult' for the play identified by gameId and playId.

    ``plays`` must have 'gameId', 'playId' and 'prePenaltyPlayResult' columns.
    When several rows match, the first one is used. Raises IndexError when no
    row matches.
    """
    same_game = plays['gameId'] == gameId
    same_play = plays['playId'] == playId
    results = plays.loc[same_game & same_play, 'prePenaltyPlayResult']
    return results.values[0]

In [250]:
# Names of the 58 player-attribute columns pulled from player_stats_df for each
# player. The order written here does not control the order of the result:
# stats_from_name returns columns in the order they appear in the stats table.
feature_columns = ['Age', 'Height', 'Weight', 'Overall', 'Speed',
 'Acceleration', 'Agility', 'Change of Dir', 'Strength', 'Jumping',
 'Awareness', 'Carrying', 'Break Tackle', 'Juke Move', 'Spin Move',
 'Trucking', 'Stiff Arm', 'BC Vision', 'Catching', 'Catch In Traffic',
 'Spec Catch', 'Release', 'Short RR', 'Medium RR', 'Deep RR',
 'Throw Power', 'Throw Acc Short', 'Throw Acc Mid', 'Throw Acc Deep',
 'Throw Under Pressure', 'Throw On The Run', 'Play Action', 'Break Sack',
 'Run Block', 'Run Block Power', 'Run Block Finesse', 'Pass Block',
 'Pass Block Power', 'Pass Block Finesse', 'Impact Blocking',
 'Lead Blocking', 'Tackle', 'Hit Power', 'Pursuit', 'Man Coverage',
 'Zone Coverage', 'Press', 'Play Recognition', 'Power Moves',
 'Finesse Moves', 'Block Shedding', 'Kick Power', 'Kick Accuracy',
 'Kick Return', 'Stamina', 'Injury', 'Toughness', 'Years Pro']

# Spot-check the lookup on a single player; a match yields a one-row frame
stats_from_name('t.brady', player_stats_df, feature_columns)

Unnamed: 0,Acceleration,Age,Agility,Awareness,BC Vision,Block Shedding,Break Sack,Break Tackle,Carrying,Catch In Traffic,...,Throw Acc Mid,Throw Acc Short,Throw On The Run,Throw Power,Throw Under Pressure,Toughness,Trucking,Weight,Years Pro,Zone Coverage
0,66,44,70,99,51,33,61,31,69,15,...,97,99,84,91,87,96,20,225,21,19


In [264]:
# Number of leading plays to turn into feature rows (None = every play).
# Kept at 12 so the result matches the earlier run; raising it is now safe
# because plays with an unmatched player are skipped instead of crashing.
N_PLAYS = 12


def _pair_features(play):
    """Return passer + receiver attributes for one play as a 1-D array.

    Returns None when either player has no row in player_stats_df: an empty
    lookup cannot be concatenated with a one-row lookup along axis 1.
    """
    passer_stats = stats_from_name(play['passer'], player_stats_df, feature_columns)
    receiver_stats = stats_from_name(play['receiver'], player_stats_df, feature_columns)
    if passer_stats.empty or receiver_stats.empty:
        return None
    return np.concatenate((passer_stats, receiver_stats), axis=1)[0]


# X_index records the key_players_df label behind each row of X so targets
# can be aligned with the plays that were actually kept.
X_rows, X_index = [], []
for play_label, play in key_players_df[:N_PLAYS].iterrows():
    pair = _pair_features(play)
    if pair is not None:
        X_rows.append(pair)
        X_index.append(play_label)

X = np.stack(X_rows)
X.shape

(12, 116)

In [269]:
# Show the play labelled 12 next to the stats lookup for its receiver;
# the lookup comes back empty (see the output below).
play_12 = key_players_df.loc[12]
gallup_stats = stats_from_name('m.gallup', player_stats_df, feature_columns)
print(play_12, gallup_stats, sep="\n")

gameId      2021090900
playId             687
passer      d.prescott
receiver      m.gallup
Name: 12, dtype: object
Empty DataFrame
Columns: [Acceleration, Age, Agility, Awareness, BC Vision, Block Shedding, Break Sack, Break Tackle, Carrying, Catch In Traffic, Catching, Change of Dir, Deep RR, Finesse Moves, Height, Hit Power, Impact Blocking, Injury, Juke Move, Jumping, Kick Accuracy, Kick Power, Kick Return, Lead Blocking, Man Coverage, Medium RR, Overall, Pass Block, Pass Block Finesse, Pass Block Power, Play Action, Play Recognition, Power Moves, Press, Pursuit, Release, Run Block, Run Block Finesse, Run Block Power, Short RR, Spec Catch, Speed, Spin Move, Stamina, Stiff Arm, Strength, Tackle, Throw Acc Deep, Throw Acc Mid, Throw Acc Short, Throw On The Run, Throw Power, Throw Under Pressure, Toughness, Trucking, Weight, Years Pro, Zone Coverage]
Index: []

[0 rows x 58 columns]
