In [80]:
import nfl_regression
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score

## Importing the dataset

In [81]:
# Season labels understood by nflv_regression's loaders; the first is used for
# fitting and the second for evaluation.
TRAIN_YEAR = '2021'
TEST_YEAR = '2023'

# Training split: preprocess the raw tables, then derive the regression inputs/targets.
train_frames = nfl_regression.preprocess_year_data(TRAIN_YEAR)
train_receiver_stats, train_player_stats, train_weeks = train_frames
X_train, Y_train = nfl_regression.get_yards_by_stats(
    train_receiver_stats, train_player_stats, train_weeks, TRAIN_YEAR
)

# Test split: same pipeline, different year label.
test_frames = nfl_regression.preprocess_year_data(TEST_YEAR)
test_receiver_stats, test_player_stats, test_weeks = test_frames
X_test, Y_test = nfl_regression.get_yards_by_stats(
    test_receiver_stats, test_player_stats, test_weeks, TEST_YEAR
)

## Use merged receiver stats

In [82]:
# The pass-target evaluation works on the test-split receiver table.
# This binds a second name to the same object (no copy is made).
play_data = test_receiver_stats
play_data.head(n=1)

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,defensiveTeam,yardlineSide,yardlineNumber,...,Zone Coverage,birthDate,collegeName,displayName,height,nflId,officialPosition,updatedName,updatedName2,weight
0,2021090900,97,(13:33) (Shotgun) T.Brady pass incomplete deep...,1,3,2,TB,DAL,TB,33,...,25.0,1996-02-27,Penn State,Chris Godwin,6-1,44896.0,WR,Chris Godwin,Chris Godwin,209.0


## Load week data into single DataFrame

In [83]:
# Disabled: concatenates week1.csv .. week8.csv into a single DataFrame.
# week_data is only referenced by the (also disabled) pitch_lookup generation
# further down, so nothing that currently runs needs it.
# NOTE(review): the path below uses backslashes inside a normal string literal
# ('\d', '\w' are invalid escapes and the path is Windows-only); switch to
# forward slashes or pathlib before re-enabling.
#week_data = pd.concat([pd.read_csv(f'BigDataBowl2023\data2023\week{i}.csv') for i in range(1, 9)])

In [84]:
# Disabled preview of week_data; only meaningful when the week_data load is re-enabled.
#week_data.head(1)

## DataFrame of offensive players on the field for each play

In [85]:
# Disabled: one-off generation of the pitch_lookup table (needs week_data).
# For every row of play_data it:
#   1. keeps the week_data rows with the same gameId and playId,
#   2. keeps, among those, the rows whose 'team' equals the play's possessionTeam,
#   3. takes the unique nflId values of what is left.
# The per-play id arrays are stacked with np.vstack into columns player1..player11
# (so every play must yield exactly 11 ids, otherwise vstack / the column list fail),
# placed next to gameId / playId, and written to BigDataBowl2023/pitch_lookup.csv.
# NOTE(review): this writes with index=False, yet the recorded preview of the
# CSV that is read back shows an 'Unnamed: 0' column — the file on disk may have
# been produced by a different version of this line; confirm before regenerating.
#pitch_lookup = pd.concat((play_data[['gameId', 'playId']], pd.DataFrame(np.vstack(play_data.apply(lambda x: week_data[(week_data['gameId'] == x['gameId']) & (week_data['playId'] == x['playId'])][week_data[(week_data['gameId'] == x['gameId']) & (week_data['playId'] == x['playId'])]['team'] == x['possessionTeam']]['nflId'].unique(), axis=1).to_numpy()), columns=[f'player{i}' for i in range(1, 12)])), axis=1)
#pitch_lookup.to_csv('BigDataBowl2023/pitch_lookup.csv', index=False)

## Merge with saved file of offensive players

In [86]:
# Saved per-play lookup of the offensive players (player1..player11),
# keyed by gameId / playId.
PITCH_LOOKUP_CSV = 'BigDataBowl2023/pitch_lookup.csv'
pitch_lookup = pd.read_csv(PITCH_LOOKUP_CSV)
pitch_lookup.head(n=1)

Unnamed: 0,gameId,playId,player1,player2,player3,player4,player5,player6,player7,player8,player9,player10,player11
0,2021090900,97,25511.0,35481.0,35634.0,39985.0,40151.0,41233.0,42377.0,42404.0,44896.0,46163.0,52421.0


In [87]:
# Attach player1..player11 to every play; plays without a lookup row are dropped
# (inner join on the game/play key).
play_data_with_players = pd.merge(
    play_data,
    pitch_lookup,
    on=['gameId', 'playId'],
    how='inner',
)

In [88]:
# Rating columns fed to the regressors. The order is significant (it is the
# column order handed to the fitted selectors/models), so it is kept exactly;
# the group comments are only a rough reading aid.
feature_columns = [
    # physical attributes and general ratings
    'Age', 'Height', 'Weight', 'Overall', 'Speed', 'Acceleration', 'Agility',
    'Change of Dir', 'Strength', 'Jumping', 'Awareness',
    # ball carrying
    'Carrying', 'Break Tackle', 'Juke Move', 'Spin Move', 'Trucking',
    'Stiff Arm', 'BC Vision',
    # receiving
    'Catching', 'Catch In Traffic', 'Spec Catch', 'Release', 'Short RR',
    'Medium RR', 'Deep RR',
    # passing
    'Throw Power', 'Throw Acc Short', 'Throw Acc Mid', 'Throw Acc Deep',
    'Throw Under Pressure', 'Throw On The Run', 'Play Action', 'Break Sack',
    # blocking
    'Run Block', 'Run Block Power', 'Run Block Finesse', 'Pass Block',
    'Pass Block Power', 'Pass Block Finesse', 'Impact Blocking', 'Lead Blocking',
    # defense
    'Tackle', 'Hit Power', 'Pursuit', 'Man Coverage', 'Zone Coverage', 'Press',
    'Play Recognition', 'Power Moves', 'Finesse Moves', 'Block Shedding',
    # kicking and returns
    'Kick Power', 'Kick Accuracy', 'Kick Return',
    # durability and experience
    'Stamina', 'Injury', 'Toughness', 'Years Pro',
]

# Names of the eleven per-play player-id columns: player1 .. player11.
players_columns = ['player' + str(slot) for slot in range(1, 12)]

# Per-player rating table; the stray index column written by an earlier
# to_csv is discarded.
player_stats = pd.read_csv('BigDataBowl2023/merged_df.csv').drop(columns='Unnamed: 0')

## Build, for each play, the table of on-field player stats to pass into the models

In [89]:
# Build one candidate table per play.
# For each row of play_data_with_players, keep the player_stats rows whose
# nflId appears among that play's player1..player11 values, and renumber them
# from 0 (reset_index). The prediction cells pick a candidate with
# x.loc[np.argmax(...)], which relies on that 0-based index.
# Candidate rows come out in player_stats order, not player1..player11 order,
# and a play contributes fewer than 11 rows when some ids have no player_stats
# entry.
# The recorded output shows a 1-D array with one entry per play; presumably
# each entry is a DataFrame (object dtype) — confirm when changing this line.
X = np.array(play_data_with_players.apply(lambda x: player_stats[player_stats['nflId'].isin(x[players_columns])].reset_index(drop=True), axis=1))
X.shape

(6119,)

In [90]:
# Disabled: would restrict every play's candidate table to the positions
# WR, TE, FB and RB, dropping all other rows in place and renumbering the
# index from 0. With it disabled, every offensive player found in player_stats
# remains a candidate for the predicted target.
# NOTE(review): if re-enabled, this mutates the DataFrames stored in X in
# place, so the cell is not idempotent relative to the cell that builds X.
#for x in X:
#    x.drop(x[~x['Position'].isin(["WR", "TE", "FB", "RB"])].index, axis='index', inplace=True)
#    x.reset_index(drop=True, inplace=True)

In [91]:
# Ground-truth receiver id for every play.
# Read it from play_data_with_players — the frame X was built from — and not
# from play_data: the inner merge with pitch_lookup may drop (or duplicate)
# plays, and Y[i] must describe the same play as X[i] for the accuracy numbers
# to mean anything.
Y = np.array(play_data_with_players['nflId'])

# Fail loudly if labels and candidate tables ever get out of step.
assert len(Y) == len(X), f'label/candidate mismatch: {len(Y)} labels for {len(X)} plays'
Y.shape

(6119,)

## Training ElasticNet

In [92]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import ElasticNet

# Recursive feature elimination, driven by an ElasticNet, keeps 22 input columns.
reg = ElasticNet()
rfe = RFE(estimator=reg, n_features_to_select=22)
rfe.fit(X_train, Y_train)

# Reduce both splits to the selected columns.
Xk_train = rfe.transform(X_train)
Xk_test = rfe.transform(X_test)

# Fit the final regressor on the reduced inputs and report its score on each split.
reg.fit(Xk_train, Y_train)
train_r2 = reg.score(Xk_train, Y_train)
test_r2 = reg.score(Xk_test, Y_test)
print(f'train {train_r2:.4f}', f'test {test_r2:.4f}')

train 0.5960 test 0.4698


## Propose pass by picking largest predicted yardage

In [93]:
# For each play, score every candidate with the ElasticNet (on the RFE-selected
# columns) and keep the nflId of the candidate with the highest prediction.
elasticnet_picks = []
for candidates in X:
    predicted_yards = reg.predict(rfe.transform(candidates[feature_columns]))
    top_row = candidates.loc[np.argmax(predicted_yards)]
    elasticnet_picks.append(top_row['nflId'])

Y_hat = np.array(elasticnet_picks)
Y_hat.shape

(6119,)

## Calculate accuracy against the ground truth receiver

In [94]:
# Compare the proposed receiver with the actual one; per-class metrics are
# support-weighted, and classes that are never predicted count as 0.
weighted = dict(average="weighted", zero_division=0)
pass_metrics = {
    'accuracy': accuracy_score(y_true=Y, y_pred=Y_hat),
    'recall': recall_score(y_true=Y, y_pred=Y_hat, **weighted),
    'precision': precision_score(y_true=Y, y_pred=Y_hat, **weighted),
    'f1': f1_score(y_true=Y, y_pred=Y_hat, **weighted),
}
for metric_name, metric_value in pass_metrics.items():
    print(f'classification pass {metric_name} {metric_value * 100:.4f}')

classification pass accuracy 29.6944
classification pass recall 29.6944
classification pass precision 21.2448
classification pass f1 17.9969


## Training the SVR (support vector regressor)

In [95]:
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.svm import SVR

# Univariate selection: keep the 15 columns with the best f_regression score.
k_best = SelectKBest(score_func=f_regression, k=15)
k_best.fit(X_train, Y_train)

# Reduce both splits to the selected columns.
Xk_train = k_best.transform(X_train)
Xk_test = k_best.transform(X_test)

# Polynomial-kernel support vector regressor on the reduced inputs.
reg = SVR(kernel='poly')
reg.fit(Xk_train, Y_train)
train_r2 = reg.score(Xk_train, Y_train)
test_r2 = reg.score(Xk_test, Y_test)
print(f'train {train_r2:.4f}', f'test {test_r2:.4f}')

train 0.5865 test 0.5056


## Propose pass by picking largest predicted yardage

In [96]:
# For each play, score every candidate with the SVR (on the k-best columns)
# and keep the nflId of the candidate with the highest prediction.
svr_picks = []
for candidates in X:
    predicted_yards = reg.predict(k_best.transform(candidates[feature_columns]))
    top_row = candidates.loc[np.argmax(predicted_yards)]
    svr_picks.append(top_row['nflId'])

Y_hat = np.array(svr_picks)
Y_hat.shape

(6119,)

## Calculate accuracy against the ground truth receiver

In [97]:
# Compare the proposed receiver with the actual one; per-class metrics are
# support-weighted, and classes that are never predicted count as 0.
weighted = dict(average="weighted", zero_division=0)
pass_metrics = {
    'accuracy': accuracy_score(y_true=Y, y_pred=Y_hat),
    'recall': recall_score(y_true=Y, y_pred=Y_hat, **weighted),
    'precision': precision_score(y_true=Y, y_pred=Y_hat, **weighted),
    'f1': f1_score(y_true=Y, y_pred=Y_hat, **weighted),
}
for metric_name, metric_value in pass_metrics.items():
    print(f'classification pass {metric_name} {metric_value * 100:.4f}')

classification pass accuracy 28.6975
classification pass recall 28.6975
classification pass precision 21.1770
classification pass f1 17.1285


## Training RandomForestRegressor

In [98]:
from sklearn.ensemble import RandomForestRegressor

# Random forest on the full (unselected) feature set; the seed is fixed so the
# fit is repeatable.
reg = RandomForestRegressor(random_state=0)
reg.fit(X_train, Y_train)

train_r2 = reg.score(X_train, Y_train)
test_r2 = reg.score(X_test, Y_test)
print(f'train {train_r2:.4f}', f'test {test_r2:.4f}')

train 0.9447 test 0.5020


## Propose pass by picking largest predicted yardage

In [99]:
# For each play, score every candidate with the random forest (all feature
# columns, no selector) and keep the nflId of the highest prediction.
forest_picks = []
for candidates in X:
    predicted_yards = reg.predict(candidates[feature_columns])
    top_row = candidates.loc[np.argmax(predicted_yards)]
    forest_picks.append(top_row['nflId'])

Y_hat = np.array(forest_picks)
Y_hat.shape

(6119,)

## Calculate accuracy against the ground truth receiver

In [100]:
# Compare the proposed receiver with the actual one; per-class metrics are
# support-weighted, and classes that are never predicted count as 0.
weighted = dict(average="weighted", zero_division=0)
pass_metrics = {
    'accuracy': accuracy_score(y_true=Y, y_pred=Y_hat),
    'recall': recall_score(y_true=Y, y_pred=Y_hat, **weighted),
    'precision': precision_score(y_true=Y, y_pred=Y_hat, **weighted),
    'f1': f1_score(y_true=Y, y_pred=Y_hat, **weighted),
}
for metric_name, metric_value in pass_metrics.items():
    print(f'classification pass {metric_name} {metric_value * 100:.4f}')

classification pass accuracy 29.1878
classification pass recall 29.1878
classification pass precision 20.8236
classification pass f1 17.4476


## Random baseline: pick one candidate player per play uniformly at random

In [101]:
# Random baseline: for every play pick one candidate uniformly at random and
# measure how often that matches the actual receiver, averaged over
# num_iterations independent repetitions.
#
# One seeded generator is created for the whole cell, so the reported number is
# reproducible on re-run; building a fresh unseeded default_rng() for every
# single play made it neither reproducible nor cheap.
BASELINE_SEED = 0
num_iterations = 100
baseline_rng = np.random.default_rng(BASELINE_SEED)

baseline_scores = []
for _ in range(num_iterations):
    # Kept in its own variable so the model predictions in Y_hat are not clobbered.
    # iloc: the draw is a row position in the candidate table, not an index label.
    Y_random = np.array([x.iloc[baseline_rng.choice(len(x))]['nflId'] for x in X])
    baseline_scores.append(accuracy_score(y_true=Y, y_pred=Y_random))

# Mean accuracy over all repetitions (a fraction in [0, 1]).
accuracy = sum(baseline_scores) / num_iterations

In [102]:
# Report the averaged baseline accuracy as a percentage.
baseline_pct = accuracy * 100
print(f'random baseline accuracy {baseline_pct:.4f}')

random baseline accuracy 9.9492
