In [1]:
import nfl_regression
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score

## Importing the dataset

In [2]:
train_receiver_stats, train_player_stats, train_weeks = nfl_regression.preprocess_year_data('2021')
X_train, Y_train = nfl_regression.get_yards_by_stats(train_receiver_stats, train_player_stats, train_weeks, '2021')

test_receiver_stats, test_player_stats, test_weeks = nfl_regression.preprocess_year_data('2023')
X_test, Y_test = nfl_regression.get_yards_by_stats(test_receiver_stats, test_player_stats, test_weeks, '2023')

## Use merged receiver stats

In [3]:
play_data = test_receiver_stats
play_data.head(1)

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,defensiveTeam,yardlineSide,yardlineNumber,...,Zone Coverage,birthDate,collegeName,displayName,height,nflId,officialPosition,updatedName,updatedName2,weight
0,2021090900,97,(13:33) (Shotgun) T.Brady pass incomplete deep...,1,3,2,TB,DAL,TB,33,...,25.0,1996-02-27,Penn State,Chris Godwin,6-1,44896.0,WR,Chris Godwin,Chris Godwin,209.0


## Load week data into single DataFrame

In [4]:
week_data = pd.concat([pd.read_csv(f'BigDataBowl2023\data2023\week{i}.csv') for i in range(1, 9)])

FileNotFoundError: [Errno 2] No such file or directory: 'BigDataBowl2023\\data2023\\week1.csv'

In [None]:
week_data.head(1)

## DataFrame of offensive players on pitch

In [None]:
#pitch_lookup = pd.concat((play_data[['gameId', 'playId']], pd.DataFrame(np.vstack(play_data.apply(lambda x: week_data[(week_data['gameId'] == x['gameId']) & (week_data['playId'] == x['playId'])][week_data[(week_data['gameId'] == x['gameId']) & (week_data['playId'] == x['playId'])]['team'] == x['possessionTeam']]['nflId'].unique(), axis=1).to_numpy()), columns=[f'player{i}' for i in range(1, 12)])), axis=1)
#pitch_lookup.to_csv('BigDataBowl2023/pitch_lookup.csv', index=False)

## Merge with saved file of offensive players

In [None]:
pitch_lookup = pd.read_csv('BigDataBowl2023/pitch_lookup.csv')
pitch_lookup.head(1)

In [None]:
play_data_with_players = play_data.merge(pitch_lookup, how='inner', on=['gameId', 'playId'])

In [None]:
feature_columns = ['Age', 'Height', 'Weight', 'Overall', 'Speed',
 'Acceleration', 'Agility', 'Change of Dir', 'Strength', 'Jumping',
 'Awareness', 'Carrying', 'Break Tackle', 'Juke Move', 'Spin Move',
 'Trucking', 'Stiff Arm', 'BC Vision', 'Catching', 'Catch In Traffic',
 'Spec Catch', 'Release', 'Short RR', 'Medium RR', 'Deep RR',
 'Throw Power', 'Throw Acc Short', 'Throw Acc Mid', 'Throw Acc Deep',
 'Throw Under Pressure', 'Throw On The Run', 'Play Action', 'Break Sack',
 'Run Block', 'Run Block Power', 'Run Block Finesse', 'Pass Block',
 'Pass Block Power', 'Pass Block Finesse', 'Impact Blocking',
 'Lead Blocking', 'Tackle', 'Hit Power', 'Pursuit', 'Man Coverage',
 'Zone Coverage', 'Press', 'Play Recognition', 'Power Moves',
 'Finesse Moves', 'Block Shedding', 'Kick Power', 'Kick Accuracy',
 'Kick Return', 'Stamina', 'Injury', 'Toughness', 'Years Pro']

players_columns = [f'player{i}' for i in range(1, 12)]

player_stats = pd.read_csv('BigDataBowl2023\merged_df.csv').drop('Unnamed: 0', axis='columns')

## Generate batches of player stats to pass into model

In [None]:
X = np.array(play_data_with_players.apply(lambda x: player_stats[player_stats['nflId'].isin(x[players_columns])].reset_index(drop=True), axis=1))
X.shape

In [None]:
Y = np.array(play_data['nflId'])
Y.shape

## Training ElasticNet

In [None]:
from sklearn.linear_model import ElasticNet
from sklearn.feature_selection import RFE
reg = ElasticNet()
rfe = RFE(reg, n_features_to_select=22).fit(X_train, Y_train)
Xk_train, Xk_test = rfe.transform(X_train), rfe.transform(X_test)
reg = reg.fit(Xk_train, Y_train)
print(f'train {reg.score(Xk_train, Y_train):.4f}', f'test {reg.score(Xk_test, Y_test):.4f}')

## Propose pass by picking largest predicted yardage

In [None]:
Y_hat = np.array(list(map(lambda x: x.loc[np.argmax(reg.predict(rfe.transform(x[feature_columns])))]['nflId'], X)))
Y_hat.shape

## Calculate accuracy against the ground truth receiver

In [None]:
print(f'classification pass accuracy {np.sum(Y == Y_hat) / len(Y) * 100:.4f}')

## Training the SVM

In [None]:
from sklearn.svm import SVR
from sklearn.feature_selection import SelectKBest, f_regression
reg = SVR(kernel='poly')
k_best = SelectKBest(f_regression, k=15).fit(X_train, Y_train)
Xk_train, Xk_test = k_best.transform(X_train), k_best.transform(X_test)
reg = reg.fit(Xk_train, Y_train)
print(f'train {reg.score(Xk_train, Y_train):.4f}', f'test {reg.score(Xk_test, Y_test):.4f}')

## Propose pass by picking largest predicted yardage

In [None]:
Y_hat = np.array(list(map(lambda x: x.loc[np.argmax(reg.predict(k_best.transform(x[feature_columns])))]['nflId'], X)))
Y_hat.shape

## Calculate accuracy against the ground truth receiver

In [None]:
print(f'classification pass accuracy {np.sum(Y == Y_hat) / len(Y) * 100:.4f}')
print('f1 score: ', f1_score(y_true=Y, y_pred=Y_hat))


## Training RandomForestRegressor

In [None]:
from sklearn.ensemble import RandomForestRegressor
reg = RandomForestRegressor(random_state=0)
reg = reg.fit(X_train, Y_train)
print(f'train {reg.score(X_train, Y_train):.4f}', f'test {reg.score(X_test, Y_test):.4f}')

## Propose pass by picking largest predicted yardage

In [None]:
Y_hat = np.array(list(map(lambda x: x.loc[np.argmax(reg.predict(x[feature_columns]))]['nflId'], X)))
Y_hat.shape

## Calculate accuracy against the ground truth receiver

In [None]:
print(f'classification pass accuracy {np.sum(Y == Y_hat) / len(Y) * 100:.4f}')

In [None]:
print('f1 score: ', f1_score(y_true=Y, y_pred=Y_hat))

## Baseline from deterministically picking most frequent

In [None]:
from sklearn.dummy import DummyClassifier
dummy_reg = DummyClassifier(strategy='most_frequent')
dummy_reg = dummy_reg.fit(None, Y)

In [None]:
print(f'deterministic baseline {dummy_reg.score(None, Y) * 100:.4f}')