In [40]:
import pickle
import math
import pandas as pd
import numpy as np
import os
import pathlib
import tqdm
from functools import reduce
from scipy.stats import uniform, randint
from sklearn.metrics import auc, accuracy_score, confusion_matrix, mean_squared_error
from sklearn.model_selection import cross_val_score, GridSearchCV, KFold, RandomizedSearchCV, train_test_split
import warnings
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

proj_dir = '/Users/nickbachelder/Desktop/Kaggle/Linemen'
os.chdir( os.path.join(proj_dir, 'code/clean') )
%run clean_objects.ipynb
os.chdir( os.path.join(proj_dir, 'code/modeling') )
%run metric_objects.ipynb
%run lstm_objects.ipynb
os.chdir( os.path.join(proj_dir, 'data') )
with open('all_data.pkl', 'rb') as file:
    all_data = pickle.load(file)
lstm_model = SequenceModel(n_features = 21, n_classes = 2, n_hidden = 15, n_layers = 1)
lstm_model.load_state_dict(torch.load("lstm_weights"))
os.chdir( os.path.join(proj_dir, 'code/plotting') )
%run export_objects.ipynb

In [46]:
week = 9
folder = os.path.join(proj_dir, 'data' )

# ---- Week 1 training data (input for the feature distribution plot) ----

print('Exporting data for feature distribution plots')
export_w1_for_plotting = all_data.training_data_team.get('1')
export_w1_for_plotting.to_csv(f"{folder}/w1_for_plotting")

# ---- Example plays: sack, hit, hurry, non-disrupt ----
# Each entry pairs the progress message with the play id to export. For every
# play, the play-level frame is exported first and the metric-level frame second.
example_plays = (
    ('Exporting sack play', 20210912082010),
    ('Exporting hit play', 20210912003480),
    ('Exporting hurry play', 2021090900687),
    ('Exporting non-disrupt play', 2021090900282),
)

for progress_message, example_play_id in example_plays:
    print(progress_message)
    shared_kwargs = dict(master_track = all_data, play_id = example_play_id, model = lstm_model, folder = folder)
    download_play_plot_df(**shared_kwargs)
    download_metric_plot_df(**shared_kwargs)

# ---- Probability-difference example play ----

print('Exporting probability differences plot data')
download_play_prob_differences(master_track = all_data, play_id = 20210909003962, model = lstm_model, folder = folder)

# ---- Final player rankings ----
# ranking_dict and ranking_df are kept as notebook-level names because later
# cells inspect and re-export them.

print('Exporting top player metrics data (this may take some time)')

ranking_dict = get_player_metrics(master_track = all_data, model = lstm_model, week = week)
ranking_df = get_top_player_metrics(master_track = all_data, metric_dict = ranking_dict, min_play_n = 10)
first_50_rankings = ranking_df[0:50]
first_50_rankings.to_csv(f'{folder}/final_metric_rankings.csv')

Exporting data for feature distribution plots
Exporting sack play
Exporting hit play
Exporting hurry play
Exporting non-disrupt play
Exporting probability differences plot data
Exporting top player metrics data (this may take some time)


0it [00:00, ?it/s]
100%|██████████| 1170/1170 [05:40<00:00,  3.43it/s]
100%|██████████| 1062/1062 [05:46<00:00,  3.07it/s]
100%|██████████| 1138/1138 [06:53<00:00,  2.75it/s]
100%|██████████| 1105/1105 [07:49<00:00,  2.35it/s]
100%|██████████| 1104/1104 [08:46<00:00,  2.10it/s]
100%|██████████| 999/999 [08:17<00:00,  2.01it/s]
100%|██████████| 912/912 [08:20<00:00,  1.82it/s]
100%|██████████| 1030/1030 [12:10<00:00,  1.41it/s]


In [57]:
# Echo the dictionary returned by get_player_metrics for a quick visual check.
# NOTE(review): this displays the whole dict. The recorded output shows string keys
# mapping to [float, int] pairs — presumably player id -> [metric value, play count];
# confirm against get_player_metrics.
ranking_dict

{'44893': [0.2519437991082668, 1],
 '43366': [0.23405820354819298, 2],
 '43761': [0.2113274091397997, 2],
 '47823': [0.1948197392316965, 1],
 '47828': [0.16485070884227754, 2],
 '52452': [0.16431483087560503, 3],
 '53472': [0.15289784426038916, 1],
 '53615': [0.14569181646849658, 1],
 '44845': [0.1432158483399285, 1],
 '44202': [0.13258873057310228, 2],
 '52852': [0.13176275789737701, 1],
 '46168': [0.13048127364544643, 1],
 '52542': [0.12340510732088333, 1],
 '45004': [0.11816164345613547, 2],
 '44925': [0.11469483524560928, 1],
 '46349': [0.11452769196551779, 1],
 '46187': [0.11375691525397762, 1],
 '52991': [0.11203291231677645, 2],
 '46127': [0.1115248512133166, 11],
 '48524': [0.1099156534237435, 3],
 '53530': [0.10288211988124536, 2],
 '47938': [0.10254904329776764, 1],
 '53612': [0.10207965456183513, 5],
 '52602': [0.0980250530913745, 4],
 '43392': [0.09597073698267732, 5],
 '47830': [0.09555492877960205, 1],
 '46216': [0.09373392230447601, 2],
 '52863': [0.08914829826218043, 5]

In [56]:
# Largest value in the n_play column of the rankings table (presumably the number
# of plays per player — confirm). Series.max() is vectorised and skips NaN, whereas
# the builtin max() iterates the column in Python and gives order-dependent results
# when NaN is present.
ranking_df['n_play'].max()

262

In [58]:
# Restrict the rankings to rows with n_play of at least 100, then write the first
# 50 remaining rows to final_metric_rankings.csv in the data folder (replacing any
# file already at that path).
meets_play_threshold = ranking_df.n_play >= 100
qualified_rankings = ranking_df.loc[meets_play_threshold]
first_50_qualified = qualified_rankings[0:50]
first_50_qualified.to_csv(f'{folder}/final_metric_rankings.csv')