In [None]:
import pandas as pd
import numpy as np
import tqdm
import os
import warnings
warnings.filterwarnings("ignore", message="pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.")

# Requires the all_Data object. If it is not loaded, run the clean.ipynb
# notebook in the code/clean folder first to create it.

# Set proj_dir to the project's root directory (your working directory)
# before running this cell, e.g. by uncommenting and editing the line below.
# proj_dir = INSERT YOUR WD

# Each helper notebook is executed from inside its own folder: the working
# directory is switched before every %run (presumably so that relative paths
# inside those notebooks resolve -- TODO confirm).
os.chdir( os.path.join(proj_dir, 'code/clean') )
%run clean_objects.ipynb
os.chdir( os.path.join(proj_dir, 'code/modeling') )
%run lstm_objects.ipynb

def get_all_player_metrics_on_play(master_track, play_id, model):
    """Return, per pass rusher on one play, the change in predicted values
    when that rusher is replaced.

    The baseline is ``predict_play(..., replace_player=None)``; for every
    rusher the same call is repeated with ``replace_player`` set to that
    rusher's nflId and the result is subtracted element-wise from the
    baseline.

    Parameters
    ----------
    master_track : object exposing ``search_track_weeks(variables=...,
        variable_values=...)`` and a ``play_details_df`` attribute.
    play_id : value matched against the ``playId`` column.
    model : passed through unchanged to ``predict_play``.

    Returns
    -------
    dict
        ``{str(nflId): list}`` holding the first ``max_frame + 1`` entries of
        ``baseline - replaced`` for each rusher, where
        ``max_frame = int(max(time_after_snap) * 10)``.
        An empty dict is returned when the play has no 'Pass Rush' rows.

    Notes
    -----
    The ``* 10`` presumably converts seconds to tracking frames (10 per
    second) -- TODO confirm against the tracking data.
    """
    # Work out how many leading entries of each prediction vector to keep.
    play_track = master_track.search_track_weeks(variables=["playId"], variable_values=[play_id])
    last_frame = int(max(play_track.time_after_snap.values) * 10)
    n_kept = last_frame + 1

    # Prediction with nobody replaced: the reference every rusher is compared to.
    baseline = predict_play(
        master_track=master_track,
        play_id=play_id,
        model=model,
        normalize=True,
        replace_player=None,
    )

    # nflIds of everyone tagged 'Pass Rush' on this play.
    details = master_track.play_details_df
    is_rusher_here = (details.pff_role == 'Pass Rush') & (details.playId == play_id)
    rusher_ids = details.loc[is_rusher_here].nflId.tolist()

    rusher_metrics = {}
    for rusher_id in rusher_ids:
        replaced = predict_play(master_track, play_id, model, normalize=True, replace_player=rusher_id)
        delta = np.array(baseline) - np.array(replaced)
        rusher_metrics[str(rusher_id)] = delta.tolist()[:n_kept]
    return rusher_metrics

def get_player_metrics(week, master_track, model):
    """Average each rusher's per-play metric over every play in the
    requested weeks.

    Parameters
    ----------
    week : int
        Upper bound handed to ``range``; the weeks visited are
        ``0, 1, ..., week - 1``.
    master_track : object exposing ``search_track_weeks``; also forwarded to
        ``get_all_player_metrics_on_play``.
    model : forwarded to ``get_all_player_metrics_on_play``.

    Returns
    -------
    dict
        ``{player_key: [mean_of_play_averages, number_of_plays]}``, ordered
        from largest to smallest by that ``[mean, count]`` pair.
    """
    per_play_scores = {}
    for week_number in range(week):
        week_rows = master_track.search_track_weeks(variables=['week'], variable_values=[week_number])
        for play_id in tqdm.tqdm(week_rows.playId.unique()):
            frame_metrics = get_all_player_metrics_on_play(master_track, play_id=play_id, model=model)
            # Collapse each rusher's per-frame values to a single mean for this play.
            play_means = {
                rusher: sum(per_frame) / len(per_frame)
                for rusher, per_frame in frame_metrics.items()
            }
            for rusher, play_mean in play_means.items():
                per_play_scores.setdefault(rusher, []).append(play_mean)

    # One [mean, n_plays] pair per rusher.
    summary = {
        rusher: [sum(scores) / len(scores), len(scores)]
        for rusher, scores in per_play_scores.items()
    }
    # Lists compare element-wise, so this orders by mean first, play count second.
    ranked = sorted(summary.items(), key=lambda entry: entry[1], reverse=True)
    return dict(ranked)

def get_top_player_metrics(master_track, metric_dict, min_play_n):
    """Turn a metric dictionary into a ranked table joined with player info.

    Parameters
    ----------
    master_track : object with a ``player_df`` DataFrame that has an
        ``nflId`` column.
    metric_dict : dict
        ``{nflId: [metric, n_play]}``; keys must be castable to int32.
    min_play_n : number
        Rows with ``n_play`` below this value are dropped.

    Returns
    -------
    pandas.DataFrame
        Columns ``nflId``, ``metric``, ``n_play``, the remaining
        ``player_df`` columns, and ``rank``. ``rank`` is the 1-based row
        position after filtering, so it follows the iteration order of
        ``metric_dict`` -- no sorting is done here.
    """
    player_keys = list(metric_dict.keys())
    metric_values = [pair[0] for pair in metric_dict.values()]
    play_counts = [pair[1] for pair in metric_dict.values()]

    leaderboard = pd.DataFrame({'nflId': player_keys, 'metric': metric_values, 'n_play': play_counts})
    leaderboard = leaderboard.astype({'nflId': 'int32'})

    # Cast the roster key to the same dtype so the join keys match.
    roster = master_track.player_df.astype({'nflId': 'int32'})
    leaderboard = leaderboard.merge(roster, on='nflId', how='left')

    has_enough_plays = leaderboard.n_play >= min_play_n
    leaderboard = leaderboard.loc[has_enough_plays].reset_index(drop=True)
    leaderboard['rank'] = leaderboard.index + 1
    return leaderboard

def get_number_of_individual_disruptions(master_track, player_names):
    """Count the disruptive pass-rush rows recorded for each named player.

    A row of ``master_track.play_details_df`` counts when ``pff_role`` is
    ``"Pass Rush"`` and at least one of ``pff_hit``, ``pff_hurry`` or
    ``pff_sack`` equals 1.

    Parameters
    ----------
    master_track : object with ``play_details_df`` and ``player_df``
        DataFrames.
    player_names : iterable of str
        Values looked up in ``player_df.displayName``. When several players
        share a name, the first matching row is used.

    Returns
    -------
    list of int
        One count per entry of ``player_names``, in the same order.

    Raises
    ------
    KeyError
        If a name has no match in ``player_df.displayName``.
    """
    play_dets = master_track.play_details_df
    players = master_track.player_df

    # The role/disruption filter does not depend on the player, so build it
    # once instead of re-filtering the whole table for every name.
    was_disruptive = (play_dets.pff_hit == 1) | (play_dets.pff_hurry == 1) | (play_dets.pff_sack == 1)
    disruptive_rushes = play_dets.loc[(play_dets.pff_role == "Pass Rush") & was_disruptive]

    all_disrupt_nums = []
    for name in player_names:
        matching_ids = players.loc[players.displayName == name, 'nflId']
        if matching_ids.empty:
            # Still a KeyError, as before, but one that says which name failed.
            raise KeyError(f"no player with displayName {name!r} in master_track.player_df")
        player_id = matching_ids.iloc[0]
        all_disrupt_nums.append(int((disruptive_rushes.nflId == player_id).sum()))
    return all_disrupt_nums

# Set the working directory to code/modeling again (same call as before the
# lstm_objects %run above) -- presumably in case that notebook changed it;
# TODO confirm whether this is still needed.
os.chdir( os.path.join(proj_dir, 'code/modeling') )