In [33]:
from utils import normalize_df, create_train_test_val_df
import os
# Project paths, all derived from the current working directory of the kernel.
notebook_dir = os.getcwd()
# The project root is the parent of the working directory.
root_dir = os.path.abspath(os.path.join(notebook_dir, os.pardir))
# Parquet inputs are read from <root_dir>/data.
data_dir = os.path.join(root_dir, "data")
import numpy as np
import pandas as pd
from sklearn.metrics import brier_score_loss, make_scorer, log_loss, mean_squared_error
from IPython.display import display_html
from copy import deepcopy
import pickle
from sklearn import calibration
import matplotlib.pyplot as plt
# from utils.utils

# Model 1.0
This notebook will act as an interactive tutorial for our Live Win Probability Model. This "model" is actually composed of 3 separate models that "stack" on each other.
1. Play and drive outcome models
    * technically this is two separate models:
        * Play outcome (first down, field goal made, field goal missed, touchdown, turnover, and none/other)
            * only using the first down prediction from the output of this model
        * Drive outcome (Clock, field goal made, field goal missed, punt, safety, touch down, turnover, turnover on downs)
    * outputs for both models will be a series of probabilities for each class that all add up to 1
2. End of regulation score differential model
    * Dealing with overtime later, we want to predict how the score differential will change by the end of regulation.
        * i.e., if the current score differential (home score - away score) is -3 and the end of regulation score differential is -10, the target value will be -7
    * Output of this will be a series of probabilities for all score differential possibilities from -35 to 35 (outputs <-35 or >35 will be set to -35/35 respectively)
3. End of regulation score total model
    * Similar concept to the score differential model
    * Again, we're using the change in end of regulation score total as the target value
    * Outputs will be a series of probabilities for classes from 0 to 83 (outputs will be capped at 83)

## Data
Let's take a look at the data that we are pulling from oracle
* First we have event_df and odds_df
* event_df is the play-by-play data mixed with some important game information
* Odds data has vegas predictions for almost all the games in the set (missing games will be given the average vegas spread and over/under)
    * The spread and over/under are merged with the event table to give us our pre-game priors
    * some games have multiple odds so duplicates are removed


In [2]:
# Play-by-play events: keep a single row per (event number, game) and renumber the rows.
events_path = os.path.join(data_dir, "event_data.parquet")
event_df = (
    pd.read_parquet(events_path)
    .drop_duplicates(["nevent", "game_code"])
    .reset_index(drop=True)
)

# Odds: keep only the first row listed for each game.
odds_path = os.path.join(data_dir, "odds_data.parquet")
odds_df = pd.read_parquet(odds_path).drop_duplicates("game_code")

# Attach the spread and over/under to every event. Games without odds receive the
# mean spread / over-under across all games that do have odds.
# The assignment lines up row-for-row because event_df carries a fresh 0..n-1 index
# (reset above) and the left merge against de-duplicated odds keeps one row per event.
prior_columns = ["cur_spread", "cur_over_under"]
prior_defaults = {
    "cur_spread": np.mean(odds_df["cur_spread"]),
    "cur_over_under": np.mean(odds_df["cur_over_under"]),
}
merged_priors = event_df.merge(odds_df, how="left", on="game_code")[prior_columns]
event_df[prior_columns] = merged_priors.fillna(prior_defaults)

# Show every column when frames are rendered.
pd.set_option("display.max_columns", None)
display_html(event_df)

Unnamed: 0,game_code,game_date,season,home_team_id,home_team,home_team_abbrev,away_team_id,away_team,away_team_abbrev,home_final_score,away_final_score,final_score_diff,end_of_regulation_score_diff,home_rest_of_game_score,away_rest_of_game_score,end_of_regulation_score_diff_change,home_score_added,away_score_added,current_score_diff,current_score_total,home_start_score,away_start_score,home_team_outcome,home_team_win,draw,away_team_win,nevent,quarter,overtime,home_team_has_ball,off_team_id,def_team_id,kick_off,punt,point_after_kick,two_point_attempt,field_goal_attempt,off_start_score,off_end_score,off_score_change,def_start_score,def_end_score,def_score_change,play_counts,efficiency_counts,from_scrimmage,first_down,scoring_play,possession_change,continuation,event_name,event_id,yards_gained,drive_outcome_id,drive_outcome_desc,down,ytg,yd_from_goal,drive_id,drive_start,play_start_time,cur_spread,cur_over_under
0,819846,2008-09-04,2008,351,New York Giants,NYG,363,Washington Redskins,Was,16,7,9,9,16,7,9,0,0,0,0,0,0,W,1,0,0,1,1,0,0,363,351,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,Kick Off,5,73.0,,,0,-1,70,,3600,900.0,-4.5,41.5
1,819846,2008-09-04,2008,351,New York Giants,NYG,363,Washington Redskins,Was,16,7,9,9,16,7,9,0,0,0,0,0,0,W,1,0,0,2,1,0,1,351,363,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,Kick Off Return,6,19.0,,,0,-1,103,,3600,900.0,-4.5,41.5
2,819846,2008-09-04,2008,351,New York Giants,NYG,363,Washington Redskins,Was,16,7,9,9,16,7,9,0,0,0,0,0,0,W,1,0,0,3,1,0,1,351,363,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,Run,4,3.0,37.0,TD,1,10,84,1.0,3600,895.0,-4.5,41.5
3,819846,2008-09-04,2008,351,New York Giants,NYG,363,Washington Redskins,Was,16,7,9,9,16,7,9,0,0,0,0,0,0,W,1,0,0,4,1,0,1,351,363,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,Incomplete Pass,2,0.0,37.0,TD,2,7,81,1.0,3600,860.0,-4.5,41.5
4,819846,2008-09-04,2008,351,New York Giants,NYG,363,Washington Redskins,Was,16,7,9,9,16,7,9,0,0,0,0,0,0,W,1,0,0,5,1,0,1,351,363,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,Pass Completion,1,8.0,37.0,TD,3,7,81,1.0,3600,854.0,-4.5,41.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
816793,2337728,2022-02-13,2021,327,Cincinnati Bengals,Cin,343,Los Angeles Rams,LAR,20,23,-3,-3,0,0,0,0,0,-3,43,20,23,L,0,0,1,201,4,0,1,327,343,0,0,0,0,0,20,20,0,23,23,0,1,1,1,0,0,0,0,Run,4,0.0,40.0,Downs,3,1,49,13.0,85,48.0,4.5,48.5
816794,2337728,2022-02-13,2021,327,Cincinnati Bengals,Cin,343,Los Angeles Rams,LAR,20,23,-3,-3,0,0,0,0,0,-3,43,20,23,L,0,0,1,202,4,0,1,327,343,0,0,0,0,0,20,20,0,23,23,0,1,0,0,0,0,0,0,Offense Timeout,57,,40.0,Downs,4,1,49,13.0,85,43.0,4.5,48.5
816795,2337728,2022-02-13,2021,327,Cincinnati Bengals,Cin,343,Los Angeles Rams,LAR,20,23,-3,-3,0,0,0,0,0,-3,43,20,23,L,0,0,1,203,4,0,1,327,343,0,0,0,0,0,20,20,0,23,23,0,1,1,1,0,0,1,0,Incomplete Pass,2,0.0,40.0,Downs,4,1,49,13.0,85,43.0,4.5,48.5
816796,2337728,2022-02-13,2021,327,Cincinnati Bengals,Cin,343,Los Angeles Rams,LAR,20,23,-3,-3,0,0,0,0,0,-3,43,20,23,L,0,0,1,204,4,0,0,343,327,0,0,0,0,0,23,23,0,20,20,0,1,1,1,0,0,0,0,Run,4,-1.0,39.0,End Game,1,10,51,13.0,39,39.0,4.5,48.5


Adding timeouts remaining for both teams and time left in game

In [3]:
# Quarter -> half. The +0.01 offset keeps quarter 1 (0.5 after halving) from rounding
# down to 0, so quarters 1-2 give 1.0 and quarters 3-4 give 2.0.
event_df["half"] = ((event_df["quarter"] + 0.01) / 2).round()

# event_id 57 appears as "Offense Timeout" in the data; 58 is presumably the
# defensive counterpart (TODO confirm). A timeout is charged to the home team when
# the offense calls it while home has the ball, or when event 58 occurs while home
# does not have the ball; the away flag mirrors that.
offense_timeout = event_df["event_id"] == 57
event_58_timeout = event_df["event_id"] == 58
home_has_ball = event_df["home_team_has_ball"] == 1
away_has_ball = event_df["home_team_has_ball"] == 0
event_df["home_timeout"] = np.where(
    (offense_timeout & home_has_ball) | (event_58_timeout & away_has_ball), 1, 0
)
event_df["away_timeout"] = np.where(
    (offense_timeout & away_has_ball) | (event_58_timeout & home_has_ball), 1, 0
)

# Timeouts remaining = 3 minus the running count within each (game, half), kept in [0, 3].
home_timeouts_used = event_df.groupby(["game_code", "half"])["home_timeout"].cumsum()
away_timeouts_used = event_df.groupby(["game_code", "half"])["away_timeout"].cumsum()
event_df["home_timeouts_remaining"] = np.clip(3 - home_timeouts_used, 0, 3)
event_df["away_timeouts_remaining"] = np.clip(3 - away_timeouts_used, 0, 3)

# Through quarter 4, add 900 seconds for every full quarter still to be played;
# for later quarters only the clock of the current period is used.
in_first_four_quarters = event_df["quarter"] <= 4
full_quarters_left = 4 - event_df["quarter"]
event_df["time_left_in_game"] = np.where(
    in_first_four_quarters,
    event_df["play_start_time"] + full_quarters_left * 900,
    event_df["play_start_time"],
)
# event_df["time_elapsed"] = 900 - event_df["play_start_time"] + (event_df["quarter"] - 1) * 900


* Our PBP will have multiple rows for one play, so if there's a fumble then recovery by offense and a touchdown, 
* that could have 2-3 rows of data and the touchdown wouldn't show up as being a part of the original play 
    * plays would look like this: 1. Run, 2. Fumble, 3. Offense Recovers the ball (TD)
* So what we've done here is ensure that when a sequence of "continuation" rows ends in a touchdown, every row in that sequence gets TD=True
* After that is taken care of we can setup all of the labels for play and drive description

In [4]:
# Position of each row inside its play: a row with continuation == 0 starts a new
# group, and the running sum of `continuation` counts the follow-up rows after it.
play_group = event_df["continuation"].eq(0).cumsum()
event_df["sequence"] = event_df["continuation"].groupby(play_group).cumsum()
# Event number of the row that started the play (assumes `nevent` increases by one
# per row within a play -- TODO confirm against the source data).
event_df["play_start_id"] = event_df["nevent"] - event_df["sequence"]

# Row-level outcome flags.
turnover_ids = [9, 16]
event_df["turnover"] = np.where(event_df["event_id"].isin(turnover_ids), 1, 0)
points_added = event_df["home_score_added"] + event_df["away_score_added"]
event_df["touchdown_scored"] = np.where(points_added >= 6, 1, 0)
event_df["fieldgoal_made"] = np.where(points_added == 3, 1, 0)

# Sum the flags over all rows that belong to the same play.
play_keys = ["game_code", "play_start_id"]
flag_columns = ["turnover", "touchdown_scored", "fieldgoal_made", "first_down"]
play_outcome_aggregate = (
    event_df[play_keys + flag_columns].groupby(play_keys, as_index=False).sum()
)

# Broadcast the per-play totals back onto every row with ONE merge (previously the
# identical merge was executed four times). Only the key columns are taken from the
# left side, so the aggregated columns keep their plain names, and the values are
# written back positionally so the result does not depend on event_df's index.
play_flags = event_df[play_keys].merge(play_outcome_aggregate, on=play_keys, how="left")
in_play_columns = {
    "touchdown_scored": "touchdown_in_play",
    "turnover": "turnover_in_play",
    "fieldgoal_made": "field_goal_in_play",
    "first_down": "first_down_in_play",
}
for aggregated_name, in_play_name in in_play_columns.items():
    # Clip the count to 0/1: "did this happen anywhere in the play?"
    event_df[in_play_name] = np.clip(play_flags[aggregated_name].to_numpy(), 0, 1)



# Label each row with a single play outcome. Conditions are checked in order and the
# first match wins, so a play containing a turnover is always "turnover" (a touchdown
# on the same play is not split out into a separate defensive-touchdown class).
had_turnover = event_df["turnover_in_play"] == 1
no_turnover = event_df["turnover_in_play"] == 0
made_field_goal = event_df["field_goal_in_play"] == 1
no_field_goal = event_df["field_goal_in_play"] == 0
had_touchdown = event_df["touchdown_in_play"] == 1
no_touchdown = event_df["touchdown_in_play"] == 0
had_first_down = event_df["first_down_in_play"] == 1
attempted_field_goal = event_df["field_goal_attempt"] == 1

event_df["play_outcome"] = np.select(
    [
        had_turnover,
        made_field_goal,
        attempted_field_goal & no_field_goal,
        had_first_down & no_touchdown & no_turnover,
        had_touchdown & no_turnover,
    ],
    [
        "turnover",
        "field_goal_made",
        "field_goal_missed",
        "first_down",
        "offensive_touchdown",
    ],
    default="none",
)
# Collapse the raw drive_outcome_id codes into a small set of drive outcome labels.
# Each label lists the ids that map to it; ids that are not listed become NaN.
drive_outcome_ids_by_label = {
    "punt": (7, 18),
    "turnover": (9, 14),
    "field_goal_made": (17, 42),
    "safety": (20,),
    "field_goal_missed": (35, 36),
    "touch_down": (37,),
    "clock": (38, 39, 51),
    "turnover_on_downs": (40,),
}
drive_description_matrix = {
    outcome_id: label
    for label, outcome_ids in drive_outcome_ids_by_label.items()
    for outcome_id in outcome_ids
}
event_df["drive_outcome_desc_basic"] = event_df["drive_outcome_id"].map(drive_description_matrix)

# Highest home/away start-of-play score seen on non-overtime rows, per game.
# The two score columns are selected BEFORE aggregating: taking max() over every
# column and selecting afterwards emits a pandas warning about non-aggregatable
# columns (and raises in newer pandas versions), and does a lot of needless work.
regulation_events = event_df[event_df["overtime"] == 0]
game_end_of_regulation_total_score = (
    regulation_events
    .groupby("game_code", as_index=False)[["home_start_score", "away_start_score"]]
    .max()
)
game_end_of_regulation_total_score["end_of_regulation_score_total"] = (
    game_end_of_regulation_total_score["home_start_score"]
    + game_end_of_regulation_total_score["away_start_score"]
)

# Target: how many more points are scored between this row and the end of regulation.
# Looking the game total up by game_code keeps every event row aligned with its own
# game; a game with no regulation rows yields NaN instead of shifting other rows.
end_of_regulation_total_by_game = game_end_of_regulation_total_score.set_index("game_code")[
    "end_of_regulation_score_total"
]
event_df["end_of_regulation_score_total_diff"] = (
    event_df["game_code"].map(end_of_regulation_total_by_game)
    - (event_df["home_start_score"] + event_df["away_start_score"])
)

  game_end_of_regulation_total_score = event_df[event_df.overtime==0].groupby("game_code", as_index=False).max()[["game_code", "home_start_score", "away_start_score"]]


### Data Manipulation
* We need to do a little bit of data manipulation to get the values we need, but we don't want to "overwrite" the values in event_df, so we'll make a copy of it called model_df
* time left in half is added
* from_scrimmage is changed so that PATs and two point conversions are not included
* down, ytg, and yd_from_goal are changed so that all non-scrimmage plays are changed to a default "null" value
* home_team_has_ball is changed so that when kickoffs occur, the team receiving is the one that is in possession of the ball

### Data Subset
* Removing continuation plays that we mentioned before, so that each snap has just one target
* Remove plays where the down is equal to 0 
* Remove plays from scrimmage that did not count (e.g., plays that were waved off by penalties)
* scrimmage_plays_we_want is the list of event_ids for all the scrimmage plays that *aren't* timeouts, ends of quarters, or the two-minute warning.
* Remove all NA values for the feature inputs and target
* Remove all plays that are not from scrimmage
* Remove all overtime plays

In [5]:
# Work on an independent deep copy so event_df keeps its original values.
model_df = event_df.copy(deep=True)

# Seconds left in the current half: drop the 1800 seconds of the second half while
# still in half 1; in half 2 this equals the time left in the game.
model_df["time_left_in_half"] = event_df["time_left_in_game"] - (2 - event_df["half"]) * 1800

# Treat these event ids as non-scrimmage (per the notes above: PATs and two-point
# conversions -- TODO confirm the id list against the event dictionary).
non_scrimmage_event_ids = [22, 47, 52, 53, 54, 55, 56]
model_df["from_scrimmage"] = np.where(
    event_df["event_id"].isin(non_scrimmage_event_ids), 0, event_df["from_scrimmage"]
)

# Non-scrimmage rows get placeholder values for the situational features.
not_from_scrimmage = model_df["from_scrimmage"] == 0
model_df["down"] = np.where(not_from_scrimmage, 0, event_df["down"])
model_df["ytg"] = np.where(not_from_scrimmage, -1, event_df["ytg"])
model_df["yd_from_goal"] = np.where(not_from_scrimmage, -1, event_df["yd_from_goal"])

# On event_id 5 ("Kick Off" in the data) flip possession so that the receiving
# team is recorded as having the ball.
is_kick_off = event_df["event_id"].isin([5])
model_df["home_team_has_ball"] = np.where(
    is_kick_off, 1 - event_df["home_team_has_ball"], event_df["home_team_has_ball"]
)

# event_ids of the scrimmage plays kept for modelling.
scrimmage_plays_we_want = [1, 2, 3, 4, 7, 9, 14, 17, 18, 35]

# Feature columns fed to the play / drive outcome models (order matters).
input_names = [
    "time_left_in_half",
    "half",
    "current_score_diff",
    "current_score_total",
    "cur_spread",
    "cur_over_under",
    "home_timeouts_remaining",
    "away_timeouts_remaining",
    "punt",
    "field_goal_attempt",
    "ytg",
    "yd_from_goal",
    "down",
    "home_team_has_ball",
]

# Rows used for modelling: the first row of each play, with a real down, that counted,
# of a wanted play type, with complete features, from scrimmage, and not in overtime.
starts_a_play = model_df.continuation == 0
has_a_down = model_df.down != 0
play_counted = model_df.play_counts == 1
wanted_play_type = model_df.event_id.isin(scrimmage_plays_we_want)
features_complete = model_df[input_names].notna().all(axis=1)
is_scrimmage = model_df["from_scrimmage"] == 1
in_regulation = model_df["overtime"] == 0
mask_model = (
    starts_a_play
    & has_a_down
    & play_counted
    & wanted_play_type
    & features_complete
    & is_scrimmage
    & in_regulation
)


Let's take a look at what the input features and output features look like. 

Event Name and yards gained is included to help interpret what is going on. 

This is the first 2 drives of the first game in the dataset

In [6]:
# Model inputs plus a few descriptive / target columns for the first 15 modelling rows.
preview_columns = input_names + ["event_name", "yards_gained", "play_outcome", "drive_outcome_desc_basic"]
model_df.loc[mask_model, preview_columns].head(15)

Unnamed: 0,time_left_in_half,half,current_score_diff,current_score_total,cur_spread,cur_over_under,home_timeouts_remaining,away_timeouts_remaining,punt,field_goal_attempt,ytg,yd_from_goal,down,home_team_has_ball,event_name,yards_gained,play_outcome,drive_outcome_desc_basic
2,1795.0,1.0,0,0,-4.5,41.5,3,3,0,0,10,84,1,1,Run,3.0,none,touch_down
3,1760.0,1.0,0,0,-4.5,41.5,3,3,0,0,7,81,2,1,Incomplete Pass,0.0,none,touch_down
4,1754.0,1.0,0,0,-4.5,41.5,3,3,0,0,7,81,3,1,Pass Completion,8.0,first_down,touch_down
5,1723.0,1.0,0,0,-4.5,41.5,3,3,0,0,10,73,1,1,Run,3.0,none,touch_down
6,1676.0,1.0,0,0,-4.5,41.5,3,3,0,0,7,70,2,1,Pass Completion,30.0,first_down,touch_down
7,1650.0,1.0,0,0,-4.5,41.5,3,3,0,0,10,40,1,1,Pass Completion,19.0,first_down,touch_down
8,1612.0,1.0,0,0,-4.5,41.5,3,3,0,0,10,21,1,1,Run,4.0,none,touch_down
9,1562.0,1.0,0,0,-4.5,41.5,3,3,0,0,6,17,2,1,Incomplete Pass,0.0,none,touch_down
10,1555.0,1.0,0,0,-4.5,41.5,3,3,0,0,6,17,3,1,Pass Completion,11.0,first_down,touch_down
11,1517.0,1.0,0,0,-4.5,41.5,3,3,0,0,6,6,1,1,Incomplete Pass,0.0,none,touch_down


### Loading in the Models and Creating Play/Drive Predictions
* For this exercise we won't be training the models, just loading saved models and then using them to make predictions
* We're now including plays that didn't wind up counting
* In addition, each prediction will be split up between home and away. So if the home team has the ball the predictions for the away team play/drive outcomes are going to be set to 0


Let's take a look at how the home team predictions look for our dataset

In [7]:
# Load the fitted play / drive outcome searches. The files are opened with a context
# manager so the handles are closed as soon as the objects are read.
# NOTE: unpickling executes arbitrary code -- only load model files from a trusted source.
with open(os.path.join(root_dir, "models/search_rf_play_outcome.p"), "rb") as model_file:
    search_rf_play_outcome = pickle.load(model_file)
with open(os.path.join(root_dir, "models/search_rf_drive_outcome.p"), "rb") as model_file:
    search_rf_drive_outcome = pickle.load(model_file)
# Silence per-prediction logging from the underlying estimators.
search_rf_play_outcome.best_estimator_.verbose = 0
search_rf_drive_outcome.best_estimator_.verbose = 0


# Rows to score. Compared with mask_model this drops the play_counts and event_id
# filters, so plays that did not count are scored as well.
mask_model_predict = (
    (model_df.continuation==0)&
    (model_df.down!=0)&
    (model_df[input_names].notna().all(axis=1))&
    (model_df["from_scrimmage"]==1)&
    (model_df["overtime"]==0)
)
# Build the feature frame once and reuse it for both models.
predict_inputs = model_df.loc[mask_model_predict, input_names]

# One probability column per class, named after the class label. Rows outside the
# prediction mask are not scored and are filled with 0.
search_rf_play_class_names = ["search_rf_play_" + x for x in search_rf_play_outcome.classes_]
search_rf_drive_class_names = ["search_rf_drive_" + x for x in search_rf_drive_outcome.classes_]
model_df[search_rf_play_class_names] = pd.DataFrame(
    search_rf_play_outcome.predict_proba(predict_inputs),
    index=predict_inputs.index,
    columns=search_rf_play_class_names,
)
model_df[search_rf_play_class_names] = model_df[search_rf_play_class_names].fillna(0)
model_df[search_rf_drive_class_names] = pd.DataFrame(
    search_rf_drive_outcome.predict_proba(predict_inputs),
    index=predict_inputs.index,
    columns=search_rf_drive_class_names,
)
model_df[search_rf_drive_class_names] = model_df[search_rf_drive_class_names].fillna(0)

# Split every probability into a home and an away copy: the copy for the team that
# does not have the ball is set to 0.
search_rf_play_class_names_home = [x + "_home" for x in search_rf_play_class_names]
search_rf_play_class_names_away = [x + "_away" for x in search_rf_play_class_names]
search_rf_drive_class_names_home = [x + "_home" for x in search_rf_drive_class_names]
search_rf_drive_class_names_away = [x + "_away" for x in search_rf_drive_class_names]
model_df[search_rf_play_class_names_home] = model_df[search_rf_play_class_names].where(model_df.home_team_has_ball==1, 0)
model_df[search_rf_play_class_names_away] = model_df[search_rf_play_class_names].where(model_df.home_team_has_ball==0, 0)
model_df[search_rf_drive_class_names_home] = model_df[search_rf_drive_class_names].where(model_df.home_team_has_ball==1, 0)
model_df[search_rf_drive_class_names_away] = model_df[search_rf_drive_class_names].where(model_df.home_team_has_ball==0, 0)
display_html(model_df[mask_model][search_rf_play_class_names_home].head(15))
display_html(model_df[mask_model][search_rf_drive_class_names_home].head(15))

Unnamed: 0,search_rf_play_field_goal_made_home,search_rf_play_field_goal_missed_home,search_rf_play_first_down_home,search_rf_play_none_home,search_rf_play_offensive_touchdown_home,search_rf_play_turnover_home
2,0.0,0.0,0.177257,0.797776,0.006842,0.018125
3,2.762011e-07,5e-06,0.269195,0.703363,0.006831,0.020607
4,0.0,3e-06,0.344761,0.608076,0.00902,0.03814
5,1.22414e-06,0.0,0.184944,0.789976,0.009224,0.015855
6,2.762011e-07,6e-06,0.298935,0.670069,0.009723,0.021267
7,1.22414e-06,0.0,0.189624,0.782763,0.011211,0.0164
8,1.094359e-05,0.0,0.104436,0.819032,0.062322,0.0142
9,0.0003027111,6.2e-05,0.274966,0.643217,0.059572,0.021879
10,0.0002973331,6.2e-05,0.309201,0.588757,0.068857,0.032825
11,2.617972e-07,0.0,0.018438,0.777139,0.194211,0.010212


Unnamed: 0,search_rf_drive_clock_home,search_rf_drive_field_goal_made_home,search_rf_drive_field_goal_missed_home,search_rf_drive_punt_home,search_rf_drive_safety_home,search_rf_drive_touch_down_home,search_rf_drive_turnover_home,search_rf_drive_turnover_on_downs_home
2,0.001627,0.11646,0.022646,0.543364,0.003435,0.175361,0.120722,0.016386
3,0.001256,0.110399,0.020726,0.578062,0.002997,0.159085,0.111848,0.015627
4,0.000609,0.07122,0.012759,0.71112,0.003036,0.10136,0.08859,0.011305
5,0.001456,0.137521,0.025099,0.495564,0.001075,0.204051,0.115819,0.019414
6,0.001254,0.131096,0.022911,0.532615,0.001048,0.184651,0.108144,0.018281
7,0.00111,0.316343,0.058961,0.124513,4.3e-05,0.369347,0.095493,0.03419
8,0.000937,0.371114,0.046645,0.034786,4e-05,0.449162,0.072568,0.024747
9,0.000916,0.388575,0.043961,0.035553,3.4e-05,0.437526,0.067151,0.026285
10,0.000479,0.489969,0.065972,0.045532,4.7e-05,0.313843,0.053709,0.03045
11,0.000539,0.277402,0.019524,0.0186,2.5e-05,0.611744,0.047145,0.02502


### Score Difference Model
* Using the same inputs and adding the outputs of the previous model, we will predict the score differential probabilities
* Since this model is an MLP model, we will normalize the inputs. (all features will be made so that the range is from 0 to 1)
* In this model, continuation plays, null values, end-of-quarter events, and overtime plays are removed.
* NEW: spread and over/under are replaced by a weighted home and away vegas score prediction. The weight is based on time remaining normalized. So beginning of the game is the full score predictions, half time would be 0.5 * full score predictions

In [8]:
# Implied pre-game scores from the betting line: half of the over/under (limited to
# the 30-80 range) plus/minus half of the spread, then scaled by the fraction of the
# 3600-second regulation game that is still to be played.
clipped_over_under = np.clip(model_df["cur_over_under"], 30, 80)
model_df["away_vegas_score_pred"] = clipped_over_under * 0.5 + model_df["cur_spread"] * 0.5
model_df["home_vegas_score_pred"] = clipped_over_under * 0.5 - model_df["cur_spread"] * 0.5
game_fraction_remaining = model_df["time_left_in_game"] / 3600
model_df["away_vegas_score_pred_weighted"] = model_df["away_vegas_score_pred"] * game_fraction_remaining
model_df["home_vegas_score_pred_weighted"] = model_df["home_vegas_score_pred"] * game_fraction_remaining

# Load the fitted score-differential search; the context manager closes the file handle.
# NOTE: unpickling executes arbitrary code -- only load model files from a trusted source.
with open(
    os.path.join(root_dir, "models/search_mlp_score_diff_clipped_rf_drive_preds_vegas_adjusted.p"), "rb"
) as model_file:
    search_mlp_score_diff_clipped_rf_drive_preds = pickle.load(model_file)

# Target: change in score differential by the end of regulation, limited to [-35, 35].
model_df["end_of_regulation_score_diff_change_clipped"] = np.clip(model_df["end_of_regulation_score_diff_change"], -35, 35)

# Feature columns for the score-differential model (order matters). The weighted
# Vegas score predictions take the place of cur_spread / cur_over_under.
input_names_score_pred = [
    'time_left_in_half',
    'half',
    'current_score_diff',
    'current_score_total',
    'home_vegas_score_pred_weighted',
    'away_vegas_score_pred_weighted',
    'home_timeouts_remaining',
    'away_timeouts_remaining',
    'ytg',
    'yd_from_goal',
    'down',
    'home_team_has_ball',
    'kick_off',
    'point_after_kick',
    'two_point_attempt',
    'search_rf_play_first_down_home',
    'search_rf_play_first_down_away',
    'search_rf_drive_field_goal_made_home',
    'search_rf_drive_field_goal_missed_home',
    'search_rf_drive_punt_home',
    'search_rf_drive_safety_home',
    'search_rf_drive_touch_down_home',
    'search_rf_drive_turnover_home',
    'search_rf_drive_turnover_on_downs_home',
    'search_rf_drive_field_goal_made_away',
    'search_rf_drive_field_goal_missed_away',
    'search_rf_drive_punt_away',
    'search_rf_drive_safety_away',
    'search_rf_drive_touch_down_away',
    'search_rf_drive_turnover_away',
    'search_rf_drive_turnover_on_downs_away',
]
output_name = "end_of_regulation_score_diff_change_clipped"

# Rows to score: first row of each play, complete features and target, regulation only,
# excluding event ids 12, 13, 57 and 58 (57 is "Offense Timeout" in the data; the
# others are presumably further timeout / end-of-period markers -- TODO confirm).
mask_model_score_diff = (
    (model_df.continuation==0)&
    (model_df[input_names_score_pred+[output_name]].notna().all(axis=1))&
    ~(model_df.event_id.isin([12,57,58,13]))&
    (model_df["overtime"]==0)
)
score_pred_inputs = model_df.loc[mask_model_score_diff, input_names_score_pred]

# Scale the inputs with normalize_df, using pre-2020 seasons as the reference frame.
normalized_score_pred_df = normalize_df(
    score_pred_inputs,
    model_df[mask_model_score_diff & (model_df.season<2020)][input_names_score_pred],
)
mlp_search_score_diff_clipped_rf_drive_preds_preds = pd.DataFrame(
    search_mlp_score_diff_clipped_rf_drive_preds.predict_proba(normalized_score_pred_df.values),
    index=score_pred_inputs.index,
)

# Column i of predict_proba belongs to classes_[i] of the fitted model. Take the labels
# from the model itself rather than from the values present in this dataset, so the
# columns stay correctly labelled even if the data contains NaN or a different set of
# clipped values than the model was trained on.
score_diff_change_list_clipped = list(search_mlp_score_diff_clipped_rf_drive_preds.classes_)

# Final score differential implied by each class: predicted change + current differential
# (rows x classes, built in one broadcasted addition).
current_score_diff_values = model_df.loc[mask_model_score_diff, "current_score_diff"].to_numpy()
score_diff_clipped_rf_drive_preds_matrix = pd.DataFrame(
    current_score_diff_values[:, None] + np.asarray(score_diff_change_list_clipped)[None, :],
    index=mlp_search_score_diff_clipped_rf_drive_preds_preds.index,
    columns=mlp_search_score_diff_clipped_rf_drive_preds_preds.columns,
)

# Sum the class probabilities by the sign of the implied final differential
# (home score - away score): positive -> home win, zero -> overtime, negative -> away win.
# Rows outside mask_model_score_diff are absent from the predictions and stay NaN.
model_df["xhome_win_mlp_search_clipped_rf_drive_preds"] = (
    mlp_search_score_diff_clipped_rf_drive_preds_preds
    .where(score_diff_clipped_rf_drive_preds_matrix > 0)
    .sum(axis=1)
)
model_df["xovertime_mlp_search_clipped_rf_drive_preds"] = (
    mlp_search_score_diff_clipped_rf_drive_preds_preds
    .where(score_diff_clipped_rf_drive_preds_matrix == 0)
    .sum(axis=1)
)
model_df["xaway_win_mlp_search_clipped_rf_drive_preds"] = (
    mlp_search_score_diff_clipped_rf_drive_preds_preds
    .where(score_diff_clipped_rf_drive_preds_matrix < 0)
    .sum(axis=1)
)
# Probability-weighted (expected) end-of-regulation score differential.
model_df["xend_of_regulation_score_diff_mlp_search_clipped_rf_drive_preds"] = (
    score_diff_clipped_rf_drive_preds_matrix * mlp_search_score_diff_clipped_rf_drive_preds_preds
).sum(axis=1)

display_html(model_df[["xhome_win_mlp_search_clipped_rf_drive_preds", "xovertime_mlp_search_clipped_rf_drive_preds", "xaway_win_mlp_search_clipped_rf_drive_preds"]].dropna())
display_html(pd.DataFrame(mlp_search_score_diff_clipped_rf_drive_preds_preds.values, columns=score_diff_change_list_clipped))

Unnamed: 0,xhome_win_mlp_search_clipped_rf_drive_preds,xovertime_mlp_search_clipped_rf_drive_preds,xaway_win_mlp_search_clipped_rf_drive_preds
0,0.591256,0.058451,0.350293
2,0.586799,0.059595,0.353606
3,0.583465,0.059713,0.356822
4,0.571353,0.060742,0.367905
5,0.593152,0.059738,0.347110
...,...,...,...
816791,0.077233,0.187556,0.735212
816792,0.046173,0.187217,0.766610
816793,0.044726,0.073117,0.882157
816795,0.034394,0.054719,0.910887


Unnamed: 0,-35,-34,-33,-32,-31,-30,-29,-28,-27,-26,-25,-24,-23,-22,-21,-20,-19,-18,-17,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35
0,1.150268e-03,1.636042e-03,6.731524e-04,6.780141e-04,2.494678e-03,1.619762e-03,9.414211e-04,2.733766e-03,2.399525e-03,9.419301e-04,1.833368e-03,5.459472e-03,3.411801e-03,0.002549,0.008500,0.005627,3.271398e-03,0.006095,0.012069,0.010055,0.006018,0.014781,0.013929,0.004799,0.007559,0.013874,0.008573,0.011147,0.041300,0.021684,0.013183,0.032871,0.046137,0.019594,0.020703,0.058451,0.028751,0.025262,0.052861,0.028614,0.020148,0.026653,0.049626,0.019814,0.011355,0.038683,0.016749,0.011043,0.020616,0.033007,0.009766,0.015859,0.019407,0.012234,0.007028,0.016114,0.017779,6.286064e-03,0.008659,0.016552,5.597886e-03,5.821043e-03,1.019233e-02,1.228632e-02,3.568373e-03,5.472472e-03,8.655130e-03,3.214562e-03,3.184646e-03,3.934317e-03,1.646335e-02
1,1.239577e-03,1.798313e-03,7.095840e-04,7.080947e-04,2.638308e-03,1.707082e-03,1.016404e-03,2.890738e-03,2.589795e-03,9.725548e-04,1.948423e-03,5.893395e-03,3.499952e-03,0.002738,0.008859,0.005881,3.318015e-03,0.006331,0.012616,0.010247,0.005927,0.015226,0.013772,0.004847,0.007843,0.014906,0.008693,0.011387,0.040728,0.021783,0.013005,0.033016,0.044440,0.018977,0.021453,0.059595,0.027406,0.024822,0.058534,0.028188,0.019839,0.027012,0.050494,0.018404,0.010991,0.038780,0.015786,0.010377,0.020746,0.031061,0.009035,0.015710,0.019442,0.011884,0.006874,0.016309,0.017354,5.732145e-03,0.008588,0.016214,5.090089e-03,5.805916e-03,1.027134e-02,1.220425e-02,3.346231e-03,5.345160e-03,8.892828e-03,2.857199e-03,3.139301e-03,3.865860e-03,1.639786e-02
2,1.269040e-03,1.841269e-03,7.216933e-04,7.158830e-04,2.679306e-03,1.738635e-03,1.035136e-03,2.923776e-03,2.639301e-03,9.830963e-04,1.978229e-03,6.039911e-03,3.549745e-03,0.002801,0.008981,0.005986,3.363655e-03,0.006422,0.012805,0.010383,0.005940,0.015392,0.013889,0.004915,0.007982,0.015292,0.008803,0.011629,0.041242,0.022105,0.013063,0.033096,0.043842,0.019115,0.021657,0.059713,0.027256,0.024895,0.059835,0.028407,0.019804,0.026850,0.050101,0.018085,0.010854,0.038330,0.015670,0.010219,0.020545,0.030537,0.008939,0.015554,0.019290,0.011841,0.006789,0.016174,0.017179,5.620349e-03,0.008455,0.015983,4.987173e-03,5.764237e-03,1.014904e-02,1.211440e-02,3.271931e-03,5.264451e-03,8.855633e-03,2.764750e-03,3.099457e-03,3.810593e-03,1.617102e-02
3,1.360675e-03,2.003710e-03,7.793231e-04,7.531159e-04,2.871116e-03,1.875110e-03,1.111243e-03,3.091699e-03,2.847951e-03,1.053329e-03,2.065184e-03,6.704032e-03,3.801624e-03,0.003003,0.009381,0.006481,3.582598e-03,0.006585,0.013719,0.011108,0.006041,0.015884,0.014725,0.005110,0.008282,0.016306,0.009181,0.011914,0.042082,0.023107,0.013154,0.033175,0.043296,0.019676,0.021794,0.060742,0.027490,0.024557,0.060655,0.028395,0.019621,0.026282,0.049607,0.017647,0.010470,0.037326,0.015476,0.009764,0.019724,0.029455,0.008779,0.015030,0.018679,0.011705,0.006634,0.015531,0.016822,5.410749e-03,0.008004,0.015264,4.753977e-03,5.612990e-03,9.539212e-03,1.177332e-02,3.128659e-03,4.932383e-03,8.554213e-03,2.592531e-03,2.978388e-03,3.646226e-03,1.551386e-02
4,1.135709e-03,1.632270e-03,6.559173e-04,6.614319e-04,2.473088e-03,1.557107e-03,9.174873e-04,2.696502e-03,2.446117e-03,8.977474e-04,1.864702e-03,5.541888e-03,3.240274e-03,0.002555,0.008454,0.005681,3.160715e-03,0.006270,0.012259,0.009733,0.005718,0.014581,0.013116,0.004713,0.007676,0.014548,0.008721,0.011075,0.040935,0.021745,0.012872,0.032951,0.045054,0.018415,0.021155,0.059738,0.027028,0.024931,0.059351,0.028269,0.019492,0.027638,0.050316,0.018380,0.011193,0.040718,0.016286,0.010402,0.021535,0.031889,0.008890,0.015877,0.020220,0.011720,0.006873,0.016878,0.017495,5.627700e-03,0.008831,0.016633,5.087101e-03,5.719371e-03,1.056002e-02,1.212514e-02,3.290697e-03,5.297183e-03,8.805816e-03,2.814652e-03,3.055753e-03,3.822534e-03,1.609956e-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
617304,1.639633e-07,6.209539e-08,1.140663e-07,2.041363e-07,1.103168e-06,3.963043e-07,1.031446e-07,1.081840e-06,8.731921e-07,1.368343e-07,1.038092e-06,1.832040e-06,8.043088e-07,0.000004,0.000045,0.000011,2.210161e-06,0.000015,0.000049,0.000016,0.000267,0.000484,0.000119,0.000179,0.000205,0.001774,0.000639,0.002180,0.009659,0.002634,0.000838,0.008753,0.218195,0.001720,0.001085,0.482737,0.000750,0.002841,0.187556,0.003278,0.001801,0.015085,0.044255,0.003320,0.002244,0.004601,0.000724,0.000163,0.000494,0.000833,0.000103,0.000127,0.000089,0.000009,0.000009,0.000031,0.000029,1.869523e-06,0.000012,0.000011,2.875712e-06,1.431989e-06,4.020217e-06,2.572747e-06,5.528674e-07,2.082761e-07,3.984335e-07,2.971958e-07,7.678085e-07,1.660253e-07,2.554083e-07
617305,9.611947e-08,3.381209e-08,6.309569e-08,1.173131e-07,6.213967e-07,2.292044e-07,6.036388e-08,6.110362e-07,4.989144e-07,7.188997e-08,6.094221e-07,9.624084e-07,4.522022e-07,0.000002,0.000026,0.000007,1.252876e-06,0.000009,0.000027,0.000009,0.000184,0.000264,0.000071,0.000122,0.000120,0.001089,0.000504,0.001523,0.006588,0.001869,0.000585,0.006631,0.200418,0.001381,0.000839,0.541702,0.000433,0.002203,0.187217,0.001967,0.001230,0.010847,0.024385,0.001882,0.001585,0.002868,0.000390,0.000093,0.000288,0.000410,0.000055,0.000072,0.000047,0.000004,0.000005,0.000016,0.000013,9.067698e-07,0.000007,0.000005,1.403931e-06,7.448055e-07,2.146999e-06,1.168752e-06,2.622782e-07,1.007494e-07,1.749586e-07,1.371534e-07,3.953487e-07,6.930122e-08,1.026982e-07
617306,9.954718e-08,3.260240e-08,6.344581e-08,1.203597e-07,6.453520e-07,2.368733e-07,6.419381e-08,6.570908e-07,5.479060e-07,7.897296e-08,5.977395e-07,8.966880e-07,4.607936e-07,0.000002,0.000029,0.000006,1.120717e-06,0.000008,0.000023,0.000008,0.000188,0.000295,0.000072,0.000111,0.000100,0.000835,0.000419,0.001713,0.008678,0.002085,0.000498,0.004961,0.107973,0.001177,0.000919,0.749626,0.000443,0.001984,0.073117,0.001622,0.000968,0.008851,0.027050,0.002152,0.001093,0.001904,0.000302,0.000069,0.000225,0.000320,0.000043,0.000062,0.000029,0.000003,0.000003,0.000010,0.000009,6.952128e-07,0.000004,0.000003,9.168873e-07,4.693479e-07,1.253793e-06,7.741779e-07,1.917826e-07,6.635932e-08,1.053034e-07,9.273813e-08,2.996665e-07,4.247648e-08,5.905700e-08
617307,5.186720e-08,1.714634e-08,3.580122e-08,7.068189e-08,3.544303e-07,1.345817e-07,3.848543e-08,3.698864e-07,2.874753e-07,4.442219e-08,3.052563e-07,4.450616e-07,2.580794e-07,0.000001,0.000016,0.000003,6.301034e-07,0.000004,0.000012,0.000005,0.000139,0.000181,0.000044,0.000076,0.000052,0.000517,0.000278,0.001394,0.005765,0.001501,0.000362,0.003209,0.091379,0.000883,0.000615,0.802609,0.000249,0.001589,0.054719,0.000877,0.000720,0.007693,0.020628,0.001723,0.000856,0.001224,0.000177,0.000046,0.000143,0.000196,0.000030,0.000047,0.000015,0.000001,0.000002,0.000006,0.000005,3.687896e-07,0.000003,0.000001,4.999065e-07,2.703628e-07,6.981125e-07,3.971500e-07,1.120207e-07,3.463046e-08,5.060374e-08,5.357408e-08,1.706281e-07,2.198117e-08,2.976211e-08
