In [1]:
from utils import normalize_df
import os
notebook_dir = os.getcwd()
root_dir = os.path.abspath(os.path.join(notebook_dir, '..'))
data_dir = os.path.join(root_dir, 'data')
import numpy as np
import pandas as pd
from sklearn.metrics import brier_score_loss, make_scorer, log_loss, mean_squared_error
from IPython.display import display_html
from copy import deepcopy
import pickle
from sklearn import calibration
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import GroupKFold, RandomizedSearchCV, cross_val_predict, GridSearchCV
# from utils.utils

In [2]:
def create_train_test_val_df(
    df,
    input_names,
    output_name,
    group_col="game_code",
    mask_test_season=2021,
    mask_val_season=[2019, 2020],
    normalize=False
):
    mask_train = ~(df.season.isin([mask_test_season] + mask_val_season))
    mask_test = (df.season == mask_test_season)
    mask_val = (df.season.isin(mask_val_season))
    if normalize==False:
        X_train = df.loc[mask_train, input_names]
        X_test = df.loc[mask_test, input_names]
        X_val = df.loc[mask_val, input_names]
    else:
        X_train = normalize_df(df.loc[mask_train, input_names])
        X_test = normalize_df(df.loc[mask_test, input_names], df.loc[mask_train, input_names])
        X_val = normalize_df(df.loc[mask_val, input_names], df.loc[mask_train, input_names])
    y_train = df[mask_train][output_name]
    group_train = df[mask_train][group_col]
    y_test = df[mask_test][output_name]
    group_test = df[mask_test][group_col]
    y_val = df[mask_val][output_name]
    group_val = df[mask_val][group_col]
    return X_train, y_train, group_train, X_test, y_test, group_test, X_val, y_val, group_val

# Model 1.0
This notebook will act as an interactive tutorial for our Live Win Probability Model. This "model" is actual comprised of 3 separate models that "stack" on each other.
1. Play and drive outcome models
    * technically this is two separate models:
        * Play outcome (first down, field goal made, field goal missed, touchdown, turnover, and none/other)
            * only using the first down prediction from the output of this model
        * Drive outcome (Clock, field goal made, field goal missed, punt, safety, touch down, turnover, turnover on downs)
    * outputs for both models will be a series of probabilities for each class that all add up to 1
2. End of regulation score differential model
    * Dealing with overtime later, we want to predict how the score differential will change by the end of regulation.
        * i.e., if the current score differential (home score - away score) is -3 and the end of regulation score differential is -10, the target value will be -7
    * Output of this will be a series of probabilities from for all score differential possibilities from -35 to 35 (outputs <-35 or >35 will be set to -35/35 respectively)
3. End of regulation score total model
    * Similar concept to the score differential model
    * Again, we're using the change in end of regulation score total as the target value
    * Outputs will be a series of probabilites for classes from 0 to 83 (outputs will be capped at 83)

## Data
Let's take a look at the data that we are pulling from oracle
* First we have event_df and odds_df
* event_df is the play by play data mixed with some import game information
* Odds data has vegas predictions for almost all the games in the set (missing games will be given the average vegas spread and over/under)
    * The spread and over/under are merged with the event table to give us our pre-game priors
    * some games have multiple odds so duplicates are removed


In [3]:
event_df = pd.read_parquet(os.path.join(data_dir, "event_data_cfb.parquet"))
event_df = event_df.drop_duplicates(["nevent", "game_code"]).reset_index(drop=True)
odds_df = pd.read_parquet(os.path.join(data_dir, "odds_data_cfb.parquet"))
odds_df = odds_df.drop_duplicates("game_code")
event_df[["cur_spread", "cur_over_under"]] = event_df.merge(odds_df, how="left", on="game_code")[["cur_spread", "cur_over_under"]].fillna({"cur_spread": np.mean(odds_df["cur_spread"]), "cur_over_under": np.mean(odds_df["cur_over_under"])})
pd.set_option("display.max_columns", None)
display_html(event_df)

Unnamed: 0,game_code,game_date,season,home_team_id,home_team,home_team_abbrev,home_conf_name,home_division_type,away_team_id,away_team,away_team_abbrev,away_conf_name,away_division_type,home_final_score,away_final_score,final_score_diff,end_of_regulation_score_diff,home_rest_of_game_score,away_rest_of_game_score,end_of_regulation_score_diff_change,home_score_added,away_score_added,current_score_diff,current_score_total,home_start_score,away_start_score,home_team_outcome,home_team_win,draw,away_team_win,nevent,quarter,overtime,home_team_has_ball,off_team_id,def_team_id,kick_off,punt,point_after_kick,two_point_attempt,field_goal_attempt,off_start_score,off_end_score,off_score_change,def_start_score,def_end_score,def_score_change,play_counts,efficiency_counts,from_scrimmage,first_down,scoring_play,possession_change,continuation,event_name,event_id,yards_gained,drive_outcome_id,drive_outcome_desc,down,ytg,yd_from_goal,drive_id,drive_start,play_start_time,cur_spread,cur_over_under
0,1178084,2012-08-30 17:15:00,2012,3499,Utah Utes,Utah,Pac-12,FBS,3710,Northern Colorado Bears,NorCol,Big Sky,FCS,41,0,41,41,41,0,41,0,0,0,0,0,0,W,1,0,0,1,1,0,0,3710,3499,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,Kick Off,5,65.0,,,0,-1,65,,3600.0,900.0,-3.308451,55.707539
1,1178084,2012-08-30 17:15:00,2012,3499,Utah Utes,Utah,Pac-12,FBS,3710,Northern Colorado Bears,NorCol,Big Sky,FCS,41,0,41,41,41,0,41,0,0,0,0,0,0,W,1,0,0,2,1,0,1,3499,3710,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,Touchback,21,0.0,,,0,-1,100,,3600.0,,-3.308451,55.707539
2,1178084,2012-08-30 17:15:00,2012,3499,Utah Utes,Utah,Pac-12,FBS,3710,Northern Colorado Bears,NorCol,Big Sky,FCS,41,0,41,41,41,0,41,0,0,0,0,0,0,W,1,0,0,3,1,0,1,3499,3710,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,Run,4,5.0,9.0,Intercepted Pass,1,10,75,1.0,3600.0,900.0,-3.308451,55.707539
3,1178084,2012-08-30 17:15:00,2012,3499,Utah Utes,Utah,Pac-12,FBS,3710,Northern Colorado Bears,NorCol,Big Sky,FCS,41,0,41,41,41,0,41,0,0,0,0,0,0,W,1,0,0,4,1,0,1,3499,3710,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,Run,4,4.0,9.0,Intercepted Pass,2,5,70,1.0,3600.0,866.0,-3.308451,55.707539
4,1178084,2012-08-30 17:15:00,2012,3499,Utah Utes,Utah,Pac-12,FBS,3710,Northern Colorado Bears,NorCol,Big Sky,FCS,41,0,41,41,41,0,41,0,0,0,0,0,0,W,1,0,0,5,1,0,1,3499,3710,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,Run,4,11.0,9.0,Intercepted Pass,3,1,66,1.0,3600.0,839.0,-3.308451,55.707539
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1930683,2405840,2022-01-10 20:00:00,2021,3478,Alabama Crimson Tide,Ala,Southeastern,FBS,3473,Georgia Bulldogs,UGa,Southeastern,FBS,18,33,-15,-15,0,0,0,0,0,-15,51,18,33,L,0,0,1,234,4,0,1,3478,3473,0,0,0,0,0,18,18,0,33,33,0,1,1,1,0,0,0,0,Sack,3,-6.0,40.0,Downs,2,22,58,13.0,54.0,34.0,3.000000,53.000000
1930684,2405840,2022-01-10 20:00:00,2021,3478,Alabama Crimson Tide,Ala,Southeastern,FBS,3473,Georgia Bulldogs,UGa,Southeastern,FBS,18,33,-15,-15,0,0,0,0,0,-15,51,18,33,L,0,0,1,235,4,0,1,3478,3473,0,0,0,0,0,18,18,0,33,33,0,1,0,0,0,0,0,0,Offense Timeout,57,,40.0,Downs,3,28,64,13.0,54.0,,3.000000,53.000000
1930685,2405840,2022-01-10 20:00:00,2021,3478,Alabama Crimson Tide,Ala,Southeastern,FBS,3473,Georgia Bulldogs,UGa,Southeastern,FBS,18,33,-15,-15,0,0,0,0,0,-15,51,18,33,L,0,0,1,236,4,0,1,3478,3473,0,0,0,0,0,18,18,0,33,33,0,1,1,1,0,0,0,0,Pass Completion,1,17.0,40.0,Downs,3,28,64,13.0,54.0,28.0,3.000000,53.000000
1930686,2405840,2022-01-10 20:00:00,2021,3478,Alabama Crimson Tide,Ala,Southeastern,FBS,3473,Georgia Bulldogs,UGa,Southeastern,FBS,18,33,-15,-15,0,0,0,0,0,-15,51,18,33,L,0,0,1,237,4,0,1,3478,3473,0,0,0,0,0,18,18,0,33,33,0,1,1,1,0,0,1,0,Sack,3,-12.0,40.0,Downs,4,11,47,13.0,54.0,4.0,3.000000,53.000000


Adding timeouts remaining for both teams and time left in game

In [4]:
event_df["half"] = round((event_df["quarter"] + 0.01) / 2)
event_df["home_timeout"] = np.where(((event_df["event_id"]==57)&(event_df["home_team_has_ball"]==1))|((event_df["event_id"]==58)&(event_df["home_team_has_ball"]==0)), 1, 0)
event_df["away_timeout"] = np.where(((event_df["event_id"]==57)&(event_df["home_team_has_ball"]==0))|((event_df["event_id"]==58)&(event_df["home_team_has_ball"]==1)), 1, 0)
event_df["home_timeouts_remaining"] = np.clip(3 - event_df.groupby(["game_code", "half"])["home_timeout"].cumsum(), 0, 3)
event_df["away_timeouts_remaining"] = np.clip(3 - event_df.groupby(["game_code", "half"])["away_timeout"].cumsum(), 0, 3)
event_df["time_left_in_game"] = np.where(event_df["quarter"] <= 4, event_df["play_start_time"] + (4 - event_df["quarter"]) * 900, event_df["play_start_time"])
event_df["time_left_in_game"] = event_df["time_left_in_game"].fillna(event_df["time_left_in_game"].shift(1))
# event_df["time_elapsed"] = 900 - event_df["play_start_time"] + (event_df["quarter"] - 1) * 900


* Our PBP will have multiple rows for one play, so if there's a fumble then recovery by offense and a touchdown, 
* that could have 2-3 rows of data and the touchdown wouldn't show up as being apart of the original play 
    * plays would look like this: 1. Run, 2. Fumble, 3. Offense Recovers the ball (TD)
* So what we've done here is ensure that plays that are "continuation" that end in a touchdown, give a TD=True for all of the plays in the sequence
* After that is taken care of we can setup all of the labels for play and drive description

In [5]:
event_df["sequence"] = event_df["continuation"].groupby(event_df["continuation"].eq(0).cumsum()).cumsum()
event_df["play_start_id"] = event_df["nevent"] - event_df["sequence"]

turnover_ids = [9, 16]
event_df["turnover"] = np.where(event_df["event_id"].isin(turnover_ids), 1, 0)
event_df["touchdown_scored"] = np.where(event_df["home_score_added"]+event_df["away_score_added"]>=6, 1, 0)
event_df["fieldgoal_made"] = np.where(event_df["home_score_added"]+event_df["away_score_added"]==3, 1, 0)

play_outcome_aggregate =event_df[["game_code", "play_start_id", "turnover", "touchdown_scored", "fieldgoal_made", "first_down"]].groupby(["game_code", "play_start_id"], as_index=False).sum()
event_df["touchdown_in_play"] = np.clip(event_df.merge(play_outcome_aggregate,on=["game_code", "play_start_id"], how="left")["touchdown_scored_y"], 0, 1)
event_df["turnover_in_play"] = np.clip(event_df.merge(play_outcome_aggregate,on=["game_code", "play_start_id"], how="left")["turnover_y"], 0, 1)
event_df["field_goal_in_play"] = np.clip(event_df.merge(play_outcome_aggregate,on=["game_code", "play_start_id"], how="left")["fieldgoal_made_y"], 0, 1)
event_df["first_down_in_play"] = np.clip(event_df.merge(play_outcome_aggregate,on=["game_code", "play_start_id"], how="left")["first_down_y"], 0, 1)



event_df["play_outcome"] = (
    np.where((event_df["turnover_in_play"]==1), "turnover",
    # (event_df["touchdown_in_play"]==1)&(event_df["turnover_in_play"]==1), "defensive_touchdown", 
    np.where((event_df["punt"]==1), "punt",
    np.where((event_df["field_goal_in_play"]==1), "field_goal_made",
    np.where((event_df["field_goal_attempt"]==1)&(event_df["field_goal_in_play"]==0), "field_goal_missed",
    np.where((event_df["first_down_in_play"]==1)&(event_df["touchdown_in_play"]==0)&(event_df["turnover_in_play"]==0)&(event_df["punt"]==0), "first_down",
    np.where((event_df["touchdown_in_play"]==1)&(event_df["turnover_in_play"]==0), "offensive_touchdown", "none"))))))
)
drive_description_matrix = {
    7: "punt",
    9: "turnover",
    14: "turnover",
    17: "field_goal_made",
    18: "punt",
    20: "safety",
    35: "field_goal_missed",
    36: "field_goal_missed",
    37: "touch_down",
    38: "clock",
    39: "clock",
    40: "turnover_on_downs",
    42: "field_goal_made",
    51: "clock",
}
event_df["drive_outcome_desc_basic"] = event_df["drive_outcome_id"].map(drive_description_matrix)

# event_df["drive_outcome"] = np.where(
#     (event_df["touchdown_in_drive"]==1)&(event_df["turnover_in_drive"]==1), "defensive_touchdown", 
#     np.where((event_df["touchdown_in_drive"]==1)&(event_df["turnover_in_drive"]==0), "offensive_touchdown",
#     np.where((event_df["field_goal_in_drive"]==1), "field_goal_made",
#     np.where((event_df["touchdown_in_drive"]==0)&(event_df["turnover_in_drive"]==1), "turnover", "none"
# ))))
game_end_of_regulation_total_score = event_df[event_df.overtime==0].groupby("game_code", as_index=False).max()[["game_code", "home_start_score", "away_start_score"]]
game_end_of_regulation_total_score["end_of_regulation_score_total"] = game_end_of_regulation_total_score["home_start_score"] + game_end_of_regulation_total_score["away_start_score"]
# event_df["end_of_regulation_score_total_diff"] = 
event_df["end_of_regulation_score_total_diff"] = (
    event_df.merge(game_end_of_regulation_total_score, on="game_code")["end_of_regulation_score_total"]
    - (event_df["home_start_score"] + event_df["away_start_score"])
)

  game_end_of_regulation_total_score = event_df[event_df.overtime==0].groupby("game_code", as_index=False).max()[["game_code", "home_start_score", "away_start_score"]]


### Data Manipulation
* We need to do a little bit of data manipulation to get the values we need, but we don't want to "overwrite" the values in event_df so we'll make copy of it called model_df
* time left in half is added
* from_scrimmage is changed so that PATs and two point conversions are not included
* down, ytg, and yd_from_goal are changed so that all non-scrimmage plays are changed to a default "null" value
* home_team_has_ball is change so that when kickoffs occur, the team receiving is the one that is in possession of the ball

### Data Subset
* Removing continuation plays that we mentioned before, so that each snap has just one target
* Remove plays where the down is equal to 0 
* Remove plays from scrimmage that did not count (e.g., plays that were waved off by penalties)
* scrimmage_plays_we_want is event_id of all the scrimmage plays that *aren't* timeouts, end of quarters, and the two minute warning.
* Remove all NA values for the feature inputs and target
* Remove all plays that are not from scrimmage
* Remove all overtime plays

In [6]:
model_df = deepcopy(event_df)
model_df["time_left_in_half"] = event_df["time_left_in_game"] - ((2 - event_df["half"]) * 1800)
model_df["from_scrimmage"] = np.where(event_df["event_id"].isin([22, 47, 52, 53, 54, 55, 56]), 0, event_df["from_scrimmage"])
model_df["down"] = np.where(model_df["from_scrimmage"] == 0, 0, event_df["down"])
model_df["ytg"] = np.where(model_df["from_scrimmage"] == 0, -1, event_df["ytg"])
model_df["yd_from_goal"] = np.where(model_df["from_scrimmage"] == 0, -1, event_df["yd_from_goal"])
model_df["home_team_has_ball"] = np.where(event_df["event_id"].isin([5]), 1 - event_df["home_team_has_ball"], event_df["home_team_has_ball"])
scrimmage_plays_we_want = [1, 2, 3, 4, 7, 9, 14, 17, 18, 35]


In [7]:

input_names = [
    'time_left_in_half',
    'half',
    'current_score_diff',
    'current_score_total',
    'cur_spread',
    'cur_over_under',
    'home_timeouts_remaining',
    'away_timeouts_remaining',
    # 'punt',
    # 'field_goal_attempt',
    'ytg',
    'yd_from_goal',
    'down',
    'home_team_has_ball',
]
output_name = "play_outcome"
mask_model = (
    (model_df.continuation==0)&
    (model_df.down!=0)&
    (model_df.play_counts==1)&
    (model_df.event_id.isin(scrimmage_plays_we_want))&
    (model_df[input_names+[output_name]].notna().all(axis=1))&
    (model_df["from_scrimmage"]==1)&
    (model_df["overtime"]==0)
)
X_train, y_train, group_train, X_test, y_test, group_test, X_val, y_val, group_val = create_train_test_val_df(model_df[mask_model], input_names, output_name)


Let's take a look at what the input features and output features look like. 

Event Name and yards gained is included to help interpret what is going on. 

This is the first 2 drives of the first game in the dataset

In [None]:
model_df[mask_model][input_names + ["event_name", "yards_gained", "play_outcome", "drive_outcome_desc_basic"]].head(15)

### Loading in the Models and Creating Play/Drive Predictions
* ~~For this exercise we won't be training the models, just loading saved models and then using them to make predictions~~
* Now we are going to train the models
* In addition, each prediction will be split up between home and away. So if the home team has the ball the predictions for the away team play/drive outcomes are going to be set to 0


Let's take a look at how the predictions look for our dataset

In [8]:
cv=GroupKFold(n_splits=3)

rf_grid = {
    "n_estimators": np.linspace(start=50, stop=500, num=10, dtype=int),
    "max_features": ["auto", "sqrt"],
    "max_depth": np.linspace(5, 15, num=11, dtype=int),
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
    "bootstrap": [True, False],
}

log_loss_scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True)
rf_play_outcome_search_model = RandomForestClassifier(verbose=100, n_jobs=-1, random_state=1)
search_rf_play_outcome = RandomizedSearchCV(rf_play_outcome_search_model, rf_grid, cv=cv,random_state=42,n_iter=10,n_jobs=1,verbose=100, scoring=log_loss_scorer)
# rf_play_outcome_search_model = RandomForestClassifier(verbose=100, n_jobs=1, random_state=1)
# search_rf_play_outcome = RandomizedSearchCV(rf_play_outcome_search_model, rf_grid, cv=cv,random_state=42,n_iter=10,n_jobs=-1,verbose=100, scoring=log_loss_scorer)
search_rf_play_outcome.fit(X_train,y_train,groups=group_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV 1/3; 1/10] START bootstrap=False, max_depth=1, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=1
[CV 2/3; 1/10] START bootstrap=False, max_depth=1, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=1
[CV 3/3; 1/10] START bootstrap=False, max_depth=1, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=1
[CV 1/3; 2/10] START bootstrap=False, max_depth=1, max_features=auto, min_samples_leaf=4, min_samples_split=10, n_estimators=1
[CV 2/3; 2/10] START bootstrap=False, max_depth=1, max_features=auto, min_samples_leaf=4, min_samples_split=10, n_estimators=1
[CV 3/3; 2/10] START bootstrap=False, max_depth=1, max_features=auto, min_samples_leaf=4, min_samples_split=10, n_estimators=1
[CV 1/3; 3/10] START bootstrap=True, max_depth=1, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=1
[CV 2/3; 3/10] START bootstrap=True, max_depth=1, m

RandomizedSearchCV(cv=GroupKFold(n_splits=3),
                   estimator=RandomForestClassifier(n_jobs=1, random_state=1,
                                                    verbose=100),
                   n_jobs=-1,
                   param_distributions={'bootstrap': [True, False],
                                        'max_depth': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
                                        'max_features': ['auto', 'sqrt'],
                                        'min_samples_leaf': [1, 2, 4],
                                        'min_samples_split': [2, 5, 10],
                                        'n_estimators': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])},
                   random_state=42,
                   scoring=make_scorer(log_loss, greater_is_better=False, needs_proba=True),
                   verbose=100)

In [9]:
output_name = "drive_outcome_desc_basic"
mask_model = (
    (model_df.continuation==0)&
    (model_df.down!=0)&
    (model_df.play_counts==1)&
    (model_df.event_id.isin(scrimmage_plays_we_want))&
    (model_df[input_names+[output_name]].notna().all(axis=1))&
    (model_df["from_scrimmage"]==1)&
    (model_df["overtime"]==0)
)

X_train, y_train, group_train, X_test, y_test, group_test, X_val, y_val, group_val = create_train_test_val_df(model_df[mask_model], input_names, output_name)
cv=GroupKFold(n_splits=3)

log_loss_scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True)
rf_drive_outcome_search_model = RandomForestClassifier(verbose=100, n_jobs=-1, random_state=1)
search_rf_drive_outcome = RandomizedSearchCV(rf_drive_outcome_search_model, rf_grid, cv=cv,random_state=42,n_iter=10,n_jobs=1,verbose=100, scoring=log_loss_scorer)
search_rf_drive_outcome.fit(X_train,y_train,groups=group_train)


Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV 1/3; 1/10] START bootstrap=False, max_depth=1, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=1
[CV 2/3; 1/10] START bootstrap=False, max_depth=1, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=1
[CV 3/3; 1/10] START bootstrap=False, max_depth=1, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=1
[CV 1/3; 2/10] START bootstrap=False, max_depth=1, max_features=auto, min_samples_leaf=4, min_samples_split=10, n_estimators=1
[CV 2/3; 2/10] START bootstrap=False, max_depth=1, max_features=auto, min_samples_leaf=4, min_samples_split=10, n_estimators=1
[CV 3/3; 2/10] START bootstrap=False, max_depth=1, max_features=auto, min_samples_leaf=4, min_samples_split=10, n_estimators=1
[CV 1/3; 3/10] START bootstrap=True, max_depth=1, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=1
[CV 2/3; 3/10] START bootstrap=True, max_depth=1, m

RandomizedSearchCV(cv=GroupKFold(n_splits=3),
                   estimator=RandomForestClassifier(n_jobs=1, random_state=1,
                                                    verbose=100),
                   n_jobs=-1,
                   param_distributions={'bootstrap': [True, False],
                                        'max_depth': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
                                        'max_features': ['auto', 'sqrt'],
                                        'min_samples_leaf': [1, 2, 4],
                                        'min_samples_split': [2, 5, 10],
                                        'n_estimators': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])},
                   random_state=42,
                   scoring=make_scorer(log_loss, greater_is_better=False, needs_proba=True),
                   verbose=100)

## Storing Predictions
* We'll add in the plays that didn't count
* The predictions are broken up by home/away team

In [10]:
mask_model_predict = (
    (model_df.continuation==0)&
    (model_df.down!=0)&
    (model_df[input_names].notna().all(axis=1))&
    (model_df["from_scrimmage"]==1)&
    (model_df["overtime"]==0)
)


search_rf_play_class_names = ["search_rf_play_" + x for x in search_rf_play_outcome.classes_]
search_rf_drive_class_names = ["search_rf_drive_" + x for x in search_rf_drive_outcome.classes_]
model_df[search_rf_play_class_names] = pd.DataFrame(search_rf_play_outcome.predict_proba(model_df[mask_model_predict][input_names]), index=model_df[mask_model_predict].index)
model_df[search_rf_play_class_names] = model_df[search_rf_play_class_names].fillna(0)
model_df[search_rf_drive_class_names] = pd.DataFrame(search_rf_drive_outcome.predict_proba(model_df[mask_model_predict][input_names]), index=model_df[mask_model_predict].index)
model_df[search_rf_drive_class_names] = model_df[search_rf_drive_class_names].fillna(0)

search_rf_play_class_names_home = [x + "_home" for x in search_rf_play_class_names]
search_rf_play_class_names_away = [x + "_away" for x in search_rf_play_class_names]
search_rf_drive_class_names_home = [x + "_home" for x in search_rf_drive_class_names]
search_rf_drive_class_names_away = [x + "_away" for x in search_rf_drive_class_names]
model_df[search_rf_play_class_names_home] = model_df[search_rf_play_class_names].where(model_df.home_team_has_ball==1, 0)
model_df[search_rf_play_class_names_away] = model_df[search_rf_play_class_names].where(model_df.home_team_has_ball==0, 0)
model_df[search_rf_drive_class_names_home] = model_df[search_rf_drive_class_names].where(model_df.home_team_has_ball==1, 0)
model_df[search_rf_drive_class_names_away] = model_df[search_rf_drive_class_names].where(model_df.home_team_has_ball==0, 0)
display_html(model_df[mask_model][input_names + search_rf_play_class_names + search_rf_drive_class_names].head(15))
# display_html(model_df[mask_model][input_names + search_rf_drive_class_names].head(15))

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


Unnamed: 0,time_left_in_half,half,current_score_diff,current_score_total,cur_spread,cur_over_under,home_timeouts_remaining,away_timeouts_remaining,ytg,yd_from_goal,down,home_team_has_ball,search_rf_play_field_goal_made,search_rf_play_field_goal_missed,search_rf_play_first_down,search_rf_play_none,search_rf_play_offensive_touchdown,search_rf_play_punt,search_rf_play_turnover,search_rf_drive_clock,search_rf_drive_field_goal_made,search_rf_drive_field_goal_missed,search_rf_drive_punt,search_rf_drive_safety,search_rf_drive_touch_down,search_rf_drive_turnover,search_rf_drive_turnover_on_downs
2,1800.0,1.0,0,0,-3.308451,55.707539,3,3,10,75,1,1,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305
3,1766.0,1.0,0,0,-3.308451,55.707539,3,3,5,70,2,1,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305
4,1739.0,1.0,0,0,-3.308451,55.707539,3,3,1,66,3,1,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305
5,1705.0,1.0,0,0,-3.308451,55.707539,3,3,10,55,1,1,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305
6,1660.0,1.0,0,0,-3.308451,55.707539,3,3,3,48,2,1,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305
7,1633.0,1.0,0,0,-3.308451,55.707539,3,3,10,44,1,1,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305
9,1625.0,1.0,0,0,-3.308451,55.707539,3,3,10,81,1,0,0.013595,0.005237,0.216375,0.634968,0.037205,0.071322,0.021299,0.031426,0.12383,0.045365,0.378092,0.001287,0.258144,0.088362,0.073494
10,1584.0,1.0,0,0,-3.308451,55.707539,3,3,9,80,2,0,0.013595,0.005237,0.216375,0.634968,0.037205,0.071322,0.021299,0.031426,0.12383,0.045365,0.378092,0.001287,0.258144,0.088362,0.073494
11,1547.0,1.0,0,0,-3.308451,55.707539,3,3,10,70,1,0,0.013595,0.005237,0.216375,0.634968,0.037205,0.071322,0.021299,0.031426,0.12383,0.045365,0.378092,0.001287,0.258144,0.088362,0.073494
12,1524.0,1.0,0,0,-3.308451,55.707539,3,3,4,64,2,0,0.013595,0.005237,0.216375,0.634968,0.037205,0.071322,0.021299,0.031426,0.12383,0.045365,0.378092,0.001287,0.258144,0.088362,0.073494


### Score Difference Model
* Using the same inputs and adding the outputs of the previous model, we will predict the score differential probablities
* Since this model is an MLP model, we will normalize the inputs. (all features will be made so that the range is from 0 to 1)
* In this model, continuation, null values, end of quarters, and overtime is removed.
* NEW: spread and over/under are replaced by a weighted home and away vegas score prediction. The weight is based on time remaining normalized. So beginning of the game is the full score predictions, half time would be 0.5 * full score predictions

In [24]:
model_df["away_vegas_score_pred"] = np.clip(model_df["cur_over_under"], 30, 80) * 0.5 + model_df["cur_spread"] * 0.5
model_df["home_vegas_score_pred"] = np.clip(model_df["cur_over_under"], 30, 80) * 0.5 - model_df["cur_spread"] * 0.5
model_df["away_vegas_score_pred_weighted"] = model_df["away_vegas_score_pred"] * (model_df["time_left_in_game"] / 3600)
model_df["home_vegas_score_pred_weighted"] = model_df["home_vegas_score_pred"] * (model_df["time_left_in_game"] / 3600)
model_df["point_after_play"] = np.where(model_df["point_after_kick"] + model_df["two_point_attempt"]==1, 1, 0)


# search_mlp_score_diff_clipped_rf_drive_preds = pickle.load(open(os.path.join(root_dir, "models/search_mlp_score_diff_clipped_rf_drive_preds.p"), 'rb'))
# search_mlp_score_diff_clipped_rf_drive_preds = pickle.load(open(os.path.join(root_dir, "models/search_mlp_score_diff_clipped_rf_drive_preds_vegas_adjusted.p"), 'rb'))
model_df["end_of_regulation_score_diff_change_clipped"] = np.clip(model_df["end_of_regulation_score_diff_change"], -50, 50)


# input_names_score_diff_pred = [item for item in input_names if item not in ["punt", "field_goal_attempt"]] + ["kick_off", "point_after_kick", "two_point_attempt"] + ["search_rf_play_first_down_home", "search_rf_play_first_down_away"] + search_rf_drive_class_names_home[1:] + search_rf_drive_class_names_away[1:]

input_names_score_diff_pred = [
    'time_left_in_half',
    'half',
    'current_score_diff',
    'current_score_total',
    # 'home_vegas_score_pred_weighted',
    # 'away_vegas_score_pred_weighted',
    'cur_spread',
    'cur_over_under',
    'home_timeouts_remaining',
    'away_timeouts_remaining',
    'ytg',
    'yd_from_goal',
    'down',
    'home_team_has_ball',
    'kick_off',
    'point_after_play',
    'search_rf_play_first_down_home',
    'search_rf_play_first_down_away',
    'search_rf_drive_field_goal_made_home',
    'search_rf_drive_field_goal_missed_home',
    'search_rf_drive_punt_home',
    'search_rf_drive_safety_home',
    'search_rf_drive_touch_down_home',
    'search_rf_drive_turnover_home',
    'search_rf_drive_turnover_on_downs_home',
    'search_rf_drive_field_goal_made_away',
    'search_rf_drive_field_goal_missed_away',
    'search_rf_drive_punt_away',
    'search_rf_drive_safety_away',
    'search_rf_drive_touch_down_away',
    'search_rf_drive_turnover_away',
    'search_rf_drive_turnover_on_downs_away'

]
output_name = "end_of_regulation_score_diff_change_clipped"

mask_model_score_diff = (
    (model_df.continuation==0)&
    (model_df[input_names_score_diff_pred+[output_name]].notna().all(axis=1))&
    ~(model_df.event_id.isin([12,57,58,13]))&
    (model_df["overtime"]==0)
)
X_train, y_train, group_train, X_test, y_test, group_test, X_val, y_val, group_val = create_train_test_val_df(model_df[mask_model_score_diff], input_names_score_diff_pred, output_name, normalize=True)
mlp_grid = {
    'hidden_layer_sizes': [(50,50,50), (10,30,10),(100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}
score_diff_change_list_clipped = list(model_df.end_of_regulation_score_diff_change_clipped.drop_duplicates().sort_values())

log_loss_scorer_score_diff = make_scorer(log_loss, greater_is_better=False, needs_proba=True, labels=score_diff_change_list_clipped)
mlp_score_diff_search_base = MLPClassifier(verbose=True, early_stopping=True, n_iter_no_change=5, random_state=1)
search_mlp_score_diff_clipped_rf_drive_preds = GridSearchCV(mlp_score_diff_search_base, mlp_grid, cv=cv,n_jobs=-1,verbose=100, scoring=log_loss_scorer_score_diff)
search_mlp_score_diff_clipped_rf_drive_preds.fit(X_train,y_train,groups=group_train)

normalized_score_pred_df = normalize_df(model_df[mask_model_score_diff][input_names_score_diff_pred], model_df[mask_model_score_diff & (model_df.season<2020)][input_names_score_diff_pred])
mlp_search_score_diff_clipped_rf_drive_preds_preds = pd.DataFrame(search_mlp_score_diff_clipped_rf_drive_preds.predict_proba(normalized_score_pred_df.values), index=model_df[mask_model_score_diff].index)
score_diff_clipped_rf_drive_preds_matrix = pd.DataFrame(np.zeros(mlp_search_score_diff_clipped_rf_drive_preds_preds.shape), index=mlp_search_score_diff_clipped_rf_drive_preds_preds.index)
score_diff_change_list_clipped = list(model_df.end_of_regulation_score_diff_change_clipped.drop_duplicates().sort_values())

for column in score_diff_clipped_rf_drive_preds_matrix.columns:
    score_diff_clipped_rf_drive_preds_matrix[column] = score_diff_change_list_clipped[column] + model_df["current_score_diff"]

model_df["xhome_win_mlp_search_clipped_rf_drive_preds"] = np.sum(mlp_search_score_diff_clipped_rf_drive_preds_preds.T[score_diff_clipped_rf_drive_preds_matrix.T>0], axis=0)
model_df["xovertime_mlp_search_clipped_rf_drive_preds"] = np.sum(mlp_search_score_diff_clipped_rf_drive_preds_preds.T[score_diff_clipped_rf_drive_preds_matrix.T==0], axis=0)
model_df["xaway_win_mlp_search_clipped_rf_drive_preds"] = np.sum(mlp_search_score_diff_clipped_rf_drive_preds_preds.T[score_diff_clipped_rf_drive_preds_matrix.T<0], axis=0)
model_df["xend_of_regulation_score_diff_mlp_search_clipped_rf_drive_preds"] = np.sum(score_diff_clipped_rf_drive_preds_matrix * mlp_search_score_diff_clipped_rf_drive_preds_preds, axis=1)

display_html(model_df[["xhome_win_mlp_search_clipped_rf_drive_preds", "xovertime_mlp_search_clipped_rf_drive_preds", "xaway_win_mlp_search_clipped_rf_drive_preds"]].dropna())
display_html(pd.DataFrame(mlp_search_score_diff_clipped_rf_drive_preds_preds.values, columns=score_diff_change_list_clipped))

Fitting 3 folds for each of 24 candidates, totalling 72 fits
[CV 1/3; 2/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam
[CV 1/3; 3/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam
[CV 1/3; 1/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam
[CV 3/3; 2/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam
[CV 2/3; 2/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam
[CV 2/3; 3/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam
[CV 2/3; 1/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam
[CV 3/3; 1/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learnin



Validation score: 0.153556
Validation score: 0.151185




[CV 2/3; 3/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.471 total time=  32.2s
[CV 1/3; 3/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.463 total time=  32.6s
[CV 3/3; 3/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam
[CV 1/3; 4/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam
Iteration 1, loss = 3.44982853
Iteration 1, loss = 3.45049505
Iteration 1, loss = 3.45101640
Iteration 1, loss = 3.45049505




Iteration 1, loss = 3.45101640
Iteration 1, loss = 3.44982853




Validation score: 0.155405
Validation score: 0.159135
Validation score: 0.154551




Validation score: 0.159135
Validation score: 0.154551
Validation score: 0.155405




[CV 3/3; 2/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.398 total time=  45.9s
[CV 2/3; 1/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam;, score=-3.400 total time=  46.4s
[CV 3/3; 1/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam;, score=-3.398 total time=  46.5s
[CV 1/3; 2/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.406 total time=  46.6s
[CV 2/3; 2/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.400 total time=  47.0s
[CV 1/3; 1/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam;, score=-3.406 total time=  47.2s
[CV 2/3; 4/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10



Iteration 1, loss = 3.57965070




Validation score: 0.151327
Validation score: 0.151185




[CV 3/3; 3/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.468 total time=  25.9s
[CV 1/3; 4/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam;, score=-3.463 total time=  26.5s
[CV 2/3; 6/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam
[CV 3/3; 6/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam
Iteration 1, loss = 3.58687485
Iteration 1, loss = 3.58690009




Validation score: 0.153556
Validation score: 0.151327




[CV 2/3; 4/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam;, score=-3.471 total time=  28.4s
[CV 3/3; 4/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam;, score=-3.468 total time=  28.6s
Iteration 1, loss = 3.50472712
Iteration 1, loss = 3.50472712




Iteration 1, loss = 3.50585628
Iteration 1, loss = 3.50572745
Validation score: 0.152623




[CV 1/3; 7/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam
[CV 2/3; 7/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam
Validation score: 0.152623




Validation score: 0.152655
Validation score: 0.155073
Iteration 1, loss = 3.50572745




Iteration 1, loss = 3.50585628




Validation score: 0.155073




Validation score: 0.152655
[CV 1/3; 5/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.445 total time=  37.3s
[CV 1/3; 6/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam;, score=-3.445 total time=  37.5s
[CV 3/3; 7/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam
[CV 1/3; 8/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam
[CV 2/3; 5/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.447 total time=  40.0s
[CV 2/3; 6/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam;, score=-3.447 total time=  29.2s
[CV 3/3; 5/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.441 total time=  40.0s




Validation score: 0.157997
Validation score: 0.153192




Iteration 1, loss = 3.60689994




Iteration 1, loss = 3.60075783




Validation score: 0.153192
Validation score: 0.150995
[CV 2/3; 7/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam;, score=-3.404 total time=  39.8s
[CV 1/3; 7/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam;, score=-3.410 total time=  39.9s
Iteration 1, loss = 3.48560208




[CV 3/3; 9/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam
[CV 1/3; 10/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam
Iteration 1, loss = 3.48671953




Validation score: 0.155689




[CV 2/3; 9/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.479 total time=  28.2s
[CV 1/3; 9/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.473 total time=  28.4s
Validation score: 0.153192




[CV 2/3; 10/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam
[CV 3/3; 10/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam
Iteration 1, loss = 3.48614730
Iteration 1, loss = 3.48560208




Validation score: 0.157997
Validation score: 0.155689




[CV 3/3; 7/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam;, score=-3.402 total time=  39.4s
[CV 1/3; 8/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.410 total time=  39.8s
[CV 1/3; 11/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam
[CV 2/3; 11/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam
[CV 3/3; 8/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.402 total time=  43.8s
[CV 2/3; 8/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.404 total time=  43.9s
[CV 1/3; 12/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam[CV 3/3; 11/24] START activation=tanh, alpha=0.05, hidd



Iteration 1, loss = 3.60075783
Iteration 1, loss = 3.60630688
Iteration 1, loss = 3.60630688




Validation score: 0.153192
Validation score: 0.150995




Validation score: 0.151279
Validation score: 0.151279




[CV 2/3; 10/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam;, score=-3.479 total time=  27.8s
[CV 1/3; 10/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam;, score=-3.473 total time=  30.1s
[CV 2/3; 12/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam
[CV 3/3; 10/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam;, score=-3.476 total time=  28.9s
[CV 3/3; 9/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.476 total time=  30.7s
[CV 3/3; 12/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam
[CV 1/3; 13/24] START activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam
[CV 2/3; 13/24] START activation=relu, alph



Validation score: 0.155326




Iteration 1, loss = 3.53572669




Validation score: 0.152512




Iteration 1, loss = 3.53709332




Iteration 1, loss = 3.53572669




Validation score: 0.151864




[CV 2/3; 11/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.455 total time=  34.1s
[CV 3/3; 13/24] START activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam
Validation score: 0.152512




[CV 1/3; 11/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.453 total time=  35.8s
[CV 1/3; 14/24] START activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam
[CV 3/3; 11/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.448 total time=  34.4s
[CV 2/3; 14/24] START activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam
[CV 1/3; 12/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam;, score=-3.453 total time=  34.9s
[CV 3/3; 14/24] START activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam
Iteration 1, loss = 3.53660923




Iteration 1, loss = 3.53709332




Validation score: 0.155326




Validation score: 0.151864




Iteration 1, loss = 3.44801131
Iteration 1, loss = 3.44288462




Validation score: 0.154077




Validation score: 0.158661




[CV 3/3; 12/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam;, score=-3.448 total time=  32.5s
[CV 1/3; 15/24] START activation=relu, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam
[CV 2/3; 12/24] END activation=tanh, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam;, score=-3.455 total time=  33.7s
[CV 2/3; 15/24] START activation=relu, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam
[CV 1/3; 13/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam;, score=-3.405 total time=  33.5s
[CV 3/3; 15/24] START activation=relu, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam
[CV 2/3; 13/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam;, score=-3.398 total time=  33.8s
[CV 1/3; 16/24] START activation=r



Validation score: 0.156970




Iteration 1, loss = 3.44801131




Validation score: 0.154077




Iteration 1, loss = 3.44288462




Iteration 1, loss = 3.44523181
[CV 3/3; 13/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam;, score=-3.399 total time=  36.1s
Validation score: 0.158661




[CV 2/3; 16/24] START activation=relu, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam
Validation score: 0.156970




[CV 1/3; 14/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.405 total time=  36.9s
[CV 3/3; 16/24] START activation=relu, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam
Iteration 1, loss = 3.53193998
Iteration 1, loss = 3.52681134




Validation score: 0.152686




Validation score: 0.151011




Iteration 1, loss = 3.52549062




Iteration 1, loss = 3.52681134
Validation score: 0.151580




Validation score: 0.151011




[CV 2/3; 14/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.398 total time=  40.3s
[CV 3/3; 14/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.399 total time=  41.1s
[CV 1/3; 17/24] START activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam
[CV 2/3; 17/24] START activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam
[CV 1/3; 15/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.441 total time=  31.1s
[CV 2/3; 15/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.438 total time=  30.6s
[CV 3/3; 15/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.443 total time=  30.5



Iteration 1, loss = 3.52549062
Validation score: 0.152686




Validation score: 0.151580




[CV 2/3; 16/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam;, score=-3.438 total time=  29.4s
[CV 3/3; 16/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam;, score=-3.443 total time=  27.4s
[CV 1/3; 19/24] START activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam
[CV 2/3; 19/24] START activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam
Iteration 1, loss = 3.48366659




Iteration 1, loss = 3.48500898
Validation score: 0.155768




Iteration 1, loss = 3.48500898
Iteration 1, loss = 3.48350782




Validation score: 0.157855




Validation score: 0.157855
Validation score: 0.156906
Iteration 1, loss = 3.48366659
Iteration 1, loss = 3.48350782




Validation score: 0.155768
Validation score: 0.156906




[CV 1/3; 17/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.417 total time=  30.9s
[CV 2/3; 17/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.418 total time=  30.5s
[CV 3/3; 17/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.413 total time=  28.4s
[CV 3/3; 18/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam;, score=-3.413 total time=  27.8s
[CV 1/3; 18/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam;, score=-3.417 total time=  28.6s
[CV 2/3; 18/24] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam;, score=-3.418 total time=  28.1s
[CV 3/3; 19/24] START activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solv



Validation score: 0.152196
Validation score: 0.158345




[CV 1/3; 19/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam;, score=-3.406 total time=  28.0s
[CV 2/3; 19/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam;, score=-3.398 total time=  27.7s
[CV 1/3; 22/24] START activation=relu, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam
[CV 3/3; 21/24] START activation=relu, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam
Iteration 1, loss = 3.54428118
Iteration 1, loss = 3.54285695




Validation score: 0.151453
Validation score: 0.150647




Iteration 1, loss = 3.46726208




Validation score: 0.155958




Iteration 1, loss = 3.46891099




[CV 2/3; 21/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.442 total time=  21.4s
Validation score: 0.152196




[CV 1/3; 21/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.447 total time=  21.6s
Iteration 1, loss = 3.46632818
Iteration 1, loss = 3.46726208




[CV 3/3; 22/24] START activation=relu, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam
[CV 2/3; 22/24] START activation=relu, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam
Validation score: 0.158345




Validation score: 0.155958
[CV 3/3; 19/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam;, score=-3.398 total time=  25.8s
[CV 1/3; 23/24] START activation=relu, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam
[CV 1/3; 20/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.406 total time=  25.6s
[CV 2/3; 23/24] START activation=relu, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam
[CV 2/3; 20/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.398 total time=  27.9s
[CV 3/3; 20/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.398 total time=  28.3s
[CV 3/3; 23/24] START activation=relu, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam
[CV 1/3; 24/24] START a



Validation score: 0.151232
Validation score: 0.150647




[CV 1/3; 22/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam;, score=-3.447 total time=  20.0s
[CV 3/3; 21/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.444 total time=  20.1s
Iteration 1, loss = 3.54428118




[CV 3/3; 24/24] START activation=relu, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam
Validation score: 0.151453
[CV 2/3; 24/24] START activation=relu, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam
Iteration 1, loss = 3.54203195




Validation score: 0.151232




[CV 2/3; 22/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam;, score=-3.442 total time=  15.5s
[CV 3/3; 22/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam;, score=-3.444 total time=  15.7s
Iteration 1, loss = 3.51340346




Validation score: 0.155800




Iteration 1, loss = 3.51456632




Validation score: 0.158503




Iteration 1, loss = 3.51290639
Iteration 1, loss = 3.51340346
[CV 1/3; 23/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.425 total time=  19.9s




Validation score: 0.156654
Validation score: 0.155800




[CV 2/3; 23/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.426 total time=  19.8s
Iteration 1, loss = 3.51456632
Iteration 1, loss = 3.51290639




Validation score: 0.158503
Validation score: 0.156654




[CV 3/3; 23/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.421 total time=  18.2s
[CV 1/3; 24/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam;, score=-3.425 total time=  18.1s
[CV 2/3; 24/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam;, score=-3.426 total time=  13.8s
[CV 3/3; 24/24] END activation=relu, alpha=0.05, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam;, score=-3.421 total time=  13.9s
Iteration 1, loss = 3.44967922




Validation score: 0.156238




Unnamed: 0,xhome_win_mlp_search_clipped_rf_drive_preds,xovertime_mlp_search_clipped_rf_drive_preds,xaway_win_mlp_search_clipped_rf_drive_preds
0,0.705842,0.037698,0.256461
2,0.691698,0.042614,0.265688
3,0.671838,0.043616,0.284546
4,0.647115,0.045379,0.307506
5,0.728288,0.039809,0.231903
...,...,...,...
1930679,0.000275,0.000249,0.999476
1930680,0.000394,0.000333,0.999273
1930683,0.000187,0.000188,0.999626
1930685,0.000118,0.000128,0.999754


Unnamed: 0,-50,-49,-48,-47,-46,-45,-44,-43,-42,-41,-40,-39,-38,-37,-36,-35,-34,-33,-32,-31,-30,-29,-28,-27,-26,-25,-24,-23,-22,-21,-20,-19,-18,-17,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50
0,1.063882e-03,7.058262e-04,4.602159e-04,2.197077e-04,3.032222e-04,9.690249e-04,6.368054e-04,4.947485e-04,1.112594e-03,9.328005e-04,5.537844e-04,7.176245e-04,1.813745e-03,9.159564e-04,8.641231e-04,2.376468e-03,1.719738e-03,4.361552e-04,2.031661e-03,2.266164e-03,1.840513e-03,1.243668e-03,0.003880,3.724587e-03,0.002112,2.827818e-03,5.114649e-03,2.817427e-03,0.004154,0.007256,0.003870,0.002745,0.007454,0.007497,0.004781,0.003729,0.010564,0.006034,0.005282,0.008388,0.012790,0.004998,0.009207,0.023485,0.011188,0.011246,0.015582,0.026236,0.009679,0.016140,0.037698,0.012260,0.011237,0.037983,0.020246,0.009673,0.016191,0.036687,0.012508,0.007761,0.026927,0.015746,0.008776,0.013995,0.033126,0.012229,0.011999,0.023472,0.017949,0.010155,0.014540,0.026295,0.010803,0.011429,0.023634,1.645302e-02,7.184739e-03,1.440259e-02,0.021005,1.064799e-02,8.053953e-03,1.948269e-02,1.360311e-02,6.496597e-03,1.244845e-02,1.505403e-02,5.872752e-03,6.605758e-03,1.512258e-02,7.688820e-03,3.485357e-03,7.528138e-03,1.485246e-02,4.258290e-03,2.328684e-03,9.152668e-03,4.403837e-03,1.308036e-03,4.014418e-03,1.476870e-02,3.400012e-02
1,9.991768e-04,6.446407e-04,3.946388e-04,1.792092e-04,2.521961e-04,8.870417e-04,5.656977e-04,4.088975e-04,1.048534e-03,8.711686e-04,4.824758e-04,6.531565e-04,1.721502e-03,8.120453e-04,7.434430e-04,2.379482e-03,1.606442e-03,3.920618e-04,1.936264e-03,2.085166e-03,1.694620e-03,1.139210e-03,0.003817,3.533075e-03,0.002094,2.834336e-03,5.137203e-03,2.745533e-03,0.004033,0.007309,0.003590,0.002607,0.007986,0.007599,0.004595,0.003786,0.011317,0.006046,0.005129,0.008598,0.013878,0.004792,0.009313,0.027241,0.011177,0.011478,0.016597,0.028218,0.009833,0.018508,0.042614,0.011888,0.011258,0.043837,0.019687,0.009770,0.016511,0.039144,0.011593,0.007867,0.027107,0.014838,0.007887,0.013778,0.034280,0.011161,0.011812,0.022448,0.016735,0.009979,0.014153,0.025289,0.009729,0.011275,0.023635,1.522029e-02,6.502861e-03,1.348556e-02,0.019777,9.697696e-03,7.555025e-03,1.945055e-02,1.282582e-02,5.632213e-03,1.168834e-02,1.429357e-02,5.103491e-03,5.825464e-03,1.516685e-02,7.039965e-03,2.993501e-03,6.683160e-03,1.418248e-02,3.789771e-03,1.976931e-03,8.520747e-03,3.804895e-03,1.056170e-03,3.524722e-03,1.487115e-02,3.536836e-02
2,1.016872e-03,6.843404e-04,4.128136e-04,1.876062e-04,2.621324e-04,9.207953e-04,5.874099e-04,4.162362e-04,1.123439e-03,9.292925e-04,4.970375e-04,6.884954e-04,1.831759e-03,8.512283e-04,7.611257e-04,2.568687e-03,1.714474e-03,4.217234e-04,2.032630e-03,2.249146e-03,1.782633e-03,1.215898e-03,0.004152,3.733544e-03,0.002201,3.023137e-03,5.567072e-03,2.947321e-03,0.004230,0.007971,0.003907,0.002811,0.008623,0.008354,0.004978,0.004045,0.012336,0.006646,0.005490,0.009482,0.015215,0.005141,0.009898,0.029226,0.011753,0.011857,0.017858,0.029804,0.010117,0.020025,0.043616,0.012427,0.011617,0.045141,0.020392,0.010012,0.016922,0.040021,0.011677,0.008083,0.027338,0.015336,0.007988,0.014051,0.034694,0.011046,0.011746,0.022113,0.016456,0.009709,0.014107,0.024625,0.009503,0.010819,0.022566,1.446337e-02,6.091865e-03,1.281580e-02,0.018904,9.043034e-03,7.114052e-03,1.814632e-02,1.176584e-02,5.091449e-03,1.056156e-02,1.332913e-02,4.577554e-03,5.189509e-03,1.392626e-02,6.443653e-03,2.709555e-03,5.973972e-03,1.281137e-02,3.409769e-03,1.818801e-03,7.526952e-03,3.402263e-03,9.658979e-04,3.144384e-03,1.339085e-02,3.083302e-02
3,1.056408e-03,7.366870e-04,4.368839e-04,1.992735e-04,2.761596e-04,9.643473e-04,6.171883e-04,4.243453e-04,1.232615e-03,1.011255e-03,5.195039e-04,7.400390e-04,1.985825e-03,9.007862e-04,7.877139e-04,2.824191e-03,1.853142e-03,4.603041e-04,2.164403e-03,2.483582e-03,1.909897e-03,1.307840e-03,0.004584,4.011022e-03,0.002332,3.299922e-03,6.108773e-03,3.193926e-03,0.004483,0.008883,0.004292,0.003080,0.009406,0.009351,0.005412,0.004357,0.013710,0.007409,0.005896,0.010464,0.016759,0.005526,0.010615,0.031661,0.012510,0.012154,0.019423,0.031836,0.010470,0.021387,0.045379,0.012870,0.012044,0.046301,0.021334,0.010217,0.017429,0.040951,0.011771,0.008332,0.027553,0.015700,0.008057,0.014130,0.034865,0.010807,0.011568,0.021602,0.015950,0.009339,0.013866,0.023735,0.009121,0.010291,0.021204,1.353121e-02,5.586580e-03,1.202371e-02,0.017731,8.277981e-03,6.542388e-03,1.666845e-02,1.059584e-02,4.495339e-03,9.331638e-03,1.214645e-02,4.007768e-03,4.499626e-03,1.246396e-02,5.780140e-03,2.390329e-03,5.199503e-03,1.133002e-02,2.994827e-03,1.639923e-03,6.469284e-03,2.959360e-03,8.710776e-04,2.735737e-03,1.173421e-02,2.607207e-02
4,7.141891e-04,4.628233e-04,2.925084e-04,1.342910e-04,1.957434e-04,6.821021e-04,4.516182e-04,3.461257e-04,7.509680e-04,6.279262e-04,3.730853e-04,4.706845e-04,1.294147e-03,6.230031e-04,5.787318e-04,1.856904e-03,1.238299e-03,2.778130e-04,1.456373e-03,1.603543e-03,1.309611e-03,8.752800e-04,0.003132,2.860783e-03,0.001717,2.216497e-03,4.074733e-03,2.198343e-03,0.003346,0.006122,0.002967,0.002095,0.006752,0.006409,0.003952,0.003192,0.009613,0.005245,0.004508,0.007701,0.012216,0.004278,0.008427,0.023984,0.009967,0.010729,0.015357,0.026062,0.009120,0.017049,0.039809,0.011059,0.010898,0.042110,0.019398,0.009453,0.016235,0.038669,0.011897,0.007711,0.029022,0.015523,0.008124,0.014743,0.036495,0.011949,0.012210,0.025077,0.018069,0.010135,0.015019,0.027984,0.010444,0.012134,0.026465,1.685657e-02,7.029213e-03,1.503770e-02,0.021838,1.063782e-02,8.090307e-03,2.151562e-02,1.399383e-02,6.111813e-03,1.290473e-02,1.583178e-02,5.613218e-03,6.212314e-03,1.650056e-02,7.447411e-03,3.191051e-03,7.418453e-03,1.587769e-02,3.935765e-03,2.047995e-03,9.197998e-03,4.037890e-03,1.058330e-03,3.782462e-03,1.558928e-02,3.570868e-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1359854,1.527868e-10,4.167490e-09,3.633627e-09,2.385674e-09,5.783556e-09,3.206179e-09,2.242721e-08,1.349538e-08,1.166153e-08,8.211138e-09,5.247377e-09,4.763964e-09,5.572361e-08,1.935120e-08,4.289261e-09,2.556949e-07,4.107672e-08,1.407087e-08,9.304502e-08,8.009518e-08,7.959545e-08,1.377219e-07,0.000002,2.563003e-07,0.000002,7.902895e-07,6.594187e-07,4.452675e-07,0.000007,0.000020,0.000011,0.000003,0.000021,0.000014,0.000013,0.000085,0.001259,0.000199,0.000182,0.000290,0.000536,0.000267,0.003531,0.031767,0.004746,0.000346,0.002763,0.014895,0.000893,0.006254,0.775715,0.002790,0.001553,0.034258,0.002089,0.000547,0.020104,0.085196,0.002819,0.000320,0.002135,0.000214,0.000143,0.000596,0.002889,0.000249,0.000018,0.000078,0.000016,0.000006,0.000032,0.000097,0.000009,0.000006,0.000003,7.428969e-07,3.296883e-07,1.449233e-06,0.000006,1.001622e-07,7.770987e-08,3.014284e-07,6.066886e-08,1.388685e-08,8.481805e-08,4.692923e-07,1.171550e-08,1.654985e-09,3.149846e-08,1.205508e-08,4.425570e-09,1.058758e-08,1.714345e-07,1.166094e-09,4.729665e-09,1.208198e-09,8.171282e-10,1.008854e-09,1.268464e-09,1.996786e-08,2.728358e-09
1359855,1.810844e-10,4.734246e-09,4.169367e-09,2.737372e-09,6.766918e-09,3.854150e-09,2.649062e-08,1.672729e-08,1.333080e-08,9.245832e-09,6.212053e-09,5.453628e-09,6.249992e-08,2.314229e-08,5.403187e-09,2.872220e-07,4.774982e-08,1.550696e-08,1.056151e-07,9.271504e-08,9.270409e-08,1.626468e-07,0.000002,2.970898e-07,0.000002,9.069217e-07,7.818195e-07,5.212000e-07,0.000008,0.000023,0.000013,0.000004,0.000025,0.000016,0.000016,0.000097,0.001394,0.000229,0.000216,0.000356,0.000624,0.000316,0.003905,0.032959,0.005089,0.000432,0.003445,0.015519,0.001021,0.007570,0.739905,0.003451,0.001872,0.041460,0.002555,0.000681,0.023590,0.100604,0.003497,0.000399,0.002822,0.000285,0.000191,0.000822,0.003856,0.000333,0.000025,0.000113,0.000023,0.000008,0.000047,0.000138,0.000012,0.000009,0.000005,1.100544e-06,4.760720e-07,2.139635e-06,0.000009,1.556232e-07,1.174687e-07,4.475576e-07,9.450825e-08,2.263306e-08,1.343417e-07,6.912636e-07,1.866779e-08,2.630922e-09,4.786082e-08,1.838764e-08,6.852886e-09,1.682940e-08,2.638056e-07,1.856862e-09,7.079957e-09,1.957885e-09,1.322435e-09,1.454114e-09,1.982006e-09,3.121707e-08,4.285529e-09
1359856,1.330963e-10,3.826851e-09,3.270753e-09,2.215619e-09,5.251010e-09,2.831837e-09,1.992658e-08,1.128628e-08,1.073986e-08,7.832571e-09,4.728869e-09,4.374158e-09,5.133629e-08,1.686381e-08,3.658436e-09,2.378979e-07,3.741493e-08,1.349246e-08,8.416487e-08,7.174209e-08,7.154118e-08,1.254980e-07,0.000001,2.215751e-07,0.000002,7.145153e-07,5.779068e-07,3.919584e-07,0.000007,0.000019,0.000010,0.000003,0.000017,0.000012,0.000011,0.000077,0.001148,0.000181,0.000155,0.000242,0.000468,0.000229,0.003283,0.029288,0.004549,0.000273,0.002346,0.014793,0.000804,0.005365,0.807131,0.002300,0.001330,0.028862,0.001793,0.000457,0.017289,0.070118,0.002424,0.000261,0.001567,0.000166,0.000111,0.000436,0.002095,0.000188,0.000013,0.000052,0.000011,0.000004,0.000022,0.000065,0.000006,0.000004,0.000002,4.898598e-07,2.290278e-07,9.699451e-07,0.000004,6.458428e-08,4.953580e-08,1.969321e-07,3.768138e-08,8.419984e-09,5.036856e-08,2.921680e-07,7.162778e-09,1.019749e-09,1.957568e-08,7.740230e-09,2.828013e-09,6.187735e-09,1.049904e-07,7.265818e-10,3.071861e-09,7.027271e-10,4.887826e-10,6.975751e-10,7.800716e-10,1.169684e-08,1.569914e-09
1359857,1.156560e-10,3.442589e-09,2.799614e-09,1.908732e-09,4.423731e-09,2.404012e-09,1.663201e-08,8.584640e-09,9.916423e-09,7.329372e-09,3.973084e-09,3.939820e-09,4.730402e-08,1.414587e-08,2.922406e-09,2.212842e-07,3.367743e-08,1.250640e-08,7.576194e-08,6.434763e-08,6.304352e-08,1.102870e-07,0.000001,1.911142e-07,0.000002,6.531562e-07,5.097784e-07,3.435282e-07,0.000006,0.000018,0.000009,0.000002,0.000014,0.000012,0.000009,0.000067,0.001063,0.000161,0.000127,0.000199,0.000406,0.000190,0.002959,0.027591,0.004201,0.000206,0.001931,0.014371,0.000699,0.004458,0.838712,0.001790,0.001064,0.023213,0.001497,0.000351,0.013902,0.055343,0.001925,0.000202,0.001091,0.000122,0.000077,0.000300,0.001459,0.000128,0.000009,0.000032,0.000007,0.000003,0.000014,0.000041,0.000004,0.000003,0.000001,2.981671e-07,1.405800e-07,5.861891e-07,0.000002,3.701721e-08,2.868105e-08,1.199424e-07,2.163904e-08,4.486815e-09,2.716160e-08,1.689323e-07,3.814705e-09,5.557511e-10,1.142584e-08,4.461526e-09,1.585084e-09,3.266677e-09,5.790909e-08,3.986564e-10,1.750310e-09,3.656904e-10,2.602703e-10,4.187278e-10,4.174396e-10,6.375699e-09,8.513991e-10


In [22]:
model_df[mask_model_score_diff][input_names_score_diff_pred+ ["continuation", output_name]]

Unnamed: 0,time_left_in_half,half,current_score_diff,current_score_total,cur_spread,cur_over_under,home_timeouts_remaining,away_timeouts_remaining,ytg,yd_from_goal,down,home_team_has_ball,kick_off,point_after_play,search_rf_play_first_down_home,search_rf_play_first_down_away,search_rf_drive_field_goal_made_home,search_rf_drive_field_goal_missed_home,search_rf_drive_punt_home,search_rf_drive_safety_home,search_rf_drive_touch_down_home,search_rf_drive_turnover_home,search_rf_drive_turnover_on_downs_home,search_rf_drive_field_goal_made_away,search_rf_drive_field_goal_missed_away,search_rf_drive_punt_away,search_rf_drive_safety_away,search_rf_drive_touch_down_away,search_rf_drive_turnover_away,search_rf_drive_turnover_on_downs_away,continuation,end_of_regulation_score_diff_change_clipped
0,1800.0,1.0,0,0,-3.308451,55.707539,3,3,-1,-1,0,1,1,0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,41
2,1800.0,1.0,0,0,-3.308451,55.707539,3,3,10,75,1,1,0,0,0.235855,0.0,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,41
3,1766.0,1.0,0,0,-3.308451,55.707539,3,3,5,70,2,1,0,0,0.235855,0.0,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,41
4,1739.0,1.0,0,0,-3.308451,55.707539,3,3,1,66,3,1,0,0,0.235855,0.0,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,41
5,1705.0,1.0,0,0,-3.308451,55.707539,3,3,10,55,1,1,0,0,0.235855,0.0,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1930679,43.0,2.0,-15,51,3.000000,53.000000,2,2,10,57,1,1,0,0,0.235855,0.0,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1930680,39.0,2.0,-15,51,3.000000,53.000000,2,2,10,46,1,1,0,0,0.235855,0.0,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1930683,34.0,2.0,-15,51,3.000000,53.000000,1,2,22,58,2,1,0,0,0.235855,0.0,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1930685,28.0,2.0,-15,51,3.000000,53.000000,0,2,28,64,3,1,0,0,0.235855,0.0,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


In [27]:
model_df

Unnamed: 0,game_code,game_date,season,home_team_id,home_team,home_team_abbrev,home_conf_name,home_division_type,away_team_id,away_team,away_team_abbrev,away_conf_name,away_division_type,home_final_score,away_final_score,final_score_diff,end_of_regulation_score_diff,home_rest_of_game_score,away_rest_of_game_score,end_of_regulation_score_diff_change,home_score_added,away_score_added,current_score_diff,current_score_total,home_start_score,away_start_score,home_team_outcome,home_team_win,draw,away_team_win,nevent,quarter,overtime,home_team_has_ball,off_team_id,def_team_id,kick_off,punt,point_after_kick,two_point_attempt,field_goal_attempt,off_start_score,off_end_score,off_score_change,def_start_score,def_end_score,def_score_change,play_counts,efficiency_counts,from_scrimmage,first_down,scoring_play,possession_change,continuation,event_name,event_id,yards_gained,drive_outcome_id,drive_outcome_desc,down,ytg,yd_from_goal,drive_id,drive_start,play_start_time,cur_spread,cur_over_under,half,home_timeout,away_timeout,home_timeouts_remaining,away_timeouts_remaining,time_left_in_game,sequence,play_start_id,turnover,touchdown_scored,fieldgoal_made,touchdown_in_play,turnover_in_play,field_goal_in_play,first_down_in_play,play_outcome,drive_outcome_desc_basic,end_of_regulation_score_total_diff,time_left_in_half,search_rf_play_field_goal_made,search_rf_play_field_goal_missed,search_rf_play_first_down,search_rf_play_none,search_rf_play_offensive_touchdown,search_rf_play_punt,search_rf_play_turnover,search_rf_drive_clock,search_rf_drive_field_goal_made,search_rf_drive_field_goal_missed,search_rf_drive_punt,search_rf_drive_safety,search_rf_drive_touch_down,search_rf_drive_turnover,search_rf_drive_turnover_on_downs,search_rf_play_field_goal_made_home,search_rf_play_field_goal_missed_home,search_rf_play_first_down_home,search_rf_play_none_home,search_rf_play_offensive_touchdown_home,search_rf_play_punt_home,search_rf_play_turnover_home,search_rf_play_field_goal_made_away,search_rf_play_field_goal_missed_away,search_rf_play_first_down_away,search_rf_play_none_away,search_rf_play_offensive_touchdown_away,search_rf_play_punt_away,search_rf_play_turnover_away,search_rf_drive_clock_home,search_rf_drive_field_goal_made_home,search_rf_drive_field_goal_missed_home,search_rf_drive_punt_home,search_rf_drive_safety_home,search_rf_drive_touch_down_home,search_rf_drive_turnover_home,search_rf_drive_turnover_on_downs_home,search_rf_drive_clock_away,search_rf_drive_field_goal_made_away,search_rf_drive_field_goal_missed_away,search_rf_drive_punt_away,search_rf_drive_safety_away,search_rf_drive_touch_down_away,search_rf_drive_turnover_away,search_rf_drive_turnover_on_downs_away,away_vegas_score_pred,home_vegas_score_pred,away_vegas_score_pred_weighted,home_vegas_score_pred_weighted,point_after_play,end_of_regulation_score_diff_change_clipped,xhome_win_mlp_search_clipped_rf_drive_preds,xovertime_mlp_search_clipped_rf_drive_preds,xaway_win_mlp_search_clipped_rf_drive_preds,xend_of_regulation_score_diff_mlp_search_clipped_rf_drive_preds,end_of_regulation_score_total_diff_clipped
0,1178084,2012-08-30 17:15:00,2012,3499,Utah Utes,Utah,Pac-12,FBS,3710,Northern Colorado Bears,NorCol,Big Sky,FCS,41,0,41,41,41,0,41,0,0,0,0,0,0,W,1,0,0,1,1,0,1,3710,3499,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,Kick Off,5,65.0,,,0,-1,-1,,3600.0,900.0,-3.308451,55.707539,1.0,0,0,3,3,3600.0,0,1,0,0,0,0,0,0,0,none,,41,1800.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.199544,29.507995,26.199544,29.507995,0,41,0.705842,0.037698,0.256461,12.244753,41
1,1178084,2012-08-30 17:15:00,2012,3499,Utah Utes,Utah,Pac-12,FBS,3710,Northern Colorado Bears,NorCol,Big Sky,FCS,41,0,41,41,41,0,41,0,0,0,0,0,0,W,1,0,0,2,1,0,1,3499,3710,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,Touchback,21,0.0,,,0,-1,-1,,3600.0,,-3.308451,55.707539,1.0,0,0,3,3,3600.0,1,1,0,0,0,0,0,0,0,none,,41,1800.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.199544,29.507995,26.199544,29.507995,0,41,,,,,41
2,1178084,2012-08-30 17:15:00,2012,3499,Utah Utes,Utah,Pac-12,FBS,3710,Northern Colorado Bears,NorCol,Big Sky,FCS,41,0,41,41,41,0,41,0,0,0,0,0,0,W,1,0,0,3,1,0,1,3499,3710,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,Run,4,5.0,9.0,Intercepted Pass,1,10,75,1.0,3600.0,900.0,-3.308451,55.707539,1.0,0,0,3,3,3600.0,0,3,0,0,0,0,0,0,0,none,turnover,41,1800.0,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.199544,29.507995,26.199544,29.507995,0,41,0.691698,0.042614,0.265688,11.692183,41
3,1178084,2012-08-30 17:15:00,2012,3499,Utah Utes,Utah,Pac-12,FBS,3710,Northern Colorado Bears,NorCol,Big Sky,FCS,41,0,41,41,41,0,41,0,0,0,0,0,0,W,1,0,0,4,1,0,1,3499,3710,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,Run,4,4.0,9.0,Intercepted Pass,2,5,70,1.0,3600.0,866.0,-3.308451,55.707539,1.0,0,0,3,3,3566.0,0,4,0,0,0,0,0,0,0,none,turnover,41,1766.0,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.199544,29.507995,25.952104,29.229308,0,41,0.671838,0.043616,0.284546,10.534613,41
4,1178084,2012-08-30 17:15:00,2012,3499,Utah Utes,Utah,Pac-12,FBS,3710,Northern Colorado Bears,NorCol,Big Sky,FCS,41,0,41,41,41,0,41,0,0,0,0,0,0,W,1,0,0,5,1,0,1,3499,3710,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,Run,4,11.0,9.0,Intercepted Pass,3,1,66,1.0,3600.0,839.0,-3.308451,55.707539,1.0,0,0,3,3,3539.0,0,5,0,0,0,0,0,0,1,first_down,turnover,41,1739.0,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.199544,29.507995,25.755607,29.007998,0,41,0.647115,0.045379,0.307506,9.173082,41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1930683,2405840,2022-01-10 20:00:00,2021,3478,Alabama Crimson Tide,Ala,Southeastern,FBS,3473,Georgia Bulldogs,UGa,Southeastern,FBS,18,33,-15,-15,0,0,0,0,0,-15,51,18,33,L,0,0,1,234,4,0,1,3478,3473,0,0,0,0,0,18,18,0,33,33,0,1,1,1,0,0,0,0,Sack,3,-6.0,40.0,Downs,2,22,58,13.0,54.0,34.0,3.000000,53.000000,2.0,0,0,1,2,34.0,0,234,0,0,0,0,0,0,0,none,turnover_on_downs,0,34.0,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.000000,25.000000,0.264444,0.236111,0,0,0.000187,0.000188,0.999626,-14.575555,0
1930684,2405840,2022-01-10 20:00:00,2021,3478,Alabama Crimson Tide,Ala,Southeastern,FBS,3473,Georgia Bulldogs,UGa,Southeastern,FBS,18,33,-15,-15,0,0,0,0,0,-15,51,18,33,L,0,0,1,235,4,0,1,3478,3473,0,0,0,0,0,18,18,0,33,33,0,1,0,0,0,0,0,0,Offense Timeout,57,,40.0,Downs,0,-1,-1,13.0,54.0,,3.000000,53.000000,2.0,1,0,0,2,34.0,0,235,0,0,0,0,0,0,0,none,turnover_on_downs,0,34.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.000000,25.000000,0.264444,0.236111,0,0,,,,,0
1930685,2405840,2022-01-10 20:00:00,2021,3478,Alabama Crimson Tide,Ala,Southeastern,FBS,3473,Georgia Bulldogs,UGa,Southeastern,FBS,18,33,-15,-15,0,0,0,0,0,-15,51,18,33,L,0,0,1,236,4,0,1,3478,3473,0,0,0,0,0,18,18,0,33,33,0,1,1,1,0,0,0,0,Pass Completion,1,17.0,40.0,Downs,3,28,64,13.0,54.0,28.0,3.000000,53.000000,2.0,0,0,0,2,28.0,0,236,0,0,0,0,0,0,0,none,turnover_on_downs,0,28.0,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.000000,25.000000,0.217778,0.194444,0,0,0.000118,0.000128,0.999754,-14.717601,0
1930686,2405840,2022-01-10 20:00:00,2021,3478,Alabama Crimson Tide,Ala,Southeastern,FBS,3473,Georgia Bulldogs,UGa,Southeastern,FBS,18,33,-15,-15,0,0,0,0,0,-15,51,18,33,L,0,0,1,237,4,0,1,3478,3473,0,0,0,0,0,18,18,0,33,33,0,1,1,1,0,0,1,0,Sack,3,-12.0,40.0,Downs,4,11,47,13.0,54.0,4.0,3.000000,53.000000,2.0,0,0,0,2,4.0,0,237,0,0,0,0,0,0,0,none,turnover_on_downs,0,4.0,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.014819,0.005277,0.235855,0.616493,0.049244,0.05936,0.018953,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034446,0.133544,0.043166,0.315783,0.000739,0.328911,0.081106,0.062305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.000000,25.000000,0.031111,0.027778,0,0,0.000161,0.000173,0.999666,-14.600335,0


In [30]:
model_df["end_of_regulation_score_total_diff_clipped"] = np.clip(model_df["end_of_regulation_score_total_diff"], 0, 105)


output_name = "end_of_regulation_score_total_diff_clipped"
input_names_total_score_pred = list(search_mlp_score_diff_clipped_rf_drive_preds.feature_names_in_) + ["xend_of_regulation_score_diff_mlp_search_clipped_rf_drive_preds"]
X_train, y_train, group_train, X_test, y_test, group_test, X_val, y_val, group_val = create_train_test_val_df(model_df[mask_model_score_diff], input_names_total_score_pred, output_name, normalize=True)


mlp_total_score_search_base = MLPClassifier(verbose=True, early_stopping=True, n_iter_no_change=5, random_state=1)
search_mlp_total_score = GridSearchCV(mlp_total_score_search_base, mlp_grid, cv=cv,n_jobs=-1,verbose=100, scoring=log_loss_scorer)
search_mlp_total_score.fit(X_train,y_train,groups=group_train)


Fitting 3 folds for each of 24 candidates, totalling 72 fits
[CV 1/3; 2/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam
[CV 1/3; 1/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam
[CV 2/3; 1/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam
[CV 2/3; 2/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam
[CV 3/3; 1/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam
[CV 3/3; 2/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam
[CV 1/3; 3/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam
[CV 2/3; 3/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learnin



[CV 2/3; 3/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.344 total time=  22.9s
[CV 1/3; 3/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.337 total time=  23.1s




[CV 1/3; 4/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam
[CV 3/3; 3/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam




[CV 1/3; 2/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.319 total time=  31.4s
[CV 2/3; 4/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam
[CV 2/3; 1/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam;, score=-3.324 total time=  35.8s
[CV 2/3; 2/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.324 total time=  35.8s
[CV 3/3; 1/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam;, score=-3.329 total time=  36.0s
[CV 3/3; 2/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=-3.329 total time=  36.0s
[CV 1/3; 1/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=



[CV 1/3; 4/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam;, score=-3.337 total time=  21.4s
[CV 3/3; 3/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam;, score=-3.347 total time=  21.6s




[CV 2/3; 6/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam
[CV 3/3; 6/24] START activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam
[CV 2/3; 4/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam;, score=-3.344 total time=  19.7s
[CV 1/3; 7/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam




[CV 3/3; 4/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(10, 30, 10), learning_rate=adaptive, solver=adam;, score=-3.347 total time=  20.6s
[CV 2/3; 7/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam




[CV 1/3; 6/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam;, score=-3.335 total time=  28.5s
[CV 2/3; 5/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.340 total time=  29.0s
[CV 3/3; 5/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.337 total time=  29.1s
[CV 1/3; 5/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam;, score=-3.335 total time=  29.4s
[CV 3/3; 7/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam
[CV 3/3; 8/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam
[CV 1/3; 8/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam
[CV 2/3; 8/24] START activation=tanh, alpha=0.05, hidde



[CV 2/3; 6/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam;, score=-3.340 total time=  27.1s
[CV 3/3; 6/24] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam;, score=-3.337 total time=  27.5s




[CV 1/3; 9/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam
[CV 2/3; 9/24] START activation=tanh, alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate=constant, solver=adam




KeyboardInterrupt: 