In [1]:
from utils import normalize_df, create_train_test_val_df
import os
notebook_dir = os.getcwd()
root_dir = os.path.abspath(os.path.join(notebook_dir, '..'))
data_dir = os.path.join(root_dir, 'data')
import numpy as np
import pandas as pd
from sklearn.metrics import brier_score_loss, make_scorer, log_loss, mean_squared_error
from IPython.display import display_html
from copy import deepcopy
import pickle
from sklearn import calibration
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import GroupKFold, RandomizedSearchCV, cross_val_predict, GridSearchCV
from sklearn.linear_model import LogisticRegression
# from utils.utils
pd.set_option("display.max_columns", None)


In [2]:
def normalize_df(df, anchor_df=None):
    for col in df.columns:
        data = df[col]
        if anchor_df is None:
            df[col] = (data - np.min(data)) / (np.max(data) - np.min(data))
        else:
            df[col] = (data - np.min(anchor_df[col])) / (np.max(anchor_df[col]) - np.min(anchor_df[col]))
    return df

def create_train_test_val_df(
    df,
    input_names,
    output_name,
    group_col="game_code",
    mask_test_season=2021,
    mask_val_season=2020,
    normalize=False
):
    mask_train = ~(df.season.isin([mask_test_season, mask_val_season]))
    mask_test = (df.season == mask_test_season)
    mask_val = (df.season == mask_val_season)
    if normalize==False:
        X_train = df.loc[mask_train, input_names]
        X_test = df.loc[mask_test, input_names]
        X_val = df.loc[mask_val, input_names]
    else:
        X_train = normalize_df(df.loc[mask_train, input_names])
        X_test = normalize_df(df.loc[mask_test, input_names], df.loc[mask_train, input_names])
        X_val = normalize_df(df.loc[mask_val, input_names], df.loc[mask_train, input_names])
    y_train = df[mask_train][output_name]
    group_train = df[mask_train][group_col]
    y_test = df[mask_test][output_name]
    group_test = df[mask_test][group_col]
    y_val = df[mask_val][output_name]
    group_val = df[mask_val][group_col]
    return X_train, y_train, group_train, X_test, y_test, group_test, X_val, y_val, group_val
# model_df["game_type_desc"].value_counts()

def flip_and_reverse_it(df, negative_cols=[]):
    reverse_df = deepcopy(df)
    for col in negative_cols:
        reverse_df[col] = -df[col]
    
    home_team_cols = [col for col in df.columns if 'home' in col]
    away_team_cols = [col for col in df.columns if 'away' in col]
    # print(team_cols)
    for col in home_team_cols:
        try:
            df[col.replace("home", "team")] = df.loc[:, col]
            df[col.replace("home", "opp")] = df.loc[:, col.replace("home", "away")]
            reverse_df[col.replace("home", "team")] = df.loc[:, col.replace("home", "away")]
            reverse_df[col.replace("home", "opp")] = df.loc[:, col]
        except:
            print(col)
        # print(home_team_cols + away_team_cols)
    # for col in away_team_cols:
    #     reverse_df[col.replace("away", "team")] = df.loc[col]
    #     df[col.replace("away", "team")] = df.loc[col.replace("away", "home")]
    # df = df.drop(home_team_cols + away_team_cols, axis=1)
    # reverse_df = reverse_df.drop(home_team_cols + away_team_cols, axis=1)
    return pd.concat([df, reverse_df])
    # for col in one_minus_cols:
    #     reverse_df[col] = 1 - 


In [3]:
event_df = pd.read_parquet(os.path.join(data_dir, "event_data.parquet"))
event_df = event_df.drop_duplicates(["nevent", "game_code"]).reset_index(drop=True)
odds_df = pd.read_parquet(os.path.join(data_dir, "odds_data.parquet"))
odds_df = odds_df.drop_duplicates("game_code")
event_ids = event_df[["event_id", "event_name"]].drop_duplicates().sort_values("event_id").reset_index(drop=True)
event_ids.to_csv(os.path.join(data_dir, "event_ids.csv"))


event_df["half"] = round((event_df["quarter"] + 0.01) / 2)
event_df["home_timeout"] = np.where(((event_df["event_id"]==57)&(event_df["home_team_has_ball"]==1))|((event_df["event_id"]==58)&(event_df["home_team_has_ball"]==0)), 1, 0)
event_df["away_timeout"] = np.where(((event_df["event_id"]==57)&(event_df["home_team_has_ball"]==0))|((event_df["event_id"]==58)&(event_df["home_team_has_ball"]==1)), 1, 0)
event_df["home_timeouts_remaining"] = np.clip(3 - event_df.groupby(["game_code", "half"])["home_timeout"].cumsum(), 0, 3)
event_df["away_timeouts_remaining"] = np.clip(3 - event_df.groupby(["game_code", "half"])["away_timeout"].cumsum(), 0, 3)
event_df["time_left_in_game"] = np.where(event_df["quarter"] <= 4, event_df["play_start_time"] + (4 - event_df["quarter"]) * 900, event_df["play_start_time"])
event_df["time_elapsed"] = 900 - event_df["play_start_time"] + (event_df["quarter"] - 1) * 900


event_df[["cur_spread", "cur_over_under"]] = event_df.merge(odds_df, how="left", on="game_code")[["cur_spread", "cur_over_under"]].fillna({"cur_spread": np.mean(odds_df["cur_spread"]), "cur_over_under": np.mean(odds_df["cur_over_under"])})
event_df["sequence"] = event_df["continuation"].groupby(event_df["continuation"].eq(0).cumsum()).cumsum()
event_df["play_start_id"] = event_df["nevent"] - event_df["sequence"]

turnover_ids = [9, 16]
# mask_turnover_on_downs = (event_df["down"]==4)&(event_df["field_goal_attempt"]==0)&(event_df["punt"]==0)&(event_df["yards_gained"]<event_df["ytg"])&(event_df["home_team_has_ball"]!=event_df.shift(-1)["home_team_has_ball"])
event_df["turnover"] = np.where(event_df["event_id"].isin(turnover_ids), 1, 0)
event_df["touchdown_scored"] = np.where(event_df["home_score_added"]+event_df["away_score_added"]>=6, 1, 0)
event_df["fieldgoal_made"] = np.where(event_df["home_score_added"]+event_df["away_score_added"]==3, 1, 0)

play_outcome_aggregate =event_df[["game_code", "play_start_id", "turnover", "touchdown_scored", "fieldgoal_made", "first_down"]].groupby(["game_code", "play_start_id"], as_index=False).sum()
# touchdown_key_df =event_df[["game_code", "play_start_id", "turnover"]].groupby(["game_code", "play_start_id"], as_index=False).sum()
event_df["touchdown_in_play"] = np.clip(event_df.merge(play_outcome_aggregate,on=["game_code", "play_start_id"], how="left")["touchdown_scored_y"], 0, 1)
event_df["turnover_in_play"] = np.clip(event_df.merge(play_outcome_aggregate,on=["game_code", "play_start_id"], how="left")["turnover_y"], 0, 1)
event_df["field_goal_in_play"] = np.clip(event_df.merge(play_outcome_aggregate,on=["game_code", "play_start_id"], how="left")["fieldgoal_made_y"], 0, 1)
event_df["first_down_in_play"] = np.clip(event_df.merge(play_outcome_aggregate,on=["game_code", "play_start_id"], how="left")["first_down_y"], 0, 1)


# drive_outcome_aggregate =event_df[["game_code", "off_team_id", "drive_id", "turnover", "touchdown_scored", "fieldgoal_made"]].groupby(["game_code", "off_team_id", "drive_id"], as_index=False).sum()
# event_df["touchdown_in_drive"] = np.clip(event_df.merge(drive_outcome_aggregate,on=["game_code", "drive_id", "off_team_id"], how="left")["touchdown_scored_y"], 0, 1)
# event_df["turnover_in_drive"] = np.clip(event_df.merge(drive_outcome_aggregate,on=["game_code", "drive_id", "off_team_id"], how="left")["turnover_y"], 0, 1)
# event_df["field_goal_in_drive"] = np.clip(event_df.merge(drive_outcome_aggregate,on=["game_code", "drive_id", "off_team_id"], how="left")["fieldgoal_made_y"], 0, 1)

event_df["play_outcome"] = (
    np.where((event_df["turnover_in_play"]==1), "turnover",
    # (event_df["touchdown_in_play"]==1)&(event_df["turnover_in_play"]==1), "defensive_touchdown", 
    np.where((event_df["punt"]==1), "punt",
    np.where((event_df["field_goal_in_play"]==1), "field_goal_made",
    np.where((event_df["field_goal_attempt"]==1)&(event_df["field_goal_in_play"]==0), "field_goal_missed",
    np.where((event_df["first_down_in_play"]==1)&(event_df["touchdown_in_play"]==0)&(event_df["turnover_in_play"]==0)&(event_df["punt"]==0), "first_down",
    np.where((event_df["touchdown_in_play"]==1)&(event_df["turnover_in_play"]==0), "offensive_touchdown", "none"))))))
)
drive_description_matrix = {
    7: "punt",
    9: "turnover",
    14: "turnover",
    17: "field_goal_made",
    18: "punt",
    20: "safety",
    35: "field_goal_missed",
    36: "field_goal_missed",
    37: "touch_down",
    38: "clock",
    39: "clock",
    40: "turnover_on_downs",
    42: "field_goal_made",
    51: "clock",
}
event_df["drive_outcome_desc_basic"] = event_df["drive_outcome_id"].map(drive_description_matrix)

# event_df["drive_outcome"] = np.where(
#     (event_df["touchdown_in_drive"]==1)&(event_df["turnover_in_drive"]==1), "defensive_touchdown", 
#     np.where((event_df["touchdown_in_drive"]==1)&(event_df["turnover_in_drive"]==0), "offensive_touchdown",
#     np.where((event_df["field_goal_in_drive"]==1), "field_goal_made",
#     np.where((event_df["touchdown_in_drive"]==0)&(event_df["turnover_in_drive"]==1), "turnover", "none"
# ))))
scrimmage_plays_we_want = [1, 2, 3, 4, 7, 9, 14, 17, 18, 35]
game_end_of_regulation_total_score = event_df[event_df.overtime==0].groupby("game_code", as_index=False).max()[["game_code", "home_start_score", "away_start_score"]]
game_end_of_regulation_total_score["end_of_regulation_score_total"] = game_end_of_regulation_total_score["home_start_score"] + game_end_of_regulation_total_score["away_start_score"]
# event_df["end_of_regulation_score_total_diff"] = 
event_df["end_of_regulation_score_total_diff"] = (
    event_df.merge(game_end_of_regulation_total_score, on="game_code")["end_of_regulation_score_total"]
    - (event_df["home_start_score"] + event_df["away_start_score"])
)

  game_end_of_regulation_total_score = event_df[event_df.overtime==0].groupby("game_code", as_index=False).max()[["game_code", "home_start_score", "away_start_score"]]


In [4]:
# overtime_games = event_df[event_df.overtime==1]

output_name = "play_outcome"

model_df = deepcopy(event_df)
model_df["time_left_in_half"] = event_df["time_left_in_game"] - ((2 - event_df["half"]) * 1800)
model_df["from_scrimmage"] = np.where(event_df["event_id"].isin([22, 52, 53, 55, 47, 54, 56]), 0, event_df["from_scrimmage"])
model_df["ytg"] = np.where(model_df["from_scrimmage"] == 0, -1, event_df["ytg"])
model_df["down"] = np.where(model_df["from_scrimmage"] == 0, 0, event_df["down"])
model_df["home_team_has_ball"] = np.where(event_df["event_id"].isin([5]), 1 - event_df["home_team_has_ball"], event_df["home_team_has_ball"])
model_df["yd_from_goal"] = np.where(model_df["from_scrimmage"] == 0, -1, event_df["yd_from_goal"])
model_df["point_after_play"] = np.where(model_df["point_after_kick"] + model_df["two_point_attempt"]==1, 1, 0)
model_df["offense_point_diff"] = np.where(model_df["home_team_has_ball"]==1, model_df["current_score_diff"], -model_df["current_score_diff"])
model_df["play_start_time_sq"] = model_df["play_start_time"]**2
model_df["yd_from_goal_sq"] = model_df["yd_from_goal"]**2
model_df["yd_from_goal_cu"] = model_df["yd_from_goal"]**3
model_df["down_sq"] = model_df["down"]**2
model_df["ytg_sq"] = model_df["ytg"]**2


model_df["away_vegas_score_pred"] = np.clip(model_df["cur_over_under"], 30, 80) * 0.5 + model_df["cur_spread"] * 0.5
model_df["home_vegas_score_pred"] = np.clip(model_df["cur_over_under"], 30, 80) * 0.5 - model_df["cur_spread"] * 0.5
model_df["away_vegas_score_pred_weighted"] = model_df["away_vegas_score_pred"] * (model_df["time_left_in_game"] / 3600)
model_df["home_vegas_score_pred_weighted"] = model_df["home_vegas_score_pred"] * (model_df["time_left_in_game"] / 3600)
model_df["kicking_vegas_pred"] = np.where(model_df["home_team_has_ball"] == 1, model_df["home_vegas_score_pred"], model_df["away_vegas_score_pred"])
model_df["receiving_vegas_pred"] = np.where(model_df["home_team_has_ball"] == 0, model_df["home_vegas_score_pred"], model_df["away_vegas_score_pred"])
model_df["cur_over_under"] = np.clip(event_df["cur_over_under"], 30, 80)
model_df["is_playoff_game"] = np.where(model_df["game_type_id"] == 1, 0, 1)
team_specific_cols = []
for col in ["ytg", "ytg_sq", "yd_from_goal", "yd_from_goal_sq", "yd_from_goal_cu", "down", "down_sq", "kick_off"]:
    model_df["home_" + col] = model_df[col].where(model_df["home_team_has_ball"]==1, 0)
    model_df["away_" + col] = model_df[col].where(model_df["home_team_has_ball"]==0, 0)
    team_specific_cols = team_specific_cols + ["home_" + col] + ["away_" + col]
model_df["away_team_has_ball"] = 1 - model_df["home_team_has_ball"]
model_df["is_home"] = 1
model_df["is_away"] = 0


search_rf_play_outcome = pickle.load(open(os.path.join(root_dir, "models/search_rf_play_outcome.p"), 'rb'))
# search_mlp_play_outcome = pickle.load(open(os.path.join(root_dir, "models/search_mlp_play_outcome.p"), 'rb'))
search_rf_drive_outcome = pickle.load(open(os.path.join(root_dir, "models/search_rf_drive_outcome.p"), 'rb'))
# search_mlp_drive_outcome = pickle.load(open(os.path.join(root_dir, "models/search_mlp_drive_outcome.p"), 'rb'))




https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


In [5]:
input_names_play_and_drive_preds = [
    'time_left_in_half',
    'half',
    'current_score_diff',
    'current_score_total',
    # 'home_vegas_score_pred_weighted',
    # 'away_vegas_score_pred_weighted',
    'cur_spread',
    'cur_over_under',
    'home_timeouts_remaining',
    'away_timeouts_remaining',
    'ytg',
    'yd_from_goal',
    'down',
    'home_team_has_ball',
]

field_goal_data = model_df[(model_df["field_goal_attempt"]==1)&(model_df["play_counts"]==1)][input_names_play_and_drive_preds + ["event_name", "play_counts", "yards_gained", "season", "home_vegas_score_pred", "away_vegas_score_pred", "game_code", "nevent", "kicking_vegas_pred", "receiving_vegas_pred", "drive_outcome_desc_basic", "offense_point_diff", "yd_from_goal_sq", "yd_from_goal_cu"]].reset_index(drop=True)
input_names_field_goal_logit = [
    # 'time_left_in_half',
    # 'half',
    'offense_point_diff',
    'current_score_total',
    'kicking_vegas_pred',
    'receiving_vegas_pred',
    'yd_from_goal',
    'yd_from_goal_sq',
    'yd_from_goal_cu',
    'home_team_has_ball',    
]
field_goal_prediction_inputs = normalize_df(model_df[(model_df.continuation==0)&(model_df.yd_from_goal!=-1)][input_names_field_goal_logit], field_goal_data[field_goal_data["season"]<2020]).dropna()
logit_field_goal_made = pickle.load(open(os.path.join(root_dir, "models/logit_field_goal_made.p"), "rb"))
model_df[["xfield_goal_missed", "xfield_goal_made"]] = pd.DataFrame(logit_field_goal_made.predict_proba(field_goal_prediction_inputs), field_goal_prediction_inputs.index)
model_df["xfield_goal_made_home"] = model_df["xfield_goal_made"].where(model_df["home_team_has_ball"]==1, 0).fillna(0)
model_df["xfield_goal_made_away"] = model_df["xfield_goal_made"].where(model_df["home_team_has_ball"]==0, 0).fillna(0)


model_df

Unnamed: 0,game_code,game_date,season,game_type_id,game_type_desc,home_team_id,home_team,home_team_abbrev,away_team_id,away_team,away_team_abbrev,home_final_score,away_final_score,final_score_diff,end_of_regulation_score_diff,home_rest_of_game_score,away_rest_of_game_score,end_of_regulation_score_diff_change,home_score_added,away_score_added,current_score_diff,current_score_total,home_start_score,away_start_score,home_team_outcome,home_team_win,draw,away_team_win,nevent,quarter,overtime,home_team_has_ball,off_team_id,def_team_id,kick_off,punt,point_after_kick,two_point_attempt,field_goal_attempt,off_start_score,off_end_score,off_score_change,def_start_score,def_end_score,def_score_change,play_counts,efficiency_counts,from_scrimmage,first_down,scoring_play,possession_change,continuation,event_name,event_id,yards_gained,drive_outcome_id,drive_outcome_desc,down,ytg,yd_from_goal,drive_id,drive_start,play_start_time,half,home_timeout,away_timeout,home_timeouts_remaining,away_timeouts_remaining,time_left_in_game,time_elapsed,cur_spread,cur_over_under,sequence,play_start_id,turnover,touchdown_scored,fieldgoal_made,touchdown_in_play,turnover_in_play,field_goal_in_play,first_down_in_play,play_outcome,drive_outcome_desc_basic,end_of_regulation_score_total_diff,time_left_in_half,point_after_play,offense_point_diff,play_start_time_sq,yd_from_goal_sq,yd_from_goal_cu,down_sq,ytg_sq,away_vegas_score_pred,home_vegas_score_pred,away_vegas_score_pred_weighted,home_vegas_score_pred_weighted,kicking_vegas_pred,receiving_vegas_pred,is_playoff_game,home_ytg,away_ytg,home_ytg_sq,away_ytg_sq,home_yd_from_goal,away_yd_from_goal,home_yd_from_goal_sq,away_yd_from_goal_sq,home_yd_from_goal_cu,away_yd_from_goal_cu,home_down,away_down,home_down_sq,away_down_sq,home_kick_off,away_kick_off,away_team_has_ball,is_home,is_away,xfield_goal_missed,xfield_goal_made,xfield_goal_made_home,xfield_goal_made_away
0,819846,2008-09-04,2008,1,Regular Season,351,New York Giants,NYG,363,Washington Redskins,Was,16,7,9,9,16,7,9,0,0,0,0,0,0,W,1,0,0,1,1,0,1,363,351,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,Kick Off,5,73.0,,,0,-1,-1,,3600,900.0,1.0,0,0,3,3,3600.0,0.0,-4.5,41.5,0,1,0,0,0,0,0,0,0,none,,23,1800.0,0,0,810000.0,1,-1,0,1,18.5,23.0,18.500000,23.000000,23.0,18.5,0,-1,0,1,0,-1,0,1,0,-1,0,0,0,0,0,1,0,0,1,0,,,0.000000e+00,0.000000
1,819846,2008-09-04,2008,1,Regular Season,351,New York Giants,NYG,363,Washington Redskins,Was,16,7,9,9,16,7,9,0,0,0,0,0,0,W,1,0,0,2,1,0,1,351,363,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,Kick Off Return,6,19.0,,,0,-1,-1,,3600,900.0,1.0,0,0,3,3,3600.0,0.0,-4.5,41.5,1,1,0,0,0,0,0,0,0,none,,23,1800.0,0,0,810000.0,1,-1,0,1,18.5,23.0,18.500000,23.000000,23.0,18.5,0,-1,0,1,0,-1,0,1,0,-1,0,0,0,0,0,1,0,0,1,0,,,0.000000e+00,0.000000
2,819846,2008-09-04,2008,1,Regular Season,351,New York Giants,NYG,363,Washington Redskins,Was,16,7,9,9,16,7,9,0,0,0,0,0,0,W,1,0,0,3,1,0,1,351,363,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,Run,4,3.0,37.0,TD,1,10,84,1.0,3600,895.0,1.0,0,0,3,3,3595.0,5.0,-4.5,41.5,0,3,0,0,0,0,0,0,0,none,touch_down,23,1795.0,0,0,801025.0,7056,592704,1,100,18.5,23.0,18.474306,22.968056,23.0,18.5,0,10,0,100,0,84,0,7056,0,592704,0,1,0,1,0,0,0,0,1,0,1.000000,4.771848e-09,4.771848e-09,0.000000
3,819846,2008-09-04,2008,1,Regular Season,351,New York Giants,NYG,363,Washington Redskins,Was,16,7,9,9,16,7,9,0,0,0,0,0,0,W,1,0,0,4,1,0,1,351,363,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,Incomplete Pass,2,0.0,37.0,TD,2,7,81,1.0,3600,860.0,1.0,0,0,3,3,3560.0,40.0,-4.5,41.5,0,4,0,0,0,0,0,0,0,none,touch_down,23,1760.0,0,0,739600.0,6561,531441,4,49,18.5,23.0,18.294444,22.744444,23.0,18.5,0,7,0,49,0,81,0,6561,0,531441,0,2,0,4,0,0,0,0,1,0,1.000000,6.069402e-08,6.069402e-08,0.000000
4,819846,2008-09-04,2008,1,Regular Season,351,New York Giants,NYG,363,Washington Redskins,Was,16,7,9,9,16,7,9,0,0,0,0,0,0,W,1,0,0,5,1,0,1,351,363,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,Pass Completion,1,8.0,37.0,TD,3,7,81,1.0,3600,854.0,1.0,0,0,3,3,3554.0,46.0,-4.5,41.5,0,5,0,0,0,0,0,0,1,first_down,touch_down,23,1754.0,0,0,729316.0,6561,531441,9,49,18.5,23.0,18.263611,22.706111,23.0,18.5,0,7,0,49,0,81,0,6561,0,531441,0,3,0,9,0,0,0,0,1,0,1.000000,6.069402e-08,6.069402e-08,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
816793,2337728,2022-02-13,2021,3,Super Bowl,327,Cincinnati Bengals,Cin,343,Los Angeles Rams,LAR,20,23,-3,-3,0,0,0,0,0,-3,43,20,23,L,0,0,1,201,4,0,1,327,343,0,0,0,0,0,20,20,0,23,23,0,1,1,1,0,0,0,0,Run,4,0.0,40.0,Downs,3,1,49,13.0,85,48.0,2.0,0,0,2,3,48.0,3552.0,4.5,48.5,0,201,0,0,0,0,0,0,0,none,turnover_on_downs,0,48.0,0,-3,2304.0,2401,117649,9,1,26.5,22.0,0.353333,0.293333,22.0,26.5,1,1,0,1,0,49,0,2401,0,117649,0,3,0,9,0,0,0,0,1,0,0.822606,1.773944e-01,1.773944e-01,0.000000
816794,2337728,2022-02-13,2021,3,Super Bowl,327,Cincinnati Bengals,Cin,343,Los Angeles Rams,LAR,20,23,-3,-3,0,0,0,0,0,-3,43,20,23,L,0,0,1,202,4,0,1,327,343,0,0,0,0,0,20,20,0,23,23,0,1,0,0,0,0,0,0,Offense Timeout,57,,40.0,Downs,0,-1,-1,13.0,85,43.0,2.0,1,0,1,3,43.0,3557.0,4.5,48.5,0,202,0,0,0,0,0,0,0,none,turnover_on_downs,0,43.0,0,-3,1849.0,1,-1,0,1,26.5,22.0,0.316528,0.262778,22.0,26.5,1,-1,0,1,0,-1,0,1,0,-1,0,0,0,0,0,0,0,0,1,0,,,0.000000e+00,0.000000
816795,2337728,2022-02-13,2021,3,Super Bowl,327,Cincinnati Bengals,Cin,343,Los Angeles Rams,LAR,20,23,-3,-3,0,0,0,0,0,-3,43,20,23,L,0,0,1,203,4,0,1,327,343,0,0,0,0,0,20,20,0,23,23,0,1,1,1,0,0,1,0,Incomplete Pass,2,0.0,40.0,Downs,4,1,49,13.0,85,43.0,2.0,0,0,1,3,43.0,3557.0,4.5,48.5,0,203,0,0,0,0,0,0,0,none,turnover_on_downs,0,43.0,0,-3,1849.0,2401,117649,16,1,26.5,22.0,0.316528,0.262778,22.0,26.5,1,1,0,1,0,49,0,2401,0,117649,0,4,0,16,0,0,0,0,1,0,0.822606,1.773944e-01,1.773944e-01,0.000000
816796,2337728,2022-02-13,2021,3,Super Bowl,327,Cincinnati Bengals,Cin,343,Los Angeles Rams,LAR,20,23,-3,-3,0,0,0,0,0,-3,43,20,23,L,0,0,1,204,4,0,0,343,327,0,0,0,0,0,23,23,0,20,20,0,1,1,1,0,0,0,0,Run,4,-1.0,39.0,End Game,1,10,51,13.0,39,39.0,2.0,0,0,1,3,39.0,3561.0,4.5,48.5,0,204,0,0,0,0,0,0,0,none,clock,0,39.0,0,3,1521.0,2601,132651,1,100,26.5,22.0,0.287083,0.238333,26.5,22.0,1,0,10,0,100,0,51,0,2601,0,132651,0,1,0,1,0,0,1,1,0,0.891863,1.081367e-01,0.000000e+00,0.108137


In [6]:
home_team_outcome_number_map = {"W": 1, "L": -1, "T": 0}

In [7]:
model_df["home_team_outcome_number"] = model_df["home_team_outcome"].map(home_team_outcome_number_map)
model_df["away_team_outcome_number"] = - model_df["home_team_outcome"].map(home_team_outcome_number_map)

In [8]:
model_df[["home_team_outcome_number", "home_team_outcome"]].value_counts()

home_team_outcome_number  home_team_outcome
 1                        W                    454883
-1                        L                    358907
 0                        T                      3008
dtype: int64

In [33]:
input_names_score_pred = [
    'time_left_in_half',
    # 'half',
    'current_score_diff',
    'current_score_total',
    # 'home_vegas_score_pred_weighted',
    # 'away_vegas_score_pred_weighted',
    'cur_spread',
    'cur_over_under',
    'home_timeouts_remaining',
    'away_timeouts_remaining',
    'home_ytg',
    'away_ytg',
    'home_yd_from_goal',
    'away_yd_from_goal',
    'home_down',
    'away_down',
    'home_team_has_ball',
    'away_team_has_ball',
    'is_home',
    'is_away',
    'home_kick_off',
    'away_kick_off',
    'is_playoff_game',
    # 'point_after_kick',
    # 'two_point_attempt',
    # 'point_after_play',
]
output_name = "final_score_diff_change"
model_df["final_score_diff_change"] = model_df["final_score_diff"] - model_df["current_score_diff"]

mask_model = (
    (model_df.continuation==0)&
    (model_df[input_names_score_pred+[output_name]].notna().all(axis=1))&
    ~(model_df.event_id.isin([57,58,13]))&
    (model_df["overtime"]==1)&
    (model_df["final_score_diff"]!=2)
)
search_rf_play_outcome_classes = ["search_rf_play_" + x for x in search_rf_play_outcome.classes_]
search_rf_play_outcome_classes_home = ["home_" + x for x in search_rf_play_outcome_classes]
search_rf_play_outcome_classes_away = ["away_" + x for x in search_rf_play_outcome_classes]

search_rf_drive_outcome_classes = ["search_rf_drive_" + x for x in search_rf_drive_outcome.classes_]
search_rf_drive_outcome_classes_home = ["home_" + x for x in search_rf_drive_outcome_classes]
search_rf_drive_outcome_classes_away = ["away_" + x for x in search_rf_drive_outcome_classes]

search_rf_play_outcome.best_estimator_.verbose=0
search_rf_drive_outcome.best_estimator_.verbose=0

model_df[search_rf_play_outcome_classes] = pd.DataFrame(search_rf_play_outcome.predict_proba(model_df[mask_model][search_rf_play_outcome.feature_names_in_]), index=model_df[mask_model].index)
model_df[search_rf_play_outcome_classes_home] = model_df[search_rf_play_outcome_classes].where((model_df["home_team_has_ball"]==1)&(model_df["down"]!=0), 0)
model_df[search_rf_play_outcome_classes_away] = model_df[search_rf_play_outcome_classes].where((model_df["home_team_has_ball"]==0)&(model_df["down"]!=0), 0)

model_df[search_rf_drive_outcome_classes] = pd.DataFrame(search_rf_drive_outcome.predict_proba(model_df[mask_model][search_rf_drive_outcome.feature_names_in_]), index=model_df[mask_model].index)
model_df[search_rf_drive_outcome_classes_home] = model_df[search_rf_drive_outcome_classes].where((model_df["home_team_has_ball"]==1)&(model_df["down"]!=0), 0)
model_df[search_rf_drive_outcome_classes_away] = model_df[search_rf_drive_outcome_classes].where((model_df["home_team_has_ball"]==0)&(model_df["down"]!=0), 0)


extra_cols = ["season", "game_code", output_name, "drive_id", "play_start_time", "nevent", "event_name", "event_id", "away_team_outcome_number"]

overtime_df = flip_and_reverse_it(model_df[mask_model], negative_cols=["cur_spread", "current_score_diff", output_name])
overtime_df["half"] = 2


overtime_df[search_rf_play_outcome_classes] = pd.DataFrame(search_rf_play_outcome.predict_proba(overtime_df[mask_model][search_rf_play_outcome.feature_names_in_]), index=overtime_df[mask_model].index)
overtime_df[search_rf_play_outcome_classes_home] = overtime_df[search_rf_play_outcome_classes].where((overtime_df["home_team_has_ball"]==1)&(overtime_df["down"]!=0), 0)
overtime_df[search_rf_play_outcome_classes_away] = overtime_df[search_rf_play_outcome_classes].where((overtime_df["home_team_has_ball"]==0)&(overtime_df["down"]!=0), 0)

overtime_df[search_rf_drive_outcome_classes] = pd.DataFrame(search_rf_drive_outcome.predict_proba(overtime_df[mask_model][search_rf_drive_outcome.feature_names_in_]), index=overtime_df[mask_model].index)
overtime_df[search_rf_drive_outcome_classes_home] = overtime_df[search_rf_drive_outcome_classes].where((overtime_df["home_team_has_ball"]==1)&(overtime_df["down"]!=0), 0)
overtime_df[search_rf_drive_outcome_classes_away] = overtime_df[search_rf_drive_outcome_classes].where((overtime_df["home_team_has_ball"]==0)&(overtime_df["down"]!=0), 0)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col.replace("home", "team")] = df.loc[:, col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col.replace("home", "opp")] = df.loc[:, col.replace("home", "away")]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col.replace("home", "team")] = df.loc[:, col]
A value is trying to be set on a co

home_team_outcome


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col.replace("home", "team")] = df.loc[:, col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col.replace("home", "opp")] = df.loc[:, col.replace("home", "away")]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col.replace("home", "team")] = df.loc[:, col]
A value is trying to be set on a co

In [11]:
overtime_df[search_rf_drive_outcome_classes].head(20)

Unnamed: 0,search_rf_drive_clock,search_rf_drive_field_goal_made,search_rf_drive_field_goal_missed,search_rf_drive_punt,search_rf_drive_safety,search_rf_drive_touch_down,search_rf_drive_turnover,search_rf_drive_turnover_on_downs
5466,0.0002,0.110696,0.012211,0.016158,0.0,0.784215,0.037822,0.038697
5468,0.005955,0.107141,0.017709,0.506681,0.003063,0.174973,0.135674,0.048804
5469,0.005584,0.098662,0.018102,0.554923,0.002103,0.154996,0.123425,0.042205
5471,0.004048,0.070719,0.018657,0.644344,0.001807,0.115437,0.104331,0.040657
5472,0.007674,0.198611,0.042451,0.269483,0.000112,0.296706,0.110281,0.074682
5473,0.00606,0.280828,0.062756,0.115607,1e-05,0.384219,0.084011,0.066509
5475,0.015466,0.278755,0.04584,0.154155,0.0,0.322853,0.078621,0.10431
5476,0.008369,0.334807,0.056886,0.055968,0.0,0.391472,0.093837,0.058661
5478,0.009468,0.332568,0.058587,0.046978,0.0,0.401273,0.097082,0.054044
5479,0.010784,0.33572,0.05368,0.032335,0.0,0.433215,0.077382,0.056884


In [34]:
overtime_first_play = overtime_df[["team_team_has_ball", "drive_id", "is_team", "game_code"]].dropna().drop_duplicates(["game_code", "is_team"])
overtime_df["first_team_team_has_ball"] = overtime_df.merge(overtime_first_play, on=["game_code", "is_team"], how="left")["team_team_has_ball_y"].values
overtime_df["first_drive_id"] = overtime_df.merge(overtime_first_play, on=["game_code", "is_team"], how="left")["drive_id_y"].values
overtime_df["start_of_overtime"] = np.where((overtime_df["season"]>=2017)&(overtime_df["is_playoff_game"]==0), 600, 900)
overtime_df["first_drive"] = ((overtime_df["first_team_team_has_ball"] == overtime_df["team_team_has_ball"] )&(overtime_df["first_drive_id"] == overtime_df["drive_id"]))|(overtime_df["start_of_overtime"]==overtime_df["play_start_time"])
overtime_df["field_goal_ends_game"] = np.where(((overtime_df["season"]>=2012)|((overtime_df["season"]>=2010)&(overtime_df["is_playoff_game"]==1)))&((overtime_df["first_drive"])|(~(overtime_df["first_drive"])&(overtime_df["current_score_diff"]!=0))), 0, 1)
overtime_df["field_goal_ends_game_team"] = overtime_df['field_goal_ends_game'].where(overtime_df.team_team_has_ball==1, 0)
overtime_df["field_goal_ends_game_opp"] = overtime_df['field_goal_ends_game'].where(overtime_df.opp_team_has_ball==1, 0)
overtime_df["xfield_goal_made_team_ends_game"] = overtime_df["xfield_goal_made_team"].where(overtime_df["field_goal_ends_game_team"]==1, 0)
overtime_df["xfield_goal_made_opp_ends_game"] = overtime_df["xfield_goal_made_opp"].where(overtime_df["field_goal_ends_game_opp"]==1, 0)
# overtime_df
overtime_df

Unnamed: 0,game_code,game_date,season,game_type_id,game_type_desc,home_team_id,home_team,home_team_abbrev,away_team_id,away_team,away_team_abbrev,home_final_score,away_final_score,final_score_diff,end_of_regulation_score_diff,home_rest_of_game_score,away_rest_of_game_score,end_of_regulation_score_diff_change,home_score_added,away_score_added,current_score_diff,current_score_total,home_start_score,away_start_score,home_team_outcome,home_team_win,draw,away_team_win,nevent,quarter,overtime,home_team_has_ball,off_team_id,def_team_id,kick_off,punt,point_after_kick,two_point_attempt,field_goal_attempt,off_start_score,off_end_score,off_score_change,def_start_score,def_end_score,def_score_change,play_counts,efficiency_counts,from_scrimmage,first_down,scoring_play,possession_change,continuation,event_name,event_id,yards_gained,drive_outcome_id,drive_outcome_desc,down,ytg,yd_from_goal,drive_id,drive_start,play_start_time,half,home_timeout,away_timeout,home_timeouts_remaining,away_timeouts_remaining,time_left_in_game,time_elapsed,cur_spread,cur_over_under,sequence,play_start_id,turnover,touchdown_scored,fieldgoal_made,touchdown_in_play,turnover_in_play,field_goal_in_play,first_down_in_play,play_outcome,drive_outcome_desc_basic,end_of_regulation_score_total_diff,time_left_in_half,point_after_play,offense_point_diff,play_start_time_sq,yd_from_goal_sq,yd_from_goal_cu,down_sq,ytg_sq,away_vegas_score_pred,home_vegas_score_pred,away_vegas_score_pred_weighted,home_vegas_score_pred_weighted,kicking_vegas_pred,receiving_vegas_pred,is_playoff_game,home_ytg,away_ytg,home_ytg_sq,away_ytg_sq,home_yd_from_goal,away_yd_from_goal,home_yd_from_goal_sq,away_yd_from_goal_sq,home_yd_from_goal_cu,away_yd_from_goal_cu,home_down,away_down,home_down_sq,away_down_sq,home_kick_off,away_kick_off,away_team_has_ball,is_home,is_away,xfield_goal_missed,xfield_goal_made,xfield_goal_made_home,xfield_goal_made_away,home_team_outcome_number,away_team_outcome_number,search_rf_play_field_goal_made,search_rf_play_field_goal_missed,search_rf_play_first_down,search_rf_play_none,search_rf_play_offensive_touchdown,search_rf_play_punt,search_rf_play_turnover,home_search_rf_play_field_goal_made,home_search_rf_play_field_goal_missed,home_search_rf_play_first_down,home_search_rf_play_none,home_search_rf_play_offensive_touchdown,home_search_rf_play_punt,home_search_rf_play_turnover,away_search_rf_play_field_goal_made,away_search_rf_play_field_goal_missed,away_search_rf_play_first_down,away_search_rf_play_none,away_search_rf_play_offensive_touchdown,away_search_rf_play_punt,away_search_rf_play_turnover,search_rf_drive_clock,search_rf_drive_field_goal_made,search_rf_drive_field_goal_missed,search_rf_drive_punt,search_rf_drive_safety,search_rf_drive_touch_down,search_rf_drive_turnover,search_rf_drive_turnover_on_downs,home_search_rf_drive_clock,home_search_rf_drive_field_goal_made,home_search_rf_drive_field_goal_missed,home_search_rf_drive_punt,home_search_rf_drive_safety,home_search_rf_drive_touch_down,home_search_rf_drive_turnover,home_search_rf_drive_turnover_on_downs,away_search_rf_drive_clock,away_search_rf_drive_field_goal_made,away_search_rf_drive_field_goal_missed,away_search_rf_drive_punt,away_search_rf_drive_safety,away_search_rf_drive_touch_down,away_search_rf_drive_turnover,away_search_rf_drive_turnover_on_downs,final_score_diff_change,team_team_id,opp_team_id,team_team,opp_team,team_team_abbrev,opp_team_abbrev,team_final_score,opp_final_score,team_rest_of_game_score,opp_rest_of_game_score,team_score_added,opp_score_added,team_start_score,opp_start_score,team_team_outcome,team_team_win,opp_team_win,team_team_has_ball,opp_team_has_ball,team_timeout,opp_timeout,team_timeouts_remaining,opp_timeouts_remaining,team_vegas_score_pred,opp_vegas_score_pred,team_vegas_score_pred_weighted,opp_vegas_score_pred_weighted,team_ytg,opp_ytg,team_ytg_sq,opp_ytg_sq,team_yd_from_goal,opp_yd_from_goal,team_yd_from_goal_sq,opp_yd_from_goal_sq,team_yd_from_goal_cu,opp_yd_from_goal_cu,team_down,opp_down,team_down_sq,opp_down_sq,team_kick_off,opp_kick_off,is_team,is_opp,xfield_goal_made_team,xfield_goal_made_opp,team_team_outcome_number,opp_team_outcome_number,team_search_rf_play_field_goal_made,opp_search_rf_play_field_goal_made,team_search_rf_play_field_goal_missed,opp_search_rf_play_field_goal_missed,team_search_rf_play_first_down,opp_search_rf_play_first_down,team_search_rf_play_none,opp_search_rf_play_none,team_search_rf_play_offensive_touchdown,opp_search_rf_play_offensive_touchdown,team_search_rf_play_punt,opp_search_rf_play_punt,team_search_rf_play_turnover,opp_search_rf_play_turnover,team_search_rf_drive_clock,opp_search_rf_drive_clock,team_search_rf_drive_field_goal_made,opp_search_rf_drive_field_goal_made,team_search_rf_drive_field_goal_missed,opp_search_rf_drive_field_goal_missed,team_search_rf_drive_punt,opp_search_rf_drive_punt,team_search_rf_drive_safety,opp_search_rf_drive_safety,team_search_rf_drive_touch_down,opp_search_rf_drive_touch_down,team_search_rf_drive_turnover,opp_search_rf_drive_turnover,team_search_rf_drive_turnover_on_downs,opp_search_rf_drive_turnover_on_downs,first_team_team_has_ball,first_drive_id,start_of_overtime,first_drive,field_goal_ends_game,field_goal_ends_game_team,field_goal_ends_game_opp,xfield_goal_made_team_ends_game,xfield_goal_made_opp_ends_game
5466,819900,2008-09-14,2008,1,Regular Season,361,Seattle Seahawks,Sea,359,San Francisco 49ers,SF,30,33,-3,0,0,3,0,0,0,0,60,30,30,L,0,0,1,239,5,1,0,361,359,1,0,0,0,0,30,30,0,30,30,0,1,0,0,0,0,1,0,Kick Off,5,70.0,,,0,-1,-1,,69,900.0,2,0,0,3,3,900.0,3600.0,-6.5,38.0,0,239,0,0,0,0,0,0,0,none,,0,2700.0,0,0,810000.0,1,-1,0,1,15.75,22.25,3.937500,5.562500,15.75,22.25,0,0,-1,0,1,0,-1,0,1,0,-1,0,0,0,0,0,1,1,1,0,,,0.0,0.000000e+00,-1,1,0.000673,0.000000,0.043037,0.406424,0.538219,0.000000,0.011647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000200,0.110696,0.012211,0.016158,0.000000,0.784215,0.037822,0.038697,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-3,361,359,Seattle Seahawks,San Francisco 49ers,Sea,SF,30,33,0,3,0,0,30,30,L,0,1,0,1,0,0,3,3,22.25,15.75,5.562500,3.937500,0,-1,0,1,0,-1,0,1,0,-1,0,0,0,0,0,1,1,0,0.000000,0.000000e+00,-1,1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,12.0,900,True,1,0,1,0.000000,0.000000e+00
5468,819900,2008-09-14,2008,1,Regular Season,361,Seattle Seahawks,Sea,359,San Francisco 49ers,SF,30,33,-3,0,0,3,0,0,0,0,60,30,30,L,0,0,1,241,5,1,0,359,361,0,0,0,0,0,30,30,0,30,30,0,1,1,1,0,0,0,0,Run,4,3.0,42.0,Made FG,1,10,80,12.0,0,900.0,2,0,0,3,3,900.0,3600.0,-6.5,38.0,0,241,0,0,0,0,0,0,0,none,field_goal_made,0,2700.0,0,0,810000.0,6400,512000,1,100,15.75,22.25,3.937500,5.562500,15.75,22.25,0,0,10,0,100,0,80,0,6400,0,512000,0,1,0,1,0,0,1,1,0,1.000000,1.182087e-07,0.0,1.182087e-07,-1,1,0.000012,0.000002,0.208299,0.753742,0.008889,0.000148,0.028909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000012,0.000002,0.208299,0.753742,0.008889,0.000148,0.028909,0.005955,0.107141,0.017709,0.506681,0.003063,0.174973,0.135674,0.048804,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005955,0.107141,0.017709,0.506681,0.003063,0.174973,0.135674,0.048804,-3,361,359,Seattle Seahawks,San Francisco 49ers,Sea,SF,30,33,0,3,0,0,30,30,L,0,1,0,1,0,0,3,3,22.25,15.75,5.562500,3.937500,0,10,0,100,0,80,0,6400,0,512000,0,1,0,1,0,0,1,0,0.000000,1.182087e-07,-1,1,0.000000,0.000012,0.000000,0.000002,0.000000,0.208299,0.000000,0.753742,0.000000,0.008889,0.000000,0.000148,0.000000,0.028909,0.000000,0.005955,0.000000,0.107141,0.000000,0.017709,0.000000,0.506681,0.000000,0.003063,0.000000,0.174973,0.000000,0.135674,0.000000,0.048804,0,12.0,900,True,1,0,1,0.000000,1.182087e-07
5469,819900,2008-09-14,2008,1,Regular Season,361,Seattle Seahawks,Sea,359,San Francisco 49ers,SF,30,33,-3,0,0,3,0,0,0,0,60,30,30,L,0,0,1,242,5,1,0,359,361,0,0,0,0,0,30,30,0,30,30,0,1,1,1,0,0,0,0,Incomplete Pass,2,0.0,42.0,Made FG,2,7,77,12.0,0,861.0,2,0,0,3,3,861.0,3639.0,-6.5,38.0,0,242,0,0,0,0,0,0,0,none,field_goal_made,0,2661.0,0,0,741321.0,5929,456533,4,49,15.75,22.25,3.766875,5.321458,15.75,22.25,0,0,7,0,49,0,77,0,5929,0,456533,0,2,0,4,0,0,1,1,0,0.999999,1.097856e-06,0.0,1.097856e-06,-1,1,0.000000,0.000000,0.309940,0.657674,0.008382,0.000000,0.024005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.309940,0.657674,0.008382,0.000000,0.024005,0.005584,0.098662,0.018102,0.554923,0.002103,0.154996,0.123425,0.042205,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005584,0.098662,0.018102,0.554923,0.002103,0.154996,0.123425,0.042205,-3,361,359,Seattle Seahawks,San Francisco 49ers,Sea,SF,30,33,0,3,0,0,30,30,L,0,1,0,1,0,0,3,3,22.25,15.75,5.321458,3.766875,0,7,0,49,0,77,0,5929,0,456533,0,2,0,4,0,0,1,0,0.000000,1.097856e-06,-1,1,0.000000,0.000000,0.000000,0.000000,0.000000,0.309940,0.000000,0.657674,0.000000,0.008382,0.000000,0.000000,0.000000,0.024005,0.000000,0.005584,0.000000,0.098662,0.000000,0.018102,0.000000,0.554923,0.000000,0.002103,0.000000,0.154996,0.000000,0.123425,0.000000,0.042205,0,12.0,900,True,1,0,1,0.000000,1.097856e-06
5471,819900,2008-09-14,2008,1,Regular Season,361,Seattle Seahawks,Sea,359,San Francisco 49ers,SF,30,33,-3,0,0,3,0,0,0,0,60,30,30,L,0,0,1,244,5,1,0,359,361,0,0,0,0,0,30,30,0,30,30,0,1,1,1,1,1,0,0,Pass Completion,1,33.0,42.0,Made FG,3,7,77,12.0,0,855.0,2,0,0,3,2,855.0,3645.0,-6.5,38.0,0,244,0,0,0,0,0,0,1,first_down,field_goal_made,0,2655.0,0,0,731025.0,5929,456533,9,49,15.75,22.25,3.740625,5.284375,15.75,22.25,0,0,7,0,49,0,77,0,5929,0,456533,0,3,0,9,0,0,1,1,0,0.999999,1.097856e-06,0.0,1.097856e-06,-1,1,0.000003,0.000003,0.404381,0.535224,0.016062,0.001486,0.042843,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000003,0.000003,0.404381,0.535224,0.016062,0.001486,0.042843,0.004048,0.070719,0.018657,0.644344,0.001807,0.115437,0.104331,0.040657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004048,0.070719,0.018657,0.644344,0.001807,0.115437,0.104331,0.040657,-3,361,359,Seattle Seahawks,San Francisco 49ers,Sea,SF,30,33,0,3,0,0,30,30,L,0,1,0,1,0,0,3,2,22.25,15.75,5.284375,3.740625,0,7,0,49,0,77,0,5929,0,456533,0,3,0,9,0,0,1,0,0.000000,1.097856e-06,-1,1,0.000000,0.000003,0.000000,0.000003,0.000000,0.404381,0.000000,0.535224,0.000000,0.016062,0.000000,0.001486,0.000000,0.042843,0.000000,0.004048,0.000000,0.070719,0.000000,0.018657,0.000000,0.644344,0.000000,0.001807,0.000000,0.115437,0.000000,0.104331,0.000000,0.040657,0,12.0,900,True,1,0,1,0.000000,1.097856e-06
5472,819900,2008-09-14,2008,1,Regular Season,361,Seattle Seahawks,Sea,359,San Francisco 49ers,SF,30,33,-3,0,0,3,0,0,0,0,60,30,30,L,0,0,1,245,5,1,0,359,361,0,0,0,0,0,30,30,0,30,30,0,1,1,1,0,0,0,0,Pass Completion,1,7.0,42.0,Made FG,1,10,44,12.0,0,815.0,2,0,0,3,2,815.0,3685.0,-6.5,38.0,0,245,0,0,0,0,0,0,0,none,field_goal_made,0,2615.0,0,0,664225.0,1936,85184,1,100,15.75,22.25,3.565625,5.037153,15.75,22.25,0,0,10,0,100,0,44,0,1936,0,85184,0,1,0,1,0,0,1,1,0,0.705809,2.941908e-01,0.0,2.941908e-01,-1,1,0.000037,0.000010,0.201260,0.756691,0.012854,0.000130,0.029018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000037,0.000010,0.201260,0.756691,0.012854,0.000130,0.029018,0.007674,0.198611,0.042451,0.269483,0.000112,0.296706,0.110281,0.074682,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007674,0.198611,0.042451,0.269483,0.000112,0.296706,0.110281,0.074682,-3,361,359,Seattle Seahawks,San Francisco 49ers,Sea,SF,30,33,0,3,0,0,30,30,L,0,1,0,1,0,0,3,2,22.25,15.75,5.037153,3.565625,0,10,0,100,0,44,0,1936,0,85184,0,1,0,1,0,0,1,0,0.000000,2.941908e-01,-1,1,0.000000,0.000037,0.000000,0.000010,0.000000,0.201260,0.000000,0.756691,0.000000,0.012854,0.000000,0.000130,0.000000,0.029018,0.000000,0.007674,0.000000,0.198611,0.000000,0.042451,0.000000,0.269483,0.000000,0.000112,0.000000,0.296706,0.000000,0.110281,0.000000,0.074682,0,12.0,900,True,1,0,1,0.000000,2.941908e-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
816397,2337724,2022-01-30,2021,7,Conf. Championship,339,Kansas City Chiefs,KC,327,Cincinnati Bengals,Cin,24,27,-3,0,0,3,0,0,0,0,48,24,24,L,0,0,1,209,5,1,0,327,339,0,0,0,0,0,24,24,0,24,24,0,1,1,1,0,0,0,0,Run,4,7.0,42.0,Made FG,1,10,32,10.0,-14,727.0,2,0,0,3,3,727.0,3773.0,7.0,54.5,0,209,0,0,0,0,0,0,0,none,field_goal_made,0,2527.0,0,0,528529.0,1024,32768,1,100,23.75,30.75,4.796181,6.209792,23.75,30.75,1,0,10,0,100,0,32,0,1024,0,32768,0,1,0,1,0,0,1,1,0,0.289164,7.108362e-01,0.0,7.108362e-01,-1,1,0.000168,0.000062,0.216878,0.737918,0.029799,0.000002,0.015172,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000168,0.000062,0.216878,0.737918,0.029799,0.000002,0.015172,0.003091,0.276102,0.054211,0.050206,0.000019,0.478508,0.105565,0.032298,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003091,0.276102,0.054211,0.050206,0.000019,0.478508,0.105565,0.032298,3,327,339,Cincinnati Bengals,Kansas City Chiefs,Cin,KC,27,24,3,0,0,0,24,24,,1,0,1,0,0,0,3,3,23.75,30.75,4.796181,6.209792,10,0,100,0,32,0,1024,0,32768,0,1,0,1,0,0,0,0,1,0.710836,0.000000e+00,1,-1,0.000145,0.000000,0.000046,0.000000,0.208792,0.000000,0.745981,0.000000,0.028540,0.000000,0.000002,0.000000,0.016494,0.000000,0.002552,0.000000,0.285393,0.000000,0.055123,0.000000,0.051453,0.000000,0.000019,0.000000,0.462795,0.000000,0.108635,0.000000,0.034030,0.000000,0,11.0,900,False,1,1,0,0.710836,0.000000e+00
816398,2337724,2022-01-30,2021,7,Conf. Championship,339,Kansas City Chiefs,KC,327,Cincinnati Bengals,Cin,24,27,-3,0,0,3,0,0,0,0,48,24,24,L,0,0,1,210,5,1,0,327,339,0,0,0,0,0,24,24,0,24,24,0,1,1,1,1,1,0,0,Run,4,13.0,42.0,Made FG,2,3,25,10.0,-14,686.0,2,0,0,3,3,686.0,3814.0,7.0,54.5,0,210,0,0,0,0,0,0,1,first_down,field_goal_made,0,2486.0,0,0,470596.0,625,15625,4,9,23.75,30.75,4.525694,5.859583,23.75,30.75,1,0,3,0,9,0,25,0,625,0,15625,0,2,0,4,0,0,1,1,0,0.178544,8.214559e-01,0.0,8.214559e-01,-1,1,0.000617,0.000153,0.551110,0.407438,0.027351,0.000460,0.012870,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000617,0.000153,0.551110,0.407438,0.027351,0.000460,0.012870,0.002441,0.265575,0.048276,0.029691,0.000000,0.529683,0.083024,0.041310,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002441,0.265575,0.048276,0.029691,0.000000,0.529683,0.083024,0.041310,3,327,339,Cincinnati Bengals,Kansas City Chiefs,Cin,KC,27,24,3,0,0,0,24,24,,1,0,1,0,0,0,3,3,23.75,30.75,4.525694,5.859583,3,0,9,0,25,0,625,0,15625,0,2,0,4,0,0,0,0,1,0.821456,0.000000e+00,1,-1,0.000050,0.000000,0.000011,0.000000,0.554530,0.000000,0.400660,0.000000,0.029259,0.000000,0.000000,0.000000,0.015490,0.000000,0.001873,0.000000,0.272386,0.000000,0.048059,0.000000,0.033020,0.000000,0.000000,0.000000,0.514318,0.000000,0.083425,0.000000,0.046919,0.000000,0,11.0,900,False,1,1,0,0.821456,0.000000e+00
816400,2337724,2022-01-30,2021,7,Conf. Championship,339,Kansas City Chiefs,KC,327,Cincinnati Bengals,Cin,24,27,-3,0,0,3,0,0,0,0,48,24,24,L,0,0,1,212,5,1,0,327,339,0,0,0,0,0,24,24,0,24,24,0,1,1,1,0,0,0,0,Run,4,2.0,42.0,Made FG,1,10,12,10.0,-14,652.0,2,0,0,2,3,652.0,3848.0,7.0,54.5,0,212,0,0,0,0,0,0,0,none,field_goal_made,0,2452.0,0,0,425104.0,144,1728,1,100,23.75,30.75,4.301389,5.569167,23.75,30.75,1,0,10,0,100,0,12,0,144,0,1728,0,1,0,1,0,0,1,1,0,0.051353,9.486467e-01,0.0,9.486467e-01,-1,1,0.002297,0.000015,0.065136,0.781099,0.129337,0.000000,0.022116,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002297,0.000015,0.065136,0.781099,0.129337,0.000000,0.022116,0.001495,0.261536,0.025868,0.015959,0.000019,0.576710,0.089122,0.029291,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001495,0.261536,0.025868,0.015959,0.000019,0.576710,0.089122,0.029291,3,327,339,Cincinnati Bengals,Kansas City Chiefs,Cin,KC,27,24,3,0,0,0,24,24,,1,0,1,0,0,0,3,2,23.75,30.75,4.301389,5.569167,10,0,100,0,12,0,144,0,1728,0,1,0,1,0,0,0,0,1,0.948647,0.000000e+00,1,-1,0.002295,0.000000,0.000020,0.000000,0.069621,0.000000,0.775061,0.000000,0.127285,0.000000,0.000000,0.000000,0.025719,0.000000,0.001203,0.000000,0.276698,0.000000,0.029329,0.000000,0.017321,0.000000,0.000019,0.000000,0.552984,0.000000,0.090366,0.000000,0.032079,0.000000,0,11.0,900,False,1,1,0,0.948647,0.000000e+00
816401,2337724,2022-01-30,2021,7,Conf. Championship,339,Kansas City Chiefs,KC,327,Cincinnati Bengals,Cin,24,27,-3,0,0,3,0,0,0,0,48,24,24,L,0,0,1,213,5,1,0,327,339,0,0,0,0,0,24,24,0,24,24,0,1,1,1,0,0,0,0,Run,4,-3.0,42.0,Made FG,2,8,10,10.0,-14,607.0,2,0,0,2,3,607.0,3893.0,7.0,54.5,0,213,0,0,0,0,0,0,0,none,field_goal_made,0,2407.0,0,0,368449.0,100,1000,4,64,23.75,30.75,4.004514,5.184792,23.75,30.75,1,0,8,0,64,0,10,0,100,0,1000,0,2,0,4,0,0,1,1,0,0.038802,9.611976e-01,0.0,9.611976e-01,-1,1,0.003637,0.000000,0.076036,0.723414,0.173856,0.000000,0.023058,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003637,0.000000,0.076036,0.723414,0.173856,0.000000,0.023058,0.001962,0.282721,0.032952,0.013763,0.000019,0.557374,0.084186,0.027023,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001962,0.282721,0.032952,0.013763,0.000019,0.557374,0.084186,0.027023,3,327,339,Cincinnati Bengals,Kansas City Chiefs,Cin,KC,27,24,3,0,0,0,24,24,,1,0,1,0,0,0,3,2,23.75,30.75,4.004514,5.184792,8,0,64,0,10,0,100,0,1000,0,2,0,4,0,0,0,0,1,0.961198,0.000000e+00,1,-1,0.002149,0.000000,0.000000,0.000000,0.080297,0.000000,0.729737,0.000000,0.150846,0.000000,0.000000,0.000000,0.036971,0.000000,0.001424,0.000000,0.298170,0.000000,0.036944,0.000000,0.011659,0.000000,0.000019,0.000000,0.531346,0.000000,0.088600,0.000000,0.031839,0.000000,0,11.0,900,False,1,1,0,0.961198,0.000000e+00


In [None]:
# input_names_score_pred_non_linear = [
#     'time_left_in_half',
#     # 'half',
#     'current_score_diff',
#     'current_score_total',
#     # 'home_vegas_score_pred_weighted',
#     # 'away_vegas_score_pred_weighted',
#     'cur_spread',
#     'cur_over_under',
#     'home_timeouts_remaining',
#     'away_timeouts_remaining',
#     'ytg',
#     'yd_from_goal',
#     'down',
#     'home_team_has_ball',
#     'kick_off',
#     'is_playoff_game',
#     'home_search_rf_play_first_down',
#     'away_search_rf_play_first_down',
#     'home_search_rf_drive_clock',
#     'away_search_rf_drive_clock',
#     'home_search_rf_drive_field_goal_made',
#     'away_search_rf_drive_field_goal_made',
#     'home_search_rf_drive_field_goal_missed',
#     'away_search_rf_drive_field_goal_missed',
#     'home_search_rf_drive_punt',
#     'away_search_rf_drive_punt',
#     # 'home_search_rf_drive_safety',
#     # 'away_search_rf_drive_safety',
#     'home_search_rf_drive_touch_down',
#     'away_search_rf_drive_touch_down',
#     'home_search_rf_drive_turnover',
#     'away_search_rf_drive_turnover',
#     'home_search_rf_drive_turnover_on_downs',
#     'away_search_rf_drive_turnover_on_downs',
#     'field_goal_ends_game',
# ]


In [35]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
# overtime_df[search_rf_drive_outcome_classes] = pd.DataFrame(search_rf_drive_outcome.predict_proba(model_df[mask_model][search_rf_drive_outcome.feature_names_in_]), index=model_df[mask_model].index)
# overtime_df[search_rf_drive_outcome_classes] = pd.DataFrame(search_rf_drive_outcome.predict_proba(model_df[mask_model][search_rf_drive_outcome.feature_names_in_]), index=model_df[mask_model].index)
input_names_score_pred_logit = [
    'play_start_time',
    'play_start_time_sq',
    'current_score_diff',
    'current_score_total',
    'team_vegas_score_pred',
    'opp_vegas_score_pred',
    'team_timeouts_remaining',
    'opp_timeouts_remaining',
    'team_ytg',
    'opp_ytg',
    'team_ytg_sq',
    'opp_ytg_sq',
    'team_yd_from_goal',
    'opp_yd_from_goal',
    'team_yd_from_goal_sq',
    'opp_yd_from_goal_sq',
    'team_yd_from_goal_cu',
    'opp_yd_from_goal_cu',
    'team_down',
    'opp_down',
    'team_down_sq',
    'opp_down_sq',
    'team_team_has_ball',
    'opp_team_has_ball',
    'is_team',
    'is_opp',
    'team_kick_off',
    'opp_kick_off',
    'xfield_goal_made_team_ends_game',
    'xfield_goal_made_opp_ends_game',
    'is_playoff_game',
]
input_names_score_pred_non_linear = [
    'play_start_time',
    'current_score_diff',
    'current_score_total',
    'team_vegas_score_pred',
    'opp_vegas_score_pred',
    'team_timeouts_remaining',
    'opp_timeouts_remaining',
    'team_ytg',
    'opp_ytg',
    'team_yd_from_goal',
    'opp_yd_from_goal',
    'team_down',
    'opp_down',
    'team_team_has_ball',
    'opp_team_has_ball',
    'is_team',
    'is_opp',
    'team_kick_off',
    'opp_kick_off',
    'xfield_goal_made_team_ends_game',
    'xfield_goal_made_opp_ends_game',
    'is_playoff_game',
]

output_name_logit = output_name
log_loss_scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True)

cv=GroupKFold(n_splits=3)

X_train, y_train, group_train, X_test, y_test, group_test, X_val, y_val, group_val = create_train_test_val_df(overtime_df, input_names_score_pred_logit, output_name_logit, normalize=True)
logit_grid = {
    'penalty' : ['l1', 'l2'],
    'C' : np.logspace(-4, 4, 20),
    'solver' : ['liblinear']
}

logit_overtime_base_model = LogisticRegression(solver='liblinear', random_state=0)

logit_overtime_model = GridSearchCV(logit_overtime_base_model, logit_grid, cv=cv,n_jobs=-1,verbose=100, scoring=log_loss_scorer)
logit_overtime_model.fit(X_train, y_train, groups=group_train)
pickle.dump(logit_overtime_model, open(os.path.join(root_dir, 'models/logit_overtime_model.p'), 'wb'))



output_name_non_linear = output_name
X_train, y_train, group_train, X_test, y_test, group_test, X_val, y_val, group_val = create_train_test_val_df(overtime_df, input_names_score_pred_non_linear, output_name_logit, normalize=True)

mlp_overtime_base_model = MLPClassifier(verbose=False, early_stopping=True, n_iter_no_change=5, random_state=1, max_iter=10000)
mlp_grid = {
    'hidden_layer_sizes': [(20,), (50,50,50), (10,30,10),(100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant'],
}
# mlp_grid = {
#     'learning_rate': ['constant'],
# }

mlp_overtime_model = GridSearchCV(mlp_overtime_base_model, mlp_grid, cv=cv,n_jobs=-1,verbose=100, scoring=log_loss_scorer)
mlp_overtime_model.fit(X_train.values, y_train, groups=group_train.values)
mlp_overtime_model.best_estimator_.feature_names_in_ = input_names_score_pred_non_linear
pickle.dump(mlp_overtime_model, open(os.path.join(root_dir, 'models/mlp_overtime_model.p'), 'wb'))

rf_grid = {
    "n_estimators": np.linspace(start=10, stop=100, num=5, dtype=int),
    # "max_features": ["auto", "sqrt"],
    "max_depth": np.linspace(2, 8, num=4, dtype=int),
    "min_samples_split": [2],
    "min_samples_leaf": [1],
    "bootstrap": [True, False],
}
# rf_grid = {
#     "min_samples_leaf": [1],
# }


rf_overtime_base_model = RandomForestClassifier(verbose=0, n_jobs=1, random_state=1)
X_train, y_train, group_train, X_test, y_test, group_test, X_val, y_val, group_val = create_train_test_val_df(overtime_df, input_names_score_pred_non_linear, output_name_logit, normalize=False)
rf_overtime_model = GridSearchCV(rf_overtime_base_model, rf_grid, cv=cv,n_jobs=-1,verbose=100, scoring=log_loss_scorer)
rf_overtime_model.fit(X_train, y_train, groups=group_train)
pickle.dump(rf_overtime_model, open(os.path.join(root_dir, 'models/rf_overtime_model.p'), 'wb'))





# mlp_overtime_model = pickle.load(open(os.path.join(root_dir, "models/mlp_overtime_model.p"), 'rb'))
# rf_overtime_model = pickle.load(open(os.path.join(root_dir, "models/rf_overtime_model.p"), 'rb'))
# logit_play_outcome_basic = pickle.load(open(os.path.join(root_dir, "models/logit_play_outcome_basic.p"), 'rb'))
# os.system('say "done"')




Fitting 3 folds for each of 40 candidates, totalling 120 fits
[CV 2/3; 1/40] START C=0.0001, penalty=l1, solver=liblinear.....................
[CV 3/3; 1/40] START C=0.0001, penalty=l1, solver=liblinear.....................
[CV 1/3; 1/40] START C=0.0001, penalty=l1, solver=liblinear.....................[CV 2/3; 2/40] START C=0.0001, penalty=l2, solver=liblinear.....................

[CV 3/3; 2/40] START C=0.0001, penalty=l2, solver=liblinear.....................
[CV 1/3; 2/40] START C=0.0001, penalty=l2, solver=liblinear.....................
[CV 1/3; 3/40] START C=0.00026366508987303583, penalty=l1, solver=liblinear.....
[CV 1/3; 1/40] END C=0.0001, penalty=l1, solver=liblinear;, score=-1.609 total time=   0.1s
[CV 2/3; 1/40] END C=0.0001, penalty=l1, solver=liblinear;, score=-1.609 total time=   0.1s
[CV 3/3; 1/40] END C=0.0001, penalty=l1, solver=liblinear;, score=-1.609 total time=   0.1s
[CV 1/3; 3/40] END C=0.00026366508987303583, penalty=l1, solver=liblinear;, score=-1.609 total 

In [15]:
pd.DataFrame(rf_overtime_model.best_estimator_.feature_importances_, rf_overtime_model.feature_names_in_)


Unnamed: 0,0
play_start_time,0.090021
current_score_diff,0.020973
current_score_total,0.094784
team_vegas_score_pred,0.081717
opp_vegas_score_pred,0.084049
team_timeouts_remaining,0.022452
opp_timeouts_remaining,0.023496
team_ytg,0.02817
opp_ytg,0.041426
team_yd_from_goal,0.085934


In [24]:
pd.DataFrame(rf_overtime_model.best_estimator_.feature_importances_, rf_overtime_model.feature_names_in_)


Unnamed: 0,0
play_start_time,0.083381
current_score_diff,0.016299
current_score_total,0.102892
team_vegas_score_pred,0.092546
opp_vegas_score_pred,0.08892
team_timeouts_remaining,0.028679
opp_timeouts_remaining,0.026306
team_ytg,0.016487
opp_ytg,0.023796
team_yd_from_goal,0.066771


In [18]:
pd.DataFrame(logit_overtime_model.cv_results_).to_clipboard()

In [None]:
pd.DataFrame(rf_overtime_model.cv_results_).to_clipboard()

In [None]:
pd.DataFrame(mlp_overtime_model.cv_results_).to_clipboard()

In [26]:
pd.concat([pd.DataFrame(logit_overtime_model.best_estimator_.intercept_, columns=["intercept"]), pd.DataFrame(logit_overtime_model.best_estimator_.coef_, columns=logit_overtime_model.best_estimator_.feature_names_in_)], axis=1)

Unnamed: 0,intercept,play_start_time,play_start_time_sq,current_score_diff,current_score_total,team_vegas_score_pred,opp_vegas_score_pred,team_timeouts_remaining,opp_timeouts_remaining,team_ytg,opp_ytg,team_ytg_sq,opp_ytg_sq,team_yd_from_goal,opp_yd_from_goal,team_yd_from_goal_sq,opp_yd_from_goal_sq,team_yd_from_goal_cu,opp_yd_from_goal_cu,team_down,opp_down,team_down_sq,opp_down_sq,team_team_has_ball,opp_team_has_ball,is_team,is_opp,team_kick_off,opp_kick_off,xfield_goal_made_team_ends_game,xfield_goal_made_opp_ends_game,is_playoff_game
0,-1.122753,0.171865,0.394673,-1.140022,0.315264,-1.035293,0.898962,0.729752,-0.197108,-0.152917,-0.022651,0.136191,-0.119877,-0.154381,-0.733891,0.317061,-0.752742,0.488017,-0.592751,-0.216462,-0.031866,0.215053,-0.586343,-1.731587,0.608834,-0.541698,-0.581055,0.002851,-0.738086,-0.853761,-1.571263,0.660033
1,-0.483671,0.422525,-0.219975,-0.560022,-0.245531,-0.332896,0.19671,0.243565,0.078667,-0.352694,0.257507,-0.116559,-0.025002,0.508987,0.181664,0.565428,-0.224618,0.431417,-0.402173,0.025688,0.044286,0.556026,-0.192843,-0.7384,0.254729,-0.376849,-0.106822,0.804071,-0.353967,-0.955716,2.086878,-0.187545
2,0.224565,-1.336455,-0.653761,0.0864,-0.098787,-0.13965,-0.136302,-0.858116,-0.856405,-0.003936,-0.006198,0.12793,0.127677,0.120813,0.122363,0.101079,0.103929,0.002851,0.005452,-0.239151,-0.240895,-0.178936,-0.179182,0.115262,0.109304,0.111615,0.112951,0.198011,0.196251,-0.913866,-0.906605,-0.920382
3,-0.639277,0.414241,-0.212392,0.185372,-0.256436,0.183472,-0.355606,0.071088,0.230327,0.242548,-0.361674,-0.022228,-0.115783,0.171404,0.491993,-0.220008,0.559334,-0.395084,0.430198,0.03054,0.016246,-0.189942,0.55757,0.166475,-0.805752,-0.183011,-0.456266,-0.366105,0.78986,2.088656,-0.978687,-0.184817
4,-1.346044,0.158354,0.417033,0.104466,0.275357,0.857036,-1.089035,-0.222637,0.678257,-0.052741,-0.171195,-0.121345,0.133799,-0.751953,-0.18351,-0.746904,0.301768,-0.582654,0.478831,-0.058525,-0.235274,-0.588021,0.209451,0.472397,-1.818442,-0.687221,-0.658823,-0.764108,-0.018381,-1.570478,-0.882465,0.669075


In [None]:
pd.concat([pd.DataFrame(logit_overtime_model.best_estimator_.intercept_, columns=["intercept"]), pd.DataFrame(logit_overtime_model.best_estimator_.coef_, columns=logit_overtime_model.best_estimator_.feature_names_in_)], axis=1)

In [None]:
mlp_overtime_model.best_params_

In [None]:
rf_overtime_model.best_params_

In [36]:
# overtime_df = overtime_df.reindex()

rf_overtime_model.verbose = 0
rf_overtime_outcomes = ["rf_pred_" + str(x) for x in rf_overtime_model.classes_]
overtime_df[rf_overtime_outcomes] = pd.DataFrame(cross_val_predict(rf_overtime_model.best_estimator_, overtime_df[rf_overtime_model.feature_names_in_], overtime_df[output_name], groups=overtime_df["game_code"], cv=5, verbose=10,method='predict_proba', n_jobs=-1), index=overtime_df.index)

normalize_values = normalize_df(overtime_df[mlp_overtime_model.best_estimator_.feature_names_in_], overtime_df[(overtime_df.season<2020)][mlp_overtime_model.best_estimator_.feature_names_in_])

mlp_overtime_outcomes = ["mlp_pred_" + str(x) for x in mlp_overtime_model.classes_]
overtime_df[mlp_overtime_outcomes] = pd.DataFrame(cross_val_predict(mlp_overtime_model.best_estimator_, normalize_values[mlp_overtime_model.best_estimator_.feature_names_in_], overtime_df[output_name], groups=overtime_df["game_code"], cv=5, verbose=10,method='predict_proba', n_jobs=-1), index=overtime_df.index)

normalize_values = normalize_df(overtime_df[logit_overtime_model.best_estimator_.feature_names_in_], overtime_df[(overtime_df.season<2020)][logit_overtime_model.best_estimator_.feature_names_in_])
logit_overtime_outcomes = ["logit_pred_" + str(x) for x in logit_overtime_model.classes_]
overtime_df[logit_overtime_outcomes] = pd.DataFrame(cross_val_predict(logit_overtime_model.best_estimator_, normalize_values[logit_overtime_model.best_estimator_.feature_names_in_], overtime_df[output_name], groups=overtime_df["game_code"], cv=5, verbose=10,method='predict_proba', n_jobs=-1), index=overtime_df.index)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:   11.4s remaining:   17.1s
[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:   11.4s remaining:    7.6s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   11.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   11.5s finished
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = (data - np.min(anchor_df[col])) / (np.max(anchor_df[col]) - np.min(anchor_df[col]))
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    1.3s remaining:    1.9s
[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:    2.5s remaining:    1.7s
[Parallel(n_j

In [37]:
overtime_df[overtime_df["is_team"]==1][["game_code", "nevent", "home_team", "away_team", "game_date"] + list(rf_overtime_model.feature_names_in_)+["first_drive", "event_name", output_name] + rf_overtime_outcomes + mlp_overtime_outcomes + logit_overtime_outcomes].to_clipboard()

In [None]:
overtime_df["minute_bucket"] = round((overtime_df["play_start_time"]-29.9)/60, 0)
overtime_df[overtime_df.is_playoff_game==0].groupby(['minute_bucket']).mean()[["opp_team_win", "draw", "team_team_win"]+logit_overtime_outcomes]

In [None]:
overtime_df["minute_bucket"] = round((overtime_df["play_start_time"])/60, 0)
overtime_df[overtime_df.is_playoff_game==0].groupby(['minute_bucket']).mean()[["opp_team_win", "draw", "team_team_win"]+rf_overtime_outcomes + mlp_overtime_outcomes +logit_overtime_outcomes]

In [None]:
log_loss_df = overtime_df
print(log_loss(log_loss_df[output_name], log_loss_df[rf_overtime_outcomes]))
print(log_loss(log_loss_df[output_name], log_loss_df[mlp_overtime_outcomes]))
print(log_loss(log_loss_df[output_name], log_loss_df[logit_overtime_outcomes]))


In [None]:
log_loss_df = overtime_df[overtime_df.season<2020]
print(log_loss(log_loss_df[output_name], log_loss_df[rf_overtime_outcomes]))
print(log_loss(log_loss_df[output_name], log_loss_df[mlp_overtime_outcomes]))
print(log_loss(log_loss_df[output_name], log_loss_df[logit_overtime_outcomes]))


In [None]:
log_loss_df = overtime_df[overtime_df.season>=2020]
print(log_loss(log_loss_df[output_name], log_loss_df[rf_overtime_outcomes]))
print(log_loss(log_loss_df[output_name], log_loss_df[mlp_overtime_outcomes]))
print(log_loss(log_loss_df[output_name], log_loss_df[logit_overtime_outcomes]))


In [None]:
log_loss_df = overtime_df[overtime_df.season>=2020]
print(log_loss(log_loss_df[output_name], log_loss_df[rf_overtime_outcomes]))
print(log_loss(log_loss_df[output_name], log_loss_df[mlp_overtime_outcomes]))
print(log_loss(log_loss_df[output_name], log_loss_df[logit_overtime_outcomes]))


In [None]:
calb_df = overtime_df
col = 0
bins = 10
print(rf_overtime_outcomes[col])
calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[rf_overtime_outcomes[col]], n_bins=bins, strategy="quantile") 
plt.plot(calb[1], calb[0], marker="o")
calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[mlp_overtime_outcomes[col]], n_bins=bins, strategy="quantile") 
plt.plot(calb[1], calb[0], marker="o")
calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[logit_overtime_outcomes[col]], n_bins=bins, strategy="quantile") 
plt.plot(calb[1], calb[0], marker="o")
plt.plot([0, np.max(calb[1])], [0, np.max(calb[1])])



In [None]:
calb_df = overtime_df
col = 1
bins = 10
print(rf_overtime_outcomes[col])
calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[rf_overtime_outcomes[col]], n_bins=bins, strategy="quantile") 
plt.plot(calb[1], calb[0], marker="o")
calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[mlp_overtime_outcomes[col]], n_bins=bins, strategy="quantile") 
plt.plot(calb[1], calb[0], marker="o")
calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[logit_overtime_outcomes[col]], n_bins=bins, strategy="quantile") 
plt.plot(calb[1], calb[0], marker="o")
plt.plot([0, np.max(calb[1])], [0, np.max(calb[1])])



In [None]:
calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[mlp_overtime_outcomes[col]], n_bins=5, strategy="quantile") 
calb

In [None]:
mlp_overtime_outcomes

In [None]:
overtime_df["game_info"] = (
    overtime_df["home_team"]
    + " "
    + overtime_df["away_team"]
    + " "
    + overtime_df["game_date"].apply(lambda x: x.strftime("%Y-%m-%d"))
    + " "
    + overtime_df["season"].apply(str)
    + " ("
    + (overtime_df["game_code"]).apply(str)
    + ")"
)
def ordinaltg(n):
    return n.replace({1: "1st", 2: "2nd", 3: "3rd", 4: "4th", 5: "5th", 6: "6th"})

overtime_df["absolute_score_diff"] = abs(overtime_df["home_start_score"] - overtime_df["away_start_score"])

overtime_df["minutes"] = (overtime_df["play_start_time"] // 60).fillna(0).apply(int)
overtime_df["seconds"] = (overtime_df["play_start_time"] - (overtime_df["play_start_time"] // 60) * 60).fillna(0).apply(
    int
)
overtime_df["seconds_str"] = np.where(
    overtime_df["seconds"] >= 10, overtime_df["seconds"].apply(str), "0" + overtime_df["seconds"].apply(str)
)
overtime_df["time_str"] = overtime_df["minutes"].apply(str) + ":" + overtime_df["seconds_str"]

overtime_df["team_score_desc"] = np.where(
    overtime_df["home_team_has_ball"] == 1,
    np.where(
        overtime_df["home_start_score"] > overtime_df["away_start_score"],
        "Up by " + overtime_df["absolute_score_diff"].apply(str),
        np.where(
            overtime_df["home_start_score"] < overtime_df["away_start_score"],
            "Down by " + overtime_df["absolute_score_diff"].apply(str),
            "Tied",
        ),
    ),
    np.where(
        overtime_df["home_start_score"] < overtime_df["away_start_score"],
        "Up by " + overtime_df["absolute_score_diff"].apply(str),
        np.where(
            overtime_df["home_start_score"] > overtime_df["away_start_score"],
            "Down by " + overtime_df["absolute_score_diff"].apply(str),
            "Tied",
        ),
    ),
)
overtime_df["play_description"] = (
    ordinaltg(overtime_df["quarter"])
    + " Qtr "
    + overtime_df["minutes"].apply(str)
    + ":"
    + overtime_df["seconds_str"]
    + ", "
    + overtime_df["team_score_desc"]
    + ", "
    + ordinaltg(overtime_df["down"]).apply(str)
    + " & "
    + overtime_df["ytg"].apply(str)
    + ", "
    + overtime_df["yd_from_goal"].apply(str)
    + " Yards From Goal, "
    + np.where(
        overtime_df["home_team_has_ball"] == 1,
        overtime_df["home_team"],
        overtime_df["away_team"],
    )
    + " has ball, "
    + "Off TO: "
    + np.where(
        overtime_df["home_team_has_ball"] == 1,
        overtime_df["home_timeouts_remaining"],
        overtime_df["away_timeouts_remaining"],
    ).astype(str)
    + ", Def TO: "
    + np.where(
        overtime_df["home_team_has_ball"] == 0,
        overtime_df["home_timeouts_remaining"],
        overtime_df["away_timeouts_remaining"],
    ).astype(str)
    + " ("
    + overtime_df["nevent"].apply(str)
    + ")"
)




In [None]:
model_df["game_info"] = (
    model_df["home_team"]
    + " "
    + model_df["away_team"]
    + " "
    + model_df["game_date"].apply(lambda x: x.strftime("%Y-%m-%d"))
    + " "
    + model_df["season"].apply(str)
    + " ("
    + (model_df["game_code"]).apply(str)
    + ")"
)
def ordinaltg(n):
    return n.replace({1: "1st", 2: "2nd", 3: "3rd", 4: "4th", 5: "5th", 6: "6th"})

model_df["absolute_score_diff"] = abs(model_df["home_start_score"] - model_df["away_start_score"])

model_df["minutes"] = (model_df["play_start_time"] // 60).fillna(0).apply(int)
model_df["seconds"] = (model_df["play_start_time"] - (model_df["play_start_time"] // 60) * 60).fillna(0).apply(
    int
)
model_df["seconds_str"] = np.where(
    model_df["seconds"] >= 10, model_df["seconds"].apply(str), "0" + model_df["seconds"].apply(str)
)
model_df["time_str"] = model_df["minutes"].apply(str) + ":" + model_df["seconds_str"]

model_df["team_score_desc"] = np.where(
    model_df["home_team_has_ball"] == 1,
    np.where(
        model_df["home_start_score"] > model_df["away_start_score"],
        "Up by " + model_df["absolute_score_diff"].apply(str),
        np.where(
            model_df["home_start_score"] < model_df["away_start_score"],
            "Down by " + model_df["absolute_score_diff"].apply(str),
            "Tied",
        ),
    ),
    np.where(
        model_df["home_start_score"] < model_df["away_start_score"],
        "Up by " + model_df["absolute_score_diff"].apply(str),
        np.where(
            model_df["home_start_score"] > model_df["away_start_score"],
            "Down by " + model_df["absolute_score_diff"].apply(str),
            "Tied",
        ),
    ),
)
model_df["play_description"] = (
    ordinaltg(model_df["quarter"])
    + " Qtr "
    + model_df["minutes"].apply(str)
    + ":"
    + model_df["seconds_str"]
    + ", "
    + model_df["team_score_desc"]
    + ", "
    + ordinaltg(model_df["down"]).apply(str)
    + " & "
    + model_df["ytg"].apply(str)
    + ", "
    + model_df["yd_from_goal"].apply(str)
    + " Yards From Goal, "
    + np.where(
        model_df["home_team_has_ball"] == 1,
        model_df["home_team"],
        model_df["away_team"],
    )
    + " has ball, "
    + "Off TO: "
    + np.where(
        model_df["home_team_has_ball"] == 1,
        model_df["home_timeouts_remaining"],
        model_df["away_timeouts_remaining"],
    ).astype(str)
    + ", Def TO: "
    + np.where(
        model_df["home_team_has_ball"] == 0,
        model_df["home_timeouts_remaining"],
        model_df["away_timeouts_remaining"],
    ).astype(str)
    + " ("
    + model_df["nevent"].apply(str)
    + ")"
)




In [None]:
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual, Layout
import plotly
import plotly.graph_objects as go
from plotly.offline import iplot
from plotly.subplots import make_subplots
import math


plotly.offline.init_notebook_mode()
overtime_df["home_team_has_ball_fixed"] = overtime_df["home_team_has_ball"]
def plot_game_lwp(game_info, model, width, height):
    game_df = deepcopy(overtime_df[(overtime_df["game_info"] == game_info)&(overtime_df["is_team"] == 1)&(overtime_df["event_id"].isin([1,2,3,4,5,7,9,14,17,18,22,35,41,47,52,53,54,55,56]))&(overtime_df["continuation"]==0)])
    # print(game_df)
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    if model =="mlp":
        y=mlp_overtime_outcomes
        # y1="xscore_diff_end_of_regulation_mlp"
    elif model =="rf":
        y=rf_overtime_outcomes
        # y1="xscore_diff_end_of_regulation_rf"
    elif model =="logit":
        y=logit_overtime_outcomes
        # y1="xscore_diff_end_of_regulation_rf"
    colors = ["darkkhaki", "skyblue", "gray"]
    game_df["yards_description"] = (
        game_df["event_name"]
        + " for "
        + game_df["yards_gained"].fillna(0).apply(int).apply(str)
        + " yards"
    )
    game_df["wpa"] = game_df[y[2]] - game_df[y[2]].shift(1)
    game_df["nevent"] = range(len(game_df))

    v = pd.DataFrame(game_df[["play_start_time", "nevent"]]).reset_index(drop=True)
    # print(v["minutes"][1:].reset_index(drop=True))
    # print(v["minutes"][:-1].reset_index(drop=True))
    v["minutes"] = round((v["play_start_time"]+29.9)/60, 0).apply(int)
    mask_ticks = v["minutes"][:-1].reset_index(drop=True) == v["minutes"][1:].reset_index(drop=True)
    # print(v)
    # print(mask_ticks)
    ticks_idx = [min(v["nevent"])] + list(v[:-1][~mask_ticks]["nevent"] + 1)
    ticks_values = list(v["minutes"].drop_duplicates().apply(str))
    # if len(ticks_idx) == 4:
    #     ticks_values = [1, 2, 3, 4]
    # else:
    #     ticks_values = [1, 2, 3, 4, "OT"]
    # ['search_rf_drive_clock',
    # 'search_rf_drive_field_goal_made',
    # 'search_rf_drive_field_goal_missed',
    # 'search_rf_drive_punt',
    # 'search_rf_drive_safety',
    # 'search_rf_drive_touch_down',
    # 'search_rf_drive_turnover',
    # 'search_rf_drive_turnover_on_downs']

    fig.add_trace(
        go.Scatter(
            x=game_df["nevent"],
            y=game_df[y[2]],
            customdata=game_df[["yards_description", "play_description", "cur_spread", "wpa"]],
            stackgroup="one",
            mode="lines",
            line=dict(width=0.5, color=colors[0]),
            name=game_df["home_team"].tolist()[0],
            hovertemplate="<br>".join(
                [
                    "%{y}",
                    "%{customdata[0]}",
                    "%{customdata[1]}",
            #         "Spread: %{customdata[2]}",
            #         "xScore Diff: %{customdata[17]}",
            #         "Over/Under: %{customdata[16]}",
            #         "xScore Total: %{customdata[4]}",
            #         "xHome Final Score: %{customdata[14]}",
            #         "xAway Final Score: %{customdata[15]}",
            #         "Clock: %{customdata[5]}",
            #         "FG Made: %{customdata[6]}",
            #         "FG Miss: %{customdata[7]}",
            #         "Punt: %{customdata[8]}",
            #         "TD: %{customdata[10]}",
            #         "TO: %{customdata[11]}",
            #         "TOD: %{customdata[12]}",
            #         "First Down: %{customdata[13]}",
            #         "WPA: %{customdata[3]}",
                ]
            ),
        )
    )
    try:
        fig.add_trace(go.Scatter(
            x=game_df["nevent"],
            y=game_df[y[1]],
            # hovertext=game_df[hover_values],
            stackgroup="one",
            mode="lines",
            line=dict(width=0.5, color=colors[2]),
            name="Tied",
        ))
    except:
        pass
    fig.add_trace(go.Scatter(
        x=game_df["nevent"],
        y=game_df[y[0]],
        # hovertext=game_df[hover_values],
        stackgroup="one",
        mode="lines",
        line=dict(width=0.5, color=colors[1]),
        name=game_df["away_team"].tolist()[0],
    ))
    # try:
    #     fig.add_trace(go.Scatter(
    #         x=game_df["nevent"],
    #         y=game_df[y1],
    #         # hovertext=game_df[hover_values],
    #         # stackgroup="one",
    #         mode="lines",
    #         line=dict(width=0.5, color="black", dash='dash'),
    #         name="xfinal_score_diff",
    #     ), secondary_y=True)
    # except:
    #     pass
    
    fig.update_xaxes(range=[np.min(game_df["nevent"]), np.max(game_df["nevent"])])
    fig.update_yaxes(range=[0, 1], secondary_y=False)
    fig.update_yaxes(range=[-20, 20], secondary_y=True, showgrid=False)
    # x_range = [-math.ceil(np.max(abs(game_df["xscore_diff_end_of_regulation"]))/10) * 10, math.ceil(np.max(abs(game_df["xscore_diff_end_of_regulation"]))/10) *10]
    # print(x_range)
    # fig.update_yaxes(range=x_range, secondary_y=True)
    # fig.update_yaxes(range=[-30, 30], secondary_y=True, showgrid=False)

    # iplot([fig1, fig2, fig3])
    
    fig.update_layout(
        title = game_df["game_info"].tolist()[0] + " (" + model + " model)",
        hovermode="x unified",
        width=width,
        height=height,
        xaxis=dict(
            tickvals=ticks_idx, ticktext=ticks_values, gridwidth=2
        ),
        yaxis=dict(tick0=0, dtick=0.25),
        # yaxis=dict(tick0=-30, dtick=30, gridcolor="black", gridwidth=2),
    )
    game_df["score_change"] = game_df["home_score_added"] + game_df["away_score_added"]
    game_df["score_str"] = (
        (game_df["away_score_added"] + game_df["away_start_score"]).apply(str)
        + "-"
        + (game_df["home_score_added"] + game_df["home_start_score"]).apply(str)
    )
    scores_idx_home = game_df[game_df["home_score_added"] >= 3]["nevent"].tolist()
    score_y_home = game_df[game_df["home_score_added"] >= 3][y[2]].tolist()
    score_home_value = game_df[game_df["home_score_added"] >= 3]["home_score_added"]
    score_home_str = np.where(score_home_value == 3, " FG", " TD")
    score_display_home = game_df[game_df["home_score_added"] >= 3]["score_str"].tolist()
    scores_idx_away = game_df[game_df["away_score_added"] >= 3]["nevent"].tolist()
    score_y_away = (game_df[game_df["away_score_added"] >= 3][y[2]]).tolist()
    score_away_value = game_df[game_df["away_score_added"] >= 3]["away_score_added"]
    score_away_str = np.where(score_away_value == 3, " FG", " TD")
    score_display_away = game_df[game_df["away_score_added"] >= 3]["score_str"].tolist()
    mask_poss_change = (game_df["home_team_has_ball_fixed"].shift(-1)!=game_df["home_team_has_ball_fixed"])&(game_df["score_change"]==0)&(game_df["score_change"].shift(-1)==0)
    ball_change_idx = game_df[mask_poss_change]["nevent"].tolist()
    ball_change_y = (game_df[mask_poss_change][y[2]]).tolist()
    # print(ball_change_idx)
    # print(ball_change_y)
    for x in range(len(scores_idx_home)):
        fig.add_annotation(
            x=scores_idx_home[x],
            y=score_y_home[x],
            text=game_df["home_team_abbrev"].tolist()[0]
            + score_home_str[x]
            + " "
            + score_display_home[x],
            showarrow=True,
        )
    for x in range(len(scores_idx_away)):
        fig.add_annotation(
            x=scores_idx_away[x],
            y=score_y_away[x],
            text=game_df["away_team_abbrev"].tolist()[0]
            + score_away_str[x]
            + " "
            + score_display_away[x],
            showarrow=True,
        )
    fig.add_trace(go.Scatter(
        x=ball_change_idx,
        y=ball_change_y,
        mode="markers",
        name="Possession Change",
        textposition="bottom center",
        marker=dict(color="blue")
    ))


    iplot(fig)
interact(
    plot_game_lwp, 
    game_info=widgets.Dropdown(options=list(overtime_df.sort_values("game_date", ascending=False)["game_info"].drop_duplicates())),
    model=widgets.RadioButtons(options=["mlp", "rf", "logit"], value="mlp"),
    width=widgets.IntSlider(min=500, max=1500, step=50, value=1200),
    height=widgets.IntSlider(min=500, max=1500, step=50, value=700),
)
    # return ()


In [None]:
overtime_beginning_of_game = overtime_df.drop_duplicates("game_code")

input_names_beginning_of_overtime = ["play_start_time", "current_score_total", "cur_spread", "cur_over_under", "is_playoff_game"]
output_name_beginning_of_overtime = 'home_team_outcome_number'
X_train, y_train, group_train, X_test, y_test, group_test, X_val, y_val, group_val = create_train_test_val_df(overtime_beginning_of_game, input_names_beginning_of_overtime, output_name_beginning_of_overtime, normalize=True)

logit_overtime_start = LogisticRegression()
logit_overtime_start.fit(X_train, y_train)
mlp_overtime_start = MLPClassifier(verbose=True, hidden_layer_sizes=(20,), random_state=1, early_stopping=True, )
mlp_overtime_start.fit(X_train, y_train)


In [None]:
pd.concat([pd.DataFrame(logit_overtime_start.intercept_, columns=["intercept"]), pd.DataFrame(logit_overtime_start.coef_, columns=logit_overtime_start.feature_names_in_)], axis=1)

In [None]:
overtime_beginning_of_game[logit_overtime_outcomes] = pd.DataFrame(cross_val_predict(logit_overtime_start, pd.concat([X_train, X_val, X_test]), pd.concat([y_train, y_val, y_test]), method="predict_proba"), index=overtime_beginning_of_game.index)
overtime_beginning_of_game[mlp_overtime_outcomes] = pd.DataFrame(cross_val_predict(mlp_overtime_start, pd.concat([X_train, X_val, X_test]), pd.concat([y_train, y_val, y_test]), method="predict_proba"), index=overtime_beginning_of_game.index)

In [None]:
plt.scatter(overtime_beginning_of_game["home_vegas_score_pred"] - overtime_beginning_of_game["away_vegas_score_pred"], overtime_beginning_of_game["mlp_pred_0"])

In [None]:
plt.scatter(overtime_beginning_of_game["home_vegas_score_pred"], overtime_beginning_of_game["logit_pred_-1"])

In [None]:
plt.scatter(overtime_beginning_of_game["home_vegas_score_pred"], overtime_beginning_of_game["logit_pred_1"])

In [None]:
logit_overtime_start.coef_

In [None]:
calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[logit_overtime_outcomes[col]], n_bins=10, strategy="quantile") 
calb

In [None]:
calb_df = overtime_df
col = 1
print(rf_overtime_outcomes[col])
bins=5
# calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[rf_overtime_outcomes[col]], n_bins=bins, strategy="quantile") 
# plt.plot(calb[1], calb[0], marker="o")
calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[mlp_overtime_outcomes[col]], n_bins=bins, strategy="quantile") 
plt.plot(calb[1], calb[0], marker="o")
calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[logit_overtime_outcomes[col]], n_bins=bins, strategy="quantile") 
plt.plot(calb[1], calb[0], marker="o")
plt.plot([0, np.max(calb[1])], [0, np.max(calb[1])])



In [None]:
plt.hist(overtime_df[rf_overtime_outcomes[col]])

In [None]:
plt.hist(overtime_df[mlp_overtime_outcomes[col]])

In [None]:
plt.hist(overtime_df[logit_overtime_outcomes[col]])

In [None]:
calb_df = overtime_df
col = 2
print(rf_overtime_outcomes[col])
# calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[rf_overtime_outcomes[col]], n_bins=5, strategy="quantile") 
# plt.plot(calb[1], calb[0], marker="o")
calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[mlp_overtime_outcomes[col]], n_bins=5, strategy="quantile") 
plt.plot(calb[1], calb[0], marker="o")
calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[logit_overtime_outcomes[col]], n_bins=5, strategy="quantile") 
plt.plot(calb[1], calb[0], marker="o")
plt.plot([0, np.max(calb[1])], [0, np.max(calb[1])])



In [None]:
# calb_df = overtime_df
# col = 3
# calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[rf_overtime_outcomes[col]], n_bins=5, strategy="quantile") 
# plt.plot(calb[1], calb[0], marker="o")
# calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[mlp_overtime_outcomes[col]], n_bins=5, strategy="quantile") 
# plt.plot(calb[1], calb[0], marker="o")
# calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[logit_overtime_outcomes[col]], n_bins=5, strategy="quantile") 
# plt.plot(calb[1], calb[0], marker="o")
# plt.plot([0, np.max(calb[1])], [0, np.max(calb[1])])



In [None]:
# calb_df = overtime_df
# col = 4
# calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[rf_overtime_outcomes[col]], n_bins=5, strategy="quantile") 
# plt.plot(calb[1], calb[0], marker="o")
# calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[mlp_overtime_outcomes[col]], n_bins=5, strategy="quantile") 
# plt.plot(calb[1], calb[0], marker="o")
# calb = calibration.calibration_curve(pd.get_dummies(calb_df[output_name]).iloc[:,col], calb_df[logit_overtime_outcomes[col]], n_bins=5, strategy="quantile") 
# plt.plot(calb[1], calb[0], marker="o")
# plt.plot([0, np.max(calb[1])], [0, np.max(calb[1])])



In [None]:
log_loss_df = overtime_df[overtime_df.season<2020]
print(log_loss(log_loss_df[output_name], log_loss_df[rf_overtime_outcomes]))
print(log_loss(log_loss_df[output_name], log_loss_df[mlp_overtime_outcomes]))
print(log_loss(log_loss_df[output_name], log_loss_df[logit_overtime_outcomes]))


In [None]:
overtime_df

In [None]:
overtime_df

In [None]:
plt.hist(overtime_df["rf_pred_-1"], alpha=1, bins = 5, label = "-1", range = [0, 1])
# plt.hist(overtime_df["rf_pred_-3"], alpha=.75, bins = 5, label = "-3", range = [0, 1])
plt.hist(overtime_df["rf_pred_0"], alpha=.5, bins = 5, label = "0", range = [0, 1])
plt.hist(overtime_df["rf_pred_1"], alpha=.5, bins = 5, label = "1", range = [0, 1])
# plt.hist(overtime_df["rf_pred_6"], alpha=.5, bins = 5, label = "6", range = [0, 1])
plt.legend()

In [None]:
plt.hist(overtime_df["mlp_pred_-1"], alpha=1, bins = 5, label = "-1", range = [0, 1])
# plt.hist(overtime_df["mlp_pred_-3"], alpha=.75, bins = 5, label = "-3", range = [0, 1])
plt.hist(overtime_df["mlp_pred_0"], alpha=.5, bins = 5, label = "0", range = [0, 1])
plt.hist(overtime_df["mlp_pred_1"], alpha=.5, bins = 5, label = "1", range = [0, 1])
# plt.hist(overtime_df["mlp_pred_6"], alpha=.5, bins = 5, label = "6", range = [0, 1])
plt.legend()

In [None]:
plt.hist(overtime_df["logit_pred_-1"], alpha=1, bins = 5, label = "-1", range = [0, 1])
# plt.hist(overtime_df["logit_pred_-3"], alpha=.75, bins = 5, label = "-3", range = [0, 1])
plt.hist(overtime_df["logit_pred_0"], alpha=.5, bins = 5, label = "0", range = [0, 1])
# plt.hist(overtime_df["logit_pred_1"], alpha=.25, bins = 5, label = "1", range = [0, 1])
# plt.hist(overtime_df["logit_pred_6"], alpha=.5, bins = 5, label = "6", range = [0, 1])
plt.legend()

In [None]:
overtime_df.to_clipboard()

In [None]:
# plt.hist(model_df[mask_model][rf_overtime_outcomes[5]])

pd.DataFrame(rf_overtime_model.feature_importances_, rf_overtime_model.feature_names_in_)

In [None]:
first_play_ot = model_df[mask_model].drop_duplicates("game_code")
first_play_ot[["home_team_win", "away_team_win", "home_team_has_ball"] + [output_name]].value_counts().to_clipboard()



In [None]:
pd.DataFrame(rf_overtime_model.feature_importances_, rf_overtime_model.feature_names_in_)

In [None]:
pd.DataFrame(mlp_overtime_model.coefs_[0], mlp_overtime_model.feature_names_in_).T.to_clipboard()
# len(mlp_overtime_model.coefs_)

In [None]:
plt.hist(mlp_overtime_model.coefs_[0][3])