## Gradient Boosting Model
Fit a Gradient Boosting survival model on each season's survival dataframe, at both the pitch level and the game level, and evaluate each fitted model on the following season's survival dataframe.

In [1]:
import pandas as pd

%matplotlib inline

from sklearn import set_config
from sksurv.ensemble import GradientBoostingSurvivalAnalysis

set_config(display='text')

## Fitting Gradient Boosting Models on each of the 2020-2023 seasonal survival dataframes at Pitch Level

In [2]:
def fit_gradient_boosting_model_on_season_pitch_level(season_year, recurrence):
    """Fit a gradient boosting survival model on one season's pitch-level data.

    Parameters
    ----------
    season_year : str or int
        Season identifier interpolated into the CSV file name.
    recurrence : bool
        When falsy, the 'recurrence' column is dropped and not used as a feature.

    Returns
    -------
    GradientBoostingSurvivalAnalysis
        The estimator after ``fit`` has been called on the season's data.
    """
    season_csv = f"../Survival-Dataframes/Time-Invariant/survival_df_{season_year}_time_invariant_pitch_level.csv"

    # the player name and injury date columns are never passed to the model
    season_frame = pd.read_csv(season_csv).drop(
        columns=['player_name', 'previous_injury_date', 'next_injury_date']
    )
    if not recurrence:
        season_frame = season_frame.drop(columns=['recurrence'])

    # features: every remaining column except the two outcome columns
    features = season_frame.drop(columns=['num_pitches', 'EVENT'])

    # outcome: record array with fields ('EVENT', 'num_pitches'), in that order
    outcome = season_frame[['EVENT', 'num_pitches']].to_records(index=False)

    # fit the gradient boosting survival model
    booster = GradientBoostingSurvivalAnalysis(
        n_estimators=100,
        min_samples_split=10,
        min_samples_leaf=15,
        random_state=100,
    )
    booster.fit(features, outcome)

    return booster

In [3]:
# Score a fitted model on a held-out season's pitch-level survival dataframe
def evaluate_held_out_gradient_boosting_model_pitch_level(est_cph_tree, held_out_season_year, recurrence):
    """Score a fitted model on a held-out season's pitch-level data.

    Parameters
    ----------
    est_cph_tree : fitted estimator
        Object exposing ``score(X, y)``.
    held_out_season_year : str or int
        Season identifier interpolated into the CSV file name.
    recurrence : bool
        When falsy, the 'recurrence' column is dropped before scoring; this
        should match the setting used when the model was fitted.

    Returns
    -------
    The value returned by ``est_cph_tree.score`` (the concordance index for
    scikit-survival estimators).
    """
    held_out_csv = (
        f"../Survival-Dataframes/Time-Invariant/"
        f"survival_df_{held_out_season_year}_time_invariant_pitch_level.csv"
    )

    # the player name and injury date columns are never passed to the model
    held_out_frame = pd.read_csv(held_out_csv).drop(
        columns=['player_name', 'previous_injury_date', 'next_injury_date']
    )
    if not recurrence:
        held_out_frame = held_out_frame.drop(columns=['recurrence'])

    # same feature / outcome split as used for fitting
    features = held_out_frame.drop(columns=['num_pitches', 'EVENT'])
    outcome = held_out_frame[['EVENT', 'num_pitches']].to_records(index=False)

    return est_cph_tree.score(features, outcome)

In [4]:
# Pitch level, no recurrence feature: fit on 2020, score on 2021
est_cph_tree_pitch_level_2020 = fit_gradient_boosting_model_on_season_pitch_level(
    season_year="2020", recurrence=False
)
held_out_concordance_est_cph_tree_pitch_level_2021 = evaluate_held_out_gradient_boosting_model_pitch_level(
    est_cph_tree=est_cph_tree_pitch_level_2020, held_out_season_year="2021", recurrence=False
)
print(f"The Concordance Index of the Gradient Boosting Model fitted on the 2020 season on pitch level on the 2021 survival dataframe is {round(held_out_concordance_est_cph_tree_pitch_level_2021,2)}")

The Concordance Index of the Gradient Boosting Model fitted on the 2020 season on pitch level on the 2021 survival dataframe is 0.53


In [5]:
# Pitch level, with recurrence feature: fit on 2020, score on 2021
est_cph_tree_pitch_level_2020_recurrence = fit_gradient_boosting_model_on_season_pitch_level(
    season_year="2020", recurrence=True
)
held_out_concordance_est_cph_tree_pitch_level_2021_recurrence = evaluate_held_out_gradient_boosting_model_pitch_level(
    est_cph_tree=est_cph_tree_pitch_level_2020_recurrence, held_out_season_year="2021", recurrence=True
)
print(f"The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2020 season on pitch level on the 2021 survival dataframe is {round(held_out_concordance_est_cph_tree_pitch_level_2021_recurrence,2)}")

The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2020 season on pitch level on the 2021 survival dataframe is 0.65


In [6]:
# Pitch level, no recurrence feature: fit on 2021, score on 2022
est_cph_tree_pitch_level_2021 = fit_gradient_boosting_model_on_season_pitch_level(
    season_year="2021", recurrence=False
)
held_out_concordance_est_cph_tree_pitch_level_2022 = evaluate_held_out_gradient_boosting_model_pitch_level(
    est_cph_tree=est_cph_tree_pitch_level_2021, held_out_season_year="2022", recurrence=False
)
print(f"The Concordance Index of the Gradient Boosting Model fitted on the 2021 season on pitch level on the 2022 survival dataframe is {round(held_out_concordance_est_cph_tree_pitch_level_2022,2)}")

The Concordance Index of the Gradient Boosting Model fitted on the 2021 season on pitch level on the 2022 survival dataframe is 0.56


In [7]:
# Pitch level, with recurrence feature: fit on 2021, score on 2022
est_cph_tree_pitch_level_2021_recurrence = fit_gradient_boosting_model_on_season_pitch_level(
    season_year="2021", recurrence=True
)
held_out_concordance_est_cph_tree_pitch_level_2022_recurrence = evaluate_held_out_gradient_boosting_model_pitch_level(
    est_cph_tree=est_cph_tree_pitch_level_2021_recurrence, held_out_season_year="2022", recurrence=True
)
print(f"The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2021 season on pitch level on the 2022 survival dataframe is {round(held_out_concordance_est_cph_tree_pitch_level_2022_recurrence,2)}")

The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2021 season on pitch level on the 2022 survival dataframe is 0.65


In [8]:
# Pitch level, no recurrence feature: fit on 2022, score on 2023
est_cph_tree_pitch_level_2022 = fit_gradient_boosting_model_on_season_pitch_level(
    season_year="2022", recurrence=False
)
held_out_concordance_est_cph_tree_pitch_level_2023 = evaluate_held_out_gradient_boosting_model_pitch_level(
    est_cph_tree=est_cph_tree_pitch_level_2022, held_out_season_year="2023", recurrence=False
)
print(f"The Concordance Index of the Gradient Boosting Model fitted on the 2022 season on pitch level on the 2023 survival dataframe is {round(held_out_concordance_est_cph_tree_pitch_level_2023,2)}")

The Concordance Index of the Gradient Boosting Model fitted on the 2022 season on pitch level on the 2023 survival dataframe is 0.57


In [9]:
# Pitch level, with recurrence feature: fit on 2022, score on 2023
est_cph_tree_pitch_level_2022_recurrence = fit_gradient_boosting_model_on_season_pitch_level(
    season_year="2022", recurrence=True
)
held_out_concordance_est_cph_tree_pitch_level_2023_recurrence = evaluate_held_out_gradient_boosting_model_pitch_level(
    est_cph_tree=est_cph_tree_pitch_level_2022_recurrence, held_out_season_year="2023", recurrence=True
)
print(f"The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2022 season on pitch level on the 2023 survival dataframe is {round(held_out_concordance_est_cph_tree_pitch_level_2023_recurrence,2)}")

The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2022 season on pitch level on the 2023 survival dataframe is 0.64


In [10]:
# Pitch level, no recurrence feature: fit on 2023, score on 2024
est_cph_tree_pitch_level_2023 = fit_gradient_boosting_model_on_season_pitch_level(
    season_year="2023", recurrence=False
)
held_out_concordance_est_cph_tree_pitch_level_2024 = evaluate_held_out_gradient_boosting_model_pitch_level(
    est_cph_tree=est_cph_tree_pitch_level_2023, held_out_season_year="2024", recurrence=False
)
print(f"The Concordance Index of the Gradient Boosting Model fitted on the 2023 season on pitch level on the 2024 survival dataframe is {round(held_out_concordance_est_cph_tree_pitch_level_2024,2)}")

The Concordance Index of the Gradient Boosting Model fitted on the 2023 season on pitch level on the 2024 survival dataframe is 0.5


In [11]:
# Pitch level, with recurrence feature: fit on 2023, score on 2024
est_cph_tree_pitch_level_2023_recurrence = fit_gradient_boosting_model_on_season_pitch_level(
    season_year="2023", recurrence=True
)
held_out_concordance_est_cph_tree_pitch_level_2024_recurrence = evaluate_held_out_gradient_boosting_model_pitch_level(
    est_cph_tree=est_cph_tree_pitch_level_2023_recurrence, held_out_season_year="2024", recurrence=True
)
print(f"The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2023 season on pitch level on the 2024 survival dataframe is {round(held_out_concordance_est_cph_tree_pitch_level_2024_recurrence,2)}")

The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2023 season on pitch level on the 2024 survival dataframe is 0.63


## Fitting Gradient Boosting Models on each of the 2020-2023 seasonal survival dataframes at Game Level

In [12]:
def fit_gradient_boosting_model_on_season_game_level(season_year, recurrence):
    """Fit a gradient boosting survival model on one season's game-level data.

    Parameters
    ----------
    season_year : str or int
        Season identifier interpolated into the CSV file name.
    recurrence : bool
        When falsy, the 'recurrence' column is dropped and not used as a feature.

    Returns
    -------
    GradientBoostingSurvivalAnalysis
        The estimator after ``fit`` has been called on the season's data.
    """
    season_csv = f"../Survival-Dataframes/Time-Invariant/survival_df_{season_year}_time_invariant_game_level.csv"

    # the player name and injury date columns are never passed to the model
    season_frame = pd.read_csv(season_csv).drop(
        columns=['player_name', 'previous_injury_date', 'next_injury_date']
    )
    if not recurrence:
        season_frame = season_frame.drop(columns=['recurrence'])

    # features: every remaining column except the two outcome columns
    features = season_frame.drop(columns=['num_games', 'EVENT'])

    # outcome: record array with fields ('EVENT', 'num_games'), in that order
    outcome = season_frame[['EVENT', 'num_games']].to_records(index=False)

    # fit the gradient boosting survival model
    booster = GradientBoostingSurvivalAnalysis(
        n_estimators=100,
        min_samples_split=10,
        min_samples_leaf=15,
        random_state=100,
    )
    booster.fit(features, outcome)

    return booster

In [13]:
# Score a fitted model on a held-out season's game-level survival dataframe
def evaluate_held_out_gradient_boosting_model_game_level(est_cph_tree, held_out_season_year, recurrence):
    """Score a fitted model on a held-out season's game-level data.

    Parameters
    ----------
    est_cph_tree : fitted estimator
        Object exposing ``score(X, y)``.
    held_out_season_year : str or int
        Season identifier interpolated into the CSV file name.
    recurrence : bool
        When falsy, the 'recurrence' column is dropped before scoring; this
        should match the setting used when the model was fitted.

    Returns
    -------
    The value returned by ``est_cph_tree.score`` (the concordance index for
    scikit-survival estimators).
    """
    held_out_csv = (
        f"../Survival-Dataframes/Time-Invariant/"
        f"survival_df_{held_out_season_year}_time_invariant_game_level.csv"
    )

    # the player name and injury date columns are never passed to the model
    held_out_frame = pd.read_csv(held_out_csv).drop(
        columns=['player_name', 'previous_injury_date', 'next_injury_date']
    )
    if not recurrence:
        held_out_frame = held_out_frame.drop(columns=['recurrence'])

    # same feature / outcome split as used for fitting
    features = held_out_frame.drop(columns=['num_games', 'EVENT'])
    outcome = held_out_frame[['EVENT', 'num_games']].to_records(index=False)

    return est_cph_tree.score(features, outcome)

In [14]:
# Game level, no recurrence feature: fit on 2020, score on 2021
est_cph_tree_game_level_2020 = fit_gradient_boosting_model_on_season_game_level(
    season_year="2020", recurrence=False
)
held_out_concordance_est_cph_tree_game_level_2021 = evaluate_held_out_gradient_boosting_model_game_level(
    est_cph_tree=est_cph_tree_game_level_2020, held_out_season_year="2021", recurrence=False
)
print(f"The Concordance Index of the Gradient Boosting Model fitted on the 2020 season on game level on the 2021 survival dataframe is {round(held_out_concordance_est_cph_tree_game_level_2021,2)}")

The Concordance Index of the Gradient Boosting Model fitted on the 2020 season on game level on the 2021 survival dataframe is 0.53


In [15]:
# Game level, with recurrence feature: fit on 2020, score on 2021
est_cph_tree_game_level_2020_recurrence = fit_gradient_boosting_model_on_season_game_level(
    season_year="2020", recurrence=True
)
held_out_concordance_est_cph_tree_game_level_2021_recurrence = evaluate_held_out_gradient_boosting_model_game_level(
    est_cph_tree=est_cph_tree_game_level_2020_recurrence, held_out_season_year="2021", recurrence=True
)
print(f"The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2020 season on game level on the 2021 survival dataframe is {round(held_out_concordance_est_cph_tree_game_level_2021_recurrence,2)}")

The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2020 season on game level on the 2021 survival dataframe is 0.86


In [16]:
# Game level, no recurrence feature: fit on 2021, score on 2022
est_cph_tree_game_level_2021 = fit_gradient_boosting_model_on_season_game_level(
    season_year="2021", recurrence=False
)
held_out_concordance_est_cph_tree_game_level_2022 = evaluate_held_out_gradient_boosting_model_game_level(
    est_cph_tree=est_cph_tree_game_level_2021, held_out_season_year="2022", recurrence=False
)
print(f"The Concordance Index of the Gradient Boosting Model fitted on the 2021 season on game level on the 2022 survival dataframe is {round(held_out_concordance_est_cph_tree_game_level_2022,2)}")

The Concordance Index of the Gradient Boosting Model fitted on the 2021 season on game level on the 2022 survival dataframe is 0.55


In [17]:
# Game level, with recurrence feature: fit on 2021, score on 2022
est_cph_tree_game_level_2021_recurrence = fit_gradient_boosting_model_on_season_game_level(
    season_year="2021", recurrence=True
)
held_out_concordance_est_cph_tree_game_level_2022_recurrence = evaluate_held_out_gradient_boosting_model_game_level(
    est_cph_tree=est_cph_tree_game_level_2021_recurrence, held_out_season_year="2022", recurrence=True
)
print(f"The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2021 season on game level on the 2022 survival dataframe is {round(held_out_concordance_est_cph_tree_game_level_2022_recurrence,2)}")

The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2021 season on game level on the 2022 survival dataframe is 0.86


In [18]:
# Game level, no recurrence feature: fit on 2022, score on 2023
est_cph_tree_game_level_2022 = fit_gradient_boosting_model_on_season_game_level(
    season_year="2022", recurrence=False
)
held_out_concordance_est_cph_tree_game_level_2023 = evaluate_held_out_gradient_boosting_model_game_level(
    est_cph_tree=est_cph_tree_game_level_2022, held_out_season_year="2023", recurrence=False
)
print(f"The Concordance Index of the Gradient Boosting Model fitted on the 2022 season on game level on the 2023 survival dataframe is {round(held_out_concordance_est_cph_tree_game_level_2023,2)}")

The Concordance Index of the Gradient Boosting Model fitted on the 2022 season on game level on the 2023 survival dataframe is 0.56


In [19]:
# Game level, with recurrence feature: fit on 2022, score on 2023
est_cph_tree_game_level_2022_recurrence = fit_gradient_boosting_model_on_season_game_level(
    season_year="2022", recurrence=True
)
held_out_concordance_est_cph_tree_game_level_2023_recurrence = evaluate_held_out_gradient_boosting_model_game_level(
    est_cph_tree=est_cph_tree_game_level_2022_recurrence, held_out_season_year="2023", recurrence=True
)
print(f"The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2022 season on game level on the 2023 survival dataframe is {round(held_out_concordance_est_cph_tree_game_level_2023_recurrence,2)}")

The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2022 season on game level on the 2023 survival dataframe is 0.86


In [20]:
# Game level, no recurrence feature: fit on 2023, score on 2024
est_cph_tree_game_level_2023 = fit_gradient_boosting_model_on_season_game_level(
    season_year="2023", recurrence=False
)
held_out_concordance_est_cph_tree_game_level_2024 = evaluate_held_out_gradient_boosting_model_game_level(
    est_cph_tree=est_cph_tree_game_level_2023, held_out_season_year="2024", recurrence=False
)
print(f"The Concordance Index of the Gradient Boosting Model fitted on the 2023 season on game level on the 2024 survival dataframe is {round(held_out_concordance_est_cph_tree_game_level_2024,2)}")

The Concordance Index of the Gradient Boosting Model fitted on the 2023 season on game level on the 2024 survival dataframe is 0.57


In [21]:
# Game level, with recurrence feature: fit on 2023, score on 2024
est_cph_tree_game_level_2023_recurrence = fit_gradient_boosting_model_on_season_game_level(
    season_year="2023", recurrence=True
)
held_out_concordance_est_cph_tree_game_level_2024_recurrence = evaluate_held_out_gradient_boosting_model_game_level(
    est_cph_tree=est_cph_tree_game_level_2023_recurrence, held_out_season_year="2024", recurrence=True
)
print(f"The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2023 season on game level on the 2024 survival dataframe is {round(held_out_concordance_est_cph_tree_game_level_2024_recurrence,2)}")

The Concordance Index of the Gradient Boosting Model with recurrence fitted on the 2023 season on game level on the 2024 survival dataframe is 0.87
