## Gradient Boosting Model
Fit Gradient Boosting survival models on each season's survival dataframe at both the pitch and game levels, then evaluate each model on the following season.

In [1]:
import pandas as pd
%matplotlib inline

from sklearn import set_config
from sksurv.ensemble import GradientBoostingSurvivalAnalysis

set_config(display='text')

  from pandas.core import (


## Fitting Gradient Boosting Models on each of the 2020-2023 seasonal survival dataframes at Pitch Level

In [2]:
def fit_gradient_boosting_model_on_season_pitch_level(
    season_year,
    n_estimators=100,
    min_samples_split=10,
    min_samples_leaf=15,
    random_state=100,
):
    """Fit a gradient boosting survival model on one season's pitch-level data.

    Reads ``survival_df_{season_year}_time_invariant_pitch_level.csv`` from
    ``../Time-Invariant-Survival-Analysis/``. Every column except ``Pitches``
    and ``EVENT`` is used as a feature; ``EVENT`` and ``Pitches`` form the
    survival target.

    Parameters
    ----------
    season_year : str or int
        Season identifier interpolated into the CSV file name.
    n_estimators, min_samples_split, min_samples_leaf, random_state
        Forwarded unchanged to ``GradientBoostingSurvivalAnalysis``. The
        defaults reproduce the configuration originally hard-coded here.

    Returns
    -------
    GradientBoostingSurvivalAnalysis
        The estimator after ``fit`` has been called on the season's data.
    """
    survival_df_time_invariant_pitch_level = pd.read_csv(
        f"../Time-Invariant-Survival-Analysis/survival_df_{season_year}_time_invariant_pitch_level.csv"
    )

    # Feature matrix: everything that is not part of the survival target.
    X_train = survival_df_time_invariant_pitch_level.drop(columns=['Pitches', 'EVENT'])

    # Survival target as a record array with fields (EVENT, Pitches);
    # scikit-survival expects the event indicator first and the time second.
    y_train = survival_df_time_invariant_pitch_level[['EVENT', 'Pitches']].to_records(index=False)

    # Fit the gradient boosting survival model.
    est_cph_tree = GradientBoostingSurvivalAnalysis(
        n_estimators=n_estimators,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=random_state,
    )
    est_cph_tree.fit(X_train, y_train)

    return est_cph_tree

### Evaluate the performance of the Gradient Boosting Model on the held-out survival dataframe for injured players for each of the seasonal models at pitch level

In [3]:
# Score a fitted model on a held-out season's pitch-level survival dataframe.
def evaluate_held_out_gradient_boosting_model_pitch_level(est_cph_tree, held_out_season_year):
    """Score a fitted survival model on a held-out season at pitch level.

    Loads ``survival_df_{held_out_season_year}_time_invariant_pitch_level.csv``
    from ``../Time-Invariant-Survival-Analysis/``, separates the features from
    the (``EVENT``, ``Pitches``) target and returns
    ``est_cph_tree.score(X_test, y_test)``. For scikit-survival estimators
    that score is the concordance index.
    """
    csv_path = (
        "../Time-Invariant-Survival-Analysis/"
        f"survival_df_{held_out_season_year}_time_invariant_pitch_level.csv"
    )
    held_out_df = pd.read_csv(csv_path)

    target_columns = ['EVENT', 'Pitches']
    y_test = held_out_df[target_columns].to_records(index=False)
    X_test = held_out_df.drop(columns=target_columns)

    return est_cph_tree.score(X_test, y_test)

In [4]:
# Train on the 2020 season, then score on the following (2021) season.
est_cph_tree_pitch_level_2020 = fit_gradient_boosting_model_on_season_pitch_level("2020")
held_out_concordance_est_cph_tree_pitch_level_2021 = evaluate_held_out_gradient_boosting_model_pitch_level(
    est_cph_tree_pitch_level_2020,
    "2021",
)
print(
    "The Concordance Index of the Gradient Boosting Model fitted on the 2020 season "
    "on pitch level on the 2021 survival dataframe is "
    f"{round(held_out_concordance_est_cph_tree_pitch_level_2021, 2)}"
)

The Concordance Index of the Gradient Boosting Model fitted on the 2020 season on pitch level on the 2021 survival dataframe is 0.62


In [5]:
# Train on the 2021 season, then score on the following (2022) season.
est_cph_tree_pitch_level_2021 = fit_gradient_boosting_model_on_season_pitch_level("2021")
held_out_concordance_est_cph_tree_pitch_level_2022 = evaluate_held_out_gradient_boosting_model_pitch_level(
    est_cph_tree_pitch_level_2021,
    "2022",
)
print(
    "The Concordance Index of the Gradient Boosting Model fitted on the 2021 season "
    "on pitch level on the 2022 survival dataframe is "
    f"{round(held_out_concordance_est_cph_tree_pitch_level_2022, 2)}"
)

The Concordance Index of the Gradient Boosting Model fitted on the 2021 season on pitch level on the 2022 survival dataframe is 0.67


In [6]:
# Train on the 2022 season, then score on the following (2023) season.
est_cph_tree_pitch_level_2022 = fit_gradient_boosting_model_on_season_pitch_level("2022")
held_out_concordance_est_cph_tree_pitch_level_2023 = evaluate_held_out_gradient_boosting_model_pitch_level(
    est_cph_tree_pitch_level_2022,
    "2023",
)
print(
    "The Concordance Index of the Gradient Boosting Model fitted on the 2022 season "
    "on pitch level on the 2023 survival dataframe is "
    f"{round(held_out_concordance_est_cph_tree_pitch_level_2023, 2)}"
)

The Concordance Index of the Gradient Boosting Model fitted on the 2022 season on pitch level on the 2023 survival dataframe is 0.67


In [7]:
# Train on the 2023 season, then score on the following (2024) season.
est_cph_tree_pitch_level_2023 = fit_gradient_boosting_model_on_season_pitch_level("2023")
held_out_concordance_est_cph_tree_pitch_level_2024 = evaluate_held_out_gradient_boosting_model_pitch_level(
    est_cph_tree_pitch_level_2023,
    "2024",
)
print(
    "The Concordance Index of the Gradient Boosting Model fitted on the 2023 season "
    "on pitch level on the 2024 survival dataframe is "
    f"{round(held_out_concordance_est_cph_tree_pitch_level_2024, 2)}"
)

The Concordance Index of the Gradient Boosting Model fitted on the 2023 season on pitch level on the 2024 survival dataframe is 0.65


## Fitting Gradient Boosting Models on each of the 2020-2023 seasonal survival dataframes at Game Level

In [8]:
def fit_gradient_boosting_model_on_season_game_level(
    season_year,
    n_estimators=100,
    min_samples_split=10,
    min_samples_leaf=15,
    random_state=100,
):
    """Fit a gradient boosting survival model on one season's game-level data.

    Reads ``survival_df_{season_year}_time_invariant_game_level.csv`` from
    ``../Time-Invariant-Survival-Analysis/``. Every column except ``Games``
    and ``EVENT`` is used as a feature; ``EVENT`` and ``Games`` form the
    survival target.

    Parameters
    ----------
    season_year : str or int
        Season identifier interpolated into the CSV file name.
    n_estimators, min_samples_split, min_samples_leaf, random_state
        Forwarded unchanged to ``GradientBoostingSurvivalAnalysis``. The
        defaults reproduce the configuration originally hard-coded here.

    Returns
    -------
    GradientBoostingSurvivalAnalysis
        The estimator after ``fit`` has been called on the season's data.
    """
    survival_df_time_invariant_game_level = pd.read_csv(
        f"../Time-Invariant-Survival-Analysis/survival_df_{season_year}_time_invariant_game_level.csv"
    )

    # Feature matrix: everything that is not part of the survival target.
    X_train = survival_df_time_invariant_game_level.drop(columns=['Games', 'EVENT'])

    # Survival target as a record array with fields (EVENT, Games);
    # scikit-survival expects the event indicator first and the time second.
    y_train = survival_df_time_invariant_game_level[['EVENT', 'Games']].to_records(index=False)

    # Fit the gradient boosting survival model.
    est_cph_tree = GradientBoostingSurvivalAnalysis(
        n_estimators=n_estimators,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=random_state,
    )
    est_cph_tree.fit(X_train, y_train)

    return est_cph_tree

### Evaluate the performance of the Gradient Boosting Model on the held-out survival dataframe for injured players for each of the seasonal models at game level

In [9]:
# Score a fitted model on a held-out season's game-level survival dataframe.
def evaluate_held_out_gradient_boosting_model_game_level(est_cph_tree, held_out_season_year):
    """Score a fitted survival model on a held-out season at game level.

    Loads ``survival_df_{held_out_season_year}_time_invariant_game_level.csv``
    from ``../Time-Invariant-Survival-Analysis/``, separates the features from
    the (``EVENT``, ``Games``) target and returns
    ``est_cph_tree.score(X_test, y_test)``. For scikit-survival estimators
    that score is the concordance index.
    """
    csv_path = (
        "../Time-Invariant-Survival-Analysis/"
        f"survival_df_{held_out_season_year}_time_invariant_game_level.csv"
    )
    held_out_df = pd.read_csv(csv_path)

    target_columns = ['EVENT', 'Games']
    y_test = held_out_df[target_columns].to_records(index=False)
    X_test = held_out_df.drop(columns=target_columns)

    return est_cph_tree.score(X_test, y_test)

In [10]:
# Train on the 2020 season, then score on the following (2021) season.
est_cph_tree_game_level_2020 = fit_gradient_boosting_model_on_season_game_level("2020")
held_out_concordance_est_cph_tree_game_level_2021 = evaluate_held_out_gradient_boosting_model_game_level(
    est_cph_tree_game_level_2020,
    "2021",
)
print(
    "The Concordance Index of the Gradient Boosting Model fitted on the 2020 season "
    "on game level on the 2021 survival dataframe is "
    f"{round(held_out_concordance_est_cph_tree_game_level_2021, 2)}"
)

The Concordance Index of the Gradient Boosting Model fitted on the 2020 season on game level on the 2021 survival dataframe is 0.62


In [11]:
# Train on the 2021 season, then score on the following (2022) season.
est_cph_tree_game_level_2021 = fit_gradient_boosting_model_on_season_game_level("2021")
held_out_concordance_est_cph_tree_game_level_2022 = evaluate_held_out_gradient_boosting_model_game_level(
    est_cph_tree_game_level_2021,
    "2022",
)
print(
    "The Concordance Index of the Gradient Boosting Model fitted on the 2021 season "
    "on game level on the 2022 survival dataframe is "
    f"{round(held_out_concordance_est_cph_tree_game_level_2022, 2)}"
)

The Concordance Index of the Gradient Boosting Model fitted on the 2021 season on game level on the 2022 survival dataframe is 0.64


In [12]:
# Train on the 2022 season, then score on the following (2023) season.
est_cph_tree_game_level_2022 = fit_gradient_boosting_model_on_season_game_level("2022")
held_out_concordance_est_cph_tree_game_level_2023 = evaluate_held_out_gradient_boosting_model_game_level(
    est_cph_tree_game_level_2022,
    "2023",
)
print(
    "The Concordance Index of the Gradient Boosting Model fitted on the 2022 season "
    "on game level on the 2023 survival dataframe is "
    f"{round(held_out_concordance_est_cph_tree_game_level_2023, 2)}"
)

The Concordance Index of the Gradient Boosting Model fitted on the 2022 season on game level on the 2023 survival dataframe is 0.63


In [13]:
# Train on the 2023 season, then score on the following (2024) season.
est_cph_tree_game_level_2023 = fit_gradient_boosting_model_on_season_game_level("2023")
held_out_concordance_est_cph_tree_game_level_2024 = evaluate_held_out_gradient_boosting_model_game_level(
    est_cph_tree_game_level_2023,
    "2024",
)
print(
    "The Concordance Index of the Gradient Boosting Model fitted on the 2023 season "
    "on game level on the 2024 survival dataframe is "
    f"{round(held_out_concordance_est_cph_tree_game_level_2024, 2)}"
)

The Concordance Index of the Gradient Boosting Model fitted on the 2023 season on game level on the 2024 survival dataframe is 0.59
