## Random Survival Forest Model
Fit the Random Survival Forest Model on the survival dataframe for each of the seasons on both the pitch and game levels.

In [1]:
import pandas as pd
%matplotlib inline

from sklearn import set_config
from sksurv.ensemble import RandomSurvivalForest

set_config(display='text')

## Fitting a Random Survival Forest Model on each of the 2020-2023 seasonal survival dataframes at the Pitch Level

In [2]:
def fit_random_survival_forest_model_on_season_pitch_level(season_year, *, data_dir="../Time-Invariant-Survival-Analysis"):
    """Fit a Random Survival Forest on one season's pitch-level survival dataframe.

    Args:
        season_year: Season identifier interpolated into the CSV file name
            ``survival_df_{season_year}_time_invariant_pitch_level.csv``.
        data_dir: Directory that holds the survival CSV files. Defaults to the
            location this notebook originally hard-coded, so existing calls
            behave exactly as before.

    Returns:
        The fitted ``RandomSurvivalForest`` estimator.
    """
    season_df = pd.read_csv(f"{data_dir}/survival_df_{season_year}_time_invariant_pitch_level.csv")

    # Every column other than the duration ('Pitches') and the event
    # indicator ('EVENT') is used as a feature.
    X_train = season_df.drop(columns=['Pitches', 'EVENT'])

    # Record array with the fields in (EVENT, Pitches) order, i.e. event
    # indicator first and duration second.
    # NOTE(review): scikit-survival requires the event field to be boolean;
    # this assumes the CSV stores EVENT as True/False - confirm.
    y_train = season_df[['EVENT', 'Pitches']].to_records(index=False)

    # Fixed hyper-parameters and seed, identical for every season.
    random_survival_forest = RandomSurvivalForest(
        n_estimators=100,
        min_samples_split=10,
        min_samples_leaf=15,
        n_jobs=-1,
        random_state=100,
    )
    random_survival_forest.fit(X_train, y_train)

    return random_survival_forest

### Evaluate the performance of the Random Survival Forest Model on the held-out survival dataframe for injured players for each of the seasonal models.

In [3]:
# Score a fitted model on a held-out season's survival dataframe (e.g. 2021 for the model fitted on 2020)
def evaluate_held_out_random_survival_forest_pitch_level(rsf_model, held_out_season_year, *, data_dir="../Time-Invariant-Survival-Analysis"):
    """Score a fitted model on a held-out season's pitch-level survival dataframe.

    Args:
        rsf_model: Fitted estimator exposing ``score(X, y)``.
        held_out_season_year: Season identifier interpolated into the CSV file
            name ``survival_df_{held_out_season_year}_time_invariant_pitch_level.csv``.
        data_dir: Directory that holds the survival CSV files. Defaults to the
            location this notebook originally hard-coded, so existing calls
            behave exactly as before.

    Returns:
        Whatever ``rsf_model.score`` returns for the held-out data; the
        notebook reports it as the concordance index.
    """
    held_out_df = pd.read_csv(
        f"{data_dir}/survival_df_{held_out_season_year}_time_invariant_pitch_level.csv"
    )

    # Same split as at training time: features are everything except the
    # duration ('Pitches') and the event indicator ('EVENT').
    X_test = held_out_df.drop(columns=['Pitches', 'EVENT'])
    # Record array with the fields in (EVENT, Pitches) order.
    y_test = held_out_df[['EVENT', 'Pitches']].to_records(index=False)

    return rsf_model.score(X_test, y_test)

In [4]:
# Train on the 2020 season, then score that model on the 2021 season (pitch level).
rsf_pitch_level_2020 = fit_random_survival_forest_model_on_season_pitch_level(season_year="2020")
held_out_concordance_rsf_pitch_level_2021 = evaluate_held_out_random_survival_forest_pitch_level(
    rsf_model=rsf_pitch_level_2020,
    held_out_season_year="2021",
)
print(
    f"The Concordance Index of the Random Survival Forest Model fitted on the 2020 season on pitch level on the 2021 survival dataframe is {round(held_out_concordance_rsf_pitch_level_2021,2)}"
)

In [5]:
# Train on the 2021 season, then score that model on the 2022 season (pitch level).
rsf_pitch_level_2021 = fit_random_survival_forest_model_on_season_pitch_level(season_year="2021")
held_out_concordance_rsf_pitch_level_2022 = evaluate_held_out_random_survival_forest_pitch_level(
    rsf_model=rsf_pitch_level_2021,
    held_out_season_year="2022",
)
print(
    f"The Concordance Index of the Random Survival Forest Model fitted on the 2021 season on pitch level on the 2022 survival dataframe is {round(held_out_concordance_rsf_pitch_level_2022,2)}"
)

In [6]:
# Train on the 2022 season, then score that model on the 2023 season (pitch level).
rsf_pitch_level_2022 = fit_random_survival_forest_model_on_season_pitch_level(season_year="2022")
held_out_concordance_rsf_pitch_level_2023 = evaluate_held_out_random_survival_forest_pitch_level(
    rsf_model=rsf_pitch_level_2022,
    held_out_season_year="2023",
)
print(
    f"The Concordance Index of the Random Survival Forest Model fitted on the 2022 season on pitch level on the 2023 survival dataframe is {round(held_out_concordance_rsf_pitch_level_2023,2)}"
)

In [7]:
# Train on the 2023 season, then score that model on the 2024 season (pitch level).
rsf_pitch_level_2023 = fit_random_survival_forest_model_on_season_pitch_level(season_year="2023")
held_out_concordance_rsf_pitch_level_2024 = evaluate_held_out_random_survival_forest_pitch_level(
    rsf_model=rsf_pitch_level_2023,
    held_out_season_year="2024",
)
print(
    f"The Concordance Index of the Random Survival Forest Model fitted on the 2023 season on pitch level on the 2024 survival dataframe is {round(held_out_concordance_rsf_pitch_level_2024,2)}"
)

## Fitting a Random Survival Forest Model on each of the 2020-2023 seasonal survival dataframes at the Game Level

In [8]:
def fit_random_survival_forest_model_on_season_game_level(season_year, *, data_dir="../Time-Invariant-Survival-Analysis"):
    """Fit a Random Survival Forest on one season's game-level survival dataframe.

    Args:
        season_year: Season identifier interpolated into the CSV file name
            ``survival_df_{season_year}_time_invariant_game_level.csv``.
        data_dir: Directory that holds the survival CSV files. Defaults to the
            location this notebook originally hard-coded, so existing calls
            behave exactly as before.

    Returns:
        The fitted ``RandomSurvivalForest`` estimator.
    """
    season_df = pd.read_csv(f"{data_dir}/survival_df_{season_year}_time_invariant_game_level.csv")

    # Every column other than the duration ('Games') and the event
    # indicator ('EVENT') is used as a feature.
    X_train = season_df.drop(columns=['Games', 'EVENT'])

    # Record array with the fields in (EVENT, Games) order, i.e. event
    # indicator first and duration second.
    # NOTE(review): scikit-survival requires the event field to be boolean;
    # this assumes the CSV stores EVENT as True/False - confirm.
    y_train = season_df[['EVENT', 'Games']].to_records(index=False)

    # Fixed hyper-parameters and seed, identical for every season.
    random_survival_forest = RandomSurvivalForest(
        n_estimators=100,
        min_samples_split=10,
        min_samples_leaf=15,
        n_jobs=-1,
        random_state=100,
    )
    random_survival_forest.fit(X_train, y_train)

    return random_survival_forest

### Evaluate the performance of the Random Survival Forest Model on the held-out survival dataframe for injured players for each of the seasonal models at game level

In [9]:
# Score a fitted model on a held-out season's survival dataframe (e.g. 2021 for the model fitted on 2020)
def evaluate_held_out_random_survival_forest_game_level(rsf_model, held_out_season_year, *, data_dir="../Time-Invariant-Survival-Analysis"):
    """Score a fitted model on a held-out season's game-level survival dataframe.

    Args:
        rsf_model: Fitted estimator exposing ``score(X, y)``.
        held_out_season_year: Season identifier interpolated into the CSV file
            name ``survival_df_{held_out_season_year}_time_invariant_game_level.csv``.
        data_dir: Directory that holds the survival CSV files. Defaults to the
            location this notebook originally hard-coded, so existing calls
            behave exactly as before.

    Returns:
        Whatever ``rsf_model.score`` returns for the held-out data; the
        notebook reports it as the concordance index.
    """
    held_out_df = pd.read_csv(
        f"{data_dir}/survival_df_{held_out_season_year}_time_invariant_game_level.csv"
    )

    # Same split as at training time: features are everything except the
    # duration ('Games') and the event indicator ('EVENT').
    X_test = held_out_df.drop(columns=['Games', 'EVENT'])
    # Record array with the fields in (EVENT, Games) order.
    y_test = held_out_df[['EVENT', 'Games']].to_records(index=False)

    return rsf_model.score(X_test, y_test)

In [10]:
# Train on the 2020 season, then score that model on the 2021 season (game level).
rsf_game_level_2020 = fit_random_survival_forest_model_on_season_game_level(season_year="2020")
held_out_concordance_rsf_game_level_2021 = evaluate_held_out_random_survival_forest_game_level(
    rsf_model=rsf_game_level_2020,
    held_out_season_year="2021",
)
print(
    f"The Concordance Index of the Random Survival Forest Model fitted on the 2020 season on game level on the 2021 survival dataframe is {round(held_out_concordance_rsf_game_level_2021,2)}"
)

In [11]:
# Train on the 2021 season, then score that model on the 2022 season (game level).
rsf_game_level_2021 = fit_random_survival_forest_model_on_season_game_level(season_year="2021")
held_out_concordance_rsf_game_level_2022 = evaluate_held_out_random_survival_forest_game_level(
    rsf_model=rsf_game_level_2021,
    held_out_season_year="2022",
)
print(
    f"The Concordance Index of the Random Survival Forest Model fitted on the 2021 season on game level on the 2022 survival dataframe is {round(held_out_concordance_rsf_game_level_2022,2)}"
)

In [12]:
# Train on the 2022 season, then score that model on the 2023 season (game level).
rsf_game_level_2022 = fit_random_survival_forest_model_on_season_game_level(season_year="2022")
held_out_concordance_rsf_game_level_2023 = evaluate_held_out_random_survival_forest_game_level(
    rsf_model=rsf_game_level_2022,
    held_out_season_year="2023",
)
print(
    f"The Concordance Index of the Random Survival Forest Model fitted on the 2022 season on game level on the 2023 survival dataframe is {round(held_out_concordance_rsf_game_level_2023,2)}"
)

In [13]:
# Train on the 2023 season, then score that model on the 2024 season (game level).
rsf_game_level_2023 = fit_random_survival_forest_model_on_season_game_level(season_year="2023")
held_out_concordance_rsf_game_level_2024 = evaluate_held_out_random_survival_forest_game_level(
    rsf_model=rsf_game_level_2023,
    held_out_season_year="2024",
)
print(
    f"The Concordance Index of the Random Survival Forest Model fitted on the 2023 season on game level on the 2024 survival dataframe is {round(held_out_concordance_rsf_game_level_2024,2)}"
)