## Random Survival Forest Model
Fit the Random Survival Forest Model on the survival dataframe for each of the seasons on both the pitch and game levels.

In [1]:
import pandas as pd
%matplotlib inline

from sklearn import set_config
from sksurv.ensemble import RandomSurvivalForest

# Show scikit-learn style estimators as plain text rather than the HTML diagram repr.
set_config(display='text')

## Fitting the Random Survival Forest Model on each of the 2020-2023 seasonal survival dataframes at Pitch Level

In [2]:
def fit_random_survival_forest_model_on_season_pitch_level(season_year, recurrence):
    """Fit a Random Survival Forest on one season's pitch-level survival dataframe.

    Parameters
    ----------
    season_year : str or int
        Season identifier interpolated into the CSV file name.
    recurrence : bool
        When falsy, the ``recurrence`` column is removed before fitting;
        otherwise it is kept as a feature.

    Returns
    -------
    RandomSurvivalForest
        The estimator after ``fit`` has been called on the season's data.
    """
    csv_path = f"../Survival-Dataframes/Time-Invariant/survival_df_{season_year}_time_invariant_pitch_level.csv"

    # Remove the player name and injury-date columns so they are not used as features.
    season_frame = pd.read_csv(csv_path).drop(
        columns=['player_name', 'previous_injury_date', 'next_injury_date']
    )
    if not recurrence:
        season_frame = season_frame.drop(columns=['recurrence'])

    # Features: every remaining column except the two outcome columns.
    X_train = season_frame.drop(columns=['num_pitches', 'EVENT'])

    # Outcome: record array with the fields EVENT and num_pitches, in that order.
    y_train = season_frame[['EVENT', 'num_pitches']].to_records(index=False)

    # Fixed hyperparameters and seed so repeated runs build the same forest.
    forest_params = {
        'n_estimators': 100,
        'min_samples_split': 10,
        'min_samples_leaf': 15,
        'n_jobs': -1,
        'random_state': 100,
    }
    random_survival_forest = RandomSurvivalForest(**forest_params)
    random_survival_forest.fit(X_train, y_train)

    return random_survival_forest

### Evaluate the performance of each seasonal Random Survival Forest Model on the held-out survival dataframe (the following season) for injured players at pitch level

In [3]:
# Score a fitted model on the held-out season's survival dataframe (any season, not only 2024).
def evaluate_held_out_random_survival_forest_pitch_level(rsf_model, held_out_season_year, recurrence):
    """Score a fitted model on a held-out season's pitch-level survival dataframe.

    Parameters
    ----------
    rsf_model : object
        Fitted estimator exposing ``score(X, y)``.
    held_out_season_year : str or int
        Season identifier interpolated into the CSV file name.
    recurrence : bool
        When falsy, the ``recurrence`` column is removed before scoring.
        It should match the setting used when ``rsf_model`` was fitted.

    Returns
    -------
    The value returned by ``rsf_model.score`` on the held-out data
    (reported as the concordance index by the calling cells).
    """
    held_out_frame = pd.read_csv(f"../Survival-Dataframes/Time-Invariant/"
                                 f"survival_df_{held_out_season_year}_time_invariant_pitch_level.csv")

    # Remove the player name and injury-date columns, mirroring the training preprocessing.
    held_out_frame = held_out_frame.drop(
        columns=['player_name', 'previous_injury_date', 'next_injury_date']
    )
    if not recurrence:
        held_out_frame = held_out_frame.drop(columns=['recurrence'])

    # Features are everything except the outcome columns; the outcome is a
    # record array with the fields EVENT and num_pitches, in that order.
    X_test = held_out_frame.drop(columns=['num_pitches', 'EVENT'])
    y_test = held_out_frame[['EVENT', 'num_pitches']].to_records(index=False)

    return rsf_model.score(X_test, y_test)

In [4]:
# Fit on the 2020 season with the `recurrence` column dropped, then score that model on the 2021 season (pitch level).
rsf_pitch_level_2020 = fit_random_survival_forest_model_on_season_pitch_level("2020",recurrence=False)
held_out_concordance_rsf_pitch_level_2021 = evaluate_held_out_random_survival_forest_pitch_level(rsf_pitch_level_2020, "2021",recurrence=False)
print(f"The Concordance Index of the Random Survival Forest Model fitted on the 2020 season on pitch level on the 2021 survival dataframe is {round(held_out_concordance_rsf_pitch_level_2021,2)}")

The Concordance Index of the Random Survival Forest Model fitted on the 2020 season on pitch level on the 2021 survival dataframe is 0.56


In [5]:
# Fit on the 2020 season keeping the `recurrence` column as a feature, then score that model on the 2021 season (pitch level).
rsf_pitch_level_2020_recurrence = fit_random_survival_forest_model_on_season_pitch_level("2020",recurrence=True)
held_out_concordance_rsf_pitch_level_2021_recurrence = evaluate_held_out_random_survival_forest_pitch_level(rsf_pitch_level_2020_recurrence, "2021",recurrence=True)
print(f"The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2020 season on pitch level on the 2021 survival dataframe is {round(held_out_concordance_rsf_pitch_level_2021_recurrence,2)}")

The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2020 season on pitch level on the 2021 survival dataframe is 0.66


In [6]:
# Fit on the 2021 season with the `recurrence` column dropped, then score that model on the 2022 season (pitch level).
rsf_pitch_level_2021 = fit_random_survival_forest_model_on_season_pitch_level("2021", recurrence=False)
held_out_concordance_rsf_pitch_level_2022 = evaluate_held_out_random_survival_forest_pitch_level(rsf_pitch_level_2021, "2022", recurrence=False)
print(f"The Concordance Index of the Random Survival Forest Model fitted on the 2021 season on pitch level on the 2022 survival dataframe is {round(held_out_concordance_rsf_pitch_level_2022,2)}")

The Concordance Index of the Random Survival Forest Model fitted on the 2021 season on pitch level on the 2022 survival dataframe is 0.55


In [7]:
# Fit on the 2021 season keeping the `recurrence` column as a feature, then score that model on the 2022 season (pitch level).
rsf_pitch_level_2021_recurrence = fit_random_survival_forest_model_on_season_pitch_level("2021", recurrence=True)
held_out_concordance_rsf_pitch_level_2022_recurrence = evaluate_held_out_random_survival_forest_pitch_level(rsf_pitch_level_2021_recurrence, "2022", recurrence=True)
print(f"The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2021 season on pitch level on the 2022 survival dataframe is {round(held_out_concordance_rsf_pitch_level_2022_recurrence,2)}")

The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2021 season on pitch level on the 2022 survival dataframe is 0.65


In [8]:
# Fit on the 2022 season with the `recurrence` column dropped, then score that model on the 2023 season (pitch level).
rsf_pitch_level_2022 = fit_random_survival_forest_model_on_season_pitch_level("2022", recurrence=False)
held_out_concordance_rsf_pitch_level_2023 = evaluate_held_out_random_survival_forest_pitch_level(rsf_pitch_level_2022, "2023", recurrence=False)
print(f"The Concordance Index of the Random Survival Forest Model fitted on the 2022 season on pitch level on the 2023 survival dataframe is {round(held_out_concordance_rsf_pitch_level_2023,2)}")

The Concordance Index of the Random Survival Forest Model fitted on the 2022 season on pitch level on the 2023 survival dataframe is 0.58


In [9]:
# Fit on the 2022 season keeping the `recurrence` column as a feature, then score that model on the 2023 season (pitch level).
rsf_pitch_level_2022_recurrence = fit_random_survival_forest_model_on_season_pitch_level("2022", recurrence=True)
held_out_concordance_rsf_pitch_level_2023_recurrence = evaluate_held_out_random_survival_forest_pitch_level(rsf_pitch_level_2022_recurrence, "2023", recurrence=True)
print(f"The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2022 season on pitch level on the 2023 survival dataframe is {round(held_out_concordance_rsf_pitch_level_2023_recurrence,2)}")

The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2022 season on pitch level on the 2023 survival dataframe is 0.64


In [10]:
# Fit on the 2023 season with the `recurrence` column dropped, then score that model on the 2024 season (pitch level).
rsf_pitch_level_2023 = fit_random_survival_forest_model_on_season_pitch_level("2023", recurrence=False)
held_out_concordance_rsf_pitch_level_2024 = evaluate_held_out_random_survival_forest_pitch_level(rsf_pitch_level_2023, "2024", recurrence=False)
print(f"The Concordance Index of the Random Survival Forest Model fitted on the 2023 season on pitch level on the 2024 survival dataframe is {round(held_out_concordance_rsf_pitch_level_2024,2)}")

The Concordance Index of the Random Survival Forest Model fitted on the 2023 season on pitch level on the 2024 survival dataframe is 0.52


In [11]:
# Fit on the 2023 season keeping the `recurrence` column as a feature, then score that model on the 2024 season (pitch level).
rsf_pitch_level_2023_recurrence = fit_random_survival_forest_model_on_season_pitch_level("2023", recurrence=True)
held_out_concordance_rsf_pitch_level_2024_recurrence = evaluate_held_out_random_survival_forest_pitch_level(rsf_pitch_level_2023_recurrence, "2024", recurrence=True)
print(f"The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2023 season on pitch level on the 2024 survival dataframe is {round(held_out_concordance_rsf_pitch_level_2024_recurrence,2)}")

The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2023 season on pitch level on the 2024 survival dataframe is 0.62


## Fitting the Random Survival Forest Model on each of the 2020-2023 seasonal survival dataframes at Game Level

In [12]:
def fit_random_survival_forest_model_on_season_game_level(season_year, recurrence):
    """Fit a Random Survival Forest on one season's game-level survival dataframe.

    Parameters
    ----------
    season_year : str or int
        Season identifier interpolated into the CSV file name.
    recurrence : bool
        When falsy, the ``recurrence`` column is removed before fitting;
        otherwise it is kept as a feature.

    Returns
    -------
    RandomSurvivalForest
        The estimator after ``fit`` has been called on the season's data.
    """
    csv_path = f"../Survival-Dataframes/Time-Invariant/survival_df_{season_year}_time_invariant_game_level.csv"

    # Remove the player name and injury-date columns so they are not used as features.
    season_frame = pd.read_csv(csv_path).drop(
        columns=['player_name', 'previous_injury_date', 'next_injury_date']
    )
    if not recurrence:
        season_frame = season_frame.drop(columns=['recurrence'])

    # Features: every remaining column except the two outcome columns.
    X_train = season_frame.drop(columns=['num_games', 'EVENT'])

    # Outcome: record array with the fields EVENT and num_games, in that order.
    y_train = season_frame[['EVENT', 'num_games']].to_records(index=False)

    # Fixed hyperparameters and seed so repeated runs build the same forest.
    forest_params = {
        'n_estimators': 100,
        'min_samples_split': 10,
        'min_samples_leaf': 15,
        'n_jobs': -1,
        'random_state': 100,
    }
    random_survival_forest = RandomSurvivalForest(**forest_params)
    random_survival_forest.fit(X_train, y_train)

    return random_survival_forest

### Evaluate the performance of each seasonal Random Survival Forest Model on the held-out survival dataframe (the following season) for injured players at game level

In [13]:
# Score a fitted model on the held-out season's survival dataframe (any season, not only 2024).
def evaluate_held_out_random_survival_forest_game_level(rsf_model, held_out_season_year, recurrence):
    """Score a fitted model on a held-out season's game-level survival dataframe.

    Parameters
    ----------
    rsf_model : object
        Fitted estimator exposing ``score(X, y)``.
    held_out_season_year : str or int
        Season identifier interpolated into the CSV file name.
    recurrence : bool
        When falsy, the ``recurrence`` column is removed before scoring.
        It should match the setting used when ``rsf_model`` was fitted.

    Returns
    -------
    The value returned by ``rsf_model.score`` on the held-out data
    (reported as the concordance index by the calling cells).
    """
    held_out_frame = pd.read_csv(f"../Survival-Dataframes/Time-Invariant/"
                                 f"survival_df_{held_out_season_year}_time_invariant_game_level.csv")

    # Remove the player name and injury-date columns, mirroring the training preprocessing.
    held_out_frame = held_out_frame.drop(
        columns=['player_name', 'previous_injury_date', 'next_injury_date']
    )
    if not recurrence:
        held_out_frame = held_out_frame.drop(columns=['recurrence'])

    # Features are everything except the outcome columns; the outcome is a
    # record array with the fields EVENT and num_games, in that order.
    X_test = held_out_frame.drop(columns=['num_games', 'EVENT'])
    y_test = held_out_frame[['EVENT', 'num_games']].to_records(index=False)

    return rsf_model.score(X_test, y_test)

In [14]:
# Fit on the 2020 season and score on the 2021 season (game level); the positional False is `recurrence`, so that column is dropped.
rsf_game_level_2020 = fit_random_survival_forest_model_on_season_game_level("2020", False)
held_out_concordance_rsf_game_level_2021 = evaluate_held_out_random_survival_forest_game_level(rsf_game_level_2020, "2021", False)
print(f"The Concordance Index of the Random Survival Forest Model fitted on the 2020 season on game level on the 2021 survival dataframe is {round(held_out_concordance_rsf_game_level_2021,2)}")

The Concordance Index of the Random Survival Forest Model fitted on the 2020 season on game level on the 2021 survival dataframe is 0.56


In [15]:
# Fit on the 2020 season and score on the 2021 season (game level); the positional True is `recurrence`, so that column is kept as a feature.
rsf_game_level_2020_recurrence = fit_random_survival_forest_model_on_season_game_level("2020", True)
held_out_concordance_rsf_game_level_2021_recurrence = evaluate_held_out_random_survival_forest_game_level(rsf_game_level_2020_recurrence, "2021", True)
print(f"The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2020 season on game level on the 2021 survival dataframe is {round(held_out_concordance_rsf_game_level_2021_recurrence,2)}")

The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2020 season on game level on the 2021 survival dataframe is 0.85


In [16]:
# Fit on the 2021 season and score on the 2022 season (game level); the positional False is `recurrence`, so that column is dropped.
rsf_game_level_2021 = fit_random_survival_forest_model_on_season_game_level("2021", False)
held_out_concordance_rsf_game_level_2022 = evaluate_held_out_random_survival_forest_game_level(rsf_game_level_2021, "2022", False)
print(f"The Concordance Index of the Random Survival Forest Model fitted on the 2021 season on game level on the 2022 survival dataframe is {round(held_out_concordance_rsf_game_level_2022,2)}")

The Concordance Index of the Random Survival Forest Model fitted on the 2021 season on game level on the 2022 survival dataframe is 0.55


In [17]:
# Fit on the 2021 season and score on the 2022 season (game level); the positional True is `recurrence`, so that column is kept as a feature.
rsf_game_level_2021_recurrence = fit_random_survival_forest_model_on_season_game_level("2021", True)
held_out_concordance_rsf_game_level_2022_recurrence = evaluate_held_out_random_survival_forest_game_level(rsf_game_level_2021_recurrence, "2022", True)
print(f"The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2021 season on game level on the 2022 survival dataframe is {round(held_out_concordance_rsf_game_level_2022_recurrence,2)}")

The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2021 season on game level on the 2022 survival dataframe is 0.85


In [18]:
# Fit on the 2022 season and score on the 2023 season (game level); the positional False is `recurrence`, so that column is dropped.
rsf_game_level_2022 = fit_random_survival_forest_model_on_season_game_level("2022",False)
held_out_concordance_rsf_game_level_2023 = evaluate_held_out_random_survival_forest_game_level(rsf_game_level_2022, "2023", False)
print(f"The Concordance Index of the Random Survival Forest Model fitted on the 2022 season on game level on the 2023 survival dataframe is {round(held_out_concordance_rsf_game_level_2023,2)}")

The Concordance Index of the Random Survival Forest Model fitted on the 2022 season on game level on the 2023 survival dataframe is 0.51


In [19]:
# Fit on the 2022 season and score on the 2023 season (game level); the positional True is `recurrence`, so that column is kept as a feature.
rsf_game_level_2022_recurrence = fit_random_survival_forest_model_on_season_game_level("2022",True)
held_out_concordance_rsf_game_level_2023_recurrence = evaluate_held_out_random_survival_forest_game_level(rsf_game_level_2022_recurrence, "2023", True)
print(f"The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2022 season on game level on the 2023 survival dataframe is {round(held_out_concordance_rsf_game_level_2023_recurrence,2)}")

The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2022 season on game level on the 2023 survival dataframe is 0.83


In [20]:
# Fit on the 2023 season and score on the 2024 season (game level); the positional False is `recurrence`, so that column is dropped.
rsf_game_level_2023 = fit_random_survival_forest_model_on_season_game_level("2023", False)
held_out_concordance_rsf_game_level_2024 = evaluate_held_out_random_survival_forest_game_level(rsf_game_level_2023, "2024", False)
print(f"The Concordance Index of the Random Survival Forest Model fitted on the 2023 season on game level on the 2024 survival dataframe is {round(held_out_concordance_rsf_game_level_2024,2)}")

The Concordance Index of the Random Survival Forest Model fitted on the 2023 season on game level on the 2024 survival dataframe is 0.58


In [21]:
# Fit on the 2023 season and score on the 2024 season (game level); the positional True is `recurrence`, so that column is kept as a feature.
rsf_game_level_2023_recurrence = fit_random_survival_forest_model_on_season_game_level("2023", True)
held_out_concordance_rsf_game_level_2024_recurrence = evaluate_held_out_random_survival_forest_game_level(rsf_game_level_2023_recurrence, "2024", True)
print(f"The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2023 season on game level on the 2024 survival dataframe is {round(held_out_concordance_rsf_game_level_2024_recurrence,2)}")

The Concordance Index of the Random Survival Forest Model with recurrence fitted on the 2023 season on game level on the 2024 survival dataframe is 0.87
