In [2]:
!pip install scikit-survival



In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sksurv.ensemble import RandomSurvivalForest
from sksurv.util import Surv
from sksurv.metrics import concordance_index_censored
import numpy as np

In [5]:
data_file_csv = "/content/drive/MyDrive/Ufes/Survival/dataset_fit.csv"

# Read the CSV file
data = pd.read_csv(data_file_csv)

# dados de saida
y = data["time_years"]

# transforma os dados em numeros
y_encoded = Surv.from_arrays(data['falha'], y, name_event="falha", name_time="time_years")

# dados de entrada filtrados
X = data.drop(columns=["time_years_cat", "time_years", "falha"])
# transforma os dados em numeros
X_encoded = pd.get_dummies(X)

# 3. Divisão treino/teste
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y_encoded, test_size=0.25, random_state=42)

# Modelo base
mc = RandomSurvivalForest(random_state=42)

In [6]:
# scorer de C‑index
def cindex_scorer(estimator, X_, y_):
    # estimator.predict retorna tempo medio de sobrevivencia;
    # virar "risco" com sinal invertido
    pred = estimator.predict(X_)
    # retorna tupla (cindex, concordante, discordante, ...) -> index e [0]
    return concordance_index_censored(y_['event'], y_['time'], -pred)[0]

In [7]:
best_model = mc.fit(X_train, y_train)

In [14]:
# Previsões
y_train_pred = best_model.predict(X_train)

y_pred_test = best_model.predict(X_test)

# Métricas - Treino
y_event = np.array([y[0] for y in y_train])
y_time = np.array([y[1] for y in y_train])
c_train = concordance_index_censored(y_event, y_time, -y_train_pred)[0]

# Métricas - Teste
y_event = np.array([y[0] for y in y_test])
y_time = np.array([y[1] for y in y_test])
c_test = concordance_index_censored(y_event,  y_time, -y_pred_test)[0]

print("Hiperparâmetros ótimos:\n")
print(str(best_model.get_params()))
print("\n\nMétricas - Treinamento:\n")
print(f"C‑index: {c_train:.4f}\n")
print("\nMétricas - Teste:\n")
print(f"C‑index:  {c_test:.4f}\n")

Hiperparâmetros ótimos:

{'bootstrap': True, 'low_memory': False, 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_samples_leaf': 3, 'min_samples_split': 6, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}


Métricas - Treinamento:

C‑index: 0.1986


Métricas - Teste:

C‑index:  0.2422

