---
authors:
  - name: Mathis Derenne
    affiliation: M2 MIASHS - Université de Lyon
    github: mathisdrn
  - name: Djida Boukari
    affiliation: M2 MIASHS - Université de Lyon
  - name: Ines Nakhli
    affiliation: M2 MIASHS - Université de Lyon
date: 2026-02-01
---

# Présentation et nettoyage des données

## Téléchargement des données

In [1]:
import zipfile
from io import BytesIO
from pathlib import Path

import requests

# Local folder holding the raw CSV and everything derived from it.
data_dir = Path("../../data/")
data_dir.mkdir(parents=True, exist_ok=True)
filepath = data_dir / "df_study_L18_w6.csv"

# Only hit the network when the CSV is not already on disk.
if not filepath.exists():
    # Download and extract zip (URL pinned to a specific commit).
    url = "https://github.com/MINCHELLA-Paul/Master-MIASHS/raw/6abd32cc11d73850a0d8c54a3ab9a31200b6d97b/Atelier_SigBERT/df_study_selected.zip"
    # A timeout prevents the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=60)
    # Fail with a clear HTTP error instead of feeding an error page to ZipFile.
    response.raise_for_status()

    # The archive is unpacked straight from memory into the data folder.
    with zipfile.ZipFile(BytesIO(response.content)) as zip_ref:
        zip_ref.extractall(data_dir)

## Chargement des données

Le jeu de données contient 3555 lignes et 759 colonnes.

Chaque ligne correspond à un individu décrit par un ID, un temps de survie, un indicateur de censure et 756 caractéristiques correspondant aux signatures calculées à partir des données médicales.

Le taux de censure est d'environ 33 %. La durée médiane observée (colonne `time`) est de 1329 jours.

Le jeu de données ne contient pas de valeurs manquantes.

In [2]:
import polars as pl
import polars.selectors as cs

# Table rendering: show floats with 3 decimals and hide the shape banner.
pl.Config.set_float_precision(3)
pl.Config.set_tbl_hide_dataframe_shape(True)

# Read the CSV (infer_schema_length=None makes polars scan every row to infer
# dtypes), then keep only the outcome columns and the signature features.
df = pl.read_csv(filepath, infer_schema_length=None).select(
    "time", "event", cs.starts_with("sig_")
)
df

time,event,sig_1,sig_2,sig_3,sig_4,sig_5,sig_6,sig_7,sig_8,sig_9,sig_10,sig_11,sig_12,sig_13,sig_14,sig_15,sig_16,sig_17,sig_18,sig_19,sig_20,sig_21,sig_22,sig_23,sig_24,sig_25,sig_26,sig_27,sig_28,sig_29,sig_30,sig_31,sig_32,sig_33,sig_34,sig_35,…,sig_720,sig_721,sig_722,sig_723,sig_724,sig_725,sig_726,sig_727,sig_728,sig_729,sig_730,sig_731,sig_732,sig_733,sig_734,sig_735,sig_736,sig_737,sig_738,sig_739,sig_740,sig_741,sig_742,sig_743,sig_744,sig_745,sig_746,sig_747,sig_748,sig_749,sig_750,sig_751,sig_752,sig_753,sig_754,sig_755,sig_756
f64,bool,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
5138.000,false,0.000,0.191,-0.111,-0.013,0.022,0.021,0.005,0.005,0.011,-0.026,0.041,0.013,-0.004,-0.009,-0.005,0.010,0.002,0.005,0.009,-0.002,0.003,0.003,-0.005,-0.001,0.002,-0.001,0.201,0.000,0.000,-0.000,-0.000,0.000,0.000,0.000,0.000,…,-0.000,-0.000,0.000,-0.000,-0.000,0.000,0.000,-0.000,0.000,-0.000,0.000,0.019,-0.011,-0.001,0.002,0.002,0.000,0.001,0.001,-0.003,0.004,0.001,-0.000,-0.001,-0.000,0.001,0.000,0.001,0.001,-0.000,0.000,0.000,-0.001,-0.000,0.000,-0.000,0.020
848.000,true,1.000,0.066,-0.022,-0.005,-0.011,-0.015,0.008,-0.004,0.013,0.002,0.011,0.006,0.002,-0.005,-0.012,0.007,0.001,-0.002,0.001,0.002,0.001,-0.005,-0.001,-0.001,0.001,0.002,0.085,0.500,-0.236,0.101,-0.006,-0.042,0.021,0.003,0.016,…,-0.000,0.000,-0.000,-0.000,-0.000,0.000,-0.000,-0.000,0.000,0.000,0.085,0.003,-0.001,-0.000,-0.000,-0.001,0.000,-0.000,0.001,0.000,0.000,0.000,0.000,-0.000,-0.000,0.000,0.000,-0.000,0.000,0.000,0.000,-0.000,-0.000,-0.000,0.000,0.000,0.004
2340.000,false,1.000,0.208,-0.045,0.022,0.016,0.012,-0.013,0.011,0.013,-0.013,0.026,0.018,0.014,-0.013,-0.012,-0.006,-0.005,0.002,0.001,0.010,-0.006,-0.007,0.000,0.001,-0.005,-0.001,0.215,0.500,-0.043,0.048,0.006,-0.004,0.016,-0.009,-0.006,…,-0.000,0.000,-0.000,-0.000,0.000,-0.000,0.000,0.000,0.000,0.000,0.215,0.022,-0.001,0.003,0.000,0.002,-0.003,0.002,0.002,-0.002,0.002,0.002,0.003,-0.001,-0.002,-0.001,-0.001,-0.000,0.001,0.002,-0.001,-0.001,0.000,0.000,-0.001,-0.000,0.023
1800.000,true,0.000,0.784,-0.095,-0.059,-0.046,0.018,-0.004,-0.011,-0.001,-0.010,0.011,0.012,0.009,-0.033,-0.015,-0.008,0.022,0.005,0.001,-0.032,0.024,0.000,-0.016,-0.030,0.004,0.016,0.215,0.000,0.000,-0.000,-0.000,-0.000,0.000,-0.000,-0.000,…,0.000,0.000,-0.000,0.000,0.000,-0.000,-0.000,0.000,0.000,0.002,0.000,0.084,-0.010,-0.006,-0.005,0.002,-0.000,-0.001,-0.000,-0.001,0.001,0.001,0.001,-0.004,-0.002,-0.001,0.002,0.001,0.000,-0.003,0.003,0.000,-0.002,-0.003,0.000,0.002,0.023
5248.000,false,0.000,0.432,-0.215,-0.001,0.029,0.030,-0.006,0.004,0.006,-0.016,0.028,0.022,0.006,-0.008,0.011,0.025,-0.005,-0.002,0.023,-0.008,0.022,-0.011,-0.010,0.002,0.024,-0.007,0.201,0.000,0.000,-0.000,-0.000,0.000,0.000,-0.000,0.000,…,0.000,-0.000,0.000,-0.000,0.000,0.000,-0.000,-0.000,0.000,-0.001,0.000,0.043,-0.022,-0.000,0.003,0.003,-0.001,0.000,0.001,-0.002,0.003,0.002,0.001,-0.001,0.001,0.003,-0.000,-0.000,0.002,-0.001,0.002,-0.001,-0.001,0.000,0.002,-0.001,0.020
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
613.000,true,1.000,0.024,-0.010,0.000,-0.002,-0.009,0.001,-0.005,0.007,0.006,0.012,0.002,-0.002,-0.003,-0.003,0.001,-0.000,-0.003,0.001,-0.003,0.001,-0.002,-0.002,0.003,0.001,0.001,-5.504,0.500,-0.159,0.059,-0.005,0.020,0.011,-0.005,0.016,…,0.000,0.000,-0.000,0.000,0.000,0.000,-0.000,-0.000,0.000,0.004,-5.504,0.437,-0.032,0.073,-0.050,-0.027,0.008,-0.056,0.027,-0.036,0.023,0.012,-0.004,-0.020,-0.032,0.026,0.004,0.002,-0.001,0.042,-0.003,-0.004,-0.012,0.000,0.003,-0.009,15.147
645.000,true,1.000,0.995,0.078,0.071,-0.026,-0.004,-0.024,0.006,0.004,0.008,0.057,-0.003,0.012,0.007,0.002,0.011,0.012,-0.035,-0.014,-0.011,-0.003,0.007,-0.026,0.002,0.005,0.019,0.241,0.500,0.345,0.043,0.022,-0.010,0.016,-0.017,0.012,…,-0.000,0.000,0.000,-0.000,0.000,-0.000,-0.000,0.000,0.000,0.002,0.241,0.193,0.044,0.018,-0.012,0.006,-0.005,0.005,-0.000,-0.000,0.012,-0.003,0.004,0.004,0.001,0.002,0.003,-0.008,-0.004,-0.004,0.000,0.002,-0.006,0.001,0.001,0.003,0.029
982.000,true,1.000,0.385,-0.138,0.004,0.045,-0.010,-0.010,-0.030,0.035,0.009,0.021,-0.007,0.021,-0.017,-0.006,0.003,-0.018,0.002,0.008,-0.011,0.024,-0.011,-0.009,0.003,0.001,0.008,0.215,0.500,-0.135,0.000,-0.002,0.025,-0.006,-0.027,-0.009,…,0.000,0.000,-0.000,0.000,-0.000,-0.000,-0.000,0.000,0.000,0.002,0.215,0.030,-0.013,-0.001,0.008,-0.002,-0.003,-0.005,0.003,0.002,0.003,-0.002,0.001,-0.002,-0.001,0.000,-0.005,0.000,0.001,-0.001,0.003,-0.002,-0.001,0.001,0.002,-0.000,0.023
959.000,true,1.000,0.404,-0.197,-0.023,0.034,-0.013,-0.012,-0.006,0.039,0.007,0.032,-0.003,-0.030,-0.018,0.001,0.001,-0.016,-0.011,0.009,0.008,-0.001,0.003,0.004,-0.009,0.004,-0.002,0.088,0.500,0.105,-0.031,-0.023,0.013,-0.008,-0.004,0.003,…,0.000,0.000,0.000,0.000,0.000,0.000,-0.000,0.000,0.000,0.000,0.088,0.023,-0.007,-0.002,0.002,-0.001,-0.002,-0.001,0.003,0.001,0.002,-0.000,-0.001,-0.001,0.001,-0.000,-0.001,-0.001,0.000,0.001,0.001,0.000,0.000,-0.000,0.000,-0.000,0.004


In [3]:
# Summary statistics (count, null count, mean, std, min, quartiles, max) per column.
df.describe()

statistic,time,event,sig_1,sig_2,sig_3,sig_4,sig_5,sig_6,sig_7,sig_8,sig_9,sig_10,sig_11,sig_12,sig_13,sig_14,sig_15,sig_16,sig_17,sig_18,sig_19,sig_20,sig_21,sig_22,sig_23,sig_24,sig_25,sig_26,sig_27,sig_28,sig_29,sig_30,sig_31,sig_32,sig_33,sig_34,…,sig_720,sig_721,sig_722,sig_723,sig_724,sig_725,sig_726,sig_727,sig_728,sig_729,sig_730,sig_731,sig_732,sig_733,sig_734,sig_735,sig_736,sig_737,sig_738,sig_739,sig_740,sig_741,sig_742,sig_743,sig_744,sig_745,sig_746,sig_747,sig_748,sig_749,sig_750,sig_751,sig_752,sig_753,sig_754,sig_755,sig_756
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""count""",3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,…,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",1914.108,0.673,0.757,0.434,-0.111,-0.001,0.007,-0.007,0.004,-0.005,0.015,-0.003,0.019,0.004,-0.003,-0.01,-0.009,0.008,-0.003,-0.004,-0.001,-0.004,0.002,-0.003,-0.003,0.0,0.003,0.003,0.066,0.379,0.006,0.001,-0.001,0.0,0.0,0.0,…,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.028,0.016,-0.003,0.0,0.0,-0.0,0.0,-0.0,0.001,-0.0,0.001,0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,0.361
"""std""",1605.145,,0.429,0.276,0.111,0.034,0.026,0.023,0.02,0.017,0.016,0.015,0.015,0.015,0.012,0.011,0.012,0.011,0.011,0.011,0.01,0.011,0.01,0.009,0.009,0.008,0.009,0.008,0.848,0.214,0.175,0.076,0.023,0.018,0.015,0.014,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004,0.816,0.312,0.106,0.023,0.021,0.016,0.021,0.015,0.015,0.015,0.015,0.012,0.009,0.012,0.014,0.011,0.012,0.008,0.008,0.01,0.008,0.01,0.009,0.007,0.007,0.007,2.751
"""min""",540.0,0.0,0.0,0.007,-0.369,-0.24,-0.099,-0.102,-0.07,-0.049,-0.05,-0.053,-0.041,-0.042,-0.069,-0.069,-0.05,-0.041,-0.052,-0.049,-0.047,-0.059,-0.035,-0.04,-0.046,-0.038,-0.028,-0.032,-11.774,0.0,-0.91,-0.388,-0.137,-0.098,-0.081,-0.076,…,-0.001,-0.001,-0.001,-0.001,-0.001,-0.001,-0.001,-0.0,0.0,-0.073,-11.774,-11.876,-2.301,-0.345,-0.399,-0.275,-0.462,-0.452,-0.325,-0.291,-0.325,-0.217,-0.131,-0.126,-0.401,-0.331,-0.121,-0.192,-0.102,-0.307,-0.162,-0.306,-0.232,-0.219,-0.222,-0.115,0.004
"""25%""",854.0,,1.0,0.268,-0.193,-0.012,-0.009,-0.021,-0.005,-0.016,0.006,-0.014,0.011,-0.005,-0.01,-0.016,-0.016,0.001,-0.009,-0.009,-0.006,-0.009,-0.004,-0.009,-0.008,-0.004,-0.002,-0.002,0.19,0.5,-0.059,-0.026,-0.007,-0.006,-0.005,-0.004,…,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,0.013,-0.022,-0.002,-0.002,-0.003,-0.001,-0.002,-0.0,-0.002,0.0,-0.001,-0.001,-0.002,-0.002,-0.0,-0.001,-0.001,-0.001,-0.001,-0.001,-0.001,-0.001,-0.001,-0.001,-0.001,0.02
"""50%""",1329.0,,1.0,0.385,-0.123,0.001,0.01,-0.009,0.005,-0.006,0.015,-0.004,0.019,0.002,-0.003,-0.009,-0.008,0.008,-0.002,-0.003,-0.0,-0.003,0.002,-0.003,-0.002,0.0,0.003,0.002,0.215,0.5,0.0,-0.0,0.0,0.0,0.0,0.0,…,-0.0,0.0,-0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,0.201,0.037,-0.011,0.0,0.001,-0.001,0.001,-0.0,0.001,-0.0,0.002,0.0,-0.0,-0.001,-0.001,0.001,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,0.023
"""75%""",2339.0,,1.0,0.537,-0.033,0.014,0.025,0.007,0.017,0.003,0.026,0.007,0.029,0.013,0.004,-0.003,-0.002,0.015,0.003,0.003,0.005,0.002,0.008,0.002,0.002,0.005,0.008,0.007,0.215,0.5,0.062,0.03,0.007,0.007,0.006,0.006,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001,0.215,0.064,0.0,0.002,0.003,0.002,0.002,0.001,0.003,0.001,0.003,0.002,0.001,0.0,0.0,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.023
"""max""",11220.0,1.0,1.0,1.482,0.361,0.203,0.081,0.068,0.078,0.096,0.074,0.066,0.072,0.067,0.044,0.047,0.06,0.063,0.041,0.066,0.039,0.055,0.045,0.035,0.041,0.044,0.049,0.046,0.268,0.5,1.043,0.382,0.152,0.078,0.091,0.07,…,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.066,0.268,2.06,2.235,0.5,0.364,0.311,0.638,0.199,0.156,0.304,0.269,0.175,0.186,0.247,0.291,0.115,0.452,0.131,0.166,0.205,0.088,0.137,0.329,0.098,0.104,0.197,69.312


## Nettoyage des données

On retire les signatures qui possèdent une déviation standard faible (seuil de 1e-4). 

La signature 54 présente par exemple une variance nulle et est supprimée.

En pratique, cela évite de nombreux problèmes de convergence, notamment pour les modèles de Cox, sans affecter les performances des modèles.

In [4]:
# NOTE: despite the names, the values compared here are standard deviations
# (`.std()`), not variances.
variance_thresold = 1e-4
variances = df.select(cs.starts_with("sig_")).std()

# `variances` has a single row; pair each column name with its value in it.
cols_to_drop = [
    name
    for name, spread in zip(variances.columns, variances.row(0))
    if spread < variance_thresold
]

print(f"{len(cols_to_drop)} colonnes supprimées : {cols_to_drop}")
df = df.drop(cols_to_drop)

19 colonnes supprimées : ['sig_54', 'sig_532', 'sig_588', 'sig_592', 'sig_616', 'sig_619', 'sig_644', 'sig_646', 'sig_672', 'sig_673', 'sig_674', 'sig_699', 'sig_700', 'sig_701', 'sig_723', 'sig_724', 'sig_726', 'sig_727', 'sig_728']


On retire aussi les signatures fortement corrélées (corrélation absolue > 0.9) pour éviter la multicolinéarité.

In [5]:
correlation_threshold = 0.9

# Absolute pairwise correlations between the remaining signature columns.
signatures = df.select(cs.starts_with("sig_"))
corr_matrix = signatures.corr().select(pl.all().abs())

# Walk the upper triangle only: for every pair (earlier, later) whose absolute
# correlation exceeds the threshold, the later column is flagged for removal.
cols_to_drop = set()
names = corr_matrix.columns
for i, col1 in enumerate(names):
    col1_corr = corr_matrix[col1]
    for j in range(i + 1, len(names)):
        if col1_corr[j] > correlation_threshold:
            cols_to_drop.add(names[j])

print(f"{len(cols_to_drop)} colonnes supprimées : {cols_to_drop}")
df = df.drop(list(cols_to_drop))

4 colonnes supprimées : {'sig_756', 'sig_730', 'sig_28', 'sig_56'}


## Séparation des données

Pour la séparation des données, on stratifie par rapport à l'indicateur de censure afin de conserver la même proportion de censurés dans les ensembles d'entraînement et de test.

In [6]:
from sklearn.model_selection import train_test_split

# Stratify on the event indicator so both subsets keep the same censoring rate.
# A fixed seed makes the split, and every file saved from it, reproducible
# from one run of the notebook to the next.
X_train, X_test = train_test_split(
    df,
    test_size=0.33,
    stratify=df.get_column("event"),
    random_state=42,
)

print(f"Taille de l'ensemble d'entraînement : {X_train.height}")
print(f"Taille de l'ensemble de test : {X_test.height}")

Taille de l'ensemble d'entraînement : 2381
Taille de l'ensemble de test : 1174


## Standardisation des données

In [7]:
from sklearn.preprocessing import StandardScaler

# Ask scikit-learn to hand back polars frames rather than numpy arrays.
scaler = StandardScaler().set_output(transform="polars")

# Fit on the training signatures only, so that no statistic from the test set
# leaks into the preprocessing.
signature_selector = cs.starts_with("sig_")
scaler.fit(X_train.select(signature_selector))

# Apply the same fitted scaling to both subsets; with_columns overwrites the
# signature columns in place and leaves `time` / `event` untouched.
scaled_train = scaler.transform(X_train.select(signature_selector))
scaled_test = scaler.transform(X_test.select(signature_selector))
X_train = X_train.with_columns(scaled_train)
X_test = X_test.with_columns(scaled_test)

## Réduction de dimensionnalité avec PCA

Les données sont de grandes dimensions (756 caractéristiques) par rapport à la taille du jeu de données (3555 individus). 

On applique une réduction de dimensionnalité avec la PCA.

In [8]:
from sklearn.decomposition import PCA

# A float n_components asks PCA to keep as many components as needed to
# explain that share (here 90 %) of the variance.
pca = PCA(n_components=0.90).set_output(transform="polars")

# Fit the projection on the training signatures only.
train_signatures = X_train.select(cs.starts_with("sig_"))
test_signatures = X_test.select(cs.starts_with("sig_"))
pca.fit(train_signatures)

# Project both subsets onto the components learned from the training set.
pca_train = pca.transform(train_signatures)
pca_test = pca.transform(test_signatures)

# Replace the raw signatures with their principal components.
X_train = X_train.drop(cs.starts_with("sig_")).with_columns(pca_train)
X_test = X_test.drop(cs.starts_with("sig_")).with_columns(pca_test)

# NOTE(review): `variances` was computed before any column was dropped, so the
# denominator is the original number of signatures, not what PCA was fitted on.
f"Nombre de composantes : {pca.n_components_} / {len(variances.columns)}"

'Nombre de composantes : 332 / 756'

## Sauvegarde des données préparées

In [10]:
# Save next to the raw data, reusing `data_dir` from the download cell so the
# data location is defined in a single place.
X_train.write_parquet(data_dir / "df_study_L18_w6_train.parquet")
X_test.write_parquet(data_dir / "df_study_L18_w6_test.parquet")

### Fonction utilitaire

Ci-dessous la fonction principale utilisée pour calculer les métriques de performance des modèles de survie.

In [None]:
import numpy as np
import pandas as pd
import polars as pl
from sksurv.metrics import (
    brier_score,
    concordance_index_censored,
    cumulative_dynamic_auc,
    integrated_brier_score,
)
from sksurv.util import Surv


def get_survival_at_t(surv_fns, t):
    """Evaluate every predicted survival curve at time(s) ``t``.

    Two input layouts are accepted:

    * a pandas DataFrame whose index holds the time grid and whose columns each
      hold one curve (the lifelines layout); values are linearly interpolated
      with ``np.interp``;
    * any other iterable of callables (e.g. sksurv ``StepFunction`` objects),
      each of which is simply called with ``t``.

    Returns a numpy array with one entry (or one row, if ``t`` is an array)
    per curve.
    """
    if not isinstance(surv_fns, pd.DataFrame):
        # Callable curves: delegate the evaluation to each object.
        values = [curve(t) for curve in surv_fns]
    else:
        # Tabulated curves: interpolate each column over the shared time index.
        timeline = surv_fns.index
        values = []
        for individual in surv_fns.columns:
            values.append(np.interp(t, timeline, surv_fns[individual]))
    return np.array(values)


def evaluate_survival_model(
    y_train: pl.DataFrame, y_test: pl.DataFrame, risk_scores, surv_fns=None
) -> pl.DataFrame:
    """Compute evaluation metrics for a survival model.

    Args:
        y_train: Training outcomes; must contain an ``event`` and a ``time``
            column.
        y_test: Test outcomes with the same two columns.
        risk_scores: One predicted risk score per test row, in the same order
            as ``y_test``. Any array-like is accepted; it is converted to a
            numpy array so it can be filtered with a boolean mask.
        surv_fns: Optional predicted survival curves for the test rows, in any
            layout understood by ``get_survival_at_t`` (a pandas DataFrame
            indexed by time with one column per individual, or an iterable of
            callables). When omitted, the Brier-based metrics are skipped.

    Returns:
        A polars DataFrame with one column per metric: ``C-index``,
        ``Mean td-AUC`` and, when ``surv_fns`` is given, ``IBS`` and
        ``Brier (t=<median training event time>)``.
    """
    # Boolean-mask indexing below needs an ndarray (a plain list would fail).
    risk_scores = np.asarray(risk_scores)

    # Convert the polars DataFrames into sksurv structured arrays.
    y_train = Surv.from_dataframe("event", "time", y_train.to_pandas())
    y_test = Surv.from_dataframe("event", "time", y_test.to_pandas())

    metrics = {}

    # 1. C-index (first element of the returned tuple).
    metrics["C-index"] = concordance_index_censored(
        y_test["event"], y_test["time"], risk_scores
    )[0]

    # 2. Time-dependent AUC at the quartiles of the observed test event times.
    # Times and test rows are kept strictly below 95 % of the largest training
    # time.
    safe_limit = y_train["time"].max() * 0.95
    times_auc = np.quantile(y_test["time"][y_test["event"] == 1], [0.25, 0.5, 0.75])
    times_auc = times_auc[times_auc < safe_limit]

    mask = y_test["time"] < safe_limit
    _, mean_auc = cumulative_dynamic_auc(
        y_train, y_test[mask], risk_scores[mask], times_auc
    )
    metrics["Mean td-AUC"] = mean_auc

    # 3. Brier-based metrics, which need full survival curves.
    if surv_fns is not None:
        # Evaluation grid: 15 percentiles (10th to 90th) of the test times.
        test_times = np.percentile(y_test["time"], np.linspace(10, 90, 15))
        preds_at_times = get_survival_at_t(surv_fns, test_times)

        # 3a. Integrated Brier score over the grid.
        metrics["IBS"] = integrated_brier_score(
            y_train, y_test, preds_at_times, test_times
        )

        # 3b. Brier score at the median event time of the training set.
        t_med = np.median(y_train["time"][y_train["event"]])
        s_at_t_med = get_survival_at_t(surv_fns, t_med)

        _, brier_med = brier_score(y_train, y_test, s_at_t_med, t_med)
        metrics[f"Brier (t={t_med})"] = brier_med[0]

    return pl.DataFrame(metrics)
