In [1]:
import os

import numpy as np
import pandas as pd
import mlflow
from tqdm import tqdm
import json
from azureml.core import Workspace, Dataset

import sys

sys.path.append("../04_survival_models/src")

from sklearn.base import BaseEstimator
from sksurv.metrics import concordance_index_censored

# Goal

Validate the Byun model on the DBURI (HSR) dataset using bootstrap validation.

The Byun model is a Cox proportional hazards model with predefined coefficients (betas) from the original manuscript's hazard ratios. We'll:
1. Recreate the Cox model from the published hazard ratios (converting to betas)
2. Validate on 100% of HSR (DBURI) - external validation without fitting
3. Use Bootstrap resampling to compute confidence intervals for Harrell's C-index
4. Only compute Harrell's C-index (no IPCW or IBS as they require training data for censoring estimation)

https://www.nature.com/articles/s41598-020-80262-9

# Parameters

In [2]:
# Resampling settings
RANDOM_STATE = 42  # seed passed to NumPy's global RNG before the bootstrap loop
N_BOOTSTRAP = 100  # Number of bootstrap iterations

# Directories: the "sc" folder located in the parent of the current working directory
DIR_SC = os.path.join(
    os.path.dirname(os.getcwd()),
    "sc",
)

# Functions

In [3]:
class ByunCoxModel(BaseEstimator):
    """
    Cox proportional hazards scorer with fixed, published coefficients.

    The model is never trained: `fit` only maps the columns of the supplied
    DataFrame to the hazard ratios (HR) reported for the Byun et al.
    multivariable Cox model, converted to betas with beta = log(HR).
    `predict` returns the linear predictor X @ beta.

    Hazard Ratios from Byun et al.'s multivariable Cox model:
    - Age (per 1 year): HR = 1.030
    - Gender (Male vs Female): HR = 0.980
    - BMI (per 1 kg/m²): HR = 0.870
    - Diabetes (Yes vs No): HR = 1.960
    - Hypertension (Yes vs No): HR = 1.090
    - ECOG PS (≥1 vs 0): HR = 3.230
    - Symptoms at presentation (Yes vs No): HR = 2.220
    - T stage 2 vs 1: HR = 1.440
    - T stage 3-4 vs 1: HR = 2.440
    - Tumor size 40-70mm vs <40mm: HR = 1.070
    - Tumor size ≥70mm vs <40mm: HR = 1.060
    - Fuhrman grade 3-4 vs 1-2: HR = 1.260
    - Sarcomatoid differentiation (Yes vs No): HR = 4.380
    - Tumor necrosis (Yes vs No): HR = 1.150

    Attributes (set by `fit`, None before):
    - coef_: 1-D array of betas, ordered like `feature_names_in_`
    - feature_names_in_: list of column names seen in `fit`
    - n_features_in_: number of columns seen in `fit`
    """

    # Hazard ratios keyed by the exact feature (column) names the model accepts.
    _BYUN_HAZARD_RATIOS = {
        "age": 1.030,
        "sex_male": 0.980,
        "bmi": 0.870,
        "diabetes_yes": 1.960,
        "hypertension_yes": 1.090,
        "ecog_ge1": 3.230,
        "symptoms_yes": 2.220,
        "t_stage_2": 1.440,
        "t_stage_34": 2.440,
        "tumor_size_40_70": 1.070,
        "tumor_size_ge70": 1.060,
        "fuhrman_34": 1.260,
        "sarcomatoid_yes": 4.380,
        "necrosis_yes": 1.150,
    }

    def __init__(self):
        # None marks the model as "not fitted yet"; fit() fills these in.
        self.coef_ = None
        self.feature_names_in_ = None
        self.n_features_in_ = None

    def fit(self, X, y=None):
        """
        Set up the Byun model coefficients based on exact feature names.

        Parameters:
        - X: DataFrame whose column names all appear in the Byun HR table
        - y: Not used (model coefficients are predefined)

        Returns:
        - self

        Raises:
        - ValueError: if a column of X is not a known Byun feature. The model
          state is left untouched in that case.
        """
        feature_names = X.columns.tolist()

        # Convert HRs to betas (β = log(HR))
        byun_coefs = {
            name: np.log(hr) for name, hr in self._BYUN_HAZARD_RATIOS.items()
        }

        # Validate before mutating any attribute so a failed fit cannot leave
        # the estimator half-configured.
        for feature_name in feature_names:
            if feature_name not in byun_coefs:
                raise ValueError(
                    f"Feature '{feature_name}' not recognized. "
                    f"Expected features: {list(byun_coefs.keys())}"
                )

        self.feature_names_in_ = feature_names
        self.n_features_in_ = len(feature_names)
        self.coef_ = np.array(
            [byun_coefs[name] for name in feature_names], dtype=float
        )

        return self

    def predict(self, X):
        """
        Predict risk scores (linear predictor).

        Parameters:
        - X: DataFrame or array-like. A DataFrame that contains every fitted
          feature name is aligned to the fitted column order by name, so
          reordered or extra columns cannot silently pair a value with the
          wrong coefficient. Any other input is used positionally.

        Returns:
        - risk_scores: Array of risk scores (higher = higher risk)

        Raises:
        - RuntimeError: if called before `fit`.
        """
        if self.coef_ is None:
            raise RuntimeError(
                "ByunCoxModel is not fitted yet; call fit(X) before predict(X)."
            )

        if isinstance(X, pd.DataFrame) and all(
            name in X.columns for name in self.feature_names_in_
        ):
            # Select by name: guarantees the column/coefficient pairing.
            X_array = X[self.feature_names_in_].to_numpy()
        elif hasattr(X, "values"):
            # Columns are not the fitted names: fall back to positional use.
            X_array = X.values
        else:
            X_array = np.asarray(X)

        return np.dot(X_array, self.coef_)

In [4]:
def prepare_byun_features(df):
    """
    Build the Byun model design matrix from the DBURI dataset columns.

    Source column -> output feature(s):
    1. ANM_1_age -> age (continuous, copied as is)
    2. P_1_sex_M -> sex_male (Male=1, Female=0)
    3. ANM_1_patientBMI -> bmi (continuous, copied as is)
    4. diabetes -> diabetes_yes (0.0="No"; 1.0 and 2.0 both count as "Yes")
    5. ANM_1_hypertension -> hypertension_yes (Yes=1, No=0)
    6. ecog -> ecog_ge1 (≥1 = 1, 0 = 0)
    7. ANM_1_symptomsBeginning_Sintomiall'esordio -> symptoms_yes (Yes=1, No=0)
    8. IST_1_kidney1PathologicalStage2009 -> t_stage_2, t_stage_34
       (codes 1.0=T1a, 2.0=T1b, 3.0=T2a, 4.0=T2b, 5.0=T3a, 6.0=T3b, 7.0=T3c,
       8.0=T4; T1 is the reference level)
    9. IST_1_kidney1TumorDimension -> tumor_size_40_70, tumor_size_ge70
       (value multiplied by 10 to go from cm to mm; <40mm is the reference)
    10. IST_1_kidney1Grading -> fuhrman_34 (codes 3.0/4.0 = 1, otherwise 0)
    11. IST_1_kidney1HistologicalTypeMalignantSarcomatoid -> sarcomatoid_yes
    12. IST_1_kidney1Necrosis -> necrosis_yes

    Missing source values stay missing (NaN) in every derived feature.

    Parameters:
    - df: DataFrame holding the source columns listed above

    Returns:
    - DataFrame with the 14 Byun features, indexed like `df`
    """

    def flag_or_nan(source, condition):
        # 1.0 / 0.0 where `source` is observed, NaN where it is missing
        return np.where(source.notna(), condition.astype(float), np.nan)

    features = pd.DataFrame(index=df.index)

    # Continuous / already-binary demographics
    features["age"] = df["ANM_1_age"]
    # Cast to float (not int) so missing entries survive as NaN
    features["sex_male"] = df["P_1_sex_M"].astype(float)
    features["bmi"] = df["ANM_1_patientBMI"]

    # Diabetes: any code >= 1.0 (with or without organ damage) means "Yes"
    diabetes_code = df["diabetes"]
    features["diabetes_yes"] = flag_or_nan(diabetes_code, diabetes_code >= 1.0)

    features["hypertension_yes"] = df["ANM_1_hypertension"].astype(float)

    # ECOG performance status dichotomised at >= 1
    ecog_score = df["ecog"]
    features["ecog_ge1"] = flag_or_nan(ecog_score, ecog_score >= 1.0)

    symptoms_column = "ANM_1_symptomsBeginning_Sintomiall'esordio"
    features["symptoms_yes"] = df[symptoms_column].astype(float)

    # Pathological T stage dummies (reference: T1 = codes 1.0, 2.0)
    stage_code = df["IST_1_kidney1PathologicalStage2009"]
    features["t_stage_2"] = flag_or_nan(stage_code, stage_code.isin([3.0, 4.0]))
    features["t_stage_34"] = flag_or_nan(
        stage_code, stage_code.isin([5.0, 6.0, 7.0, 8.0])
    )

    # Tumor size dummies (reference: < 40 mm); the source value is scaled cm -> mm
    size_mm = df["IST_1_kidney1TumorDimension"] * 10
    features["tumor_size_40_70"] = flag_or_nan(
        size_mm, (size_mm >= 40) & (size_mm < 70)
    )
    features["tumor_size_ge70"] = flag_or_nan(size_mm, size_mm >= 70)

    # Fuhrman grade: G3/G4 versus G1/G2
    grade_code = df["IST_1_kidney1Grading"]
    features["fuhrman_34"] = flag_or_nan(grade_code, grade_code.isin([3.0, 4.0]))

    # Histology flags, cast to float so missing entries survive as NaN
    sarcomatoid_column = "IST_1_kidney1HistologicalTypeMalignantSarcomatoid"
    features["sarcomatoid_yes"] = df[sarcomatoid_column].astype(float)
    features["necrosis_yes"] = df["IST_1_kidney1Necrosis"].astype(float)

    return features

# Data ingestion

## Non-preprocessed Internal Dataset (HSR/DBURI)

The variables Diabetes and ECOG were dropped by our preprocessing. Here we read the un-processed dataset so that we can add those two variables back to the dataset used for the analysis.

In [5]:
# Load the non-preprocessed HSR/DBURI dataset. It is only needed as the source
# of the `diabetes` and `ecog` columns that are merged into the analysis
# dataset further down.
# NOTE(review): the Azure ML workspace coordinates are hardcoded here — consider
# reading them from configuration.
subscription_id = "753a0b42-95dc-4871-b53e-160ceb0e6bc1"
resource_group = "rg-s-race-aml-dev-we"
workspace_name = "amlsraceamldevwe01"

workspace = Workspace(subscription_id, resource_group, workspace_name)

dataset = Dataset.get_by_name(workspace, name="srace_urologia_nonmeta_q1_all_survival")
df_all = dataset.to_pandas_dataframe()
print(df_all.shape)
df_all.head()

{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}
(3081, 7056)
(3081, 7056)


Unnamed: 0,index,P_1_id,isalive,censor,__index_level_0___x,Column1,P_1_sex,P_1_exitusState,P_1_exitusDate,P_1_exitusCause,...,gfr_108_EPI,gfr_108_BIS,gfr_108,gfr_120_EPI,gfr_120_BIS,gfr_120,yob,delta_secondo_tumore_mesi,num_int_doc,__index_level_0___y
0,0,1,True,False,0,1,M,Si,13512960000.0,,...,,,,,,,1918,,2,0
1,1,2,False,False,1,2,F,Si,13081824000.0,Altracausa,...,,,,,,,1945,,32,1
2,2,4,True,False,2,3,M,Si,13340160000.0,,...,,,,,,,1925,,7,2
3,3,5,True,False,3,4,F,No,,,...,,,,67.0108377500802,66.7416388846908,67.01,1949,,346,3
4,4,6,True,False,4,5,M,No,,,...,,,,,,,1952,,430,4


## Load Internal Dataset (HSR/DBURI)

In [6]:
# Load HSR/DBURI internal dataset (preprocessed, one-hot encoded).
# The Azure ML `workspace` handle created when the non-preprocessed dataset was
# loaded is reused here instead of repeating the subscription / resource-group
# configuration a second time.
# `dataset` is kept as a name on purpose: its tags are read afterwards to
# rebuild the column schema.
dataset = Dataset.get_by_name(workspace, name="UC2_raw_survival_csm_ohe_5yrs")
df_internal = dataset.to_pandas_dataframe()
print(df_internal.shape)
df_internal.head()

{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}
(2536, 211)
(2536, 211)


Unnamed: 0,P_1_id,ANM_1_previousAbdominalOperations,ANM_1_moduliOK,ANM_1_performanceStatus,ANM_1_asa,ANM_1_patientBMI,ANM_1_hypertension,ANM_1_TerapiaDiabeteIpoglicemizzanti,ANM_1_charlsonIndexComorbiditiesMalattiaPolmonare,ANM_1_charlsonIndexComorbiditiesUlceraPeptica,...,DEG_1_examEmCreatininemiaRange_14-18,DEG_1_examEmCalcioRange_14-18,"DEG_1_examEmCalcioRange_2,10-2,60",DEG_1_examEmCalcioRange_2_1-2_6,DEG_1_examEmCalcioRange_2_10-2_60,DEG_1_examEmCalcioRange_2_25-2_75,death,csm,ocm,ttdeath
0,1.0,False,False,0.0,1.0,24.82,False,,True,False,...,False,False,False,False,True,False,False,False,True,60.0
1,4.0,False,False,1.0,2.0,,True,False,False,False,...,False,False,False,False,True,False,False,,,60.0
2,5.0,True,False,1.0,2.0,,True,False,False,False,...,False,False,False,False,True,False,False,False,False,60.0
3,6.0,False,False,1.0,2.0,29.76,False,,False,False,...,False,False,False,False,True,False,False,False,False,60.0
4,7.0,True,False,1.0,2.0,26.57,False,False,False,False,...,False,False,False,False,False,False,False,False,False,60.0


### Recreate schema from dataset tags

In [7]:
# The dataset tags carry two JSON documents: the target dtype of each column and
# whether each categorical column is ordinal.
tags = dataset.tags

dtypes = json.loads(tags["dtypes_json"])
is_ordinal = json.loads(tags["is_ordinal_json"])

for col, target_dtype in dtypes.items():
    # Plain dtypes: a direct cast is enough.
    if target_dtype != "category":
        df_internal[col] = df_internal[col].astype(target_dtype)
        continue

    # Categorical columns: levels are the observed non-missing values, sorted
    # only when the column is flagged as ordinal.
    ordered = is_ordinal[col]
    observed_levels = df_internal[col].dropna().unique()
    categories = sorted(observed_levels) if ordered else observed_levels
    df_internal[col] = pd.Categorical(
        df_internal[col], categories=categories, ordered=ordered
    )

## Add Diabetes and ECOG variables to the dataset

In [8]:
# Attach the `diabetes` and `ecog` columns from the non-preprocessed dataset to
# the analysis dataset, matching patients on `P_1_id`.
n_rows_before_merge = len(df_internal)

df_internal = pd.merge(
    df_internal,
    df_all[["P_1_id", "diabetes", "ecog"]],
    on="P_1_id",
    how="left",
)

# A left join silently duplicates left rows when the right-hand key is not
# unique; duplicated patients would then be over-represented in every bootstrap
# sample, so fail loudly instead.
assert len(df_internal) == n_rows_before_merge, (
    f"Merge on P_1_id changed the number of rows "
    f"({n_rows_before_merge} -> {len(df_internal)}): duplicated P_1_id in df_all?"
)

# The wide raw frame is no longer needed; release its memory.
del df_all
print(df_internal.shape)

(2536, 213)


# Define Byun variables

In [None]:
# Source columns of the analysis dataset that feed the Byun model features.
v_byun = [
    "ANM_1_age",
    "P_1_sex_M",
    "ANM_1_patientBMI",
    "diabetes",
    "ANM_1_hypertension",
    "ecog",
    "ANM_1_symptomsBeginning_Sintomiall'esordio",
    "IST_1_kidney1PathologicalStage2009",
    "IST_1_kidney1TumorDimension",
    "IST_1_kidney1Grading",
    "IST_1_kidney1HistologicalTypeMalignantSarcomatoid",
    "IST_1_kidney1Necrosis",
]

# Per-variable overview: dtype / non-null count, then the value distribution
# (missing values included).
for v in v_byun:
    # Series.info() writes its report itself and returns None, so it must not
    # be wrapped in print() (that would emit a stray "None" line).
    df_internal[v].info()
    print(df_internal[v].value_counts(dropna=False))
    print()
    print()

<class 'pandas.core.series.Series'>
RangeIndex: 2536 entries, 0 to 2535
Series name: ANM_1_age
Non-Null Count  Dtype  
--------------  -----  
2536 non-null   float64
dtypes: float64(1)
memory usage: 19.9 KB
None
ANM_1_age
72.0    90
59.0    87
69.0    87
62.0    86
67.0    85
        ..
16.0     1
21.0     1
26.0     1
89.0     1
18.0     1
Name: count, Length: 73, dtype: int64


<class 'pandas.core.series.Series'>
RangeIndex: 2536 entries, 0 to 2535
Series name: P_1_sex_M
Non-Null Count  Dtype  
--------------  -----  
2536 non-null   boolean
dtypes: boolean(1)
memory usage: 5.1 KB
None
P_1_sex_M
True     1749
False     787
Name: count, dtype: Int64


<class 'pandas.core.series.Series'>
RangeIndex: 2536 entries, 0 to 2535
Series name: ANM_1_patientBMI
Non-Null Count  Dtype  
--------------  -----  
2105 non-null   float64
dtypes: float64(1)
memory usage: 19.9 KB
None
ANM_1_patientBMI
NaN      431
24.22     28
27.34     18
22.04     18
24.91     16
        ... 
29.96      1
33.96     

# Prepare Byun Features

In [10]:
# Derive the Byun model inputs from the internal dataset
X_byun = prepare_byun_features(df_internal)

# Quick overview: dimensions, column names and per-column missing counts
overview = [
    f"Byun features shape: {X_byun.shape}",
    f"\nByun features columns:\n{X_byun.columns.tolist()}",
    f"\nMissing values:\n{X_byun.isnull().sum()}",
    "\nFirst few rows:",
]
print("\n".join(overview))
X_byun.head()

Byun features shape: (2536, 14)

Byun features columns:
['age', 'sex_male', 'bmi', 'diabetes_yes', 'hypertension_yes', 'ecog_ge1', 'symptoms_yes', 't_stage_2', 't_stage_34', 'tumor_size_40_70', 'tumor_size_ge70', 'fuhrman_34', 'sarcomatoid_yes', 'necrosis_yes']

Missing values:
age                   0
sex_male              0
bmi                 431
diabetes_yes          0
hypertension_yes     23
ecog_ge1             21
symptoms_yes          0
t_stage_2           336
t_stage_34          336
tumor_size_40_70      0
tumor_size_ge70       0
fuhrman_34          344
sarcomatoid_yes       0
necrosis_yes        367
dtype: int64

First few rows:


Unnamed: 0,age,sex_male,bmi,diabetes_yes,hypertension_yes,ecog_ge1,symptoms_yes,t_stage_2,t_stage_34,tumor_size_40_70,tumor_size_ge70,fuhrman_34,sarcomatoid_yes,necrosis_yes
0,76.0,1.0,24.82,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,0.0,0.0
1,72.0,1.0,,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
2,50.0,0.0,,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
3,49.0,1.0,29.76,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,66.0,1.0,26.57,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Drop missing values

In [11]:
# Drop rows with missing values in Byun features or survival outcomes.
#
# The feature mask is re-indexed onto df_internal so that the cell can be
# re-run after X_byun has already been filtered (rows no longer present in
# X_byun simply count as incomplete).
features_complete = (
    X_byun.notna().all(axis=1).reindex(df_internal.index, fill_value=False)
)

# The outcome columns are checked explicitly: a missing `death` value would
# otherwise be turned into an event by bool(NaN) == True when the survival
# array is built.
outcomes_complete = df_internal[["death", "ttdeath"]].notna().all(axis=1)

complete_mask = features_complete & outcomes_complete

df_complete = df_internal.loc[complete_mask].copy()

# Filter X_byun as well (the boolean mask is aligned on the index)
X_byun = X_byun.loc[complete_mask]

print(f"Complete cases: {len(df_complete)} / {len(df_internal)}")
print(
    f"Dropped: {len(df_internal) - len(df_complete)} cases with missing Byun "
    f"variables or survival outcomes"
)

Complete cases: 1755 / 2536
Dropped: 781 cases with missing Byun variables


## Prepare survival outcomes

In [13]:
# Build the structured (event, time) array expected by the concordance metric.
# NOTE(review): the event indicator is read from the `death` column while the
# report labels the outcome as cancer-specific mortality — confirm that `death`
# encodes cancer-specific death in this dataset.
event_flags = [bool(flag) for flag in df_complete["death"]]
follow_up_times = [float(duration) for duration in df_complete["ttdeath"]]

y = np.empty(len(event_flags), dtype=[("event", "?"), ("time", "<f8")])
y["event"] = event_flags
y["time"] = follow_up_times

n_events = y["event"].sum()
event_rate = y["event"].mean()

print(f"Survival data shape: {y.shape}")
print(f"Number of events: {n_events}")
print(f"Event rate: {event_rate:.2%}")

Survival data shape: (1755,)
Number of events: 81
Event rate: 4.62%


# Bootstrap Validation

We validate the Byun model on 100% of HSR (DBURI) using bootstrap resampling.

## Instantiate the Byun model

In [14]:
# Build the Byun model. fit() estimates nothing: it only binds the predefined
# coefficients to the column order of X_byun and returns the model itself.
byun_model = ByunCoxModel().fit(X_byun)

print("Byun model coefficients (betas = log(HR)):")
for position, feature in enumerate(byun_model.feature_names_in_):
    beta = byun_model.coef_[position]
    hazard_ratio = np.exp(beta)
    print(f"  {feature}: β = {beta:.4f} (HR = {hazard_ratio:.3f})")

Byun model coefficients (betas = log(HR)):
  age: β = 0.0296 (HR = 1.030)
  sex_male: β = -0.0202 (HR = 0.980)
  bmi: β = -0.1393 (HR = 0.870)
  diabetes_yes: β = 0.6729 (HR = 1.960)
  hypertension_yes: β = 0.0862 (HR = 1.090)
  ecog_ge1: β = 1.1725 (HR = 3.230)
  symptoms_yes: β = 0.7975 (HR = 2.220)
  t_stage_2: β = 0.3646 (HR = 1.440)
  t_stage_34: β = 0.8920 (HR = 2.440)
  tumor_size_40_70: β = 0.0677 (HR = 1.070)
  tumor_size_ge70: β = 0.0583 (HR = 1.060)
  fuhrman_34: β = 0.2311 (HR = 1.260)
  sarcomatoid_yes: β = 1.4770 (HR = 4.380)
  necrosis_yes: β = 0.1398 (HR = 1.150)


## Bootstrap validation loop

In [15]:
# Bootstrap validation: resample the complete cases with replacement and score
# each resample with Harrell's C-index. The global NumPy RNG is seeded first so
# that the sequence of resamples is reproducible.
np.random.seed(RANDOM_STATE)

n_cases = len(X_byun)
separator = "=" * 60

print(f"Running bootstrap validation with {N_BOOTSTRAP} iterations...")
print(f"Dataset size: {n_cases} complete cases")
print(f"{separator}\n")

c_index_list = []

for _ in tqdm(range(N_BOOTSTRAP)):
    # Row positions drawn with replacement; the same positions index X and y
    sampled_rows = np.random.choice(n_cases, size=n_cases, replace=True)
    y_resampled = y[sampled_rows]

    # Linear predictor for the resampled patients (higher = higher risk)
    predicted_risk = byun_model.predict(X_byun.iloc[sampled_rows])

    # concordance_index_censored returns a tuple; element 0 is the C-index
    concordance = concordance_index_censored(
        event_indicator=y_resampled["event"],
        event_time=y_resampled["time"],
        estimate=predicted_risk,
    )
    c_index_list.append(concordance[0])

print(f"\n{separator}")
print(f"Bootstrap validation completed: {len(c_index_list)} iterations")
print(separator)

Running bootstrap validation with 100 iterations...
Dataset size: 1755 complete cases



100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 56.98it/s]


Bootstrap validation completed: 100 iterations





# Results

## Compute confidence intervals

In [17]:
# Summarise the bootstrap distribution of the C-index.
c_index_values = np.asarray(c_index_list)

mean_c_index = np.mean(c_index_values)
std_c_index = np.std(c_index_values, ddof=1)

# Normal-approximation 95% interval: mean ± 1.96 × bootstrap SD
ci_half_width = 1.96 * std_c_index
ci_lower = mean_c_index - ci_half_width
ci_upper = mean_c_index + ci_half_width

median_c_index = np.median(c_index_values)
q25, q75 = (np.percentile(c_index_values, q) for q in (25, 75))

heavy_rule = "=" * 80
light_rule = "-" * 80

report_lines = [
    heavy_rule,
    "BYUN MODEL PERFORMANCE - BOOTSTRAP VALIDATION ON HSR (DBURI)",
    heavy_rule,
    f"\nDataset: HSR (DBURI) - {len(X_byun)} complete cases",
    f"Validation approach: Bootstrap with {N_BOOTSTRAP} iterations",
    "Outcome: Cancer-specific mortality (CSM)",
    f"Event rate: {y['event'].mean():.2%} ({y['event'].sum()} events)",
    "\n" + light_rule,
    "HARRELL'S C-INDEX",
    light_rule,
    f"Mean ± SD:        {mean_c_index:.4f} ± {std_c_index:.4f}",
    f"95% CI:           [{ci_lower:.4f}, {ci_upper:.4f}]",
    f"Median [IQR]:     {median_c_index:.4f} [{q25:.4f}, {q75:.4f}]",
    heavy_rule,
]
print("\n".join(report_lines))

BYUN MODEL PERFORMANCE - BOOTSTRAP VALIDATION ON HSR (DBURI)

Dataset: HSR (DBURI) - 1755 complete cases
Validation approach: Bootstrap with 100 iterations
Outcome: Cancer-specific mortality (CSM)
Event rate: 4.62% (81 events)

--------------------------------------------------------------------------------
HARRELL'S C-INDEX
--------------------------------------------------------------------------------
Mean ± SD:        0.8168 ± 0.0218
95% CI:           [0.7740, 0.8595]
Median [IQR]:     0.8205 [0.8020, 0.8308]
