In [19]:
import pandas as pd
from azureml.core import Dataset, Workspace
import json
import os
from lifelines import KaplanMeierFitter
import numpy as np

# Parameters

In [20]:
# Directories
# Folder "sc", one level above the notebook's current working directory.
DIR_SC = os.path.join(os.getcwd(), os.pardir, "sc")
# File name of the external-validation workbook; it is read from DIR_SC.
PATH_EXTERNAL = "External_Validation_of_AI_model_Florence_MAP.xlsx"

# Functions

In [21]:
def handle_ocm_csm_none(df):
    """Make the cause-of-death flags ``ocm``/``csm`` consistent with ``death``.

    Both flag columns are cast to pandas' nullable boolean dtype. For rows
    where ``death`` is False, missing flags are filled with False. The frame
    is validated before anything is filled.

    The function is idempotent: rows with ``death`` False whose flags are
    already False are accepted, so the calling cell can be re-run safely.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``death``, ``ocm`` and ``csm``. The frame is
        modified in place (the two flag columns are replaced).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call-and-assign convenience.

    Raises
    ------
    ValueError
        At the first row (in row order) where either
        * ``death`` is False but ``ocm`` or ``csm`` is True, or
        * ``death`` is True but both ``ocm`` and ``csm`` are missing.
    """
    # Ensure the 'ocm' and 'csm' columns are of nullable boolean type
    df["ocm"] = df["ocm"].astype(pd.BooleanDtype())
    df["csm"] = df["csm"].astype(pd.BooleanDtype())

    ocm_missing = df["ocm"].isna()
    csm_missing = df["csm"].isna()
    # Rows with a missing `death` compare as neither alive nor dead and are
    # therefore left untouched.
    is_alive = (df["death"] == False).fillna(False).astype(bool)  # noqa: E712
    is_dead = (df["death"] == True).fillna(False).astype(bool)  # noqa: E712
    cause_flagged = (df["ocm"].fillna(False) | df["csm"].fillna(False)).astype(bool)

    # Validate first so that nothing is filled when the data are inconsistent.
    inconsistent_alive = is_alive & cause_flagged
    inconsistent_dead = is_dead & ocm_missing & csm_missing
    inconsistent = (inconsistent_alive | inconsistent_dead).to_numpy()
    if inconsistent.any():
        position = int(inconsistent.argmax())  # first offending row, in row order
        death_state = bool(inconsistent_dead.iloc[position])
        raise ValueError(
            f"Error at index {df.index[position]}: "
            f"Inconsistent data where death is {death_state}"
        )

    # Fill nans in ocm and csm with False (uniform with DBURI)
    df.loc[is_alive & ocm_missing, "ocm"] = False
    df.loc[is_alive & csm_missing, "csm"] = False
    return df

# Data ingestion

In [22]:
# Coordinates of the Azure ML workspace that hosts the registered data assets.
subscription_id = "753a0b42-95dc-4871-b53e-160ceb0e6bc1"
resource_group = "rg-s-race-aml-dev-we"
workspace_name = "amlsraceamldevwe01"

# Workspace handle passed to every Dataset.get_by_name call in this notebook.
workspace = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)

## "Original" (no inclusion criteria and no preprocessing)

In [5]:
# Load the data asset "urologia" into pandas and preview its size and first rows.
dataset = Dataset.get_by_name(workspace=workspace, name="urologia")
df_urologia = dataset.to_pandas_dataframe()
print(df_urologia.shape)
df_urologia.head(n=5)

(3796, 7049)


Unnamed: 0,Column1,P.1.id,P.1.sex,P.1.exitusState,P.1.exitusDate,P.1.exitusCause,P.1.exitusCauseSpecific,P.1.exitusCauseConcurrent,P.1.exitusSurvivalTimeProstate,P.1.exitusSurvivalTimeKidney,...,gfr.96.EPI,gfr.96.BIS,gfr.96,gfr.108.EPI,gfr.108.BIS,gfr.108,gfr.120.EPI,gfr.120.BIS,gfr.120,yob
0,1,1,M,Si,13512960000.0,,...,...,,198.0,...,,,,,,,,,,1918
1,2,2,F,Si,13081824000.0,Altra causa,Osseocitoma e Leiomioma Uterino ...,"Carcinosi Peritoneali, Cachessia ...",,13.0,...,,,,,,,,,,1945
2,3,4,M,Si,13340160000.0,,...,...,,87.0,...,,,,,,,,,,1925
3,4,5,F,No,,,...,...,,,...,,,,,,,67.0108377500802,66.7416388846908,67.01,1949
4,5,6,M,No,,,...,...,,,...,81.2096919585057,78.8576090569383,81.21,,,,,,,1952


## No preprocessing

In [6]:
# Load the data asset "srace_urologia_nonmeta_q1_all_survival" into pandas
# and preview its size and first rows.
dataset = Dataset.get_by_name(
    workspace=workspace, name="srace_urologia_nonmeta_q1_all_survival"
)
df_all = dataset.to_pandas_dataframe()
print(df_all.shape)
df_all.head(n=5)

{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}
(3081, 7056)


Unnamed: 0,index,P_1_id,isalive,censor,__index_level_0___x,Column1,P_1_sex,P_1_exitusState,P_1_exitusDate,P_1_exitusCause,...,gfr_108_EPI,gfr_108_BIS,gfr_108,gfr_120_EPI,gfr_120_BIS,gfr_120,yob,delta_secondo_tumore_mesi,num_int_doc,__index_level_0___y
0,0,1,True,False,0,1,M,Si,13512960000.0,,...,,,,,,,1918,,2,0
1,1,2,False,False,1,2,F,Si,13081824000.0,Altracausa,...,,,,,,,1945,,32,1
2,2,4,True,False,2,3,M,Si,13340160000.0,,...,,,,,,,1925,,7,2
3,3,5,True,False,3,4,F,No,,,...,,,,67.0108377500802,66.7416388846908,67.01,1949,,346,3
4,4,6,True,False,4,5,M,No,,,...,,,,,,,1952,,430,4


### Median fup (before administrative censoring at 5 years)

In [7]:
# Median of `ttdeath` on df_all.
median_fup_all = df_all["ttdeath"].median()
print(median_fup_all)

67.0


## One-hot encoding version (the one used for models)

In [8]:
# Load a pinned version of the data asset "UC2_raw_survival_csm_ohe_5yrs".
# `dataset` is kept because the schema cell reads its tags.
dataset = Dataset.get_by_name(
    workspace=workspace, name="UC2_raw_survival_csm_ohe_5yrs", version="23"
)
df_ohe_censored = dataset.to_pandas_dataframe()
print(df_ohe_censored.shape)
df_ohe_censored.head(n=5)

{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}
(2536, 211)


Unnamed: 0,P_1_id,ANM_1_previousAbdominalOperations,ANM_1_moduliOK,ANM_1_performanceStatus,ANM_1_asa,ANM_1_patientBMI,ANM_1_hypertension,ANM_1_TerapiaDiabeteIpoglicemizzanti,ANM_1_charlsonIndexComorbiditiesMalattiaPolmonare,ANM_1_charlsonIndexComorbiditiesUlceraPeptica,...,DEG_1_examEmCreatininemiaRange_14-18,DEG_1_examEmCalcioRange_14-18,"DEG_1_examEmCalcioRange_2,10-2,60",DEG_1_examEmCalcioRange_2_1-2_6,DEG_1_examEmCalcioRange_2_10-2_60,DEG_1_examEmCalcioRange_2_25-2_75,death,csm,ocm,ttdeath
0,1.0,False,False,0.0,1.0,24.82,False,,True,False,...,False,False,False,False,True,False,False,False,True,60.0
1,4.0,False,False,1.0,2.0,,True,False,False,False,...,False,False,False,False,True,False,False,,,60.0
2,5.0,True,False,1.0,2.0,,True,False,False,False,...,False,False,False,False,True,False,False,False,False,60.0
3,6.0,False,False,1.0,2.0,29.76,False,,False,False,...,False,False,False,False,True,False,False,False,False,60.0
4,7.0,True,False,1.0,2.0,26.57,False,False,False,False,...,False,False,False,False,False,False,False,False,False,60.0


### Use schema

Recreate the schema from tags:

In [9]:
# NOTE(review): `dataset` must still be the asset that df_ohe_censored was
# loaded from; it is reassigned by every ingestion cell, so run order matters.
tags = dataset.tags

# Per-column dtype names and ordinal flags, stored as JSON strings in the tags.
dtypes = json.loads(tags["dtypes_json"])
is_ordinal = json.loads(tags["is_ordinal_json"])

for col, col_dtype in dtypes.items():
    # Non-categorical columns only need a plain cast.
    if col_dtype != "category":
        df_ohe_censored[col] = df_ohe_censored[col].astype(col_dtype)
        continue

    ordered = is_ordinal[col]
    observed = df_ohe_censored[col].dropna().unique()
    # Ordinal columns get their observed levels sorted; nominal ones keep the
    # order in which the levels first appear.
    categories = sorted(observed) if ordered else observed
    df_ohe_censored[col] = pd.Categorical(
        df_ohe_censored[col], categories=categories, ordered=ordered
    )

### Align with notebooks used for creating models

In [10]:
# Drop na on target columns (row count printed before and after)
target_columns = ["ttdeath", "death"]
print(len(df_ohe_censored))
df_ohe_censored = df_ohe_censored.dropna(subset=target_columns)
print(len(df_ohe_censored))

2536
2536


### Median fup (after administrative censoring at 5 years)

In [11]:
# Median of `ttdeath` on df_ohe_censored.
median_fup_ohe_censored = df_ohe_censored["ttdeath"].median()
print(median_fup_ohe_censored)

60.0


## External

In [12]:
# Read the external-validation workbook from DIR_SC and preview two rows.
external_path = os.path.join(DIR_SC, PATH_EXTERNAL)
df_external = pd.read_excel(external_path)
print(df_external.shape)
df_external.head(n=2)

(720, 44)


Unnamed: 0,ANM_1_age,ANM_1_asa,ANM_1_charlsonIndex,ANM_1_cciAge,ANM_1_performanceStatus,ANM_1_patientBMI,ANM_1_examEmCreatininemia,ANM_1_examEmeGFR,ANM_1_examEmEmoglobina,ANM_1_examEmEmoglobinaRange_14_0-18_0,...,IST_1_kidney1PN2009_1_0,IST_1_kidney1Grading,IST_1_kidney1MayoPN,IST_1_kidney1MayoGrading,IST_1_kidney1MayoScore,IST_1_kidney1MayoRisk,ocm,csm,death,ttdeath
0,32,3,4,4,0,22.9,14.5,4.2,12.1,0.0,...,pNx,1,pNx,1,0,Low,,,0.0,29
1,82,2,5,9,0,28.7,1.21,72.0,14.7,1.0,...,0,4,pN0,4,3,Intermediate,,,0.0,51


### Align with notebooks used for creating models

In [13]:
# Drop na on target columns
print(df_external.shape)
df_external = df_external.dropna(subset=["death", "ttdeath"])
print(df_external.shape)

# Drop unknown cause of death: a recorded death with neither csm nor ocm set
print(df_external.shape)
is_dead = df_external["death"] == True
cause_missing = df_external["csm"].isna() & df_external["ocm"].isna()
df_external = df_external[~(is_dead & cause_missing)]
print(df_external.shape)

# Fill nans in ocm and csm with False as in [internal]
df_external = handle_ocm_csm_none(df_external)

(720, 44)
(718, 44)
(718, 44)
(701, 44)


### Median fup (before administrative censoring at 5 years)

In [14]:
# Median of `ttdeath` on df_external.
median_fup_external = df_external["ttdeath"].median()
print(median_fup_external)

33.0


### Administrative censoring at 5 years

In [15]:
# Only cancer-specific mortality (cut at 5 years)
print(df_external.shape)
cut_months = 60
# Keep rows where `ocm` is False, plus rows whose `ttdeath` is at or beyond
# the cut-off (those are censored at the cut-off below).
# NOTE(review): `ocm` is a nullable boolean after handle_ocm_csm_none; rows
# where it is still <NA> and ttdeath < cut_months are presumably dropped by
# this mask (NA counts as False when indexing) - confirm this is intended.
keep_row = (df_external["ocm"] == False) | (df_external["ttdeath"] >= cut_months)
# Explicit copy so the assignments below act on an independent frame.
df_external_censored = df_external[keep_row].copy()
# `death` arrives as 0.0/1.0 floats; store it as a real boolean so that
# writing False below does not mix dtypes in the column. Rows with a missing
# `death` are dropped by the alignment cell, so no NaN gets cast to True.
df_external_censored["death"] = df_external_censored["death"].astype(bool)
# Clip the ttdeath column at 60
df_external_censored["ttdeath"] = df_external_censored["ttdeath"].clip(
    upper=cut_months
)
# Update the death column based on ttdeath values (censor over 60 months)
df_external_censored.loc[df_external_censored["ttdeath"] == cut_months, "death"] = False
print(df_external_censored.shape)

(701, 44)
(674, 44)


  df_external_censored.loc[df_external_censored["ttdeath"] == cut_months, "death"] = False


### Median fup (after administrative censoring at 5 years)

In [16]:
# Median of `ttdeath` on df_external_censored.
median_fup_external_censored = df_external_censored["ttdeath"].median()
print(median_fup_external_censored)

33.0


# Q1

Q: The model is designed for ccRCC, but histologic subtype is rarely available preoperatively. This is a fundamental limitation that should be explicitly acknowledged and discussed.

Notes: Variables of interest: `IST_1_kidney1HistologicalTypeMalignant` and `IST_1_kidney1HistologicalTypeBenign`.

- The dataframe `df_ohe_censored` used for modelling contains a one-hot encoding of the variable `IST_1_kidney1HistologicalTypeMalignant`. We therefore restrict the non-preprocessed `df_all` to the same patient IDs as `df_ohe_censored` and print the value counts of the original variable.
- The variable `IST_1_kidney1HistologicalTypeBenign` has a high percentage of missing values, so it was removed during preprocessing. We print its value counts on the non-preprocessed `df_all` after the same ID alignment.

In [17]:
# Align IDs
ids_included = df_ohe_censored["P_1_id"].astype(int).tolist()
# Filter df_all with only included IDs
print(df_all.shape)
df_all = df_all[df_all["P_1_id"].isin(ids_included)]
print(df_all.shape)

# For each histology variable print absolute counts, then proportions
# (missing values included), with one blank line between tables.
needs_separator = False
for column in (
    "IST_1_kidney1HistologicalTypeMalignant",
    "IST_1_kidney1HistologicalTypeBenign",
):
    for as_share in (False, True):
        if needs_separator:
            print()
        print(
            df_all[column]
            .value_counts(dropna=False, normalize=as_share)
            .sort_values(ascending=False)
        )
        needs_separator = True

(3081, 7056)
(2536, 7056)
IST_1_kidney1HistologicalTypeMalignant
CarcinomaaCellulechiare        1689
None                            325
CarcinomaditipoPapillare1       178
CarcinomaditipoPapillare2       152
Carcinomacromofobo              141
Altro...                         47
Carcinomadeidotticollettori       2
Liposarcoma                       2
Name: count, dtype: int64

IST_1_kidney1HistologicalTypeMalignant
CarcinomaaCellulechiare        0.666009
None                           0.128155
CarcinomaditipoPapillare1      0.070189
CarcinomaditipoPapillare2      0.059937
Carcinomacromofobo             0.055599
Altro...                       0.018533
Carcinomadeidotticollettori    0.000789
Liposarcoma                    0.000789
Name: proportion, dtype: float64

IST_1_kidney1HistologicalTypeBenign
None                   2212
Oncocitoma              219
Angiomiolipoma           89
Altro...                  8
Nefromacistico            6
Nefromamesoblastico       1
Leiomioma              

# Q8 + Q9 + Q11

Q8: The number of patients used in training is not clear. The abstract suggests 3081 patients, Table 1a suggests 2511, and the outputs for the two datasets suggest 2536. Please clarify.

Q9: Figure 4, Panel C, suggests that there are 580 patients at risk at baseline. Why does this differ than the 674 patients used in external validation?

Q11: The abstract states that external validation was performed on 720 patients but only 674 patients were included in external validation analysis. Please revise. 

Numerics internal dataset (DBURI HSR):

- "original" dataset `df_urologia` (data asset `urologia`) --> 3796
- apply inclusion criteria to obtain `df_all` (data asset `srace_urologia_nonmeta_q1_all_survival`) --> 3081
- during preprocessing filter age = 0 (data entry error) --> 3080
- during preprocessing filter bmi < 10 and > 100 (clear outliers linked to data entry errors) --> 3075
- during preprocessing filter only cancer-specific mortality, administrative censoring at 5 years, one hot encoding to obtain `df_ohe_censored` (data asset `UC2_raw_survival_csm_ohe_5yrs`)--> 2536

Numerics external dataset (Firenze):
- "original" dataset from external institution, already matching inclusion criteria --> 720
- drop rows with missing `death` or `ttdeath` --> 718
- drop deaths with unknown cause (both `csm` and `ocm` missing) --> 701
- filter only cancer-specific mortality, administrative censoring at 5 years --> 674

# Q13

Q13: The rationale for excluding patients undergoing total cold ischemia is unclear and should be justified.

Note: Both the dataframe `df_ohe_censored` used for modelling and non-preprocessed `df_all` have inclusion criteria applied. So here we need to use the "original" dataframe `df_urologia`, without inclusion criteria or preprocessing applied.

In [18]:
vv = ["INT.1.kidney1IschemiaType", "INT.1.kidney2IschemiaType"]

for v in vv:
    # Strip surrounding whitespace and treat empty strings as missing.
    ischemia_type = df_urologia[v].str.strip().replace("", np.nan)
    # Absolute counts first, then proportions; each followed by a blank line.
    for as_share in (False, True):
        print(ischemia_type.value_counts(dropna=False, normalize=as_share))
        print()

INT.1.kidney1IschemiaType
NaN                                           2092
Ischemia totale calda                         1170
No ischemia                                    420
Ischemia parziale calda                         79
Totale Fredda                                   19
Ischemia da clampaggio parenchimale             10
Ischemia da parenchimale a totale                5
Ischemia da clampaggio arterioso selettivo       1
Name: count, dtype: int64

INT.1.kidney1IschemiaType
NaN                                           0.551106
Ischemia totale calda                         0.308219
No ischemia                                   0.110643
Ischemia parziale calda                       0.020811
Totale Fredda                                 0.005005
Ischemia da clampaggio parenchimale           0.002634
Ischemia da parenchimale a totale             0.001317
Ischemia da clampaggio arterioso selettivo    0.000263
Name: proportion, dtype: float64

INT.1.kidney2IschemiaType
NaN          

# Q19

Q19: While the proposed model showed improved C-index and Brier scores in the external validation set compared to the GRANT model, inconsistencies between C-index and Brier score trends were not discussed. Specifically, GRANT’s performance declined (C-index 0.82 → 0.77) while Brier score improved (0.033 → 0.017), which may reflect biases in the validation cohort, particularly given its shorter follow-up duration. Given the reported better performance of predicting 1-year overall survival, this could be confounded by the relatively limited follow-up time in the external cohort, which may disproportionately influence early survival prediction and inflate short-term model performance. This discrepancy warrants further investigation and discussion. 

This insightful comment contains some typos and mixes up the values of the two C-indices recorded in the analysis (Harrell's and IPCW); the actual values are recapped below. The observation itself is nonetheless accurate and is consistent with the actual values. We attribute the discrepancy to the different follow-up durations of the two cohorts; the median follow-up times are also recapped below.

```
Model GRANT (update with actual model name in the manuscript)
                    concordance_index_censored	concordance_index_ipcw	mean_cumulative_dynamic_auc	integrated_brier_score
Internal dataset    0.81864                     0.80674	                0.84417	                        0.03281
External dataset    0.7580                      0.7721                  0.7304                          0.0174
```

```
Median follow-up in months
                    before_administrative_censoring_5yrs    after_administrative_censoring_5yrs
Internal dataset    67                                     60
External dataset    33                                      33
```