In [1]:
import sys

# Add the parent directory to the system path
sys.path.append("../04_survival_models/src")

In [2]:
import json
import os

import joblib
import matplotlib.pyplot as plt
import mlflow
import numpy as np
import pandas as pd
from azureml.core import Dataset, Workspace
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.model_selection import (
    train_test_split,
)
from sksurv.linear_model import CoxPHSurvivalAnalysis
from uc2_functions import count_columns_by_dtype, validate_sksurv_model

# Goal

The goal is to fine-tune a Cox model to enable the computation of survival metrics using the SSIGN features.
We consider two variants:

a. As in the paper https://pmc.ncbi.nlm.nih.gov/articles/PMC5536178/pdf/nihms790786.pdf
 – univariate Cox model on the SSIGN points

b. Similarly as in our original manuscript with the GRANT model – multivariate Cox model on the categorized SSIGN variables

# Parameters

In [3]:
# Directories
DIR_SC = os.path.join(os.path.dirname(os.getcwd()), "sc")  # Legend
DIR_MODEL_PKL = "../models_pkl_review"  # Weights for the models used during inference

In [4]:
RANDOM_STATE = 42
EXPERIMENT_NAME = "UC2_review_ssign_finetune_2025_09_1"
PARENT_RUN_ID = None

In [5]:
# Parameters
RANDOM_STATE = 718
EXPERIMENT_NAME = "UC2_review_ssign_finetune_2025_09_1"
PARENT_RUN_ID = "b15e3d81-281c-4e64-b26e-af44809c09bd"


# Functions

In [6]:
def calculate_ssign_score(df: pd.DataFrame) -> pd.DataFrame:
    """
    Computes the SSIGN score based on pre-processed features.

    This function calculates the score for each component of the SSIGN score
    (Pathological T, N, Metastasis, Tumor Size, Grade, Necrosis) and
    sums them to get the total score.

    Args:
        df: A pandas DataFrame containing the necessary columns:
            - 'IST_1_kidney1PathologicalStage2009' (numeric, mapped)
            - 'IST_1_kidney1PN2009_1_0' (boolean, from one-hot encoding)
            - 'IST_1_kidney1TumorDimension' (numeric, in cm)
            - 'IST_1_kidney1Grading' (numeric, mapped)
            - 'IST_1_kidney1Necrosis' (boolean)

    Returns:
        A new pandas DataFrame with added columns for each score component
        and the total 'ssign_score'.
    """
    # Make a copy to avoid modifying the original dataframe
    df_scores = df.copy()

    # 1. Pathological T category score
    # pT1 (1.0, 2.0) -> 0; pT2 (3.0, 4.0) -> +1; pT3 (5.0, 6.0, 7.0) -> +2; pT4 (8.0) -> +4
    pt_score_map = {1.0: 0, 2.0: 0, 3.0: 1, 4.0: 1, 5.0: 2, 6.0: 2, 7.0: 2, 8.0: 4}
    df_scores["ssign_component_pT"] = (
        df_scores["IST_1_kidney1PathologicalStage2009"].map(pt_score_map).fillna(0)
    )

    # 2. Regional lymph node status score
    # pNx/pN0 -> 0; pN1/pN2 -> +2
    # Based on your data, 'IST_1_kidney1PN2009_1_0' being True corresponds to pN1.
    df_scores["ssign_component_pN"] = np.where(
        df_scores["IST_1_kidney1PN2009_1_0"], 2, 0
    )

    # 3. Metastasis category score
    # M0 -> 0; M1 -> +4. Per your inclusion criteria, all are M0.
    df_scores["ssign_component_M"] = 0

    # 4. Tumor size score
    # <5 cm -> 0; >=5 cm -> +2
    df_scores["ssign_component_size"] = np.where(
        df_scores["IST_1_kidney1TumorDimension"] >= 5, 2, 0
    )

    # 5. Tumor (nuclear) grade score
    # Grade 1/2 -> 0; Grade 3 -> +1; Grade 4 -> +3
    grade_score_map = {1.0: 0, 2.0: 0, 3.0: 1, 4.0: 3}
    df_scores["ssign_component_grade"] = (
        df_scores["IST_1_kidney1Grading"].map(grade_score_map).fillna(0)
    )

    # 6. Tumor necrosis present score
    # No -> 0; Yes -> +2
    df_scores["ssign_component_necrosis"] = np.where(
        df_scores["IST_1_kidney1Necrosis"], 2, 0
    )

    # 7. Calculate the total SSIGN score
    score_components = [
        "ssign_component_pT",
        "ssign_component_pN",
        "ssign_component_M",
        "ssign_component_size",
        "ssign_component_grade",
        "ssign_component_necrosis",
    ]
    df_scores["ssign_score"] = df_scores[score_components].sum(axis=1)

    return df_scores

# Data ingestion

## One-hot encoding version

In [7]:
# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required

subscription_id = "753a0b42-95dc-4871-b53e-160ceb0e6bc1"
resource_group = "rg-s-race-aml-dev-we"
workspace_name = "amlsraceamldevwe01"

workspace = Workspace(subscription_id, resource_group, workspace_name)

dataset = Dataset.get_by_name(workspace, name="UC2_raw_survival_csm_ohe_5yrs")
df_ohe = dataset.to_pandas_dataframe()
print(df_ohe.shape)
df_ohe.head()

{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}


(2536, 211)


Unnamed: 0,P_1_id,ANM_1_previousAbdominalOperations,ANM_1_moduliOK,ANM_1_performanceStatus,ANM_1_asa,ANM_1_patientBMI,ANM_1_hypertension,ANM_1_TerapiaDiabeteIpoglicemizzanti,ANM_1_charlsonIndexComorbiditiesMalattiaPolmonare,ANM_1_charlsonIndexComorbiditiesUlceraPeptica,...,DEG_1_examEmCreatininemiaRange_14-18,DEG_1_examEmCalcioRange_14-18,"DEG_1_examEmCalcioRange_2,10-2,60",DEG_1_examEmCalcioRange_2_1-2_6,DEG_1_examEmCalcioRange_2_10-2_60,DEG_1_examEmCalcioRange_2_25-2_75,death,csm,ocm,ttdeath
0,1.0,False,False,0.0,1.0,24.82,False,,True,False,...,False,False,False,False,True,False,False,False,True,60.0
1,4.0,False,False,1.0,2.0,,True,False,False,False,...,False,False,False,False,True,False,False,,,60.0
2,5.0,True,False,1.0,2.0,,True,False,False,False,...,False,False,False,False,True,False,False,False,False,60.0
3,6.0,False,False,1.0,2.0,29.76,False,,False,False,...,False,False,False,False,True,False,False,False,False,60.0
4,7.0,True,False,1.0,2.0,26.57,False,False,False,False,...,False,False,False,False,False,False,False,False,False,60.0


### Use schema

Recreate the schema from tags:

In [8]:
tags = dataset.tags

dtypes = json.loads(tags["dtypes_json"])
is_ordinal = json.loads(tags["is_ordinal_json"])

for col in dtypes.keys():
    if dtypes[col] == "category":
        categories = (
            sorted(df_ohe[col].dropna().unique())
            if is_ordinal[col]
            else df_ohe[col].dropna().unique()
        )
        df_ohe[col] = pd.Categorical(
            df_ohe[col], categories=categories, ordered=is_ordinal[col]
        )
    else:
        df_ohe[col] = df_ohe[col].astype(dtypes[col])

In [9]:
count_columns_by_dtype(df_ohe)

float64: 31
boolean: 171
ordinal category: 9
non ordinal category: 0


# Start mlflow run

In [10]:
mlflow.set_experiment(EXPERIMENT_NAME)
mlflow.start_run(run_name=str(RANDOM_STATE))
if PARENT_RUN_ID:
    mlflow.set_tag("parent_run_id", PARENT_RUN_ID)

# Drop na on target columns

In [11]:
not_features = ["P_1_id", "death", "csm", "ocm", "ttdeath"]

In [12]:
print(df_ohe.shape[0])
df_ohe = df_ohe.dropna(subset=["ttdeath", "death"])
print(df_ohe.shape[0])

2536
2536


# Train test split

## List features

In [13]:
features_all = sorted(set(df_ohe.columns.tolist()) - set(not_features))
print(len(features_all))

206


## Train test split

In [14]:
# Define features and target
X = df_ohe[features_all]
y = np.array(
    [(event, time) for event, time in zip(df_ohe["death"], df_ohe["ttdeath"])],
    dtype=[("event", bool), ("time", float)],
)
ids = df_ohe["P_1_id"]
mlflow.log_param(
    "death_perc_5yrs",
    pd.Series(y["event"]).value_counts(sort=True, normalize=True)[True],
)

# Split data and IDs into training and testing sets
(
    X_train_missing,
    X_test_missing,
    y_train,
    y_test,
    ids_train,
    ids_test,
) = train_test_split(
    X,
    y,
    ids,
    test_size=0.2,
    stratify=y["event"],
    random_state=RANDOM_STATE,
)
del X, y, ids
# Check distributions of death event on train and test
print(pd.Series(y_train["event"]).value_counts(sort=True, normalize=True))
print(pd.Series(y_test["event"]).value_counts(sort=True, normalize=True))

False    0.949211
True     0.050789
Name: proportion, dtype: float64
False    0.948819
True     0.051181
Name: proportion, dtype: float64


# Imputation

## Fit and trasform on train

In [15]:
X_train = X_train_missing.copy()

imputer = IterativeImputer(
    max_iter=25, initial_strategy="median", random_state=RANDOM_STATE
)
imputer = imputer.fit(X_train)
X_train = imputer.transform(X_train)
X_train = pd.DataFrame(X_train, columns=X_train_missing.columns)

# Assert
assert set(X_train.columns) == set(X_train_missing.columns)

del X_train_missing

## Transform on test

In [16]:
X_test = X_test_missing.copy()

X_test = imputer.transform(X_test)
X_test = pd.DataFrame(X_test, columns=X_test_missing.columns)

# Assert
assert set(X_test.columns) == set(X_test_missing.columns)

del X_test_missing

# Infer rule-based SSIGN prognostic score

In [17]:
X_train = calculate_ssign_score(X_train)
X_test = calculate_ssign_score(X_test)

# Variant a.

In [18]:
model_name = "CoxPHSurvivalAnalysis_ssign_univariate_T1"
mlflow.start_run(run_name=model_name, nested=True)
mlflow.log_param("random_state", RANDOM_STATE)

718

## Train univariate Cox model on the SSIGN points

In [19]:
# Train the model
cox_ssign_univariate = CoxPHSurvivalAnalysis()
cox_ssign_univariate.fit(X_train[["ssign_score"]], y_train)
mlflow.log_param("feature_names_in", cox_ssign_univariate.feature_names_in_)
mlflow.log_param("n_features_in", cox_ssign_univariate.n_features_in_)

1

## Save model weights to pkl

In [20]:
# Save model weights to pkl
os.makedirs(DIR_MODEL_PKL, exist_ok=True)
model_path = os.path.join(
    DIR_MODEL_PKL, "raw_{}_{}.pkl".format(model_name, RANDOM_STATE)
)
joblib.dump(cox_ssign_univariate, model_path)
mlflow.log_artifact(model_path)
mlflow.log_param("model_path", model_path)

'../models_pkl_review/raw_CoxPHSurvivalAnalysis_ssign_univariate_T1_718.pkl'

## Validate on test set (internal validation)

In [21]:
result_censored, result_ipcw, score_brier, mean_auc, fig = validate_sksurv_model(
    model=cox_ssign_univariate,
    y_train=y_train,
    X_test=X_test[["ssign_score"]],
    y_test=y_test,
    tau=60,
)
print("concordance_index_censored", round(result_censored, 3))
mlflow.log_metric("concordance_index_censored", result_censored)
print("concordance_index_ipcw", round(result_ipcw, 3))
mlflow.log_metric("concordance_index_ipcw", result_ipcw)
print("integrated_brier_score", round(score_brier, 3))
mlflow.log_metric("integrated_brier_score", score_brier)
print("mean_cumulative_dynamic_auc", round(mean_auc, 3))
mlflow.log_metric("mean_cumulative_dynamic_auc", mean_auc)
mlflow.log_figure(fig, "time_dependent_auc.png")
plt.show(fig)
del model_name

0
1
2
3
concordance_index_censored 0.809


concordance_index_ipcw 0.81


integrated_brier_score 0.03


mean_cumulative_dynamic_auc 0.821


In [22]:
mlflow.end_run()

# Variant b.

In [23]:
model_name = "CoxPHSurvivalAnalysis_ssign_finetune_T1"
mlflow.start_run(run_name=model_name, nested=True)
mlflow.log_param("random_state", RANDOM_STATE)

718

In [24]:
features_ssign = [
    "ssign_component_pT",
    "ssign_component_pN",
    # "ssign_component_M", # Always 0 on our cohort for inclusion criteria -> if used leads to singular matrix
    "ssign_component_size",
    "ssign_component_grade",
    "ssign_component_necrosis",
]

## Train multivariate Cox model on the categorized SSIGN variables

In [25]:
# Train the model
cox_ssign_finetune = CoxPHSurvivalAnalysis()
cox_ssign_finetune.fit(X_train[features_ssign], y_train)
mlflow.log_param("feature_names_in", cox_ssign_finetune.feature_names_in_)
mlflow.log_param("n_features_in", cox_ssign_finetune.n_features_in_)

5

## Save model weights to pkl

In [26]:
# Save model weights to pkl
os.makedirs(DIR_MODEL_PKL, exist_ok=True)
model_path = os.path.join(
    DIR_MODEL_PKL, "raw_{}_{}.pkl".format(model_name, RANDOM_STATE)
)
joblib.dump(cox_ssign_finetune, model_path)
mlflow.log_artifact(model_path)
mlflow.log_param("model_path", model_path)

'../models_pkl_review/raw_CoxPHSurvivalAnalysis_ssign_finetune_T1_718.pkl'

## Validate on test set (internal validation)

In [27]:
result_censored, result_ipcw, score_brier, mean_auc, fig = validate_sksurv_model(
    model=cox_ssign_finetune,
    y_train=y_train,
    X_test=X_test[features_ssign],
    y_test=y_test,
    tau=60,
)
print("concordance_index_censored", round(result_censored, 3))
mlflow.log_metric("concordance_index_censored", result_censored)
print("concordance_index_ipcw", round(result_ipcw, 3))
mlflow.log_metric("concordance_index_ipcw", result_ipcw)
print("integrated_brier_score", round(score_brier, 3))
mlflow.log_metric("integrated_brier_score", score_brier)
print("mean_cumulative_dynamic_auc", round(mean_auc, 3))
mlflow.log_metric("mean_cumulative_dynamic_auc", mean_auc)
mlflow.log_figure(fig, "time_dependent_auc.png")
plt.show(fig)
del model_name

0
1
2
3
concordance_index_censored 0.829


concordance_index_ipcw 0.829


integrated_brier_score 0.029


mean_cumulative_dynamic_auc 0.844


In [28]:
mlflow.end_run()

# End mlflow run

In [29]:
mlflow.end_run()