In [None]:
import sys
sys.path.append("../")
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn_pandas import DataFrameMapper
from SADGNet import SADGNetCRs
from skopt import BayesSearchCV

In [None]:
# Load the competing-risks dataset.
# `events` coding: 0 = right-censored, 1 = event 1, 2 = event 2.
data = pd.read_excel("./data/SEER_CRC_CRs.xlsx", sheet_name='Sheet1')

# Every column except the two outcome columns is treated as a covariate.
feature_list = data.columns.tolist()
for outcome_col in ('times', 'events'):
    feature_list.remove(outcome_col)

data_x = data[feature_list].copy()
data_label = data[['times', 'events']].copy()

# 80/20 hold-out split, stratified on the event code so each split keeps the
# same mix of censored / event-1 / event-2 rows; seeded for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    data_x,
    data_label,
    test_size=0.2,
    stratify=data_label['events'],
    random_state=2024,
)

# Sanity check: (n_rows, n_features) of the train and test covariates.
for split_x in (x_train, x_test):
    print(split_x.shape)

In [None]:
# Feature preprocessing: z-score the numeric covariates and one-hot encode the
# categorical ones, then convert everything to float32 arrays.
# NOTE: this cell overwrites x_train / x_test / y_train / y_test with arrays,
# so re-run the data-loading cell before re-running this one.
cols_standardize = ['age', 'positivelymph']
# Despite the name, these columns are not passed through unchanged: each one
# is one-hot encoded below.
cols_leave = ['race', 'site', 'histology', 'grade', 'T', 'N', 'M', 'surgery', 'radiation']
standardize = [([col], StandardScaler()) for col in cols_standardize]
# drop='first' removes one indicator per feature to avoid collinearity. Combined
# with handle_unknown='ignore', a category not seen during fit is encoded as all
# zeros, i.e. indistinguishable from the dropped first category.
leave = [([col], OneHotEncoder(handle_unknown='ignore', drop='first')) for col in cols_leave]
x_mapper = DataFrameMapper(standardize + leave)
# Fit exactly once, on the training split only, so no test-set statistics leak
# into the scaler/encoders; then push both splits through the same transform().
x_mapper = x_mapper.fit(x_train)
x_train = x_mapper.transform(x_train).astype('float32')
x_test = x_mapper.transform(x_test).astype('float32')
# Labels as plain arrays with columns [times, events].
y_train = y_train.values
y_test = y_test.values

In [None]:
# Tuning hyperparameters
# Bayesian optimisation over the SADGNetCRs hyperparameters with 5-fold CV on
# the training split. No `scoring` is passed, so candidates are ranked with the
# estimator's own `score` method.
kfold = KFold(n_splits=5, shuffle=True, random_state=2024)
opt = BayesSearchCV(
    SADGNetCRs(),
    {
        # Continuous range sampled on a log scale.
        'learning_rate': (1e-4, 0.01, 'log-uniform'),
        # Single-element lists pin the parameter to a fixed value.
        'dropout': [0.5],
        'batch_size': [512],
        'num_layers': [1, 2, 3],
        'hidden_dim': [32, 64, 128],
        'embedding_dim': [32, 64, 128],
        'time_interval': [3, 6, 9],
        'lambda1': [0.5, 1.0, 5.0],
        'lambda2': [1.0, 5.0, 10.0],
        # NOTE(review): skopt interprets a two-element numeric list/tuple as
        # (low, high) bounds of a continuous Real dimension, not as two
        # categories. If only the discrete choices {0.01, 0.1} are intended,
        # wrap this in skopt.space.Categorical([0.01, 0.1]).
        'lambda3': [0.01, 0.1],
        'trans_layer': [1, 2, 3],
        'alpha': [0.5, 1.0, 5.0]
    },
    n_iter=30,
    cv=kfold,
    n_jobs=4,
    # Seed the optimizer so the sequence of sampled candidates is repeatable,
    # matching the seed already used for the hold-out split and the CV folds.
    random_state=2024
)
opt.fit(x_train, y_train)
print("The best hyperparameters: %s" % opt.best_params_)

In [None]:
# Refit a fresh model with the selected hyperparameters on the full training
# split and report per-event metrics on the held-out test split.
model = SADGNetCRs(**opt.best_params_)
model.fit(x_train, y_train)

# Evaluate each metric once and index the per-event results, rather than
# recomputing the whole metric for every event.
c_td_all = model.get_c_index(x_test, y_test)
mae_all = model.get_mae(x_test, y_test)
c_td_1 = c_td_all[0]
mae_1 = mae_all[0]
c_td_2 = c_td_all[1]
mae_2 = mae_all[1]

print("The C^td for event1 is %.3f" % c_td_1)
print("The MAE for event1 is %.3f" % mae_1)
print("The C^td for event2 is %.3f" % c_td_2)
print("The MAE for event2 is %.3f" % mae_2)