In [40]:
import sys

!{sys.executable} -m pip install tables



In [41]:
import sys
sys.path.append("../")
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn_pandas import DataFrameMapper
from SADGNet import SADGNetSR
from skopt import BayesSearchCV

In [42]:

import numpy as np

In [43]:
# DeepSurv original h5 file has separated folders for x (covariates), t (time-to-event) and e (event)
# so I have to merge them into 1 df

train_data_path = "../data/metabric/train"

y_train_e = pd.read_csv(train_data_path + "/train_e.csv")
y_train_t = pd.read_csv(train_data_path + "/train_t.csv")
x_train = pd.read_csv(train_data_path + "/train_x.csv")

test_data_path = "../data/metabric/test"

y_test_e = pd.read_csv(test_data_path + "/test_e.csv")
y_test_t = pd.read_csv(test_data_path + "/test_t.csv")
x_test = pd.read_csv(test_data_path + "/test_x.csv")


In [44]:
y_train = pd.concat([y_train_t, y_train_e], axis=1)
y_test = pd.concat([y_test_t, y_test_e], axis=1)

In [45]:
# cols_standardize = ['MKI67', 'EGFR', 'PGR', 'ERBB2', 'age at diagnosis']
# cols_leave = ['hormone treatment indicator', 'radiotherapy indicator', 'chemotherapy indicator',
#               'ER-positive indicator']
cols_standardize = ['x0', 'x1', 'x2', 'x3', 'x8']
cols_leave = ['x4', 'x5', 'x6', 'x7']
standardize = [([col], StandardScaler()) for col in cols_standardize]
leave = [(col, None) for col in cols_leave]
x_mapper = DataFrameMapper(standardize + leave)
x_train = x_mapper.fit_transform(x_train).astype('float32')
x_test = x_mapper.transform(x_test).astype('float32')
y_train = y_train.values
y_test = y_test.values

In [None]:
# Tuning hyperparameters
kfold = KFold(n_splits=5, shuffle=True, random_state=2024)
opt = BayesSearchCV(
    SADGNetSR(),
    {
        'learning_rate': (1e-4, 0.01, 'log-uniform'),
        'dropout': [0.5],
        'batch_size': [512],
        'num_layers': [1, 2, 3],
        'hidden_dim': [32, 64, 128],
        'embedding_dim': [32, 64, 128],
        'time_interval': [3, 6, 9],
        'lambda1': [0.5, 1.0, 5.0],
        'lambda2': [1.0, 5.0, 10.0],
        'lambda3': [0.01, 0.1],
        'trans_layer': [1, 2, 3],
        'alpha': [0.5, 1.0, 5.0]
    },
    n_iter=30,
    cv=kfold,
    n_jobs=4
)
opt.fit(x_train, y_train)
print("The best hyperparameters: %s" % opt.best_params_)



2025-08-14 20:53:32,978 - SADGNetSR.py[line:197] - INFO: hypeparameters: learning_rate 0.0006101003420277037, num_layers 2, hidden_dim 64, dropout 0.5, activation ReLU, batch_size 512, optimizer Adam, embedding_dim 32,time_interval 9,lambda1 1.0, lambda2 5.0, lambda3 0.014200210999105296, trans_layer 3, alpha 5.0
2025-08-14 20:53:32,978 - SADGNetSR.py[line:197] - INFO: hypeparameters: learning_rate 0.0006101003420277037, num_layers 2, hidden_dim 64, dropout 0.5, activation ReLU, batch_size 512, optimizer Adam, embedding_dim 32,time_interval 9,lambda1 1.0, lambda2 5.0, lambda3 0.014200210999105296, trans_layer 3, alpha 5.0
2025-08-14 20:53:32,978 - SADGNetSR.py[line:197] - INFO: hypeparameters: learning_rate 0.0006101003420277037, num_layers 2, hidden_dim 64, dropout 0.5, activation ReLU, batch_size 512, optimizer Adam, embedding_dim 32,time_interval 9,lambda1 1.0, lambda2 5.0, lambda3 0.014200210999105296, trans_layer 3, alpha 5.0
2025-08-14 20:53:32,979 - SADGNetSR.py[line:197] - INFO

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


2025-08-14 20:53:44,204 - SADGNetSR.py[line:269] - INFO: train loss=868.8659, val loss=916.8156, train_cindex=0.5372 train_mae=80.4306, val_cindex=0.5586, val_mae=80.5109
2025-08-14 20:53:44,221 - SADGNetSR.py[line:231] - INFO: -------------epoch 2/250-------------
2025-08-14 20:53:44,224 - SADGNetSR.py[line:269] - INFO: train loss=853.8629, val loss=843.8994, train_cindex=0.6041 train_mae=87.2760, val_cindex=0.6206, val_mae=85.9082
2025-08-14 20:53:44,242 - SADGNetSR.py[line:231] - INFO: -------------epoch 2/250-------------
2025-08-14 20:53:44,261 - SADGNetSR.py[line:269] - INFO: train loss=850.6637, val loss=837.4566, train_cindex=0.5950 train_mae=86.6892, val_cindex=0.6024, val_mae=82.5094
2025-08-14 20:53:44,270 - SADGNetSR.py[line:231] - INFO: -------------epoch 2/250-------------
2025-08-14 20:53:44,278 - SADGNetSR.py[line:269] - INFO: train loss=838.3520, val loss=871.7706, train_cindex=0.6063 train_mae=86.6796, val_cindex=0.6115, val_mae=95.4745
2025-08-14 20:53:44,285 - SADGN

In [None]:
# 2025-08-14 21:13:17,559 - SADGNetSR.py[line:197] - INFO: hypeparameters: learning_rate 0.003682096604646654, num_layers 1, hidden_dim 32, dropout 0.5, activation ReLU, batch_size 512, optimizer Adam, embedding_dim 128,time_interval 3,lambda1 5.0, lambda2 1.0, lambda3 0.060309443933746966, trans_layer 2, alpha 5.0


In [None]:
model = SADGNetSR(**opt.best_params_)
model.fit(x_train, y_train)
c_td = model.score(x_test, y_test)
mae = model.get_mae(x_test, y_test)
print("The C^td of the testset is %.3f" % c_td)
print("The MAE of the testset is %.3f" % mae)

AttributeError: 'BayesSearchCV' object has no attribute 'best_params_'