In [1]:
from pysurvival.models.survival_forest import RandomSurvivalForestModel
from pysurvival.utils.metrics import concordance_index
from pysurvival.utils._metrics import _concordance_index

In [2]:
import math
import time

import h5py
import numpy as np
import torch
from scipy import stats
from torch.utils.data import Dataset

In [8]:
class SurvivalDataset(Dataset):
    """Map-style dataset that serves one split of an HDF5 survival data file.

    The file must contain a ``train`` and a ``test`` group, each holding the
    datasets ``x``, ``e`` and ``t``.  ``x`` is min-max scaled column by column
    using the statistics of the split that was loaded (the test split is NOT
    scaled with the training statistics).

    Assumption to confirm against the data producer: ``e`` is the event
    indicator and ``t`` the observed time.
    """

    def __init__(self, h5_file, is_train):
        """Load and normalise one split.

        Args:
            h5_file: path of the HDF5 file to read.
            is_train: truthy to load the ``train`` group, falsy for ``test``.
        """
        # self.X: features; self.e / self.y: columns of shape (n, 1) read
        # from the 'e' and 't' datasets respectively.
        self.X, self.e, self.y = self._read_h5_file(h5_file, is_train)
        self._normalize()
        if is_train:
            print('train data => load {} samples'.format(self.X.shape[0]))
        else:
            print('test data => load {} samples'.format(self.X.shape[0]))

    def _read_h5_file(self, h5_file, is_train):
        """Read the arrays of the requested split fully into memory.

        Returns:
            Tuple ``(X, e, y)`` where ``e`` and ``y`` are reshaped to ``(-1, 1)``.
        """
        split = 'train' if is_train else 'test'
        with h5py.File(h5_file, 'r') as f:
            # `[()]` materialises each dataset as an in-memory array, so
            # nothing refers to the file once it is closed.
            X = f[split]['x'][()]
            e = f[split]['e'][()].reshape(-1, 1)
            y = f[split]['t'][()].reshape(-1, 1)
        return X, e, y

    def _normalize(self):
        """Min-max scale ``self.X`` per column into ``[0, 1]``.

        A column whose values are all equal has a zero range; dividing by it
        would fill the column with NaN.  Such columns are divided by 1
        instead, which maps them to 0.
        """
        col_min = self.X.min(axis=0)
        col_range = self.X.max(axis=0) - col_min
        safe_range = np.where(col_range == 0, 1, col_range)
        self.X = (self.X - col_min) / safe_range

    def __getitem__(self, item):
        """Return ``(X, y, e)`` tensors for ``item``.

        Note the order: the ``t``-derived value comes before the ``e`` value.
        """
        X_item = self.X[item]
        e_item = self.e[item]
        y_item = self.y[item]

        X_tensor = torch.from_numpy(X_item)
        e_tensor = torch.from_numpy(e_item)
        y_tensor = torch.from_numpy(y_item)
        return X_tensor, y_tensor, e_tensor

    def __len__(self):
        """Number of samples (rows of ``self.X``)."""
        return self.X.shape[0]

In [9]:
# Both splits are stored in the same HDF5 file; build the train split first,
# then the test split, so the load messages appear in that order.
path = 'bmc_data/gaussian/gaussian_survival_data.h5'
train_dataset, test_dataset = (
    SurvivalDataset(path, is_train=flag) for flag in (True, False)
)

train data => load 4000 samples
test data => load 1000 samples


In [10]:
# Pull the raw arrays out of the dataset wrappers so they can be handed
# directly to the forest model.
X_train = train_dataset.X
y_train = train_dataset.y
e_train = train_dataset.e

X_test = test_dataset.X
y_test = test_dataset.y
e_test = test_dataset.e

In [12]:

# Grow `epoch` independent forests and score each one on the test split, to
# see how much the C-index varies from one forest to the next.
epoch = 100
# NOTE: despite its name, `train_c` collects the C-index measured on the TEST
# split; the name is kept because the summary cell reads it.
train_c = []
val_c = []  # never filled in the visible cells
for i in range(epoch):
    rsf = RandomSurvivalForestModel(num_trees=200)
    # Seed every repetition with its own index: runs still differ from each
    # other, but the whole experiment is reproducible after a kernel restart.
    rsf.fit(X_train, y_train, e_train, max_features="sqrt", max_depth=5,
            min_node_size=20, seed=i)
    # Use the public metric, which computes the risk scores from the model
    # itself, rather than feeding raw (n, 1) arrays to the private
    # `_concordance_index` routine.
    train_c.append(concordance_index(rsf, X_test, y_test, e_test))

In [None]:
# Summarise the per-run scores gathered in `train_c`: their mean, and the
# central 95% interval of a normal distribution with that mean and standard
# deviation.
train_c = np.array(train_c)
train_mean, train_std = np.mean(train_c), np.std(train_c)
conf_intveral = stats.norm.interval(0.95, loc=train_mean, scale=train_std)
print(train_mean, conf_intveral, sep="\n")

NameError: name 'np' is not defined