In [2]:
# Mount Google Drive inside the Colab runtime so that files stored on Drive
# can be reached through ordinary filesystem paths.
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [3]:
# Install the TFDeepSurv package from its source directory on the mounted
# Google Drive. The path is quoted because it contains spaces.
!pip install "/content/gdrive/My Drive/Colab Notebooks/TFDeepSurv/"

Processing ./gdrive/My Drive/Colab Notebooks/TFDeepSurv
Collecting lifelines>=0.14.6
[?25l  Downloading https://files.pythonhosted.org/packages/b2/96/74e1f74cc00474b969e137ab65246189b1e5841c6ab2eed98c65027bcfcb/lifelines-0.22.8-py2.py3-none-any.whl (338kB)
[K     |████████████████████████████████| 348kB 2.7MB/s 
Collecting autograd-gamma>=0.3
  Downloading https://files.pythonhosted.org/packages/3e/87/788c4bf90cc5c534cb3b7fdb5b719175e33e2658decce75e35e2ce69766f/autograd_gamma-0.4.1-py2.py3-none-any.whl
Building wheels for collected packages: tfdeepsurv
  Building wheel for tfdeepsurv (setup.py) ... [?25l[?25hdone
  Created wheel for tfdeepsurv: filename=tfdeepsurv-2.0.0-cp36-none-any.whl size=15794 sha256=a7aae1975b78410a132742075a06f3591be39d26f63e3b601d146d9634c7ae06
  Stored in directory: /tmp/pip-ephem-wheel-cache-r5glrh1s/wheels/be/9c/9b/7f208bc17b7e2b43fd6634d51f541136728b58ff6530976b88
Successfully built tfdeepsurv
Installing collected packages: autograd-gamma, lifelines, tf

# tfdeepsurv for real data

## Introduction

Let's use the `tfdeepsurv` package to build a neural network that predicts hazard ratios. This notebook 
shows you how to build and train such a network.

## Preparation

For everything to go smoothly, you should be familiar with **Survival Analysis**. If you are not, I suggest reading this [reference](https://lifelines.readthedocs.io/en/latest/Survival%20Analysis%20intro.html) first.

## Package installation

Please follow the instructions in the [README](../README.md) to install the `tfdeepsurv` package.

## Getting started

### Obtain datasets 

In [0]:
import pandas as pd

# Read the training and test splits from CSV files in the working directory.
TRAIN_CSV = "data_train_filename.csv"
TEST_CSV = "data_test_filename.csv"

train_data = pd.read_csv(TRAIN_CSV)
test_data = pd.read_csv(TEST_CSV)

# Show the first rows of the training split as the cell output.
train_data.head()

### Dataset statistics

In [0]:
from tfdeepsurv.datasets import survival_stats

# Names of the dataset columns that hold the observed status ('e')
# and the observed time ('t'); adjust them to match your own data.
colname_e, colname_t = 'e', 't'

# Run `survival_stats` on the training split with plotting enabled.
survival_stats(
    train_data,
    t_col=colname_t,
    e_col=colname_e,
    plot=True,
)

In [0]:
# Run the same `survival_stats` report on the test split.
survival_stats(
    test_data,
    t_col=colname_t,
    e_col=colname_e,
    plot=True,
)

### Survival data transform

The transformed survival data contains a new label. Negative values are treated as right-censored, 
and positive values are treated as event occurrences.

**NOTE**: In version 2.0, survival data must be transformed via `tfdeepsurv.datasets.survival_df`.

In [0]:
from tfdeepsurv.datasets import survival_df

# Both splits are transformed with the same column settings, so the keyword
# arguments are collected once and reused.
label_kwargs = dict(t_col=colname_t, e_col=colname_e, label_col="Y")

surv_train = survival_df(train_data, **label_kwargs)
surv_test = survival_df(test_data, **label_kwargs)

# Columns 't' and 'e' are packed into a new column 'Y'.
surv_train.head()

### Model initialization

**NOTE:** You can freely change all hyper-parameters during model initialization or training as you want.

The hyper-parameters are as follows:
- `nn_config`: model configuration
- `hidden_layers_nodes`: hidden layers configuration
- `num_steps`: training steps

For hyper-parameter tuning, refer to the README in the `byopt` directory.

In [0]:
from tfdeepsurv import dsnn

# Number of input features: every column of the transformed frame
# except the single label column.
input_nodes = len(surv_train.columns) - 1

# Hidden-layer configuration of the network.
hidden_layers_nodes = [6, 3, 1]

# Model configuration passed to `dsnn`. These values can be obtained by
# Bayesian hyper-parameter tuning and strongly affect model performance.
nn_config = dict(
    learning_rate=0.7,
    learning_rate_decay=1.0,
    activation='relu',
    L1_reg=3.4e-5,
    L2_reg=8.8e-5,
    optimizer='sgd',
    dropout_keep_prob=1.0,
    seed=1,
)

# ESSENTIAL STEP-1: create the model from the input size, the hidden-layer
# configuration and the settings above (arguments are passed positionally).
model = dsnn(input_nodes, hidden_layers_nodes, nn_config)

# ESSENTIAL STEP-2: build the computation graph before training.
model.build_graph()

### Model training

You can save your trained model by passing `save_model="file_name.ckpt"` or load your trained model by passing `load_model="file_name.ckpt"`

In [0]:
# Label column(s) and feature columns: every column that is not a label
# is treated as a feature, in the frame's original column order.
Y_col = ["Y"]
X_cols = [name for name in surv_train.columns if name not in Y_col]

train_features = surv_train[X_cols]
train_labels = surv_train[Y_col]

# Saving and loading a model is also supported; see the comments of the
# `train()` function for details.

# ESSENTIAL STEP-3: train the model.
# `num_steps` is an important parameter as well.
watch_list = model.train(
    train_features,
    train_labels,
    num_steps=1900,
    num_skip_steps=100,
    plot=True,
)

### Model evaluation

In [0]:
# Evaluate on the training split first, then on the held-out test split.
ci_train = model.evals(surv_train[X_cols], surv_train[Y_col])
print("CI on training data:", ci_train)

ci_test = model.evals(surv_test[X_cols], surv_test[Y_col])
print("CI on test data:", ci_test)

### Model prediction

Model prediction includes:
- predicting hazard ratio or log hazard ratio
- predicting survival function

In [0]:
# Predictions are made for test rows labelled 0 through 10
# (`.loc` slices by label and includes both endpoints).

# Log hazard ratio (default output of `predict`).
log_hazard_ratio = model.predict(surv_test.loc[0:10, X_cols])
print(log_hazard_ratio)

# Hazard ratio (requested with `output_margin=False`).
hazard_ratio = model.predict(surv_test.loc[0:10, X_cols], output_margin=False)
print(hazard_ratio)

In [0]:
# predict survival function
# Predict the survival function for test rows labelled 0 through 5
# (label-based, inclusive slice) with plotting enabled.
survival_inputs = surv_test.loc[0:5, X_cols]
model.predict_survival_function(survival_inputs, plot=True)

### tf.session close

To release resources, call `model.close_session()` to close the TensorFlow session.

In [0]:
# Close the model's TensorFlow session to release the resources it holds.
model.close_session()