# Predicting Immunotherapy Response based on RNA-Seq

### Importing Data and gene pathways
TCGA Data

In [47]:
import pandas as pd
import numpy as np

tpm = pd.read_csv("data/tcga_sample/expression.tsv", sep="\t")
survival = pd.read_csv("data/tcga_sample/survival.tsv", sep="\t")
meta = pd.read_csv("data/tcga_sample/metadata.tsv", sep="\t")
cytokines = pd.read_csv("data/genes.cytokine_immune.txt", header=None)

### Data Processing

Get the TPM values for cytokines pathway

In [51]:
# Only use cytokine expression
tpm = tpm.reindex(cytokines.iloc[:,0].unique() , axis=1).dropna(axis=1)

# perform quantile normalization
# https://stackoverflow.com/questions/37935920/quantile-normalization-on-pandas-dataframe
tpm /= np.max(np.abs(tpm),axis=0) # scale between [0,1]
rank_mean = tpm.stack().groupby(tpm.rank(method='first').stack().astype(int)).mean()
tpm = tpm.rank(method='min').stack().astype(int).map(rank_mean).unstack()

# convert pandas df to np array
tpm = tpm.values
survival = survival.iloc[:,1:3].values

Split data into training and testing sets

In [54]:
# split the data into a training set and a validation set
VALIDATION_SPLIT = 0.8

# indices = np.arange(tpm.shape[0])
# np.random.shuffle(indices)
# # tpm = tpm[indices]
# labels = surv_time[indices]
num_validation_samples = int(VALIDATION_SPLIT * tpm.shape[0])

x_train = tpm[:num_validation_samples]
y_train = survival[:num_validation_samples]
x_val = tpm[num_validation_samples:]
y_val = survival[num_validation_samples:]

### Survival Neural Network

Defining the loss function

In [124]:
import tensorflow as tf
import keras.backend as K

def negative_log_partial_likelihood_loss(regularization):
    #Wrapper function for the negative logg partial likelihood loss function
    
    def loss(y_true, risk):
        return negative_log_partial_likelihood(y_true, risk, regularization)
    return loss

def negative_log_partial_likelihood(y_true, risk, regularization):
    """Return the negative log-partial likelihood of the prediction
    y_true contains the survival time
    risk is the risk output from the neural network
    censor is the vector of inputs that are censored
    regularization is the regularization constant (not used currently)
    
    Uses the Keras backend to perform calculations
    
    Sorts the surv_time by sorted reverse time
    """
    
    # sort NN-output risk estimate by labeled surv time per batch
    idx = np.argsort(abs(y_true[:,0]))[::-1]
    print(risk.shape)
    risk_1 = risk[idx, 0]
    
    # sort censor mask by surv time
    censor = y_true[idx, 1].reshape(-1,1) #reshape to [n, 1] for matmul
    
    # calculate negative log likelihood from estimated risk
    hazard_ratio = K.exp(risk_1)
    log_risk = K.log(tf.cumsum(hazard_ratio)) # cumsum on sorted surv time accounts for concordance
    uncensored_likelihood = risk_1 - log_risk
    censored_likelihood = uncensored_likelihood * censor
    num_observed_events = K.sum(censor)
    neg_likelihood = - K.sum(censored_likelihood) / tf.cast(num_observed_events, tf.float64)
    return neg_likelihood

In [125]:
np.random.seed(123)
negative_log_partial_likelihood(y_train, np.random.rand(y_train.shape[0], 2), 0).eval(session=K.get_session())

(80, 2)


TypeError: Expected int32 passed to parameter 'size' of op 'Slice', got [array([ 3,  4,  2, 17, 16, 13, 11, 21, 32, 12, 15,  0,  7, 35, 31, 34, 23,
        9, 10, 18, 38, 37, 48,  6,  8, 76, 24, 50, 33,  1, 30, 49, 51, 19,
       40, 41, 20, 25, 78, 27, 29, 28, 42, 43, 79, 36, 39, 77,  5, 45, 44,
       74, 26, 22, 73, 47, 53, 52, 70, 46, 72, 56, 71, 14, 54, 55, 75, 61,
       63, 66, 65, 64, 62, 68, 69, 67, 60, 59, 58, 57]), 0] of type 'list' instead.

In [126]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras import Sequential
from keras.optimizers import Adam

model = Sequential()
model.add(Dense(128, input_dim=x_train.shape[1], name="input"))
model.add(Dense(64, activation='relu', name="dense_1"))
model.add(Dropout(0.25, name="dropout_1"))
model.add(Dense(64, activation='relu', name="dense_2"))
model.add(Dense(2, activation='linear', name="output"))

opt = Adam(lr=0.001)

model_loss = negative_log_partial_likelihood_loss(0)

model.compile(optimizer=opt, loss=model_loss, metrics=['accuracy'])

model.fit(x_train, y_train, validation_data=[x_val,y_val],epochs=10,batch_size=32)

(?, 2)


TypeError: Expected int32 passed to parameter 'size' of op 'Slice', got [array([0]), 0] of type 'list' instead.

Model evaluation