## Train

In [9]:
import numpy as np
import h5py
import scipy.io
from sklearn import metrics
import pandas as pd
import os
os.environ['THEANO_FLAGS'] = "device=cuda0,force_device=True,floatX=float32,gpuarray.preallocate=0.3"
import theano
print(theano.config.device)
from keras.layers import Embedding
from keras.models import Sequential
from keras.models import Model
from keras.layers import Dense, Dropout, Activation, Flatten, Layer, merge, Input, Concatenate, Reshape, concatenate,Lambda,multiply,Permute,Reshape,RepeatVector
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers.pooling import GlobalMaxPooling1D
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import Bidirectional, TimeDistributed
from keras.models import load_model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import optimizers
from keras import backend as K
from keras import regularizers

cuda0


### Load data (training and validation)

In [5]:
data_folder = "./data/"

trainmat = h5py.File(data_folder+'train.mat')
validmat = scipy.io.loadmat(data_folder+'valid.mat')

X_train = np.transpose(np.array(trainmat['trainxdata']),axes=(2,0,1))
y_train = np.array(trainmat['traindata']).T

trainmat.close()

### Choose only the targets that correspond to the TF binding

In [6]:
y_train = y_train[:,125:815]

### Run TBiNet

In [7]:
sequence_input = Input(shape=(1000,4))

# Convolutional Layer
output = Conv1D(320,kernel_size=26,padding="valid",activation="relu")(sequence_input)
output = MaxPooling1D(pool_size=13, strides=13)(output)
output = Dropout(0.2)(output)

#Attention Layer
attention = Dense(1)(output)
attention = Permute((2, 1))(attention)
attention = Activation('softmax')(attention)
attention = Permute((2, 1))(attention)
attention = Lambda(lambda x: K.mean(x, axis=2), name='attention',output_shape=(75,))(attention)
attention = RepeatVector(320)(attention)
attention = Permute((2,1))(attention)
output = multiply([output, attention])

#BiLSTM Layer
output = Bidirectional(LSTM(320,return_sequences=True))(output)
output = Dropout(0.5)(output)

flat_output = Flatten()(output)

#FC Layer
FC_output = Dense(695)(flat_output)
FC_output = Activation('relu')(FC_output)

#Output Layer
output = Dense(690)(FC_output)
output = Activation('sigmoid')(output)

model = Model(inputs=sequence_input, outputs=output)

print('compiling model')
model.compile(loss='binary_crossentropy', optimizer='adam')

print('model summary')
model.summary()

checkpointer = ModelCheckpoint(filepath="./model/tbinet.{epoch:02d}-{val_loss:.2f}.hdf5", verbose=1, save_best_only=False)
earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

model.fit(X_train, y_train, batch_size=100, epochs=60, shuffle=True, verbose=1, validation_data=(np.transpose(validmat['validxdata'],axes=(0,2,1)),validmat['validdata'][:,125:815]), callbacks=[checkpointer,earlystopper])

model.save('./model/tbinet.h5')

compiling model
model summary
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1000, 4)]    0                                            
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 975, 320)     33600       input_1[0][0]                    
__________________________________________________________________________________________________
max_pooling1d (MaxPooling1D)    (None, 75, 320)      0           conv1d[0][0]                     
__________________________________________________________________________________________________
dropout (Dropout)               (None, 75, 320)      0           max_pooling1d[0][0]              
________________________________________________________________

InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

## Test

In [10]:
import numpy as np
import scipy.io
from sklearn import metrics
import pandas as pd
import os
os.environ['THEANO_FLAGS'] = "device=cuda0,force_device=True,floatX=float32"
import theano
print(theano.config.device)

from keras.layers import Embedding
from keras.models import Sequential
from keras.models import Model
from keras.layers import Dense, Dropout, Activation, Flatten, Layer, merge, Input, Concatenate, Reshape
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers.pooling import GlobalMaxPooling1D
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import Bidirectional, TimeDistributed
from keras.models import load_model
from keras.callbacks import ModelCheckpoint, EarlyStopping

cuda0


In [11]:
def get_auroc(preds, obs):
    fpr, tpr, thresholds  = metrics.roc_curve(obs, preds, drop_intermediate=False)
    auroc = metrics.auc(fpr,tpr)
    return auroc

def get_aupr(preds, obs):
    precision, recall, thresholds  = metrics.precision_recall_curve(obs, preds)
    aupr = metrics.auc(recall,precision)
    return aupr

def get_aurocs_and_auprs(tpreds, tobs):
    tpreds_df = pd.DataFrame(tpreds)
    tobs_df = pd.DataFrame(tobs)
    
    task_list = []
    auroc_list = []
    aupr_list = []
    for task in tpreds_df:
        pred = tpreds_df[task]
        obs = tobs_df[task]
        auroc=round(get_auroc(pred,obs),5)
        aupr = round(get_aupr(pred,obs),5)
        task_list.append(task)
        auroc_list.append(auroc)
        aupr_list.append(aupr)
    return auroc_list, aupr_list

### Load data (test)

In [12]:
data_folder = "./data/"

testmat = scipy.io.loadmat(data_folder+'test.mat')

### Load model

In [13]:
model = load_model("./model/best/tbinet_best.hdf5")
print('model summary')
model.summary()

OSError: SavedModel file does not exist at: ./model/best/tbinet_best.hdf5/{saved_model.pbtxt|saved_model.pb}

### Calculate averaged AUROC and AUPR

In [None]:
tpreds = model.predict(np.transpose(testmat['testxdata'],axes=(0,2,1)),verbose=1)
tpreds_temp = np.copy(tpreds)
reverse_start_id = int(testmat['testdata'][:,125:815].shape[0]/2)

for i in range(reverse_start_id):
    tpreds_avg_temp = (tpreds_temp[i] + tpreds_temp[reverse_start_id+i])/2.0
    tpreds_temp[i] = tpreds_avg_temp
    tpreds_temp[reverse_start_id+i] = tpreds_avg_temp


aurocs, auprs = get_aurocs_and_auprs(tpreds_temp,testmat['testdata'][:,125:815])
print("Averaged AUROC:",np.nanmean(aurocs))
print("Averaged AUPR:", np.nanmean(auprs))