In [1]:
import numpy as np
import pickle
import keras.backend as K

from keras.layers import  GlobalAveragePooling1D, Input, Activation, MaxPooling1D, BatchNormalization, Dense, Dropout, Conv1D,GlobalMaxPooling1D
from keras.layers import GRU,AveragePooling1D,CuDNNGRU
from keras.layers.merge import Concatenate
from keras.models import Model 
from keras.callbacks import EarlyStopping,ModelCheckpoint

import keras.backend.tensorflow_backend as KTF
import tensorflow as tf
import os


# Expose only the GPU with index 1 to this process. This is set before the
# TensorFlow session below is created.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

config = tf.ConfigProto()
config.gpu_options.allow_growth=True   # do not grab all GPU memory up front; allocate on demand
sess = tf.Session(config=config)

# Hand the configured session to the Keras TensorFlow backend.
KTF.set_session(sess)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
import numpy as np
# One-letter amino-acid codes. The position of a letter in this array is the
# column that letter_one_hot sets to 1.
alphabet = np.array(['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
                     'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'])

def label_sequence(line, MAX_SEQ_LEN, smi_ch_ind):
    """Encode a string as a fixed-length vector of integer labels.

    Args:
        line: the sequence to encode; only the first ``MAX_SEQ_LEN`` symbols are used.
        MAX_SEQ_LEN: length of the returned vector.
        smi_ch_ind: lookup (indexed with ``[]``) from symbol to its numeric label.

    Returns:
        A float ``numpy`` array of length ``MAX_SEQ_LEN``. Positions past the end
        of ``line`` stay 0.
    """
    encoded = np.zeros(MAX_SEQ_LEN)
    clipped = line[:MAX_SEQ_LEN]

    for position in range(len(clipped)):
        encoded[position] = smi_ch_ind[clipped[position]]

    return encoded

def letter_one_hot(aa):
    """Return the one-hot vector for a single residue letter.

    Args:
        aa: a one-letter code that is compared against the entries of ``alphabet``.

    Returns:
        A float ``numpy`` array with one slot per ``alphabet`` entry. The slot of
        the matching letter is 1. A letter that is not in ``alphabet`` yields an
        all-zero vector. Previously the function fell off the end and returned
        ``None``, which turns into NaN when assigned into a float array.
    """
    one_hot = np.zeros(len(alphabet))
    for idx, letter in enumerate(alphabet):
        if aa == letter:
            one_hot[idx] = 1
            break
    return one_hot


# Convert an entire protein to one-hot representation.
def protein_one_hot(protein_sequence, MAX_SEQ_LEN):
    """One-hot encode a protein sequence into a ``(MAX_SEQ_LEN, 20)`` matrix.

    Args:
        protein_sequence: string of one-letter residue codes.
        MAX_SEQ_LEN: number of rows in the result. Longer sequences are
            truncated; shorter ones leave the remaining rows at zero.

    Returns:
        A float ``numpy`` array of shape ``(MAX_SEQ_LEN, 20)``. Row ``i`` is the
        encoding of the i-th residue that remains after the codes B, J, O, U, X
        and Z have been stripped. A residue that ``letter_one_hot`` cannot encode
        leaves its row at zero.
    """
    #  Remove non-specific AA codes (very few are actually present in this dataset)
    for ambiguous_code in 'BJOUXZ':
        protein_sequence = protein_sequence.replace(ambiguous_code, '')

    one_hot_seq = np.zeros((MAX_SEQ_LEN, 20))
    for idx, aa in enumerate(protein_sequence[:MAX_SEQ_LEN]):
        encoded = letter_one_hot(aa)
        # Assigning None into a float array stores NaN, which would poison the
        # model input. Skip symbols the encoder does not know instead.
        if encoded is not None:
            one_hot_seq[idx, :] = encoded
    return one_hot_seq


In [3]:
import keras
feature_len = 768  # last dimension of the GO inputs declared in build_model
max_go_len = 128  # maximum number of GO feature rows kept per protein
max_seq_len = 1000  # number of rows in each protein's one-hot sequence encoding

from six.moves import cPickle as pickle #for performance

 
def save_dict(di_, filename_):
    """Pickle ``di_`` into the file ``filename_``, overwriting any existing file."""
    handle = open(filename_, 'wb')
    try:
        pickle.dump(di_, handle)
    finally:
        handle.close()

def load_dict(filename_):
    """Read and return the object pickled in ``filename_``.

    Unpickling can execute arbitrary code, so only use this on trusted files.
    """
    handle = open(filename_, 'rb')
    try:
        return pickle.load(handle)
    finally:
        handle.close()

protein2go =  load_dict('SC_protein2go_dicts.pkl')
protein2seq = load_dict('SC_protein_seqs.pkl')


def _stack_go_features(go_annotation):
    """Stack the feature rows of a protein's GO terms into one 2-D array.

    ``go_annotation`` is a ``;``-separated string of GO identifiers. For each
    distinct identifier the file ``ncbi_allfeatures4go/<id>_0.npy`` is loaded and
    its first and last rows are dropped (presumably special-token embeddings;
    confirm against the feature extraction step). Terms are appended until the
    next one would push the total past ``max_go_len`` rows.

    Returns a float64 array of shape ``(rows, feature_len)`` with
    ``rows <= max_go_len``; ``rows`` is 0 when not even the first term fits.
    """
    # Start from an empty float64 piece so the result keeps the original dtype
    # and concatenation also works when no term fits.
    pieces = [np.zeros((0, feature_len))]
    used_rows = 0
    # Sort the de-duplicated ids: iterating a raw set of strings can give a
    # different order on every kernel start, which changed both the row order
    # and which terms made it under the row budget.
    for go in sorted(set(go_annotation.split(';'))):
        feature = np.load('ncbi_allfeatures4go/'+go+'_0.npy')[1:-1]
        if used_rows + feature.shape[0] > max_go_len:
            break
        pieces.append(feature)
        used_rows += feature.shape[0]
    # Concatenate once instead of re-copying the growing array on every term.
    return np.concatenate(pieces)


# protein id -> stacked GO feature rows
prot2emb = {key: _stack_go_features(value) for key, value in protein2go.items()}

# protein id -> (max_seq_len, 20) one-hot sequence encoding
protein2onehot = {key: protein_one_hot(value, max_seq_len)
                  for key, value in protein2seq.items()}
        
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches of protein pairs.

    Every line of ``ppi_pair_file`` is parsed as ``protein1,protein2,label``.
    A batch is ``([X_go1, X_go2, X_seq1, X_seq2], y)`` where the GO arrays have
    shape ``(n, max_go_len, feature_len)``, the sequence arrays have shape
    ``(n, max_seq_len, 20)`` and ``y`` is 1 for the label ``'1'`` and 0 for
    anything else. Encodings are looked up in the module-level ``prot2emb`` and
    ``protein2onehot`` dictionaries.
    """

    def __init__(self,  ppi_pair_file, batch_size=128):
        """Read the pair file and prepare the index used for batching.

        Args:
            ppi_pair_file: path of the text file holding one pair per line.
            batch_size: number of pairs per batch.
        """
        self.batch_size = batch_size
        self.ppi_pair_file = ppi_pair_file

        self.max_seqlen = max_seq_len
        self.max_golen = max_go_len
        # Reuse the dictionaries already loaded at module level instead of
        # unpickling the same two files again for every generator.
        self.protein2go = protein2go
        self.protein2seq = protein2seq
        self.read_ppi()
        self.protein2onehot = protein2onehot
        self.prot2emb = prot2emb
        self.on_epoch_end()

    def read_ppi(self):
        """Load all lines of the pair file into ``self.ppi_pairs``."""
        with open(self.ppi_pair_file, 'r') as f:
            self.ppi_pairs  =  f.readlines()

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return len(self.ppi_pairs) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, y)``."""
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        # Generate data
        X, y = self.__data_generation(indexes)

        return X, y

    def on_epoch_end(self):
        """Reset the sample order to file order (no shuffling is done here)."""
        self.indexes = np.arange(len(self.ppi_pairs))

    def __data_generation(self, list_IDs_temp):
        """Encode the pairs of one batch; same contract as ``all_data``."""
        return self.all_data(list_IDs_temp)

    def all_data(self, list_IDs_temp):
        """Encode the pairs at the given line indices.

        Args:
            list_IDs_temp: sequence of indices into ``self.ppi_pairs``.

        Returns:
            ``([X_go1, X_go2, X_seq1, X_seq2], y)`` with one row per index.
        """
        n_samples = len(list_IDs_temp)

        # The GO arrays are only written up to each protein's own row count, so
        # they must start zeroed. With np.empty the padding rows held whatever
        # happened to be in memory.
        X_go1 = np.zeros((n_samples, self.max_golen, feature_len))
        X_go2 = np.zeros((n_samples, self.max_golen, feature_len))
        # These arrays are overwritten completely in the loop below.
        X_seq1 = np.empty((n_samples, self.max_seqlen, 20))
        X_seq2 = np.empty((n_samples, self.max_seqlen, 20))
        y = np.empty((n_samples))

        for i, ID in enumerate(list_IDs_temp):
            p1, p2, label = self.ppi_pairs[ID].rstrip().split(',')
            y[i] = 1 if label == '1' else 0

            X_seq1[i] = self.protein2onehot[p1]
            X_seq2[i] = self.protein2onehot[p2]

            emb1 = self.prot2emb[p1]
            X_go1[i, :emb1.shape[0]] = emb1

            emb2 = self.prot2emb[p2]
            X_go2[i, :emb2.shape[0]] = emb2

        return [X_go1,X_go2, X_seq1, X_seq2] ,  y

In [4]:
# protein2go =  load_dict('SC_protein2go_dicts.pkl')
# for key, value in protein2go.items():
#     X_go1 =  np.zeros((1,768))
#     allgos = value.split(';') 
#     allgos = list(set(allgos))
#     count = 0
#     for  go in  allgos:
#         feature = np.load('ncbi_allfeatures4go/'+go+'_0.npy')[1:-1]
#         if count + feature.shape[0] > max_go_len:
#             break
#         X_go1 = np.concatenate((X_go1,feature ))    
#         count += feature.shape[0]
# #     np.save('SC_GO/'+key,X_go1)

In [5]:
# X_go1 =  np.zeros((1,768))
# allgos = value.split(';') 
# allgos = list(set(allgos))
# count = 0
# for  go in  allgos:
#     feature = np.load('ncbi_allfeatures4go/'+go+'_0.npy')[1:-1]
#     if count + feature.shape[0] > max_go_len:
#         break
#     X_go1 = np.concatenate((X_go1,feature ))    
#     count += feature.shape[0]

In [6]:
from keras import backend as K, initializers, regularizers, constraints
from keras.engine.topology import Layer


def dot_product(x, kernel):
    """
    Backend-agnostic dot product of ``x`` with the weight vector ``kernel``.

    On the TensorFlow backend the kernel gets a trailing axis before ``K.dot``
    and that axis is squeezed out of the result again; on any other backend
    ``K.dot`` is applied directly.
    Args:
        x (): input
        kernel (): weights
    Returns:
        The result of the dot product as a backend tensor.
    """
    if K.backend() != 'tensorflow':
        return K.dot(x, kernel)

    # todo: check that this is correct
    column_kernel = K.expand_dims(kernel)
    projected = K.dot(x, column_kernel)
    return K.squeeze(projected, axis=-1)


class Attention(Layer):
    """
    Keras Layer that implements an Attention mechanism for temporal data.
    Supports Masking.
    Follows the work of Raffel et al. [https://arxiv.org/abs/1512.08756]
    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
    # Output shape
        2D tensor with shape: `(samples, features)`.
        With `return_attention=True` a list `[output, weights]` is returned,
        where `weights` has shape `(samples, steps)`.
    Just put it on top of an RNN Layer (GRU/LSTM/SimpleRNN) with return_sequences=True.
    The dimensions are inferred based on the output shape of the RNN.
    Note: The layer has been tested with Keras 1.x
    Example:
        # 1
        model.add(LSTM(64, return_sequences=True))
        model.add(Attention())
        # next add a Dense layer (for classification/regression) or whatever...
        # 2 - Get the attention scores
        hidden = LSTM(64, return_sequences=True)(words)
        sentence, word_scores = Attention(return_attention=True)(hidden)
    """

    def __init__(self,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True,
                 return_attention=False,
                 **kwargs):
        """
        :param W_regularizer: regularizer identifier for the projection vector.
        :param b_regularizer: regularizer identifier for the per-step bias.
        :param W_constraint: constraint identifier for the projection vector.
        :param b_constraint: constraint identifier for the per-step bias.
        :param bias: whether to add a per-step bias before the tanh.
        :param return_attention: also return the attention weights.
        :param kwargs: forwarded to the base ``Layer`` constructor.
        """
        self.supports_masking = True
        self.return_attention = return_attention
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the projection vector (one weight per feature) and optional bias (one per step)."""
        assert len(input_shape) == 3

        # Pass the shape by keyword: handing it over positionally only works
        # through Keras' legacy argument handling for add_weight.
        self.W = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b = self.add_weight(shape=(input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over the step axis."""
        eij = dot_product(x, self.W)

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        weighted_input = x * K.expand_dims(a)

        result = K.sum(weighted_input, axis=1)

        if self.return_attention:
            return [result, a]
        return result

    def compute_output_shape(self, input_shape):
        if self.return_attention:
            return [(input_shape[0], input_shape[-1]),
                    (input_shape[0], input_shape[1])]
        else:
            return input_shape[0], input_shape[-1]

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from its config."""
        config = {
            'W_regularizer': regularizers.serialize(self.W_regularizer),
            'b_regularizer': regularizers.serialize(self.b_regularizer),
            'W_constraint': constraints.serialize(self.W_constraint),
            'b_constraint': constraints.serialize(self.b_constraint),
            'bias': self.bias,
            'return_attention': self.return_attention,
        }
        base_config = super(Attention, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [7]:
from keras.layers import   Embedding
from keras.layers import  GRU, Bidirectional, CuDNNGRU, Lambda, Flatten
from keras.utils import multi_gpu_model
from keras.layers.merge import concatenate
from keras_radam import RAdam
from keras_lookahead import Lookahead


def inception_block(input_tensor, output_size):
    """Apply four parallel Conv1D branches to ``input_tensor`` and concatenate them.

    Each branch uses ``int(output_size/4)`` filters with ReLU activation:
    a width-3 conv followed by a width-5 conv, a width-1 conv followed by a
    width-3 conv, a single width-3 conv, and a single width-1 conv. Layers are
    created in that order.

    Returns the concatenation of the four branch outputs.
    """
    branch_filters = int(output_size/4)

    # Branch 1: width 3 -> width 5
    stem = Conv1D(branch_filters, 3, activation="relu", padding='same')(input_tensor)
    branch_3_then_5 = Conv1D(branch_filters, 5, activation="relu", padding='same')(stem)

    # Branch 2: width 1 -> width 3
    stem = Conv1D(branch_filters, 1, activation="relu", padding='valid')(input_tensor)
    branch_1_then_3 = Conv1D(branch_filters, 3, activation="relu", padding='same')(stem)

    # Branches 3 and 4: a single convolution each
    branch_3 = Conv1D(branch_filters, 3, activation="relu", padding='same')(input_tensor)
    branch_1 = Conv1D(branch_filters, 1, activation="relu", padding='same')(input_tensor)

    merged = Concatenate()([branch_3_then_5, branch_1_then_3, branch_3, branch_1])
    return merged


def build_cnn_gru_model(input_x, con_filters, gru_units):
    """Encode a sequence tensor with a convolutional and a recurrent branch.

    The convolutional branch is ``inception_block`` followed by dropout; the
    recurrent branch is a bidirectional ``CuDNNGRU`` over the raw input followed
    by dropout. Each branch is summarised by global average pooling, global max
    pooling and ``Attention``; the six summaries are concatenated and passed
    through a 256-unit ReLU ``Dense`` layer, whose output is returned.
    """
    conv_features = inception_block(input_x, con_filters)
    conv_features = Dropout(0.3)(conv_features)

    gru_features = Bidirectional(CuDNNGRU(gru_units, return_sequences=True))(input_x)
    gru_features = Dropout(0.3)(gru_features)

    # Same order as before: avg, max, attention for the conv branch, then for the GRU branch.
    summaries = [
        pooling()(branch)
        for branch in (conv_features, gru_features)
        for pooling in (GlobalAveragePooling1D, GlobalMaxPooling1D, Attention)
    ]

    merged = Concatenate()(summaries)
    return Dense(256,activation='relu')(merged)



def build_model():
    """Build and compile the four-input pair classifier.

    Inputs, in order: left GO features, right GO features (both
    ``(max_go_len, feature_len)``), left sequence, right sequence (both
    ``(max_seq_len, 20)``). Each input gets its own ``build_cnn_gru_model``
    encoder; no encoder is reused between inputs. The four encodings are
    concatenated and passed through Dense(1024) + Dropout(0.1) twice, then
    Dense(512), then a single sigmoid unit. The model is compiled with binary
    cross-entropy, ``Lookahead(RAdam())`` and the accuracy metric.
    """
    con_filters = 256
    gru_units = 64

    go_shape = (max_go_len,feature_len)
    seq_shape = (max_seq_len,20)

    left_input_go = Input(shape=go_shape)
    right_input_go = Input(shape=go_shape)
    left_input_seq = Input(shape=seq_shape)
    right_input_seq = Input(shape=seq_shape)

    # Encoders are created left-GO, right-GO, left-seq, right-seq, as before.
    go_encodings = [build_cnn_gru_model(tensor, con_filters, gru_units)
                    for tensor in (left_input_go, right_input_go)]
    seq_encodings = [build_cnn_gru_model(tensor, con_filters//4, gru_units)
                     for tensor in (left_input_seq, right_input_seq)]

    hidden = Concatenate()(go_encodings + seq_encodings)
    for width in (1024, 1024):
        hidden = Dense(width, activation='relu')(hidden)
        hidden = Dropout(0.1)(hidden)
    hidden = Dense(512, activation='relu')(hidden)

    logit = Dense(1)(hidden)
    output = Activation('sigmoid')(logit)

    model = Model([left_input_go, right_input_go, left_input_seq, right_input_seq], output)
    optimizer = Lookahead(RAdam())
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model


# Build the network once and print its layer-by-layer summary.
model = build_model()
model.summary()






Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 128, 768)     0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 128, 768)     0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 1000, 20)     0                                            
___________________________________________________________________________

In [8]:
# # !rm -rf  SC_CV
# !mkdir SC_CV

In [9]:
# ppi_pairs[1].rstrip().split(',')

In [10]:
from sklearn.model_selection import StratifiedKFold
from keras.models import load_model
from sklearn.metrics import accuracy_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from keras.utils import multi_gpu_model

dataset_name = 'SC'
# Five repetitions. Each one trains a fresh model, evaluates it on its test
# file and writes its metrics to 'new_seq_and_go__incep_<rep>.npz'.
for rep in range(0,5):
    # A single train/valid/test split per repetition, so every "mean" printed
    # at the end of a repetition is the mean of one value.
    n_splits = 1
    TPRs =  np.zeros(n_splits)
    FPRs = np.zeros(n_splits)
    Precs = np.zeros(n_splits)
    ACCs = np.zeros(n_splits)
    F1s = np.zeros(n_splits)
    MCCs = np.zeros(n_splits)
    AUCs = np.zeros(n_splits)

    # Write position into the metric arrays above; advances once per split.
    count = 0
    for split in range(n_splits):
        # Pair files are named <kind><rep>-<split> inside SC_CV/.
        train_pairs_file = 'SC_CV/train'+str(rep)+'-'+str(split)
        test_pairs_file = 'SC_CV/test'+str(rep)+'-'+str(split)
        valid_pairs_file = 'SC_CV/valid'+str(rep)+'-'+str(split)

        batch_size = 192
        train_generator = DataGenerator(   train_pairs_file,batch_size = batch_size )
        test_generator = DataGenerator(   test_pairs_file,batch_size = batch_size)
        valid_generator = DataGenerator(   valid_pairs_file,batch_size = batch_size)

        # NOTE(review): a new model is built for every split without resetting
        # the Keras/TensorFlow session; presumably the backend graph keeps
        # growing across repetitions. Confirm whether a session reset is needed.
        # model = build_model_without_att()
        model = build_model()
        save_model_name = 'SC_CV/sc_GoplusSeq'+str(rep)+'-'+str(split) + '.hdf5'

        # Stop after 20 epochs without a better validation accuracy, and keep
        # only the weights of the best epoch on disk.
        earlyStopping = EarlyStopping(monitor='val_acc', patience=20, verbose=0, mode='max')
        save_checkpoint = ModelCheckpoint(save_model_name, save_best_only=True, monitor='val_acc', mode='max', save_weights_only=True)

        hist = model.fit_generator(generator=train_generator,
                    validation_data=valid_generator,
                    epochs = 100,verbose=1,callbacks=[earlyStopping, save_checkpoint] )

        # Evaluate the checkpointed best weights rather than the last epoch.
        model.load_weights(save_model_name)
        with open(test_pairs_file, 'r') as f:
            test_ppi_pairs  =  f.readlines()

        # Encode the whole test file at once (not batch by batch), so no test
        # pair is dropped by the generator's full-batch rule.
        test_len = len(test_ppi_pairs) 
        list_IDs_temp = np.arange(test_len)

        test_x, y_test = test_generator.all_data(list_IDs_temp)

        y_pred_prob = model.predict(test_x)

        # Hard predictions at a 0.5 threshold; the probabilities are kept for the curve-based metrics.
        y_pred = (y_pred_prob > 0.5)
        auc = metrics.roc_auc_score(y_test, y_pred_prob) 
        f1 = f1_score(y_test, y_pred)
        pre = precision_score(y_test, y_pred)
        acc = accuracy_score(y_test, y_pred)

        # Area under the precision-recall curve; computed but not printed or saved.
        precision, recall, _thresholds = metrics.precision_recall_curve(y_test, y_pred_prob)
        pr_auc = metrics.auc(recall, precision)
        mcc = matthews_corrcoef(y_test, y_pred)

        tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
        total=tn+fp+fn+tp
        # Sensitivity and specificity; sen is the same quantity as tpr below.
        sen = float(tp)/float(tp+fn)
        sps = float(tn)/float((tn+fp))

        tpr = float(tp)/float(tp+fn)
        fpr = float(fp)/float((tn+fp))
        print('--------------------------\n')
        print ('AUC: %f' % auc)
        print ('ACC: %f' % acc) 
        # print("PRAUC: %f" % pr_auc)
        print ('MCC : %f' % mcc)
        # print ('SEN: %f' % sen)
        # print ('SEP: %f' % sps)
        print('TPR:%f'%tpr)
        print('FPR:%f'%fpr)
        print('Pre:%f'%pre)
        print('F1:%f'%f1)
        print('--------------------------\n')
        TPRs[count] = tpr
        FPRs[count] = fpr
        Precs[count] =pre
        ACCs[count] =acc
        F1s[count] =f1
        MCCs[count] =mcc
        AUCs[count] =auc
        count += 1
        # Drop the references to the encoded test set before the next split.
        del test_x
        del y_test
    print ('mean AUC: %f' % np.mean(AUCs))
    print ('mean ACC: %f' % np.mean(ACCs)) 
    print ('mean MCC : %f' % np.mean(MCCs))
    print('mean TPR:%f'% np.mean(TPRs))
    print('mean FPR:%f'% np.mean(FPRs))
    print('mean Pre:%f'% np.mean(Precs))
    print('mean F1:%f'% np.mean(F1s))
    # Persist this repetition's per-split metrics; the aggregation cell reads these files.
    np.savez('new_seq_and_go__incep_'+str(rep), AUCs=AUCs, ACCs=ACCs, MCCs=MCCs, TPRs = TPRs, FPRs=FPRs, Precs=Precs, F1s=F1s)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
--------------------------

AUC: 0.989100
ACC: 0.958507
MCC : 0.917747
TPR:0.938096
FPR:0.021242
Pre:0.977686
F1:0.957482
--------------------------

mean AUC: 0.989100
mean ACC: 0.958507
mean MCC : 0.917747
mean TPR:0.938096
mean FPR:0.021242
mean Pre:0.977686
mean F1:0.957482
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/10

Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
--------------------------

AUC: 0.989187
ACC: 0.958855
MCC : 0.917929
TPR:0.948952
FPR:0.030896
Pre:0.969499
F1:0.959116
--------------------------

mean AUC: 0.989187
mean ACC: 0.958855
mean MCC : 0.917929
mean TPR:0.948952
mean FPR:0.030896
mean Pre:0.969499
mean F1:0.959116
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Ep

Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
--------------------------

AUC: 0.986054
ACC: 0.953871
MCC : 0.908010
TPR:0.941012
FPR:0.033418
Pre:0.965319
F1:0.953011
--------------------------

mean AUC: 0.986054
mean ACC: 0.953871
mean MCC : 0.908010
mean TPR:0.941012
mean FPR:0.033418
mean Pre:0.965319
mean F1:0.953011
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Ep

Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
--------------------------

AUC: 0.987630
ACC: 0.956189
MCC : 0.912736
TPR:0.943301
FPR:0.030559
Pre:0.969455
F1:0.956199
--------------------------

mean AUC: 0.987630
mean ACC: 0.956189
mean MCC : 0.912736
mean TPR:0.943301
mean FPR:0.030559
mean Pre:0.969455
mean F1:0.956199
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
--------------------------

AUC: 0.987769
ACC: 0.955726
MCC : 0.911

In [11]:
# model.load_weights(save_model_name)
# with open(test_pairs_file, 'r') as f:
#     test_ppi_pairs  =  f.readlines()

# test_len = int(len(test_ppi_pairs))
# list_IDs_temp = np.arange(test_len)

# list_IDs_temp = np.random.choice(list_IDs_temp, 4000).tolist()

# test_x, y_test = test_generator.all_data(list_IDs_temp)

# y_pred_prob = model.predict(test_x)


# y_pred = (y_pred_prob > 0.5)
# auc = metrics.roc_auc_score(y_test, y_pred_prob) 
# f1 = f1_score(y_test, y_pred)
# pre = precision_score(y_test, y_pred)
# acc = accuracy_score(y_test, y_pred)

# precision, recall, _thresholds = metrics.precision_recall_curve(y_test, y_pred_prob)
# pr_auc = metrics.auc(recall, precision)
# mcc = matthews_corrcoef(y_test, y_pred)

# tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
# total=tn+fp+fn+tp
# sen = float(tp)/float(tp+fn)
# sps = float(tn)/float((tn+fp))

# tpr = float(tp)/float(tp+fn)
# fpr = float(fp)/float((tn+fp))
# print('--------------------------\n')
# print ('AUC: %f' % auc)
# print ('ACC: %f' % acc) 
# # print("PRAUC: %f" % pr_auc)
# print ('MCC : %f' % mcc)
# # print ('SEN: %f' % sen)
# # print ('SEP: %f' % sps)
# print('TPR:%f'%tpr)
# print('FPR:%f'%fpr)
# print('Pre:%f'%pre)
# print('F1:%f'%f1)

In [1]:
import numpy as np 
# Average every metric over the five repetitions written by the training cell.
rep_results = []
for rep in range(5):
    # np.load on an .npz keeps the archive open; the context manager closes it
    # once the arrays have been copied into a plain dict.
    with np.load('new_seq_and_go__incep_' + str(rep) + '.npz') as archive:
        rep_results.append({key: archive[key] for key in archive.files})

# (print format, key in the saved archive); formats are kept exactly as before.
report_lines = (
    ('mean AUC: %f', 'AUCs'),
    ('mean ACC: %f', 'ACCs'),
    ('mean MCC : %f', 'MCCs'),
    ('mean TPR:%f', 'TPRs'),
    ('mean FPR:%f', 'FPRs'),
    ('mean Pre:%f', 'Precs'),
    ('mean F1:%f', 'F1s'),
)
for line_format, key in report_lines:
    # Mean of the per-repetition means, as in the original hand-written sums.
    print(line_format % np.mean([np.mean(run[key]) for run in rep_results]))

mean AUC: 0.987948
mean ACC: 0.956630
mean MCC : 0.913588
mean TPR:0.944133
mean FPR:0.030802
mean Pre:0.968667
mean F1:0.956222


In [13]:
# import numpy as np 
# results1 =   np.load( 'all_seq_and_go__incep_0.npz')
# results2 =   np.load( 'all_seq_and_go__incep_1.npz')
# results3 =   np.load( 'all_seq_and_go__incep_2.npz')
# print ('mean AUC: %f' %  ( (np.mean( results1[ 'AUCs' ] )  + np.mean(  results2[ 'AUCs' ] )  + np.mean(results3[ 'AUCs' ]))/3     ) )
# print ('mean ACC: %f' %   ( (np.mean( results1[ 'ACCs' ] )  + np.mean(  results2[ 'ACCs' ] )  + np.mean(results3[ 'ACCs' ]))/3) )
# print ('mean MCC : %f' %  (  (np.mean( results1[ 'MCCs' ] )  + np.mean(  results2[ 'MCCs' ] )  + np.mean(results3[ 'MCCs' ])     )/3))
# print('mean TPR:%f'%    ((np.mean( results1[ 'TPRs' ] )  + np.mean(  results2[ 'TPRs' ] )  + np.mean(results3[ 'TPRs' ])     )/3))
# print('mean FPR:%f'%   ( (np.mean( results1[ 'FPRs' ] )  + np.mean(  results2[ 'FPRs' ] )  + np.mean(results3[ 'FPRs' ])     )/3))
# print('mean Pre:%f'%    ((np.mean( results1[ 'Precs' ] )  + np.mean(  results2[ 'Precs' ] )  + np.mean(results3[ 'Precs' ])     )/3))
# print('mean F1:%f'%    ((np.mean( results1[ 'F1s' ] )  + np.mean(  results2[ 'F1s' ] )  + np.mean(results3[ 'F1s' ])     )/3))

mean AUC: 0.981603
mean ACC: 0.938093
mean MCC : 0.850220
mean TPR:0.949721
mean FPR:0.066037
mean Pre:0.836816
mean F1:0.889516
