# =================== Import ===================

In [8]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Dropout, GlobalMaxPooling1D, Embedding
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing import sequence
from sklearn.metrics import confusion_matrix
import math

# =================== Variables ===================

In [9]:
data = np.load('2.ER_dataset.npz')

# =================== Functions ===================

In [20]:
def prediction_test(data):   
    '''Function to predict outcome of the test set (not trained on)'''
    X = data['input']
    y = data['label']
    part = data['partition']
       
    max_protein_length=100
    input_dim=21
    n_filters=110
    kernel_size=7
    dropout=0.25
    maxpooling_size=2
    n_layers=[0]
    activation='relu'
    activation_out='sigmoid'
    padding='same'
    n_classes=1
    test = 4
    not_test = [0,1,2,3]
    test_indices = np.where(part == test)  
    X = sequence.pad_sequences(X, maxlen=max_protein_length)
    X = X[:,-max_protein_length:]
    X_test = X[test_indices]
    y_test = y[test_indices]
    #test_indices = np.where(part == test)
    
    # Cross validation loop
    average_prediction = np.zeros(y_test.shape[0])
    print(average_prediction.shape)
        
    for partition in not_test:  
        # CNN model
        model = Sequential()
        model.add(Embedding(input_dim, n_filters, input_length=max_protein_length))
        model.add(MaxPooling1D(pool_size=maxpooling_size))
        model.add(Dropout(dropout))       
        for n in n_layers:
            model.add(Conv1D(n_filters, kernel_size, activation=activation, padding=padding)) 
        model.add(GlobalMaxPooling1D())
        model.add(Dense(n_filters, activation=activation))
        model.add(Dense(n_classes, activation=activation_out))
        # Using all the builded models
        file_model = "model_%i.hdf5" % partition
        # Loading weights to model
        model.load_weights(filepath = file_model)
        # Prediction
        predictions = model.predict(X_test)
        average_prediction += np.squeeze(predictions,1)
    
    final_prediction = average_prediction / 4.0
         
    pred_class = np.around(final_prediction) 
    y_actu = y_test.tolist()
    y_pred = pred_class.tolist()
    cm = confusion_matrix(y_pred, y_actu)
    TN = cm[0][0]
    FP = cm[0][1]
    FN = cm[1][0]
    TP = cm[1][1]
    MCC_test = (TP*TN-FP*FN)/math.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
    print('TP', 'FP', 'FN', 'TN')
    print(TP, FP, FN, TN, '\n')
    print('Confusion Matrix')
    print(cm)
    print ('MCC = ', MCC_test, '\n') 
    return MCC_test

# =================== Main ===================

In [21]:
MCC_test = prediction_test(data)

(570,)
TP FP FN TN
75 1 3 491 

Confusion Matrix
[[491   1]
 [  3  75]]
MCC =  0.9700793286542261 

