In [1]:
from __future__ import print_function 
import os, sys, h5py
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
from six.moves import cPickle
from sklearn.metrics import roc_curve, auc, precision_recall_curve, accuracy_score, roc_auc_score

sys.path.append('..')
import helper
from deepomics import neuralnetwork as nn
from deepomics import utils, fit

In [59]:

def experiment_correpsondence(correspondence_path, experiments):
    rncmpt_names = []
    clip_names = []
    rbp_names = []
    cell_types = []
    with open(correspondence_path, 'rb') as f:
        for line in f:
            index = line.index('-')
            rncmpt_names.append(line[:index])
            experiment = line[index+1:].split()[0]
            index = experiment.index('_')
            rbp_names.append(experiment[:index])
            cell_types.append(experiment[index+1:])
            clip_names.append(experiment+'_200.h5')
            
    # get a dictionary of tuples for each rbp correspondence
    unique_rbps = np.unique(rbp_names)
    match = {}
    for j, rbp_name in enumerate(unique_rbps):
        match[rbp_name] = []
    for j, rbp_name in enumerate(rbp_names):
        rbp_index = np.where(experiments==rncmpt_names[j])[0][0]
        match[rbp_name].append((rbp_index, rncmpt_names[j], clip_names[j]))
    return match

def binding_affinity_scores(train, model, input_shape, output_shape, file_path):
    
    X_train = train['inputs']
    y_train = train['targets']
    
    # merge dataset
    index = np.where(y_train[:,0]==1)[0]
    X_pos = X_train[index]
    y_pos = y_train[index]
    index = np.where(y_train[:,0]==0)[0]
    X_neg = X_train[index]
    y_neg = y_train[index]
    num_split = X_pos.shape[1] - 41
    
    # load model
    genome_model = helper.import_model(model)
    model_layers, optimization = genome_model.model(input_shape, output_shape)
    
    # build neural network class
    nnmodel = nn.NeuralNet(seed=247)
    nnmodel.build_layers(model_layers, optimization, use_scope=False)

    nntrainer = nn.NeuralTrainer(nnmodel, save='best', file_path=file_path)

    # initialize session
    sess = utils.initialize_session(nnmodel.placeholders)
    
    # load best model
    nntrainer.set_best_parameters(sess, verbose=False)

    pos_score = []
    pos_max_score = []
    pos_mean_score = []
    for X in X_pos:
        X_split = []
        for i in range(num_split):
            X_split.append([X[i:i+41,:,:]])
        X_split = np.vstack(X_split)

        affinity = nntrainer.get_activations(sess, {'inputs': X_split})
        affinity = np.sort(affinity[:,0])[::-1]
        pos_max_score.append(affinity[0])
        pos_mean_score.append(np.mean(affinity))
        pos_score.append(np.mean(affinity[:20]))

    pos_score = np.array(pos_score)
    pos_max_score = np.array(pos_max_score)
    pos_mean_score = np.array(pos_mean_score)

    neg_score = []
    neg_max_score = []
    neg_mean_score = []
    for X in X_neg:
        X_split = []
        for i in range(num_split):
            X_split.append([X[i:i+41,:,:]])
        X_split = np.vstack(X_split)

        affinity = nntrainer.get_activations(sess, {'inputs': X_split})
        affinity = np.sort(affinity[:,0])[::-1]
        neg_max_score.append(affinity[0])
        neg_mean_score.append(np.mean(affinity))
        neg_score.append(np.mean(affinity[:20]))
    neg_score = np.array(neg_score)
    neg_mean_score = np.array(neg_mean_score)
    neg_max_score = np.array(neg_max_score)

    top_score = np.vstack([np.expand_dims(pos_score,axis=1), np.expand_dims(neg_score, axis=1)])
    mean_score = np.vstack([np.expand_dims(pos_mean_score,axis=1), np.expand_dims(neg_mean_score, axis=1)])
    max_score = np.vstack([np.expand_dims(pos_max_score,axis=1), np.expand_dims(neg_max_score, axis=1)])
    
    y_true = np.vstack([np.ones((len(pos_score),1)), np.zeros((len(neg_score),1))])
    return y_true, max_score, top_score, mean_score




In [72]:

model = 'affinity_conv_net' # 'affinity_residual_net'
rncmpt_name = 'RNCMPT00074'
clip_name = 'SRSF9_HepG2_200.h5'
ss_type = 'seq'
normalize_method = 'log_norm' 

# get list of encode-eclip experiments
data_path = '../../data/RNAcompete_2013/rnacompete2013.h5'
experiments = helper.get_experiments_hdf5(data_path)

rbp_index = np.where(rncmpt_name == experiments)[0][0]

# load rbp dataset
train, valid, test = helper.load_dataset_hdf5(data_path, ss_type=ss_type, rbp_index=rbp_index)

# process rbp dataset
train, valid, test = helper.process_data(train, valid, test, method=normalize_method)

input_shape = list(train['inputs'].shape)
input_shape[0] = None
output_shape = train['targets'].shape

# get corresponding clip-experiments for rnacompete experiments
correspondence_path = 'correspondences_eCLIP_RNACompete.txt'
match = experiment_correpsondence(correspondence_path, experiments)

# load clip-seq dataset
dataset_path = '/media/peter/storage/encode_eclip/eclip_datasets'
dataset_file_path = os.path.join(dataset_path, clip_name)
clip_train, clip_valid, clip_test = helper.load_dataset_hdf5(dataset_file_path, ss_type=ss_type)
X_train = np.vstack([clip_train['inputs'], clip_valid['inputs'], clip_test['inputs']])
y_train = np.vstack([clip_train['targets'], clip_valid['targets'], clip_test['targets']])
clip_train = {'inputs': X_train, 'targets': y_train}

results_path = helper.make_directory('../../results', 'RNAcompete_2013')
#results_path = helper.make_directory('../../results', 'test')
file_path = os.path.join(results_path, normalize_method+'_'+ss_type, model, rncmpt_name)
y_true, max_score, top_score, mean_score = binding_affinity_scores(clip_train, model, input_shape, output_shape, file_path)

print(rbp_index, rncmpt_name, clip_name)

fpr, tpr, thresholds = roc_curve(y_true, mean_score)
roc_score = auc(fpr, tpr)
print('mean = ', roc_score)

fpr, tpr, thresholds = roc_curve(y_true, max_score)
roc_score = auc(fpr, tpr)
print('max  = ', roc_score)

fpr, tpr, thresholds = roc_curve(y_true, top_score)
roc_score = auc(fpr, tpr)
print('T20  = ', roc_score)


217 RNCMPT00074 SRSF9_HepG2_200.h5
mean =  0.796664221672
max  =  0.757472258191
T20  =  0.779755855693


In [193]:

def ensemble_binding_affinity_scores(clip_train, models, input_shape, output_shape, file_path, rncmpt_name, num_ave=20):
    
    def ensemble_scores(X_data, models, input_shape, output_shape, file_path, rncmpt_name, num_ave):

        model_activations = []
        for j, model in enumerate(models):
            # load model
            genome_model = helper.import_model(model)
            model_layers, optimization = genome_model.model(input_shape, output_shape)

            # build neural network class
            nnmodel = nn.NeuralNet(seed=247)
            nnmodel.build_layers(model_layers, optimization, use_scope=False)

            best_path = os.path.join(file_path, model, rncmpt_name)
            nntrainer = nn.NeuralTrainer(nnmodel, save='best', file_path=best_path)

            # initialize session
            sess = utils.initialize_session(nnmodel.placeholders)

            # load best model
            nntrainer.set_best_parameters(sess, verbose=False)

            activations = []
            for i, X in enumerate(X_data):
                X_split = []
                for i in range(num_split):
                    X_split.append([X[i:i+41,:,:]])
                X_split = np.vstack(X_split)

                affinity = nntrainer.get_activations(sess, {'inputs': X_split})
                activations.append(affinity)

            model_activations.append(np.hstack(activations))

        ensemble_activations = np.mean(np.array(model_activations), axis=0)

        num_data = X_data.shape[0]
        top_score = np.zeros((num_data, len(models)))
        max_score = np.zeros((num_data, len(models)))
        mean_score = np.zeros((num_data, len(models)))
        ensemble_score = np.zeros((num_data, 3))
        for i in range(num_data):
            sort_ensemble = np.sort(ensemble_activations[:,i])[::-1]
            ensemble_score[i,0] = sort_ensemble[0]
            ensemble_score[i,1] = np.mean(sort_ensemble[:num_ave])
            ensemble_score[i,2] = np.mean(sort_ensemble)  

            for j in range(len(models)):
                sort_affinity = np.sort(model_activations[j])[::-1,i]
                top_score[i,j] = np.mean(sort_affinity[:num_ave])
                max_score[i,j] = sort_affinity[0]
                mean_score[i,j] = np.mean(sort_affinity)
        return ensemble_score, max_score, top_score, mean_score
                                          
                                          
    X_train = clip_train['inputs']
    y_train = clip_train['targets']
    
    # merge dataset
    index = np.where(y_train[:,0]==1)[0]
    X_pos = X_train[index]
    y_pos = y_train[index]
    index = np.where(y_train[:,0]==0)[0]
    X_neg = X_train[index]
    y_neg = y_train[index]
    num_split = X_pos.shape[1] - 41

    ensemble_pos_score, max_pos_score, top_pos_score, mean_pos_score = ensemble_scores(X_pos, models, input_shape, output_shape, file_path, rncmpt_name, num_ave)

    ensemble_neg_score, max_neg_score, top_neg_score, mean_neg_score = ensemble_scores(X_neg, models, input_shape, output_shape, file_path, rncmpt_name, num_ave)
                        
    max_score = np.vstack([max_pos_score, max_neg_score])
    top_score = np.vstack([top_pos_score, top_neg_score])
    mean_score = np.vstack([mean_pos_score, mean_neg_score])
    ensemble_score = np.vstack([ensemble_pos_score, ensemble_neg_score])
    
    y_true = np.vstack([np.ones((max_pos_score.shape[0],1)), np.zeros((max_neg_score.shape[0],1))])

    return y_true, ensemble_score, max_score, top_score, mean_score


In [177]:
models = ['affinity_conv_net', 'affinity_residual_net', 'affinity_all_conv_net']
rncmpt_name = 'RNCMPT00074'
clip_name = 'SRSF9_HepG2_200.h5'
ss_type = 'seq'
normalize_method = 'log_norm' 

# get list of encode-eclip experiments
data_path = '../../data/RNAcompete_2013/rnacompete2013.h5'
experiments = helper.get_experiments_hdf5(data_path)

rbp_index = np.where(rncmpt_name == experiments)[0][0]

# load rbp dataset
train, valid, test = helper.load_dataset_hdf5(data_path, ss_type=ss_type, rbp_index=rbp_index)

# process rbp dataset
train, valid, test = helper.process_data(train, valid, test, method=normalize_method)

input_shape = list(train['inputs'].shape)
input_shape[0] = None
output_shape = train['targets'].shape

# get corresponding clip-experiments for rnacompete experiments
correspondence_path = 'correspondences_eCLIP_RNACompete.txt'
match = experiment_correpsondence(correspondence_path, experiments)

# load clip-seq dataset
dataset_path = '/media/peter/storage/encode_eclip/eclip_datasets'
dataset_file_path = os.path.join(dataset_path, clip_name)
clip_train, clip_valid, clip_test = helper.load_dataset_hdf5(dataset_file_path, ss_type=ss_type)
X_train = np.vstack([clip_train['inputs'], clip_valid['inputs'], clip_test['inputs']])
y_train = np.vstack([clip_train['targets'], clip_valid['targets'], clip_test['targets']])
clip_train = {'inputs': X_train, 'targets': y_train}

results_path = helper.make_directory('../../results', 'RNAcompete_2013')
#results_path = helper.make_directory('../../results', 'test')
file_path = os.path.join(results_path, normalize_method+'_'+ss_type)
y_true, ensemble_score, max_score, top_score, mean_score = ensemble_binding_affinity_scores(clip_train, models, input_shape, output_shape, file_path, rncmpt_name, num_ave=20)


print(rbp_index, rncmpt_name, clip_name)
for i, model in enumerate(models):
    print('    '+model)
    fpr, tpr, thresholds = roc_curve(y_true[:,0], mean_score[:,i])
    roc_score = auc(fpr, tpr)
    print('      mean = ', roc_score)

for i, model in enumerate(models):
    print('    '+model)
    fpr, tpr, thresholds = roc_curve(y_true[:,0], top_score[:,i])
    roc_score = auc(fpr, tpr)
    print('      max  = ', roc_score)

for i, model in enumerate(models):
    print('    '+model)
    fpr, tpr, thresholds = roc_curve(y_true[:,0], max_score[:,i])
    roc_score = auc(fpr, tpr)
    print('      T20  = ', roc_score)

print('    Ensemble')
for i in range(3):
    fpr, tpr, thresholds = roc_curve(y_true[:,0], ensemble_score[:,i])
    roc_score = auc(fpr, tpr)
    print('      ', roc_score)

217 RNCMPT00074 SRSF9_HepG2_200.h5
affinity_conv_net
    mean =  0.660182309167
affinity_residual_net
    mean =  0.692640634988
affinity_all_conv_net
    mean =  0.684003589634
affinity_conv_net
    max  =  0.611496869314
affinity_residual_net
    max  =  0.634143118228
affinity_all_conv_net
    max  =  0.621891852022
affinity_conv_net
    T20  =  0.610255249226
affinity_residual_net
    T20  =  0.624336163517
affinity_all_conv_net
    T20  =  0.607924650461
Ensemble
     0.762401858742
     0.792983412283
     0.816036363487


In [191]:
match

{'FMR1': [(60, 'RNCMPT00015', 'FMR1_K562_200.h5'),
  (71, 'RNCMPT00016', 'FMR1_K562_200.h5')],
 'FXR1': [(62, 'RNCMPT00161', 'FXR1_K562_200.h5')],
 'FXR2': [(102, 'RNCMPT00020', 'FXR2_K562_200.h5')],
 'HNRNPA1': [(117, 'RNCMPT00022', 'HNRNPA1_HepG2_200.h5'),
  (117, 'RNCMPT00022', 'HNRNPA1_K562_200.h5')],
 'HNRNPC': [(143, 'RNCMPT00025', 'HNRNPC_HepG2_200.h5')],
 'HNRNPK': [(150, 'RNCMPT00026', 'HNRNPK_HepG2_200.h5'),
  (150, 'RNCMPT00026', 'HNRNPK_K562_200.h5')],
 'IGF2BP2': [(173, 'RNCMPT00033', 'IGF2BP2_K562_200.h5')],
 'IGF2BP3': [(74, 'RNCMPT00172', 'IGF2BP3_HepG2_200.h5')],
 'KHDRBS1': [(70, 'RNCMPT00169', 'KHDRBS1_K562_200.h5'),
  (204, 'RNCMPT00062', 'KHDRBS1_K562_200.h5')],
 'PCBP2': [(185, 'RNCMPT00044', 'PCBP2_HepG2_200.h5')],
 'PTBP1': [(148, 'RNCMPT00268', 'PTBP1_HepG2_200.h5'),
  (149, 'RNCMPT00269', 'PTBP1_HepG2_200.h5'),
  (148, 'RNCMPT00268', 'PTBP1_K562_200.h5'),
  (149, 'RNCMPT00269', 'PTBP1_K562_200.h5')],
 'QKI': [(188, 'RNCMPT00047', 'QKI_HepG2_200.h5'),
  (188, '

In [192]:
models = ['affinity_conv_net', 'affinity_residual_net', 'affinity_all_conv_net']
ss_type = 'seq'
normalize_method = 'log_norm' 

# get list of encode-eclip experiments
data_path = '../../data/RNAcompete_2013/rnacompete2013.h5'
experiments = helper.get_experiments_hdf5(data_path)

# get corresponding clip-experiments for rnacompete experiments
correspondence_path = 'correspondences_eCLIP_RNACompete.txt'
match = experiment_correpsondence(correspondence_path, experiments)

# directory for encode-clip experiments
dataset_path = '/media/peter/storage/encode_eclip/eclip_datasets'

results = []
for key in match.keys():
    for experiments in match[key]:
        rbp_index = experiments[0]
        rncmpt_name = experiments[1]
        clip_name = experiments[2]

        # load rbp dataset
        train, valid, test = helper.load_dataset_hdf5(data_path, ss_type=ss_type, rbp_index=rbp_index)

        # process rbp dataset
        train, valid, test = helper.process_data(train, valid, test, method=normalize_method)

        input_shape = list(train['inputs'].shape)
        input_shape[0] = None
        output_shape = train['targets'].shape

        # load clip-seq dataset
        dataset_file_path = os.path.join(dataset_path, clip_name)
        clip_train, clip_valid, clip_test = helper.load_dataset_hdf5(dataset_file_path, ss_type=ss_type)
        X_train = np.vstack([clip_train['inputs'], clip_valid['inputs'], clip_test['inputs']])
        y_train = np.vstack([clip_train['targets'], clip_valid['targets'], clip_test['targets']])
        clip_train = {'inputs': X_train, 'targets': y_train}

        results_path = helper.make_directory('../../results', 'RNAcompete_2013')
        file_path = os.path.join(results_path, normalize_method+'_'+ss_type)
        y_true, ensemble_score, max_score, top_score, mean_score = ensemble_binding_affinity_scores(clip_train, models, input_shape, output_shape, file_path, rncmpt_name, num_ave=20)


        print(rbp_index, rncmpt_name, clip_name)
        for i, model in enumerate(models):
            print('    '+model)
            fpr, tpr, thresholds = roc_curve(y_true[:,0], top_score[:,i])
            roc_score = auc(fpr, tpr)
            print('      max  = ', roc_score)
        for i, model in enumerate(models):
            print('    '+model)
            fpr, tpr, thresholds = roc_curve(y_true[:,0], max_score[:,i])
            roc_score = auc(fpr, tpr)
            print('      T20  = ', roc_score)
        for i, model in enumerate(models):
            print('    '+model)
            fpr, tpr, thresholds = roc_curve(y_true[:,0], mean_score[:,i])
            roc_score = auc(fpr, tpr)
            print('      mean = ', roc_score)
        print('    Ensemble')
        for i in range(3):
            fpr, tpr, thresholds = roc_curve(y_true[:,0], ensemble_score[:,i])
            roc_score = auc(fpr, tpr)
            print('      ', roc_score)
        results.append([rbp_index, rncmpt_name, clip_name, y_true, ensemble_score, mean_score, top_score, max_score])

219 RNCMPT00076 TARDBP_K562_200.h5
    affinity_conv_net
      max  =  0.620682772357
    affinity_residual_net
      max  =  0.609361557046
    affinity_all_conv_net
      max  =  0.63345794571
    affinity_conv_net
      T20  =  0.618711917525
    affinity_residual_net
      T20  =  0.607687186205
    affinity_all_conv_net
      T20  =  0.631009157046
    affinity_conv_net
      mean =  0.846253657466
    affinity_residual_net
      mean =  0.830111167859
    affinity_all_conv_net
      mean =  0.845299017198
    Ensemble
       0.974781704456
       0.977934526679
       0.980958458829


KeyboardInterrupt: 

In [194]:
models = ['affinity_conv_net', 'affinity_residual_net', 'affinity_all_conv_net']
ss_type = 'seq'
normalize_method = 'log_norm' 

# get list of encode-eclip experiments
data_path = '../../data/RNAcompete_2013/rnacompete2013.h5'
experiments = helper.get_experiments_hdf5(data_path)

# get corresponding clip-experiments for rnacompete experiments
correspondence_path = 'correspondences_eCLIP_RNACompete.txt'
match = experiment_correpsondence(correspondence_path, experiments)

# directory for encode-clip experiments
dataset_path = '/media/peter/storage/encode_eclip/eclip_datasets'

results = []
for key in match.keys():
    for experiments in match[key]:
        rbp_index = experiments[0]
        rncmpt_name = experiments[1]
        clip_name = experiments[2]

        # load rbp dataset
        train, valid, test = helper.load_dataset_hdf5(data_path, ss_type=ss_type, rbp_index=rbp_index)

        # process rbp dataset
        train, valid, test = helper.process_data(train, valid, test, method=normalize_method)

        input_shape = list(train['inputs'].shape)
        input_shape[0] = None
        output_shape = train['targets'].shape

        # load clip-seq dataset
        dataset_file_path = os.path.join(dataset_path, clip_name)
        clip_train, clip_valid, clip_test = helper.load_dataset_hdf5(dataset_file_path, ss_type=ss_type)
        X_train = np.vstack([clip_train['inputs'], clip_valid['inputs'], clip_test['inputs']])
        y_train = np.vstack([clip_train['targets'], clip_valid['targets'], clip_test['targets']])
        clip_train = {'inputs': X_train, 'targets': y_train}

        results_path = helper.make_directory('../../results', 'RNAcompete_2013')
        file_path = os.path.join(results_path, normalize_method+'_'+ss_type)
        y_true, ensemble_score, max_score, top_score, mean_score = ensemble_binding_affinity_scores(clip_train, models, input_shape, output_shape, file_path, rncmpt_name, num_ave=20)

        print(rbp_index, rncmpt_name, clip_name)
        for i, model in enumerate(models):
            print('    '+model)
            fpr, tpr, thresholds = roc_curve(y_true[:,0], top_score[:,i])
            roc_score = auc(fpr, tpr)
            print('      max  = ', roc_score)
        for i, model in enumerate(models):
            print('    '+model)
            fpr, tpr, thresholds = roc_curve(y_true[:,0], max_score[:,i])
            roc_score = auc(fpr, tpr)
            print('      T20  = ', roc_score)
        for i, model in enumerate(models):
            print('    '+model)
            fpr, tpr, thresholds = roc_curve(y_true[:,0], mean_score[:,i])
            roc_score = auc(fpr, tpr)
            print('      mean = ', roc_score)
        print('    Ensemble')
        for i in range(3):
            fpr, tpr, thresholds = roc_curve(y_true[:,0], ensemble_score[:,i])
            roc_score = auc(fpr, tpr)
            print('      ', roc_score)
        results.append([rbp_index, rncmpt_name, clip_name, y_true, ensemble_score, mean_score, top_score, max_score])

188 RNCMPT00047 QKI_HepG2_200.h5
    affinity_conv_net
      max  =  0.633039049273
    affinity_residual_net
      max  =  0.663647560548
    affinity_all_conv_net
      max  =  0.588256390838
    affinity_conv_net
      T20  =  0.623336657151
    affinity_residual_net
      T20  =  0.660090030652
    affinity_all_conv_net
      T20  =  0.586273745418
    affinity_conv_net
      mean =  0.706403929703
    affinity_residual_net
      mean =  0.692101520638
    affinity_all_conv_net
      mean =  0.612376100212
    Ensemble
       0.792572201569
       0.805800220962
       0.848357971576
188 RNCMPT00047 QKI_K562_200.h5
    affinity_conv_net
      max  =  0.626455689824
    affinity_residual_net
      max  =  0.652951509471
    affinity_all_conv_net
      max  =  0.583510364504
    affinity_conv_net
      T20  =  0.623369859655
    affinity_residual_net
      T20  =  0.643572756924
    affinity_all_conv_net
      T20  =  0.58305602751
    affinity_conv_net
      mean =  0.733760746019
 

In [78]:
# get list of rnacompete experiments
data_path = '../../data/RNAcompete_2013/rnacompete2013.h5'
experiments = helper.get_experiments_hdf5(data_path)

# get corresponding clip-experiments for rnacompete experiments
correspondence_path = 'correspondences_eCLIP_RNACompete.txt'
match = experiment_correpsondence(correspondence_path, experiments)

# directory for encode-clip experiments
dataset_path = '/media/peter/storage/encode_eclip/eclip_datasets'

models = ['affinity_residual_net', 'affinity_conv_net', 'affinity_all_conv_net']

normalize_method = 'log_norm' 
ss_types = ['seq', 'pu']


# get list of encode-eclip experiments
data_path = '../../data/RNAcompete_2013/rnacompete2013.h5'
experiments = helper.get_experiments_hdf5(data_path)


# get corresponding clip-experiments for rnacompete experiments
correspondence_path = 'correspondences_eCLIP_RNACompete.txt'
match = experiment_correpsondence(correspondence_path, experiments)

results = []
for key in match.keys():
    for experiments in match[key]:
        rbp_index = experiments[0]
        rcmpt_name = experiments[1]
        clip_name = experiments[2]

        model_score = []
        model_max_score = []
        model_mean_score = []
        for ss_type in ss_types:

            # load rbp dataset
            train, valid, test = helper.load_dataset_hdf5(data_path, ss_type=ss_type, rbp_index=rbp_index)
            input_shape = list(train['inputs'].shape)
            input_shape[0] = None
            output_shape = train['targets'].shape

            # load clip-seq dataset
            dataset_path = '/media/peter/storage/encode_eclip/eclip_datasets'
            dataset_file_path = os.path.join(dataset_path, clip_name)
            clip_train, clip_valid, clip_test = helper.load_dataset_hdf5(dataset_file_path, ss_type=ss_type)
            X_train = np.vstack([clip_train['inputs'], clip_valid['inputs'], clip_test['inputs']])
            y_train = np.vstack([clip_train['targets'], clip_valid['targets'], clip_test['targets']])
            clip_train = {'inputs': X_train, 'targets': y_train}

            for model in models:

                results_path = helper.make_directory('../../results', 'RNAcompete_2013')
                file_path = os.path.join(results_path, normalize_method+'_'+ss_type, model, rncmpt_name)
                y_true, max_score, score, mean_score = binding_affinity_scores(clip_train, model, input_shape, output_shape, file_path)

                model_score.append(score)
                model_mean_score.append(mean_score)
                model_max_score.append(max_score)

                
        print(rcmpt_name, clip_name)
    
        from sklearn.metrics import roc_curve, auc, precision_recall_curve, accuracy_score, roc_auc_score
        print('mean')
        for scores in model_mean_score:
            fpr, tpr, thresholds = roc_curve(y_true, scores)
            roc_score = auc(fpr, tpr)
            print('           ', roc_score)
        fpr, tpr, thresholds = roc_curve(y_true, np.mean(np.array(model_mean_score), axis=0))
        roc_score = auc(fpr, tpr)
        print('  ensemble: ', roc_score)

        print('max')
        for scores in model_max_score:
            fpr, tpr, thresholds = roc_curve(y_true, scores)
            roc_score = auc(fpr, tpr)
            print('           ', roc_score)
        fpr, tpr, thresholds = roc_curve(y_true, np.mean(np.array(model_max_score), axis=0))
        roc_score = auc(fpr, tpr)
        print('  ensemble: ', roc_score)

        print('top-20')
        for scores in model_score:
            fpr, tpr, thresholds = roc_curve(y_true, scores)
            roc_score = auc(fpr, tpr)
            print('           ', roc_score)
        fpr, tpr, thresholds = roc_curve(y_true, np.mean(np.array(model_score), axis=0))
        roc_score = auc(fpr, tpr)
        print('  ensemble: ', roc_score)


        #print("%s\t%s\t%s\t%0.4f\t%0.4f\t%0.4f\t%0.4f"%(rcmpt_name, clip_name, model, mean[0], roc_score, roc_score2, roc_score3))
        results.append([rcmpt_name, clip_name, model, y_true, model_score, model_mean_score, model_max_score])


RNCMPT00047 QKI_HepG2_200.h5
mean
            0.605931070316
            0.491899082325
            0.574297777285
            0.566630991611
            0.447441670493
            0.491939602664
  ensemble:  0.528178358777
max
            0.572428945484
            0.50728274213
            0.556939566517
            0.558528242538
            0.487067188019
            0.508627767518
  ensemble:  0.534039845595
top-20
            0.580591954479
            0.504755621667
            0.563888252112
            0.56589421448
            0.482611232123
            0.500983268298
  ensemble:  0.535400349764
RNCMPT00047 QKI_K562_200.h5
mean
            0.65029020337
            0.522555380822
            0.608755966114
            0.621783529766
            0.470099262068
            0.536473867582
  ensemble:  0.567086651073
max
            0.577495767377
            0.514069561723
            0.559569687861
            0.561743165547
            0.493443423581
            0.506761460075