Import libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from ast import literal_eval
from tqdm.notebook import tqdm as tqdm
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import itertools
import seaborn as sns
from scipy import stats
import scipy
import tensorflow as tf
from tensorflow.keras import *
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from sklearn.model_selection import train_test_split
!pip install scikit-plot
import scikitplot as skplt
import sklearn
from sklearn.model_selection import KFold
from collections import Counter
import requests
import re

In [None]:
# Mount Google Drive (Colab only) so the dataset CSVs read below from
# /content/Drive/MyDrive are available.
from google.colab import drive
drive.mount('/content/Drive')

Load compiled dataset

In [None]:
# Epitope table; later cells read its 'Epitope Length' column.
epitope_dataset = pd.read_csv('/content/Drive/MyDrive/epitope_dataset.csv')

In [None]:
# Antigen table; later cells read 'Antigen Sequence', 'Antigen Length', 'Antigen UniProt'
# and the stringified lists 'Epitope Starts' / 'Epitope Ends' (parsed with literal_eval).
antigen_dataset = pd.read_csv('/content/Drive/MyDrive/antigen_dataset.csv')

Quantitative Data Analysis

In [None]:
def box_plot(data, label, title):
    """Draw a horizontal notched box plot of `data` and print its mode and median.

    Dashed vertical lines mark mean - 2*SEM and mean + 2*SEM; the legend shows
    the rounded mean and 2*SEM.

    Args:
        data: iterable of numbers (copied and sorted, the caller's object is untouched).
        label: x-axis label.
        title: figure title.
    """
    data = sorted(data)
    mean = np.mean(data)
    median = np.median(data)
    # Print the ModeResult as returned. The previous `stats.mode(data)[0][0]`
    # indexing was unused and raises on SciPy versions where mode() returns
    # scalars for 1-D input, so it has been dropped.
    print(stats.mode(data))
    sem = stats.sem(data)

    plt.figure(figsize=(6,3))
    plt.axvline(x=mean+2*sem, color='b', linestyle='--')
    # Only one of the two dashed lines carries a label so the legend has a single entry.
    plt.axvline(x=mean-2*sem, color='b', linestyle='--', label="Mean +/- 2*SEM = " + str(round(mean,2)) + " +/- " + str(round(2*sem, 2)))
    plt.legend()
    plt.yticks(rotation=90)
    plt.title(title)
    plt.boxplot(data, vert=False, showfliers=False, showmeans=True, labels=[""], widths=0.4, notch=True)
    plt.xlabel(label)
    plt.show()
    print("Median", median)

Average Antigen Length

In [None]:
# Mean antigen length with its standard error, then the distribution as a box plot.
antigen_lengths = antigen_dataset['Antigen Length']
antigen_length_mean = antigen_lengths.mean()
antigen_length_sem = stats.sem(antigen_lengths)
print(f"Average Antigen Length: {antigen_length_mean!s} +/- {antigen_length_sem!s}")
box_plot(sorted(antigen_lengths), "Antigen Length (amino acids)", "All antigen lengths")

Average number of epitopes per antigen

In [None]:
# Number of annotated epitopes per antigen = length of the parsed 'Epitope Starts' list.
counts = [len(literal_eval(starts)) for starts in antigen_dataset['Epitope Starts']]
mean_epitope_count = sum(counts)/len(counts)
print(f"Average number of epitopes per antigen: {mean_epitope_count!s} +/- {stats.sem(counts)!s}")
box_plot(sorted(counts), "Number of Epitopes", "Number of epitopes in antigens")

Average length of epitope

In [None]:
# Mean epitope length with its standard error, then the distribution as a box plot.
epitope_lengths = epitope_dataset['Epitope Length']
epitope_length_mean = epitope_lengths.mean()
epitope_length_sem = stats.sem(epitope_lengths)
print(f"Average epitope length: {epitope_length_mean!s} +/- {epitope_length_sem!s}")
box_plot(sorted(epitope_lengths), "Epitope Length (amino acids)", "Length of epitopes")

Average distance from start of sequence to start of first epitope

In [None]:
# Smallest epitope start coordinate of each antigen, i.e. where its first epitope begins.
start_distances = [min(literal_eval(starts)) for starts in antigen_dataset['Epitope Starts']]

mean_start_distance = sum(start_distances)/len(start_distances)
print(f"Average distance of first epitope from start of sequence: {mean_start_distance!s} +/- {stats.sem(start_distances)!s}")

box_plot(sorted(start_distances), "Start Distances (amino acids)", "Distance to first epitope from sequence start")

Average distance from end of sequence to end of last epitope

In [None]:
# For each antigen: sequence length minus the largest epitope end coordinate, plus one.
end_distances = [
    len(antigen_sequence) - max(literal_eval(ends)) + 1
    for antigen_sequence, ends in zip(antigen_dataset['Antigen Sequence'], antigen_dataset['Epitope Ends'])
]

mean_end_distance = sum(end_distances)/len(end_distances)
print(f"Average distance of last epitope from end of sequence: {mean_end_distance!s} +/- {stats.sem(end_distances)!s}")

box_plot(sorted(end_distances), "End Distances (amino acids)", "Distance to end of sequence after last epitope")

Average length of contiguous immunostimulant regions

In [None]:
#average segment length of overall immunostimulant region
# An "immunostimulant region" is a maximal run of consecutive residues covered by
# at least one epitope, so overlapping or adjacent epitopes merge into one region.

lengths = []

for row in tqdm(range(antigen_dataset.shape[0])):
    sequence = antigen_dataset.at[row, 'Antigen Sequence']
    # Parse the coordinate lists once per antigen instead of on every loop iteration.
    ep_starts = literal_eval(antigen_dataset.at[row, "Epitope Starts"])
    ep_ends = literal_eval(antigen_dataset.at[row, "Epitope Ends"])

    # Per-residue mask: 1 where the residue lies inside any epitope.
    # Coordinates are treated as 1-based with an inclusive end.
    translation = [0] * len(sequence)
    for i in range(len(ep_starts)):
        for x in range(ep_starts[i]-1, ep_ends[i]):
            translation[x] = 1

    # groupby yields one group per maximal run of equal mask values.
    for flag, run in itertools.groupby(translation):
        if flag == 1:
            lengths.append(len(list(run)))

print("Average length of immunostimulant region, regardless of the number of unique epitopes that compose it: " + str(sum(lengths)/len(lengths)) + " +/- " + str(stats.sem(lengths)))
box_plot(sorted(lengths), "Immunostimulant Region Length (amino acids)", "Length of immunostimulant region")

Number of immunostimulant regions per antigen

In [None]:
#number of immunostimulant regions
# Counts, per antigen, the maximal runs of consecutive residues covered by at
# least one epitope.

counts = []

for row in tqdm(range(antigen_dataset.shape[0])):
    sequence = antigen_dataset.at[row, 'Antigen Sequence']
    # Parse the coordinate lists once per antigen instead of on every loop iteration.
    ep_starts = literal_eval(antigen_dataset.at[row, "Epitope Starts"])
    ep_ends = literal_eval(antigen_dataset.at[row, "Epitope Ends"])

    # Per-residue mask: 1 where the residue lies inside any epitope.
    # Coordinates are treated as 1-based with an inclusive end.
    translation = [0] * len(sequence)
    for i in range(len(ep_starts)):
        for x in range(ep_starts[i]-1, ep_ends[i]):
            translation[x] = 1

    # One groupby group per maximal run; count the runs whose value is 1.
    counts.append(sum(1 for flag, run in itertools.groupby(translation) if flag == 1))

print("Average number of immunostimulant region, regardless of the number of unique epitopes that compose it: " + str(sum(counts)/len(counts)) + " +/- " + str(stats.sem(counts)))
box_plot(sorted(counts), "Immunostimulant Region Count", "Number of immunostimulant regions")

K-mer frequency analysis for 3 <= k <= 10

In [None]:
#kmer frequency analysis for 3 <= k <= 10
#difference = %epitope area taken - %non-epitope area taken

ks = list(range(3,11))

def kmers(sequence, ks):
    """Yield each distinct k-mer of `sequence` once, in order of first appearance.

    Positions are scanned left to right and, at each position, the lengths in
    `ks` are tried in the given order. Windows that would run past the end of
    the sequence (and are therefore shorter than k) are skipped.

    Args:
        sequence: string to scan.
        ks: iterable of window lengths.
    """
    # A set gives O(1) membership tests; slicing never raises, so no try/except is needed.
    seen = set()
    for i in range(len(sequence)):
        for k in ks:
            mer = sequence[i:i+k]
            if len(mer) == k and mer not in seen:
                seen.add(mer)
                yield mer

# For every antigen, split the sequence into maximal epitope / non-epitope runs and,
# for every distinct k-mer in a run, record the fraction of that antigen's epitope
# (or non-epitope) area the k-mer occupies.
#
# The earlier version derived region boundaries from positions inside the list of
# epitope indices rather than from sequence positions, so the substrings it scanned
# did not correspond to the actual regions (and the final region was usually dropped).
# Regions are now sliced directly from the contiguous runs of the per-residue mask.
ep_counts = {}
non_ep_counts = {}

for row in tqdm(range(antigen_dataset.shape[0])):
    sequence = antigen_dataset.at[row, 'Antigen Sequence']
    ep_starts = literal_eval(antigen_dataset.at[row, "Epitope Starts"])
    ep_ends = literal_eval(antigen_dataset.at[row, "Epitope Ends"])

    # Per-residue mask: 1 where the residue lies inside any epitope.
    # Coordinates are treated as 1-based with an inclusive end.
    translation = [0] * len(sequence)
    for i in range(len(ep_starts)):
        for x in range(ep_starts[i]-1, ep_ends[i]):
            translation[x] = 1

    ep_area = translation.count(1)
    non_ep_area = translation.count(0)

    position = 0
    for flag, run in itertools.groupby(translation):
        run_length = len(list(run))
        sub = sequence[position:position+run_length]
        position += run_length

        # A run with a given flag exists only if that flag's area is non-zero,
        # so the division below cannot hit zero.
        if flag == 1:
            target_counts, area = ep_counts, ep_area
        else:
            target_counts, area = non_ep_counts, non_ep_area

        for mer in kmers(sub, ks):
            target_counts.setdefault(mer, []).append(sub.count(mer)*len(mer)/area)

# Average the per-region fractions over the number of antigens.
ep_counts = {mer: sum(v)/antigen_dataset.shape[0] for mer, v in ep_counts.items()}
non_ep_counts = {mer: sum(v)/antigen_dataset.shape[0] for mer, v in non_ep_counts.items()}

# Positive = k-mer covers more epitope than non-epitope area; sorted most enriched first.
# Only k-mers seen in at least one epitope region are scored.
diff = {mer: ep_counts[mer] - non_ep_counts.get(mer, 0) for mer in ep_counts.keys()}
diff = dict(sorted(diff.items(), key=lambda item: item[1], reverse=True))
print(diff)

Preprocessing and batch training setup

In [None]:
def pad(full, k):
    """Pad `full` with '0' characters on both sides up to length `k`.

    The left share is `round()` of half the missing length (Python rounds
    halves to the nearest even number) and the right side receives the rest.
    Strings already at least `k` long are returned unchanged.
    """
    missing = k - len(full)
    left_count = round(missing / 2)
    right_count = missing - left_count
    # Multiplying a string by a non-positive count gives "", matching an empty loop.
    return "0" * left_count + full + "0" * right_count

def gen_kmers(row, ids, kmers, classes, thresh=1, k=None):
    """Slide a length-k window over one antigen and append one entry per window.

    For every window the antigen's UniProt id is appended to `ids`, the window's
    substring to `kmers`, and a 0/1 label to `classes`. The label is 1 when the
    fraction of window residues lying inside an epitope is at least `thresh`.

    Args:
        row: mapping / Series with 'Antigen Sequence', 'Epitope Starts',
            'Epitope Ends' (stringified lists) and 'Antigen UniProt'.
        ids, kmers, classes: output lists, extended in place.
        thresh: minimum epitope fraction of a window for a positive label.
        k: window length. When omitted, the notebook-level variable `k` is
            used, which is how existing callers supply it.
    """
    if k is None:
        # Backward-compatible fallback to the global window length.
        k = globals()["k"]

    sequence = row['Antigen Sequence']
    ep_starts = literal_eval(row['Epitope Starts'])
    ep_ends = literal_eval(row['Epitope Ends'])

    # Sequences shorter than one window are '0'-padded so they yield exactly one window.
    if len(sequence) < k:
        sequence = pad(sequence, k)

    # Per-residue mask: 1 where the residue lies inside any epitope.
    # Coordinates are treated as 1-based with an inclusive end.
    translation = [0] * len(sequence)
    for i in range(len(ep_starts)):
        for x in range(ep_starts[i]-1, ep_ends[i]):
            translation[x] = 1

    for i in range(len(translation)-k+1):
        ids.append(row['Antigen UniProt'])
        kmers.append(sequence[i:i+k])

        found_pos = translation[i:i+k].count(1)
        classes.append(1 if found_pos/k >= thresh else 0)


def digitize(seq, key):
    """Look up every symbol of `seq` in the table `key` and return the values as a list."""
    codes = []
    for symbol in seq:
        codes.append(key[symbol])
    return codes

# Residue -> integer code table used by pre_processing().
# The twenty letters below get codes 1..20 in this order; the remaining
# letters (B, J, O, U, X, Z) and the pad character '0' all share code 0.
_CODED_RESIDUES = "ACDEFGHIKLMNPQRSTVWY"
key = {residue: code for code, residue in enumerate(_CODED_RESIDUES, start=1)}
key.update(dict.fromkeys("BJOUXZ0", 0))

def pre_processing(kmer):
    """Encode a k-mer as a list of integer codes via the module-level `key` table.

    A one-hot variant, tf.keras.utils.to_categorical(digitize(kmer, key), num_classes=21),
    was previously tried here and is currently disabled.
    """
    encoded = digitize(kmer, key)
    return encoded

def generator(samples, batch_size=1000,shuffle_data=True):
    """Endlessly yield (X, y) batches built from `samples`.

    Each sample is indexed as sample[0] (the k-mer, encoded with
    pre_processing) and sample[1] (the class index, turned into a two-element
    one-hot list). When `shuffle_data` is true, `samples` is shuffled IN PLACE
    with np.random.shuffle at the start of every pass. The final batch of a
    pass may be smaller than `batch_size`.
    """
    total = len(samples)
    while True:  # never terminates; the consumer decides how many batches to draw
        if shuffle_data:
            np.random.shuffle(samples)

        for start in range(0, total, batch_size):
            features = []
            targets = []
            for record in samples[start:start+batch_size]:
                features.append(pre_processing(record[0]))

                one_hot = [0,0]
                one_hot[record[1]] = 1
                targets.append(one_hot)

            yield np.array(features), np.array(targets)

def softmax_binarize(preds):
    """Return, for each prediction vector in `preds`, the index of its largest entry."""
    return [np.argmax(scores) for scores in preds]

10-fold cross validation

In [None]:
def _balanced_kmer_arrays(fold_df):
    """Window every antigen of `fold_df` into k-mers and build a class-balanced sample.

    Uses gen_kmers(), which takes the window length from the notebook-level `k`
    assigned by the loop below.

    Returns:
        (all_kmers_df, balanced_array). `all_kmers_df` has one row per window
        (columns 'ID', 'kmer', 'class'). `balanced_array` is a shuffled object
        array of [kmer, class] rows taken from the de-duplicated k-mers, with
        the negative class down-sampled to the size of the positive class.
    """
    fold_ids, fold_kmers, fold_classes = [], [], []
    for index in range(fold_df.shape[0]):
        gen_kmers(fold_df.iloc[index], fold_ids, fold_kmers, fold_classes)

    all_kmers_df = pd.DataFrame({'ID': fold_ids, 'kmer': fold_kmers, 'class': fold_classes})
    unique_df = all_kmers_df.drop_duplicates(subset="kmer", keep="first")

    pos_df = unique_df[unique_df['class'] == 1]
    neg_df = unique_df[unique_df['class'] == 0]
    # min() keeps sample() valid even if positives outnumber negatives or a class is empty.
    neg_df = neg_df.sample(n=min(len(pos_df), len(neg_df)))
    # The shuffled frame is actually kept (previously the result of sample(frac=1) was discarded).
    balanced_df = pd.concat([pos_df, neg_df]).sample(frac=1)
    return all_kmers_df, balanced_df[['kmer', 'class']].values


def _tpr_fpr(scores, truth, thresh):
    """Return (TPR, FPR) for per-residue `scores` binarised with `score >= thresh`.

    `truth` holds the 0/1 epitope mask and must contain both classes.
    """
    tp = fp = tn = fn = 0
    for score, label in zip(scores, truth):
        if score >= thresh:
            if label == 1:
                tp += 1
            else:
                fp += 1
        else:
            if label == 1:
                fn += 1
            else:
                tn += 1
    return tp/(tp+fn), fp/(fp+tn)


ks = [5,8,9,10,11,12,15,20]

# The `extent` most epitope-enriched and `extent` most depleted k-mers from the
# frequency analysis (`diff` is sorted in descending order). Loop-invariant.
extent = 100
search_diff = dict(list(diff.items())[:extent] + list(diff.items())[-1*extent:])
# Steep logistic centred on 0.5, used to squash the rank-adjusted scores.
sigmoid = lambda x: 1/(1+(np.exp(-10*(x-0.5))))

for k_index in tqdm(range(len(ks))):
    k = ks[k_index]  # read by gen_kmers() as the window length

    cv = 10
    kf = KFold(n_splits=cv, shuffle=True) # Define the split - into 10 folds

    TRAIN_SETS = []
    TEST_SETS = []

    for train_rows, test_rows in kf.split(antigen_dataset):
        # Train only on the rows outside the held-out fold. Previously the whole
        # dataset was appended here, leaking every test antigen into training.
        TRAIN_SETS.append(antigen_dataset.iloc[train_rows])
        TEST_SETS.append(antigen_dataset.iloc[test_rows])

    # ---- training: one model per fold, saved as model_<fold>.h5 ----
    for fold in range(cv):
        train_array = _balanced_kmer_arrays(TRAIN_SETS[fold])[1]
        val_array = _balanced_kmer_arrays(TEST_SETS[fold])[1]

        batch_size = 1000
        val_batch_size = 1000

        print(train_array.shape, val_array.shape)

        train_gen = generator(train_array, batch_size = batch_size, shuffle_data=True)
        val_gen = generator(val_array, batch_size=val_batch_size, shuffle_data=True)

        hidden_size = 128
        model = Sequential()
        model.add(Embedding(21, 21, input_length=k))
        model.add(Bidirectional(LSTM(hidden_size, return_sequences=True), merge_mode="concat"))
        model.add(Bidirectional(LSTM(hidden_size, return_sequences=False), merge_mode="concat"))
        model.add(Dense(10))
        model.add(Dense(2, activation='softmax'))
        model.summary()

        model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=tf.optimizers.Adam(learning_rate=0.01, amsgrad=True))
        callback = [tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0.001, patience=10, restore_best_weights=True, verbose=1, baseline=0.1)]
        # max(1, ...) avoids a zero step count when a fold has fewer rows than one batch.
        history = model.fit(train_gen, epochs=50, verbose=1, validation_data=val_gen,
                            steps_per_epoch=max(1, len(train_array)//batch_size),
                            validation_steps=max(1, len(val_array)//val_batch_size),
                            validation_freq=1, callbacks=callback)

        model.save("model_" + str(fold) + ".h5")

    # ---- evaluation: runs once all folds for this k are trained ----
    # This loop used to sit inside the training loop and reuse `fold`, so it
    # tried to load models that had not been trained yet for the current k.

    #classifier performance
    thresholds = [i*0.05 for i in range(0,21)]

    for fold in range(cv):
        model = load_model("model_" + str(fold) + ".h5")

        test_fold = TEST_SETS[fold]
        og_test_kmer_df, val_array = _balanced_kmer_arrays(test_fold)

        val_batch_size = 1000
        test_gen = generator(val_array, batch_size=val_batch_size, shuffle_data=False)

        # Round the step count up so the final partial batch is scored too. The batch
        # size comes from the generator, so it is not passed to predict().
        preds = model.predict(test_gen, verbose=1, steps=int(np.ceil(len(val_array)/val_batch_size)))
        print(len(preds), len(og_test_kmer_df))
        bins = np.array(softmax_binarize(preds))
        truth = val_array[:,1][:len(bins)].astype(int)

        skplt.metrics.plot_confusion_matrix(truth, bins, normalize=True)
        skplt.metrics.plot_roc_curve(truth, preds)
        plt.show()

        skplt.metrics.plot_precision_recall_curve(truth, preds)
        plt.show()

        print(sklearn.metrics.classification_report(truth,bins))

        #full system performance
        # Per-threshold lists of per-antigen rates: raw model scores vs. rank-adjusted scores.
        tprs = {thresh: [] for thresh in thresholds}
        fprs = {thresh: [] for thresh in thresholds}
        tprs_rank = {thresh: [] for thresh in thresholds}
        fprs_rank = {thresh: [] for thresh in thresholds}

        fs_ids = og_test_kmer_df['ID'].tolist()
        fs_val_array = og_test_kmer_df[['kmer', 'class']].values

        fs_val_batch_size = 1000
        fs_test_gen = generator(fs_val_array, batch_size=fs_val_batch_size, shuffle_data=False)

        predictions = model.predict(fs_test_gen, verbose=1, steps=int(np.ceil(len(fs_val_array)/fs_val_batch_size)))

        print(len(fs_ids), len(predictions))

        predictions = list(predictions)
        # Safety net: mark any window left without a prediction with an empty placeholder.
        for i in range(len(fs_ids) - len(predictions)):
            predictions.append([])
        og_test_kmer_df['predictions'] = predictions

        for antigen_id in tqdm(og_test_kmer_df['ID'].unique()):
            kmer_preds = og_test_kmer_df[og_test_kmer_df['ID'] == antigen_id]['predictions'].tolist()
            # Skip antigens with any unscored window (explicit length test instead of
            # comparing NumPy arrays against []).
            if not all(len(pred) for pred in kmer_preds):
                continue

            antigen_row = test_fold[test_fold['Antigen UniProt'] == antigen_id].iloc[0]
            realigned = [[] for _ in range(int(antigen_row['Antigen Length']))]

            # A sequence of length L must have produced exactly L - k + 1 windows.
            if len(kmer_preds) != len(realigned) - k + 1:
                print("Q issue")
                continue

            # Every residue collects the positive-class probability of each window covering it.
            for x in range(len(kmer_preds)):
                for y in range(x,x+k):
                    realigned[y].append(kmer_preds[x][1])
            realigned = [sum(i)/len(i) for i in realigned]

            sequence = antigen_row["Antigen Sequence"]
            ep_starts = literal_eval(antigen_row["Epitope Starts"])
            ep_ends = literal_eval(antigen_row["Epitope Ends"])

            translation = [0] * len(sequence)
            for i in range(len(ep_starts)):
                for x in range(ep_starts[i]-1, ep_ends[i]):
                    translation[x] = 1

            # TPR / FPR are undefined unless both classes are present.
            if translation.count(1) == 0 or translation.count(1) == len(translation):
                continue

            # Rank adjustment: for residues whose score is within 0.1 of 0.5, average the
            # enrichment values of the search k-mers that overlap them.
            diff_based = [0 for i in range(len(realigned))]
            diff_based_count = [0 for i in range(len(realigned))]
            for target in list(search_diff.keys()):
                # The lookahead pattern also finds overlapping occurrences.
                for start in (m.start() for m in re.finditer('(?='+ re.escape(target) +')', sequence)):
                    for ind in range(start,start+len(target)):
                        if abs(realigned[ind] - 0.5) <= 0.1:
                            diff_based[ind] += search_diff[target]
                            diff_based_count[ind] += 1
            diff_based = [diff_based[i]/diff_based_count[i] if diff_based_count[i] != 0 else 0 for i in range(len(diff_based))]
            diff_based = [sigmoid(((1/(1000*diff_based[i])) + realigned[i])/2) if diff_based[i] != 0 else realigned[i] for i in range(len(realigned))]

            for thresh in thresholds:
                tpr, fpr = _tpr_fpr(realigned, translation, thresh)
                tprs[thresh].append(tpr)
                fprs[thresh].append(fpr)

                #ranking implementation
                tpr, fpr = _tpr_fpr(diff_based, translation, thresh)
                tprs_rank[thresh].append(tpr)
                fprs_rank[thresh].append(fpr)

        # Every threshold list has the same length; empty means nothing could be evaluated.
        if not tprs[thresholds[0]]:
            print("No antigens could be evaluated for fold", fold)
            continue

        tprs = {thresh: sum(v)/len(v) for thresh, v in tprs.items()}
        fprs = {thresh: sum(v)/len(v) for thresh, v in fprs.items()}

        if k == 10:
            print(tprs)
            print(fprs)

        tprs_rank = {thresh: sum(v)/len(v) for thresh, v in tprs_rank.items()}
        fprs_rank = {thresh: sum(v)/len(v) for thresh, v in fprs_rank.items()}

        plt.plot(list(fprs.values()), list(tprs.values()), label="AUC=" + str(round(sklearn.metrics.auc(list(fprs.values()), list(tprs.values())), 3)))
        plt.plot(list(fprs_rank.values()), list(tprs_rank.values()), label="With Ranks AUC=" + str(round(sklearn.metrics.auc(list(fprs_rank.values()), list(tprs_rank.values())), 3)))
        plt.legend()