In [1]:
import numpy as np
from scipy.stats import spearmanr, kendalltau
from sklearn.metrics import ndcg_score

def reliability_scores(D, narrow=False, measure="spearman", aggregation="mean", preprocess="none", idx='external'):
    """Score every model by how well its score vector agrees with the other models'.

    For each key ``k`` of ``D`` the vector ``D[k][idx].dists.numpy()`` is compared
    with the vectors of the other keys, and the pairwise similarities are
    aggregated into one number per model.

    Parameters
    ----------
    D : mapping
        Maps a model key to a container indexed by ``idx`` whose entries expose
        ``.dists.numpy()`` (presumably torch tensors of per-point scores -- TODO confirm).
    narrow : bool
        If True, a model is only compared with the other keys that share
        ``(key[0], key[1], key[-1])``; otherwise it is compared with every other key.
    measure : {"spearman", "KT", "NDCG"}
        Pairwise similarity. "NDCG" is symmetrised by averaging both argument orders.
    aggregation : {"mean", "median"}
        How the pairwise similarities of one model are combined.
    preprocess : {"none", "maxnormalize", "minmaxnormalize", "rank"}
        Transformation applied to every score vector before comparison. Any
        other value leaves the vectors untouched.
    idx : hashable
        Which entry of ``D[key]`` to read.

    Returns
    -------
    numpy.ndarray or int
        One aggregated score per key, in the iteration order of ``D``. For an
        unrecognised ``measure`` or ``aggregation`` a message is printed and
        ``-1`` is returned (kept for backward compatibility).
    """
    if measure not in ("spearman", "KT", "NDCG"):
        print("wrong measure")
        return -1
    if aggregation not in ("mean", "median"):
        print("wrong aggregation")
        return -1

    keys = list(D.keys())
    total_models = len(keys)

    if narrow:
        # NOTE(review): the row width assumes every group holds the same number
        # of runs; uneven groups would not fit the matrix allocated here.
        distinct_models = {(k[0], k[1], k[-1]) for k in keys}
        runs_per_model = total_models // len(distinct_models)
        similarity_matrix = np.zeros((total_models, runs_per_model - 1))
    else:
        similarity_matrix = np.zeros((total_models, total_models - 1))

    # Read every score vector exactly once.
    raw = {k: D[k][idx].dists.numpy() for k in keys}

    # The global extrema are only needed by the two normalising modes.
    if preprocess in ("maxnormalize", "minmaxnormalize"):
        all_dists = np.concatenate([np.ravel(raw[k]) for k in keys])
        maxdist = all_dists.max()
        mindist = all_dists.min()

    def _prepare(dists):
        # Apply the requested preprocessing to one score vector.
        if preprocess == "maxnormalize":
            return dists / maxdist
        if preprocess == "minmaxnormalize":
            return (dists - mindist) / (maxdist - mindist)
        if preprocess == "rank":
            # Rank 1 goes to the largest score; the vector becomes 1/rank.
            order = (-dists).argsort()
            ranks = np.empty_like(order)
            ranks[order] = np.arange(1, len(dists) + 1)
            return 1 / ranks
        return dists

    # Preprocessing depends only on the model, so do it once per model rather
    # than once per pair.
    prepared = {k: _prepare(raw[k]) for k in keys}

    for i, k in enumerate(keys):
        if narrow:
            group = (k[0], k[1], k[-1])
            other_runs = [j for j in keys if j != k and (j[0], j[1], j[-1]) == group]
        else:
            other_runs = [j for j in keys if j != k]

        dists_k = prepared[k]
        for j, l in enumerate(other_runs):
            dists_l = prepared[l]

            if measure == "spearman":
                score = spearmanr(dists_k, dists_l)[0]
            elif measure == "KT":
                score = kendalltau(dists_k, dists_l)[0]
            else:
                # NDCG is not symmetric in (y_true, y_score): average both orders.
                score = (ndcg_score(np.asarray([dists_k]), np.asarray([dists_l]))
                         + ndcg_score(np.asarray([dists_l]), np.asarray([dists_k]))) / 2

            similarity_matrix[i, j] = score

    if aggregation == "mean":
        return np.mean(similarity_matrix, 1)
    return np.median(similarity_matrix, 1)

In [2]:
import numpy as np

def HITS(D, nu = 0.05, iters=10, init = "rank", idx='external'):
    """Run HITS-style mutual reinforcement between models and data points.

    Each model contributes one label vector built from ``D[key][idx].dists``.
    Point weights and model weights are then updated alternately, each being
    L2-normalised after every update.

    Parameters
    ----------
    D : mapping
        Maps a model key to a container indexed by ``idx`` whose entries expose
        a ``dists`` sequence of per-point scores (must be convertible with
        ``np.asarray``).
    nu : float
        Fraction of points treated as the top-scoring set by the "hard" and
        "semisoft" initialisations. At least one point is always selected.
    iters : int
        Number of update rounds; must be at least 1.
    init : {"soft", "hard", "semisoft", "rank"}
        How label vectors are built: raw scores, 0/1 top-set indicator,
        raw scores kept only on the top set, or 1/rank (rank 1 = largest score).
    idx : hashable
        Which entry of ``D[key]`` to read.

    Returns
    -------
    tuple
        ``(key of the model with the largest final weight, point_weights)``.

    Raises
    ------
    ValueError
        If ``iters`` is below 1 or ``init`` is not recognised.
    """
    if iters < 1:
        raise ValueError("iters must be at least 1")

    keys = list(D.keys())
    os_lists = [np.asarray(D[k][idx].dists) for k in keys]
    no_of_models = len(os_lists)
    no_of_points = len(os_lists[0])

    if init == "soft":
        label_list = os_lists
    elif init in ("hard", "semisoft"):
        # int(nu * n) can be 0, and a slice [-0:] would select *every* point;
        # clamp so that between 1 and n points form the top set.
        top_k = min(max(1, int(nu * no_of_points)), no_of_points)
        label_list = []
        for scores in os_lists:
            top = np.argpartition(scores, -top_k)[-top_k:]
            labels = np.zeros(no_of_points)
            labels[top] = 1 if init == "hard" else scores[top]
            label_list.append(labels)
    elif init == "rank":
        label_list = []
        for scores in os_lists:
            order = (-scores).argsort()
            ranks = np.empty_like(order)
            ranks[order] = np.arange(1, len(scores) + 1)
            label_list.append(1 / ranks)
    else:
        raise ValueError("unknown init: %r" % (init,))

    # The label matrix never changes between rounds, so build it once.
    label_matrix = np.stack(label_list)
    model_weights = np.ones(no_of_models)

    for _ in range(iters):
        point_weights = np.matmul(model_weights, label_matrix)
        point_weights = point_weights/np.linalg.norm(point_weights)

        model_weights = np.matmul(label_matrix, point_weights)
        model_weights = model_weights/np.linalg.norm(model_weights)

    best_model = np.argmax(model_weights)
    return keys[best_model], point_weights


In [None]:
def dawidskeene(D, nu = 0.05, iters=10, init = "soft", idx=0):
    """Estimate per-model weights and the class-0 prior with an EM-style loop.

    Label vectors are built from ``D[key][idx].dists`` according to ``init``.
    Model weights start as each model's agreement with the per-point majority
    label, and ``iters`` rounds of the E/M updates below refine them.

    Parameters
    ----------
    D : mapping
        Maps a model key to a container indexed by ``idx`` whose entries expose
        a ``dists`` sequence of per-point scores.
    nu : float
        Fraction of points treated as the top-scoring set by the "hard" and
        "semisoft" initialisations. At least one point is always selected.
    iters : int
        Number of E/M rounds.
    init : {"soft", "hard", "semisoft", "rank"}
        How label vectors are built. The E and M steps test labels for
        equality with 0 and 1, so only "hard" yields purely binary labels.
    idx : hashable
        Which entry of ``D[key]`` to read.

    Returns
    -------
    tuple
        ``(model_weights, pi0)`` after the last round: a list with one weight
        per model and the estimated class-0 prior.

    Raises
    ------
    ValueError
        If ``init`` is not recognised.
    """
    print("init=", init)
    keys = D.keys()
    os_lists = [np.asarray(D[k][idx].dists) for k in keys]
    no_of_models = len(os_lists)
    no_of_points = len(os_lists[0])

    if init == "soft":
        label_list = os_lists
    elif init in ("hard", "semisoft"):
        # int(nu * n) can be 0, and a slice [-0:] would select *every* point;
        # clamp so that between 1 and n points form the top set.
        top_k = min(max(1, int(nu * no_of_points)), no_of_points)
        label_list = []
        for scores in os_lists:
            top = np.argpartition(scores, -top_k)[-top_k:]
            labels = np.zeros(no_of_points)
            labels[top] = 1 if init == "hard" else scores[top]
            label_list.append(labels)
    elif init == "rank":
        label_list = []
        for scores in os_lists:
            order = (-scores).argsort()
            ranks = np.empty_like(order)
            ranks[order] = np.arange(1, len(scores) + 1)
            label_list.append(1 / ranks)
    else:
        raise ValueError("unknown init: %r" % (init,))

    def E(model_weights, pi0=0.95):
        # Posterior of class 0 for every point given the current model weights.
        pi1 = 1-pi0

        rho0s = []
        for j in range(no_of_points):
            # np.prod replaces np.product, which no longer exists in NumPy 2.
            eta1 = np.prod([model_weights[i] if label_list[i][j] == 1 else 1 for i in range(no_of_models)])
            eta0 = np.prod([model_weights[i] if label_list[i][j] == 0 else 1 for i in range(no_of_models)])
            rho0 = (pi0*eta0)/(pi0*eta0 + pi1*eta1)
            rho0s.append(rho0)

        return rho0s

    def M(rho0s):
        # Re-estimate each model's weight and the class-0 prior from the posteriors.
        model_weights = []

        for i in range(no_of_models):
            mw = np.mean([rho0s[j] if label_list[i][j] == 0 else 1-rho0s[j] for j in range(no_of_points)])
            model_weights.append(mw)

        pi0 = np.mean(rho0s)

        return model_weights, pi0

    # Initial weights: how often each model agrees with the majority label.
    label_matrix = np.stack(label_list)
    label_means = np.mean(label_matrix, axis=0)
    majority_labels = label_means >= 0.5

    model_weights = [np.mean(labels == majority_labels) for labels in label_list]

    pi0 = 1- np.mean(majority_labels)

    print(model_weights, pi0)
    for _ in range(iters):
        rho0s = E(model_weights, pi0)
        model_weights, pi0 = M(rho0s)

    # Previously the estimates were computed and then dropped.
    return model_weights, pi0

In [16]:
import pickle

# Load the dictionary of trained models that the later cells read as `D`.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only unpickle files that come from a trusted source.
with open('all_models_Tox21_AhR_training_1213.pkl', 'rb') as f:
    D = pickle.load(f)

In [17]:
from sklearn.metrics import average_precision_score, roc_auc_score
import numpy as np

# Compare model-selection strategies for each of the three stored epoch choices.
# Human-readable heading per epoch-selection key.
selection_titles = {
    'external': "Epoch selection: CH2",
    'converged': "Epoch selection: convergence",
    'last': "Epoch selection: 150th epoch",
}

for idx in ['external', 'converged', 'last']:

    print(selection_titles[idx])

    # Baselines: average over all models, and the model with the lowest SVDD loss.
    all_aps = []
    all_roc_aucs = []
    min_svdd_model = None
    min_svdd = np.inf
    for k in D.keys():
        model = D[k][idx]
        all_aps.append(model.ap)
        all_roc_aucs.append(model.roc_auc)

        if model.svdd_loss < min_svdd:
            min_svdd_model = model
            min_svdd = model.svdd_loss

    # The second "+-" used to be mistyped as "+=" in this message.
    print("\tAverage: AP=%.2f +- %.2f, ROC-AUC=%.2f +- %.2f" % (np.mean(all_aps), np.std(all_aps), np.mean(all_roc_aucs), np.std(all_roc_aucs), ))
    print("\tAt Min SVDD: AP=%.2f, ROC-AUC=%.2f" % (min_svdd_model.ap, min_svdd_model.roc_auc))

    # A disabled grid sweep over narrow x aggregation x measure x preprocess
    # used to sit here as a string literal; only the two configurations below
    # are reported.

    # reliability_scores returns one score per key in the iteration order of D,
    # so argmax positions index directly into this list.
    models = list(D.values())

    rel = reliability_scores(D, narrow=False, measure="spearman", aggregation="median", preprocess="none", idx=idx)
    max_idx = np.argmax(rel)
    v = models[max_idx]
    print("\tMC: AP=%.2f, ROC-AUC=%.2f" % (v[idx].ap, v[idx].roc_auc))

    rel = reliability_scores(D, narrow=True, measure="spearman", aggregation="mean", preprocess="none", idx=idx)
    max_idx = np.argmax(rel)
    v = models[max_idx]
    print("\tUDR: AP=%.2f, ROC-AUC=%.2f" % (v[idx].ap, v[idx].roc_auc))

    for init in ["soft", "rank"]:

        if init=="soft":
            print("\tHITS using actual scores:")
        elif init=="rank":
            print("\tHITS using 1/rank:")
        best_hits_key, combined_hits_scores = HITS(D, init=init, idx=idx)
        print("\t\tBest model: AP=%.2f, ROC-AUC=%.2f" % (D[best_hits_key][idx].ap, D[best_hits_key][idx].roc_auc))

        # The HITS point weights act as ensemble scores, evaluated against the
        # labels stored with the selected model.
        labels = D[best_hits_key][idx].labels
        ap = average_precision_score(labels, combined_hits_scores)
        roc_auc = roc_auc_score(labels, combined_hits_scores)
        print("\t\tEnsemble model: AP=%.2f, ROC-AUC=%.2f" % (ap, roc_auc))

    print("\n\n")

Epoch selection: CH2
	Average: AP=0.17 +- 0.03, ROC-AUC=0.70 += 0.04
	At Min SVDD: AP=0.15, ROC-AUC=0.63
	MC: AP=0.17, ROC-AUC=0.67
	UDR: AP=0.14, ROC-AUC=0.65
	HITS using actual scores:
		Best model: AP=0.14, ROC-AUC=0.67
		Ensemble model: AP=0.18, ROC-AUC=0.77
	HITS using 1/rank:
		Best model: AP=0.18, ROC-AUC=0.70
		Ensemble model: AP=0.22, ROC-AUC=0.75



Epoch selection: convergence
	Average: AP=0.17 +- 0.03, ROC-AUC=0.69 += 0.03
	At Min SVDD: AP=0.17, ROC-AUC=0.74
	MC: AP=0.19, ROC-AUC=0.71
	UDR: AP=0.15, ROC-AUC=0.68
	HITS using actual scores:
		Best model: AP=0.11, ROC-AUC=0.67
		Ensemble model: AP=0.13, ROC-AUC=0.68
	HITS using 1/rank:
		Best model: AP=0.18, ROC-AUC=0.69
		Ensemble model: AP=0.20, ROC-AUC=0.74



Epoch selection: 150th epoch
	Average: AP=0.17 +- 0.03, ROC-AUC=0.70 += 0.04
	At Min SVDD: AP=0.17, ROC-AUC=0.74
	MC: AP=0.21, ROC-AUC=0.76
	UDR: AP=0.19, ROC-AUC=0.70
	HITS using actual scores:
		Best model: AP=0.17, ROC-AUC=0.72
		Ensemble model: AP=0.18, ROC-AUC=0.

In [5]:
def add_scatter_plot(D, ax, color, selected_indices=["external"], cutoff = None, find_knee = False):
    """Scatter the selected epochs of every model on ``ax`` and summarise them as text.

    Each epoch is drawn at ``(svdd_loss, -ch2)`` with a marker area of
    ``50 * ap**2``. Epochs whose AP is among the ten highest distinct AP values
    get a square marker and an AP annotation.

    Parameters
    ----------
    D : mapping
        Its values are indexed by the entries of ``selected_indices``; each
        resulting epoch object must expose ``ap``, ``roc_auc``, ``svdd_loss``,
        ``ch2`` and ``dists``.
    ax : matplotlib axes
        Axes to draw on.
    color : str
        Colour used for markers, annotations and the frontier line.
    selected_indices : sequence
        Which entries of every model to plot.
    cutoff : float or None
        Quantile (0..1) of the sorted SVDD losses used as the upper bound for
        the averages and the knee search; ``None`` keeps every epoch.
    find_knee : bool
        If True, draw the Pareto frontier of the retained epochs and report its
        knee point (uses ``find_pareto_frontier`` and the ``kneed`` package).

    Returns
    -------
    str
        Multi-line summary text.

    Notes
    -----
    Annotation boxes are styled with the module-level ``bbox_props`` dict,
    which must be defined before this function is called.
    """
    top_n = 10  # how many of the best distinct AP values get highlighted

    output_string = ""

    # Flatten once, in the same model-major order used everywhere below.
    epochs = [v[selected_index] for v in D.values() for selected_index in selected_indices]

    all_APs = sorted(set(e.ap for e in epochs), reverse=True)
    all_SVDDs = sorted(e.svdd_loss for e in epochs)

    if cutoff is None:
        cutoff_SVDD = all_SVDDs[-1]
    else:
        # Clamp so that cutoff=1.0 maps to the largest loss instead of
        # indexing one past the end.
        cutoff_SVDD = all_SVDDs[min(int(cutoff*len(all_SVDDs)), len(all_SVDDs) - 1)]

    for selected_index in selected_indices:
        if selected_index == "external":
            output_string += "\nFor CH2-stop epochs with SVDD below (%.4f)" % (cutoff_SVDD)
        elif selected_index == "converged":
            output_string += "\nFor conv-stop epochs with SVDD below (%.4f)" % (cutoff_SVDD)
        elif selected_index == "last":
            output_string += "\nFor 150th epochs with SVDD below (%.4f)" % (cutoff_SVDD)

    min_svdd = np.inf
    total_AP = 0
    total_ROC = 0

    ch_epochs = []

    # Look-up tables from plotted coordinates back to the metrics.
    AP_dict = {}
    ROC_dict = {}
    for epoch in epochs:
        point = (epoch.svdd_loss, -epoch.ch2)

        if epoch.svdd_loss < min_svdd:
            min_svdd = epoch.svdd_loss
            min_svdd_AP = epoch.ap
            min_svdd_ROC = epoch.roc_auc

        # Draw on the axes that were passed in (the global ax2 used to be
        # hard-coded here, silently ignoring the ``ax`` argument).
        if len(all_APs) < top_n or epoch.ap >= all_APs[top_n - 1]:
            ax.scatter(epoch.svdd_loss, -epoch.ch2, s=50*epoch.ap*epoch.ap, color=color, alpha=0.4, linewidths=0, marker='s')
            ax.annotate("%0.2f" % (epoch.ap), point, fontsize=3, color=color, bbox=bbox_props)
        else:
            ax.scatter(epoch.svdd_loss, -epoch.ch2, s=50*epoch.ap*epoch.ap, color=color, alpha=0.4, linewidths=0)

        AP_dict[point] = epoch.ap
        ROC_dict[point] = epoch.roc_auc

        if epoch.svdd_loss <= cutoff_SVDD:
            ch_epochs.append(epoch)
            total_AP += epoch.ap
            total_ROC += epoch.roc_auc

    output_string += "\nAP at min SVDD = %.2f\naverage AP = %.2f" % (min_svdd_AP, total_AP/len(ch_epochs))
    output_string += "\nROCAUC at min SVDD = %.2f\naverage ROCAUC = %.2f" % (min_svdd_ROC, total_ROC/len(ch_epochs))

    if find_knee:
        Xs = [e.svdd_loss for e in ch_epochs]
        Ys = [-e.ch2 for e in ch_epochs]

        pf_X, pf_Y = find_pareto_frontier(Xs, Ys)
        for x,y in zip(pf_X, pf_Y):
            AP = AP_dict[(x,y)]
            ROC = ROC_dict[(x,y)]
            ax.annotate("%0.2f (%0.2f)" % (AP, ROC), (x, y), fontsize=4, color=color, bbox=bbox_props)
        ax.plot(pf_X, pf_Y, color=color, alpha=1)

        # Imported lazily so the function can be defined before kneed is installed.
        from kneed import KneeLocator
        kneedle = KneeLocator(pf_X, pf_Y, S=1.0, curve="convex", direction="decreasing")
        try:
            # The dict look-ups raise KeyError when the reported knee is not
            # one of the plotted points (for example when no knee was found).
            output_string += "\nKnee point: (%.4f, %.2f)\nAP at knee = %.2f; ROCAUC at knee = %.2f" % (kneedle.knee, kneedle.knee_y, AP_dict[(kneedle.knee, kneedle.knee_y)], ROC_dict[(kneedle.knee, kneedle.knee_y)])
        except KeyError:
            output_string += "\nKnee point not found"

    # A disabled string-literal block sketching best-NDCG / best-KT / HITS
    # markers (and the list of distances it needed) was removed as dead code.
    return output_string

In [6]:
def find_pareto_frontier(Xs, Ys):
    """Return the points that form a non-increasing staircase in Y when ordered by X.

    The pairs ``(Xs[i], Ys[i])`` are sorted ascending (by X, then Y). The first
    pair always belongs to the frontier; every later pair is kept when its Y is
    less than or equal to the Y of the most recently kept pair.

    Returns two lists: the X values and the Y values of the kept pairs.
    """
    ordered = sorted((Xs[i], Ys[i]) for i in range(len(Xs)))

    first_x, lowest_y = ordered[0]
    pf_X = [first_x]
    pf_Y = [lowest_y]

    for x, y in ordered[1:]:
        if y <= lowest_y:
            pf_X.append(x)
            pf_Y.append(y)
            lowest_y = y

    return pf_X, pf_Y

In [7]:
# kneed provides KneeLocator, which add_scatter_plot imports when find_knee=True.
# %pip installs into the running kernel's environment; the version is pinned
# to the one this notebook was run with.
%pip install kneed==0.7.0

Collecting kneed
  Downloading https://files.pythonhosted.org/packages/c3/6b/e130913aaaad1373060e259ab222ca2330672db696b297b082c3f3089fcc/kneed-0.7.0-py2.py3-none-any.whl
Installing collected packages: kneed
Successfully installed kneed-0.7.0


In [18]:
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib import rcParams
rcParams.update({'figure.autolayout': True})
import numpy as np

# Build one scatter figure with the three epoch-selection strategies overlaid,
# print the accompanying summary, and export the figure as a PNG.
colors = ["blue", "black", "red"]

# Shared annotation box style; add_scatter_plot reads this module-level name.
bbox_props = dict(boxstyle="round", fc="w", ec="none", alpha=0.7)

fig2, ax2 = plt.subplots()
ax2.set(xlabel='SVDD loss', ylabel='-CH')
ax2.title.set_text('All points')

# Legend proxies: one line per colour, labelled in the same order as `colors`.
lines = [Line2D([0], [0], color=c) for c in colors]
labels = []
for label in ("at min CH value", "at convergence", "at epoch 150"):
    labels.append(label)

# (epoch-selection key, whether to draw the Pareto frontier and knee)
series = [("external", True), ("converged", False), ("last", False)]
summaries = []
for series_color, (selection, with_knee) in zip(colors, series):
    summaries.append(
        add_scatter_plot(D, ax2, series_color, selected_indices=[selection], find_knee=with_knee)
    )
txtstrng = "\n\n".join(summaries)

print(txtstrng)

ax2.text(0.7, 0.8, txtstrng, transform=ax2.transAxes, fontsize=4, verticalalignment='top')

ax2.legend(lines, labels, bbox_to_anchor=(0.8, 1), loc='upper left', prop={'size': 4})

# No extension is given here; the download below expects "scatter.png".
fig2.savefig("scatter", dpi=1000)
from google.colab import files
files.download("scatter.png")
plt.close(fig2)


For CH2-stop epochs with SVDD below (3.9419)
AP at min SVDD = 0.15
average AP = 0.17
ROCAUC at min SVDD = 0.63
average ROCAUC = 0.70
Knee point: (0.1365, -24417743.88)
AP at knee = 0.15; ROCAUC at knee = 0.63


For conv-stop epochs with SVDD below (5.9469)
AP at min SVDD = 0.17
average AP = 0.17
ROCAUC at min SVDD = 0.74
average ROCAUC = 0.69


For 150th epochs with SVDD below (1.2974)
AP at min SVDD = 0.17
average AP = 0.17
ROCAUC at min SVDD = 0.74
average ROCAUC = 0.70


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [29]:
# Removes every .pkl file in the working directory, which includes the model
# pickle opened earlier in this notebook; that file must be restored before
# the notebook can be run again from the top.
rm *.pkl