In [1]:
import argparse
import math
import os
from heapq import heappush, heappop, heappushpop

import matplotlib.pyplot as plotter
import numpy as np
import pandas as pd
from scipy.stats import rankdata
from sklearn.metrics import roc_auc_score, roc_curve

from utils.ens import *

In [2]:
pd.set_option("display.width", 180)
pd.set_option("display.max_rows", None)

## Load Data

In [3]:
path = './data/csvs'
gt_path = './data'

In [4]:
# Ground truth
gt = pd.read_json(os.path.join(gt_path, 'dev_seen.jsonl'), lines=True)

dev, ts, tu = {}, {}, {}
print('Loading data:')
for csv in sorted(os.listdir(path)):
    if ".csv" in csv:
        print(csv)
        name = csv.split('_')[0]
        if ("dev" in csv) or ("val" in csv):
            dev[name] = pd.read_csv(os.path.join(path, csv))
            dev_idx = dev[name].id.values
        elif "test_unseen" in csv:
            tu[name] = pd.read_csv(os.path.join(path, csv))
            tu_idx = tu[name].id.values
        elif "test_seen" in csv:
            ts[name] = pd.read_csv(os.path.join(path, csv))
            ts_idx = ts[name].id.values

dev_probas = pd.DataFrame({k: v.proba.values for k, v in dev.items()})
#dev_probas.set_index(dev_idx, inplace=True)
ts_probas = pd.DataFrame({k: v.proba.values for k, v in ts.items()})
#test_seen_probas.set_index(ts_idx, inplace=True)
tu_probas = pd.DataFrame({k: v.proba.values for k, v in tu.items()})
#test_unseen_probas.set_index(tu_idx, inplace=True)

Loading data:
O365050_dev_seen_SA.csv
O365050_test_seen_SA.csv
O365050_test_unseen_SA.csv
U365072_dev_seen_SA.csv
U365072_test_seen_SA.csv
U365072_test_unseen_SA.csv
VLMDB_dev_seen_SA.csv
VLMDB_test_seen_SA.csv
VLMDB_test_unseen_SA.csv


## Utils

In [5]:
def average(data, weights=None):
    N = data.shape[1]
    if weights is None:
        weights = [1/N] * N
    elif np.sum(weights) != 1.:
        weights = weights / np.sum(weights)
    
    # Compute weighted avg
    return data.apply(lambda row: row.multiply(weights).sum(), axis=1)

In [6]:
def acc_from_roc(labels, probas, splits=None):
    '''Determines the greatest achievable accuracy from the ROC curve.'''
    if splits is None:
        splits = (250, 250)

    fpr, tpr, thresholds = roc_curve(labels, probas)
    tp = tpr * splits[0]
    tn = (1 - fpr) * splits[1]
    acc = (tp + tn) / np.sum(splits)
    best_threshold = thresholds[np.argmax(acc)]

    return np.amax(acc), best_threshold

## Main Loop

In [7]:
loop, last_score, delta = 0, 0, 0.1

while delta > 0.0001:

    # Individual AUROCs
    print('\n' + '-' * 21 , 'ROUND ' + str(loop) , '-' * 21)
    print("Individual AUROCs for Validation Sets:\n")
    for i, column in enumerate(dev_probas):   
        score = roc_auc_score(gt.label, dev_probas.iloc[:, i])
        print(column, score)

    # Drop worst performing sets
    if loop > 0:
        print('\n' + '-' * 50)
        scores = dev_probas.apply(lambda col: roc_auc_score(gt.label, col), result_type='reduce')
        while len(scores) > 5:
            worst = scores.idxmin()
            #del dev[worst]
            dev_probas.drop(worst, axis=1, inplace=True)
            ts_probas.drop(worst, axis=1, inplace=True)
            tu_probas.drop(worst, axis=1, inplace=True)
            scores.drop(worst, inplace=True)
            print("Dropped:", worst)

    # Spearman Correlations:
    print('\n' + '-' * 50)
    print("Spearman Corrs:")
    dev_corr = dev_probas.corr(method='spearman')
    test_seen_corr = ts_probas.corr(method='spearman')
    test_unseen_corr = tu_probas.corr(method='spearman')

    print('\n', dev_corr)
    print('\n', test_seen_corr)
    print('\n', test_unseen_corr)
    print('\n' + '-' * 50)

    # Simple
    print('Simple:')
    weights_dev = Simplex(dev_probas, gt.label)
    dev_probas[f'dev_SX_{loop}'] = average(dev_probas, weights=weights_dev)
    ts_probas[f'ts_SX_{loop}'] = average(ts_probas, weights=weights_dev)
    tu_probas[f'tu_SX_{loop}'] = average(tu_probas, weights=weights_dev)
    score = roc_auc_score(gt.label, dev_probas[f'dev_SX_{loop}'])
    print(f"AUROC: {score:.4f}")
    print(f"Accuracy: {acc_from_roc(gt.label, dev_probas[f'dev_SX_{loop}'])[0]:.4f}")
    print('\n' + '-' * 50)
    
    # Arithmetic Mean
    print('Arithmetic Mean:')
    dev_probas[f'dev_AM_{loop}'] = average(dev_probas.apply(np.exp))
    ts_probas[f'ts_AM_{loop}'] = average(ts_probas.apply(np.exp))
    tu_probas[f'tu_AM_{loop}'] = average(tu_probas.apply(np.exp))
    print(f"AUROC: {roc_auc_score(gt.label, dev_probas[f'dev_AM_{loop}']):.4f}")
    print(f"Accuracy: {acc_from_roc(gt.label, dev_probas[f'dev_AM_{loop}'])[0]:.4f}")
    print('\n' + '-' * 50)
    
    # Geometric Mean (remain in logspace)
    print('Geometric Mean:')
    dev_probas[f'dev_GM_{loop}'] = average(dev_probas)
    ts_probas[f'ts_GM_{loop}'] = average(ts_probas)
    tu_probas[f'tu_GM_{loop}'] = average(tu_probas)
    print(f"AUROC: {roc_auc_score(gt.label, dev_probas[f'dev_GM_{loop}']):.4f}")
    print(f"Accuracy: {acc_from_roc(gt.label, dev_probas[f'dev_GM_{loop}'])[0]:.4f}")
    print('\n' + '-' * 50)

    # TODO: Power Average
    '''
    print('Power Average:')
    dev_PA = simple_average(dev_probas, dev[0], power=2, normalize=True)
    test_PA = simple_average(test_probas, test[0], power=2, normalize=True)
    test_unseen_PA = simple_average(test_unseen_probas, test_unseen[0], power=2, normalize=True)
    print(roc_auc_score(dev_df.label, dev_PA.proba), accuracy_score(dev_df.label, dev_PA.label))
    print('\n' + '-' * 50)
    '''
    
    # Rank Average
    print('Rank Average:')
    dev_probas[f'dev_RA_{loop}'] = average(dev_probas.apply(lambda col: rankdata(col) / len(col)))
    ts_probas[f'ts_RA_{loop}'] = average(ts_probas.apply(lambda col: rankdata(col) / len(col)))
    tu_probas[f'tu_RA_{loop}'] = average(tu_probas.apply(lambda col: rankdata(col) / len(col)))
    print(f"AUROC: {roc_auc_score(gt.label, dev_probas[f'dev_RA_{loop}']):.4f}")
    print(f"Accuracy: {acc_from_roc(gt.label, dev_probas[f'dev_RA_{loop}'])[0]:.4f}")
    print('\n' + '-' * 50)
    
    # Calculate Delta & increment loop
    delta = abs(score - last_score)
    last_score = score

    loop += 1

    print("Currently at {} after {} loops.".format(last_score, loop))

dev_best = dev_probas[f'dev_SX_{loop-1}']
ts_best = ts_probas[f'ts_SX_{loop-1}']
tu_best = tu_probas[f'tu_SX_{loop-1}']
print("Finished!")


--------------------- ROUND 0 ---------------------
Individual AUROCs for Validation Sets:

O365050 0.7678225664495687
U365072 0.7847370021283063
VLMDB 0.751900273639404

--------------------------------------------------
Spearman Corrs:

           O365050   U365072     VLMDB
O365050  1.000000  0.844154  0.739764
U365072  0.844154  1.000000  0.733841
VLMDB    0.739764  0.733841  1.000000

           O365050   U365072     VLMDB
O365050  1.000000  0.816988  0.788083
U365072  0.816988  1.000000  0.740760
VLMDB    0.788083  0.740760  1.000000

           O365050   U365072     VLMDB
O365050  1.000000  0.807346  0.761874
U365072  0.807346  1.000000  0.718090
VLMDB    0.761874  0.718090  1.000000

--------------------------------------------------
Simple:
500
Optimizing 3 inputs.
Optimized = 0.7918100206429726
Weights = [0.16097738 0.67080073 0.13244844]
AUROC: 0.7918
Accuracy: 0.7139

--------------------------------------------------
Arithmetic Mean:
AUROC: 0.7723
Accuracy: 0.6919

------

## Dump Output

In [8]:
experiment = 'OVU2'

In [11]:
 # Get accuracy thresholds & optimize (This does not add value to the roc auc, but just to also have an acc score)
acc, threshold = acc_from_roc(gt.label, dev_best)

# As Simplex at some point simply weighs the highest of all - lets take sx as the final prediction after x loops
ts_labels = ts_best.apply(lambda x: 1 if x > threshold else 0)
ts_out = pd.DataFrame({'id': ts_idx, 'proba': ts_best, 'label': ts_labels})
tu_labels = tu_best.apply(lambda x: 1 if x > threshold else 0)
tu_out = pd.DataFrame({'id': tu_idx, 'proba': tu_best, 'label': tu_labels})
ts_out.to_csv(os.path.join(path, f"final/FIN_test_seen_{experiment}_{loop}.csv"), index=False)
tu_out.to_csv(os.path.join(path, f"final/FIN_test_unseen_{experiment}_{loop}.csv"), index=False)

In [12]:
acc

0.7138867996991567