In [None]:
import mti
import nab
import rbo
import auto_param
import time
import numpy as np
import pandas as pd
from sklearn import metrics
from Chap4_other_metrics import etapr, vus, affiliation, pate

In [None]:
### Example of computing all metrics for an example dataset

def get_thresholds_fpr_pos(labels, score, thresholds, max_fpr=0.1):
    """Return the position of the first threshold whose false-positive rate is acceptable.

    A sample is predicted positive when ``score > threshold``. The false-positive
    rate (FPR) is the fraction of negative samples (``labels == 0``) that are
    predicted positive.

    Parameters
    ----------
    labels : array-like
        Ground-truth labels; samples equal to 0 are the negatives.
    score : array-like
        Anomaly scores, one per sample.
    thresholds : sequence of float
        Candidate thresholds, scanned in the given order.
    max_fpr : float, default 0.1
        Largest FPR a threshold may produce to be accepted.

    Returns
    -------
    int
        Index in ``thresholds`` of the first accepted threshold, or
        ``len(thresholds) - 1`` when none is accepted (including when there
        are no negative samples, for which the FPR is undefined).
    """
    labels = np.asarray(labels)
    score = np.asarray(score)

    # Only negatives contribute to the FPR, so select them once instead of
    # rebuilding a full confusion matrix per threshold. Counting directly also
    # avoids the unpacking failure of a 1x1 confusion matrix when labels and
    # predictions contain a single class.
    negative_scores = score[labels == 0]
    n_negatives = negative_scores.size

    if n_negatives:
        for i_thresh, thresh in enumerate(thresholds):
            false_positives = np.count_nonzero(negative_scores > thresh)
            if false_positives / n_negatives <= max_fpr:
                return i_thresh
    return len(thresholds) - 1

def get_thresholds_fpr(labels, score, thresholds, max_fpr=0.1):
    """Keep the thresholds whose false-positive rate does not exceed ``max_fpr``.

    A sample is predicted positive when ``score > threshold``. The false-positive
    rate (FPR) is the fraction of negative samples (``labels == 0``) that are
    predicted positive.

    Parameters
    ----------
    labels : array-like
        Ground-truth labels; samples equal to 0 are the negatives.
    score : array-like
        Anomaly scores, one per sample.
    thresholds : iterable of float
        Candidate thresholds.
    max_fpr : float, default 0.1
        Largest FPR a threshold may produce to be kept.

    Returns
    -------
    numpy.ndarray
        The accepted thresholds, in their original order. Empty when no
        threshold qualifies or when there are no negative samples (the FPR is
        undefined in that case).
    """
    labels = np.asarray(labels)
    score = np.asarray(score)

    # Only negatives contribute to the FPR; counting them directly avoids
    # building a confusion matrix per threshold and the unpacking failure of a
    # 1x1 matrix when labels and predictions contain a single class.
    negative_scores = score[labels == 0]
    n_negatives = negative_scores.size
    if not n_negatives:
        return np.array(list())

    thresholds_fpr = [
        thresh
        for thresh in thresholds
        if np.count_nonzero(negative_scores > thresh) / n_negatives <= max_fpr
    ]
    return np.array(thresholds_fpr)

def compute_all_metrics(metrics: list, metrics_names, tresholded_metrics: list, labels, scores, scores_names, n_thresholds=100, pos_label=1, max_fpr=0.1):
    """Evaluate every metric on every score column and tabulate the results.

    For each column of ``scores``, ``n_thresholds`` candidate thresholds are
    spread evenly between the 1% and 99% quantiles of the column and filtered
    with ``get_thresholds_fpr`` (using ``max_fpr``).

    * A thresholded metric is called as ``metric(labels, pred, pos_label=pos_label)``
      for each kept threshold, where ``pred`` is 1 where ``score > threshold``
      and 0 elsewhere. The reported value is the one with the largest absolute
      value; a ``ValueError`` raised by the metric counts as 0 for that
      threshold, and the result is 0 when no threshold is kept.
    * Any other metric is called once as ``metric(labels, score, max_fpr=max_fpr)``.

    The name of each metric is printed as it is evaluated.

    Parameters
    ----------
    metrics : list of callables
        Metric functions. NOTE: inside this function the name shadows the
        ``metrics`` module imported at the top of the file.
    metrics_names : sequence of str
        Column names of the returned frame, parallel to ``metrics``.
    tresholded_metrics : list of bool
        Parallel to ``metrics``; truthy when the metric takes binary predictions.
    labels : array-like
        Ground-truth labels.
    scores : numpy.ndarray
        2-D array with one score series per column.
    scores_names : sequence
        Index of the returned frame, one entry per score column.
    n_thresholds : int, default 100
        Number of candidate thresholds per score column.
    pos_label : default 1
        Forwarded to thresholded metrics.
    max_fpr : float, default 0.1
        Used for threshold filtering and forwarded to non-thresholded metrics.

    Returns
    -------
    pandas.DataFrame
        One row per score column, one column per metric.
    """
    n_scores = scores.shape[1]
    metrics_outputs = np.zeros((n_scores, len(metrics)))
    for i_score in range(n_scores):
        score = scores[:, i_score]
        thresholds_raw = np.linspace(np.quantile(score, 0.01), np.quantile(score, 0.99), n_thresholds)
        kept_thresholds = get_thresholds_fpr(labels, score, thresholds_raw, max_fpr)
        for j_metric, metric in enumerate(metrics):
            print(metrics_names[j_metric])
            if tresholded_metrics[j_metric]:
                thresh_values = np.zeros(len(kept_thresholds))
                for j_thresh, thresh in enumerate(kept_thresholds):
                    ij_pred = np.zeros(score.size)
                    ij_pred[score > thresh] = 1
                    try:
                        # Forward the caller's pos_label instead of a hard-coded 1.
                        thresh_values[j_thresh] = metric(labels, ij_pred, pos_label=pos_label)
                    except ValueError:
                        # Best effort: a metric that cannot score this prediction contributes 0.
                        thresh_values[j_thresh] = 0
                # With no kept threshold the pre-filled 0 is left in place
                # (np.argmax would raise on an empty array).
                if thresh_values.size:
                    metrics_outputs[i_score, j_metric] = thresh_values[np.argmax(np.abs(thresh_values))]
            else:
                metrics_outputs[i_score, j_metric] = metric(labels, score, max_fpr=max_fpr)

    df_metrics = pd.DataFrame(metrics_outputs, columns=metrics_names, index=scores_names)
    return df_metrics


def compute_all_metrics_rbo(ground_truth_ranking: list, rbo_params: list, metrics: list, metrics_names, tresholded_metrics: list,
                            labels, scores, scores_names, n_thresholds=100, pos_label=1, max_fpr=0.1):
    """Compare the ranking induced by each metric with a reference ranking.

    The metric table is obtained from ``compute_all_metrics``. For every metric
    the table is re-sorted by that metric in descending order (each sort starts
    from the row order left by the previous one), the resulting index is
    printed next to the metric name, and ``rbo.rbo`` is evaluated against
    ``ground_truth_ranking`` once per value in ``rbo_params`` (passed as ``p``).

    Returns
    -------
    pandas.DataFrame
        Rows indexed by ``rbo_params``, columns named by ``metrics_names``.
    """
    ranked = compute_all_metrics(metrics, metrics_names, tresholded_metrics, labels, scores, scores_names,
                                 n_thresholds, pos_label, max_fpr)
    n_metrics = len(metrics_names)
    rbo_values = np.zeros((len(rbo_params), n_metrics))

    for col in range(n_metrics):
        metric_name = metrics_names[col]
        ranked = ranked.sort_values(by=metric_name, ascending=False)
        ranking = ranked.index.to_numpy()
        print(metric_name, ranking)
        for row, p_value in enumerate(rbo_params):
            rbo_values[row, col] = rbo.rbo(ground_truth_ranking, ranking, p=p_value)

    return pd.DataFrame(rbo_values, columns=metrics_names, index=rbo_params)

def mti_compute(labels, pred, pos_label):
    """Score binary predictions with ``mti.mti``, forwarding ``pos_label``."""
    mti_value = mti.mti(labels, pred, pos_label=pos_label)
    return mti_value

def nab_compute(labels, pred, pos_label):
    """Score binary predictions with ``nab.nab``, forwarding ``pos_label``."""
    nab_value = nab.nab(labels, pred, pos_label=pos_label)
    return nab_value

def etapr_compute(labels, pred, pos_label):
    """Return the ``'f1'`` entry of ``etapr.evaluate_w_streams(labels, pred)``.

    ``pos_label`` is accepted so the function can be called like the other
    thresholded metrics; it is not used.
    """
    evaluation = etapr.evaluate_w_streams(labels, pred)
    return evaluation['f1']

def pate_compute(labels, score, max_fpr):
    """Evaluate ``pate.PATE`` on raw scores with ``pos_label=1`` and 100 desired thresholds.

    ``max_fpr`` is accepted so the function can be called like the other
    score-based metrics; it is not used.
    """
    pate_value = pate.PATE(labels, score, pos_label=1, num_desired_thresholds=100)
    return pate_value

def mcc_compute(labels, pred, pos_label):
    """Return ``metrics.matthews_corrcoef(labels, pred)``.

    ``pos_label`` is accepted so the function can be called like the other
    thresholded metrics; it is not used.
    """
    mcc_value = metrics.matthews_corrcoef(labels, pred)
    return mcc_value

def VUS_compute(labels, score, max_fpr):
    """Return element 6 of ``vus.generate_curve`` run with a sliding window of 50.

    ``max_fpr`` is accepted so the function can be called like the other
    score-based metrics; it is not used.
    """
    curve_outputs = vus.generate_curve(y_true=labels, y_score=score, slidingWindow=50)
    # NOTE(review): index 6 is presumably the VUS value reported as 'VUS' -- confirm against vus.generate_curve.
    return curve_outputs[6]

In [None]:
df0 = pd.read_csv('Chap4_SyntheticExamples/synthetic_simple_cases.csv')

# df0.iloc[2500:2850, 1] = df0.iloc[2150:2500, 1]

# Remap the labels on a private copy: 1 -> 0 first, then -1 -> 1 (the order
# matters). Writing into the array returned by `df0.labels.values` would
# silently alter `df0`, and fails on a read-only array when pandas
# Copy-on-Write is enabled.
labels = df0.labels.to_numpy(copy=True)
labels[labels == 1] = 0
labels[labels == -1] = 1

# Every column after the first is treated as a score series.
# NOTE(review): assumes `labels` is the first column of the CSV -- confirm.
scores = df0.values[:, 1:]
scores_names = df0.columns.to_numpy()[1:]

# The three lists below are parallel: callable, display name, and whether the
# metric takes thresholded (binary) predictions rather than raw scores.
metrics_to_compute = [
    mti_compute,
    nab_compute,
    metrics.f1_score,
    metrics.roc_auc_score,
    affiliation.affiliation_f1_score,
    etapr_compute,
    mcc_compute,
    VUS_compute,
    pate_compute,
]
metrics_name = ['mti', 'NAB', 'F1', 'AUC', 'Affiliation', 'eTaPR', 'MCC', 'VUS', 'PATE']
metrics_thresholded = [True, True, True, False, True, True, True, False, False]
assert len(metrics_to_compute) == len(metrics_name) == len(metrics_thresholded), \
    "metric callables, names and thresholded flags must stay aligned"

ground_truth = ['A', 'B', 'C', 'D', 'E']
rbo_params = [0.2, 0.5, 1]

print(compute_all_metrics_rbo(ground_truth, rbo_params, metrics_to_compute, metrics_name, metrics_thresholded, labels, scores, scores_names))

In [None]:
## AutoParam example

# Load the frame used by the AutoParam runs: its `labels` column and its named
# prediction columns are read by the code that follows.
df_ex2 = pd.read_csv('Chap5_SyntheticCollections/df_onearea_auto.csv')

def mti_get_simple_components(y_true, y_pred, pos_label=1):
    """Return four component scores of an ``mti.MTI`` evaluation.

    An ``mti.MTI`` object is created with ``"default"`` for its anticipation
    period, earliness period and inertia delay, ``compute_metrics`` is run on
    the inputs, and the following attributes are returned as a list, in order:
    ``recall_score``, ``masked_specificity_score``, ``anticipation_score``,
    ``alarm_cardinality_score``.
    """
    evaluator = mti.MTI(anticipation_period="default", earliness_period="default", inertia_delay="default")
    evaluator.compute_metrics(y_true, y_pred, pos_label)
    component_attrs = (
        'recall_score',
        'masked_specificity_score',
        'anticipation_score',
        'alarm_cardinality_score',
    )
    return [getattr(evaluator, attr) for attr in component_attrs]

# Reference labels and the prediction columns to be ranked, best first.
labels = df_ex2.labels.values
names = ['Full', 'Two First Half', 'Two Spaced', 'Inertia_detect', 'Detect + FP', 'Reverse']
gt_ranking = [[name] for name in names]

# One row of four MTI component scores per prediction column.
mti_components = np.zeros((len(gt_ranking), 4))
for i_d, dataset in enumerate(gt_ranking):
    mti_components[i_d, :] = mti_get_simple_components(labels, df_ex2[dataset[0]].values)

predictions = [df_ex2[name].values for name in names]

params_mti = {'component': mti_components, 'anticip_areas': [[390, 400]], 'anticip_early_len': [30]}

# Random restarts: the four starting vectors are drawn in this fixed order
# right after seeding, so the run is reproducible.
n_runs = 100
np.random.seed(0)

coef_start = np.random.uniform(low=-20, high=20, size=n_runs)
Wr_start = np.random.uniform(low=0.01, high=100, size=n_runs)
Wspem_start = np.random.uniform(low=0.01, high=100, size=n_runs)
Wcardal_start = np.random.uniform(low=0.01, high=100, size=n_runs)

sols_mti_ex2 = np.zeros((n_runs, 4))
rbo_mti_ex2 = np.zeros(n_runs)
time_torczon_mti_ex2 = np.zeros(n_runs)
for i in range(n_runs):
    t_start = time.time()
    auto_mti = auto_param.AutoProfile(
        x0=[coef_start[i], Wr_start[i], Wspem_start[i], Wcardal_start[i]],
        xmin=[-20, 0, 0, 0],
        xmax=[20, 100, 100, 100],
        Nguess=20,
        Niter=50,
        rbo_p=[0.2, 1],
    )
    auto_mti.fit(gt_ranking, names, predictions, params=params_mti, metric='MTI')
    time_torczon_mti_ex2[i] = time.time() - t_start
    rbo_mti_ex2[i] = auto_mti.solution.f
    sols_mti_ex2[i, :] = auto_mti.solution.x
    # Progress report every 20 runs: running mean of the objective so far.
    if i % 20 == 0:
        print(i, np.mean(rbo_mti_ex2[:i+1]))

# Collect objective value, wall-clock time and solution vector of every run.
df_ex2_mti = pd.DataFrame({
    'rbo': rbo_mti_ex2,
    'time': time_torczon_mti_ex2,
    'coefs': sols_mti_ex2[:, 0],
    'Wrs': sols_mti_ex2[:, 1],
    'Wspems': sols_mti_ex2[:, 2],
    'Wcardals': sols_mti_ex2[:, 3],
})
# df_ex2_mti.to_csv('Results_AutoParam/ex2_autoparam_mti.csv')

In [None]:
# NAB counterpart of the AutoParam experiment; reuses `labels`, `gt_ranking`,
# `names` and `predictions` defined earlier in the notebook.
params_nab = {'label': labels}

# Random restarts: the three starting vectors are drawn in this fixed order
# right after seeding, so the run is reproducible.
n_runs = 100
np.random.seed(0)

coef_start = np.random.uniform(low=-20, high=-0.01, size=n_runs)
Afp_start = np.random.uniform(low=0.01, high=100, size=n_runs)
Afn_start = np.random.uniform(low=0.01, high=100, size=n_runs)
sols_nab_ex2 = np.zeros((n_runs, 3))
rbo_nab_ex2 = np.zeros(n_runs)
time_torczon_nab_ex2 = np.zeros(n_runs)

for i in range(n_runs):
    t_start = time.time()
    auto_nab = auto_param.AutoProfile(
        x0=[coef_start[i], Afp_start[i], Afn_start[i]],
        xmin=[-20, 0.01, 0.01],
        xmax=[-0.01, 100, 100],
        Nguess=20,
        Niter=50,
        rbo_p=[0.2, 1],
    )
    auto_nab.fit(gt_ranking, names, predictions, params=params_nab, metric='NAB')
    time_torczon_nab_ex2[i] = time.time() - t_start
    rbo_nab_ex2[i] = auto_nab.solution.f
    sols_nab_ex2[i, :] = auto_nab.solution.x
    # Progress report every 5 runs: running mean of the objective so far.
    if i % 5 == 0:
        print(i, np.mean(rbo_nab_ex2[:i+1]))

# Collect objective value, wall-clock time and solution vector of every run.
df_ex2_nab = pd.DataFrame({
    'rbo': rbo_nab_ex2,
    'time': time_torczon_nab_ex2,
    'coefs': sols_nab_ex2[:, 0],
    'Afps': sols_nab_ex2[:, 1],
    'Afns': sols_nab_ex2[:, 2],
})
# df_ex2_nab.to_csv('ex2_autoparam_nab.csv')