In [154]:
import math
import numpy as np
import argparse
import sys
import pandas as pd
from scipy.stats import norm
def write(X, y, file):
    """Write a labelled dataset to ``file`` as tab-separated text.

    Each output line is the label ``y[i]`` followed by the values of
    ``X[i]``, all separated by tabs. One line is written per element of
    ``y``; any existing file content is overwritten.
    """
    with open(file, "w") as out:
        for row_idx in range(len(y)):
            features = '\t'.join(str(value) for value in X[row_idx])
            out.write("{y}\t{x}\n".format(y=y[row_idx], x=features))
def writeOOD(X, Y, file):
    """Write an in-domain / out-of-domain mix to ``file`` and return its labels.

    Rows of ``X`` are written first with label 0, followed by rows of ``Y``
    with label 1, one tab-separated line per row (label first).

    Returns a NumPy array holding the 0/1 label of every written line, in
    file order.
    """
    labels = []
    with open(file, "w") as out:
        for label, rows in ((0, X), (1, Y)):
            for row_idx in range(len(rows)):
                labels.append(label)
                features = '\t'.join(str(value) for value in rows[row_idx])
                out.write("{y}\t{x}\n".format(y=label, x=features))
    return np.array(labels)
from sklearn.metrics import auc

from sklearn.metrics import roc_auc_score
def Auc(x, y):
    """Return the area under the curve through the points ``(x[i], y[i])``.

    The points are ordered by ``x`` (ties broken by ``y``) before being
    handed to ``sklearn.metrics.auc``.
    """
    xs = np.array(x)
    ys = np.array(y)
    # lexsort uses the LAST key as the primary one: sort by x, then by y.
    order = np.lexsort((ys, xs))
    return auc(xs[order], ys[order])

def _rejection_gain(pred_col, target_col, base_error):
    """Return the relative error reduction after rejecting the last i rows, i = 0..n-1.

    "Rejecting" a row means replacing its prediction by its target, so a
    rejected row never counts as an error. The error after rejecting the last
    ``i`` rows is therefore the number of misclassified rows among the first
    ``n - i`` rows divided by ``n``, which a single cumulative sum provides.
    """
    n = pred_col.shape[0]
    wrong = (2 * pred_col - 1) * (2 * target_col - 1) < 0
    # cumsum[k - 1] = errors among the first k rows; we need k = n-1, ..., 1.
    kept_errors = np.cumsum(wrong)[:-1][::-1]
    error_rate = kept_errors / float(n)
    # The entry for "nothing rejected" is fixed at 0.0.
    return np.concatenate(([0.0], (base_error - error_rate) / base_error))


def reject_MSE(targets, preds, measure, measure_name, save_path, pos_label=1, show=True):
    """Score an uncertainty measure by the area under its rejection curve.

    A sample is counted as an error when ``2 * pred - 1`` and
    ``2 * target - 1`` have opposite signs. Samples are then rejected
    (prediction replaced by the target) in order of decreasing ``measure``,
    and the relative reduction of the error rate is tracked as a function of
    the rejected fraction. The same curve is built for the optimal ordering
    (erroneous samples rejected first) and for random rejection (the
    diagonal).

    Parameters
    ----------
    targets : array-like, 1-D
        Ground-truth labels.
    preds : array-like
        Predictions; squeezed to 1-D.
    measure : array-like, 1-D
        Uncertainty score per sample; larger values are rejected first.
    measure_name, save_path, show
        Unused; kept so existing callers keep working.
    pos_label : int, default 1
        When different from 1 the measure is negated, i.e. samples with the
        smallest ``measure`` are rejected first.

    Returns
    -------
    error : ndarray of int
        Per-sample 0/1 error indicator, in input order.
    AUC_RR : float
        ``(model_area - random_area) / (optimal_area - random_area)``.
    """
    # Accept plain sequences as well as arrays.
    targets = np.asarray(targets)
    measure = np.asarray(measure)
    if pos_label != 1:
        measure_loc = -1.0 * measure
    else:
        measure_loc = measure
    preds = np.squeeze(preds)

    # Base error rate with nothing rejected.
    error = ((2 * targets - 1) * (2 * preds - 1) < 0).astype(int)
    MSE_0 = np.mean(error)

    # Columns: prediction, target, error indicator, uncertainty.
    array = np.concatenate(
        (preds[:, np.newaxis], targets[:, np.newaxis], error[:, np.newaxis], measure_loc[:, np.newaxis]), axis=1)
    n = array.shape[0]

    # Rejected fractions 0, 1/n, ..., (n-1)/n; also the random-rejection curve.
    fractions = np.concatenate(([0.0], np.arange(1, n) / float(n)))

    # Optimal rejection: rows sorted by error, so errors are rejected first.
    by_error = array[array[:, 2].argsort()]
    results_max = _rejection_gain(by_error[:, 0], by_error[:, 1], MSE_0)

    # Model rejection: rows sorted by uncertainty, most uncertain rejected first.
    by_uncertainty = array[array[:, 3].argsort()]
    results_var = _rejection_gain(by_uncertainty[:, 0], by_uncertainty[:, 1], MSE_0)

    max_auc = Auc(fractions, results_max)
    var_auc = Auc(fractions, results_var)
    min_auc = Auc(fractions, fractions)

    AUC_RR = (var_auc - min_auc) / (max_auc - min_auc)
    return error, AUC_RR

def reject_MSE1(targets, preds, measure, measure_name, save_path, pos_label=1, show=True):
    """Return the ROC-AUC of ``measure`` as a score for the binary ``targets``.

    Only ``targets`` and ``measure`` are used; the remaining parameters
    mirror the signature of ``reject_MSE`` and are ignored.
    """
    roc_auc = roc_auc_score(targets, measure)
    return roc_auc
def run_exp(dataset, num, num_ensembles, langevin, params_dict=None):
    """Train and evaluate models with the ``./ngboost`` binary on one dataset.

    The function reads ``./<dataset>/{train,train_without_validation,
    validation,test,test-mix-ood}.tsv`` and ``./<dataset>/pool.cd``, optionally
    grid-searches hyperparameters on the validation split, retrains on the
    full training set and computes uncertainty scores on the test set and on
    the test + out-of-domain mix.

    Must be run under IPython/Jupyter: the ``!`` lines are shell escapes.
    Scratch files (``_train.tsv``, ``_validation.tsv``, ``_test.tsv``,
    ``_ood.tsv``, ``_pool.cd``, ``out_.m``, ``out_.tsv``, ``output__.tsv``,
    ``stdoutapply.txt``) are written to the current directory.

    Parameters
    ----------
    dataset : str
        Name of the dataset directory (relative to the current directory).
    num : int
        Number of truncated sub-models taken from each trained model; the
        i-th one uses ``best_iters - i * 100`` trees.
    num_ensembles : int
        Number of independently seeded models to train.
    langevin : bool
        Passed to ``--langevin``; when false, the diffusion temperature and
        the model shrink rate are both 0.
    params_dict : dict or None
        Previously selected hyperparameters with keys ``"depth"``, ``"rs"``,
        ``"lr"`` and ``"iters"``. When ``None`` or empty, a grid search is run.

    Returns
    -------
    results_dict : dict
        ``{dataset: (error_indicators, [scores], per_example_nll)}`` where
        ``scores`` is ``[total, data, knowledge]`` rejection scores followed
        by ``[total, data, knowledge]`` ROC-AUC scores for OOD detection.
    params : dict
        ``{dataset: params_dict}`` with the hyperparameters that were used.
    """
    # NOTE(review): `approach` is never read in this function.
    approach = "sglb"
    results_dict = dict()
    for data_directory in [dataset]:
        # NOTE(review): `call` is imported but not used below.
        from subprocess import call

        _DATA_DIRECTORY_PATH = "./" + data_directory + "/"
        _DATA_FILE_OOD = _DATA_DIRECTORY_PATH + "test-mix-ood.tsv"
        _DATA_TRAIN = _DATA_DIRECTORY_PATH + "train.tsv"
        _DATA_TRAIN_WITHOUT_VALIDATION = _DATA_DIRECTORY_PATH + "train_without_validation.tsv"
        _DATA_VALIDATION = _DATA_DIRECTORY_PATH + "validation.tsv"
        _DATA_TEST = _DATA_DIRECTORY_PATH + "test.tsv"


        # We fix the random seed

        np.random.seed(1)

        print ("Loading data and other hyperparameters...")
        # We load the data
        # The label column is the smallest column index in pool.cd whose type
        # starts with 'T' or 'L' (case-insensitive).
        # NOTE(review): the file is opened without being closed explicitly.
        label = list(sorted([int(x[0]) for x in [x.strip().split('\t') for x in open(_DATA_DIRECTORY_PATH + "pool.cd").readlines()] if x[1][0].upper() in ['T', 'L']]))[0]
        data = pd.read_csv(_DATA_TRAIN, sep='\t', header=None)
        data_without_validation = pd.read_csv(_DATA_TRAIN_WITHOUT_VALIDATION, sep='\t', header=None)
        data_test = pd.read_csv(_DATA_TEST, sep='\t', header=None)
        data_validation = pd.read_csv(_DATA_VALIDATION, sep='\t', header=None)
        data_ood = pd.read_csv(_DATA_FILE_OOD, sep='\t', header=None)

        y_train = data[ label ].values
        y_train_w = data_without_validation[label].values
        y_validation = data_validation[label].values
        y_test = data_test[label].values
        # Drop the label column from every frame (including the OOD frame) so
        # that only feature columns remain.
        for i in [data, data_without_validation, data_test, data_validation, data_ood]:
            i.drop(columns=[label], inplace=True)
        Xood = data_ood.values
        X_train = data.values
        X_train_w = data_without_validation.values
        X_test = data_test.values
        X_validation = data_validation.values
        # Rebuild the column description for the scratch files, where the label
        # is written first: every non-label entry that sat before the label
        # column moves one position to the right.
        # NOTE(review): the file is opened without being closed explicitly.
        poolcd = list(sorted([(int(x[0]) + int(int(x[0]) < label), x[1]) for x in [x.strip().split('\t') for x in open(_DATA_DIRECTORY_PATH + "pool.cd").readlines()] if x[1][0].upper() not in ['T', 'L']]))
        poolcd = [(0, "Label")] + poolcd
        with open("_pool.cd", "w") as out:
            for k, v in poolcd:
                out.write("{}\t{}\n".format(k, v))
        
        # We iterate over the training test splits
        # (only a single split is used)

        n_splits = 1
        print ("Done.")

        errors, MC_errors, lls = [], [], []
        for split in range(int(n_splits)):
            print("split {} of {}".format(split, n_splits))
            # Hyperparameter search uses the training set without the
            # validation rows; the full training set is restored afterwards.

            X_train_original = X_train
            y_train_original = y_train
            X_train = X_train_w
            y_train = y_train_w
            # NOTE(review): mean/std are computed but never used, and the
            # "*_normalized" names below hold the raw labels.
            y_train_mean = y_train.mean()
            y_train_std = y_train.std(ddof=0)
            y_train_normalized = y_train
            y_validate_normalized = y_validation
            write(X_train, y_train_normalized, "_train.tsv")
            write(X_validation, y_validate_normalized, "_validation.tsv")
            N = len(y_train)
            # Both values are 0 when langevin is False (int(False) == 0).
            beta = (N + 0.) * int(langevin)
            shrinkage = (0.5 / N) * int(langevin)
            # Printing the size of the training, validation and test sets
            print ('Number of training examples: ' + str(X_train.shape[0]))
            print ('Number of validation examples: ' + str(X_validation.shape[0]))
            print ('Number of test examples: ' + str(X_test.shape[0]))
            print ('Number of train_original examples: ' + str(X_train_original.shape[0]))

            # List of hyperparameters which we will try out using grid-search
            depths = [4,5,6]
            lrs = [0.0001, 0.001, 0.01, 0.1]
            rss = [0]
            # Initial best_* values are placeholders; they are overwritten
            # from params_dict after the (optional) grid search.
            best_depth = 4
            best_lr = 0.003
            best_rs = 0.1
            iterations = [1000]
            best_ll = float("-inf")
            best_iters = 1000
            total = len(depths) * len(lrs) * len(rss) * len(iterations)
            count = 0
            # Run the grid search only when no hyperparameters were supplied.
            if params_dict is None or len(params_dict.keys()) == 0:
                params_dict = {}
                for depth in depths:
                    for lr in lrs:
                        for rs in rss:
                            for iters in iterations:
                                count += 1
                                print('count {} of total {}'.format(count, total))
                                scores = []
                                #iterations_ = []
                                predictions = []
                                for index in range(1):
                                    # Shell escape: fit on _train.tsv, evaluate on _validation.tsv and
                                    # keep only the output lines containing "best".
                                    !./ngboost fit -f _train.tsv -t _validation.tsv  --bootstrap-type Bernoulli --subsample 0.5  --cd _pool.cd --use-best-model False --depth $depth --leaf-estimation-backtracking No --learning-rate $lr --random-strength $rs --leaf-estimation-method Gradient --loss-function Logloss --iterations $iters --seed $index -m out_.m --langevin $langevin --diffusion-temperature $beta --model-shrink-rate $shrinkage  | grep best > output__.tsv
                                    with open("output__.tsv") as f:
                                        lines = f.readlines()
                                        # Take the value after "test: " from the num*100 lines that
                                        # precede the last two lines (presumably one line per
                                        # iteration followed by a two-line summary -- TODO confirm).
                                        scores = [float(line.split('test: ')[-1].split()[0]) for line in lines[-2-num*100:-2]]
                                        #iterations_.append(int(lines[-1].split('=')[-1]))

                                # num == 1: only the last score; otherwise every 100th score.
                                arr = (np.array(scores)[-1:] if num == 1 else np.array(scores)[99::100])
                                # Negated mean score, so that larger is better.
                                ll = -np.mean(arr)
                                print(-ll)
                                if ll > best_ll:
                                    best_ll = ll
                                    best_rs = rs
                                    best_lr = lr
                                    best_depth = depth
                                    best_iters = iters#int(np.mean(iterations_) + 0.5)
                params_dict["depth"] = best_depth
                params_dict["rs"] = best_rs
                params_dict["lr"] = best_lr
                params_dict["iters"] = best_iters
            best_depth = params_dict["depth"]
            best_lr = params_dict["lr"]
            best_rs = params_dict["rs"]
            best_iters = params_dict["iters"]
            # Final training uses the full training set (validation rows included).
            X_train = X_train_original
            y_train = y_train_original
            y_train_normalized = y_train
            y_test_normalized = y_test
            write(X_train, y_train, "_train.tsv")
            write(X_test, y_test, "_test.tsv")
            # _ood.tsv holds the test rows (label 0) followed by the OOD rows
            # (label 1); yood is the matching 0/1 label array.
            yood = writeOOD(X_test, Xood, "_ood.tsv")
            N = len(y_train)
            beta = (N + 0.) * int(langevin)
            shrinkage = 0.5 / N * int(langevin)
            # Logistic function in extended precision; the two branches are
            # algebraically equal ways of writing 1 / (1 + exp(-x)).
            def sigm(x):
                return (1.0 / (1.0 + 1.0 / np.exp(np.float128(x)))) if x > 0 else (1.0 / (1.0 + np.exp(-np.float128(x))))
            # One list of probabilities (over all rows of _ood.tsv) per
            # (model, truncation) pair: num_ensembles * num lists in total.
            predictions = []
            for index in range(num_ensembles):
                # Shell escape: train one model with seed `index`.
                !./ngboost fit -f _train.tsv -t _test.tsv --cd _pool.cd  --bootstrap-type Bernoulli --subsample 0.5 --use-best-model False --depth $best_depth --leaf-estimation-backtracking No --learning-rate $best_lr --random-strength $best_rs --leaf-estimation-method Gradient --loss-function Logloss --iterations $best_iters --seed $index -m out_.m --langevin $langevin --diffusion-temperature $beta --model-shrink-rate $shrinkage > output__.tsv
                for i in range(num):
                    # Apply the model truncated to its first `trees` trees.
                    trees = best_iters - i*100
                    !./ngboost calc -m out_.m --input-path _ood.tsv -o out_.tsv --cd _pool.cd --prediction-type RawFormulaVal --tree-count-limit $trees > stdoutapply.txt
                    with open("out_.tsv") as f:
                        # Skip the first line of out_.tsv; column 1 is read as the raw score.
                        tmp = [x.strip().split('\t') for x in f.readlines()[1:]]
                        # Rescale the raw score of the truncated model; CC == 1 when
                        # shrinkage == 0 or i == 0.
                        CC = (1 - best_lr * shrinkage) ** (-i*100)
                        predictions.append([sigm(CC * float(x[1])) for x in tmp])
            # Per-example negative log-likelihood of the averaged prediction on
            # the test rows (the first len(y_test) rows of _ood.tsv).
            lls_ = []
            for index in range(len(y_test)):
                p = 0.0
                yy = y_test[index]
                # NOTE(review): predictions has num * num_ensembles entries; this
                # averages all of them only when num or num_ensembles equals 1.
                for i in range(max(num, num_ensembles)):
                    p += predictions[i][index]/max(num, num_ensembles)
                # max(0, yy) maps negative labels to 0.
                lls_.append(- max(0, yy) * np.log(p) - (1-max(0, yy)) * np.log(1-p))
            ll = np.mean(lls_)
            # Ensemble-mean probability per test row.
            preds = np.array([np.mean([x[y] for x in predictions]) for y in range(len(y_test))])
            # Total uncertainty: binary entropy of the mean prediction.
            total = np.array([-np.log(x) * x - np.log(1- x) * ( 1- x) for x in np.array([np.mean([x[y] for x in predictions]) for y in range(len(y_test))])])
            _, total_score = reject_MSE(y_test_normalized, preds, total, "variance", "anywhere")
            # Data uncertainty: mean of the binary entropies of the individual predictions.
            ent = np.array([np.mean(-np.log(x) * x - np.log(1- x) * ( 1- x)) for x in np.array([[x[y] for x in predictions] for y in range(len(y_test))])])
            _, ent_score = reject_MSE(y_test_normalized, preds, ent, "variance", "anywhere")
            # Knowledge uncertainty: difference between the two.
            knowledge = total - ent
            acc, knowledge_score = reject_MSE(y_test_normalized, preds, knowledge, "variance", "anywhere")

            # Switch to the 0/1 in-domain vs. OOD labels: the same three measures
            # are now computed over all rows of _ood.tsv and scored by ROC-AUC.
            y_test = yood
            y_test_normalized = yood
            preds = np.array([np.mean([x[y] for x in predictions]) for y in range(len(y_test))])
            total = np.array([-np.log(x) * x - np.log(1- x) * ( 1- x) for x in np.array([np.mean([x[y] for x in predictions]) for y in range(len(y_test))])])
            total_score1 = reject_MSE1(y_test_normalized, preds, total, "variance", "anywhere")
            ent = np.array([np.mean(-np.log(x) * x - np.log(1- x) * ( 1- x)) for x in np.array([[x[y] for x in predictions] for y in range(len(y_test))])])
            ent_score1 = reject_MSE1(y_test_normalized, preds, ent, "variance", "anywhere")
            knowledge = total - ent
            knowledge_score1 = reject_MSE1(y_test_normalized, preds, knowledge, "variance", "anywhere")
            print(ll)
            # `acc` is the per-example 0/1 error array returned by reject_MSE.
            errors.append(acc)
            MC_errors.append([total_score, ent_score, knowledge_score, total_score1, ent_score1, knowledge_score1])
            print([total_score, ent_score, knowledge_score, total_score1, ent_score1, knowledge_score1])
            lls.append(lls_)
        # Keep the error array and NLL list of the first split, and the score
        # rows of all splits.
        results_dict[data_directory] = (errors[0], MC_errors, lls[0])
    return results_dict, {data_directory: params_dict}
        

In [155]:
# Per-approach cache of tuned hyperparameters, keyed by dataset name. It is
# created only when missing so that re-running this cell keeps hyperparameters
# found by earlier (expensive) grid searches, while a fresh kernel no longer
# fails with a NameError when the experiment loop reads `params`.
if "params" not in globals():
    params = {"SGB": {}, "SGLB": {}, "VSGLB": {}}

# Per-approach experiment results, keyed by dataset name; reset on every run.
results = {
    "SGB": {},
    "SGLB": {},
    "SGB-10": {},
    "SGLB-10": {},
    "VSGLB-10": {},
}

In [156]:
#"adult", "amazon", "click", "KDDCup09_appetency"
# Run every configuration on every dataset. Hyperparameters chosen by a
# single-model run are cached in `params` and reused by the ensemble runs.
first = False
for ds in ["adult", "amazon", "click", "KDDCup09_appetency", "KDDCup09_churn", "KDDCup09_upselling", "internet", "kick"]:
    print(ds)
    # Single models: one model, one (untruncated) prediction.
    for family, use_langevin in (("SGB", False), ("SGLB", True)):
        r, p = run_exp(ds, 1, 1, use_langevin, params_dict=params[family].get(ds))
        results[family].update(r)
        params[family].update(p)
    # Ensembles of 10 independently seeded models.
    if not first:
        for family, use_langevin in (("SGB", False), ("SGLB", True)):
            r, p = run_exp(ds, 1, 10, use_langevin, params_dict=params[family].get(ds))
            results[family + "-10"].update(r)
    first = False
    # One model, 10 truncated sub-models.
    r, p = run_exp(ds, 10, 1, True, params_dict=params["VSGLB"].get(ds))
    results["VSGLB-10"].update(r)
    params["VSGLB"].update(p)
    

adult
Loading data and other hyperparameters...
Done.
split 0 of 1
Number of training examples: 31259
Number of validation examples: 7815
Number of test examples: 9768
Number of train_original examples: 39074
0.27596216843124756024
[0.7226481747992137, 0.7226481747992137, 0.08540378445430337, 0.19533603432559207, 0.19533603432559207, 0.5]
Loading data and other hyperparameters...
Done.
split 0 of 1
Number of training examples: 31259
Number of validation examples: 7815
Number of test examples: 9768
Number of train_original examples: 39074
0.27638441865605534477
[0.7153806895860783, 0.7153806895860783, 0.09426848368975427, 0.14169160644897746, 0.14169160644897746, 0.5]
Loading data and other hyperparameters...
Done.
split 0 of 1
Number of training examples: 31259
Number of validation examples: 7815
Number of test examples: 9768
Number of train_original examples: 39074
0.2754777766774294198
[0.7191427097442404, 0.7189409577739091, 0.5282412480536495, 0.19682625863522668, 0.192783406312276

In [157]:
# Keep a second reference to the experiment results dict. This is an alias,
# not a copy; it stays valid when later cells rebind the name `results`.
results_saved = results

In [158]:
# (display name, per-dataset results) pairs, in table column order.
approaches = [
    (display_name, results[result_key])
    for display_name, result_key in (
        ("SGB-1", "SGB"),
        ("SGLB-1", "SGLB"),
        ("SGB-10", "SGB-10"),
        ("SGLB-10", "SGLB-10"),
        ("VSGLB-10", "VSGLB-10"),
    )
]



In [159]:
# Show the score rows for VSGLB-10 on "amazon": element 1 of the result tuple
# is the list of [total, data, knowledge] rejection scores followed by
# [total, data, knowledge] OOD ROC-AUC scores, one row per split.
results["VSGLB-10"]["amazon"][1]

[[0.6920733202161157,
  0.6922288755847458,
  0.5168117683695848,
  0.845675477846671,
  0.8456584780966838,
  0.7965208637698721]]

In [107]:
# Mean of the per-example negative log-likelihoods (element 2 of the result
# tuple) for the first approach on the dataset currently bound to `ds`,
# leaving out the last example.
np.mean(approaches[0][1][ds][2][:-1])

0.27596216843124756024

In [166]:
import numpy as np
from scipy.stats import ttest_rel

# Print one LaTeX table row per dataset with the mean of element 0 of each
# approach's result tuple (the per-example 0/1 error array stored by run_exp,
# despite the `nll` name). The best (lowest) approach is bold, as is every
# approach whose paired t-test against the best has p > 0.05.
for ds in ["adult", "amazon", "click", "KDDCup09_appetency", "KDDCup09_churn", "KDDCup09_upselling", "internet", "kick"]:
    nll = [np.mean(a[1][ds][0]) for a in approaches]
    maxind = np.argmin(nll)
    best_errors = approaches[maxind][1][ds][0]
    cells = []
    for idx, value in enumerate(nll):
        # The best approach is bold without running the test against itself.
        is_bold = idx == maxind or ttest_rel(best_errors, approaches[idx][1][ds][0]).pvalue > 0.05
        # Values are truncated (not rounded) to three decimals.
        cells.append("{} {:.3f}".format("\\bf" if is_bold else "", int(1000 * value) / 1000))
    print("{} & {}\\\\".format(ds[:1].upper() + ds[1:], " & ".join(cells)))

Adult & \bf 0.128 & \bf 0.126 & \bf 0.126 & \bf 0.127 & \bf 0.126\\
Amazon & \bf 0.044 & \bf 0.045 & \bf 0.043 & \bf 0.044 & \bf 0.045\\
Click & \bf 0.156 & \bf 0.156 & \bf 0.156 & \bf 0.156 & \bf 0.156\\
KDDCup09_appetency & \bf 0.018 & \bf 0.018 & \bf 0.018 & \bf 0.018 & \bf 0.017\\
KDDCup09_churn & \bf 0.071 & \bf 0.071 & \bf 0.071 & \bf 0.071 & \bf 0.071\\
KDDCup09_upselling & \bf 0.047 & \bf 0.046 & \bf 0.046 & \bf 0.046 & \bf 0.046\\
Internet &  0.103 & \bf 0.100 & \bf 0.101 & \bf 0.099 & \bf 0.101\\
Kick & \bf 0.095 &  0.095 & \bf 0.095 & \bf 0.095 & \bf 0.095\\


In [163]:
import numpy as np
from scipy.stats import ttest_rel


def make_ds(x):
    """Return the LaTeX template for one dataset block named ``x``.

    The result still contains a single ``{}`` placeholder for the row body,
    so it must be passed through ``str.format`` once more; the braces are
    escaped twice for that reason.
    """
    # NOTE(review): the row span is fixed at 3 regardless of the number of rows.
    return "\\multirow{{{{3}}}}{{{{*}}}} {{{{{}}}}}& {{}} \\\\ \\midrule".format(x)


# Print, per dataset, the rejection scores (in percent, truncated) of every
# approach. Local names are used for the table so that the global `results`
# dict of experiment outputs is not overwritten by this cell.
first = False
for ds in ["adult", "amazon", "click", "KDDCup09_appetency","KDDCup09_churn", "KDDCup09_upselling", "internet", "kick"]:
    ds_ = ds.split("KDDCup09_")[-1]
    formatter = make_ds(ds_[:1].upper() + ds_[1:])
    # Indices into each score row: 0 = total, 2 = knowledge rejection score.
    unc = [0, 2]
    tvs = ["Total", "Knowledge"]
    per_approach = [a[ds] for _, a in approaches]
    # means[k][j]: score unc[k] of approach j, averaged over splits.
    means = [[np.mean(np.transpose(entry[1])[ind]) for entry in per_approach] for ind in unc]

    # Locate the overall best (largest) mean: measure maxapp, approach maxind.
    maxapp = 0
    maxind = 0
    maxval = -100
    for idx, row in enumerate(means):
        values = max(row)
        if values > maxval:
            maxval = values
            maxind = np.argmax(row)
            maxapp = idx
    best_shown = int(100 * means[maxapp][maxind])

    # columns[j] = [approach name, cell for measure 0, cell for measure 1, ...]
    columns = [[name] for name, _ in approaches]
    for idx, row in enumerate(means):
        for index, mean in enumerate(row):
            # Bold every cell that displays the same number as the best cell.
            best = int(100 * mean) == best_shown or (maxind == index and maxapp == idx)
            columns[index].append(("\\bf {{{}}}" if best else "{}").format(int(100 * mean)))

    # Transpose into table rows; row 0 (approach names) is only filled when
    # `first` is true, otherwise it stays empty.
    rows = [[] for _ in range(len(columns[0]))]
    for i, row_label in enumerate(tvs):
        rows[i + 1].append(row_label)
    for i in range(0 if first else 1, len(columns[0])):
        for j in range(len(columns)):
            rows[i].append(columns[j][i])
    print(formatter.format(" \\\\\n & ".join(" & ".join(row) for row in rows)))
    first = False
        
    

\multirow{3}{*} {Adult}&  \\
 & Total & \bf {72} & 71 & 71 & \bf {72} & 71 \\
 & Knowledge & 8 & 9 & 52 & 52 & 26 \\ \midrule
\multirow{3}{*} {Amazon}&  \\
 & Total & 65 & 67 & 65 & 67 & \bf {69} \\
 & Knowledge & 13 & 13 & 53 & 59 & 51 \\ \midrule
\multirow{3}{*} {Click}&  \\
 & Total & \bf {43} & \bf {43} & \bf {43} & \bf {43} & \bf {43} \\
 & Knowledge & 16 & 16 & 25 & 25 & 10 \\ \midrule
\multirow{3}{*} {Appetency}&  \\
 & Total & 70 & \bf {72} & 71 & 71 & 71 \\
 & Knowledge & 0 & 0 & 62 & 64 & 35 \\ \midrule
\multirow{3}{*} {Churn}&  \\
 & Total & 49 & 49 & \bf {50} & \bf {50} & 49 \\
 & Knowledge & 0 & 0 & 38 & 36 & 22 \\ \midrule
\multirow{3}{*} {Upselling}&  \\
 & Total & 54 & \bf {56} & \bf {56} & \bf {56} & \bf {56} \\
 & Knowledge & 3 & 3 & 50 & 47 & 17 \\ \midrule
\multirow{3}{*} {Internet}&  \\
 & Total & 77 & 76 & 76 & 76 & \bf {78} \\
 & Knowledge & 6 & 8 & 70 & 70 & 43 \\ \midrule
\multirow{3}{*} {Kick}&  \\
 & Total & 43 & 43 & \bf {44} & \bf {44} & 43 \\
 & Knowledge 

In [148]:
import numpy as np
from scipy.stats import ttest_rel


def make_ds(x):
    """Return the LaTeX template for one dataset block named ``x``.

    The result still contains a single ``{}`` placeholder for the row body,
    so it must be passed through ``str.format`` once more; the braces are
    escaped twice for that reason.
    """
    # NOTE(review): the row span is fixed at 3 regardless of the number of rows.
    return "\\multirow{{{{3}}}}{{{{*}}}} {{{{{}}}}}& {{}} \\\\ \\midrule".format(x)


# Print, per dataset, the OOD-detection ROC-AUC (in percent, truncated) of
# every approach. Local names are used for the table so that the global
# `results` dict of experiment outputs is not overwritten by this cell.
first = False
for ds in ["adult", "amazon", "click", "KDDCup09_appetency","KDDCup09_churn", "KDDCup09_upselling", "internet", "kick"]:
    ds_ = ds.split("KDDCup09_")[-1]
    formatter = make_ds(ds_[:1].upper() + ds_[1:])
    # Indices into each score row: 3 = total, 4 = data, 5 = knowledge ROC-AUC.
    unc = [3, 4, 5]
    tvs = ["Total", "Data", "Knowledge"]
    per_approach = [a[ds] for _, a in approaches]
    # means[k][j]: score unc[k] of approach j, averaged over splits.
    means = [[np.mean(np.transpose(entry[1])[ind]) for entry in per_approach] for ind in unc]

    # Locate the overall best (largest) mean: measure maxapp, approach maxind.
    maxapp = 0
    maxind = 0
    maxval = -100
    for idx, row in enumerate(means):
        values = max(row)
        if values > maxval:
            maxval = values
            maxind = np.argmax(row)
            maxapp = idx
    best_shown = int(100 * means[maxapp][maxind])

    # columns[j] = [approach name, cell for measure 0, cell for measure 1, ...]
    columns = [[name] for name, _ in approaches]
    for idx, row in enumerate(means):
        for index, mean in enumerate(row):
            # Bold every cell that displays the same number as the best cell.
            best = int(100 * mean) == best_shown or (maxind == index and maxapp == idx)
            columns[index].append(("\\bf {{{}}}" if best else "{}").format(int(100 * mean)))

    # Transpose into table rows; row 0 (approach names) is only filled when
    # `first` is true, otherwise it stays empty.
    rows = [[] for _ in range(len(columns[0]))]
    for i, row_label in enumerate(tvs):
        rows[i + 1].append(row_label)
    for i in range(0 if first else 1, len(columns[0])):
        for j in range(len(columns)):
            rows[i].append(columns[j][i])
    print(formatter.format(" \\\\\n & ".join(" & ".join(row) for row in rows)))
    first = False
        
    

\multirow{3}{*} {Adult}&  \\
 & Total & 19 & 14 & 19 & 21 & 21 \\
 & Data & 19 & 14 & 19 & 20 & 21 \\
 & Knowledge & 50 & 50 & 66 & \bf {72} & 44 \\ \midrule
\multirow{3}{*} {Amazon}&  \\
 & Total & 82 & \bf {84} & 82 & \bf {84} & \bf {84} \\
 & Data & 82 & \bf {84} & 83 & \bf {84} & \bf {84} \\
 & Knowledge & 50 & 50 & 58 & 65 & 79 \\ \midrule
\multirow{3}{*} {Click}&  \\
 & Total & 40 & 39 & 40 & 40 & 39 \\
 & Data & 40 & 39 & 40 & 40 & 39 \\
 & Knowledge & 50 & 50 & \bf {87} & \bf {87} & 78 \\ \midrule
\multirow{3}{*} {Appetency}&  \\
 & Total & 15 & 19 & 16 & 19 & 17 \\
 & Data & 15 & 19 & 16 & 19 & 17 \\
 & Knowledge & 50 & 50 & 53 & 72 & \bf {85} \\ \midrule
\multirow{3}{*} {Churn}&  \\
 & Total & 96 & 91 & 97 & 96 & 94 \\
 & Data & 96 & 91 & 96 & 95 & 94 \\
 & Knowledge & 50 & 50 & \bf {99} & \bf {99} & 98 \\ \midrule
\multirow{3}{*} {Upselling}&  \\
 & Total & 35 & 61 & 44 & 61 & 48 \\
 & Data & 35 & 61 & 43 & 60 & 48 \\
 & Knowledge & 50 & 50 & 81 & \bf {92} & 80 \\ \midrule
\