In [2]:
# Fix the RNG seed so any randomised step run later in the session is repeatable.
set.seed(20)

library(dplyr)
library(caret)
library(tidyr)

In [3]:
# Per-algorithm scores. The metric columns get an "algorithm_" prefix so they
# stay distinguishable from the ensemble metrics once the tables are joined.
evaluation_algorithms <- rename(
  read.csv("results_evaluation/algorithms_metrics_2017-05-26.csv"),
  algorithm_f1 = f1,
  algorithm_precision = precision,
  algorithm_recall = recall
)

In [4]:
# Start from a copy of the loaded table; the NA clean-up is applied to this copy.
evaluation_algorithms_no_NA <- evaluation_algorithms

In [5]:
# Score a missing F1 as 0. Only the F1 column is patched; NAs in the precision
# and recall columns are left as they are.
evaluation_algorithms_no_NA$algorithm_f1[is.na(evaluation_algorithms_no_NA$algorithm_f1)] <- 0

In [6]:
# Display the patched table for a visual check.
evaluation_algorithms_no_NA

dataset,algorithm,variant,algorithm_f1,algorithm_precision,algorithm_recall
dataset_aloi,DBSCAN,dbscan_0.3,0.07183342,0.03766637,0.77320955
dataset_aloi,DBSCAN,dbscan_0.5,0.07760393,0.04142224,0.61339523
dataset_aloi,DBSCAN,dbscan_0.7,0.08007986,0.04369402,0.47877984
dataset_aloi,DBSCAN,dbscan_0.9,0.08166263,0.04591626,0.36870027
dataset_aloi,DBSCAN,dbscan_1.1,0.08042446,0.04677856,0.28647215
dataset_aloi,LOF,lof_03,0.20689655,0.20689655,0.20689655
dataset_aloi,LOF,lof_05,0.20557029,0.20557029,0.20557029
dataset_aloi,LOF,lof_08,0.18435013,0.18435013,0.18435013
dataset_aloi,LOF,lof_14,0.15583554,0.15583554,0.15583554
dataset_aloi,LOF,lof_19,0.14854111,0.14854111,0.14854111


In [7]:
# Highest-F1 algorithm variant per dataset. top_n() keeps ties, so a dataset
# can show up on more than one row.
evaluation_algorithms_no_NA %>%
  group_by(dataset) %>%
  top_n(n = 1, wt = algorithm_f1)

dataset,algorithm,variant,algorithm_f1,algorithm_precision,algorithm_recall
dataset_aloi,LOF,lof_03,0.2068966,0.2068966,0.2068966
dataset_iono,randomForest,rf,0.8957379,0.9274292,0.8730769
dataset_kdd,randomForest,rf,0.8655868,0.9652798,0.79
dataset_pen,randomForest,rf,0.6,0.8,0.5
dataset_shuttle,randomForest,rf,0.98,0.9666667,1.0
dataset_waveform,LOF,lof_25,0.15,0.15,0.15
dataset_waveform,LOF,lof_30,0.15,0.15,0.15
dataset_wbc,randomForest,rf,0.4333333,0.4,0.5
dataset_wdbc,LOF,lof_19,0.7,0.7,0.7
dataset_wdbc,randomForest,rf,0.7,0.7,0.7


In [8]:
# From here on the NA-patched table replaces the raw one under the original name.
evaluation_algorithms <- evaluation_algorithms_no_NA

# Results without Random Forest

In [9]:
# Ensemble scores from the "no_rf" results file. The metric columns get an
# "ensemble_" prefix so they do not clash with the per-algorithm metrics.
evaluation_ensemble_uns <- rename(
  read.csv("results_evaluation/ensemble_metrics_2017-07-06_no_rf.csv"),
  ensemble_f1 = f1,
  ensemble_precision = precision,
  ensemble_recall = recall
)

In [10]:
# Display the ensemble table loaded from the "no_rf" file.
evaluation_ensemble_uns

dataset,ensemble,ensemble_f1,ensemble_precision,ensemble_recall
dataset_aloi,majority,0.22727273,0.25409836,0.20557029
dataset_aloi,glm,0.07595942,0.37610828,0.04242826
dataset_iono,majority,0.81021898,0.75,0.88095238
dataset_iono,glm,0.82753082,0.86164169,0.80192308
dataset_kdd,majority,0.16778523,0.25510204,0.125
dataset_kdd,glm,0.0,0.0,0.0
dataset_pen,majority,0.04,0.03333333,0.05
dataset_pen,glm,0.0,0.0,0.0
dataset_shuttle,majority,0.42424242,0.35,0.53846154
dataset_shuttle,glm,0.86333333,0.81666667,0.95


In [11]:
# Highest-F1 ensemble per dataset among the ensembles built without random
# forest (ties are kept).
evaluation_ensemble_uns %>%
  group_by(dataset) %>%
  top_n(n = 1, wt = ensemble_f1)

dataset,ensemble,ensemble_f1,ensemble_precision,ensemble_recall
dataset_aloi,majority,0.22727273,0.25409836,0.20557029
dataset_iono,glm,0.82753082,0.86164169,0.80192308
dataset_kdd,majority,0.16778523,0.25510204,0.125
dataset_pen,majority,0.04,0.03333333,0.05
dataset_shuttle,glm,0.86333333,0.81666667,0.95
dataset_waveform,majority,0.17021277,0.14814815,0.2
dataset_wbc,glm,0.61666667,0.58333333,0.7
dataset_wdbc,majority,0.57142857,0.44444444,0.8
dataset_wpbc,majority,0.21153846,0.19298246,0.23404255
dataset_ann,majority,0.19920319,0.17337032,0.2340824


In [12]:
# Pair every non-random-forest algorithm variant with each ensemble of the same
# dataset, keep the strongest variant(s) per dataset, and list the pairs where
# the ensemble's F1 exceeds that of the best single algorithm.
evaluation_algorithms %>%
  filter(algorithm != "randomForest") %>%
  left_join(evaluation_ensemble_uns, by = "dataset") %>%
  group_by(dataset) %>%
  top_n(n = 1, wt = algorithm_f1) %>%
  filter(ensemble_f1 > algorithm_f1) %>%
  select(
    -algorithm_precision, -algorithm_recall,
    -ensemble_precision, -ensemble_recall
  )

dataset,algorithm,variant,algorithm_f1,ensemble,ensemble_f1
dataset_aloi,LOF,lof_03,0.2068966,majority,0.2272727
dataset_iono,LOF,lof_08,0.8174603,glm,0.8275308
dataset_kdd,LOF,lof_19,0.105,majority,0.1677852
dataset_kdd,LOF,lof_25,0.105,majority,0.1677852
dataset_shuttle,DBSCAN,dbscan_1.1,0.490566,glm,0.8633333
dataset_waveform,LOF,lof_25,0.15,majority,0.1702128
dataset_waveform,LOF,lof_30,0.15,majority,0.1702128
dataset_wbc,LOF,lof_30,0.3,glm,0.6166667
dataset_heart,oneClassSVM,oneClassSVM,0.4720686,glm,0.5092857
dataset_ads,oneClassSVM,oneClassSVM,0.3982663,majority,0.4191867


# Results with Random Forest

In [13]:
# Ensemble scores from the "with_rf" results file, with the same "ensemble_"
# prefix on the metric columns as the "no_rf" table.
evaluation_ensemble_total <- rename(
  read.csv("results_evaluation/ensemble_metrics_2017-07-06_with_rf.csv"),
  ensemble_f1 = f1,
  ensemble_precision = precision,
  ensemble_recall = recall
)

In [14]:
# Highest-F1 ensemble per dataset among the ensembles that include random
# forest (ties are kept).
evaluation_ensemble_total %>%
  group_by(dataset) %>%
  top_n(n = 1, wt = ensemble_f1)

dataset,ensemble,ensemble_f1,ensemble_precision,ensemble_recall
dataset_aloi,majority,0.2509946,0.2760541,0.2301061
dataset_iono,glm,0.8919529,0.8988578,0.8891026
dataset_kdd,glm,0.8280721,0.8893641,0.79
dataset_pen,glm,0.5833333,0.75,0.5
dataset_shuttle,glm,0.98,0.9666667,1.0
dataset_waveform,majority,0.2083333,0.1785714,0.25
dataset_wbc,glm,0.6166667,0.5833333,0.7
dataset_wdbc,glm,0.6666667,0.65,0.7
dataset_wpbc,majority,0.3090909,0.2698413,0.3617021
dataset_ann,glm,0.9770084,0.9640153,0.9906359


In [15]:
# Same comparison as for the "no_rf" ensembles, but random forest now counts as
# a candidate algorithm and the ensembles come from the "with_rf" table.
evaluation_algorithms %>%
  left_join(evaluation_ensemble_total, by = "dataset") %>%
  group_by(dataset) %>%
  top_n(n = 1, wt = algorithm_f1) %>%
  filter(ensemble_f1 > algorithm_f1) %>%
  select(
    -algorithm_precision, -algorithm_recall,
    -ensemble_precision, -ensemble_recall
  )

dataset,algorithm,variant,algorithm_f1,ensemble,ensemble_f1
dataset_aloi,LOF,lof_03,0.2068966,majority,0.2509946
dataset_waveform,LOF,lof_25,0.15,majority,0.2083333
dataset_waveform,LOF,lof_30,0.15,majority,0.2083333
dataset_wbc,randomForest,rf,0.4333333,majority,0.4827586
dataset_wbc,randomForest,rf,0.4333333,glm,0.6166667
dataset_arr,LOF,lof_19,0.3333333,majority,0.3606557
dataset_arr,LOF,lof_25,0.3333333,majority,0.3606557
dataset_arr,LOF,lof_30,0.3333333,majority,0.3606557
dataset_hepatitis,randomForest,rf,0.5,glm,0.5833333
dataset_ads,randomForest,rf,0.8903844,glm,0.8904887


# Best Unsupervised Algorithm in total

In [53]:
# Strongest non-random-forest algorithm per dataset. Variants of one algorithm
# that tie on F1 collapse into a single row; ties between different algorithms
# are all kept.
evaluation_algorithms %>%
  filter(algorithm != "randomForest") %>%
  select(-algorithm_precision, -algorithm_recall) %>%
  group_by(dataset) %>%
  top_n(n = 1, wt = algorithm_f1) %>%
  distinct(algorithm, .keep_all = TRUE) %>%
  select(-variant)

dataset,algorithm,algorithm_f1
dataset_aloi,LOF,0.2068966
dataset_iono,LOF,0.8174603
dataset_kdd,LOF,0.105
dataset_pen,LOF,0.05
dataset_shuttle,DBSCAN,0.490566
dataset_waveform,LOF,0.15
dataset_wbc,LOF,0.3
dataset_wdbc,LOF,0.7
dataset_wpbc,DBSCAN,0.3836735
dataset_ann,oneClassSVM,0.2028011


In [54]:
# Best non-random-forest algorithm per dataset. Built explicitly rather than
# read from `.Last.value`, which holds a different object as soon as any other
# expression is evaluated before this cell runs.
t1 <- evaluation_algorithms %>%
  select(-algorithm_precision, -algorithm_recall) %>%
  filter(algorithm != "randomForest") %>%
  group_by(dataset) %>%
  top_n(1, algorithm_f1) %>%
  distinct(algorithm, .keep_all = TRUE) %>%
  select(-variant)

# Best Unsupervised Ensemble

In [55]:
# Top "no_rf" ensemble per dataset, with columns renamed so they can sit next
# to the other result columns in a joined table.
evaluation_ensemble_uns %>%
  select(-ensemble_precision, -ensemble_recall) %>%
  group_by(dataset) %>%
  top_n(n = 1, wt = ensemble_f1) %>%
  rename(
    unsupervised_ensemble = ensemble,
    unsupervised_ensemble_f1 = ensemble_f1
  )

dataset,unsupervised_ensemble,unsupervised_ensemble_f1
dataset_aloi,majority,0.22727273
dataset_iono,glm,0.82753082
dataset_kdd,majority,0.16778523
dataset_pen,majority,0.04
dataset_shuttle,glm,0.86333333
dataset_waveform,majority,0.17021277
dataset_wbc,glm,0.61666667
dataset_wdbc,majority,0.57142857
dataset_wpbc,majority,0.21153846
dataset_ann,majority,0.19920319


In [56]:
# Top "no_rf" ensemble per dataset. Built explicitly rather than read from
# `.Last.value`, which holds a different object as soon as any other expression
# is evaluated before this cell runs.
t2 <- evaluation_ensemble_uns %>%
  select(-ensemble_precision, -ensemble_recall) %>%
  group_by(dataset) %>%
  top_n(1, ensemble_f1) %>%
  rename(
    unsupervised_ensemble = ensemble,
    unsupervised_ensemble_f1 = ensemble_f1
  )

# Random Forest Value

In [57]:
# Random forest F1 per dataset, reduced to the dataset key and the score
# (renamed to rf_f1).
evaluation_algorithms %>%
  filter(algorithm == "randomForest") %>%
  select(
    -algorithm, -variant,
    -algorithm_precision, -algorithm_recall
  ) %>%
  rename(rf_f1 = algorithm_f1)

dataset,rf_f1
dataset_aloi,0.1566124
dataset_iono,0.8957379
dataset_kdd,0.8655868
dataset_pen,0.6
dataset_shuttle,0.98
dataset_waveform,0.1393939
dataset_wbc,0.4333333
dataset_wdbc,0.7
dataset_wpbc,0.3452381
dataset_ann,0.9779297


In [58]:
# Random forest F1 per dataset. Built explicitly rather than read from
# `.Last.value`, which holds a different object as soon as any other expression
# is evaluated before this cell runs.
t3 <- evaluation_algorithms %>%
  select(-algorithm_precision, -algorithm_recall) %>%
  filter(algorithm == "randomForest") %>%
  select(-algorithm, -variant) %>%
  rename(rf_f1 = algorithm_f1)

# Total Ensemble

In [59]:
# Top "with_rf" ensemble per dataset, with columns renamed so they can sit next
# to the other result columns in a joined table.
evaluation_ensemble_total %>%
  select(-ensemble_precision, -ensemble_recall) %>%
  group_by(dataset) %>%
  top_n(n = 1, wt = ensemble_f1) %>%
  rename(
    total_ensemble = ensemble,
    total_ensemble_f1 = ensemble_f1
  )

dataset,total_ensemble,total_ensemble_f1
dataset_aloi,majority,0.2509946
dataset_iono,glm,0.8919529
dataset_kdd,glm,0.8280721
dataset_pen,glm,0.5833333
dataset_shuttle,glm,0.98
dataset_waveform,majority,0.2083333
dataset_wbc,glm,0.6166667
dataset_wdbc,glm,0.6666667
dataset_wpbc,majority,0.3090909
dataset_ann,glm,0.9770084


In [60]:
# Top "with_rf" ensemble per dataset. Built explicitly rather than read from
# `.Last.value`, which holds a different object as soon as any other expression
# is evaluated before this cell runs.
t4 <- evaluation_ensemble_total %>%
  select(-ensemble_precision, -ensemble_recall) %>%
  group_by(dataset) %>%
  top_n(1, ensemble_f1) %>%
  rename(
    total_ensemble = ensemble,
    total_ensemble_f1 = ensemble_f1
  )

In [63]:
# Line up the four per-dataset summaries side by side, keyed on dataset:
# best single algorithm, best "no_rf" ensemble, random forest, best "with_rf"
# ensemble.
t1 %>%
  left_join(t2, by = "dataset") %>%
  left_join(t3, by = "dataset") %>%
  left_join(t4, by = "dataset")

dataset,algorithm,algorithm_f1,unsupervised_ensemble,unsupervised_ensemble_f1,rf_f1,total_ensemble,total_ensemble_f1
dataset_aloi,LOF,0.2068966,majority,0.22727273,0.1566124,majority,0.2509946
dataset_iono,LOF,0.8174603,glm,0.82753082,0.8957379,glm,0.8919529
dataset_kdd,LOF,0.105,majority,0.16778523,0.8655868,glm,0.8280721
dataset_pen,LOF,0.05,majority,0.04,0.6,glm,0.5833333
dataset_shuttle,DBSCAN,0.490566,glm,0.86333333,0.98,glm,0.98
dataset_waveform,LOF,0.15,majority,0.17021277,0.1393939,majority,0.2083333
dataset_wbc,LOF,0.3,glm,0.61666667,0.4333333,glm,0.6166667
dataset_wdbc,LOF,0.7,majority,0.57142857,0.7,glm,0.6666667
dataset_wpbc,DBSCAN,0.3836735,majority,0.21153846,0.3452381,majority,0.3090909
dataset_ann,oneClassSVM,0.2028011,majority,0.19920319,0.9779297,glm,0.9770084


In [64]:
# Per-dataset comparison table used by the cells below. Built explicitly from
# t1..t4 rather than read from `.Last.value`, which holds a different object as
# soon as any other expression is evaluated before this cell runs.
paper_tab <- t1 %>%
  left_join(t2, by = "dataset") %>%
  left_join(t3, by = "dataset") %>%
  left_join(t4, by = "dataset")

In [72]:
# Fraction of paper_tab rows where the "no_rf" ensemble has a higher F1 than
# the best single algorithm.
nrow(filter(paper_tab, unsupervised_ensemble_f1 > algorithm_f1)) / nrow(paper_tab)

In [77]:
# Rows where the "no_rf" ensemble has a higher F1 than the best single algorithm.
filter(paper_tab, unsupervised_ensemble_f1 > algorithm_f1)

dataset,algorithm,algorithm_f1,unsupervised_ensemble,unsupervised_ensemble_f1,rf_f1,total_ensemble,total_ensemble_f1
dataset_aloi,LOF,0.2068966,majority,0.2272727,0.1566124,majority,0.2509946
dataset_iono,LOF,0.8174603,glm,0.8275308,0.8957379,glm,0.8919529
dataset_kdd,LOF,0.105,majority,0.1677852,0.8655868,glm,0.8280721
dataset_shuttle,DBSCAN,0.490566,glm,0.8633333,0.98,glm,0.98
dataset_waveform,LOF,0.15,majority,0.1702128,0.1393939,majority,0.2083333
dataset_wbc,LOF,0.3,glm,0.6166667,0.4333333,glm,0.6166667
dataset_heart,oneClassSVM,0.4720686,glm,0.5092857,0.575,glm,0.5347619
dataset_ads,oneClassSVM,0.3982663,majority,0.4191867,0.8903844,glm,0.8904887
dataset_parkinson,LOF,0.5833333,majority,0.6451613,0.5,majority,0.6451613


In [73]:
# Fraction of paper_tab rows where the "with_rf" ensemble has a higher F1 than
# both the best single algorithm and random forest.
nrow(
  filter(
    paper_tab,
    total_ensemble_f1 > algorithm_f1,
    total_ensemble_f1 > rf_f1
  )
) / nrow(paper_tab)

In [75]:
# Rows where the "with_rf" ensemble has a higher F1 than both the best single
# algorithm and random forest.
filter(
  paper_tab,
  total_ensemble_f1 > algorithm_f1,
  total_ensemble_f1 > rf_f1
)

dataset,algorithm,algorithm_f1,unsupervised_ensemble,unsupervised_ensemble_f1,rf_f1,total_ensemble,total_ensemble_f1
dataset_aloi,LOF,0.2068966,majority,0.2272727,0.1566124,majority,0.2509946
dataset_waveform,LOF,0.15,majority,0.1702128,0.1393939,majority,0.2083333
dataset_wbc,LOF,0.3,glm,0.6166667,0.4333333,glm,0.6166667
dataset_arr,LOF,0.3333333,majority,0.3333333,0.18,majority,0.3606557
dataset_hepatitis,oneClassSVM,0.3799351,majority,0.3684211,0.5,glm,0.5833333
dataset_ads,oneClassSVM,0.3982663,majority,0.4191867,0.8903844,glm,0.8904887
dataset_parkinson,LOF,0.5833333,majority,0.6451613,0.5,majority,0.6451613
dataset_pima,DBSCAN,0.4079422,majority,0.3868852,0.4399987,majority,0.4753086
dataset_stamps,DBSCAN,0.31,majority,0.1944444,0.7864286,glm,0.7990476


In [81]:
# Rows where the "with_rf" ensemble has a higher F1 than the "no_rf" ensemble.
filter(paper_tab, total_ensemble_f1 > unsupervised_ensemble_f1)

dataset,algorithm,algorithm_f1,unsupervised_ensemble,unsupervised_ensemble_f1,rf_f1,total_ensemble,total_ensemble_f1
dataset_aloi,LOF,0.2068966,majority,0.22727273,0.1566124,majority,0.2509946
dataset_iono,LOF,0.8174603,glm,0.82753082,0.8957379,glm,0.8919529
dataset_kdd,LOF,0.105,majority,0.16778523,0.8655868,glm,0.8280721
dataset_pen,LOF,0.05,majority,0.04,0.6,glm,0.5833333
dataset_shuttle,DBSCAN,0.490566,glm,0.86333333,0.98,glm,0.98
dataset_waveform,LOF,0.15,majority,0.17021277,0.1393939,majority,0.2083333
dataset_wdbc,LOF,0.7,majority,0.57142857,0.7,glm,0.6666667
dataset_wpbc,DBSCAN,0.3836735,majority,0.21153846,0.3452381,majority,0.3090909
dataset_ann,oneClassSVM,0.2028011,majority,0.19920319,0.9779297,glm,0.9770084
dataset_arr,LOF,0.3333333,majority,0.33333333,0.18,majority,0.3606557


In [80]:
# Fraction of paper_tab rows where the "with_rf" ensemble has a higher F1 than
# the "no_rf" ensemble.
nrow(filter(paper_tab, total_ensemble_f1 > unsupervised_ensemble_f1)) / nrow(paper_tab)



In [82]:
# Fraction of paper_tab rows where random forest has a higher F1 than the best
# non-random-forest algorithm.
nrow(filter(paper_tab, rf_f1 > algorithm_f1)) / nrow(paper_tab)

In [83]:
# Fraction of paper_tab rows where the "no_rf" ensemble already has a higher F1
# than random forest, yet the "with_rf" ensemble scores higher still.
nrow(
  filter(
    paper_tab,
    unsupervised_ensemble_f1 > rf_f1,
    total_ensemble_f1 > unsupervised_ensemble_f1
  )
) / nrow(paper_tab)

In [85]:
# Rows where the "no_rf" ensemble has a higher F1 than random forest and the
# "with_rf" ensemble is higher again; `dif` is the F1 gain between the two
# ensembles.
paper_tab %>%
  filter(
    unsupervised_ensemble_f1 > rf_f1,
    total_ensemble_f1 > unsupervised_ensemble_f1
  ) %>%
  mutate(dif = total_ensemble_f1 - unsupervised_ensemble_f1)

dataset,algorithm,algorithm_f1,unsupervised_ensemble,unsupervised_ensemble_f1,rf_f1,total_ensemble,total_ensemble_f1,dif
dataset_aloi,LOF,0.2068966,majority,0.2272727,0.1566124,majority,0.2509946,0.02372185
dataset_waveform,LOF,0.15,majority,0.1702128,0.1393939,majority,0.2083333,0.03812057
dataset_arr,LOF,0.3333333,majority,0.3333333,0.18,majority,0.3606557,0.0273224
