  # 1 - Evaluate datasets on the anomaly algorithms

In [6]:
# Fix the RNG seed so that any random steps in this session are reproducible.
set.seed(20)

# dplyr supplies the pipe and the data-frame verbs used in the cells below.
library(dplyr)
# NOTE(review): caret and tidyr are not referenced in the visible cells --
# presumably needed elsewhere in the notebook; confirm before removing.
library(caret)
library(tidyr)

In [7]:
# Load the per-algorithm metrics and prefix the metric columns with
# "algorithm_" so they stay distinguishable from other metric tables.
evaluation_algorithms <-
  read.csv("results_evaluation/algorithms_metrics_2017-05-26.csv") %>%
  rename(
    algorithm_f1 = f1,
    algorithm_precision = precision,
    algorithm_recall = recall
  )

- Casos em que, no Random, o precision e o recall são 0 e o F1 é NaN (F1 = 2·P·R / (P + R) = 0/0): não existiu nenhum caso em que o random acertasse.

In [21]:
# Number of distinct datasets present in the algorithm metrics table.
length(unique(evaluation_algorithms$dataset))

In [8]:
# Display the per-algorithm metrics table (one row per dataset/variant).
evaluation_algorithms

dataset,algorithm,variant,algorithm_f1,algorithm_precision,algorithm_recall
dataset_aloi,DBSCAN,dbscan_0.3,0.07183342,0.03766637,0.77320955
dataset_aloi,DBSCAN,dbscan_0.5,0.07760393,0.04142224,0.61339523
dataset_aloi,DBSCAN,dbscan_0.7,0.08007986,0.04369402,0.47877984
dataset_aloi,DBSCAN,dbscan_0.9,0.08166263,0.04591626,0.36870027
dataset_aloi,DBSCAN,dbscan_1.1,0.08042446,0.04677856,0.28647215
dataset_aloi,LOF,lof_03,0.20689655,0.20689655,0.20689655
dataset_aloi,LOF,lof_05,0.20557029,0.20557029,0.20557029
dataset_aloi,LOF,lof_08,0.18435013,0.18435013,0.18435013
dataset_aloi,LOF,lof_14,0.15583554,0.15583554,0.15583554
dataset_aloi,LOF,lof_19,0.14854111,0.14854111,0.14854111


Que casos têm F1 a NA?

In [9]:
# List the (dataset, algorithm, variant) combinations whose F1 is undefined.
evaluation_algorithms %>%
  filter(is.na(algorithm_f1)) %>%
  select(dataset, algorithm, variant)

dataset,algorithm,variant
dataset_kdd,random,random
dataset_pen,LOF,lof_25
dataset_pen,LOF,lof_30
dataset_pen,random,random
dataset_shuttle,random,random
dataset_wbc,LOF,lof_03
dataset_wbc,LOF,lof_05
dataset_wbc,LOF,lof_08
dataset_wbc,LOF,lof_14
dataset_wbc,LOF,lof_19


Tirar os NA dos F1 (substituindo-os por 0)

In [10]:
# Work on a copy so the original table (with its NA F1 values) stays intact.
evaluation_algorithms_no_NA <- evaluation_algorithms

In [11]:
# Replace every undefined F1 (NA/NaN) with 0; precision and recall columns
# are left as they are.
evaluation_algorithms_no_NA$algorithm_f1[
  is.na(evaluation_algorithms_no_NA$algorithm_f1)
] <- 0

In [12]:
# Display the metrics table after the NA F1 values were replaced.
evaluation_algorithms_no_NA

dataset,algorithm,variant,algorithm_f1,algorithm_precision,algorithm_recall
dataset_aloi,DBSCAN,dbscan_0.3,0.07183342,0.03766637,0.77320955
dataset_aloi,DBSCAN,dbscan_0.5,0.07760393,0.04142224,0.61339523
dataset_aloi,DBSCAN,dbscan_0.7,0.08007986,0.04369402,0.47877984
dataset_aloi,DBSCAN,dbscan_0.9,0.08166263,0.04591626,0.36870027
dataset_aloi,DBSCAN,dbscan_1.1,0.08042446,0.04677856,0.28647215
dataset_aloi,LOF,lof_03,0.20689655,0.20689655,0.20689655
dataset_aloi,LOF,lof_05,0.20557029,0.20557029,0.20557029
dataset_aloi,LOF,lof_08,0.18435013,0.18435013,0.18435013
dataset_aloi,LOF,lof_14,0.15583554,0.15583554,0.15583554
dataset_aloi,LOF,lof_19,0.14854111,0.14854111,0.14854111


All the algorithms were better than random! Random forest seems to be better than almost any algorithm in most datasets. Precision seems to be the strongest point of this algorithm. LOF keeps outputting the same precision and recall.

In [14]:
# Best algorithm variant per dataset by F1; tied variants are all kept.
evaluation_algorithms_no_NA %>%
  group_by(dataset) %>%
  top_n(1, algorithm_f1)

dataset,algorithm,variant,algorithm_f1,algorithm_precision,algorithm_recall
dataset_aloi,LOF,lof_03,0.2068966,0.2068966,0.2068966
dataset_iono,randomForest,rf,0.8957379,0.9274292,0.8730769
dataset_kdd,randomForest,rf,0.8655868,0.9652798,0.79
dataset_pen,randomForest,rf,0.6,0.8,0.5
dataset_shuttle,randomForest,rf,0.98,0.9666667,1.0
dataset_waveform,LOF,lof_25,0.15,0.15,0.15
dataset_waveform,LOF,lof_30,0.15,0.15,0.15
dataset_wbc,randomForest,rf,0.4333333,0.4,0.5
dataset_wdbc,LOF,lof_19,0.7,0.7,0.7
dataset_wdbc,randomForest,rf,0.7,0.7,0.7


# 2 - Evaluate datasets on the ensemble

In [15]:
# Load the ensemble metrics and prefix the metric columns with "ensemble_"
# so they cannot clash with same-named columns of other metric tables.
evaluation_ensemble <-
  read.csv("results_evaluation/ensemble_metrics_2017-05-26.csv") %>%
  rename(
    ensemble_f1 = f1,
    ensemble_precision = precision,
    ensemble_recall = recall
  )

In [16]:
# Display the ensemble metrics table (one row per dataset/ensemble type).
evaluation_ensemble

dataset,ensemble,ensemble_f1,ensemble_precision,ensemble_recall
dataset_aloi,majority,0.25099458,0.2760541,0.2301061
dataset_aloi,glm,0.19191009,0.67121828,0.11271523
dataset_iono,majority,0.84507042,0.75949367,0.95238095
dataset_iono,glm,0.89195286,0.89885781,0.88910256
dataset_kdd,majority,0.49171271,0.54938272,0.445
dataset_kdd,glm,0.8280721,0.8893641,0.79
dataset_pen,majority,0.25,0.19444444,0.35
dataset_pen,glm,0.72916667,0.83333333,0.5
dataset_shuttle,majority,0.65,0.48148148,1.0
dataset_shuttle,glm,0.98,0.96666667,1.0


In [31]:
# For each dataset, keep the best stand-alone algorithm variant(s) by F1 (ties
# are kept) and list the ensembles whose F1 is strictly higher.
# The join pairs every algorithm row with every ensemble row of its dataset.
# The NA-filled table is used so that an undefined algorithm F1 counts as 0,
# consistent with the best-per-dataset ranking; with the raw table, top_n()
# drops rows whose F1 is NA, so an all-NA dataset would silently disappear.
left_join(evaluation_algorithms_no_NA, evaluation_ensemble, by = "dataset") %>%
  group_by(dataset) %>%
  top_n(1, algorithm_f1) %>%
  filter(ensemble_f1 > algorithm_f1)

dataset,algorithm,variant,algorithm_f1,algorithm_precision,algorithm_recall,ensemble,ensemble_f1,ensemble_precision,ensemble_recall
dataset_aloi,LOF,lof_03,0.2068966,0.2068966,0.2068966,majority,0.2509946,0.2760541,0.2301061
dataset_pen,randomForest,rf,0.6,0.8,0.5,glm,0.7291667,0.8333333,0.5
dataset_waveform,LOF,lof_25,0.15,0.15,0.15,majority,0.2083333,0.1785714,0.25
dataset_waveform,LOF,lof_25,0.15,0.15,0.15,glm,0.2323232,1.0,0.08
dataset_waveform,LOF,lof_30,0.15,0.15,0.15,majority,0.2083333,0.1785714,0.25
dataset_waveform,LOF,lof_30,0.15,0.15,0.15,glm,0.2323232,1.0,0.08
dataset_wbc,randomForest,rf,0.4333333,0.4,0.5,majority,0.4827586,0.3684211,0.7
dataset_wbc,randomForest,rf,0.4333333,0.4,0.5,glm,0.8809524,0.8333333,0.7
dataset_wdbc,LOF,lof_19,0.7,0.7,0.7,glm,0.952381,0.9285714,0.7
dataset_wdbc,randomForest,rf,0.7,0.7,0.7,glm,0.952381,0.9285714,0.7


Temos 21 datasets:
- Em 12 datasets pelo menos um dos ensembles (majority ou logistic regression) foi melhor do que qualquer algoritmo sozinho
    - Em 2 datasets o majority foi o único tipo de ensemble melhor do que qualquer algoritmo sozinho
    - Em 6 datasets o logistic regression foi o único tipo de ensemble melhor do que qualquer algoritmo sozinho
    - Em 4 datasets ambos os tipos de ensemble (majority e logistic regression) foram melhores do que qualquer algoritmo sozinho