In [1]:
# Fix the random-number seed so any stochastic step run in this session is
# reproducible.
set.seed(20)

# dplyr supplies the pipe (%>%) and the data-frame verbs used in the cells
# below (rename, group_by, top_n, filter, left_join, select).
library(dplyr)
# NOTE(review): caret and tidyr are not referenced in the cells visible here;
# they may be needed elsewhere -- confirm before removing.
library(caret)
library(tidyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

Loading required package: lattice
Loading required package: ggplot2


In [2]:
# Per-algorithm evaluation metrics. The metric columns are prefixed with
# "algorithm_" so they remain distinguishable after joining with the
# ensemble results further down.
evaluation_algorithms <- rename(
  read.csv("results_evaluation/algorithms_metrics_2017-06-15.csv"),
  algorithm_f1 = f1,
  algorithm_precision = precision,
  algorithm_recall = recall
)

In [3]:
# Metrics for the ensembles stored in the "_all" results file. The `ensemble`
# identifier and the metric columns are renamed so that they do not collide
# with the other result tables when joined by dataset.
evaluation_ensemble <- rename(
  read.csv("results_evaluation/ensemble_metrics_2017-06-15_all.csv"),
  ensemble_all = ensemble,
  ensemble_f1 = f1,
  ensemble_precision = precision,
  ensemble_recall = recall
)

In [4]:
# Metrics for the ensembles stored in the "_sup" results file, with columns
# renamed to carry a "sup" marker.
evaluation_sup <- rename(
  read.csv("results_evaluation/ensemble_metrics_2017-06-15_sup.csv"),
  ensemble_sup = ensemble,
  sup_f1 = f1,
  sup_precision = precision,
  sup_recall = recall
)

In [5]:
# Metrics for the ensembles stored in the "_un" results file, with columns
# renamed to carry a "un" marker.
evaluation_un <- rename(
  read.csv("results_evaluation/ensemble_metrics_2017-06-15_un.csv"),
  un_f1 = f1,
  ensemble_un = ensemble,
  un_precision = precision,
  un_recall = recall
)

In [6]:
# Algorithm names (as they appear in the `algorithm` column of the metrics
# table) that are treated as supervised in the comparisons below.
supervised_algorithms <- c(
  "CART", "naiveBayes", "neuralNetwork", "randomForest",
  "SVM_linear", "SVM_polynomial", "SVM_radial", "SVM_sigmoid"
)

# Algorithm names that are treated as unsupervised in the comparisons below.
unsupervised_algorithms <- c(
  "DBSCAN", "kmeans", "LOF",
  "oneClassSVM_linear", "oneClassSVM_polynomial",
  "oneClassSVM_radial", "oneClassSVM_sigmoid"
)

In [7]:
# Highest-F1 row(s) per dataset across every algorithm; top_n() keeps ties,
# so a dataset can appear more than once.
top_n(group_by(evaluation_algorithms, dataset), 1, algorithm_f1)

dataset,algorithm,variant,algorithm_f1,algorithm_precision,algorithm_recall
dataset_aloi,randomForest,rf,0.5895966,0.6000766,0.5795629
dataset_iono,SVM_radial,SVM_radial,0.9331795,0.9294872,0.9371795
dataset_kdd,randomForest,rf,0.8521795,0.8544737,0.85
dataset_pen,neuralNetwork,mlp,1.0,1.0,1.0
dataset_shuttle,CART,cart,0.9,0.9,0.9
dataset_waveform,SVM_radial,SVM_radial,0.6,0.6,0.6
dataset_wbc,randomForest,rf,0.9,0.9,0.9
dataset_wdbc,naiveBayes,nb,0.9,0.9,0.9
dataset_wdbc,neuralNetwork,mlp,0.9,0.9,0.9
dataset_wdbc,SVM_linear,SVM_linear,0.9,0.9,0.9


In [8]:
# Best single algorithm(s) per dataset by F1 (ties kept).
# Computed explicitly instead of reading `.Last.value`: that variable holds
# whatever top-level expression was evaluated most recently, so re-running
# this cell or running cells out of order would silently store the wrong
# table.
best_algs <- evaluation_algorithms %>%
  group_by(dataset) %>%
  top_n(1, algorithm_f1)

In [9]:
# Highest-F1 row(s) per dataset, restricted to the supervised algorithms.
evaluation_algorithms %>%
  filter(algorithm %in% supervised_algorithms) %>%
  group_by(dataset) %>%
  top_n(1, algorithm_f1)

dataset,algorithm,variant,algorithm_f1,algorithm_precision,algorithm_recall
dataset_aloi,randomForest,rf,0.5895966,0.6000766,0.5795629
dataset_iono,SVM_radial,SVM_radial,0.9331795,0.9294872,0.9371795
dataset_kdd,randomForest,rf,0.8521795,0.8544737,0.85
dataset_pen,neuralNetwork,mlp,1.0,1.0,1.0
dataset_shuttle,CART,cart,0.9,0.9,0.9
dataset_waveform,SVM_radial,SVM_radial,0.6,0.6,0.6
dataset_wbc,randomForest,rf,0.9,0.9,0.9
dataset_wdbc,naiveBayes,nb,0.9,0.9,0.9
dataset_wdbc,neuralNetwork,mlp,0.9,0.9,0.9
dataset_wdbc,SVM_linear,SVM_linear,0.9,0.9,0.9


In [10]:
# Best supervised algorithm(s) per dataset by F1 (ties kept).
# Computed explicitly instead of reading `.Last.value`, which depends on
# which expression happened to be evaluated last and therefore breaks when
# cells are re-run or executed out of order.
best_algs_sup <- evaluation_algorithms %>%
  filter(algorithm %in% supervised_algorithms) %>%
  group_by(dataset) %>%
  top_n(1, algorithm_f1)

In [11]:
# Highest-F1 row(s) per dataset, restricted to the unsupervised algorithms.
evaluation_algorithms %>%
  filter(algorithm %in% unsupervised_algorithms) %>%
  group_by(dataset) %>%
  top_n(1, algorithm_f1)

dataset,algorithm,variant,algorithm_f1,algorithm_precision,algorithm_recall
dataset_aloi,LOF,lof_03,0.2068966,0.2068966,0.2068966
dataset_iono,kmeans,kmeans_25,0.8492063,0.8492063,0.8492063
dataset_kdd,kmeans,kmeans_08,0.56,0.56,0.56
dataset_pen,LOF,lof_03,0.05,0.05,0.05
dataset_pen,LOF,lof_05,0.05,0.05,0.05
dataset_pen,LOF,lof_08,0.05,0.05,0.05
dataset_pen,LOF,lof_14,0.05,0.05,0.05
dataset_pen,LOF,lof_19,0.05,0.05,0.05
dataset_shuttle,DBSCAN,dbscan_1.1,0.490566,0.325,1.0
dataset_waveform,kmeans,kmeans_30,0.21,0.21,0.21


In [12]:
# Best unsupervised algorithm(s) per dataset by F1 (ties kept).
# Computed explicitly instead of reading `.Last.value`, which depends on
# which expression happened to be evaluated last and therefore breaks when
# cells are re-run or executed out of order.
best_algs_un <- evaluation_algorithms %>%
  filter(algorithm %in% unsupervised_algorithms) %>%
  group_by(dataset) %>%
  top_n(1, algorithm_f1)

# Ensemble

In [13]:
# Highest-F1 ensemble row(s) per dataset; ties are kept.
top_n(group_by(evaluation_ensemble, dataset), 1, ensemble_f1)

dataset,ensemble_all,ensemble_f1,ensemble_precision,ensemble_recall
dataset_aloi,mlp,0.6033737,0.7481859,0.5066446
dataset_iono,rf,0.9394954,0.9508059,0.9352564
dataset_kdd,rf,0.8743555,0.9493693,0.82
dataset_pen,rpart,0.98,0.9666667,1.0
dataset_pen,mlp,0.98,0.9666667,1.0
dataset_shuttle,mlp,0.98,0.9666667,1.0
dataset_waveform,glm,0.5782653,0.8482143,0.47
dataset_wbc,rpart,0.8,0.75,0.9
dataset_wdbc,rf,0.8,0.8,0.8
dataset_wpbc,rpart,0.4675469,0.6059524,0.4


In [14]:
# Best ensemble(s) per dataset by F1 (ties kept).
# Computed explicitly instead of reading `.Last.value`, which depends on
# which expression happened to be evaluated last and therefore breaks when
# cells are re-run or executed out of order.
best_ens <- evaluation_ensemble %>%
  group_by(dataset) %>%
  top_n(1, ensemble_f1)

Which performed better: the best ensemble or the best individual algorithm? The ensemble achieved a higher F1 score in 10 datasets.

In [15]:
# Pair each dataset's best algorithm row(s) with its best ensemble row(s) and
# keep only the pairs where the ensemble's F1 is strictly higher.
best_algs %>%
  left_join(best_ens, by = "dataset") %>%
  filter(ensemble_f1 > algorithm_f1)

dataset,algorithm,variant,algorithm_f1,algorithm_precision,algorithm_recall,ensemble_all,ensemble_f1,ensemble_precision,ensemble_recall
dataset_aloi,randomForest,rf,0.5895966,0.6000766,0.5795629,mlp,0.6033737,0.7481859,0.5066446
dataset_iono,SVM_radial,SVM_radial,0.9331795,0.9294872,0.9371795,rf,0.9394954,0.9508059,0.9352564
dataset_kdd,randomForest,rf,0.8521795,0.8544737,0.85,rf,0.8743555,0.9493693,0.82
dataset_shuttle,CART,cart,0.9,0.9,0.9,mlp,0.98,0.9666667,1.0
dataset_ann,randomForest,rf,0.973901,0.9685185,0.979385,rpart,0.9753529,0.9628702,0.988819
dataset_cardio,randomForest,rf,0.8990576,0.9001742,0.8979675,rpart,0.9026999,0.9228273,0.8861208
dataset_ads,randomForest,rf,0.8803406,0.8802553,0.8805556,glm,0.9013635,0.9494595,0.8587087
dataset_blocks,randomForest,rf,0.8852068,0.8861176,0.8843137,rpart,0.8899327,0.8901465,0.8901961
dataset_spam,randomForest,rf,0.8725144,0.8732143,0.8718254,rf,0.8863591,0.9062263,0.86875
dataset_stamps,neuralNetwork,mlp,0.8857143,0.8,1.0,rpart,0.9095238,0.9166667,0.9083333


In [16]:
# Number of distinct datasets in which the best ensemble has a strictly
# higher F1 than the best individual algorithm. unique() is needed because
# ties in either table make a dataset appear in several joined rows.
length(unique(
  filter(
    left_join(best_algs, best_ens, by = "dataset"),
    ensemble_f1 > algorithm_f1
  )[["dataset"]]
))

Supervised and unsupervised ensembles

In [None]:
# Highest-F1 row(s) per dataset from the "_sup" ensemble results (ties kept).
best_sup <- top_n(group_by(evaluation_sup, dataset), 1, sup_f1)

In [None]:
# Highest-F1 row(s) per dataset from the "_un" ensemble results (ties kept).
best_un <- top_n(group_by(evaluation_un, dataset), 1, un_f1)

In [None]:
# Side-by-side summary, one row per dataset (more when there are ties), of the
# best overall / supervised / unsupervised algorithm and the best ensemble from
# each ensemble table, showing F1 only.
#
# best_algs, best_algs_sup and best_algs_un share identical column names, so
# explicit suffixes are supplied; without them the result carries the default
# ".x" / ".y" / unsuffixed columns, which are impossible to tell apart:
#   - no suffix -> best over all algorithms
#   - "_sup"    -> best supervised algorithm
#   - "_un"     -> best unsupervised algorithm
best_algs %>%
  left_join(best_algs_sup, by = "dataset", suffix = c("", "_sup")) %>%
  left_join(best_algs_un, by = "dataset", suffix = c("", "_un")) %>%
  left_join(best_ens, by = "dataset") %>%
  left_join(best_sup, by = "dataset") %>%
  left_join(best_un, by = "dataset") %>%
  # Drop precision/recall (including the suffixed variants) to keep F1 only.
  select(-contains("precision"), -contains("recall"))