In [1]:
import os
import pandas
import numpy

from sklearn.svm import OneClassSVM
import sklearn.metrics as metrics
from sklearn.ensemble import IsolationForest

In [2]:
FOLDER = "../results/2020-09-28/da_vae/"

mu_inliers = pandas.read_csv(os.path.join(FOLDER, "mu_train_inliers_scenario_cars_egal.csv"), header=None, prefix="mu_").drop(columns=["mu_0"])
mu_inliers = mu_inliers[1:mu_inliers.shape[0]]
mu_inliers["outliers"] = False

mu_outliers = pandas.read_csv(os.path.join(FOLDER, "mu_train_outliers_scenario_cars_egal.csv"), header=None, prefix="mu_").drop(columns=["mu_0"])
mu_outliers = mu_outliers[1:mu_outliers.shape[0]]
mu_outliers["outliers"] = True

sigma_inliers = pandas.read_csv(os.path.join(FOLDER, "sigma_train_inliers_scenario_cars_egal.csv"), header=None, prefix="sigma_").drop(columns=["sigma_0"])
sigma_inliers = sigma_inliers[1:sigma_inliers.shape[0]]

sigma_outliers = pandas.read_csv(os.path.join(FOLDER, "sigma_train_outliers_scenario_cars_egal.csv"), header=None, prefix="sigma_").drop(columns=["sigma_0"])
sigma_outliers = sigma_outliers[1:sigma_outliers.shape[0]]

data_rep = pandas.concat([pandas.concat([mu_inliers, sigma_inliers], axis=1), pandas.concat([mu_outliers, sigma_outliers], axis=1)])

print(data_rep.shape)
print(data_rep.columns)
data_rep.head()

(10000, 51)
Index(['mu_1', 'mu_2', 'mu_3', 'mu_4', 'mu_5', 'mu_6', 'mu_7', 'mu_8', 'mu_9',
       'mu_10', 'mu_11', 'mu_12', 'mu_13', 'mu_14', 'mu_15', 'mu_16', 'mu_17',
       'mu_18', 'mu_19', 'mu_20', 'mu_21', 'mu_22', 'mu_23', 'mu_24', 'mu_25',
       'outliers', 'sigma_1', 'sigma_2', 'sigma_3', 'sigma_4', 'sigma_5',
       'sigma_6', 'sigma_7', 'sigma_8', 'sigma_9', 'sigma_10', 'sigma_11',
       'sigma_12', 'sigma_13', 'sigma_14', 'sigma_15', 'sigma_16', 'sigma_17',
       'sigma_18', 'sigma_19', 'sigma_20', 'sigma_21', 'sigma_22', 'sigma_23',
       'sigma_24', 'sigma_25'],
      dtype='object')


Unnamed: 0,mu_1,mu_2,mu_3,mu_4,mu_5,mu_6,mu_7,mu_8,mu_9,mu_10,...,sigma_16,sigma_17,sigma_18,sigma_19,sigma_20,sigma_21,sigma_22,sigma_23,sigma_24,sigma_25
1,0.787054,0.147824,-0.32481,0.816929,1.125872,-1.600141,-1.211472,0.191429,0.701913,0.003673,...,0.339324,0.350935,0.312231,0.299975,0.307308,0.35133,0.246808,0.299607,0.270722,0.338421
2,-0.77151,-0.667571,-0.576272,-1.444775,-0.19338,1.14971,0.104275,-0.254978,1.931073,-0.507665,...,0.210557,0.242951,0.264957,0.257416,0.291715,0.256402,0.263446,0.248467,0.244524,0.260926
3,0.650881,0.354527,-0.854581,-0.64673,-0.103038,-1.17246,0.955655,-0.951517,0.9133,-0.379478,...,0.278381,0.241598,0.269358,0.236655,0.244096,0.250294,0.210411,0.267848,0.19356,0.238589
4,0.549507,0.688395,-1.430703,0.90992,0.776369,0.490633,-1.955449,-0.094513,-0.405247,-0.892765,...,0.26517,0.288527,0.215879,0.254592,0.227859,0.264422,0.28856,0.27,0.266442,0.269569
5,-0.490732,1.03662,0.067829,0.689034,-0.165989,0.539279,-2.3682,0.200933,1.676649,-0.50156,...,0.273967,0.293365,0.366862,0.30349,0.406144,0.393857,0.342326,0.28736,0.26544,0.320481


In [58]:
# One-class SVM
svm = OneClassSVM(kernel="rbf", gamma=0.5, nu=0.05).fit(data_rep.drop(columns=["outliers"]))
train_preds = svm.predict(data_rep.drop(columns=["outliers"]))
preds = numpy.zeros(data_rep.shape[0])
preds[numpy.argwhere(train_preds == -1)] = 1
index = data_rep["outliers"]

precision = metrics.precision_score(index, preds)
recall = metrics.recall_score(index, preds)
f1_score = metrics.f1_score(index, preds)
average_precision = metrics.average_precision_score(index, preds)
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1_score}")
print(f"Average Precision: {average_precision}")

Precision: 0.03143989431968296
Recall: 0.238
F1 Score: 0.05554259043173861
Average Precision: 0.04558269484808455


In [19]:
# Isolation forests
iso_forest = IsolationForest(max_samples='auto', max_features=0.5)
iso_forest.fit(data_rep.drop(columns=["outliers"]))
train_preds = iso_forest.score_samples(data_rep.drop(columns=["outliers"]))
perc = numpy.percentile(train_preds, 5)
print(perc)
preds = numpy.zeros(data_rep.shape[0])
preds[numpy.argwhere(train_preds <= perc)] = 1
index = data_rep["outliers"]

precision = metrics.precision_score(index, preds)
recall = metrics.recall_score(index, preds)
f1_score = metrics.f1_score(index, preds)
average_precision = metrics.average_precision_score(index, preds)
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1_score}")
print(f"Average Precision: {average_precision}")

-0.5210746127608605
Precision: 0.508
Recall: 0.508
F1 Score: 0.508
Average Precision: 0.282664
