# Import Libraries

In [None]:
import sys
import pandas as pd
import numpy as np

from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn import random_projection
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import fbeta_score, roc_curve, auc
from sklearn import svm
from sklearn.ensemble import IsolationForest

import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as offline
from plotly import tools

from itertools import product
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import pickle
import json

# Raise the pandas display limit so up to 999 rows are shown
pd.options.display.max_rows = 999

# Put the project's modeling toolbox first on the import path so the local
# modules imported below can be found
sys.path.insert(0, '../../scripts/modeling_toolbox/')
# Load the IPython autoreload extension
%load_ext autoreload
# Set the extension to reload modules every time before executing code
%autoreload 2

# Project-local helpers (presumably resolved through the sys.path entry above)
from metric_processor import MetricProcessor
import evaluation

# Render matplotlib figures inline and initialise plotly's offline notebook mode
%matplotlib inline
offline.init_notebook_mode()

# Data Preparation

In [None]:
# CSV file the processor reads from
path = '../../machine_learning/cloud_functions/data-large.csv'

# Metric columns handed to the processor
features = [
    'temporal_dct-mean',
    'temporal_gaussian-mean',
    'temporal_spatial_complexity-mean',
    'temporal_difference-mean',
    'dimension',
    'temporal_gaussian_difference-mean',
]

# 'UL' is passed as the processor's second argument
# (presumably the learning mode -- confirm in MetricProcessor)
metric_processor = MetricProcessor(features, 'UL', path)
df = metric_processor.read_and_process_data()
df.shape

In [None]:
# Preview the first rows of the processed data
df.head()

In [None]:
# The processor returns two triples, each ordered (train, test, attacks):
# first the feature matrices, then the matching dataframes
_matrices, _frames = metric_processor.split_test_and_train(df)
X_train, X_test, X_attacks = _matrices
df_train, df_test, df_attacks = _frames

# Report the size of every split
print('Shape of train: {}'.format(X_train.shape))
print('Shape of test: {}'.format(X_test.shape))
print('Shape of attacks: {}'.format(X_attacks.shape))

In [None]:
# Standardise the features: the scaler is fitted on the training split only
# and then applied unchanged to all three splits
ss = StandardScaler().fit(X_train)
x_train = ss.transform(X_train)
x_test = ss.transform(X_test)
x_attacks = ss.transform(X_attacks)

# Fitting model

In [None]:
# One-class SVM with an RBF kernel, fitted on the scaled training split only
OCSVM = svm.OneClassSVM(
    nu=0.01,
    kernel='rbf',
    gamma='auto',
    cache_size=5000,
)
OCSVM.fit(x_train)

In [None]:
# Evaluate the fitted model on the three scaled splits; the five returned
# values are printed in the next cell as F20, AUC, TNR and TPR (train/test)
fb, area, tnr, tpr_train, tpr_test = evaluation.unsupervised_evaluation(OCSVM, x_train, x_test, x_attacks)

In [None]:
# Report the rates first, then the F-beta ("F20") score and the AUC
print('TNR: {}\nTPR_test: {}\nTPR_train: {}\n'.format(tnr, tpr_test, tpr_train))
print('F20: {}\nAUC: {}'.format(fb, area))

# Predicting Distances 

In [None]:
# Signed decision-function value of every sample, one array per split
train_scores, test_scores, attack_scores = (
    OCSVM.decision_function(_split) for _split in (x_train, x_test, x_attacks)
)

# Average score per split, in the order train / test / attacks
_mean_scores = [_scores.mean() for _scores in (train_scores, test_scores, attack_scores)]
print('Mean score values:\n-Train: {}\n-Test: {}\n-Attacks: {}'.format(*_mean_scores))

In [None]:
# One box per split: decision scores of the test set next to those of the attacks
data = [
    go.Box(y=_scores, name=_label)
    for _label, _scores in (('test', test_scores), ('attacks', attack_scores))
]

layout = dict(
    title='Boxplots',
    yaxis=dict(title='Distance to decision function'),
)

fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)

A negative distance means the point lies outside the decision boundary and is therefore classified as an attack.

# Error study

Here we study the distances to the decision function, broken down by attack type and by resolution, in order to gain insight into the model we have built.

In [None]:
# Give every frame a fresh default index; reset_index() keeps the previous
# index as a regular column
df_train = df_train.reset_index()
df_test = df_test.reset_index()
df_attacks = df_attacks.reset_index()

In [None]:
# Store each split's decision scores alongside its rows
for _frame, _scores in (
    (df_train, train_scores),
    (df_test, test_scores),
    (df_attacks, attack_scores),
):
    _frame['distance_to_dec_func'] = _scores

In [None]:
# Distinct resolutions present in the test set and distinct attack labels
# present in the attack set; both drive the per-group plots below
resolutions = df_test['dimension'].unique()
attacks = df_attacks['attack'].unique()

In [None]:
# Test set: one box of decision scores per resolution, in ascending order
resolutions = np.sort(resolutions)
_res_labels = [str(_r) + 'p' for _r in resolutions]

data = []
for res, _label in zip(resolutions, _res_labels):
    _rows = df_test[df_test['dimension'] == res]
    data.append(
        go.Box(
            y=_rows['distance_to_dec_func'],
            name=_label,
            text=_rows['title'],  # hover text: title of each sample
        )
    )

layout = go.Layout(
    title=go.layout.Title(text='Test Set'),
    yaxis=go.layout.YAxis(title='Distance to decision function'),
    xaxis=go.layout.XAxis(
        title='Resolutions',
        tickmode='array',
        ticktext=_res_labels,
    ),
)

fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)

In [None]:
# Attack set: one box of decision scores per resolution, in ascending order
resolutions = np.sort(resolutions)
_res_labels = [str(_r) + 'p' for _r in resolutions]

data = []
for res, _label in zip(resolutions, _res_labels):
    _rows = df_attacks[df_attacks['dimension'] == res]
    data.append(
        go.Box(
            y=_rows['distance_to_dec_func'],
            name=_label,
            text=_rows['title'],  # hover text: title of each sample
        )
    )

layout = go.Layout(
    title=go.layout.Title(text='Attack Set'),
    yaxis=go.layout.YAxis(title='Distance to decision function'),
    xaxis=go.layout.XAxis(
        title='Resolutions',
        tickmode='array',
        ticktext=_res_labels,
    ),
)

fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)

In [None]:
# Attack set: one box of decision scores per attack type.
#
# Attack labels are split at the first underscore and the remainder is used as
# the attack type (the whole label is kept when it has no underscore, instead
# of raising IndexError). Sorting makes the box order reproducible across
# runs; iterating a plain set of strings is not.
attack_types = sorted({label.split('_', 1)[-1] for label in attacks})

# Attack type of every row, used for exact matching. A substring/regex match
# (str.contains) would also select rows of any other type whose label merely
# contains the name, e.g. a type 'rotate' would swallow the rows of 'rotate_90',
# and would misinterpret regex metacharacters in a label.
attack_type_per_row = df_attacks['attack'].str.split('_', n=1).str[-1]

data = []
for attk in attack_types:
    selection = df_attacks[attack_type_per_row == attk]
    data.append(
        go.Box(
            y=selection['distance_to_dec_func'],
            name=attk,
            text=selection['title'],  # hover text: title of each sample
        )
    )

layout = go.Layout(
    title=go.layout.Title(text='Attack Set'),
    yaxis=go.layout.YAxis(title='Distance to decision function'),
    xaxis=go.layout.XAxis(
        title='Attack Type',
        tickmode='array',
        ticktext=attack_types,
    ),
)

fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)

In [None]:
# One figure per resolution, each holding one box per attack type.

# Attack type of every row (text after the first underscore of the label),
# used for exact matching. A substring/regex match (str.contains) would also
# select rows of any other type whose label merely contains the name.
attack_type_per_row = df_attacks['attack'].str.split('_', n=1).str[-1]

for res in resolutions:
    # The resolution mask does not depend on the attack type: compute it once
    at_resolution = df_attacks['dimension'] == res

    # Start a fresh trace list for every figure
    data = []
    for attk in attack_types:
        selection = df_attacks[at_resolution & (attack_type_per_row == attk)]
        data.append(
            go.Box(
                y=selection['distance_to_dec_func'],
                name='{}p-{}'.format(res, attk),
                text=selection['title'],  # hover text: title of each sample
            )
        )

    layout = go.Layout(
        title=go.layout.Title(text=str(res) + 'p'),
        yaxis=go.layout.YAxis(title='Distance to decision function'),
        xaxis=go.layout.XAxis(
            title='Attack Type',
            tickmode='array',
            ticktext=attack_types,
        ),
    )

    fig = go.Figure(data=data, layout=layout)
    offline.iplot(fig)


In [None]:
# Display the last subset built by the loop above
# (last resolution combined with the last attack type)
selection