# Import Libraries

In [None]:
import sys
import pandas as pd
import numpy as np

from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn import random_projection
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import fbeta_score, roc_curve, auc
from sklearn import svm
from sklearn.ensemble import IsolationForest

import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as offline

from keras.layers import Input, Dense
from keras.models import Model
from keras import regularizers
from keras.optimizers import Adam

from itertools import product
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import pickle
import json


# Initialise plotly's offline notebook mode so figures can be rendered with offline.iplot.
offline.init_notebook_mode(connected=False)

# Raise the pandas column display limit so wide frames are shown without truncation.
pd.options.display.max_columns = 999

# Put the project's modeling toolbox first on the import path so that the local
# modules imported below (metric_processor, evaluation) can be resolved.
sys.path.insert(0, '../../scripts/modeling_toolbox/')
# load the autoreload extension
%load_ext autoreload
# Set extension to reload modules every time before executing code
%autoreload 2

from metric_processor import MetricProcessor
import evaluation

# Render matplotlib figures inline in the cell output.
%matplotlib inline

# Data Preparation

In [None]:
# Columns used as model inputs. 'temporal_match-mean' is currently disabled
# (kept below as a comment).
features = [
    'dimension',
    'size',
    'temporal_dct-mean',
    'temporal_gaussian_mse-mean',
    'temporal_gaussian_difference-mean',
    'temporal_threshold_gaussian_difference-mean',
    # 'temporal_match-mean',
]

# Input CSV, relative to the notebook's working directory.
path = '../../machine_learning/cloud_functions/data-large.csv'

metric_processor = MetricProcessor(
    features,
    'UL',
    path,
    reduced=False,
    scale=True,
    bins=0,
)

# The resulting frame carries a 'unique_ID' column: it is dropped before the
# train/test split and used as the merge key for the QoE data further down.
df = metric_processor.read_and_process_data(unique_ID=True)

df.shape

In [None]:
# Preview the first rows of the processed frame.
display(df.head())

In [None]:

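# Low-bitrate renditions can be removed since we are only focused on tampering attacks. The rotation attacks can
# also be removed since they will be detected by the pre-verifier just by checking output dimensions.
# NOTE: the filter below is currently commented out, so both kinds of rows are still present in df.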
#df = df[~(df['attack'].str.contains('low_bitrate')) & ~(df['attack'].str.contains('rotate'))]

In [None]:
# 'unique_ID' is dropped before splitting; the call returns the three feature
# matrices together with their matching dataframes.
(X_train, X_test, X_attacks), (df_train, df_test, df_attacks) = metric_processor.split_test_and_train(
    df.drop('unique_ID', axis=1)
)

# One line per subset, same text as three separate prints.
print('\n'.join(template.format(matrix.shape)
                for template, matrix in (('Shape of train: {}', X_train),
                                         ('Shape of test: {}', X_test),
                                         ('Shape of attacks: {}', X_attacks))))

print(X_train)
df.head()

The train and test sets are composed **only** of legit assets.

In [None]:
# Scaling the data: the scaler is fitted on the training matrix only and the
# same fitted scaler is then applied to the train, test and attack matrices.
# NOTE(review): the MetricProcessor used to load the data was built with
# scale=True -- confirm the features are not being scaled twice.
ss = StandardScaler().fit(X_train)
x_train, x_test, x_attacks = (ss.transform(matrix) for matrix in (X_train, X_test, X_attacks))

# One Class SVM

In [None]:
# Train the model
# RBF-kernel one-class SVM, fitted on the scaled training matrix only (x_train).
# NOTE(review): nu=0.01 and cache_size=5000 are hard-coded here; presumably they
# were tuned elsewhere -- confirm before reusing on a different dataset.
OCSVM = svm.OneClassSVM(kernel='rbf',gamma='auto', nu=0.01, cache_size=5000)

# Last expression of the cell: fits the model and shows its repr as cell output.
OCSVM.fit(x_train)

In [None]:
# Evaluate the fitted model on the scaled train, test and attack matrices.
# NOTE(review): evaluation.unsupervised_evaluation is defined outside this
# notebook; the meaning of each returned value is inferred from the variable
# names and the labels printed below -- confirm against the helper.
fb, area, tnr, tpr_train, tpr_test = evaluation.unsupervised_evaluation(OCSVM, x_train, x_test, x_attacks)
# Show global results of classification
print('TNR: {}\nTPR_test: {}\nTPR_train: {}\n'.format(tnr, tpr_test, tpr_train))
print('F20: {}\nAUC: {}'.format(fb, area))

In [None]:
# Store, for every sample, its value of the OCSVM decision function.
# A negative distance means that the sample is classified as an attack.
df_train['dist_to_dec_funct'] = OCSVM.decision_function(x_train)
df_test['dist_to_dec_funct'] = OCSVM.decision_function(x_test)
df_attacks['dist_to_dec_funct'] = OCSVM.decision_function(x_attacks)

# Summary statistics, one table per subset (train, test, attacks).
display(df_train.describe(), df_test.describe(), df_attacks.describe())

# Mean distance per subset, in the same train / test / attacks order.
print('Mean score values:\n-Train: {}\n-Test: {}\n-Attacks: {}'.format(
    *(subset['dist_to_dec_funct'].mean() for subset in (df_train, df_test, df_attacks))
))

# QoE metrics-based classifier

In [None]:
# NOTE: `path` and `metric_processor` are rebound in this cell; from here on they
# refer to the QoE dataset, not to the one loaded at the top of the notebook.
path = '../../machine_learning/cloud_functions/data-qoe-large.csv'

# Columns read from the QoE file.
features_qoe = [
    'dimension',
    'size',
    'temporal_ssim-mean',
    'temporal_psnr-mean',
    'temporal_ssim-euclidean',
    'temporal_psnr-euclidean',
]

metric_processor = MetricProcessor(
    features_qoe,
    'UL',
    path,
    reduced=10000,
    bins=0,
    scale=False,
)
# unique_ID=True so the frame carries the 'unique_ID' column used as merge key below.
df_qoe = metric_processor.read_and_process_data(unique_ID=True)

Now we have to merge the QoE dataframe with the training dataframe, so that each row carries both the QoE metrics and the features used to train the model.

In [None]:
# Join the training frame (left) with the QoE frame (right) on 'unique_ID'.
# NOTE: df_qoe is rebuilt from itself here, so this cell is not idempotent;
# re-run the cell that loads df_qoe before re-running this one.
df_qoe = (
    df.merge(df_qoe, on='unique_ID')
      # Columns present in both frames come back suffixed with _x / _y:
      # the left-hand copies get their plain names back ...
      .rename(columns={plain_name + '_x': plain_name
                       for plain_name in ('attack_ID', 'title', 'attack', 'dimension', 'size')})
      # ... and the right-hand copies of the identification columns are discarded.
      .drop(['attack_ID_y', 'title_y', 'attack_y'], axis=1)
)

# Marker colour used by the scatter plots: red when attack_ID >= 10, green otherwise.
# NOTE(review): presumably attack_ID >= 10 identifies attacks -- confirm.
df_qoe['color'] = np.where(df_qoe['attack_ID'] >= 10, 'red', 'green')

# Convert PSNR to a linear value so we can establish a threshold.
# This inverts PSNR = 10 * log10(255^2 / MSE), i.e. MSE = 10 ** ((10 * log10(255^2) - PSNR) / 10).
max_error = np.log10(255*255)
df_qoe['mse'] = df_qoe['temporal_psnr-mean'].apply(lambda psnr: 10**((10 * max_error - psnr)/10))

In [None]:
# Shape of the merged frame, followed by a preview of its first rows.
print(df_qoe.shape)
df_qoe.head()

In [None]:
# Make the prediction using the simple QoE assumption: one fixed threshold per
# metric. +1 marks a sample as legit and -1 as an attack, matching the
# convention of OCSVM.predict used below.
df_qoe['ssim_pred'] = np.where(df_qoe['temporal_ssim-mean'] > 0.9, 1, -1)
df_qoe['mse_pred'] = np.where(df_qoe['mse'] < 15, 1, -1)
df_qoe['psnr_pred'] = np.where(df_qoe['temporal_psnr-mean'] > 35, 1, -1)


# Make prediction using OCSVM, reusing the scaler fitted on the training data
# and the same feature columns the model was trained on.
x_ocsvm = ss.transform(df_qoe[features])

df_qoe['ocsvm_pred'] = OCSVM.predict(x_ocsvm)
df_qoe['ocsvm_dist'] = OCSVM.decision_function(x_ocsvm)
display(df_qoe.head(100))

In [None]:
def qoe_evaluation(prediction, train_set, test_set, attack_set, beta=20):
    """Score a column of precomputed +1/-1 predictions.

    Samples in ``train_set`` and ``test_set`` are expected to be predicted as
    +1 and samples in ``attack_set`` as -1.

    Parameters
    ----------
    prediction : str
        Name of the column holding the predictions in the three frames.
    train_set, test_set, attack_set : DataFrame
        Frames containing the ``prediction`` column.
    beta : int, optional
        Beta passed to ``fbeta_score`` (default 20).

    Returns
    -------
    tuple
        ``(fb, area, tnr, tpr_train, tpr_test)`` where ``fb`` is the F-beta
        score and ``area`` the ROC AUC, both computed on test + attack samples
        with +1 as the positive label; ``tnr`` is the fraction of attack rows
        predicted -1; ``tpr_train`` / ``tpr_test`` are the fractions of train /
        test rows predicted +1.
    """
    train_pred = train_set[prediction]
    test_pred = test_set[prediction]
    attack_pred = attack_set[prediction]

    # Ground truth for the test + attack rows: +1 for every test row, -1 for
    # every attack row, in the same order as the stacked predictions.
    y_true = np.concatenate([np.ones(test_pred.shape[0]), np.full(attack_pred.shape[0], -1.0)])
    y_pred = np.concatenate([test_pred, attack_pred])

    fpr, tpr, _ = roc_curve(y_true, y_pred, pos_label=1)
    fb = fbeta_score(y_true, y_pred, beta=beta, pos_label=1)

    # Fraction of rows in each subset that received the expected label.
    tnr = np.count_nonzero(attack_pred == -1) / attack_set.shape[0]
    tpr_test = np.count_nonzero(test_pred == 1) / test_set.shape[0]
    tpr_train = np.count_nonzero(train_pred == 1) / train_set.shape[0]

    area = auc(fpr, tpr)
    return fb, area, tnr, tpr_train, tpr_test

In [None]:
# Evaluate the naive QoE assumption

# NOTE: X_train, X_test and X_attacks are rebound here to the split of the
# merged QoE frame; they no longer hold the matrices used to fit the scaler.
(X_train, X_test, X_attacks), (df_qoe_train, df_qoe_test, df_qoe_attacks) = metric_processor.split_test_and_train(df_qoe)

print('\n'.join(template.format(matrix.shape)
                for template, matrix in (('Shape of train: {}', X_train),
                                         ('Shape of test: {}', X_test),
                                         ('Shape of attacks: {}', X_attacks))))
accuracy_df = pd.DataFrame(columns=['f20', 'area', 'tnr', 'tpr_train', 'tpr_test'])

# One row per classifier: (row label in the results table, prediction column).
for row_label, prediction_column in (('SSIM', 'ssim_pred'),
                                     ('PSNR', 'psnr_pred'),
                                     ('MSE', 'mse_pred'),
                                     ('OCSVM', 'ocsvm_pred')):
    accuracy_df.loc[row_label] = qoe_evaluation(prediction_column,
                                                df_qoe_train,
                                                df_qoe_test,
                                                df_qoe_attacks)

display(accuracy_df)

In [None]:
# One scatter plot per feature: OCSVM decision function on the x axis, the
# feature on the y axis, points coloured with the precomputed 'color' column.
compare_features = features + ['temporal_psnr-mean', 'temporal_ssim-mean', 'mse']
for feature in compare_features:
    traceSources = go.Scatter(
        x=df_qoe['ocsvm_dist'],
        y=df_qoe[feature],
        # z=df_qoe['dimension'],
        hoverinfo='skip',
        mode='markers',
        text=df_qoe['attack'].values,
        showlegend=False,
        marker={
            'size': 2,
            'color': df_qoe['color'],
            'showscale': False,
            'opacity': 0.8,
        },
    )
    data = [traceSources]

    layout = {
        'title': 'OCSVM decision function vs {}'.format(feature),
        'hovermode': 'closest',
        'yaxis': {'zeroline': False, 'title': feature},
        'xaxis': {'zeroline': False, 'title': 'Decision function'},
        'showlegend': True,
        'height': 900,
    }

    fig = {'data': data, 'layout': layout}

    offline.iplot(fig)