In [None]:
import sys
import glob, os
import pandas as pd
import plotly.plotly as py

from scipy import linalg
from scipy import signal
import matplotlib.pyplot as plt
import numpy as np

sys.path.insert(0, '../../scripts/modeling_toolbox/')
# load the autoreload extension
%load_ext autoreload
# Set extension to reload modules every time before executing code
%autoreload 2

from metric_processor import MetricProcessor
import evaluation


%matplotlib inline


In [None]:
# Data file handed to MetricProcessor below; relative to this notebook's directory.
path = (
    '../../machine_learning/cloud_functions/'
    'data-large.csv.old'
)

In [None]:
# Feature names handed to the project-local MetricProcessor, one per line.
features = [
    'temporal_canny-euclidean',
    'temporal_cross_correlation-euclidean',
    'temporal_difference-euclidean',
    'temporal_histogram_distance-euclidean',
    'temporal_dct-euclidean',
    'size',
    'dimension',
    'temporal_gaussian-mean',
    'temporal_dct-std',
    'temporal_dct-manhattan',
    'temporal_dct-mean',
    'temporal_histogram_distance-mean',
    'temporal_cross_correlation-mean',
    'temporal_canny-mean',
    'temporal_gaussian-euclidean',
]

# The second argument ('UL') is passed through unchanged; its meaning is defined
# by MetricProcessor, which is not visible in this notebook.
metric_processor = MetricProcessor(features, 'UL', path)
df = metric_processor.read_and_process_data()

# Cell output: (rows, columns) of the processed frame.
df.shape

In [None]:
# The first element returned by split_test_and_train is not used in this notebook;
# the second unpacks into the train, test and attack frames.
_, split_frames = metric_processor.split_test_and_train(df)
df_train, df_test, df_attacks = split_frames

# Report the size of each partition.
for message, frame in (
    ('Shape of train: {}', df_train),
    ('Shape of test: {}', df_test),
    ('Shape of attacks: {}', df_attacks),
):
    print(message.format(frame.shape))

In [None]:
# Metrics for which per-resolution thresholds are derived in the following cells.
metrics = [
    'temporal_dct-mean',
    'temporal_histogram_distance-mean',
    'temporal_gaussian-mean',
]

# Resolution values; later cells turn them into labels such as '144p'.
# Kept as a list because a later cell calls .copy() on it and extends the copy.
resolutions = [
    144,
    240,
    360,
    480,
    720,
]

In [None]:
# For every metric and resolution, take the 0.99 quantile of the metric over the
# training rows whose 'attack' label equals '<resolution>p'. Using that value as a
# threshold fixes the share of those rows at or below it (the TPR) to about 99%.

quantile = 0.99

# params[metric][resolution] -> threshold
params = {
    metric: {
        res: np.quantile(
            df_train.loc[df_train['attack'] == str(res) + 'p', metric].to_numpy(),
            quantile,
        )
        for res in resolutions
    }
    for metric in metrics
}
        

In [None]:
# We need to extrapolate the thresholds for 1080p: for each metric, fit a straight
# line to log10(threshold) as a function of resolution and evaluate it at 1080.

resolutions_ = resolutions.copy()
resolutions_.extend([1080])

# Build the tick labels from the resolutions so the two cannot drift apart.
ticks = ['{}p'.format(res) for res in resolutions_]

fig, ax = plt.subplots(len(metrics), 1, figsize=(10, 15))
# plt.subplots hands back a bare Axes (not an array) when there is a single panel;
# normalise to a 1-D array so indexing works for any number of metrics.
panels = np.atleast_1d(ax)

for i, metric in enumerate(metrics):
    # Thresholds already computed from the training data, in resolution order
    ths = [params[metric][res] for res in resolutions]

    # Least-squares line in log10 space: log10(th) ~ slope * res + intercept
    fit = np.polyfit(resolutions, np.log10(ths), 1)
    fit_means = np.poly1d(fit)
    y_pred = fit_means(resolutions_)

    # Measured thresholds vs. fitted curve (which also covers 1080)
    panels[i].semilogy(resolutions, ths, '--*', label='training quantile')
    panels[i].semilogy(resolutions_, 10 ** y_pred, '--k', label='log-linear fit')
    panels[i].set_title(metric)
    panels[i].set_ylabel('threshold')
    panels[i].set_xticks(resolutions_)
    panels[i].set_xticklabels(ticks, rotation='horizontal', fontsize=18)
    panels[i].legend()

    # 1080 is the last entry of resolutions_, so y_pred[-1] is its predicted log10 threshold
    params[metric][1080] = 10 ** y_pred[-1]


In [None]:
# fit_means is the poly1d left over from the last iteration of the extrapolation
# loop, i.e. the fit for the final entry of `metrics`. poly1d indexing is by
# power: [1] is the slope and [0] the intercept of the line in log10 space.
slope, intercept = fit_means[1], fit_means[0]
print('The parameters of the curve are: y = {}*x + ({}) ## (Logarithmic)'.format(slope, intercept))

In [None]:
def _exceeds_threshold(frame, rows_for):
    """Build {metric: {resolution: bool array}} of threshold exceedances.

    frame    -- DataFrame holding the metric columns.
    rows_for -- callable mapping a resolution to a boolean row mask for `frame`.

    Each array is True where the metric value of a selected row is strictly
    greater than params[metric][resolution].
    """
    return {
        metric: {
            res: frame[rows_for(res)][metric].to_numpy() > params[metric][res]
            for res in resolutions_
        }
        for metric in metrics
    }


# Train / test rows are selected by an exact '<resolution>p' match on 'attack'.
results_train = _exceeds_threshold(
    df_train, lambda res: df_train['attack'] == str(res) + 'p')
results_test = _exceeds_threshold(
    df_test, lambda res: df_test['attack'] == str(res) + 'p')

# Attack rows are selected when their 'attack' label merely contains '<resolution>p'.
results_attacks = _exceeds_threshold(
    df_attacks, lambda res: df_attacks['attack'].str.contains(str(res) + 'p'))


In [None]:
# Confusion counts for a single metric, accumulated over every resolution.
# Convention used here: a value at or below the threshold is a "positive".
#
# results_*[metric][res] are boolean arrays that are True where the metric value
# exceeded the threshold. They must be counted as-is: comparing these 0/1 masks
# against the threshold a second time only gives the right counts when the
# threshold happens to lie strictly between 0 and 1.


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


tp_train = 0
tp_test = 0
fn_test = 0
tn_attacks = 0
fp_attacks = 0

metric = 'temporal_gaussian-mean'
for res in resolutions_:
    above_train = results_train[metric][res]
    above_test = results_test[metric][res]
    above_attacks = results_attacks[metric][res]

    # Train/test rows at or below the threshold are true positives,
    # test rows above it are false negatives.
    tp_train += np.count_nonzero(~above_train)
    tp_test += np.count_nonzero(~above_test)
    fn_test += np.count_nonzero(above_test)
    # Attack rows above the threshold are true negatives,
    # attack rows at or below it are false positives.
    tn_attacks += np.count_nonzero(above_attacks)
    fp_attacks += np.count_nonzero(~above_attacks)

# F-beta score with beta = 20, i.e. recall weighted far more heavily than precision
beta = 20
precision = _safe_ratio(tp_test, tp_test + fp_attacks)
recall = _safe_ratio(tp_test, tp_test + fn_test)
F20 = _safe_ratio((1 + (beta ** 2)) * precision * recall, (beta ** 2) * precision + recall)

# NOTE(review): the rates below divide by the full frame sizes; this assumes every
# row of each frame is selected by exactly one resolution above -- confirm.
print('With the metric {} we have:'.format(metric))
print('TPR train: {}'.format(_safe_ratio(tp_train, df_train.shape[0])))
print('TPR test: {}'.format(_safe_ratio(tp_test, df_test.shape[0])))
print('TNR: {}'.format(_safe_ratio(tn_attacks, df_attacks.shape[0])))
print('F20: {}'.format(F20))

In [None]:
def _below_threshold(row):
    """True when the row's value for `metric` is strictly below the threshold
    stored in params[metric] under the row's 'dimension'."""
    return row[metric] < params[metric][row['dimension']]


# Row-wise prediction for every attack sample, stored in a new 'pred' column.
df_attacks['pred'] = df_attacks.apply(_below_threshold, axis=1)

In [None]:
# Attack rows whose 'pred' flag is True, i.e. whose metric value fell below the threshold
below_threshold_attacks = df_attacks.loc[df_attacks['pred'].eq(True)]

# Cell output: per (dimension, attack) group, the non-null count of every other column
below_threshold_attacks.groupby(['dimension', 'attack']).count()