**The dataset** is collected from UCI Machine Learning Repository through the following [link](https://archive.ics.uci.edu/ml/datasets/Unmanned+Aerial+Vehicle+%28UAV%29+Intrusion+Detection)

This application is working in first dataset (Bidirectional-flow/Parrot Bebop1), combined first dataset can be [downloaded](http://mason.gmu.edu/~lzhao9/materials/data/UAV/data/pub_dataset1.mat) from Liang Zhao homepage.Bidirectional-flow mode will involve 9 features × 2 sources × 3 direction flow = 54 features for more info visit this [link](http://mason.gmu.edu/~lzhao9/materials/data/UAV/)

extract data with its default name `pub_dataset1.mat` in `__data__` directory

In [1]:
import numpy as np
import pandas as pd
import h5py

import matplotlib.pyplot as plt
import seaborn as sns

from pprint import pprint

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# use HDF reader for matlab v7.3 files
data = { k:np.array(v).T for k, v in h5py.File('./__data__/pub_dataset1.mat').items()}
data.keys()

dict_keys(['D', 'H', 'data_te', 'data_tr'])

$n$ is the number of training samples   
$k$ is the number of feature   
$n^{\prime}$ is the number of testing samples    
$k^{\prime}$ is the number of feature computational components and k is the numbe of features.  
The last column of `data_te` and `data_tr` is the label: `1 means UAV, 0 otherwise`

--- 
$\text{data_tr} \in R^{n×(k+1)}$   
$\text{data_te} \in R^{n^{\prime}×(k+1)}$   
$D \in R^{k×1}$. The generation runtime for each feature.  
$H \in R^{k^{\prime}×k}$. The incident matrix of the feature computational hypergraph (see the paper for details). 


In [3]:
def reset_random_seed(seed=1917):
    np.random.seed(seed)

In [4]:
X = data['data_tr'][:, :-1]
y = data['data_tr'][:, -1]

X_test = data['data_te'][:, :-1]
y_test = data['data_te'][:, -1]

## MLP
### Accuracy 0.9937035566396278

In [5]:
from sklearn.neural_network import MLPClassifier

In [6]:
reset_random_seed()
model = MLPClassifier()
model.fit(X, y)
model.score(X_test, y_test)

0.9937035566396278

In [7]:
def encoder(data, ae, encoding_layers_count=3):
    data = np.asmatrix(data)

    layer = data
    for i in range(encoding_layers_count):
        layer = layer*ae.coefs_[i] + ae.intercepts_[i]
        encoder1 = np.tanh(layer)
    
    return np.asarray(layer)

## Auto Encoder
### Flipbit Accuracy 0.5536332179930796

In [8]:
from sklearn.neural_network import MLPRegressor

In [9]:
# Encoder structure
n_encoder1 = 25
n_encoder2 = 10

n_latent = 2

encoding_layers_count = 3

# Decoder structure
n_decoder2 = 10
n_decoder1 = 25

hidden_layer_sizes = (
    n_encoder1, 
    n_encoder2, 
    n_latent, 
    n_decoder2, 
    n_decoder1
)
reset_random_seed()
auto_encoder = MLPRegressor(
                   hidden_layer_sizes=hidden_layer_sizes, 
                   activation = 'tanh', 
                   solver = 'adam', 
                   learning_rate_init = 0.0001, 
                   max_iter = 200, 
                   tol = 0.0000001, 
                   verbose = True
)
auto_encoder.fit(X, X)

Iteration 1, loss = 87909.46438852
Iteration 2, loss = 87907.26911447
Iteration 3, loss = 87905.08867516
Iteration 4, loss = 87902.91165426
Iteration 5, loss = 87900.69464342
Iteration 6, loss = 87898.45406284
Iteration 7, loss = 87896.17534820
Iteration 8, loss = 87893.85662279
Iteration 9, loss = 87891.53259668
Iteration 10, loss = 87889.19414346
Iteration 11, loss = 87886.82789623
Iteration 12, loss = 87884.44114813
Iteration 13, loss = 87882.01880032
Iteration 14, loss = 87879.56105016
Iteration 15, loss = 87877.06515435
Iteration 16, loss = 87874.55420496
Iteration 17, loss = 87871.98707037
Iteration 18, loss = 87869.36962808
Iteration 19, loss = 87866.78570170
Iteration 20, loss = 87864.17729422
Iteration 21, loss = 87861.54654581
Iteration 22, loss = 87858.85854800
Iteration 23, loss = 87856.15758688
Iteration 24, loss = 87853.42773949
Iteration 25, loss = 87850.68005862
Iteration 26, loss = 87847.90027719
Iteration 27, loss = 87845.06829821
Iteration 28, loss = 87842.18625978
I

MLPRegressor(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(25, 10, 2, 10, 25), learning_rate='constant',
             learning_rate_init=0.0001, max_fun=15000, max_iter=200,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=None, shuffle=True, solver='adam',
             tol=1e-07, validation_fraction=0.1, verbose=True,
             warm_start=False)

In [10]:
# soft max
accuracy_score(y_test, 1 - np.argmax(encoder(X_test, auto_encoder), axis=1))

0.5536332179930796

## AUTO ENCODER + SVM + Standarad Scaler
### Accuracy 0.9332350104940723

In [11]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline

In [12]:
reset_random_seed()
svm = make_pipeline(StandardScaler(), SVC(gamma='auto'))
svm.fit(encoder(X, auto_encoder), y)
accuracy_score(y_test, svm.predict(encoder(X_test, auto_encoder)))

0.9332350104940723

## AUTO ENCODER {Multi laten} + SVM + Standarad Scaler
### Accuracy 0.9965965171025015

In [13]:
reset_random_seed()
AE = MLPRegressor(
    hidden_layer_sizes=(100,30,7,30,50), 
    activation = 'tanh', 
    solver = 'adam', 
    learning_rate_init = 0.0001, 
    max_iter = 30, 
    tol = 0.0000001, 
    verbose = True
)
AE.fit(X, X)
svm = make_pipeline(StandardScaler(), SVC(gamma='auto'))
svm.fit(encoder(X, AE), y)
accuracy_score(y_test, svm.predict(encoder(X_test, AE)))

Iteration 1, loss = 87919.67346394
Iteration 2, loss = 87907.66099604
Iteration 3, loss = 87895.80083362
Iteration 4, loss = 87884.80337463
Iteration 5, loss = 87875.00997284
Iteration 6, loss = 87865.39050097
Iteration 7, loss = 87855.57328140
Iteration 8, loss = 87846.54585992
Iteration 9, loss = 87838.14641115
Iteration 10, loss = 87830.13033517
Iteration 11, loss = 87822.23193690
Iteration 12, loss = 87814.42633271
Iteration 13, loss = 87806.52859203
Iteration 14, loss = 87798.56408199
Iteration 15, loss = 87790.52981879
Iteration 16, loss = 87782.36156212
Iteration 17, loss = 87774.08264285
Iteration 18, loss = 87765.60897127
Iteration 19, loss = 87756.99505153
Iteration 20, loss = 87748.22758430
Iteration 21, loss = 87739.21256325
Iteration 22, loss = 87730.11401503
Iteration 23, loss = 87720.75464798
Iteration 24, loss = 87711.21995779
Iteration 25, loss = 87701.45228452
Iteration 26, loss = 87691.58698806
Iteration 27, loss = 87681.39493524
Iteration 28, loss = 87671.13760849
I

0.9965965171025015

## XGBOOST
### Acuuracy 100%

In [14]:
from sklearn.ensemble import GradientBoostingClassifier

In [15]:
reset_random_seed()
xgboost = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1)
xgboost.fit(X, y)

GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=1.0, loss='deviance', max_depth=1,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=None, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [16]:
accuracy_score(y_test, xgboost.predict(X_test))

1.0

## Runtime

> Real-time responses are often understood to be in the order of milliseconds, and sometimes microseconds. 
 
So xgboost can be concider aa real-time process

In [17]:
import time
prediction_times = []

for x in X_test:
    x = x.reshape(1,-1)
    t0 = time.time()
    xgboost.predict(x)
    t1 = time.time()
    prediction_times.append(t1 - t0)

prediction_times = np.array(prediction_times)

In [18]:
print(f"prediction_times ~ N({np.mean(prediction_times)}, {np.std(prediction_times)})")
print(f"prediction_times slowers={prediction_times.max()*1000} ms (miliseconds)")
print(f"prediction_times fastest={prediction_times.min()*1000} ms (miliseconds)")

prediction_times ~ N(0.0002532774568030078, 4.119685509044161e-05)
prediction_times slowers=0.8068084716796875 ms (miliseconds)
prediction_times fastest=0.2262592315673828 ms (miliseconds)


In [19]:
import platform
print(f"plarfomr machine {platform.machine()}")
print(f"plarfomr system {platform.system()}")
print(f"plarfomr processor {platform.processor()}")
print(f"plarfomr detail {platform.platform()}")

plarfomr machine x86_64
plarfomr system Linux
plarfomr processor x86_64
plarfomr detail Linux-4.15.0-132-generic-x86_64-with-glibc2.10
