**The dataset** is collected from UCI Machine Learning Repository through the following [link](https://archive.ics.uci.edu/ml/datasets/Unmanned+Aerial+Vehicle+%28UAV%29+Intrusion+Detection)

This application is working in first dataset (Bidirectional-flow/Parrot Bebop1), combined first dataset can be [downloaded](http://mason.gmu.edu/~lzhao9/materials/data/UAV/data/pub_dataset1.mat) from Liang Zhao homepage.Bidirectional-flow mode will involve 9 features × 2 sources × 3 direction flow = 54 features for more info visit this [link](http://mason.gmu.edu/~lzhao9/materials/data/UAV/)

extract data with its default name `pub_dataset1.mat` in `__data__` directory

In [176]:
import numpy as np
import pandas as pd
import h5py

import matplotlib.pyplot as plt
import seaborn as sns

from pprint import pprint

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [19]:
# use HDF reader for matlab v7.3 files
data = { k:np.array(v).T for k, v in h5py.File('./__data__/pub_dataset1.mat').items()}
data.keys()

dict_keys(['D', 'H', 'data_te', 'data_tr'])

In [None]:
$n$ is the number of training samples   
$k$ is the number of feature   
$n^{\prime}$ is the number of testing samples    
$k^{\prime}$ is the number of feature computational components and k is the numbe of features.  
The last column of `data_te` and `data_tr` is the label: `1 means UAV, 0 otherwise`

--- 
$\text{data_tr} \in R^{n×(k+1)}$   
$\text{data_te} \in R^{n^{\prime}×(k+1)}$   
$D \in R^{k×1}$. The generation runtime for each feature.  
$H \in R^{k^{\prime}×k}$. The incident matrix of the feature computational hypergraph (see the paper for details). 


In [20]:
def reset_random_seed(seed=1917):
    np.random.seed(seed)

In [21]:
X = data['data_tr'][:, :-1]
y = data['data_tr'][:, -1]

X_test = data['data_te'][:, :-1]
y_test = data['data_te'][:, -1]

## MLP
### Accuracy 0.9937035566396278

In [22]:
from sklearn.neural_network import MLPClassifier

In [23]:
reset_random_seed()
model = MLPClassifier()
model.fit(X, y)
model.score(X_test, y_test)

0.9937035566396278

In [174]:
def encoder(data, ae, encoding_layers_count=3):
    data = np.asmatrix(data)

    layer = data
    for i in range(encoding_layers_count):
        layer = layer*ae.coefs_[i] + ae.intercepts_[i]
        encoder1 = np.tanh(layer)
    
    return np.asarray(layer)

## Auto Encoder
### Accuracy 0.5536332179930796

In [38]:
from sklearn.neural_network import MLPRegressor

In [187]:
# Encoder structure
n_encoder2 = 25
n_encoder3 = 10

n_latent = 2

encoding_layers_count = 3

# Decoder structure
n_decoder3 = 10
n_decoder2 = 25

hidden_layer_sizes = (
    n_encoder1, 
    n_encoder2, 
    n_latent, 
    n_decoder2, 
    n_decoder1
)
reset_random_seed()
auto_encoder = MLPRegressor(
                   hidden_layer_sizes=hidden_layer_sizes, 
                   activation = 'tanh', 
                   solver = 'adam', 
                   learning_rate_init = 0.0001, 
                   max_iter = 200, 
                   tol = 0.0000001, 
                   verbose = True
)
auto_encoder.fit(X, X)

Iteration 1, loss = 87931.33384487
Iteration 2, loss = 87927.27633029
Iteration 3, loss = 87923.67704696
Iteration 4, loss = 87920.15169486
Iteration 5, loss = 87916.61288665
Iteration 6, loss = 87913.05216356
Iteration 7, loss = 87909.50056686
Iteration 8, loss = 87905.91427044
Iteration 9, loss = 87902.25035234
Iteration 10, loss = 87898.56404509
Iteration 11, loss = 87894.86006997
Iteration 12, loss = 87891.06019651
Iteration 13, loss = 87887.24699085
Iteration 14, loss = 87883.28777192
Iteration 15, loss = 87879.31143292
Iteration 16, loss = 87875.17031097
Iteration 17, loss = 87870.93969102
Iteration 18, loss = 87866.57590696
Iteration 19, loss = 87862.08969428
Iteration 20, loss = 87857.48735840
Iteration 21, loss = 87852.74302202
Iteration 22, loss = 87847.82784237
Iteration 23, loss = 87842.75453955
Iteration 24, loss = 87837.48416242
Iteration 25, loss = 87832.05479651
Iteration 26, loss = 87826.48792148
Iteration 27, loss = 87820.72014286
Iteration 28, loss = 87814.80765226
I

MLPRegressor(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(34, 25, 2, 25, 34), learning_rate='constant',
             learning_rate_init=0.0001, max_fun=15000, max_iter=200,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=None, shuffle=True, solver='adam',
             tol=1e-07, validation_fraction=0.1, verbose=True,
             warm_start=False)

In [190]:
# soft max
accuracy_score(y_test, np.argmax(encoder(X_test, auto_encoder), axis=1))

0.5536332179930796

## AUTO ENCODER + SVM + Standarad Scaler
### Accuracy 0.9157637982869136

In [166]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline

In [168]:
svm = make_pipeline(StandardScaler(), SVC(gamma='auto'))
svm.fit(encoder(X), y)
accuracy_score(y_test, svm.predict(encoder(X_test)))

0.9157637982869136

## AUTO ENCODER {Multi laten} + SVM + Standarad Scaler
### Accuracy 0.9934766577797947

In [173]:
AE = MLPRegressor(
    hidden_layer_sizes=(100,30,7,30,50), 
    activation = 'tanh', 
    solver = 'adam', 
    learning_rate_init = 0.0001, 
    max_iter = 30, 
    tol = 0.0000001, 
    verbose = True
)
AE.fit(X, X)
svm = make_pipeline(StandardScaler(), SVC(gamma='auto'))
svm.fit(encoder(X, 3, AE), y)
accuracy_score(y_test, svm.predict(encoder(X_test, 3, AE)))

Iteration 1, loss = 87908.89191948
Iteration 2, loss = 87900.83005277
Iteration 3, loss = 87892.89103523
Iteration 4, loss = 87884.88454356
Iteration 5, loss = 87876.84062800
Iteration 6, loss = 87868.60495798
Iteration 7, loss = 87860.07827601
Iteration 8, loss = 87851.35748410
Iteration 9, loss = 87842.48923491
Iteration 10, loss = 87833.32172414
Iteration 11, loss = 87823.89700553
Iteration 12, loss = 87814.26890947
Iteration 13, loss = 87804.33933314
Iteration 14, loss = 87794.17880984
Iteration 15, loss = 87783.76944986
Iteration 16, loss = 87773.08604304
Iteration 17, loss = 87762.15706269
Iteration 18, loss = 87751.00045754
Iteration 19, loss = 87739.61474835
Iteration 20, loss = 87728.02415342
Iteration 21, loss = 87716.09393861
Iteration 22, loss = 87703.94774254
Iteration 23, loss = 87691.54380877
Iteration 24, loss = 87678.88100280
Iteration 25, loss = 87666.02841067
Iteration 26, loss = 87653.07801898
Iteration 27, loss = 87639.95885990
Iteration 28, loss = 87626.52333795
I

0.9934766577797947

proposed method will be the follwoing  
- classify   
- print wrong data   
- train model on wrong data such as svm  
- use svm for that classified datas
- o.w use mlp normal model

## XGBOOST
### Acuuracy 100%

In [178]:
from sklearn.ensemble import GradientBoostingClassifier

In [184]:
random
xgboost = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1)
xgboost.fit(X, y)

GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=1.0, loss='deviance', max_depth=1,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=None, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [185]:
accuracy_score(y_test, xgboost.predict(X_test))

1.0

## Runtime

> Real-time responses are often understood to be in the order of milliseconds, and sometimes microseconds. 
 
So xgboost can be concider aa real-time process

In [195]:
import time
prediction_times = []

for x in X_test:
    x = x.reshape(1,-1)
    t0 = time.time()
    xgboost.predict(x)
    t1 = time.time()
    prediction_times.append(t1 - t0)

prediction_times = np.array(prediction_times)

In [203]:
print(f"prediction_times ~ N({np.mean(prediction_times)}, {np.std(prediction_times)})")
print(f"prediction_times slowers={prediction_times.max()*1000} ms (miliseconds)")
print(f"prediction_times fastest={prediction_times.min()*1000} ms (miliseconds)")

prediction_times ~ N(0.0002549129820186462, 3.486487932831444e-05)
prediction_times slowers=0.9953975677490234 ms (miliseconds)
prediction_times fastest=0.2238750457763672 ms (miliseconds)


In [207]:
import platform
print(f"plarfomr machine {platform.machine()}")
print(f"plarfomr system {platform.system()}")
print(f"plarfomr processor {platform.processor()}")
print(f"plarfomr detail {platform.platform()}")

plarfomr machine x86_64
plarfomr system Linux
plarfomr processor x86_64
plarfomr detail Linux-4.15.0-132-generic-x86_64-with-glibc2.10
