# **Botnet Detection in IoT Devices Using an Autoencoder**

## Loading Required Libraries

In [3]:
import os
import numpy as np
import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import layers, losses, Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

## Autoencoder

In [4]:
class Autoencoder(Model):
    """Symmetric fully connected autoencoder.

    The encoder is a stack of ReLU ``Dense`` layers whose widths are
    ``n_features`` followed by ``hidden_units``; the decoder mirrors the hidden
    widths in reverse and ends with a sigmoid ``Dense`` layer of width
    ``n_features``. With the default arguments the layer widths are
    59-40-24-15 (encoder) and 15-24-40-59 (decoder).

    Args:
        n_features: Width of the first encoder layer and of the reconstructed
            output. Should equal the number of input columns so that the
            reconstruction can be compared with the input.
        hidden_units: Widths of the remaining encoder layers, widest first;
            the last entry is the bottleneck size.

    Raises:
        ValueError: If ``n_features`` is smaller than 1 or ``hidden_units`` is
            empty.
    """

    def __init__(self, n_features=59, hidden_units=(40, 24, 15)):
        super().__init__()
        hidden_units = tuple(hidden_units)
        if n_features < 1:
            raise ValueError("n_features must be a positive integer")
        if not hidden_units:
            raise ValueError("hidden_units must contain at least one layer width")

        # Encoder: n_features -> hidden_units[0] -> ... -> bottleneck, all ReLU.
        self.encoder = Sequential([
            layers.Dense(width, activation="relu")
            for width in (n_features, *hidden_units)
        ])
        # Decoder: the hidden widths in reverse (ReLU), then a sigmoid layer
        # that restores the input width and bounds every output to (0, 1).
        self.decoder = Sequential(
            [
                layers.Dense(width, activation="relu")
                for width in reversed(hidden_units)
            ]
            + [layers.Dense(n_features, activation="sigmoid")]
        )

    def call(self, x):
        """Encode ``x`` and return its reconstruction from the decoder."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

## Function to Print Stats

In [5]:
def print_stats(data, outcome):
    """Print the shape of ``data`` and the share of truthy entries in ``outcome``.

    Args:
        data: Any object exposing a ``.shape`` attribute (e.g. a DataFrame or
            an ndarray).
        outcome: Array-like of per-window anomaly flags. Its mean, times 100,
            is reported as the detected-anomaly percentage.

    An empty ``outcome`` is reported as "n/a" instead of being averaged,
    because ``np.mean`` of an empty array is NaN and emits a RuntimeWarning.
    """
    print(f"Shape of data: {data.shape}")
    if np.size(outcome) == 0:
        print("Detected anomalies: n/a (no predictions)")
    else:
        print(f"Detected anomalies: {np.mean(outcome)*100}%")
    print()

## Training and Testing 

### Loading the dataset

In [6]:
import pandas as p
from sklearn.model_selection import train_test_split

# `train` is read from the normal-traffic file and `test` from the
# attack-traffic file; both are expected in the working directory.
train = p.read_csv('CTU13_Normal_Traffic.csv')
test = p.read_csv('CTU13_Attack_Traffic.csv')

In [7]:
# (rows, columns) of the training frame followed by the test frame.
print(train.shape, test.shape)

(53314, 59) (4158, 59)


### Training the autoencoder

In [8]:
# Best-effort reproducibility: seed NumPy and TensorFlow before the model is
# created so repeated runs start from the same random state.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Fit the scaler on the training frame only; `predict` reuses this fitted
# scaler to transform whatever data it is given.
scaler = MinMaxScaler()
x = scaler.fit_transform(train)

ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto',
    # Without this the model keeps the weights of the last epoch run, i.e.
    # `patience` epochs after the best validation loss was observed.
    restore_best_weights=True,
)
# The autoencoder is trained to reproduce its own input (x -> x).
ae.fit(
    x=x,
    y=x,
    epochs=800,
    validation_split=0.3,
    shuffle=True,
    callbacks=[monitor]
)

# Per-sample reconstruction error on the training data; a sample is later
# flagged when its error exceeds mean + one standard deviation of these errors.
training_loss = losses.mse(x, ae(x))
threshold = np.mean(training_loss)+np.std(training_loss)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 19/800
Epoch 20/800
Epoch 21/800
Epoch 22/800
Epoch 23/800
Epoch 24/800
Epoch 25/800
Epoch 26/800
Epoch 27/800
Epoch 28/800
Epoch 29/800
Epoch 30/800
Epoch 31/800
Epoch 32/800
Epoch 33/800
Epoch 33: early stopping


### Testing the autoencoder

In [9]:
def _majority_vote(flags, window_size):
    """Return one bool per full sliding window: True when more than half of its flags are set."""
    flags = np.asarray(flags, dtype=bool)
    if flags.size < window_size:
        # Not a single complete window fits, so there is nothing to vote on.
        return np.zeros(0, dtype=bool)
    # running[k] is the number of set flags among the first k entries, so the
    # count inside the window ending at position i is running[i] - running[i - w].
    running = np.concatenate(([0], np.cumsum(flags, dtype=np.int64)))
    window_counts = running[window_size:] - running[:-window_size]
    # Integer form of "mean > 0.5"; an exact tie is not a majority.
    return 2 * window_counts > window_size


def predict(x, threshold=None, window_size=82):
    """Flag anomalous windows in ``x`` via reconstruction error and majority voting.

    Each row is scaled with the fitted global ``scaler``, reconstructed by the
    global model ``ae``, and marked anomalous when its mean squared
    reconstruction error exceeds ``threshold``. The row flags are then smoothed
    with a sliding window: a window is anomalous when more than half of its
    rows are.

    Args:
        x: Data accepted by ``scaler.transform``.
        threshold: Reconstruction-error cut-off. When ``None`` (the default)
            the current module-level ``threshold`` is looked up at call time,
            so re-running the training cell is picked up without redefining
            this function.
        window_size: Number of consecutive row flags per vote; must be >= 1.

    Returns:
        Boolean ndarray with ``len(x) - window_size + 1`` entries, or an empty
        array when ``x`` has fewer than ``window_size`` rows.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    if threshold is None:
        # The parameter shadows the global of the same name, hence globals().
        threshold = globals()["threshold"]
    x = scaler.transform(x)
    predictions = losses.mse(x, ae(x)) > threshold
    # Majority voting over `window_size` predictions
    return _majority_vote(predictions, window_size)

test_data = [test]

# The loop names are deliberately distinct from `x`, which holds the scaled
# training matrix, so running this cell does not overwrite it.
for dataset_idx, dataset in enumerate(test_data):
    print(dataset_idx)
    outcome = predict(dataset)
    print_stats(dataset, outcome)

0
Shape of data: (4158, 59)
Detected anomalies: 100.0%

