# **Botnet Detection in IoT Devices Using an Autoencoder**

## Loading Required Libraries

In [None]:
import os
import numpy as np
import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import layers, losses, Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint

## Mounting drive

In [None]:
# Mount Google Drive at /drive; the dataset paths used by load_nbaiot below
# live under this mount point.
from google.colab import drive
drive.mount('/drive')

Mounted at /drive


## Autoencoder

In [None]:

class Autoencoder(Model):
    """Dense autoencoder for 115-feature traffic vectors.

    The encoder narrows the input through 115 -> 86 -> 57 -> 37 -> 28 units
    (all ReLU); the decoder widens it back through 37 -> 57 -> 86 units
    (ReLU) to a 115-unit sigmoid output layer.
    """

    def __init__(self):
        super().__init__()
        encoder_widths = (115, 86, 57, 37, 28)
        decoder_hidden_widths = (37, 57, 86)

        self.encoder = Sequential(
            [layers.Dense(width, activation="relu") for width in encoder_widths]
        )
        # The last decoder layer uses a sigmoid so reconstructions stay in
        # (0, 1); the training cells min-max scale the inputs beforehand.
        self.decoder = Sequential(
            [layers.Dense(width, activation="relu") for width in decoder_hidden_widths]
            + [layers.Dense(115, activation="sigmoid")]
        )

    def call(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        return self.decoder(self.encoder(x))

## Function to Print Stats

In [None]:
def print_stats(data, outcome):
    """Print the shape of ``data`` and the share of flagged entries.

    Args:
        data: Array-like with a ``shape`` attribute (the evaluated data set).
        outcome: Boolean anomaly decisions; their mean is reported as a
            percentage.
    """
    anomaly_percentage = np.mean(outcome) * 100
    print(f"Shape of data: {data.shape}")
    print(f"Detected anomalies: {anomaly_percentage}%")
    # Blank separator line between consecutive reports.
    print()

## IoT Device 1: Danmini Doorbell

### Loading the dataset

In [None]:
def load_nbaiot(
    filename,
    data_dir="/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset",
):
    """Load one N-BaIoT CSV file into a float array.

    Args:
        filename: CSV file name, e.g. ``"1.benign.csv"``.
        data_dir: Directory holding the CSV files. Defaults to the Google
            Drive folder this notebook was written against, so existing
            single-argument calls behave exactly as before.

    Returns:
        ``numpy.ndarray`` parsed from the comma-separated file; the first
        line is skipped as a header.
    """
    return np.genfromtxt(
        os.path.join(data_dir, filename),
        delimiter=",",
        skip_header=1,
    )

# Benign traffic: the first 40000 rows train the model, the rest is the
# benign test set.
benign = load_nbaiot("1.benign.csv")
X_train, X_test0 = benign[:40000], benign[40000:]

# Attack captures, loaded in the order the results are reported.
(X_test1, X_test2, X_test3, X_test4, X_test5,
 X_test6, X_test7, X_test8, X_test9, X_test10) = map(load_nbaiot, (
    "1.mirai.scan.csv",
    "1.mirai.ack.csv",
    "1.mirai.syn.csv",
    "1.mirai.udp.csv",
    "1.mirai.udpplain.csv",
    "1.gafgyt.combo.csv",
    "1.gafgyt.junk.csv",
    "1.gafgyt.scan.csv",
    "1.gafgyt.tcp.csv",
    "1.gafgyt.udp.csv",
))

In [None]:
# Show the shape of the training split and of every test set on one line.
print(*(split.shape for split in (
    X_train, X_test0, X_test1, X_test2, X_test3, X_test4,
    X_test5, X_test6, X_test7, X_test8, X_test9, X_test10,
)))

(40000, 115) (9548, 115) (107685, 115) (102195, 115) (122573, 115) (237665, 115) (81982, 115) (59718, 115) (29068, 115) (29849, 115) (92141, 115) (105874, 115)


### Training the autoencoder

In [None]:
# Min-max scale the features; the scaler is fitted on the benign training
# rows only and reused by predict() for the test sets.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)

# Fresh autoencoder for this device, trained to reconstruct its own input.
ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.012), loss='mse')
# NOTE(review): `monitor` is built but never passed to fit() (the callbacks
# argument below is commented out), so early stopping is inactive and every
# epoch runs.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
# NOTE(review): the recorded output below shows "Epoch n/20", so it appears
# to predate the current epochs=800 setting.
ae.fit(
    x=x,
    y=x,
    epochs=800,
    validation_split=0.3,
    shuffle=True,
    # callbacks=[monitor]
)

# Reconstruction error on the training data and a mean + 1 std cut-off.
# NOTE(review): predict() in the testing cell has its own hard-coded default
# threshold and is called without one, so this value appears to be unused.
training_loss = losses.mse(x, ae(x))
threshold = np.mean(training_loss)+np.std(training_loss)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Testing the autoencoder

In [None]:
def predict(x, threshold=0.042, window_size=82):
    """Flag sliding windows of samples as anomalous by majority vote.

    Relies on the notebook globals ``scaler`` and ``ae`` set up in the
    training cell for this device.

    Args:
        x: Raw (unscaled) samples, one row per sample.
        threshold: Reconstruction MSE above which a sample counts as
            anomalous.
        window_size: Number of consecutive samples per vote; must be >= 1.

    Returns:
        Boolean array with ``len(x) - window_size + 1`` entries; entry ``k``
        is True when more than half of samples ``k .. k + window_size - 1``
        exceed ``threshold``. Empty when ``x`` has fewer rows than
        ``window_size``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold).astype(np.int64)
    if len(flags) < window_size:
        return np.zeros(0, dtype=bool)
    # Prefix sums turn every window count into one subtraction, replacing the
    # per-window Python loop (O(n * window_size)) with a single O(n) pass.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:-window_size]
    # Strict majority: the same decision as mean(window) > 0.5.
    return votes * 2 > window_size

# Benign hold-out first (index 0), then the attack captures in load order.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# For each data set print its index, its shape and the share of windows
# flagged as anomalous.
# NOTE: the loop variable rebinds the notebook-global `x`, which the training
# cell set to the scaled training matrix.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (9548, 115)
Detected anomalies: 0.0%

1
Shape of data: (107685, 115)
Detected anomalies: 100.0%

2
Shape of data: (102195, 115)
Detected anomalies: 100.0%

3
Shape of data: (122573, 115)
Detected anomalies: 100.0%

4
Shape of data: (237665, 115)
Detected anomalies: 100.0%

5
Shape of data: (81982, 115)
Detected anomalies: 100.0%

6
Shape of data: (59718, 115)
Detected anomalies: 100.0%

7
Shape of data: (29068, 115)
Detected anomalies: 100.0%

8
Shape of data: (29849, 115)
Detected anomalies: 100.0%

9
Shape of data: (92141, 115)
Detected anomalies: 0.0%

10
Shape of data: (105874, 115)
Detected anomalies: 0.0%



## IoT Device 2: Ecobee Thermostat

### Loading the dataset

In [None]:
def load_nbaiot(
    filename,
    data_dir="/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset",
):
    """Load one N-BaIoT CSV file into a float array.

    Args:
        filename: CSV file name, e.g. ``"2.benign.csv"``.
        data_dir: Directory holding the CSV files. Defaults to the Google
            Drive folder this notebook was written against, so existing
            single-argument calls behave exactly as before.

    Returns:
        ``numpy.ndarray`` parsed from the comma-separated file; the first
        line is skipped as a header.
    """
    return np.genfromtxt(
        os.path.join(data_dir, filename),
        delimiter=",",
        skip_header=1,
    )

# Benign traffic: the first 10000 rows train the model, the rest is the
# benign test set.
benign = load_nbaiot("2.benign.csv")
X_train, X_test0 = benign[:10000], benign[10000:]

# Attack captures, loaded in the order the results are reported.
(X_test1, X_test2, X_test3, X_test4, X_test5,
 X_test6, X_test7, X_test8, X_test9, X_test10) = map(load_nbaiot, (
    "2.mirai.scan.csv",
    "2.mirai.ack.csv",
    "2.mirai.syn.csv",
    "2.mirai.udp.csv",
    "2.mirai.udpplain.csv",
    "2.gafgyt.combo.csv",
    "2.gafgyt.junk.csv",
    "2.gafgyt.scan.csv",
    "2.gafgyt.tcp.csv",
    "2.gafgyt.udp.csv",
))

In [None]:
# Show the shape of the training split and of every test set on one line.
print(*(split.shape for split in (
    X_train, X_test0, X_test1, X_test2, X_test3, X_test4,
    X_test5, X_test6, X_test7, X_test8, X_test9, X_test10,
)))

(10000, 115) (3113, 115) (43192, 115) (113285, 115) (116807, 115) (151481, 115) (87368, 115) (53012, 115) (30312, 115) (27494, 115) (95021, 115) (104791, 115)


### Training the autoencoder

In [None]:
# Min-max scale the features; the scaler is fitted on the benign training
# rows only and reused by predict() for the test sets.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)

# Fresh autoencoder for this device, trained to reconstruct its own input.
ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.028), loss='mse')
# NOTE(review): `monitor` is built but never passed to fit() (the callbacks
# argument below is commented out), so early stopping is inactive and every
# epoch runs.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
ae.fit(
    x=x,
    y=x,
    epochs=250,
    validation_split=0.3,
    shuffle=True,
    # callbacks=[monitor]
)

# Reconstruction error on the training data and a mean + 1 std cut-off.
# NOTE(review): predict() in the testing cell has its own hard-coded default
# threshold and is called without one, so this value appears to be unused.
training_loss = losses.mse(x, ae(x))
threshold = np.mean(training_loss)+np.std(training_loss)

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 46/250
Epoch 47/250
Epoch 48/250
Epoch 49/250
Epoch 50/250
Epoch 51/250
Epoch 52/250
Epoch 53/250
Epoch 54/250
Epoch 55/250
Epoch 56/250
Epoch 57/250
Epoch 58/250
Epoch 59/250
Epoch 60/250
Epoch 61/250
Epoch 62/250
Epoch 63/250
Epoch 64/250
Epoch 65/250
Epoch 66/250
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/250
Epoch 77/250
Epoch 78

### Testing the autoencoder

In [None]:
def predict(x, threshold=0.011, window_size=20):
    """Flag sliding windows of samples as anomalous by majority vote.

    Relies on the notebook globals ``scaler`` and ``ae`` set up in the
    training cell for this device.

    Args:
        x: Raw (unscaled) samples, one row per sample.
        threshold: Reconstruction MSE above which a sample counts as
            anomalous.
        window_size: Number of consecutive samples per vote; must be >= 1.

    Returns:
        Boolean array with ``len(x) - window_size + 1`` entries; entry ``k``
        is True when more than half of samples ``k .. k + window_size - 1``
        exceed ``threshold``. Empty when ``x`` has fewer rows than
        ``window_size``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold).astype(np.int64)
    if len(flags) < window_size:
        return np.zeros(0, dtype=bool)
    # Prefix sums turn every window count into one subtraction, replacing the
    # per-window Python loop (O(n * window_size)) with a single O(n) pass.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:-window_size]
    # Strict majority: the same decision as mean(window) > 0.5.
    return votes * 2 > window_size

# Benign hold-out first (index 0), then the attack captures in load order.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# For each data set print its index, its shape and the share of windows
# flagged as anomalous.
# NOTE: the loop variable rebinds the notebook-global `x`, which the training
# cell set to the scaled training matrix.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (3113, 115)
Detected anomalies: 1.0019392372333549%

1
Shape of data: (43192, 115)
Detected anomalies: 100.0%

2
Shape of data: (113285, 115)
Detected anomalies: 100.0%

3
Shape of data: (116807, 115)
Detected anomalies: 100.0%

4
Shape of data: (151481, 115)
Detected anomalies: 100.0%

5
Shape of data: (87368, 115)
Detected anomalies: 100.0%

6
Shape of data: (53012, 115)
Detected anomalies: 100.0%

7
Shape of data: (30312, 115)
Detected anomalies: 100.0%

8
Shape of data: (27494, 115)
Detected anomalies: 100.0%

9
Shape of data: (95021, 115)
Detected anomalies: 100.0%

10
Shape of data: (104791, 115)
Detected anomalies: 100.0%



## IoT Device 3: Ennio Doorbell

### Loading the dataset

In [None]:
def load_nbaiot(
    filename,
    data_dir="/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset",
):
    """Load one N-BaIoT CSV file into a float array.

    Args:
        filename: CSV file name, e.g. ``"3.benign.csv"``.
        data_dir: Directory holding the CSV files. Defaults to the Google
            Drive folder this notebook was written against, so existing
            single-argument calls behave exactly as before.

    Returns:
        ``numpy.ndarray`` parsed from the comma-separated file; the first
        line is skipped as a header.
    """
    return np.genfromtxt(
        os.path.join(data_dir, filename),
        delimiter=",",
        skip_header=1,
    )

# Benign traffic: the first 30000 rows train the model, the rest is the
# benign test set.
benign = load_nbaiot("3.benign.csv")
X_train, X_test0 = benign[:30000], benign[30000:]

# Attack captures (only gafgyt files are loaded for this device), in the
# order the results are reported.
(X_test1, X_test2, X_test3, X_test4, X_test5) = map(load_nbaiot, (
    "3.gafgyt.combo.csv",
    "3.gafgyt.junk.csv",
    "3.gafgyt.scan.csv",
    "3.gafgyt.tcp.csv",
    "3.gafgyt.udp.csv",
))

In [None]:
# Show the shape of the training split and of every test set on one line.
print(*(split.shape for split in (
    X_train, X_test0, X_test1, X_test2, X_test3, X_test4, X_test5,
)))

(30000, 115) (9100, 115) (53014, 115) (29797, 115) (28120, 115) (101536, 115) (103933, 115)


### Training the autoencoder

In [None]:
# Min-max scale the features; the scaler is fitted on the benign training
# rows only and reused by predict() for the test sets.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)

# Fresh autoencoder for this device, trained to reconstruct its own input.
ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.003), loss='mse')
# NOTE(review): `monitor` is built but never passed to fit() (the callbacks
# argument below is commented out), so early stopping is inactive and every
# epoch runs.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
ae.fit(
    x=x,
    y=x,
    epochs=350,
    validation_split=0.3,
    shuffle=True,
    # callbacks=[monitor]
)

# Reconstruction error on the training data and a mean + 1 std cut-off.
# NOTE(review): predict() in the testing cell has its own hard-coded default
# threshold and is called without one, so this value appears to be unused.
training_loss = losses.mse(x, ae(x))
threshold = np.mean(training_loss)+np.std(training_loss)

Epoch 1/350
Epoch 2/350
Epoch 3/350
Epoch 4/350
Epoch 5/350
Epoch 6/350
Epoch 7/350
Epoch 8/350
Epoch 9/350
Epoch 10/350
Epoch 11/350
Epoch 12/350
Epoch 13/350
Epoch 14/350
Epoch 15/350
Epoch 16/350
Epoch 17/350
Epoch 18/350
Epoch 19/350
Epoch 20/350
Epoch 21/350
Epoch 22/350
Epoch 23/350
Epoch 24/350
Epoch 25/350
Epoch 26/350
Epoch 27/350
Epoch 28/350
Epoch 29/350
Epoch 30/350
Epoch 31/350
Epoch 32/350
Epoch 33/350
Epoch 34/350
Epoch 35/350
Epoch 36/350
Epoch 37/350
Epoch 38/350
Epoch 39/350
Epoch 40/350
Epoch 41/350
Epoch 42/350
Epoch 43/350
Epoch 44/350
Epoch 45/350
Epoch 46/350
Epoch 47/350
Epoch 48/350
Epoch 49/350
Epoch 50/350
Epoch 51/350
Epoch 52/350
Epoch 53/350
Epoch 54/350
Epoch 55/350
Epoch 56/350
Epoch 57/350
Epoch 58/350
Epoch 59/350
Epoch 60/350
Epoch 61/350
Epoch 62/350
Epoch 63/350
Epoch 64/350
Epoch 65/350
Epoch 66/350
Epoch 67/350
Epoch 68/350
Epoch 69/350
Epoch 70/350
Epoch 71/350
Epoch 72/350
Epoch 73/350
Epoch 74/350
Epoch 75/350
Epoch 76/350
Epoch 77/350
Epoch 78

### Testing the autoencoder

In [None]:
def predict(x, threshold=0.011, window_size=22):
    """Flag sliding windows of samples as anomalous by majority vote.

    Relies on the notebook globals ``scaler`` and ``ae`` set up in the
    training cell for this device.

    Args:
        x: Raw (unscaled) samples, one row per sample.
        threshold: Reconstruction MSE above which a sample counts as
            anomalous.
        window_size: Number of consecutive samples per vote; must be >= 1.

    Returns:
        Boolean array with ``len(x) - window_size + 1`` entries; entry ``k``
        is True when more than half of samples ``k .. k + window_size - 1``
        exceed ``threshold``. Empty when ``x`` has fewer rows than
        ``window_size``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold).astype(np.int64)
    if len(flags) < window_size:
        return np.zeros(0, dtype=bool)
    # Prefix sums turn every window count into one subtraction, replacing the
    # per-window Python loop (O(n * window_size)) with a single O(n) pass.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:-window_size]
    # Strict majority: the same decision as mean(window) > 0.5.
    return votes * 2 > window_size

# Benign hold-out first (index 0), then the attack captures in load order.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5]

# For each data set print its index, its shape and the share of windows
# flagged as anomalous.
# NOTE: the loop variable rebinds the notebook-global `x`, which the training
# cell set to the scaled training matrix.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (9100, 115)
Detected anomalies: 0.0%

1
Shape of data: (53014, 115)
Detected anomalies: 99.99811295831525%

2
Shape of data: (29797, 115)
Detected anomalies: 100.0%

3
Shape of data: (28120, 115)
Detected anomalies: 100.0%

4
Shape of data: (101536, 115)
Detected anomalies: 0.0%

5
Shape of data: (103933, 115)
Detected anomalies: 0.0%



## IoT Device 4: Philips B120N10 Baby Monitor

### Loading the dataset

In [None]:
def load_nbaiot(
    filename,
    data_dir="/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset",
):
    """Load one N-BaIoT CSV file into a float array.

    Args:
        filename: CSV file name, e.g. ``"4.benign.csv"``.
        data_dir: Directory holding the CSV files. Defaults to the Google
            Drive folder this notebook was written against, so existing
            single-argument calls behave exactly as before.

    Returns:
        ``numpy.ndarray`` parsed from the comma-separated file; the first
        line is skipped as a header.
    """
    return np.genfromtxt(
        os.path.join(data_dir, filename),
        delimiter=",",
        skip_header=1,
    )

# Benign traffic: the first 160000 rows train the model, the rest is the
# benign test set.
benign = load_nbaiot("4.benign.csv")
X_train, X_test0 = benign[:160000], benign[160000:]

# Attack captures, loaded in the order the results are reported.
(X_test1, X_test2, X_test3, X_test4, X_test5,
 X_test6, X_test7, X_test8, X_test9, X_test10) = map(load_nbaiot, (
    "4.mirai.scan.csv",
    "4.mirai.ack.csv",
    "4.mirai.syn.csv",
    "4.mirai.udp.csv",
    "4.mirai.udpplain.csv",
    "4.gafgyt.combo.csv",
    "4.gafgyt.junk.csv",
    "4.gafgyt.scan.csv",
    "4.gafgyt.tcp.csv",
    "4.gafgyt.udp.csv",
))

In [None]:
# Show the shape of the training split and of every test set on one line.
print(*(split.shape for split in (
    X_train, X_test0, X_test1, X_test2, X_test3, X_test4,
    X_test5, X_test6, X_test7, X_test8, X_test9, X_test10,
)))

(160000, 115) (15240, 115) (103621, 115) (91123, 115) (118128, 115) (217034, 115) (80808, 115) (58152, 115) (28349, 115) (27859, 115) (92581, 115) (105782, 115)


### Training the autoencoder

In [None]:
# Min-max scale the features; the scaler is fitted on the benign training
# rows only and reused by predict() for the test sets.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)

# Fresh autoencoder for this device, trained to reconstruct its own input.
ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.016), loss='mse')
# NOTE(review): `monitor` is built but never passed to fit() (the callbacks
# argument below is commented out), so early stopping is inactive and every
# epoch runs.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
ae.fit(
    x=x,
    y=x,
    epochs=100,
    validation_split=0.3,
    shuffle=True,
    # callbacks=[monitor]
)

# Reconstruction error on the training data and a mean + 1 std cut-off.
# NOTE(review): predict() in the testing cell has its own hard-coded default
# threshold and is called without one, so this value appears to be unused.
training_loss = losses.mse(x, ae(x))
threshold = np.mean(training_loss)+np.std(training_loss)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

### Testing the autoencoder

In [None]:
def predict(x, threshold=0.030, window_size=65):
    """Flag sliding windows of samples as anomalous by majority vote.

    Relies on the notebook globals ``scaler`` and ``ae`` set up in the
    training cell for this device.

    Args:
        x: Raw (unscaled) samples, one row per sample.
        threshold: Reconstruction MSE above which a sample counts as
            anomalous.
        window_size: Number of consecutive samples per vote; must be >= 1.

    Returns:
        Boolean array with ``len(x) - window_size + 1`` entries; entry ``k``
        is True when more than half of samples ``k .. k + window_size - 1``
        exceed ``threshold``. Empty when ``x`` has fewer rows than
        ``window_size``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold).astype(np.int64)
    if len(flags) < window_size:
        return np.zeros(0, dtype=bool)
    # Prefix sums turn every window count into one subtraction, replacing the
    # per-window Python loop (O(n * window_size)) with a single O(n) pass.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:-window_size]
    # Strict majority: the same decision as mean(window) > 0.5.
    return votes * 2 > window_size

# Benign hold-out first (index 0), then the attack captures in load order.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# For each data set print its index, its shape and the share of windows
# flagged as anomalous.
# NOTE: the loop variable rebinds the notebook-global `x`, which the training
# cell set to the scaled training matrix.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (15240, 115)
Detected anomalies: 3.8679493937796523%

1
Shape of data: (103621, 115)
Detected anomalies: 100.0%

2
Shape of data: (91123, 115)
Detected anomalies: 100.0%

3
Shape of data: (118128, 115)
Detected anomalies: 100.0%

4
Shape of data: (217034, 115)
Detected anomalies: 100.0%

5
Shape of data: (80808, 115)
Detected anomalies: 100.0%

6
Shape of data: (58152, 115)
Detected anomalies: 100.0%

7
Shape of data: (28349, 115)
Detected anomalies: 100.0%

8
Shape of data: (27859, 115)
Detected anomalies: 100.0%

9
Shape of data: (92581, 115)
Detected anomalies: 100.0%

10
Shape of data: (105782, 115)
Detected anomalies: 100.0%



## IoT Device 5: Provision PT 737E Security Camera

### Loading the dataset

In [None]:
def load_nbaiot(
    filename,
    data_dir="/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset",
):
    """Load one N-BaIoT CSV file into a float array.

    Args:
        filename: CSV file name, e.g. ``"5.benign.csv"``.
        data_dir: Directory holding the CSV files. Defaults to the Google
            Drive folder this notebook was written against, so existing
            single-argument calls behave exactly as before.

    Returns:
        ``numpy.ndarray`` parsed from the comma-separated file; the first
        line is skipped as a header.
    """
    return np.genfromtxt(
        os.path.join(data_dir, filename),
        delimiter=",",
        skip_header=1,
    )

# Benign traffic: the first 55000 rows train the model, the rest is the
# benign test set.
benign = load_nbaiot("5.benign.csv")
X_train, X_test0 = benign[:55000], benign[55000:]

# Attack captures, loaded in the order the results are reported.
(X_test1, X_test2, X_test3, X_test4, X_test5,
 X_test6, X_test7, X_test8, X_test9, X_test10) = map(load_nbaiot, (
    "5.mirai.scan.csv",
    "5.mirai.ack.csv",
    "5.mirai.syn.csv",
    "5.mirai.udp.csv",
    "5.mirai.udpplain.csv",
    "5.gafgyt.combo.csv",
    "5.gafgyt.junk.csv",
    "5.gafgyt.scan.csv",
    "5.gafgyt.tcp.csv",
    "5.gafgyt.udp.csv",
))

In [None]:
# Show the shape of the training split and of every test set on one line.
print(*(split.shape for split in (
    X_train, X_test0, X_test1, X_test2, X_test3, X_test4,
    X_test5, X_test6, X_test7, X_test8, X_test9, X_test10,
)))

(55000, 115) (7154, 115) (96781, 115) (60554, 115) (65746, 115) (156248, 115) (56681, 115) (61380, 115) (30898, 115) (29297, 115) (104510, 115) (104011, 115)


### Training the autoencoder

In [None]:
# Min-max scale the features; the scaler is fitted on the benign training
# rows only and reused by predict() for the test sets.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)

# Fresh autoencoder for this device, trained to reconstruct its own input.
ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.026), loss='mse')
# NOTE(review): `monitor` is built but never passed to fit() (the callbacks
# argument below is commented out), so early stopping is inactive and every
# epoch runs.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
ae.fit(
    x=x,
    y=x,
    epochs=300,
    validation_split=0.3,
    shuffle=True,
    # callbacks=[monitor]
)

# Reconstruction error on the training data and a mean + 1 std cut-off.
# NOTE(review): predict() in the testing cell has its own hard-coded default
# threshold and is called without one, so this value appears to be unused.
training_loss = losses.mse(x, ae(x))
threshold = np.mean(training_loss)+np.std(training_loss)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

### Testing the autoencoder

In [None]:
def predict(x, threshold=0.035, window_size=32):
    """Flag sliding windows of samples as anomalous by majority vote.

    Relies on the notebook globals ``scaler`` and ``ae`` set up in the
    training cell for this device.

    Args:
        x: Raw (unscaled) samples, one row per sample.
        threshold: Reconstruction MSE above which a sample counts as
            anomalous.
        window_size: Number of consecutive samples per vote; must be >= 1.

    Returns:
        Boolean array with ``len(x) - window_size + 1`` entries; entry ``k``
        is True when more than half of samples ``k .. k + window_size - 1``
        exceed ``threshold``. Empty when ``x`` has fewer rows than
        ``window_size``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold).astype(np.int64)
    if len(flags) < window_size:
        return np.zeros(0, dtype=bool)
    # Prefix sums turn every window count into one subtraction, replacing the
    # per-window Python loop (O(n * window_size)) with a single O(n) pass.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:-window_size]
    # Strict majority: the same decision as mean(window) > 0.5.
    return votes * 2 > window_size

# Benign hold-out first (index 0), then the attack captures in load order.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# For each data set print its index, its shape and the share of windows
# flagged as anomalous.
# NOTE: the loop variable rebinds the notebook-global `x`, which the training
# cell set to the scaled training matrix.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (7154, 115)
Detected anomalies: 21.05854274884178%

1
Shape of data: (96781, 115)
Detected anomalies: 100.0%

2
Shape of data: (60554, 115)
Detected anomalies: 100.0%

3
Shape of data: (65746, 115)
Detected anomalies: 100.0%

4
Shape of data: (156248, 115)
Detected anomalies: 100.0%

5
Shape of data: (56681, 115)
Detected anomalies: 100.0%

6
Shape of data: (61380, 115)
Detected anomalies: 100.0%

7
Shape of data: (30898, 115)
Detected anomalies: 100.0%

8
Shape of data: (29297, 115)
Detected anomalies: 100.0%

9
Shape of data: (104510, 115)
Detected anomalies: 100.0%

10
Shape of data: (104011, 115)
Detected anomalies: 100.0%



## IoT Device 6: Provision PT 838 Security Camera

### Loading the dataset

In [None]:
def load_nbaiot(
    filename,
    data_dir="/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset",
):
    """Load one N-BaIoT CSV file into a float array.

    Args:
        filename: CSV file name, e.g. ``"6.benign.csv"``.
        data_dir: Directory holding the CSV files. Defaults to the Google
            Drive folder this notebook was written against, so existing
            single-argument calls behave exactly as before.

    Returns:
        ``numpy.ndarray`` parsed from the comma-separated file; the first
        line is skipped as a header.
    """
    return np.genfromtxt(
        os.path.join(data_dir, filename),
        delimiter=",",
        skip_header=1,
    )

# Benign traffic: the first 90000 rows train the model, the rest is the
# benign test set.
benign = load_nbaiot("6.benign.csv")
X_train, X_test0 = benign[:90000], benign[90000:]

# Attack captures, loaded in the order the results are reported.
(X_test1, X_test2, X_test3, X_test4, X_test5,
 X_test6, X_test7, X_test8, X_test9, X_test10) = map(load_nbaiot, (
    "6.mirai.scan.csv",
    "6.mirai.ack.csv",
    "6.mirai.syn.csv",
    "6.mirai.udp.csv",
    "6.mirai.udpplain.csv",
    "6.gafgyt.combo.csv",
    "6.gafgyt.junk.csv",
    "6.gafgyt.scan.csv",
    "6.gafgyt.tcp.csv",
    "6.gafgyt.udp.csv",
))

In [None]:
# Show the shape of the training split and of every test set on one line.
print(*(split.shape for split in (
    X_train, X_test0, X_test1, X_test2, X_test3, X_test4,
    X_test5, X_test6, X_test7, X_test8, X_test9, X_test10,
)))

(90000, 115) (8514, 115) (97096, 115) (57997, 115) (61851, 115) (158608, 115) (53785, 115) (57530, 115) (29068, 115) (28397, 115) (89387, 115) (104658, 115)


### Training the autoencoder

In [None]:
# Min-max scale the features; the scaler is fitted on the benign training
# rows only and reused by predict() for the test sets.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)

# Fresh autoencoder for this device, trained to reconstruct its own input.
ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.008), loss='mse')
# NOTE(review): `monitor` is built but never passed to fit() (the callbacks
# argument below is commented out), so early stopping is inactive and every
# epoch runs.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
ae.fit(
    x=x,
    y=x,
    epochs=450,
    validation_split=0.3,
    shuffle=True,
    # callbacks=[monitor]
)

# Reconstruction error on the training data and a mean + 1 std cut-off.
# NOTE(review): predict() in the testing cell has its own hard-coded default
# threshold and is called without one, so this value appears to be unused.
training_loss = losses.mse(x, ae(x))
threshold = np.mean(training_loss)+np.std(training_loss)

Epoch 1/450
Epoch 2/450
Epoch 3/450
Epoch 4/450
Epoch 5/450
Epoch 6/450
Epoch 7/450
Epoch 8/450
Epoch 9/450
Epoch 10/450
Epoch 11/450
Epoch 12/450
Epoch 13/450
Epoch 14/450
Epoch 15/450
Epoch 16/450
Epoch 17/450
Epoch 18/450
Epoch 19/450
Epoch 20/450
Epoch 21/450
Epoch 22/450
Epoch 23/450
Epoch 24/450
Epoch 25/450
Epoch 26/450
Epoch 27/450
Epoch 28/450
Epoch 29/450
Epoch 30/450
Epoch 31/450
Epoch 32/450
Epoch 33/450
Epoch 34/450
Epoch 35/450
Epoch 36/450
Epoch 37/450
Epoch 38/450
Epoch 39/450
Epoch 40/450
Epoch 41/450
Epoch 42/450
Epoch 43/450
Epoch 44/450
Epoch 45/450
Epoch 46/450
Epoch 47/450
Epoch 48/450
Epoch 49/450
Epoch 50/450
Epoch 51/450
Epoch 52/450
Epoch 53/450
Epoch 54/450
Epoch 55/450
Epoch 56/450
Epoch 57/450
Epoch 58/450
Epoch 59/450
Epoch 60/450
Epoch 61/450
Epoch 62/450
Epoch 63/450
Epoch 64/450
Epoch 65/450
Epoch 66/450
Epoch 67/450
Epoch 68/450
Epoch 69/450
Epoch 70/450
Epoch 71/450
Epoch 72/450
Epoch 73/450
Epoch 74/450
Epoch 75/450
Epoch 76/450
Epoch 77/450
Epoch 78

### Testing the autoencoder

In [None]:
def predict(x, threshold=0.038, window_size=43):
    """Flag sliding windows of samples as anomalous by majority vote.

    Relies on the notebook globals ``scaler`` and ``ae`` set up in the
    training cell for this device.

    Args:
        x: Raw (unscaled) samples, one row per sample.
        threshold: Reconstruction MSE above which a sample counts as
            anomalous.
        window_size: Number of consecutive samples per vote; must be >= 1.

    Returns:
        Boolean array with ``len(x) - window_size + 1`` entries; entry ``k``
        is True when more than half of samples ``k .. k + window_size - 1``
        exceed ``threshold``. Empty when ``x`` has fewer rows than
        ``window_size``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold).astype(np.int64)
    if len(flags) < window_size:
        return np.zeros(0, dtype=bool)
    # Prefix sums turn every window count into one subtraction, replacing the
    # per-window Python loop (O(n * window_size)) with a single O(n) pass.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:-window_size]
    # Strict majority: the same decision as mean(window) > 0.5.
    return votes * 2 > window_size

# Benign hold-out first (index 0), then the attack captures in load order.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# For each data set print its index, its shape and the share of windows
# flagged as anomalous.
# NOTE: the loop variable rebinds the notebook-global `x`, which the training
# cell set to the scaled training matrix.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (8514, 115)
Detected anomalies: 0.0%

1
Shape of data: (97096, 115)
Detected anomalies: 100.0%

2
Shape of data: (57997, 115)
Detected anomalies: 100.0%

3
Shape of data: (61851, 115)
Detected anomalies: 100.0%

4
Shape of data: (158608, 115)
Detected anomalies: 100.0%

5
Shape of data: (53785, 115)
Detected anomalies: 100.0%

6
Shape of data: (57530, 115)
Detected anomalies: 100.0%

7
Shape of data: (29068, 115)
Detected anomalies: 100.0%

8
Shape of data: (28397, 115)
Detected anomalies: 100.0%

9
Shape of data: (89387, 115)
Detected anomalies: 100.0%

10
Shape of data: (104658, 115)
Detected anomalies: 100.0%



## IoT Device 7: Samsung SNH 1011 N Webcam

### Loading the dataset

In [None]:
def load_nbaiot(
    filename,
    data_dir="/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset",
):
    """Load one N-BaIoT CSV file into a float array.

    Args:
        filename: CSV file name, e.g. ``"7.benign.csv"``.
        data_dir: Directory holding the CSV files. Defaults to the Google
            Drive folder this notebook was written against, so existing
            single-argument calls behave exactly as before.

    Returns:
        ``numpy.ndarray`` parsed from the comma-separated file; the first
        line is skipped as a header.
    """
    return np.genfromtxt(
        os.path.join(data_dir, filename),
        delimiter=",",
        skip_header=1,
    )

# Benign traffic: the first 50000 rows train the model, the rest is the
# benign test set.
benign = load_nbaiot("7.benign.csv")
X_train, X_test0 = benign[:50000], benign[50000:]

# Attack captures (only gafgyt files are loaded for this device), in the
# order the results are reported.
(X_test1, X_test2, X_test3, X_test4, X_test5) = map(load_nbaiot, (
    "7.gafgyt.combo.csv",
    "7.gafgyt.junk.csv",
    "7.gafgyt.scan.csv",
    "7.gafgyt.tcp.csv",
    "7.gafgyt.udp.csv",
))

In [None]:
# Show the shape of the training split and of every test set on one line.
print(*(split.shape for split in (
    X_train, X_test0, X_test1, X_test2, X_test3, X_test4, X_test5,
)))

(50000, 115) (2150, 115) (58669, 115) (28305, 115) (27698, 115) (97783, 115) (110617, 115)


### Training the autoencoder

In [None]:
# Min-max scale the features; the scaler is fitted on the benign training
# rows only and reused by predict() for the test sets.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)

# Fresh autoencoder for this device, trained to reconstruct its own input.
ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.013), loss='mse')
# NOTE(review): `monitor` is built but never passed to fit() (the callbacks
# argument below is commented out), so early stopping is inactive and every
# epoch runs.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
ae.fit(
    x=x,
    y=x,
    epochs=150,
    validation_split=0.3,
    shuffle=True,
    # callbacks=[monitor]
)

# Reconstruction error on the training data and a mean + 1 std cut-off.
# NOTE(review): predict() in the testing cell has its own hard-coded default
# threshold and is called without one, so this value appears to be unused.
training_loss = losses.mse(x, ae(x))
threshold = np.mean(training_loss)+np.std(training_loss)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

### Testing the autoencoder

In [None]:
def predict(x, threshold=0.074, window_size=32):
    """Flag sliding windows of samples as anomalous by majority vote.

    Relies on the notebook globals ``scaler`` and ``ae`` set up in the
    training cell for this device.

    Args:
        x: Raw (unscaled) samples, one row per sample.
        threshold: Reconstruction MSE above which a sample counts as
            anomalous.
        window_size: Number of consecutive samples per vote; must be >= 1.

    Returns:
        Boolean array with ``len(x) - window_size + 1`` entries; entry ``k``
        is True when more than half of samples ``k .. k + window_size - 1``
        exceed ``threshold``. Empty when ``x`` has fewer rows than
        ``window_size``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold).astype(np.int64)
    if len(flags) < window_size:
        return np.zeros(0, dtype=bool)
    # Prefix sums turn every window count into one subtraction, replacing the
    # per-window Python loop (O(n * window_size)) with a single O(n) pass.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:-window_size]
    # Strict majority: the same decision as mean(window) > 0.5.
    return votes * 2 > window_size

# Benign hold-out first (index 0), then the attack captures in load order.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5]

# For each data set print its index, its shape and the share of windows
# flagged as anomalous.
# NOTE: the loop variable rebinds the notebook-global `x`, which the training
# cell set to the scaled training matrix.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (2150, 115)
Detected anomalies: 1.557338367154318%

1
Shape of data: (58669, 115)
Detected anomalies: 100.0%

2
Shape of data: (28305, 115)
Detected anomalies: 100.0%

3
Shape of data: (27698, 115)
Detected anomalies: 100.0%

4
Shape of data: (97783, 115)
Detected anomalies: 0.0%

5
Shape of data: (110617, 115)
Detected anomalies: 0.0%



## IoT Device 8: SimpleHome XCS7 1002 WHT Security Camera

### Loading the dataset

In [None]:
def load_nbaiot(
    filename,
    data_dir="/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset",
):
    """Load one N-BaIoT CSV file into a float array.

    Args:
        filename: CSV file name, e.g. ``"8.benign.csv"``.
        data_dir: Directory holding the CSV files. Defaults to the Google
            Drive folder this notebook was written against, so existing
            single-argument calls behave exactly as before.

    Returns:
        ``numpy.ndarray`` parsed from the comma-separated file; the first
        line is skipped as a header.
    """
    return np.genfromtxt(
        os.path.join(data_dir, filename),
        delimiter=",",
        skip_header=1,
    )

# Benign traffic: the first 40000 rows train the model, the rest is the
# benign test set.
benign = load_nbaiot("8.benign.csv")
X_train, X_test0 = benign[:40000], benign[40000:]

# Attack captures, loaded in the order the results are reported.
(X_test1, X_test2, X_test3, X_test4, X_test5,
 X_test6, X_test7, X_test8, X_test9, X_test10) = map(load_nbaiot, (
    "8.mirai.scan.csv",
    "8.mirai.ack.csv",
    "8.mirai.syn.csv",
    "8.mirai.udp.csv",
    "8.mirai.udpplain.csv",
    "8.gafgyt.combo.csv",
    "8.gafgyt.junk.csv",
    "8.gafgyt.scan.csv",
    "8.gafgyt.tcp.csv",
    "8.gafgyt.udp.csv",
))

In [None]:
# Show the shape of the training split and of every test set on one line.
print(*(split.shape for split in (
    X_train, X_test0, X_test1, X_test2, X_test3, X_test4,
    X_test5, X_test6, X_test7, X_test8, X_test9, X_test10,
)))

(40000, 115) (6585, 115) (45930, 115) (111480, 115) (125715, 115) (151879, 115) (78244, 115) (54283, 115) (28579, 115) (27825, 115) (88816, 115) (103720, 115)


### Training the autoencoder

In [None]:
# Min-max scale the features; the scaler is fitted on the benign training
# rows only and reused by predict() for the test sets.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)

# Fresh autoencoder for this device, trained to reconstruct its own input.
ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.017), loss='mse')
# NOTE(review): `monitor` is built but never passed to fit() (the callbacks
# argument below is commented out), so early stopping is inactive and every
# epoch runs.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
ae.fit(
    x=x,
    y=x,
    epochs=230,
    validation_split=0.3,
    shuffle=True,
    # callbacks=[monitor]
)

# Reconstruction error on the training data and a mean + 1 std cut-off.
# NOTE(review): predict() in the testing cell has its own hard-coded default
# threshold and is called without one, so this value appears to be unused.
training_loss = losses.mse(x, ae(x))
threshold = np.mean(training_loss)+np.std(training_loss)

Epoch 1/230
Epoch 2/230
Epoch 3/230
Epoch 4/230
Epoch 5/230
Epoch 6/230
Epoch 7/230
Epoch 8/230
Epoch 9/230
Epoch 10/230
Epoch 11/230
Epoch 12/230
Epoch 13/230
Epoch 14/230
Epoch 15/230
Epoch 16/230
Epoch 17/230
Epoch 18/230
Epoch 19/230
Epoch 20/230
Epoch 21/230
Epoch 22/230
Epoch 23/230
Epoch 24/230
Epoch 25/230
Epoch 26/230
Epoch 27/230
Epoch 28/230
Epoch 29/230
Epoch 30/230
Epoch 31/230
Epoch 32/230
Epoch 33/230
Epoch 34/230
Epoch 35/230
Epoch 36/230
Epoch 37/230
Epoch 38/230
Epoch 39/230
Epoch 40/230
Epoch 41/230
Epoch 42/230
Epoch 43/230
Epoch 44/230
Epoch 45/230
Epoch 46/230
Epoch 47/230
Epoch 48/230
Epoch 49/230
Epoch 50/230
Epoch 51/230
Epoch 52/230
Epoch 53/230
Epoch 54/230
Epoch 55/230
Epoch 56/230
Epoch 57/230
Epoch 58/230
Epoch 59/230
Epoch 60/230
Epoch 61/230
Epoch 62/230
Epoch 63/230
Epoch 64/230
Epoch 65/230
Epoch 66/230
Epoch 67/230
Epoch 68/230
Epoch 69/230
Epoch 70/230
Epoch 71/230
Epoch 72/230
Epoch 73/230
Epoch 74/230
Epoch 75/230
Epoch 76/230
Epoch 77/230
Epoch 78

### Testing the autoencoder

In [None]:
def predict(x, threshold=0.056, window_size=23):
    """Flag sliding windows of samples as anomalous by majority vote.

    Relies on the notebook globals ``scaler`` and ``ae`` set up in the
    training cell for this device.

    Args:
        x: Raw (unscaled) samples, one row per sample.
        threshold: Reconstruction MSE above which a sample counts as
            anomalous.
        window_size: Number of consecutive samples per vote; must be >= 1.

    Returns:
        Boolean array with ``len(x) - window_size + 1`` entries; entry ``k``
        is True when more than half of samples ``k .. k + window_size - 1``
        exceed ``threshold``. Empty when ``x`` has fewer rows than
        ``window_size``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold).astype(np.int64)
    if len(flags) < window_size:
        return np.zeros(0, dtype=bool)
    # Prefix sums turn every window count into one subtraction, replacing the
    # per-window Python loop (O(n * window_size)) with a single O(n) pass.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:-window_size]
    # Strict majority: the same decision as mean(window) > 0.5.
    return votes * 2 > window_size

# Benign hold-out first (index 0), then the attack captures in load order.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# For each data set print its index, its shape and the share of windows
# flagged as anomalous.
# NOTE: the loop variable rebinds the notebook-global `x`, which the training
# cell set to the scaled training matrix.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (6585, 115)
Detected anomalies: 13.941794910863933%

1
Shape of data: (45930, 115)
Detected anomalies: 100.0%

2
Shape of data: (111480, 115)
Detected anomalies: 100.0%

3
Shape of data: (125715, 115)
Detected anomalies: 100.0%

4
Shape of data: (151879, 115)
Detected anomalies: 100.0%

5
Shape of data: (78244, 115)
Detected anomalies: 100.0%

6
Shape of data: (54283, 115)
Detected anomalies: 100.0%

7
Shape of data: (28579, 115)
Detected anomalies: 100.0%

8
Shape of data: (27825, 115)
Detected anomalies: 100.0%

9
Shape of data: (88816, 115)
Detected anomalies: 100.0%

10
Shape of data: (103720, 115)
Detected anomalies: 100.0%



## IoT Device 9: SimpleHome XCS7 1003 WHT Security Camera

### Loading the dataset

In [None]:
def load_nbaiot(
    filename,
    data_dir="/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset",
):
    """Load one N-BaIoT CSV file into a float array.

    Args:
        filename: CSV file name, e.g. ``"9.benign.csv"``.
        data_dir: Directory holding the CSV files. Defaults to the Google
            Drive folder this notebook was written against, so existing
            single-argument calls behave exactly as before.

    Returns:
        ``numpy.ndarray`` parsed from the comma-separated file; the first
        line is skipped as a header.
    """
    return np.genfromtxt(
        os.path.join(data_dir, filename),
        delimiter=",",
        skip_header=1,
    )

# Benign traffic: the first 15000 rows train the model, the rest is the
# benign test set.
benign = load_nbaiot("9.benign.csv")
X_train, X_test0 = benign[:15000], benign[15000:]

# Attack captures, loaded in the order the results are reported.
(X_test1, X_test2, X_test3, X_test4, X_test5,
 X_test6, X_test7, X_test8, X_test9, X_test10) = map(load_nbaiot, (
    "9.mirai.scan.csv",
    "9.mirai.ack.csv",
    "9.mirai.syn.csv",
    "9.mirai.udp.csv",
    "9.mirai.udpplain.csv",
    "9.gafgyt.combo.csv",
    "9.gafgyt.junk.csv",
    "9.gafgyt.scan.csv",
    "9.gafgyt.tcp.csv",
    "9.gafgyt.udp.csv",
))

In [None]:
# Show the shape of the training split and of every test set on one line.
print(*(split.shape for split in (
    X_train, X_test0, X_test1, X_test2, X_test3, X_test4,
    X_test5, X_test6, X_test7, X_test8, X_test9, X_test10,
)))

(15000, 115) (4528, 115) (43674, 115) (107187, 115) (122479, 115) (157084, 115) (84436, 115) (59398, 115) (27413, 115) (28572, 115) (98075, 115) (102980, 115)


### Training the autoencoder

In [None]:
# Min-max scale the features; the scaler is fitted on the benign training
# rows only and reused by predict() for the test sets.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)

# Fresh autoencoder for this device, trained to reconstruct its own input.
ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.006), loss='mse')
# NOTE(review): `monitor` is built but never passed to fit() (the callbacks
# argument below is commented out), so early stopping is inactive and every
# epoch runs.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
ae.fit(
    x=x,
    y=x,
    epochs=500,
    validation_split=0.3,
    shuffle=True,
    # callbacks=[monitor]
)

# Reconstruction error on the training data and a mean + 1 std cut-off.
# NOTE(review): predict() in the testing cell has its own hard-coded default
# threshold and is called without one, so this value appears to be unused.
training_loss = losses.mse(x, ae(x))
threshold = np.mean(training_loss)+np.std(training_loss)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

### Testing the autoencoder

In [None]:
def predict(x, threshold=0.004, window_size=25):
    """Flag sliding windows of samples as anomalous by majority vote.

    Relies on the notebook globals ``scaler`` and ``ae`` set up in the
    training cell for this device.

    Args:
        x: Raw (unscaled) samples, one row per sample.
        threshold: Reconstruction MSE above which a sample counts as
            anomalous.
        window_size: Number of consecutive samples per vote; must be >= 1.

    Returns:
        Boolean array with ``len(x) - window_size + 1`` entries; entry ``k``
        is True when more than half of samples ``k .. k + window_size - 1``
        exceed ``threshold``. Empty when ``x`` has fewer rows than
        ``window_size``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold).astype(np.int64)
    if len(flags) < window_size:
        return np.zeros(0, dtype=bool)
    # Prefix sums turn every window count into one subtraction, replacing the
    # per-window Python loop (O(n * window_size)) with a single O(n) pass.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:-window_size]
    # Strict majority: the same decision as mean(window) > 0.5.
    return votes * 2 > window_size

# Benign hold-out first (index 0), then the attack captures in load order.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# For each data set print its index, its shape and the share of windows
# flagged as anomalous.
# NOTE: the loop variable rebinds the notebook-global `x`, which the training
# cell set to the scaled training matrix.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (4528, 115)
Detected anomalies: 4.174067495559503%

1
Shape of data: (43674, 115)
Detected anomalies: 100.0%

2
Shape of data: (107187, 115)
Detected anomalies: 100.0%

3
Shape of data: (122479, 115)
Detected anomalies: 100.0%

4
Shape of data: (157084, 115)
Detected anomalies: 100.0%

5
Shape of data: (84436, 115)
Detected anomalies: 100.0%

6
Shape of data: (59398, 115)
Detected anomalies: 100.0%

7
Shape of data: (27413, 115)
Detected anomalies: 100.0%

8
Shape of data: (28572, 115)
Detected anomalies: 100.0%

9
Shape of data: (98075, 115)
Detected anomalies: 100.0%

10
Shape of data: (102980, 115)
Detected anomalies: 100.0%

