# **Botnet Detection in IoT Devices Using an Autoencoder**

## Loading Required Libraries

In [None]:
import os
import numpy as np
import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import layers, losses, Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint

## Mounting drive

In [None]:
# Google Colab only: mount Google Drive at /drive. The dataset loaders in this
# notebook read their CSV files from "/drive/My Drive/...".
from google.colab import drive
drive.mount('/drive')

Drive already mounted at /drive; to attempt to forcibly remount, call drive.mount("/drive", force_remount=True).


## Autoencoder

In [None]:

class Autoencoder(Model):
    """Fully connected autoencoder with a 28-unit bottleneck.

    The encoder is a stack of ReLU dense layers of widths 115-86-57-37-28.
    The decoder mirrors it with ReLU layers of widths 37-57-86 followed by a
    115-unit sigmoid output layer.
    """

    def __init__(self):
        super().__init__()
        encoder_widths = (115, 86, 57, 37, 28)
        decoder_hidden_widths = (37, 57, 86)
        self.encoder = Sequential(
            [layers.Dense(units, activation="relu") for units in encoder_widths]
        )
        self.decoder = Sequential(
            [layers.Dense(units, activation="relu") for units in decoder_hidden_widths]
            + [layers.Dense(115, activation="sigmoid")]
        )

    def call(self, x):
        """Return the reconstruction of `x` (encode, then decode)."""
        return self.decoder(self.encoder(x))

## Function to Print Stats

In [None]:
def print_stats(data, outcome):
    """Print the shape of `data` and the percentage of truthy values in `outcome`.

    Args:
        data: Object exposing a `.shape` attribute (the evaluated samples).
        outcome: Array-like of per-window anomaly decisions; its mean times
            100 is reported as the detected-anomaly percentage.

    The report is followed by one blank line.
    """
    anomaly_pct = np.mean(outcome) * 100
    report_lines = (
        f"Shape of data: {data.shape}",
        f"Detected anomalies: {anomaly_pct}%",
        "",
    )
    print("\n".join(report_lines))

## IoT Device 1: Danmini Doorbell

### Loading the dataset

In [None]:
def load_nbaiot(filename):
    """Load one dataset CSV file from the mounted Drive folder.

    Args:
        filename: File name, joined onto the dataset directory with
            `os.path.join`.

    Returns:
        The array parsed by `np.genfromtxt` from the comma-separated file,
        with the first (header) line skipped.
    """
    dataset_dir = "/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset"
    csv_path = os.path.join(dataset_dir, filename)
    return np.genfromtxt(csv_path, delimiter=",", skip_header=1)

# Benign traffic is cut into consecutive slices:
#   X_train  -> first 30000 rows
#   X_train0 -> rows 30000..39999
#   X_test0  -> everything from row 40000 on
#   X_train1 -> everything from row 30000 on (X_train0 and X_test0 together)
benign = load_nbaiot("1.benign.csv")
X_train, X_train0, X_test0 = benign[:30000], benign[30000:40000], benign[40000:]
X_train1 = benign[30000:]

# Attack captures, loaded in the same order as the X_test1..X_test10 names.
(X_test1, X_test2, X_test3, X_test4, X_test5,
 X_test6, X_test7, X_test8, X_test9, X_test10) = map(load_nbaiot, (
    "1.mirai.scan.csv",
    "1.mirai.ack.csv",
    "1.mirai.syn.csv",
    "1.mirai.udp.csv",
    "1.mirai.udpplain.csv",
    "1.gafgyt.combo.csv",
    "1.gafgyt.junk.csv",
    "1.gafgyt.scan.csv",
    "1.gafgyt.tcp.csv",
    "1.gafgyt.udp.csv",
))

In [None]:
# Print the shape of every split on a single line, space-separated.
print(*[split.shape for split in (
    X_train, X_train0, X_train1, X_test0,
    X_test1, X_test2, X_test3, X_test4, X_test5,
    X_test6, X_test7, X_test8, X_test9, X_test10,
)])

(30000, 115) (10000, 115) (19548, 115) (9548, 115) (107685, 115) (102195, 115) (122573, 115) (237665, 115) (81982, 115) (59718, 115) (29068, 115) (29849, 115) (92141, 115) (105874, 115)


### Training the autoencoder

In [None]:
# Scale every feature to [0, 1] using the min/max of the benign training split.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)
# Checkpoint the epoch with the LOWEST validation loss. With mode='max' the
# callback treated rising val_loss as an improvement and kept the worst epoch.
checkpoint = ModelCheckpoint("ae1_model", monitor='val_loss', verbose=1, save_best_only=True, mode='min')

ae1 = Autoencoder()
ae1.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
# Stop once val_loss has not improved by at least min_delta for 5 epochs.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
# The autoencoder reconstructs its own input, so x serves as both input and target.
ae1.fit(
    x=x,
    y=x,
    epochs=800,
    validation_split=0.3,
    shuffle=True,
    callbacks=[monitor,checkpoint]
)

# Anomaly threshold: mean + one standard deviation of the per-sample
# reconstruction error on the scaled training data.
training_loss = losses.mse(x, ae1(x))
threshold = np.mean(training_loss)+np.std(training_loss)
print(threshold)

Epoch 1/800
Epoch 1: val_loss improved from -inf to 0.00052, saving model to ae1_model
Epoch 2/800
Epoch 2: val_loss did not improve from 0.00052
Epoch 3/800
Epoch 3: val_loss did not improve from 0.00052
Epoch 4/800
Epoch 4: val_loss improved from 0.00052 to 0.00056, saving model to ae1_model
Epoch 5/800
Epoch 5: val_loss did not improve from 0.00056
Epoch 6/800
Epoch 6: val_loss improved from 0.00056 to 0.00068, saving model to ae1_model
Epoch 7/800
Epoch 7: val_loss did not improve from 0.00068
Epoch 8/800
Epoch 8: val_loss did not improve from 0.00068
Epoch 9/800
Epoch 9: val_loss improved from 0.00068 to 0.00091, saving model to ae1_model
Epoch 10/800
Epoch 10: val_loss did not improve from 0.00091
Epoch 11/800
Epoch 11: val_loss did not improve from 0.00091
Epoch 12/800
Epoch 12: val_loss did not improve from 0.00091
Epoch 13/800
Epoch 13: val_loss did not improve from 0.00091
Epoch 14/800
Epoch 14: val_loss did not improve from 0.00091
Epoch 15/800
Epoch 15: val_loss did not imp

In [None]:
# Refit the global scaler on the X_train0 slice; from here on `scaler` and `x`
# refer to this slice.
scaler = MinMaxScaler().fit(X_train0)
x = scaler.transform(X_train0)

# Stop once val_loss has not improved by at least min_delta for 5 epochs.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-9, patience=5, verbose=1, mode='auto')

# Fresh autoencoder trained to reconstruct its own (scaled) input.
ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
ae.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of the per-sample reconstruction error.
training_loss1 = losses.mse(x, ae(x))
threshold1 = np.std(training_loss1) + np.mean(training_loss1)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 6: early stopping


In [None]:
# Reload the checkpointed model and continue training it on the current
# global `x`, reusing the global early-stopping callback `monitor`.
af1 = load_model('ae1_model')
af1.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of the per-sample reconstruction error.
training_loss2 = losses.mse(x, af1(x))
threshold2 = np.std(training_loss2) + np.mean(training_loss2)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 11: early stopping


### Testing the autoencoder

In [None]:
def predict(x, threshold=threshold1, window_size=82):
    """Classify sliding windows of samples as anomalous by majority vote.

    Uses the notebook globals `scaler` (feature scaling) and `ae` (model).

    Args:
        x: Raw samples, one row per sample.
        threshold: Reconstruction-error cut-off; a sample is flagged when its
            MSE is strictly above it.
        window_size: Number of consecutive samples per vote.

    Returns:
        Boolean array with one entry per full window (len(x) - window_size + 1
        entries); True when more than half of the window's samples are
        flagged. Empty when `x` has fewer rows than `window_size`.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.array([], dtype=bool)
    # counts[i] is the number of flagged samples among the first i, so the
    # difference of two entries is a window's flag count; this replaces one
    # tensor slice + mean per window.
    counts = np.concatenate(([0], np.cumsum(flags)))
    window_counts = counts[window_size:] - counts[:n_windows]
    # Strict majority, equivalent to mean(window) > 0.5.
    return 2 * window_counts > window_size

test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# The loop variable is deliberately not named `x`, so the global scaled
# training matrix `x` used by the training cells is not overwritten.
for i, X_eval in enumerate(test_data):
    print(i)
    outcome = predict(X_eval)
    print_stats(X_eval, outcome)

0
Shape of data: (9548, 115)
Detected anomalies: 0.285201225308968%

1
Shape of data: (107685, 115)
Detected anomalies: 100.0%

2
Shape of data: (102195, 115)
Detected anomalies: 100.0%

3
Shape of data: (122573, 115)
Detected anomalies: 100.0%

4
Shape of data: (237665, 115)
Detected anomalies: 100.0%

5
Shape of data: (81982, 115)
Detected anomalies: 100.0%

6
Shape of data: (59718, 115)
Detected anomalies: 100.0%

7
Shape of data: (29068, 115)
Detected anomalies: 100.0%

8
Shape of data: (29849, 115)
Detected anomalies: 100.0%

9
Shape of data: (92141, 115)
Detected anomalies: 100.0%

10
Shape of data: (105874, 115)
Detected anomalies: 100.0%



In [None]:
af = load_model('ae1_model')

# The checkpoint was trained on X_train scaled by X_train's own min/max, but
# the global `scaler` has since been refit on X_train0. Rebuild the matching
# scaler here and derive the threshold from `af` itself instead of pasting a
# number from an earlier printout (which goes stale on every re-run).
af_scaler = MinMaxScaler().fit(X_train)
af_train_scaled = af_scaler.transform(X_train)
af_train_loss = losses.mse(af_train_scaled, af(af_train_scaled))
af_threshold = np.mean(af_train_loss) + np.std(af_train_loss)

def predict(x, threshold=af_threshold, window_size=82):
    """Classify sliding windows of samples as anomalous by majority vote.

    Uses the checkpointed model `af` together with `af_scaler`.

    Args:
        x: Raw samples, one row per sample.
        threshold: Reconstruction-error cut-off; a sample is flagged when its
            MSE is strictly above it.
        window_size: Number of consecutive samples per vote.

    Returns:
        Boolean array with one entry per full window (len(x) - window_size + 1
        entries); True when more than half of the window's samples are
        flagged. Empty when `x` has fewer rows than `window_size`.
    """
    x = af_scaler.transform(x)
    flags = np.asarray(losses.mse(x, af(x)) > threshold)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.array([], dtype=bool)
    # counts[i] is the number of flagged samples among the first i, so the
    # difference of two entries is a window's flag count.
    counts = np.concatenate(([0], np.cumsum(flags)))
    window_counts = counts[window_size:] - counts[:n_windows]
    # Strict majority, equivalent to mean(window) > 0.5.
    return 2 * window_counts > window_size

# X_train1 is all benign data after the rows this checkpoint was trained on.
test_data = [X_train1, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# The loop variable is deliberately not named `x`, so the global scaled
# training matrix `x` used by the training cells is not overwritten.
for i, X_eval in enumerate(test_data):
    print(i)
    outcome = predict(X_eval)
    print_stats(X_eval, outcome)

0
Shape of data: (19548, 115)
Detected anomalies: 100.0%

1
Shape of data: (107685, 115)
Detected anomalies: 100.0%

2
Shape of data: (102195, 115)
Detected anomalies: 100.0%

3
Shape of data: (122573, 115)
Detected anomalies: 100.0%

4
Shape of data: (237665, 115)
Detected anomalies: 100.0%

5
Shape of data: (81982, 115)
Detected anomalies: 100.0%

6
Shape of data: (59718, 115)
Detected anomalies: 100.0%

7
Shape of data: (29068, 115)
Detected anomalies: 100.0%

8
Shape of data: (29849, 115)
Detected anomalies: 100.0%

9
Shape of data: (92141, 115)
Detected anomalies: 100.0%

10
Shape of data: (105874, 115)
Detected anomalies: 100.0%



In [None]:
def predict(x, threshold=threshold2, window_size=82):
    """Classify sliding windows of samples as anomalous by majority vote.

    Uses the notebook globals `scaler` (feature scaling) and `af1` (model).

    Args:
        x: Raw samples, one row per sample.
        threshold: Reconstruction-error cut-off; a sample is flagged when its
            MSE is strictly above it.
        window_size: Number of consecutive samples per vote.

    Returns:
        Boolean array with one entry per full window (len(x) - window_size + 1
        entries); True when more than half of the window's samples are
        flagged. Empty when `x` has fewer rows than `window_size`.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, af1(x)) > threshold)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.array([], dtype=bool)
    # counts[i] is the number of flagged samples among the first i, so the
    # difference of two entries is a window's flag count.
    counts = np.concatenate(([0], np.cumsum(flags)))
    window_counts = counts[window_size:] - counts[:n_windows]
    # Strict majority, equivalent to mean(window) > 0.5.
    return 2 * window_counts > window_size

test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# The loop variable is deliberately not named `x`, so the global scaled
# training matrix `x` used by the training cells is not overwritten.
for i, X_eval in enumerate(test_data):
    print(i)
    outcome = predict(X_eval)
    print_stats(X_eval, outcome)

0
Shape of data: (9548, 115)
Detected anomalies: 0.5387134255836062%

1
Shape of data: (107685, 115)
Detected anomalies: 100.0%

2
Shape of data: (102195, 115)
Detected anomalies: 100.0%

3
Shape of data: (122573, 115)
Detected anomalies: 100.0%

4
Shape of data: (237665, 115)
Detected anomalies: 100.0%

5
Shape of data: (81982, 115)
Detected anomalies: 100.0%

6
Shape of data: (59718, 115)
Detected anomalies: 100.0%

7
Shape of data: (29068, 115)
Detected anomalies: 100.0%

8
Shape of data: (29849, 115)
Detected anomalies: 100.0%

9
Shape of data: (92141, 115)
Detected anomalies: 100.0%

10
Shape of data: (105874, 115)
Detected anomalies: 100.0%



## IoT Device 2: Ecobee Thermostat


### Loading the dataset

In [None]:
def load_nbaiot(filename):
    """Load one dataset CSV file from the mounted Drive folder.

    Args:
        filename: File name, joined onto the dataset directory with
            `os.path.join`.

    Returns:
        The array parsed by `np.genfromtxt` from the comma-separated file,
        with the first (header) line skipped.
    """
    dataset_dir = "/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset"
    csv_path = os.path.join(dataset_dir, filename)
    return np.genfromtxt(csv_path, delimiter=",", skip_header=1)

# Benign traffic is cut into consecutive slices:
#   X_train  -> first 5000 rows
#   X_train0 -> rows 5000..9999
#   X_test0  -> everything from row 10000 on
#   X_train1 -> everything from row 5000 on (X_train0 and X_test0 together)
benign = load_nbaiot("2.benign.csv")
X_train, X_train0, X_test0 = benign[:5000], benign[5000:10000], benign[10000:]
X_train1 = benign[5000:]

# Attack captures, loaded in the same order as the X_test1..X_test10 names.
(X_test1, X_test2, X_test3, X_test4, X_test5,
 X_test6, X_test7, X_test8, X_test9, X_test10) = map(load_nbaiot, (
    "2.mirai.scan.csv",
    "2.mirai.ack.csv",
    "2.mirai.syn.csv",
    "2.mirai.udp.csv",
    "2.mirai.udpplain.csv",
    "2.gafgyt.combo.csv",
    "2.gafgyt.junk.csv",
    "2.gafgyt.scan.csv",
    "2.gafgyt.tcp.csv",
    "2.gafgyt.udp.csv",
))

In [None]:
# Print the shape of every split on a single line, space-separated.
print(*[split.shape for split in (
    X_train, X_train0, X_train1, X_test0,
    X_test1, X_test2, X_test3, X_test4, X_test5,
    X_test6, X_test7, X_test8, X_test9, X_test10,
)])

(5000, 115) (5000, 115) (8113, 115) (3113, 115) (43192, 115) (113285, 115) (116807, 115) (151481, 115) (87368, 115) (53012, 115) (30312, 115) (27494, 115) (95021, 115) (104791, 115)


### Training the autoencoder

In [None]:
# Scale every feature to [0, 1] using the min/max of the benign training split.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)
# Checkpoint the epoch with the LOWEST validation loss. With mode='max' the
# callback treated rising val_loss as an improvement and kept the worst epoch.
checkpoint = ModelCheckpoint("ae2_model", monitor='val_loss', verbose=1, save_best_only=True, mode='min')

ae1 = Autoencoder()
ae1.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
# Stop once val_loss has not improved by at least min_delta for 5 epochs.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
# The autoencoder reconstructs its own input, so x serves as both input and target.
ae1.fit(
    x=x,
    y=x,
    epochs=800,
    validation_split=0.3,
    shuffle=True,
    callbacks=[monitor,checkpoint]
)

# Anomaly threshold: mean + one standard deviation of the per-sample
# reconstruction error on the scaled training data.
training_loss = losses.mse(x, ae1(x))
threshold = np.mean(training_loss)+np.std(training_loss)
print(threshold)

Epoch 1/800
Epoch 1: val_loss improved from -inf to 0.00360, saving model to ae2_model
Epoch 2/800
Epoch 2: val_loss did not improve from 0.00360
Epoch 3/800
Epoch 3: val_loss did not improve from 0.00360
Epoch 4/800
Epoch 4: val_loss did not improve from 0.00360
Epoch 5/800
Epoch 5: val_loss did not improve from 0.00360
Epoch 6/800
Epoch 6: val_loss did not improve from 0.00360
Epoch 7/800
Epoch 7: val_loss did not improve from 0.00360
Epoch 8/800
Epoch 8: val_loss did not improve from 0.00360
Epoch 9/800
Epoch 9: val_loss did not improve from 0.00360
Epoch 10/800
Epoch 10: val_loss did not improve from 0.00360
Epoch 11/800
Epoch 11: val_loss did not improve from 0.00360
Epoch 12/800
Epoch 12: val_loss did not improve from 0.00360
Epoch 13/800
Epoch 13: val_loss did not improve from 0.00360
Epoch 13: early stopping
0.008497585


In [None]:
# Refit the global scaler on the X_train0 slice; from here on `scaler` and `x`
# refer to this slice.
scaler = MinMaxScaler().fit(X_train0)
x = scaler.transform(X_train0)

# Stop once val_loss has not improved by at least min_delta for 5 epochs.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-9, patience=5, verbose=1, mode='auto')

# Fresh autoencoder trained to reconstruct its own (scaled) input.
ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
ae.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of the per-sample reconstruction error.
training_loss1 = losses.mse(x, ae(x))
threshold1 = np.std(training_loss1) + np.mean(training_loss1)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 19/800
Epoch 20/800
Epoch 21/800
Epoch 22/800
Epoch 23/800
Epoch 23: early stopping


In [None]:
# Reload the checkpointed model and continue training it on the current
# global `x`, reusing the global early-stopping callback `monitor`.
af1 = load_model('ae2_model')
af1.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of the per-sample reconstruction error.
training_loss2 = losses.mse(x, af1(x))
threshold2 = np.std(training_loss2) + np.mean(training_loss2)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 18: early stopping


### Testing the autoencoder

In [None]:
def predict(x, threshold=threshold1, window_size=82):
    """Classify sliding windows of samples as anomalous by majority vote.

    Uses the notebook globals `scaler` (feature scaling) and `ae` (model).

    Args:
        x: Raw samples, one row per sample.
        threshold: Reconstruction-error cut-off; a sample is flagged when its
            MSE is strictly above it.
        window_size: Number of consecutive samples per vote.

    Returns:
        Boolean array with one entry per full window (len(x) - window_size + 1
        entries); True when more than half of the window's samples are
        flagged. Empty when `x` has fewer rows than `window_size`.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.array([], dtype=bool)
    # counts[i] is the number of flagged samples among the first i, so the
    # difference of two entries is a window's flag count; this replaces one
    # tensor slice + mean per window.
    counts = np.concatenate(([0], np.cumsum(flags)))
    window_counts = counts[window_size:] - counts[:n_windows]
    # Strict majority, equivalent to mean(window) > 0.5.
    return 2 * window_counts > window_size

test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# The loop variable is deliberately not named `x`, so the global scaled
# training matrix `x` used by the training cells is not overwritten.
for i, X_eval in enumerate(test_data):
    print(i)
    outcome = predict(X_eval)
    print_stats(X_eval, outcome)

0
Shape of data: (3113, 115)
Detected anomalies: 0.0%

1
Shape of data: (43192, 115)
Detected anomalies: 100.0%

2
Shape of data: (113285, 115)
Detected anomalies: 100.0%

3
Shape of data: (116807, 115)
Detected anomalies: 100.0%

4
Shape of data: (151481, 115)
Detected anomalies: 100.0%

5
Shape of data: (87368, 115)
Detected anomalies: 100.0%

6
Shape of data: (53012, 115)
Detected anomalies: 100.0%

7
Shape of data: (30312, 115)
Detected anomalies: 100.0%

8
Shape of data: (27494, 115)
Detected anomalies: 100.0%

9
Shape of data: (95021, 115)
Detected anomalies: 100.0%

10
Shape of data: (104791, 115)
Detected anomalies: 100.0%



In [None]:
af = load_model('ae2_model')

# The checkpoint was trained on X_train scaled by X_train's own min/max, but
# the global `scaler` has since been refit on X_train0. Rebuild the matching
# scaler here and derive the threshold from `af` itself instead of pasting a
# number from an earlier printout (which goes stale on every re-run).
af_scaler = MinMaxScaler().fit(X_train)
af_train_scaled = af_scaler.transform(X_train)
af_train_loss = losses.mse(af_train_scaled, af(af_train_scaled))
af_threshold = np.mean(af_train_loss) + np.std(af_train_loss)

def predict(x, threshold=af_threshold, window_size=82):
    """Classify sliding windows of samples as anomalous by majority vote.

    Uses the checkpointed model `af` together with `af_scaler`.

    Args:
        x: Raw samples, one row per sample.
        threshold: Reconstruction-error cut-off; a sample is flagged when its
            MSE is strictly above it.
        window_size: Number of consecutive samples per vote.

    Returns:
        Boolean array with one entry per full window (len(x) - window_size + 1
        entries); True when more than half of the window's samples are
        flagged. Empty when `x` has fewer rows than `window_size`.
    """
    x = af_scaler.transform(x)
    flags = np.asarray(losses.mse(x, af(x)) > threshold)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.array([], dtype=bool)
    # counts[i] is the number of flagged samples among the first i, so the
    # difference of two entries is a window's flag count.
    counts = np.concatenate(([0], np.cumsum(flags)))
    window_counts = counts[window_size:] - counts[:n_windows]
    # Strict majority, equivalent to mean(window) > 0.5.
    return 2 * window_counts > window_size

# X_train1 is all benign data after the rows this checkpoint was trained on.
test_data = [X_train1, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# The loop variable is deliberately not named `x`, so the global scaled
# training matrix `x` used by the training cells is not overwritten.
for i, X_eval in enumerate(test_data):
    print(i)
    outcome = predict(X_eval)
    print_stats(X_eval, outcome)

0
Shape of data: (8113, 115)
Detected anomalies: 1.9173306772908367%

1
Shape of data: (43192, 115)
Detected anomalies: 100.0%

2
Shape of data: (113285, 115)
Detected anomalies: 100.0%

3
Shape of data: (116807, 115)
Detected anomalies: 100.0%

4
Shape of data: (151481, 115)
Detected anomalies: 100.0%

5
Shape of data: (87368, 115)
Detected anomalies: 100.0%

6
Shape of data: (53012, 115)
Detected anomalies: 100.0%

7
Shape of data: (30312, 115)
Detected anomalies: 100.0%

8
Shape of data: (27494, 115)
Detected anomalies: 100.0%

9
Shape of data: (95021, 115)
Detected anomalies: 100.0%

10
Shape of data: (104791, 115)
Detected anomalies: 100.0%



In [None]:
def predict(x, threshold=threshold2, window_size=82):
    """Classify sliding windows of samples as anomalous by majority vote.

    Uses the notebook globals `scaler` (feature scaling) and `af1` (model).

    Args:
        x: Raw samples, one row per sample.
        threshold: Reconstruction-error cut-off; a sample is flagged when its
            MSE is strictly above it.
        window_size: Number of consecutive samples per vote.

    Returns:
        Boolean array with one entry per full window (len(x) - window_size + 1
        entries); True when more than half of the window's samples are
        flagged. Empty when `x` has fewer rows than `window_size`.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, af1(x)) > threshold)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.array([], dtype=bool)
    # counts[i] is the number of flagged samples among the first i, so the
    # difference of two entries is a window's flag count.
    counts = np.concatenate(([0], np.cumsum(flags)))
    window_counts = counts[window_size:] - counts[:n_windows]
    # Strict majority, equivalent to mean(window) > 0.5.
    return 2 * window_counts > window_size

test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# The loop variable is deliberately not named `x`, so the global scaled
# training matrix `x` used by the training cells is not overwritten.
for i, X_eval in enumerate(test_data):
    print(i)
    outcome = predict(X_eval)
    print_stats(X_eval, outcome)

0
Shape of data: (3113, 115)
Detected anomalies: 0.0%

1
Shape of data: (43192, 115)
Detected anomalies: 100.0%

2
Shape of data: (113285, 115)
Detected anomalies: 100.0%

3
Shape of data: (116807, 115)
Detected anomalies: 100.0%

4
Shape of data: (151481, 115)
Detected anomalies: 100.0%

5
Shape of data: (87368, 115)
Detected anomalies: 100.0%

6
Shape of data: (53012, 115)
Detected anomalies: 100.0%

7
Shape of data: (30312, 115)
Detected anomalies: 100.0%

8
Shape of data: (27494, 115)
Detected anomalies: 100.0%

9
Shape of data: (95021, 115)
Detected anomalies: 100.0%

10
Shape of data: (104791, 115)
Detected anomalies: 100.0%



## IoT Device 3: Ennio Doorbell

### Loading the dataset

In [None]:
def load_nbaiot(filename):
    """Load one dataset CSV file from the mounted Drive folder.

    Args:
        filename: File name, joined onto the dataset directory with
            `os.path.join`.

    Returns:
        The array parsed by `np.genfromtxt` from the comma-separated file,
        with the first (header) line skipped.
    """
    dataset_dir = "/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset"
    csv_path = os.path.join(dataset_dir, filename)
    return np.genfromtxt(csv_path, delimiter=",", skip_header=1)

# Benign traffic is cut into consecutive slices:
#   X_train  -> first 30000 rows
#   X_train0 -> rows 30000..34999
#   X_test0  -> everything from row 35000 on
#   X_train1 -> everything from row 30000 on (X_train0 and X_test0 together)
benign = load_nbaiot("3.benign.csv")
X_train, X_train0, X_test0 = benign[:30000], benign[30000:35000], benign[35000:]
X_train1 = benign[30000:]

# Only gafgyt captures are loaded for this device (X_test6..X_test10).
X_test6, X_test7, X_test8, X_test9, X_test10 = map(load_nbaiot, (
    "3.gafgyt.combo.csv",
    "3.gafgyt.junk.csv",
    "3.gafgyt.scan.csv",
    "3.gafgyt.tcp.csv",
    "3.gafgyt.udp.csv",
))

In [None]:
# Print the shape of every split on a single line, space-separated.
print(*[split.shape for split in (
    X_train, X_train0, X_train1, X_test0,
    X_test6, X_test7, X_test8, X_test9, X_test10,
)])

(30000, 115) (5000, 115) (9100, 115) (4100, 115) (53014, 115) (29797, 115) (28120, 115) (101536, 115) (103933, 115)


### Training the autoencoder

In [None]:
# Scale every feature to [0, 1] using the min/max of the benign training split.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)
# Checkpoint the epoch with the LOWEST validation loss. With mode='max' the
# callback treated rising val_loss as an improvement and kept the worst epoch.
checkpoint = ModelCheckpoint("ae3_model", monitor='val_loss', verbose=1, save_best_only=True, mode='min')

ae1 = Autoencoder()
ae1.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
# Stop once val_loss has not improved by at least min_delta for 5 epochs.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
# The autoencoder reconstructs its own input, so x serves as both input and target.
ae1.fit(
    x=x,
    y=x,
    epochs=800,
    validation_split=0.3,
    shuffle=True,
    callbacks=[monitor,checkpoint]
)

# Anomaly threshold: mean + one standard deviation of the per-sample
# reconstruction error on the scaled training data.
training_loss = losses.mse(x, ae1(x))
threshold = np.mean(training_loss)+np.std(training_loss)
print(threshold)

Epoch 1/800
Epoch 1: val_loss improved from -inf to 0.04177, saving model to ae3_model
Epoch 2/800
Epoch 2: val_loss did not improve from 0.04177
Epoch 3/800
Epoch 3: val_loss did not improve from 0.04177
Epoch 4/800
Epoch 4: val_loss did not improve from 0.04177
Epoch 5/800
Epoch 5: val_loss improved from 0.04177 to 0.04710, saving model to ae3_model
Epoch 6/800
Epoch 6: val_loss did not improve from 0.04710
Epoch 6: early stopping
0.08886978


In [None]:
# Refit the global scaler on the X_train0 slice; from here on `scaler` and `x`
# refer to this slice.
scaler = MinMaxScaler().fit(X_train0)
x = scaler.transform(X_train0)

# Stop once val_loss has not improved by at least min_delta for 5 epochs.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-9, patience=5, verbose=1, mode='auto')

# Fresh autoencoder trained to reconstruct its own (scaled) input.
ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
ae.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of the per-sample reconstruction error.
training_loss1 = losses.mse(x, ae(x))
threshold1 = np.std(training_loss1) + np.mean(training_loss1)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 13: early stopping


In [None]:
# Reload the checkpointed model and continue training it on the current
# global `x`, reusing the global early-stopping callback `monitor`.
af1 = load_model('ae3_model')
af1.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of the per-sample reconstruction error.
training_loss2 = losses.mse(x, af1(x))
threshold2 = np.std(training_loss2) + np.mean(training_loss2)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 19/800
Epoch 19: early stopping


### Testing the autoencoder

In [None]:
def predict(x, threshold=threshold1, window_size=82):
    """Classify sliding windows of samples as anomalous by majority vote.

    Uses the notebook globals `scaler` (feature scaling) and `ae` (model).

    Args:
        x: Raw samples, one row per sample.
        threshold: Reconstruction-error cut-off; a sample is flagged when its
            MSE is strictly above it.
        window_size: Number of consecutive samples per vote.

    Returns:
        Boolean array with one entry per full window (len(x) - window_size + 1
        entries); True when more than half of the window's samples are
        flagged. Empty when `x` has fewer rows than `window_size`.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.array([], dtype=bool)
    # counts[i] is the number of flagged samples among the first i, so the
    # difference of two entries is a window's flag count; this replaces one
    # tensor slice + mean per window.
    counts = np.concatenate(([0], np.cumsum(flags)))
    window_counts = counts[window_size:] - counts[:n_windows]
    # Strict majority, equivalent to mean(window) > 0.5.
    return 2 * window_counts > window_size

test_data = [X_test0, X_test6, X_test7, X_test8, X_test9, X_test10]

# The loop variable is deliberately not named `x`, so the global scaled
# training matrix `x` used by the training cells is not overwritten.
for i, X_eval in enumerate(test_data):
    print(i)
    outcome = predict(X_eval)
    print_stats(X_eval, outcome)

0
Shape of data: (4100, 115)
Detected anomalies: 4.4787260512565314%

1
Shape of data: (53014, 115)
Detected anomalies: 100.0%

2
Shape of data: (29797, 115)
Detected anomalies: 100.0%

3
Shape of data: (28120, 115)
Detected anomalies: 100.0%

4
Shape of data: (101536, 115)
Detected anomalies: 100.0%

5
Shape of data: (103933, 115)
Detected anomalies: 100.0%



In [None]:
af = load_model('ae3_model')

# The checkpoint was trained on X_train scaled by X_train's own min/max, but
# the global `scaler` has since been refit on X_train0. Rebuild the matching
# scaler here and derive the threshold from `af` itself instead of pasting a
# number from an earlier printout (which goes stale on every re-run).
af_scaler = MinMaxScaler().fit(X_train)
af_train_scaled = af_scaler.transform(X_train)
af_train_loss = losses.mse(af_train_scaled, af(af_train_scaled))
af_threshold = np.mean(af_train_loss) + np.std(af_train_loss)

def predict(x, threshold=af_threshold, window_size=82):
    """Classify sliding windows of samples as anomalous by majority vote.

    Uses the checkpointed model `af` together with `af_scaler`.

    Args:
        x: Raw samples, one row per sample.
        threshold: Reconstruction-error cut-off; a sample is flagged when its
            MSE is strictly above it.
        window_size: Number of consecutive samples per vote.

    Returns:
        Boolean array with one entry per full window (len(x) - window_size + 1
        entries); True when more than half of the window's samples are
        flagged. Empty when `x` has fewer rows than `window_size`.
    """
    x = af_scaler.transform(x)
    flags = np.asarray(losses.mse(x, af(x)) > threshold)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.array([], dtype=bool)
    # counts[i] is the number of flagged samples among the first i, so the
    # difference of two entries is a window's flag count.
    counts = np.concatenate(([0], np.cumsum(flags)))
    window_counts = counts[window_size:] - counts[:n_windows]
    # Strict majority, equivalent to mean(window) > 0.5.
    return 2 * window_counts > window_size

# X_train1 is all benign data after the rows this checkpoint was trained on.
test_data = [X_train1, X_test6, X_test7, X_test8, X_test9, X_test10]

# The loop variable is deliberately not named `x`, so the global scaled
# training matrix `x` used by the training cells is not overwritten.
for i, X_eval in enumerate(test_data):
    print(i)
    outcome = predict(X_eval)
    print_stats(X_eval, outcome)

0
Shape of data: (9100, 115)
Detected anomalies: 2.2618915622574565%

1
Shape of data: (53014, 115)
Detected anomalies: 100.0%

2
Shape of data: (29797, 115)
Detected anomalies: 100.0%

3
Shape of data: (28120, 115)
Detected anomalies: 100.0%

4
Shape of data: (101536, 115)
Detected anomalies: 100.0%

5
Shape of data: (103933, 115)
Detected anomalies: 100.0%



In [None]:
def predict(x, threshold=threshold2, window_size=82):
    """Classify sliding windows of samples as anomalous by majority vote.

    Uses the notebook globals `scaler` (feature scaling) and `af1` (model).

    Args:
        x: Raw samples, one row per sample.
        threshold: Reconstruction-error cut-off; a sample is flagged when its
            MSE is strictly above it.
        window_size: Number of consecutive samples per vote.

    Returns:
        Boolean array with one entry per full window (len(x) - window_size + 1
        entries); True when more than half of the window's samples are
        flagged. Empty when `x` has fewer rows than `window_size`.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, af1(x)) > threshold)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.array([], dtype=bool)
    # counts[i] is the number of flagged samples among the first i, so the
    # difference of two entries is a window's flag count.
    counts = np.concatenate(([0], np.cumsum(flags)))
    window_counts = counts[window_size:] - counts[:n_windows]
    # Strict majority, equivalent to mean(window) > 0.5.
    return 2 * window_counts > window_size

test_data = [X_test0, X_test6, X_test7, X_test8, X_test9, X_test10]

# The loop variable is deliberately not named `x`, so the global scaled
# training matrix `x` used by the training cells is not overwritten.
for i, X_eval in enumerate(test_data):
    print(i)
    outcome = predict(X_eval)
    print_stats(X_eval, outcome)

0
Shape of data: (4100, 115)
Detected anomalies: 4.677780542423489%

1
Shape of data: (53014, 115)
Detected anomalies: 100.0%

2
Shape of data: (29797, 115)
Detected anomalies: 100.0%

3
Shape of data: (28120, 115)
Detected anomalies: 100.0%

4
Shape of data: (101536, 115)
Detected anomalies: 100.0%

5
Shape of data: (103933, 115)
Detected anomalies: 100.0%



## IoT Device 4: Philips B120N10 Baby Monitor

### Loading the dataset

In [None]:
def load_nbaiot(filename):
    """Load one dataset CSV file from the mounted Drive folder.

    Args:
        filename: File name, joined onto the dataset directory with
            `os.path.join`.

    Returns:
        The array parsed by `np.genfromtxt` from the comma-separated file,
        with the first (header) line skipped.
    """
    dataset_dir = "/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset"
    csv_path = os.path.join(dataset_dir, filename)
    return np.genfromtxt(csv_path, delimiter=",", skip_header=1)

# Benign traffic is cut into consecutive slices:
#   X_train  -> first 150000 rows
#   X_train0 -> rows 150000..169999
#   X_test0  -> everything from row 170000 on
#   X_train1 -> everything from row 150000 on (X_train0 and X_test0 together)
benign = load_nbaiot("4.benign.csv")
X_train, X_train0, X_test0 = benign[:150000], benign[150000:170000], benign[170000:]
X_train1 = benign[150000:]

# Attack captures, loaded in the same order as the X_test1..X_test10 names.
(X_test1, X_test2, X_test3, X_test4, X_test5,
 X_test6, X_test7, X_test8, X_test9, X_test10) = map(load_nbaiot, (
    "4.mirai.scan.csv",
    "4.mirai.ack.csv",
    "4.mirai.syn.csv",
    "4.mirai.udp.csv",
    "4.mirai.udpplain.csv",
    "4.gafgyt.combo.csv",
    "4.gafgyt.junk.csv",
    "4.gafgyt.scan.csv",
    "4.gafgyt.tcp.csv",
    "4.gafgyt.udp.csv",
))

In [None]:
# Print the shape of every split on a single line, space-separated.
print(*[split.shape for split in (
    X_train, X_train0, X_train1, X_test0,
    X_test1, X_test2, X_test3, X_test4, X_test5,
    X_test6, X_test7, X_test8, X_test9, X_test10,
)])

(150000, 115) (20000, 115) (25240, 115) (5240, 115) (103621, 115) (91123, 115) (118128, 115) (217034, 115) (80808, 115) (58152, 115) (28349, 115) (27859, 115) (92581, 115) (105782, 115)


### Training the autoencoder

In [None]:
# Scale every feature to [0, 1] using the min/max of the benign training split.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)
# Checkpoint the epoch with the LOWEST validation loss. With mode='max' the
# callback treated rising val_loss as an improvement and kept the worst epoch.
checkpoint = ModelCheckpoint("ae4_model", monitor='val_loss', verbose=1, save_best_only=True, mode='min')

ae1 = Autoencoder()
ae1.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
# Stop once val_loss has not improved by at least min_delta for 5 epochs.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
# The autoencoder reconstructs its own input, so x serves as both input and target.
ae1.fit(
    x=x,
    y=x,
    epochs=800,
    validation_split=0.3,
    shuffle=True,
    callbacks=[monitor,checkpoint]
)

# Anomaly threshold: mean + one standard deviation of the per-sample
# reconstruction error on the scaled training data.
training_loss = losses.mse(x, ae1(x))
threshold = np.mean(training_loss)+np.std(training_loss)
print(threshold)

Epoch 1/800
Epoch 1: val_loss improved from -inf to 0.04721, saving model to ae4_model
Epoch 2/800
Epoch 2: val_loss improved from 0.04721 to 0.04906, saving model to ae4_model
Epoch 3/800
Epoch 3: val_loss did not improve from 0.04906
Epoch 4/800
Epoch 4: val_loss did not improve from 0.04906
Epoch 5/800
Epoch 5: val_loss did not improve from 0.04906
Epoch 6/800
Epoch 6: val_loss did not improve from 0.04906
Epoch 6: early stopping
0.082419306


In [None]:
# Refit the global scaler on the X_train0 slice; from here on `scaler` and `x`
# refer to this slice.
scaler = MinMaxScaler().fit(X_train0)
x = scaler.transform(X_train0)

# Stop once val_loss has not improved by at least min_delta for 5 epochs.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-9, patience=5, verbose=1, mode='auto')

# Fresh autoencoder trained to reconstruct its own (scaled) input.
ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
ae.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of the per-sample reconstruction error.
training_loss1 = losses.mse(x, ae(x))
threshold1 = np.std(training_loss1) + np.mean(training_loss1)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 6: early stopping


In [None]:
# Reload the checkpointed model and continue training it on the current
# global `x`, reusing the global early-stopping callback `monitor`.
af1 = load_model('ae4_model')
af1.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of the per-sample reconstruction error.
training_loss2 = losses.mse(x, af1(x))
threshold2 = np.std(training_loss2) + np.mean(training_loss2)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 6: early stopping


### Testing the autoencoder

In [None]:
def predict(x, threshold=threshold1, window_size=82):
    """Classify sliding windows of samples as anomalous by majority vote.

    Uses the notebook globals `scaler` (feature scaling) and `ae` (model).

    Args:
        x: Raw samples, one row per sample.
        threshold: Reconstruction-error cut-off; a sample is flagged when its
            MSE is strictly above it.
        window_size: Number of consecutive samples per vote.

    Returns:
        Boolean array with one entry per full window (len(x) - window_size + 1
        entries); True when more than half of the window's samples are
        flagged. Empty when `x` has fewer rows than `window_size`.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.array([], dtype=bool)
    # counts[i] is the number of flagged samples among the first i, so the
    # difference of two entries is a window's flag count; this replaces one
    # tensor slice + mean per window.
    counts = np.concatenate(([0], np.cumsum(flags)))
    window_counts = counts[window_size:] - counts[:n_windows]
    # Strict majority, equivalent to mean(window) > 0.5.
    return 2 * window_counts > window_size

test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# The loop variable is deliberately not named `x`, so the global scaled
# training matrix `x` used by the training cells is not overwritten.
for i, X_eval in enumerate(test_data):
    print(i)
    outcome = predict(X_eval)
    print_stats(X_eval, outcome)

0
Shape of data: (5240, 115)
Detected anomalies: 19.073463849583252%

1
Shape of data: (103621, 115)
Detected anomalies: 100.0%

2
Shape of data: (91123, 115)
Detected anomalies: 100.0%

3
Shape of data: (118128, 115)
Detected anomalies: 100.0%

4
Shape of data: (217034, 115)
Detected anomalies: 100.0%

5
Shape of data: (80808, 115)
Detected anomalies: 100.0%

6
Shape of data: (58152, 115)
Detected anomalies: 100.0%

7
Shape of data: (28349, 115)
Detected anomalies: 100.0%

8
Shape of data: (27859, 115)
Detected anomalies: 100.0%

9
Shape of data: (92581, 115)
Detected anomalies: 100.0%

10
Shape of data: (105782, 115)
Detected anomalies: 100.0%



In [None]:
af = load_model('ae4_model')

# The checkpoint was trained on X_train scaled by X_train's own min/max, but
# the global `scaler` has since been refit on X_train0. Rebuild the matching
# scaler here and derive the threshold from `af` itself instead of pasting a
# number from an earlier printout (which goes stale on every re-run).
af_scaler = MinMaxScaler().fit(X_train)
af_train_scaled = af_scaler.transform(X_train)
af_train_loss = losses.mse(af_train_scaled, af(af_train_scaled))
af_threshold = np.mean(af_train_loss) + np.std(af_train_loss)

def predict(x, threshold=af_threshold, window_size=82):
    """Classify sliding windows of samples as anomalous by majority vote.

    Uses the checkpointed model `af` together with `af_scaler`.

    Args:
        x: Raw samples, one row per sample.
        threshold: Reconstruction-error cut-off; a sample is flagged when its
            MSE is strictly above it.
        window_size: Number of consecutive samples per vote.

    Returns:
        Boolean array with one entry per full window (len(x) - window_size + 1
        entries); True when more than half of the window's samples are
        flagged. Empty when `x` has fewer rows than `window_size`.
    """
    x = af_scaler.transform(x)
    flags = np.asarray(losses.mse(x, af(x)) > threshold)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.array([], dtype=bool)
    # counts[i] is the number of flagged samples among the first i, so the
    # difference of two entries is a window's flag count.
    counts = np.concatenate(([0], np.cumsum(flags)))
    window_counts = counts[window_size:] - counts[:n_windows]
    # Strict majority, equivalent to mean(window) > 0.5.
    return 2 * window_counts > window_size

# X_train1 is all benign data after the rows this checkpoint was trained on.
test_data = [X_train1, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# The loop variable is deliberately not named `x`, so the global scaled
# training matrix `x` used by the training cells is not overwritten.
for i, X_eval in enumerate(test_data):
    print(i)
    outcome = predict(X_eval)
    print_stats(X_eval, outcome)

0
Shape of data: (25240, 115)
Detected anomalies: 10.576731984578084%

1
Shape of data: (103621, 115)
Detected anomalies: 100.0%

2
Shape of data: (91123, 115)
Detected anomalies: 100.0%

3
Shape of data: (118128, 115)
Detected anomalies: 100.0%

4
Shape of data: (217034, 115)
Detected anomalies: 100.0%

5
Shape of data: (80808, 115)
Detected anomalies: 100.0%

6
Shape of data: (58152, 115)
Detected anomalies: 100.0%

7
Shape of data: (28349, 115)
Detected anomalies: 100.0%

8
Shape of data: (27859, 115)
Detected anomalies: 100.0%

9
Shape of data: (92581, 115)
Detected anomalies: 100.0%

10
Shape of data: (105782, 115)
Detected anomalies: 100.0%



In [None]:
def predict(x, threshold=threshold2, window_size=82):
    """Classify sliding windows of samples as anomalous by majority vote.

    Uses the notebook globals `scaler` (feature scaling) and `af1` (model).

    Args:
        x: Raw samples, one row per sample.
        threshold: Reconstruction-error cut-off; a sample is flagged when its
            MSE is strictly above it.
        window_size: Number of consecutive samples per vote.

    Returns:
        Boolean array with one entry per full window (len(x) - window_size + 1
        entries); True when more than half of the window's samples are
        flagged. Empty when `x` has fewer rows than `window_size`.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, af1(x)) > threshold)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.array([], dtype=bool)
    # counts[i] is the number of flagged samples among the first i, so the
    # difference of two entries is a window's flag count.
    counts = np.concatenate(([0], np.cumsum(flags)))
    window_counts = counts[window_size:] - counts[:n_windows]
    # Strict majority, equivalent to mean(window) > 0.5.
    return 2 * window_counts > window_size

test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# The loop variable is deliberately not named `x`, so the global scaled
# training matrix `x` used by the training cells is not overwritten.
for i, X_eval in enumerate(test_data):
    print(i)
    outcome = predict(X_eval)
    print_stats(X_eval, outcome)

0
Shape of data: (5240, 115)
Detected anomalies: 16.747431672804808%

1
Shape of data: (103621, 115)
Detected anomalies: 100.0%

2
Shape of data: (91123, 115)
Detected anomalies: 100.0%

3
Shape of data: (118128, 115)
Detected anomalies: 100.0%

4
Shape of data: (217034, 115)
Detected anomalies: 100.0%

5
Shape of data: (80808, 115)
Detected anomalies: 100.0%

6
Shape of data: (58152, 115)
Detected anomalies: 100.0%

7
Shape of data: (28349, 115)
Detected anomalies: 100.0%

8
Shape of data: (27859, 115)
Detected anomalies: 100.0%

9
Shape of data: (92581, 115)
Detected anomalies: 100.0%

10
Shape of data: (105782, 115)
Detected anomalies: 100.0%



## IoT Device 5: Provision PT 737E Security Camera

### Loading the dataset

In [None]:
def load_nbaiot(filename):
    """Load one CSV file from the mounted N-BaIoT dataset folder.

    `filename` is joined onto the dataset directory, the first line of the
    file is skipped, and the comma-separated remainder is returned as a
    NumPy array (np.genfromtxt's default float parsing).
    """
    dataset_dir = "/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset"
    csv_path = os.path.join(dataset_dir, filename)
    return np.genfromtxt(csv_path, skip_header=1, delimiter=",")

# Benign capture for device 5, cut by row index:
#   X_train  = rows [0, 50000)        X_train0 = rows [50000, 60000)
#   X_train1 = rows [50000, end)      X_test0  = rows [60000, end)
benign = load_nbaiot("5.benign.csv")
X_train, X_train0 = benign[:50000], benign[50000:60000]
X_train1, X_test0 = benign[50000:], benign[60000:]

# Attack captures; files are read in the listed order.
(X_test1, X_test2, X_test3, X_test4, X_test5,
 X_test6, X_test7, X_test8, X_test9, X_test10) = map(load_nbaiot, (
    "5.mirai.scan.csv",
    "5.mirai.ack.csv",
    "5.mirai.syn.csv",
    "5.mirai.udp.csv",
    "5.mirai.udpplain.csv",
    "5.gafgyt.combo.csv",
    "5.gafgyt.junk.csv",
    "5.gafgyt.scan.csv",
    "5.gafgyt.tcp.csv",
    "5.gafgyt.udp.csv",
))

In [None]:
# Print the shape of every split on a single line, in the order listed.
print(*(split.shape for split in (
    X_train, X_train0, X_train1, X_test0, X_test1, X_test2, X_test3,
    X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10,
)))

(50000, 115) (10000, 115) (12154, 115) (2154, 115) (96781, 115) (60554, 115) (65746, 115) (156248, 115) (56681, 115) (61380, 115) (30898, 115) (29297, 115) (104510, 115) (104011, 115)


### Training the autoencoder

In [None]:
# Fit the scaler on X_train and train `ae1` to reconstruct the scaled data.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)
# val_loss is an error, so the best epoch is the one with the LOWEST value.
# With mode='max', save_best_only kept the epoch with the highest val_loss.
checkpoint = ModelCheckpoint("ae5_model", monitor='val_loss', verbose=1, save_best_only=True, mode='min')

ae1 = Autoencoder()
ae1.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
# Stop once val_loss has gone 5 epochs without improving by at least 1e-9.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
ae1.fit(
    x=x,
    y=x,
    epochs=800,
    validation_split=0.3,
    shuffle=True,
    callbacks=[monitor,checkpoint]
)

# Threshold = mean + one standard deviation of the reconstruction error on
# the training data, measured with ae1's weights as they are after fit().
training_loss = losses.mse(x, ae1(x))
threshold = np.mean(training_loss)+np.std(training_loss)
print(threshold)

Epoch 1/800
Epoch 1: val_loss improved from -inf to 0.00121, saving model to ae5_model
Epoch 2/800
Epoch 2: val_loss improved from 0.00121 to 0.00234, saving model to ae5_model
Epoch 3/800
Epoch 3: val_loss did not improve from 0.00234
Epoch 4/800
Epoch 4: val_loss did not improve from 0.00234
Epoch 5/800
Epoch 5: val_loss did not improve from 0.00234
Epoch 6/800
Epoch 6: val_loss did not improve from 0.00234
Epoch 7/800
Epoch 7: val_loss did not improve from 0.00234
Epoch 8/800
Epoch 8: val_loss did not improve from 0.00234
Epoch 9/800
Epoch 9: val_loss did not improve from 0.00234
Epoch 10/800
Epoch 10: val_loss did not improve from 0.00234
Epoch 11/800
Epoch 11: val_loss did not improve from 0.00234
Epoch 12/800
Epoch 12: val_loss did not improve from 0.00234
Epoch 13/800
Epoch 13: val_loss did not improve from 0.00234
Epoch 14/800
Epoch 14: val_loss did not improve from 0.00234
Epoch 15/800
Epoch 15: val_loss did not improve from 0.00234
Epoch 16/800
Epoch 16: val_loss did not impr

In [None]:
# Second model: a fresh autoencoder trained on the X_train0 slice only.
# NOTE: this rebinds the globals `scaler`, `x` and `monitor`.
scaler = MinMaxScaler().fit(X_train0)
x = scaler.transform(X_train0)

# Stop once val_loss has gone 5 epochs without improving by at least 1e-9.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-9, patience=5, verbose=1, mode='auto')

ae = Autoencoder()
ae.compile(loss='mse', optimizer=Adam(learning_rate=0.01))
ae.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of the reconstruction error on
# the data the model was just trained on.
training_loss1 = losses.mse(x, ae(x))
threshold1 = np.std(training_loss1) + np.mean(training_loss1)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 19/800
Epoch 20/800
Epoch 21/800
Epoch 22/800
Epoch 22: early stopping


In [None]:
# Reload the checkpoint saved while training ae1 and keep fitting it on the
# current global `x`, reusing the existing `monitor` early-stopping callback.
# Both names must already exist in the kernel when this cell runs.
af1 = load_model('ae5_model')
af1.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of af1's reconstruction error on `x`.
training_loss2 = losses.mse(x, af1(x))
threshold2 = np.std(training_loss2) + np.mean(training_loss2)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 14: early stopping


### Testing the autoencoder

In [None]:
def predict(x, threshold=threshold1, window_size=82):
    """Majority-vote anomaly detection with the `ae` autoencoder.

    Rows of `x` are scaled with the global `scaler`; a row is flagged when
    its reconstruction MSE under `ae` exceeds `threshold` (default: the
    value of `threshold1` when this cell runs). Returns a boolean array with
    one entry per run of `window_size` consecutive rows, True where more
    than half of the rows in that run were flagged. The result is empty when
    `x` has fewer than `window_size` rows.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold).astype(np.int64)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.zeros(0, dtype=bool)
    # Prefix sums yield every window's vote count in one O(n) pass instead of
    # slicing the tensor once per window.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:n_windows]
    # Same test as votes / window_size > 0.5, without the division.
    return 2 * votes > window_size

# X_test0 is the benign tail; the other entries are the attack captures.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# NOTE: the loop rebinds the global `x` to each test set in turn.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (2154, 115)
Detected anomalies: 11.577424023154848%

1
Shape of data: (96781, 115)
Detected anomalies: 100.0%

2
Shape of data: (60554, 115)
Detected anomalies: 100.0%

3
Shape of data: (65746, 115)
Detected anomalies: 100.0%

4
Shape of data: (156248, 115)
Detected anomalies: 100.0%

5
Shape of data: (56681, 115)
Detected anomalies: 100.0%

6
Shape of data: (61380, 115)
Detected anomalies: 100.0%

7
Shape of data: (30898, 115)
Detected anomalies: 100.0%

8
Shape of data: (29297, 115)
Detected anomalies: 100.0%

9
Shape of data: (104510, 115)
Detected anomalies: 100.0%

10
Shape of data: (104011, 115)
Detected anomalies: 100.0%



In [None]:
af = load_model('ae5_model')

# The checkpoint was trained on X_train scaled by a scaler fitted on X_train,
# but in notebook order the global `scaler` has since been refit on X_train0.
# Scoring `af` through that scaler, with a threshold pasted from an earlier
# run's printout, feeds the model inputs and a cut-off that do not belong to
# it. Rebuild the matching scaler and derive the threshold from `af` itself.
scaler_af = MinMaxScaler().fit(X_train)
x_af = scaler_af.transform(X_train)
training_loss_af = losses.mse(x_af, af(x_af))
threshold_af = np.mean(training_loss_af) + np.std(training_loss_af)

def predict(x, threshold=threshold_af, window_size=82):
    """Majority-vote anomaly detection with the reloaded checkpoint `af`.

    Rows of `x` are scaled with `scaler_af`; a row is flagged when its
    reconstruction MSE under `af` exceeds `threshold` (default: the value of
    `threshold_af` when this cell runs). Returns a boolean array with one
    entry per run of `window_size` consecutive rows, True where more than
    half of the rows in that run were flagged. The result is empty when `x`
    has fewer than `window_size` rows.
    """
    x = scaler_af.transform(x)
    flags = np.asarray(losses.mse(x, af(x)) > threshold).astype(np.int64)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.zeros(0, dtype=bool)
    # Prefix sums yield every window's vote count in one O(n) pass instead of
    # slicing the tensor once per window.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:n_windows]
    # Same test as votes / window_size > 0.5, without the division.
    return 2 * votes > window_size

# X_train1 is every benign row that was not part of X_train.
test_data = [X_train1, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# NOTE: the loop rebinds the global `x` to each test set in turn.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (12154, 115)
Detected anomalies: 100.0%

1
Shape of data: (96781, 115)
Detected anomalies: 100.0%

2
Shape of data: (60554, 115)
Detected anomalies: 100.0%

3
Shape of data: (65746, 115)
Detected anomalies: 100.0%

4
Shape of data: (156248, 115)
Detected anomalies: 100.0%

5
Shape of data: (56681, 115)
Detected anomalies: 100.0%

6
Shape of data: (61380, 115)
Detected anomalies: 100.0%

7
Shape of data: (30898, 115)
Detected anomalies: 100.0%

8
Shape of data: (29297, 115)
Detected anomalies: 100.0%

9
Shape of data: (104510, 115)
Detected anomalies: 100.0%

10
Shape of data: (104011, 115)
Detected anomalies: 100.0%



In [None]:
def predict(x, threshold=threshold2, window_size=82):
    """Majority-vote anomaly detection with the `af1` autoencoder.

    Rows of `x` are scaled with the global `scaler`; a row is flagged when
    its reconstruction MSE under `af1` exceeds `threshold` (default: the
    value of `threshold2` when this cell runs). Returns a boolean array with
    one entry per run of `window_size` consecutive rows, True where more
    than half of the rows in that run were flagged. The result is empty when
    `x` has fewer than `window_size` rows.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, af1(x)) > threshold).astype(np.int64)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.zeros(0, dtype=bool)
    # Prefix sums yield every window's vote count in one O(n) pass instead of
    # slicing the tensor once per window.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:n_windows]
    # Same test as votes / window_size > 0.5, without the division.
    return 2 * votes > window_size

# X_test0 is the benign tail; the other entries are the attack captures.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# NOTE: the loop rebinds the global `x` to each test set in turn.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (2154, 115)
Detected anomalies: 5.692233478051134%

1
Shape of data: (96781, 115)
Detected anomalies: 100.0%

2
Shape of data: (60554, 115)
Detected anomalies: 100.0%

3
Shape of data: (65746, 115)
Detected anomalies: 100.0%

4
Shape of data: (156248, 115)
Detected anomalies: 100.0%

5
Shape of data: (56681, 115)
Detected anomalies: 100.0%

6
Shape of data: (61380, 115)
Detected anomalies: 100.0%

7
Shape of data: (30898, 115)
Detected anomalies: 100.0%

8
Shape of data: (29297, 115)
Detected anomalies: 100.0%

9
Shape of data: (104510, 115)
Detected anomalies: 100.0%

10
Shape of data: (104011, 115)
Detected anomalies: 100.0%



## IOT Device 6: Provision PT 838 Security Camera

### Loading the dataset

In [None]:
def load_nbaiot(filename):
    """Load one CSV file from the mounted N-BaIoT dataset folder.

    `filename` is joined onto the dataset directory, the first line of the
    file is skipped, and the comma-separated remainder is returned as a
    NumPy array (np.genfromtxt's default float parsing).
    """
    dataset_dir = "/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset"
    csv_path = os.path.join(dataset_dir, filename)
    return np.genfromtxt(csv_path, skip_header=1, delimiter=",")

# Benign capture for device 6, cut by row index:
#   X_train  = rows [0, 80000)        X_train0 = rows [80000, 90000)
#   X_train1 = rows [80000, end)      X_test0  = rows [90000, end)
benign = load_nbaiot("6.benign.csv")
X_train, X_train0 = benign[:80000], benign[80000:90000]
X_train1, X_test0 = benign[80000:], benign[90000:]

# Attack captures; files are read in the listed order.
(X_test1, X_test2, X_test3, X_test4, X_test5,
 X_test6, X_test7, X_test8, X_test9, X_test10) = map(load_nbaiot, (
    "6.mirai.scan.csv",
    "6.mirai.ack.csv",
    "6.mirai.syn.csv",
    "6.mirai.udp.csv",
    "6.mirai.udpplain.csv",
    "6.gafgyt.combo.csv",
    "6.gafgyt.junk.csv",
    "6.gafgyt.scan.csv",
    "6.gafgyt.tcp.csv",
    "6.gafgyt.udp.csv",
))

In [None]:
# Print the shape of every split on a single line, in the order listed.
print(*(split.shape for split in (
    X_train, X_train0, X_train1, X_test0, X_test1, X_test2, X_test3,
    X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10,
)))

(80000, 115) (10000, 115) (18514, 115) (8514, 115) (97096, 115) (57997, 115) (61851, 115) (158608, 115) (53785, 115) (57530, 115) (29068, 115) (28397, 115) (89387, 115) (104658, 115)


### Training the autoencoder

In [None]:
# Fit the scaler on X_train and train `ae1` to reconstruct the scaled data.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)
# val_loss is an error, so the best epoch is the one with the LOWEST value.
# With mode='max', save_best_only kept the epoch with the highest val_loss.
checkpoint = ModelCheckpoint("ae6_model", monitor='val_loss', verbose=1, save_best_only=True, mode='min')

ae1 = Autoencoder()
ae1.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
# Stop once val_loss has gone 5 epochs without improving by at least 1e-9.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
ae1.fit(
    x=x,
    y=x,
    epochs=800,
    validation_split=0.3,
    shuffle=True,
    callbacks=[monitor,checkpoint]
)

# Threshold = mean + one standard deviation of the reconstruction error on
# the training data, measured with ae1's weights as they are after fit().
training_loss = losses.mse(x, ae1(x))
threshold = np.mean(training_loss)+np.std(training_loss)
print(threshold)

Epoch 1/800
Epoch 1: val_loss improved from -inf to 0.00162, saving model to ae6_model
Epoch 2/800
Epoch 2: val_loss did not improve from 0.00162
Epoch 3/800
Epoch 3: val_loss improved from 0.00162 to 0.00320, saving model to ae6_model
Epoch 4/800
Epoch 4: val_loss did not improve from 0.00320
Epoch 5/800
Epoch 5: val_loss did not improve from 0.00320
Epoch 6/800
Epoch 6: val_loss did not improve from 0.00320
Epoch 7/800
Epoch 7: val_loss did not improve from 0.00320
Epoch 8/800
Epoch 8: val_loss did not improve from 0.00320
Epoch 9/800
Epoch 9: val_loss did not improve from 0.00320
Epoch 10/800
Epoch 10: val_loss did not improve from 0.00320
Epoch 11/800
Epoch 11: val_loss did not improve from 0.00320
Epoch 12/800
Epoch 12: val_loss did not improve from 0.00320
Epoch 13/800
Epoch 13: val_loss did not improve from 0.00320
Epoch 13: early stopping
0.006529484


In [None]:
# Second model: a fresh autoencoder trained on the X_train0 slice only.
# NOTE: this rebinds the globals `scaler`, `x` and `monitor`.
scaler = MinMaxScaler().fit(X_train0)
x = scaler.transform(X_train0)

# Stop once val_loss has gone 5 epochs without improving by at least 1e-9.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-9, patience=5, verbose=1, mode='auto')

ae = Autoencoder()
ae.compile(loss='mse', optimizer=Adam(learning_rate=0.01))
ae.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of the reconstruction error on
# the data the model was just trained on.
training_loss1 = losses.mse(x, ae(x))
threshold1 = np.std(training_loss1) + np.mean(training_loss1)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 10: early stopping


In [None]:
# Reload the checkpoint saved while training ae1 and keep fitting it on the
# current global `x`, reusing the existing `monitor` early-stopping callback.
# Both names must already exist in the kernel when this cell runs.
af1 = load_model('ae6_model')
af1.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of af1's reconstruction error on `x`.
training_loss2 = losses.mse(x, af1(x))
threshold2 = np.std(training_loss2) + np.mean(training_loss2)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 16: early stopping


### Testing the autoencoder

In [None]:
def predict(x, threshold=threshold1, window_size=82):
    """Majority-vote anomaly detection with the `ae` autoencoder.

    Rows of `x` are scaled with the global `scaler`; a row is flagged when
    its reconstruction MSE under `ae` exceeds `threshold` (default: the
    value of `threshold1` when this cell runs). Returns a boolean array with
    one entry per run of `window_size` consecutive rows, True where more
    than half of the rows in that run were flagged. The result is empty when
    `x` has fewer than `window_size` rows.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold).astype(np.int64)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.zeros(0, dtype=bool)
    # Prefix sums yield every window's vote count in one O(n) pass instead of
    # slicing the tensor once per window.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:n_windows]
    # Same test as votes / window_size > 0.5, without the division.
    return 2 * votes > window_size

# X_test0 is the benign tail; the other entries are the attack captures.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# NOTE: the loop rebinds the global `x` to each test set in turn.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (8514, 115)
Detected anomalies: 19.791296098660027%

1
Shape of data: (97096, 115)
Detected anomalies: 100.0%

2
Shape of data: (57997, 115)
Detected anomalies: 100.0%

3
Shape of data: (61851, 115)
Detected anomalies: 100.0%

4
Shape of data: (158608, 115)
Detected anomalies: 100.0%

5
Shape of data: (53785, 115)
Detected anomalies: 100.0%

6
Shape of data: (57530, 115)
Detected anomalies: 100.0%

7
Shape of data: (29068, 115)
Detected anomalies: 100.0%

8
Shape of data: (28397, 115)
Detected anomalies: 100.0%

9
Shape of data: (89387, 115)
Detected anomalies: 100.0%

10
Shape of data: (104658, 115)
Detected anomalies: 100.0%



In [None]:
af = load_model('ae6_model')

# The checkpoint was trained on X_train scaled by a scaler fitted on X_train,
# but in notebook order the global `scaler` has since been refit on X_train0.
# Scoring `af` through that scaler, with a threshold pasted from an earlier
# run's printout, feeds the model inputs and a cut-off that do not belong to
# it. Rebuild the matching scaler and derive the threshold from `af` itself.
scaler_af = MinMaxScaler().fit(X_train)
x_af = scaler_af.transform(X_train)
training_loss_af = losses.mse(x_af, af(x_af))
threshold_af = np.mean(training_loss_af) + np.std(training_loss_af)

def predict(x, threshold=threshold_af, window_size=82):
    """Majority-vote anomaly detection with the reloaded checkpoint `af`.

    Rows of `x` are scaled with `scaler_af`; a row is flagged when its
    reconstruction MSE under `af` exceeds `threshold` (default: the value of
    `threshold_af` when this cell runs). Returns a boolean array with one
    entry per run of `window_size` consecutive rows, True where more than
    half of the rows in that run were flagged. The result is empty when `x`
    has fewer than `window_size` rows.
    """
    x = scaler_af.transform(x)
    flags = np.asarray(losses.mse(x, af(x)) > threshold).astype(np.int64)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.zeros(0, dtype=bool)
    # Prefix sums yield every window's vote count in one O(n) pass instead of
    # slicing the tensor once per window.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:n_windows]
    # Same test as votes / window_size > 0.5, without the division.
    return 2 * votes > window_size

# X_train1 is every benign row that was not part of X_train.
test_data = [X_train1, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# NOTE: the loop rebinds the global `x` to each test set in turn.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (18514, 115)
Detected anomalies: 59.31752834590137%

1
Shape of data: (97096, 115)
Detected anomalies: 100.0%

2
Shape of data: (57997, 115)
Detected anomalies: 100.0%

3
Shape of data: (61851, 115)
Detected anomalies: 100.0%

4
Shape of data: (158608, 115)
Detected anomalies: 100.0%

5
Shape of data: (53785, 115)
Detected anomalies: 100.0%

6
Shape of data: (57530, 115)
Detected anomalies: 100.0%

7
Shape of data: (29068, 115)
Detected anomalies: 100.0%

8
Shape of data: (28397, 115)
Detected anomalies: 100.0%

9
Shape of data: (89387, 115)
Detected anomalies: 100.0%

10
Shape of data: (104658, 115)
Detected anomalies: 100.0%



In [None]:
def predict(x, threshold=threshold2, window_size=82):
    """Majority-vote anomaly detection with the `af1` autoencoder.

    Rows of `x` are scaled with the global `scaler`; a row is flagged when
    its reconstruction MSE under `af1` exceeds `threshold` (default: the
    value of `threshold2` when this cell runs). Returns a boolean array with
    one entry per run of `window_size` consecutive rows, True where more
    than half of the rows in that run were flagged. The result is empty when
    `x` has fewer than `window_size` rows.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, af1(x)) > threshold).astype(np.int64)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.zeros(0, dtype=bool)
    # Prefix sums yield every window's vote count in one O(n) pass instead of
    # slicing the tensor once per window.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:n_windows]
    # Same test as votes / window_size > 0.5, without the division.
    return 2 * votes > window_size

# X_test0 is the benign tail; the other entries are the attack captures.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# NOTE: the loop rebinds the global `x` to each test set in turn.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (8514, 115)
Detected anomalies: 19.60156527926005%

1
Shape of data: (97096, 115)
Detected anomalies: 100.0%

2
Shape of data: (57997, 115)
Detected anomalies: 100.0%

3
Shape of data: (61851, 115)
Detected anomalies: 100.0%

4
Shape of data: (158608, 115)
Detected anomalies: 100.0%

5
Shape of data: (53785, 115)
Detected anomalies: 100.0%

6
Shape of data: (57530, 115)
Detected anomalies: 100.0%

7
Shape of data: (29068, 115)
Detected anomalies: 100.0%

8
Shape of data: (28397, 115)
Detected anomalies: 100.0%

9
Shape of data: (89387, 115)
Detected anomalies: 100.0%

10
Shape of data: (104658, 115)
Detected anomalies: 100.0%



## IOT Device 7: Samsung SNH 1011 N Webcam

### Loading the dataset

In [None]:
def load_nbaiot(filename):
    """Load one CSV file from the mounted N-BaIoT dataset folder.

    `filename` is joined onto the dataset directory, the first line of the
    file is skipped, and the comma-separated remainder is returned as a
    NumPy array (np.genfromtxt's default float parsing).
    """
    dataset_dir = "/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset"
    csv_path = os.path.join(dataset_dir, filename)
    return np.genfromtxt(csv_path, skip_header=1, delimiter=",")

# Benign capture for device 7, cut by row index:
#   X_train  = rows [0, 40000)        X_train0 = rows [40000, 50000)
#   X_train1 = rows [40000, end)      X_test0  = rows [50000, end)
benign = load_nbaiot("7.benign.csv")
X_train, X_train0 = benign[:40000], benign[40000:50000]
X_train1, X_test0 = benign[40000:], benign[50000:]

# Only gafgyt captures are loaded for this device; files are read in the
# listed order.
X_test6, X_test7, X_test8, X_test9, X_test10 = map(load_nbaiot, (
    "7.gafgyt.combo.csv",
    "7.gafgyt.junk.csv",
    "7.gafgyt.scan.csv",
    "7.gafgyt.tcp.csv",
    "7.gafgyt.udp.csv",
))

In [None]:
# Print the shape of every split on a single line, in the order listed.
print(*(split.shape for split in (
    X_train, X_train0, X_train1, X_test0,
    X_test6, X_test7, X_test8, X_test9, X_test10,
)))

(40000, 115) (10000, 115) (12150, 115) (2150, 115) (58669, 115) (28305, 115) (27698, 115) (97783, 115) (110617, 115)


### Training the autoencoder

In [None]:
# Fit the scaler on X_train and train `ae1` to reconstruct the scaled data.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)
# val_loss is an error, so the best epoch is the one with the LOWEST value.
# With mode='max', save_best_only kept the epoch with the highest val_loss.
checkpoint = ModelCheckpoint("ae7_model", monitor='val_loss', verbose=1, save_best_only=True, mode='min')

ae1 = Autoencoder()
ae1.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
# Stop once val_loss has gone 5 epochs without improving by at least 1e-9.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
ae1.fit(
    x=x,
    y=x,
    epochs=800,
    validation_split=0.3,
    shuffle=True,
    callbacks=[monitor,checkpoint]
)

# Threshold = mean + one standard deviation of the reconstruction error on
# the training data, measured with ae1's weights as they are after fit().
training_loss = losses.mse(x, ae1(x))
threshold = np.mean(training_loss)+np.std(training_loss)
print(threshold)

Epoch 1/800
Epoch 1: val_loss improved from -inf to 0.00216, saving model to ae7_model
Epoch 2/800
Epoch 2: val_loss did not improve from 0.00216
Epoch 3/800
Epoch 3: val_loss did not improve from 0.00216
Epoch 4/800
Epoch 4: val_loss did not improve from 0.00216
Epoch 5/800
Epoch 5: val_loss did not improve from 0.00216
Epoch 6/800
Epoch 6: val_loss did not improve from 0.00216
Epoch 7/800
Epoch 7: val_loss did not improve from 0.00216
Epoch 8/800
Epoch 8: val_loss did not improve from 0.00216
Epoch 9/800
Epoch 9: val_loss did not improve from 0.00216
Epoch 10/800
Epoch 10: val_loss did not improve from 0.00216
Epoch 11/800
Epoch 11: val_loss did not improve from 0.00216
Epoch 12/800
Epoch 12: val_loss improved from 0.00216 to 0.00424, saving model to ae7_model
Epoch 13/800
Epoch 13: val_loss did not improve from 0.00424
Epoch 14/800
Epoch 14: val_loss did not improve from 0.00424
Epoch 15/800
Epoch 15: val_loss did not improve from 0.00424
Epoch 16/800
Epoch 16: val_loss did not impr

In [None]:
# Second model: a fresh autoencoder trained on the X_train0 slice only.
# NOTE: this rebinds the globals `scaler`, `x` and `monitor`.
scaler = MinMaxScaler().fit(X_train0)
x = scaler.transform(X_train0)

# Stop once val_loss has gone 5 epochs without improving by at least 1e-9.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-9, patience=5, verbose=1, mode='auto')

ae = Autoencoder()
ae.compile(loss='mse', optimizer=Adam(learning_rate=0.01))
ae.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of the reconstruction error on
# the data the model was just trained on.
training_loss1 = losses.mse(x, ae(x))
threshold1 = np.std(training_loss1) + np.mean(training_loss1)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 11: early stopping


In [None]:
# Reload the checkpoint saved while training ae1 and keep fitting it on the
# current global `x`, reusing the existing `monitor` early-stopping callback.
# Both names must already exist in the kernel when this cell runs.
af1 = load_model('ae7_model')
af1.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of af1's reconstruction error on `x`.
training_loss2 = losses.mse(x, af1(x))
threshold2 = np.std(training_loss2) + np.mean(training_loss2)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 10: early stopping


### Testing the autoencoder

In [None]:
def predict(x, threshold=threshold1, window_size=82):
    """Majority-vote anomaly detection with the `ae` autoencoder.

    Rows of `x` are scaled with the global `scaler`; a row is flagged when
    its reconstruction MSE under `ae` exceeds `threshold` (default: the
    value of `threshold1` when this cell runs). Returns a boolean array with
    one entry per run of `window_size` consecutive rows, True where more
    than half of the rows in that run were flagged. The result is empty when
    `x` has fewer than `window_size` rows.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold).astype(np.int64)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.zeros(0, dtype=bool)
    # Prefix sums yield every window's vote count in one O(n) pass instead of
    # slicing the tensor once per window.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:n_windows]
    # Same test as votes / window_size > 0.5, without the division.
    return 2 * votes > window_size

# X_test0 is the benign tail; the other entries are the gafgyt captures.
test_data = [X_test0, X_test6, X_test7, X_test8, X_test9, X_test10]

# NOTE: the loop rebinds the global `x` to each test set in turn.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (2150, 115)
Detected anomalies: 52.7791203479942%

1
Shape of data: (58669, 115)
Detected anomalies: 100.0%

2
Shape of data: (28305, 115)
Detected anomalies: 100.0%

3
Shape of data: (27698, 115)
Detected anomalies: 100.0%

4
Shape of data: (97783, 115)
Detected anomalies: 100.0%

5
Shape of data: (110617, 115)
Detected anomalies: 100.0%



In [None]:
af = load_model('ae7_model')

# The checkpoint was trained on X_train scaled by a scaler fitted on X_train,
# but in notebook order the global `scaler` has since been refit on X_train0.
# Scoring `af` through that scaler, with a threshold pasted from an earlier
# run's printout, feeds the model inputs and a cut-off that do not belong to
# it. Rebuild the matching scaler and derive the threshold from `af` itself.
scaler_af = MinMaxScaler().fit(X_train)
x_af = scaler_af.transform(X_train)
training_loss_af = losses.mse(x_af, af(x_af))
threshold_af = np.mean(training_loss_af) + np.std(training_loss_af)

def predict(x, threshold=threshold_af, window_size=82):
    """Majority-vote anomaly detection with the reloaded checkpoint `af`.

    Rows of `x` are scaled with `scaler_af`; a row is flagged when its
    reconstruction MSE under `af` exceeds `threshold` (default: the value of
    `threshold_af` when this cell runs). Returns a boolean array with one
    entry per run of `window_size` consecutive rows, True where more than
    half of the rows in that run were flagged. The result is empty when `x`
    has fewer than `window_size` rows.
    """
    x = scaler_af.transform(x)
    flags = np.asarray(losses.mse(x, af(x)) > threshold).astype(np.int64)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.zeros(0, dtype=bool)
    # Prefix sums yield every window's vote count in one O(n) pass instead of
    # slicing the tensor once per window.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:n_windows]
    # Same test as votes / window_size > 0.5, without the division.
    return 2 * votes > window_size

# X_train1 is every benign row that was not part of X_train.
test_data = [X_train1, X_test6, X_test7, X_test8, X_test9, X_test10]

# NOTE: the loop rebinds the global `x` to each test set in turn.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (12150, 115)
Detected anomalies: 100.0%

1
Shape of data: (58669, 115)
Detected anomalies: 100.0%

2
Shape of data: (28305, 115)
Detected anomalies: 100.0%

3
Shape of data: (27698, 115)
Detected anomalies: 100.0%

4
Shape of data: (97783, 115)
Detected anomalies: 100.0%

5
Shape of data: (110617, 115)
Detected anomalies: 100.0%



In [None]:
def predict(x, threshold=threshold2, window_size=82):
    """Majority-vote anomaly detection with the `af1` autoencoder.

    Rows of `x` are scaled with the global `scaler`; a row is flagged when
    its reconstruction MSE under `af1` exceeds `threshold` (default: the
    value of `threshold2` when this cell runs). Returns a boolean array with
    one entry per run of `window_size` consecutive rows, True where more
    than half of the rows in that run were flagged. The result is empty when
    `x` has fewer than `window_size` rows.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, af1(x)) > threshold).astype(np.int64)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.zeros(0, dtype=bool)
    # Prefix sums yield every window's vote count in one O(n) pass instead of
    # slicing the tensor once per window.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:n_windows]
    # Same test as votes / window_size > 0.5, without the division.
    return 2 * votes > window_size

# X_test0 is the benign tail; the other entries are the gafgyt captures.
test_data = [X_test0, X_test6, X_test7, X_test8, X_test9, X_test10]

# NOTE: the loop rebinds the global `x` to each test set in turn.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (2150, 115)
Detected anomalies: 56.74238762687288%

1
Shape of data: (58669, 115)
Detected anomalies: 100.0%

2
Shape of data: (28305, 115)
Detected anomalies: 100.0%

3
Shape of data: (27698, 115)
Detected anomalies: 100.0%

4
Shape of data: (97783, 115)
Detected anomalies: 100.0%

5
Shape of data: (110617, 115)
Detected anomalies: 100.0%



## IOT Device 8: SimpleHome XCS7 1002 WHT Security Camera

### Loading the dataset

In [None]:
def load_nbaiot(filename):
    """Load one CSV file from the mounted N-BaIoT dataset folder.

    `filename` is joined onto the dataset directory, the first line of the
    file is skipped, and the comma-separated remainder is returned as a
    NumPy array (np.genfromtxt's default float parsing).
    """
    dataset_dir = "/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset"
    csv_path = os.path.join(dataset_dir, filename)
    return np.genfromtxt(csv_path, skip_header=1, delimiter=",")

# Benign capture for device 8, cut by row index:
#   X_train  = rows [0, 30000)        X_train0 = rows [30000, 40000)
#   X_train1 = rows [30000, end)      X_test0  = rows [40000, end)
benign = load_nbaiot("8.benign.csv")
X_train, X_train0 = benign[:30000], benign[30000:40000]
X_train1, X_test0 = benign[30000:], benign[40000:]

# Attack captures; files are read in the listed order.
(X_test1, X_test2, X_test3, X_test4, X_test5,
 X_test6, X_test7, X_test8, X_test9, X_test10) = map(load_nbaiot, (
    "8.mirai.scan.csv",
    "8.mirai.ack.csv",
    "8.mirai.syn.csv",
    "8.mirai.udp.csv",
    "8.mirai.udpplain.csv",
    "8.gafgyt.combo.csv",
    "8.gafgyt.junk.csv",
    "8.gafgyt.scan.csv",
    "8.gafgyt.tcp.csv",
    "8.gafgyt.udp.csv",
))

In [None]:
# Print the shape of every split on a single line, in the order listed.
print(*(split.shape for split in (
    X_train, X_train0, X_train1, X_test0, X_test1, X_test2, X_test3,
    X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10,
)))

(30000, 115) (10000, 115) (16585, 115) (6585, 115) (45930, 115) (111480, 115) (125715, 115) (151879, 115) (78244, 115) (54283, 115) (28579, 115) (27825, 115) (88816, 115) (103720, 115)


### Training the autoencoder

In [None]:
# Fit the scaler on X_train and train `ae1` to reconstruct the scaled data.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)
# val_loss is an error, so the best epoch is the one with the LOWEST value.
# With mode='max', save_best_only kept the epoch with the highest val_loss.
checkpoint = ModelCheckpoint("ae8_model", monitor='val_loss', verbose=1, save_best_only=True, mode='min')

ae1 = Autoencoder()
ae1.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
# Stop once val_loss has gone 5 epochs without improving by at least 1e-9.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
ae1.fit(
    x=x,
    y=x,
    epochs=800,
    validation_split=0.3,
    shuffle=True,
    callbacks=[monitor,checkpoint]
)

# Threshold = mean + one standard deviation of the reconstruction error on
# the training data, measured with ae1's weights as they are after fit().
training_loss = losses.mse(x, ae1(x))
threshold = np.mean(training_loss)+np.std(training_loss)
print(threshold)

Epoch 1/800
Epoch 1: val_loss improved from -inf to 0.00101, saving model to ae8_model
Epoch 2/800
Epoch 2: val_loss did not improve from 0.00101
Epoch 3/800
Epoch 3: val_loss did not improve from 0.00101
Epoch 4/800
Epoch 4: val_loss did not improve from 0.00101
Epoch 5/800
Epoch 5: val_loss did not improve from 0.00101
Epoch 6/800
Epoch 6: val_loss did not improve from 0.00101
Epoch 7/800
Epoch 7: val_loss did not improve from 0.00101
Epoch 8/800
Epoch 8: val_loss did not improve from 0.00101
Epoch 9/800
Epoch 9: val_loss did not improve from 0.00101
Epoch 10/800
Epoch 10: val_loss did not improve from 0.00101
Epoch 11/800
Epoch 11: val_loss did not improve from 0.00101
Epoch 12/800
Epoch 12: val_loss did not improve from 0.00101
Epoch 13/800
Epoch 13: val_loss improved from 0.00101 to 0.00171, saving model to ae8_model
Epoch 13: early stopping
0.016871396


In [None]:
# Second model: a fresh autoencoder trained on the X_train0 slice only.
# NOTE: this rebinds the globals `scaler`, `x` and `monitor`.
scaler = MinMaxScaler().fit(X_train0)
x = scaler.transform(X_train0)

# Stop once val_loss has gone 5 epochs without improving by at least 1e-9.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-9, patience=5, verbose=1, mode='auto')

ae = Autoencoder()
ae.compile(loss='mse', optimizer=Adam(learning_rate=0.01))
ae.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of the reconstruction error on
# the data the model was just trained on.
training_loss1 = losses.mse(x, ae(x))
threshold1 = np.std(training_loss1) + np.mean(training_loss1)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 19/800
Epoch 20/800
Epoch 21/800
Epoch 22/800
Epoch 23/800
Epoch 24/800
Epoch 25/800
Epoch 26/800
Epoch 27/800
Epoch 28/800
Epoch 29/800
Epoch 30/800
Epoch 31/800
Epoch 32/800
Epoch 32: early stopping


In [None]:
# Reload the checkpoint saved while training ae1 and keep fitting it on the
# current global `x`, reusing the existing `monitor` early-stopping callback.
# Both names must already exist in the kernel when this cell runs.
af1 = load_model('ae8_model')
af1.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Threshold = mean + one standard deviation of af1's reconstruction error on `x`.
training_loss2 = losses.mse(x, af1(x))
threshold2 = np.std(training_loss2) + np.mean(training_loss2)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 11: early stopping


### Testing the autoencoder

In [None]:
def predict(x, threshold=threshold1, window_size=82):
    """Majority-vote anomaly detection with the `ae` autoencoder.

    Rows of `x` are scaled with the global `scaler`; a row is flagged when
    its reconstruction MSE under `ae` exceeds `threshold` (default: the
    value of `threshold1` when this cell runs). Returns a boolean array with
    one entry per run of `window_size` consecutive rows, True where more
    than half of the rows in that run were flagged. The result is empty when
    `x` has fewer than `window_size` rows.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, ae(x)) > threshold).astype(np.int64)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.zeros(0, dtype=bool)
    # Prefix sums yield every window's vote count in one O(n) pass instead of
    # slicing the tensor once per window.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:n_windows]
    # Same test as votes / window_size > 0.5, without the division.
    return 2 * votes > window_size

# X_test0 is the benign tail; the other entries are the attack captures.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# NOTE: the loop rebinds the global `x` to each test set in turn.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (6585, 115)
Detected anomalies: 5.50430504305043%

1
Shape of data: (45930, 115)
Detected anomalies: 99.81460882461994%

2
Shape of data: (111480, 115)
Detected anomalies: 100.0%

3
Shape of data: (125715, 115)
Detected anomalies: 100.0%

4
Shape of data: (151879, 115)
Detected anomalies: 100.0%

5
Shape of data: (78244, 115)
Detected anomalies: 100.0%

6
Shape of data: (54283, 115)
Detected anomalies: 100.0%

7
Shape of data: (28579, 115)
Detected anomalies: 100.0%

8
Shape of data: (27825, 115)
Detected anomalies: 100.0%

9
Shape of data: (88816, 115)
Detected anomalies: 0.0%

10
Shape of data: (103720, 115)
Detected anomalies: 0.0%



In [None]:
af = load_model('ae8_model')

# The checkpoint was trained on X_train scaled by a scaler fitted on X_train,
# but in notebook order the global `scaler` has since been refit on X_train0.
# Scoring `af` through that scaler, with a threshold pasted from an earlier
# run's printout, feeds the model inputs and a cut-off that do not belong to
# it. Rebuild the matching scaler and derive the threshold from `af` itself.
scaler_af = MinMaxScaler().fit(X_train)
x_af = scaler_af.transform(X_train)
training_loss_af = losses.mse(x_af, af(x_af))
threshold_af = np.mean(training_loss_af) + np.std(training_loss_af)

def predict(x, threshold=threshold_af, window_size=82):
    """Majority-vote anomaly detection with the reloaded checkpoint `af`.

    Rows of `x` are scaled with `scaler_af`; a row is flagged when its
    reconstruction MSE under `af` exceeds `threshold` (default: the value of
    `threshold_af` when this cell runs). Returns a boolean array with one
    entry per run of `window_size` consecutive rows, True where more than
    half of the rows in that run were flagged. The result is empty when `x`
    has fewer than `window_size` rows.
    """
    x = scaler_af.transform(x)
    flags = np.asarray(losses.mse(x, af(x)) > threshold).astype(np.int64)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.zeros(0, dtype=bool)
    # Prefix sums yield every window's vote count in one O(n) pass instead of
    # slicing the tensor once per window.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:n_windows]
    # Same test as votes / window_size > 0.5, without the division.
    return 2 * votes > window_size

# X_train1 is every benign row that was not part of X_train.
test_data = [X_train1, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# NOTE: the loop rebinds the global `x` to each test set in turn.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (16585, 115)
Detected anomalies: 100.0%

1
Shape of data: (45930, 115)
Detected anomalies: 100.0%

2
Shape of data: (111480, 115)
Detected anomalies: 100.0%

3
Shape of data: (125715, 115)
Detected anomalies: 100.0%

4
Shape of data: (151879, 115)
Detected anomalies: 100.0%

5
Shape of data: (78244, 115)
Detected anomalies: 100.0%

6
Shape of data: (54283, 115)
Detected anomalies: 100.0%

7
Shape of data: (28579, 115)
Detected anomalies: 100.0%

8
Shape of data: (27825, 115)
Detected anomalies: 100.0%

9
Shape of data: (88816, 115)
Detected anomalies: 100.0%

10
Shape of data: (103720, 115)
Detected anomalies: 100.0%



In [None]:
def predict(x, threshold=threshold2, window_size=82):
    """Majority-vote anomaly detection with the `af1` autoencoder.

    Rows of `x` are scaled with the global `scaler`; a row is flagged when
    its reconstruction MSE under `af1` exceeds `threshold` (default: the
    value of `threshold2` when this cell runs). Returns a boolean array with
    one entry per run of `window_size` consecutive rows, True where more
    than half of the rows in that run were flagged. The result is empty when
    `x` has fewer than `window_size` rows.
    """
    x = scaler.transform(x)
    flags = np.asarray(losses.mse(x, af1(x)) > threshold).astype(np.int64)
    n_windows = len(flags) - window_size + 1
    if n_windows <= 0:
        return np.zeros(0, dtype=bool)
    # Prefix sums yield every window's vote count in one O(n) pass instead of
    # slicing the tensor once per window.
    prefix = np.concatenate(([0], np.cumsum(flags)))
    votes = prefix[window_size:] - prefix[:n_windows]
    # Same test as votes / window_size > 0.5, without the division.
    return 2 * votes > window_size

# X_test0 is the benign tail; the other entries are the attack captures.
test_data = [X_test0, X_test1, X_test2, X_test3, X_test4, X_test5, X_test6, X_test7, X_test8, X_test9, X_test10]

# NOTE: the loop rebinds the global `x` to each test set in turn.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (6585, 115)
Detected anomalies: 10.009225092250922%

1
Shape of data: (45930, 115)
Detected anomalies: 100.0%

2
Shape of data: (111480, 115)
Detected anomalies: 100.0%

3
Shape of data: (125715, 115)
Detected anomalies: 100.0%

4
Shape of data: (151879, 115)
Detected anomalies: 100.0%

5
Shape of data: (78244, 115)
Detected anomalies: 100.0%

6
Shape of data: (54283, 115)
Detected anomalies: 100.0%

7
Shape of data: (28579, 115)
Detected anomalies: 100.0%

8
Shape of data: (27825, 115)
Detected anomalies: 100.0%

9
Shape of data: (88816, 115)
Detected anomalies: 100.0%

10
Shape of data: (103720, 115)
Detected anomalies: 100.0%



## IOT Device 9: SimpleHome XCS7 1003 WHT Security Camera

### Loading the dataset

In [None]:
def load_nbaiot(
    filename,
    data_dir="/drive/My Drive/Mini_Project_Dataset/N-Balot_Dataset",
):
    """Load one comma-separated dataset file into a NumPy array.

    Args:
        filename: Name of the CSV file, resolved relative to `data_dir`.
        data_dir: Directory holding the dataset files. Defaults to the
            mounted Google Drive folder used throughout this notebook, so
            existing `load_nbaiot("<file>.csv")` calls behave as before.

    Returns:
        The array produced by `np.genfromtxt`; the first line of the file
        is skipped as a header row.
    """
    return np.genfromtxt(
        os.path.join(data_dir, filename),
        delimiter=",",
        skip_header=1
    )

# Benign traffic of device 9, split by row position.
benign = load_nbaiot("9.benign.csv")
X_train, X_train0 = benign[:10000], benign[10000:15000]
# X_train1 is every row after the first 10000, so it spans both X_train0
# and X_test0.
X_train1, X_test0 = benign[10000:], benign[15000:]

# Attack captures, loaded in order: five Mirai files, then five Gafgyt files.
(X_test1, X_test2, X_test3, X_test4, X_test5,
 X_test6, X_test7, X_test8, X_test9, X_test10) = [
    load_nbaiot(attack_file)
    for attack_file in (
        "9.mirai.scan.csv",
        "9.mirai.ack.csv",
        "9.mirai.syn.csv",
        "9.mirai.udp.csv",
        "9.mirai.udpplain.csv",
        "9.gafgyt.combo.csv",
        "9.gafgyt.junk.csv",
        "9.gafgyt.scan.csv",
        "9.gafgyt.tcp.csv",
        "9.gafgyt.udp.csv",
    )
]

In [None]:
# Shapes of every split, printed space-separated on one line in this order.
print(*(split.shape for split in (
    X_train, X_train0, X_train1, X_test0, X_test1, X_test2,
    X_test3, X_test4, X_test5, X_test6,
    X_test7, X_test8, X_test9, X_test10,
)))

(10000, 115) (5000, 115) (9528, 115) (4528, 115) (43674, 115) (107187, 115) (122479, 115) (157084, 115) (84436, 115) (59398, 115) (27413, 115) (28572, 115) (98075, 115) (102980, 115)


### Training the autoencoder

In [None]:
# Fit the feature scaler on the first benign split and scale that split.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train)

# Checkpoint the epoch with the LOWEST validation loss. `val_loss` is an error
# metric, so mode must be 'min'; with mode='max' a rising val_loss is treated
# as an improvement and the saved "best" model is the worst epoch.
checkpoint = ModelCheckpoint("ae9_model", monitor='val_loss', verbose=1, save_best_only=True, mode='min')

ae1 = Autoencoder()
ae1.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
# Stop once val_loss has failed to improve by at least 1e-9 for 5 epochs.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-9,
    patience=5,
    verbose=1,
    mode='auto'
)
# The autoencoder learns to reproduce its own input, hence y == x.
ae1.fit(
    x=x,
    y=x,
    epochs=800,
    validation_split=0.3,
    shuffle=True,
    callbacks=[monitor,checkpoint]
)

# Anomaly cut-off: mean plus one standard deviation of the reconstruction MSE
# on the scaled training split.
# NOTE(review): this uses ae1's weights at the end of training, which are not
# necessarily the weights stored in the "ae9_model" checkpoint - confirm which
# model the printed threshold is meant for.
training_loss = losses.mse(x, ae1(x))
threshold = np.mean(training_loss)+np.std(training_loss)
print(threshold)

Epoch 1/800
Epoch 1: val_loss improved from -inf to 0.00386, saving model to ae9_model
Epoch 2/800
Epoch 2: val_loss did not improve from 0.00386
Epoch 3/800
Epoch 3: val_loss did not improve from 0.00386
Epoch 4/800
Epoch 4: val_loss improved from 0.00386 to 0.00409, saving model to ae9_model
Epoch 5/800
Epoch 5: val_loss did not improve from 0.00409
Epoch 6/800
Epoch 6: val_loss did not improve from 0.00409
Epoch 7/800
Epoch 7: val_loss did not improve from 0.00409
Epoch 8/800
Epoch 8: val_loss did not improve from 0.00409
Epoch 9/800
Epoch 9: val_loss did not improve from 0.00409
Epoch 10/800
Epoch 10: val_loss did not improve from 0.00409
Epoch 11/800
Epoch 11: val_loss did not improve from 0.00409
Epoch 12/800
Epoch 12: val_loss did not improve from 0.00409
Epoch 13/800
Epoch 13: val_loss did not improve from 0.00409
Epoch 14/800
Epoch 14: val_loss did not improve from 0.00409
Epoch 15/800
Epoch 15: val_loss did not improve from 0.00409
Epoch 16/800
Epoch 16: val_loss did not impr

In [None]:
# Second model: a fresh autoencoder fitted on the X_train0 split only.
# Note that this rebinds the notebook-level `scaler` and `x`.
scaler = MinMaxScaler()
x = scaler.fit_transform(X_train0)

# Stop once val_loss has failed to improve by at least 1e-9 for 5 epochs.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-9, patience=5, verbose=1, mode='auto')

ae = Autoencoder()
ae.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
# Input and target are the same array: the network learns to reconstruct it.
ae.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Anomaly cut-off for `ae`: mean plus one standard deviation of the
# reconstruction MSE on the scaled training split.
training_loss1 = losses.mse(x, ae(x))
threshold1 = np.mean(training_loss1) + np.std(training_loss1)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 19/800
Epoch 20/800
Epoch 21/800
Epoch 22/800
Epoch 23/800
Epoch 24/800
Epoch 25/800
Epoch 26/800
Epoch 27/800
Epoch 27: early stopping


In [None]:
# Reload the "ae9_model" checkpoint and keep training it on the current `x`,
# reusing the existing `monitor` early-stopping callback.
# NOTE(review): at this point `x` is presumably the scaled X_train0 split
# from the preceding cell - confirm the cells are run in order.
af1 = load_model('ae9_model')
af1.fit(x, x, epochs=800, validation_split=0.3, shuffle=True, callbacks=[monitor])

# Anomaly cut-off for the fine-tuned model: mean plus one standard deviation
# of its reconstruction MSE on `x`.
training_loss2 = losses.mse(x, af1(x))
threshold2 = np.mean(training_loss2) + np.std(training_loss2)

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 8: early stopping


### Testing the autoencoder

In [None]:
def predict(x, threshold=threshold1, window_size=82):
    """Flag anomalous windows of `x` with the autoencoder `ae`.

    Samples are rescaled with the notebook-level `scaler`; a sample counts as
    anomalous when the MSE between it and its reconstruction by `ae` is
    greater than `threshold`. A sliding majority vote then smooths the
    per-sample flags.

    Args:
        x: Raw samples in the layout accepted by `scaler.transform`.
        threshold: MSE cut-off. The default is whatever `threshold1` held
            when this function was defined.
        window_size: Number of consecutive per-sample flags in each vote.

    Returns:
        NumPy array with one entry per full window of `window_size`
        consecutive flags; True when more than half of the flags in that
        window are anomalous. Empty when there are fewer flags than
        `window_size`.
    """
    scaled = scaler.transform(x)
    predictions = losses.mse(scaled, ae(scaled)) > threshold

    # Majority voting over `window_size` predictions
    votes = []
    for stop in range(window_size, len(predictions) + 1):
        start = stop - window_size
        votes.append(np.mean(predictions[start:stop]) > 0.5)
    return np.array(votes)

# Evaluation sets in index order: held-out benign rows first, then the five
# Mirai captures, then the five Gafgyt captures.
test_data = [
    X_test0,
    X_test1, X_test2, X_test3, X_test4, X_test5,
    X_test6, X_test7, X_test8, X_test9, X_test10,
]

# Print the set index, then the shape and share of windows flagged anomalous.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (4528, 115)
Detected anomalies: 3.080728581065887%

1
Shape of data: (43674, 115)
Detected anomalies: 100.0%

2
Shape of data: (107187, 115)
Detected anomalies: 100.0%

3
Shape of data: (122479, 115)
Detected anomalies: 100.0%

4
Shape of data: (157084, 115)
Detected anomalies: 100.0%

5
Shape of data: (84436, 115)
Detected anomalies: 100.0%

6
Shape of data: (59398, 115)
Detected anomalies: 100.0%

7
Shape of data: (27413, 115)
Detected anomalies: 100.0%

8
Shape of data: (28572, 115)
Detected anomalies: 100.0%

9
Shape of data: (98075, 115)
Detected anomalies: 0.0%

10
Shape of data: (102980, 115)
Detected anomalies: 0.0%



In [None]:
af = load_model('ae9_model')

# "ae9_model" was checkpointed while training on X_train scaled by a scaler
# fitted on X_train. The notebook-level `scaler` has since been refitted on
# X_train0, so using it here would feed the model inputs on a different scale
# from the one it was trained on. MinMaxScaler fitting is deterministic, so
# refitting on X_train reproduces the scaler this model was trained with.
af_scaler = MinMaxScaler().fit(X_train)

def predict(x, threshold=0.030374795, window_size=82):
    """Flag anomalous windows of `x` with the checkpointed autoencoder `af`.

    Samples are rescaled with `af_scaler` (fitted on X_train, matching the
    data the checkpoint was trained on); a sample counts as anomalous when
    the MSE between it and its reconstruction by `af` is greater than
    `threshold`. A sliding majority vote then smooths the per-sample flags.

    Args:
        x: Raw samples in the layout accepted by `af_scaler.transform`.
        threshold: MSE cut-off.
            NOTE(review): the default is a hard-coded value, presumably
            copied from the `threshold` printed after training `ae1` -
            confirm, and re-derive it if the model is retrained.
        window_size: Number of consecutive per-sample flags in each vote.

    Returns:
        NumPy array with one entry per full window of `window_size`
        consecutive flags; True when more than half of the flags in that
        window are anomalous. Empty when there are fewer flags than
        `window_size`.
    """
    x = af_scaler.transform(x)
    predictions = losses.mse(x, af(x)) > threshold
    # Majority voting over `window_size` predictions
    return np.array([np.mean(predictions[i-window_size:i]) > 0.5
                     for i in range(window_size, len(predictions)+1)])

# Evaluation sets in index order. The benign set here is X_train1 (all benign
# rows after the first 10000) rather than X_test0; the ten attack captures
# follow.
test_data = [
    X_train1,
    X_test1, X_test2, X_test3, X_test4, X_test5,
    X_test6, X_test7, X_test8, X_test9, X_test10,
]

# Print the set index, then the shape and share of windows flagged anomalous.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (9528, 115)
Detected anomalies: 0.0%

1
Shape of data: (43674, 115)
Detected anomalies: 100.0%

2
Shape of data: (107187, 115)
Detected anomalies: 100.0%

3
Shape of data: (122479, 115)
Detected anomalies: 100.0%

4
Shape of data: (157084, 115)
Detected anomalies: 100.0%

5
Shape of data: (84436, 115)
Detected anomalies: 100.0%

6
Shape of data: (59398, 115)
Detected anomalies: 100.0%

7
Shape of data: (27413, 115)
Detected anomalies: 100.0%

8
Shape of data: (28572, 115)
Detected anomalies: 100.0%

9
Shape of data: (98075, 115)
Detected anomalies: 0.0%

10
Shape of data: (102980, 115)
Detected anomalies: 0.0%



In [None]:
def predict(x, threshold=threshold2, window_size=82):
    """Flag anomalous windows of `x` with the fine-tuned autoencoder `af1`.

    Samples are rescaled with the notebook-level `scaler`; a sample counts as
    anomalous when the MSE between it and its reconstruction by `af1` is
    greater than `threshold`. A sliding majority vote then smooths the
    per-sample flags.

    Args:
        x: Raw samples in the layout accepted by `scaler.transform`.
        threshold: MSE cut-off. The default is whatever `threshold2` held
            when this function was defined.
        window_size: Number of consecutive per-sample flags in each vote.

    Returns:
        NumPy array with one entry per full window of `window_size`
        consecutive flags; True when more than half of the flags in that
        window are anomalous. Empty when there are fewer flags than
        `window_size`.
    """
    scaled = scaler.transform(x)
    sample_error = losses.mse(scaled, af1(scaled))
    predictions = sample_error > threshold

    # Majority voting over `window_size` predictions
    window_ends = range(window_size, len(predictions) + 1)
    votes = []
    for end in window_ends:
        anomalous_share = np.mean(predictions[end - window_size:end])
        votes.append(anomalous_share > 0.5)
    return np.array(votes)

# Evaluation sets in index order: held-out benign rows first, then the five
# Mirai captures, then the five Gafgyt captures.
test_data = [
    X_test0,
    X_test1, X_test2, X_test3, X_test4, X_test5,
    X_test6, X_test7, X_test8, X_test9, X_test10,
]

# Print the set index, then the shape and share of windows flagged anomalous.
for i, x in enumerate(test_data):
    print(i)
    outcome = predict(x)
    print_stats(x, outcome)

0
Shape of data: (4528, 115)
Detected anomalies: 3.1481897908702496%

1
Shape of data: (43674, 115)
Detected anomalies: 100.0%

2
Shape of data: (107187, 115)
Detected anomalies: 100.0%

3
Shape of data: (122479, 115)
Detected anomalies: 100.0%

4
Shape of data: (157084, 115)
Detected anomalies: 100.0%

5
Shape of data: (84436, 115)
Detected anomalies: 100.0%

6
Shape of data: (59398, 115)
Detected anomalies: 100.0%

7
Shape of data: (27413, 115)
Detected anomalies: 100.0%

8
Shape of data: (28572, 115)
Detected anomalies: 100.0%

9
Shape of data: (98075, 115)
Detected anomalies: 0.0%

10
Shape of data: (102980, 115)
Detected anomalies: 0.0%

