In [1]:
%matplotlib inline
import glob, os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
pd.options.display.max_rows = 200

plt.rcParams['figure.figsize'] = [20, 10]
plt.rc('font', size=20)



# Loading Dataset

## IOT Devices

The dataset has been extracted at `ROOT_PATH`, containing the data for each IOT device. There are 9 devices in total. There are folders for each IOT device containing `benign_traffic.csv` and further two folders for `gafgyt_attacks` and `mirai_attacks`

In [3]:
ROOT_PATH = r'/mnt/data/khiz/dataset/00442'

IOT_DEVS = [ 'Danmini_Doorbell',
             'Ecobee_Thermostat',
             'Ennio_Doorbell',
             'B120N10_Baby_Mon',
             '737E_Security_Cam',
             '838_Security_Cam',
             'Samsung_Webcam',
             '1002_Security_Cam',
             '1003_Security_Cam'
           ]

## Paths to CSVs
We create a python dictionary that contains all the paths to the CSVs for the respective IOT devices

In [4]:
PATHS_DICT = {}

for i in IOT_DEVS:
    PATHS_DICT[i] = {}
    print('[' + i + ']')
    iot_dir = os.path.join(ROOT_PATH, i)
    PATHS_DICT[i]['benign'] = os.path.join(iot_dir, 'benign_traffic.csv')
    print('  ', 'benign_traffic.csv')
    attacks = [ d for d in os.listdir(iot_dir)
                   if os.path.isdir( os.path.join(iot_dir, d)) ]
    for attack in attacks:
        attack_name = attack.split('_')[0]
        
        PATHS_DICT[i][attack_name] = {}
        attack_dir = os.path.join(iot_dir, attack)
        types = [ f for f in os.listdir(attack_dir)
                   if os.path.isfile( os.path.join(attack_dir, f)) ]
        print('  ', attack_name, ': ', types)
        for t in types:
            type_name = t.split('.')[0]
            PATHS_DICT[i][attack_name][type_name] = os.path.join(attack_dir,t)

[Danmini_Doorbell]
   benign_traffic.csv
   gafgyt :  ['combo.csv', 'junk.csv', 'scan.csv', 'tcp.csv', 'udp.csv']
   mirai :  ['ack.csv', 'scan.csv', 'syn.csv', 'udp.csv', 'udpplain.csv']
[Ecobee_Thermostat]
   benign_traffic.csv
   gafgyt :  ['combo.csv', 'junk.csv', 'scan.csv', 'tcp.csv', 'udp.csv']
   mirai :  ['ack.csv', 'scan.csv', 'syn.csv', 'udp.csv', 'udpplain.csv']
[Ennio_Doorbell]
   benign_traffic.csv
   gafgyt :  ['combo.csv', 'junk.csv', 'scan.csv', 'tcp.csv', 'udp.csv']
[B120N10_Baby_Mon]
   benign_traffic.csv
   gafgyt :  ['combo.csv', 'junk.csv', 'scan.csv', 'tcp.csv', 'udp.csv']
   mirai :  ['ack.csv', 'scan.csv', 'syn.csv', 'udp.csv', 'udpplain.csv']
[737E_Security_Cam]
   benign_traffic.csv
   gafgyt :  ['combo.csv', 'junk.csv', 'scan.csv', 'tcp.csv', 'udp.csv']
   mirai :  ['ack.csv', 'scan.csv', 'syn.csv', 'udp.csv', 'udpplain.csv']
[838_Security_Cam]
   benign_traffic.csv
   gafgyt :  ['combo.csv', 'junk.csv', 'scan.csv', 'tcp.csv', 'udp.csv']
   mirai :  ['ack.cs

## Loading CSVs into Pandas
We will create pandas dataframe for each botnet attack on each IOT device. For each datafram we will combine the attack traffic with the benign traffic add the following two columns:
- `traffic_type` : benign or attack (0 or 1)
- `attack_type`  : type of attack e.g, ack, scan etc.

In [5]:
nbaiot_dict = {}
for iot_dev in PATHS_DICT:
    b_df = pd.read_csv(PATHS_DICT[iot_dev]['benign'])
    b_df['traffic_type'] = 'benign'
    b_df['attack_type'] = 'benign'
    for botnet in [ b for b in PATHS_DICT[iot_dev] if b != 'benign']:
        if botnet not in nbaiot_dict:
            nbaiot_dict[botnet] = {}
        if iot_dev not in nbaiot_dict[botnet]:
            nbaiot_dict[botnet][iot_dev] = pd.DataFrame()
        nbaiot_dict[botnet][iot_dev] = nbaiot_dict[botnet][iot_dev].append(b_df)
        for attack in PATHS_DICT[iot_dev][botnet]:
            a_df = pd.read_csv(PATHS_DICT[iot_dev][botnet][attack])
            a_df['traffic_type'] = 'attack'
            a_df['attack_type'] = attack
            nbaiot_dict[botnet][iot_dev] = nbaiot_dict[botnet][iot_dev].append(a_df)

In [6]:
for k in nbaiot_dict:
    print(k)
    for b in nbaiot_dict[k]:
        print('  ',b,':','<benign+attack>')

gafgyt
   Danmini_Doorbell : <benign+attack>
   Ecobee_Thermostat : <benign+attack>
   Ennio_Doorbell : <benign+attack>
   B120N10_Baby_Mon : <benign+attack>
   737E_Security_Cam : <benign+attack>
   838_Security_Cam : <benign+attack>
   Samsung_Webcam : <benign+attack>
   1002_Security_Cam : <benign+attack>
   1003_Security_Cam : <benign+attack>
mirai
   Danmini_Doorbell : <benign+attack>
   Ecobee_Thermostat : <benign+attack>
   B120N10_Baby_Mon : <benign+attack>
   737E_Security_Cam : <benign+attack>
   838_Security_Cam : <benign+attack>
   1002_Security_Cam : <benign+attack>
   1003_Security_Cam : <benign+attack>


# Modeling

### Pre processing
Convert the target column `traffic_type` to integer 0 = benigh, 1 = attack 

In [7]:
for botnet in nbaiot_dict:
    for dev in nbaiot_dict[botnet]:
        nbaiot = nbaiot_dict[botnet][dev]

        nbaiot.loc[ nbaiot['traffic_type']=='attack','traffic_type' ]=1
        nbaiot.loc[ nbaiot['traffic_type']=='benign','traffic_type' ]=0
        nbaiot['traffic_type'] = nbaiot['traffic_type'].astype(int)
        
        nbaiot_dict[botnet][dev] = nbaiot

### Classifiers and training functions

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


In [9]:
classfiers = {
    'KNN' : KNeighborsClassifier(),
    'RFR' : RandomForestClassifier(),
    'DTR' : DecisionTreeClassifier(),
    'ADB' : AdaBoostClassifier(),
    
}


## Main Loop

In [10]:
all_results = [
    ['TRAIN DEVICE', 'TEST DEVICE', 'BOTNET', 'CLASSIFIER', 'ACCURACY', 'PRECISION', 'RECALL', 'F1-SCORE', 'FALSE-P', 'FALSE-N' ]
]

for clf_name in classfiers:
    clf = classfiers[clf_name]
    for botnet in nbaiot_dict:
        for train_dev in nbaiot_dict[botnet]:
            train_df = nbaiot_dict[botnet][train_dev]
            X_train = train_df.drop(["attack_type", "traffic_type" ], axis=1)
            y_train = train_df['traffic_type']
            clf.fit(X_train, y_train)            
            for test_dev in nbaiot_dict[botnet]:
                test_df  = nbaiot_dict[botnet][test_dev]
                X_test  = test_df.drop(["attack_type", "traffic_type" ], axis=1)
                y_test  = test_df['traffic_type']
                y_preds = clf.predict(X_test)
                clrp = classification_report(y_test, y_preds, output_dict=True)
                conf_mat = confusion_matrix(y_test, y_preds)
                fp = conf_mat[0][1]
                fn = conf_mat[1][0]
                result = [
                    train_dev,
                    test_dev,
                    botnet,
                    clf_name,
                    round(clrp['accuracy'],4),
                    round(clrp['macro avg']['precision'],4),
                    round(clrp['macro avg']['recall'],4),
                    round(clrp['macro avg']['f1-score'],4),
                    fp,
                    fn
                ]
                print(result)
                all_results.append(result)


['Danmini_Doorbell', 'Danmini_Doorbell', 'gafgyt', 'KNN', 0.999, 0.9973, 0.9985, 0.9979, 113, 246]
['Danmini_Doorbell', 'Ecobee_Thermostat', 'gafgyt', 'KNN', 0.9987, 0.9907, 0.9924, 0.9915, 189, 238]
['Danmini_Doorbell', 'Ennio_Doorbell', 'gafgyt', 'KNN', 0.9962, 0.9952, 0.9851, 0.9901, 1137, 228]
['Danmini_Doorbell', 'B120N10_Baby_Mon', 'gafgyt', 'KNN', 0.9797, 0.9827, 0.9734, 0.9777, 8582, 1321]
['Danmini_Doorbell', '737E_Security_Cam', 'gafgyt', 'KNN', 0.9898, 0.9786, 0.9836, 0.9811, 1586, 2396]
['Danmini_Doorbell', '838_Security_Cam', 'gafgyt', 'KNN', 0.9939, 0.9951, 0.9882, 0.9916, 2258, 236]
['Danmini_Doorbell', 'Samsung_Webcam', 'gafgyt', 'KNN', 0.993, 0.994, 0.9768, 0.9852, 2378, 234]
['Danmini_Doorbell', '1002_Security_Cam', 'gafgyt', 'KNN', 0.9895, 0.9916, 0.9628, 0.9766, 3425, 240]
['Danmini_Doorbell', '1003_Security_Cam', 'gafgyt', 'KNN', 0.9936, 0.96, 0.9835, 0.9714, 544, 1614]
['Ecobee_Thermostat', 'Danmini_Doorbell', 'gafgyt', 'KNN', 0.9965, 0.9974, 0.9875, 0.9924, 1229,

['Danmini_Doorbell', 'Ecobee_Thermostat', 'mirai', 'KNN', 0.9953, 0.9263, 0.9869, 0.9545, 287, 2207]
['Danmini_Doorbell', 'B120N10_Baby_Mon', 'mirai', 'KNN', 0.9886, 0.992, 0.975, 0.9832, 8673, 317]
['Danmini_Doorbell', '737E_Security_Cam', 'mirai', 'KNN', 0.9242, 0.811, 0.9437, 0.8582, 1887, 35888]
['Danmini_Doorbell', '838_Security_Cam', 'mirai', 'KNN', 0.9303, 0.8657, 0.9444, 0.8969, 3261, 33529]
['Danmini_Doorbell', '1002_Security_Cam', 'mirai', 'KNN', 0.9938, 0.9854, 0.9734, 0.9793, 2376, 1113]
['Danmini_Doorbell', '1003_Security_Cam', 'mirai', 'KNN', 0.9942, 0.9583, 0.96, 0.9591, 1501, 1580]
['Ecobee_Thermostat', 'Danmini_Doorbell', 'mirai', 'KNN', 0.9176, 0.7291, 0.9432, 0.7894, 1341, 56490]
['Ecobee_Thermostat', 'Ecobee_Thermostat', 'mirai', 'KNN', 0.9994, 0.9974, 0.991, 0.9942, 234, 62]
['Ecobee_Thermostat', 'B120N10_Baby_Mon', 'mirai', 'KNN', 0.9241, 0.8748, 0.9363, 0.8993, 7338, 52285]
['Ecobee_Thermostat', '737E_Security_Cam', 'mirai', 'KNN', 0.91, 0.7896, 0.9359, 0.8375, 1

['B120N10_Baby_Mon', '1003_Security_Cam', 'gafgyt', 'RFR', 0.968, 0.9832, 0.725, 0.8019, 10740, 6]
['737E_Security_Cam', 'Danmini_Doorbell', 'gafgyt', 'RFR', 0.9999, 1.0, 0.9997, 0.9998, 30, 0]
['737E_Security_Cam', 'Ecobee_Thermostat', 'gafgyt', 'RFR', 0.9997, 0.9999, 0.9967, 0.9983, 86, 0]
['737E_Security_Cam', 'Ennio_Doorbell', 'gafgyt', 'RFR', 0.9999, 0.9999, 0.9995, 0.9997, 40, 0]
['737E_Security_Cam', 'B120N10_Baby_Mon', 'gafgyt', 'RFR', 0.9915, 0.9935, 0.9882, 0.9907, 4141, 0]
['737E_Security_Cam', '737E_Security_Cam', 'gafgyt', 'RFR', 1.0, 1.0, 1.0, 1.0, 0, 0]
['737E_Security_Cam', '838_Security_Cam', 'gafgyt', 'RFR', 0.9999, 1.0, 0.9999, 0.9999, 22, 0]
['737E_Security_Cam', 'Samsung_Webcam', 'gafgyt', 'RFR', 0.9938, 0.9964, 0.9776, 0.9868, 2332, 0]
['737E_Security_Cam', '1002_Security_Cam', 'gafgyt', 'RFR', 0.9903, 0.9945, 0.9637, 0.9784, 3385, 0]
['737E_Security_Cam', '1003_Security_Cam', 'gafgyt', 'RFR', 0.9992, 0.9996, 0.9929, 0.9962, 276, 0]
['838_Security_Cam', 'Danmini_D

['1002_Security_Cam', '1002_Security_Cam', 'mirai', 'RFR', 1.0, 1.0, 1.0, 1.0, 0, 0]
['1002_Security_Cam', '1003_Security_Cam', 'mirai', 'RFR', 1.0, 1.0, 0.9995, 0.9997, 21, 0]
['1003_Security_Cam', 'Danmini_Doorbell', 'mirai', 'RFR', 0.9962, 0.998, 0.9733, 0.9853, 2647, 0]
['1003_Security_Cam', 'Ecobee_Thermostat', 'mirai', 'RFR', 1.0, 1.0, 1.0, 1.0, 0, 0]
['1003_Security_Cam', 'B120N10_Baby_Mon', 'mirai', 'RFR', 0.9812, 0.9882, 0.9578, 0.972, 14778, 0]
['1003_Security_Cam', '737E_Security_Cam', 'mirai', 'RFR', 0.9953, 0.9973, 0.9811, 0.989, 2347, 0]
['1003_Security_Cam', '838_Security_Cam', 'mirai', 'RFR', 0.9914, 0.9948, 0.9769, 0.9856, 4544, 0]
['1003_Security_Cam', '1002_Security_Cam', 'mirai', 'RFR', 0.9958, 0.9977, 0.9747, 0.9859, 2354, 0]
['1003_Security_Cam', '1003_Security_Cam', 'mirai', 'RFR', 1.0, 1.0, 1.0, 1.0, 0, 0]
['Danmini_Doorbell', 'Danmini_Doorbell', 'gafgyt', 'DTR', 1.0, 1.0, 1.0, 1.0, 0, 0]
['Danmini_Doorbell', 'Ecobee_Thermostat', 'gafgyt', 'DTR', 0.979, 0.9893, 

['1003_Security_Cam', '737E_Security_Cam', 'gafgyt', 'DTR', 0.9946, 0.9968, 0.983, 0.9898, 2108, 0]
['1003_Security_Cam', '838_Security_Cam', 'gafgyt', 'DTR', 0.9824, 0.9887, 0.9636, 0.9754, 7168, 0]
['1003_Security_Cam', 'Samsung_Webcam', 'gafgyt', 'DTR', 0.9977, 0.9987, 0.9916, 0.9951, 873, 0]
['1003_Security_Cam', '1002_Security_Cam', 'gafgyt', 'DTR', 0.994, 0.9966, 0.9776, 0.9868, 2089, 0]
['1003_Security_Cam', '1003_Security_Cam', 'gafgyt', 'DTR', 1.0, 1.0, 1.0, 1.0, 0, 0]
['Danmini_Doorbell', 'Danmini_Doorbell', 'mirai', 'DTR', 1.0, 1.0, 1.0, 1.0, 0, 0]
['Danmini_Doorbell', 'Ecobee_Thermostat', 'mirai', 'DTR', 0.9948, 0.9142, 0.9972, 0.9517, 3, 2717]
['Danmini_Doorbell', 'B120N10_Baby_Mon', 'mirai', 'DTR', 1.0, 1.0, 0.9999, 0.9999, 35, 0]
['Danmini_Doorbell', '737E_Security_Cam', 'mirai', 'DTR', 0.9902, 0.9638, 0.994, 0.9782, 57, 4839]
['Danmini_Doorbell', '838_Security_Cam', 'mirai', 'DTR', 0.9968, 0.9917, 0.998, 0.9948, 10, 1669]
['Danmini_Doorbell', '1002_Security_Cam', 'mirai

['B120N10_Baby_Mon', '838_Security_Cam', 'gafgyt', 'ADB', 0.9991, 0.9994, 0.9982, 0.9988, 360, 0]
['B120N10_Baby_Mon', 'Samsung_Webcam', 'gafgyt', 'ADB', 0.9982, 0.998, 0.9946, 0.9963, 543, 120]
['B120N10_Baby_Mon', '1002_Security_Cam', 'gafgyt', 'ADB', 0.9988, 0.9993, 0.9957, 0.9975, 401, 2]
['B120N10_Baby_Mon', '1003_Security_Cam', 'gafgyt', 'ADB', 0.9969, 0.9779, 0.995, 0.9863, 140, 886]
['737E_Security_Cam', 'Danmini_Doorbell', 'gafgyt', 'ADB', 0.9999, 1.0, 0.9998, 0.9999, 23, 0]
['737E_Security_Cam', 'Ecobee_Thermostat', 'gafgyt', 'ADB', 1.0, 1.0, 0.9994, 0.9997, 16, 0]
['737E_Security_Cam', 'Ennio_Doorbell', 'gafgyt', 'ADB', 0.9998, 0.9999, 0.9989, 0.9994, 84, 0]
['737E_Security_Cam', 'B120N10_Baby_Mon', 'gafgyt', 'ADB', 0.9956, 0.9966, 0.9939, 0.9952, 2132, 0]
['737E_Security_Cam', '737E_Security_Cam', 'gafgyt', 'ADB', 1.0, 1.0, 1.0, 1.0, 0, 0]
['737E_Security_Cam', '838_Security_Cam', 'gafgyt', 'ADB', 0.9996, 0.9997, 0.9991, 0.9994, 175, 0]
['737E_Security_Cam', 'Samsung_Webcam

['1002_Security_Cam', '737E_Security_Cam', 'mirai', 'ADB', 0.9997, 0.9994, 0.9994, 0.9994, 70, 70]
['1002_Security_Cam', '838_Security_Cam', 'mirai', 'ADB', 0.9998, 0.9999, 0.9995, 0.9997, 107, 1]
['1002_Security_Cam', '1002_Security_Cam', 'mirai', 'ADB', 1.0, 1.0, 1.0, 1.0, 0, 0]
['1002_Security_Cam', '1003_Security_Cam', 'mirai', 'ADB', 1.0, 1.0, 0.9998, 0.9999, 8, 0]
['1003_Security_Cam', 'Danmini_Doorbell', 'mirai', 'ADB', 1.0, 1.0, 1.0, 1.0, 0, 0]
['1003_Security_Cam', 'Ecobee_Thermostat', 'mirai', 'ADB', 1.0, 1.0, 0.9999, 1.0, 2, 0]
['1003_Security_Cam', 'B120N10_Baby_Mon', 'mirai', 'ADB', 0.998, 0.9987, 0.9955, 0.9971, 1576, 0]
['1003_Security_Cam', '737E_Security_Cam', 'mirai', 'ADB', 0.9799, 0.9888, 0.9193, 0.9504, 10033, 0]
['1003_Security_Cam', '838_Security_Cam', 'mirai', 'ADB', 0.9755, 0.9854, 0.9344, 0.9575, 12927, 0]
['1003_Security_Cam', '1002_Security_Cam', 'mirai', 'ADB', 0.9957, 0.9977, 0.9743, 0.9857, 2392, 0]
['1003_Security_Cam', '1003_Security_Cam', 'mirai', 'ADB

In [11]:
res_df =pd.DataFrame(all_results, index=None)

res_df, res_df.columns = res_df[1:] , res_df.iloc[0]

res_df

Unnamed: 0,TRAIN DEVICE,TEST DEVICE,BOTNET,CLASSIFIER,ACCURACY,PRECISION,RECALL,F1-SCORE,FALSE-P,FALSE-N
1,Danmini_Doorbell,Danmini_Doorbell,gafgyt,KNN,0.999,0.9973,0.9985,0.9979,113,246
2,Danmini_Doorbell,Ecobee_Thermostat,gafgyt,KNN,0.9987,0.9907,0.9924,0.9915,189,238
3,Danmini_Doorbell,Ennio_Doorbell,gafgyt,KNN,0.9962,0.9952,0.9851,0.9901,1137,228
4,Danmini_Doorbell,B120N10_Baby_Mon,gafgyt,KNN,0.9797,0.9827,0.9734,0.9777,8582,1321
5,Danmini_Doorbell,737E_Security_Cam,gafgyt,KNN,0.9898,0.9786,0.9836,0.9811,1586,2396
...,...,...,...,...,...,...,...,...,...,...
516,1003_Security_Cam,B120N10_Baby_Mon,mirai,ADB,0.998,0.9987,0.9955,0.9971,1576,0
517,1003_Security_Cam,737E_Security_Cam,mirai,ADB,0.9799,0.9888,0.9193,0.9504,10033,0
518,1003_Security_Cam,838_Security_Cam,mirai,ADB,0.9755,0.9854,0.9344,0.9575,12927,0
519,1003_Security_Cam,1002_Security_Cam,mirai,ADB,0.9957,0.9977,0.9743,0.9857,2392,0


In [12]:
# Write to csv
res_csv = res_df.to_csv(index=False)

print(res_csv, file=open('04_results.csv', 'w'))


### Load results from CSV

In [14]:
res_df = pd.read_csv('04_results.csv')

#res_df = res_df[['DEVICE', 'BOTNET', 'CLASSIFIER', 'ACCURACY', 'PRECISION', 'FALSE-P', 'FALSE-N' ]]

res_df

Unnamed: 0,TRAIN DEVICE,TEST DEVICE,BOTNET,CLASSIFIER,ACCURACY,PRECISION,RECALL,F1-SCORE,FALSE-P,FALSE-N
0,Danmini_Doorbell,Danmini_Doorbell,gafgyt,KNN,0.9990,0.9973,0.9985,0.9979,113,246
1,Danmini_Doorbell,Ecobee_Thermostat,gafgyt,KNN,0.9987,0.9907,0.9924,0.9915,189,238
2,Danmini_Doorbell,Ennio_Doorbell,gafgyt,KNN,0.9962,0.9952,0.9851,0.9901,1137,228
3,Danmini_Doorbell,B120N10_Baby_Mon,gafgyt,KNN,0.9797,0.9827,0.9734,0.9777,8582,1321
4,Danmini_Doorbell,737E_Security_Cam,gafgyt,KNN,0.9898,0.9786,0.9836,0.9811,1586,2396
...,...,...,...,...,...,...,...,...,...,...
515,1003_Security_Cam,B120N10_Baby_Mon,mirai,ADB,0.9980,0.9987,0.9955,0.9971,1576,0
516,1003_Security_Cam,737E_Security_Cam,mirai,ADB,0.9799,0.9888,0.9193,0.9504,10033,0
517,1003_Security_Cam,838_Security_Cam,mirai,ADB,0.9755,0.9854,0.9344,0.9575,12927,0
518,1003_Security_Cam,1002_Security_Cam,mirai,ADB,0.9957,0.9977,0.9743,0.9857,2392,0
