In [1]:
import sys
import os
from glob import iglob
import pandas as pd
import numpy as np
import tensorflow as tf
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from keras.models import model_from_yaml
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Activation
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [2]:
# Tally of loaded rows, filled in by read_file as {device: {category: row_count}}.
devices = dict()

In [3]:
def read_file(f, devs, c, data_root='../data'):
    """Load one traffic CSV and add its row count to the per-device tally.

    Parameters
    ----------
    f : str
        Path of the CSV file, expected to live below ``data_root/<device>/``.
    devs : dict
        Nested tally ``{device: {category: row_count}}``; updated in place.
    c : str
        Category key under which the rows are counted
        (the notebook passes 'gafgyt', 'mirai' or 'benign').
    data_root : str, optional
        Directory whose immediate sub-directories are named after the devices.
        Defaults to '../data', the prefix used by the notebook's glob patterns.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of ``f``.
    """
    d = pd.read_csv(f)
    # The device name is the first path component below data_root. Deriving it
    # from the relative path (instead of a fixed character offset) keeps this
    # correct for any data_root and for either path separator.
    rel = os.path.relpath(f, data_root)
    dev = rel.split(os.sep, 1)[0]
    per_device = devs.setdefault(dev, {})
    per_device[c] = per_device.get(c, 0) + d.shape[0]
    return d

In [51]:
# Read every CSV found under a gafgyt_attacks directory into one frame and label it.
df_gafgyt = pd.concat((read_file(f, devices, 'gafgyt') for f in iglob('../data/**/gafgyt_attacks/*.csv', recursive=True)), ignore_index=True)
print('Loaded, shape: ')
print(df_gafgyt.shape)
df_gafgyt['class'] = 'attack'
# Same for the CSVs under mirai_attacks directories.
print('Loading mirai data')
df_mirai = pd.concat((read_file(f, devices, 'mirai') for f in iglob('../data/**/mirai_attacks/*.csv', recursive=True)), ignore_index=True)
print('Loaded, shape: ')
print(df_mirai.shape)
df_mirai['class'] = 'attack'
# Benign captures are the benign_traffic.csv files anywhere below ../data.
print('Loading benign data')
df_benign = pd.concat((read_file(f, devices, 'benign') for f in iglob('../data/**/benign_traffic.csv', recursive=True)), ignore_index=True)
print('Loaded, shape: ')
print(df_benign.shape)
df_benign['class'] = 'benign'
# Stack benign rows followed by the shuffled attack rows. DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0; pd.concat yields the same rows,
# in the same order, with the same (non-reset) index labels.
df = pd.concat([
    df_benign,
    df_gafgyt.sample(frac=1, random_state=17),
    df_mirai.sample(frac=1, random_state=17),
])

Loaded, shape: 
(1032056, 115)
Loading mirai data
Loaded, shape: 
(3668402, 115)
Loading benign data
Loaded, shape: 
(555932, 115)


In [17]:
# Per-device row counts accumulated by read_file while the CSVs were loaded.
devices

{'Ecobee_Thermostat': {'benign': 110818, 'gafgyt': 512133, 'mirai': 13113},
 'SimpleHome_XCS7_1002_WHT_Security_Camera': {'benign': 110687,
  'gafgyt': 513248,
  'mirai': 46585},
 'SimpleHome_XCS7_1003_WHT_Security_Camera': {'benign': 115383,
  'gafgyt': 514860,
  'mirai': 19528},
 'Samsung_SNH_1011_N_Webcam': {'benign': 114672, 'mirai': 52150},
 'Danmini_Doorbell': {'benign': 118635, 'gafgyt': 652100, 'mirai': 49548},
 'Philips_B120N10_Baby_Monitor': {'benign': 114360,
  'gafgyt': 610714,
  'mirai': 175240},
 'Ennio_Doorbell': {'benign': 110931, 'mirai': 39100},
 'Provision_PT_737E_Security_Camera': {'benign': 121575,
  'gafgyt': 436010,
  'mirai': 62154},
 'Provision_PT_838_Security_Camera': {'benign': 114995,
  'gafgyt': 429337,
  'mirai': 98514}}

In [52]:
# Labels written to the 'class' column: benign frames get 'benign',
# gafgyt and mirai frames get 'attack'.
classes = ['benign', 'attack']

In [53]:
# Score every feature column by how well it separates the classes:
#   numerator   = sum over classes of  weight * (overall mean - class mean)^2
#   denominator = sum over classes of  weight * class variance
# where weight is the class's share of all rows. Larger means better separation.
scored = {}
indices = {}
shps = {}
for cl in classes:
    # Boolean row mask for the class, plus its row count (number of True entries).
    indices[cl] = df['class'] == cl
    shps[cl] = int(indices[cl].sum())

total_rows = df.shape[0]
for col in df.columns:
    if col == 'class':
        continue
    num = 0
    den = 0
    m = df[col].mean()

    for cl in classes:
        weight = shps[cl] / total_rows
        # Select just this column's rows for the class; masking the whole
        # frame first (df[mask][col]) copies every column on each iteration.
        values = df.loc[indices[cl], col]
        num += weight * (m - values.mean())**2
        den += weight * values.var()
    scored[col] = num / den
    print(col + ' scored ' + str(scored[col]))

MI_dir_L5_weight scored 0.671857128701193
MI_dir_L5_mean scored 0.31344467490142436
MI_dir_L5_variance scored 0.2904330511595098
MI_dir_L3_weight scored 0.7131694928565342
MI_dir_L3_mean scored 0.3485744559122524
MI_dir_L3_variance scored 0.3554197256901016
MI_dir_L1_weight scored 0.7348305096553736
MI_dir_L1_mean scored 0.38542073820594
MI_dir_L1_variance scored 0.4097137838932303
MI_dir_L0.1_weight scored 0.7045419811521229
MI_dir_L0.1_mean scored 0.39986870790696877
MI_dir_L0.1_variance scored 0.3866892541675427
MI_dir_L0.01_weight scored 0.5940252503261184
MI_dir_L0.01_mean scored 0.40596709703312395
MI_dir_L0.01_variance scored 0.3785429493515016
H_L5_weight scored 0.6718570543847242
H_L5_mean scored 0.3134446212662264
H_L5_variance scored 0.2904330232719689
H_L3_weight scored 0.71316942192271
H_L3_mean scored 0.3485743192186741
H_L3_variance scored 0.35541963930473003
H_L1_weight scored 0.7348304560920709
H_L1_mean scored 0.3854202117977467
H_L1_variance scored 0.4097134998847572

In [54]:
# Turn the {feature: score} mapping into a list of records so it can be ranked.
scored_list = [dict(feature=name, score=value) for name, value in scored.items()]

In [55]:
# Rank the records in place, highest score first.
scored_list.sort(reverse=True, key=lambda entry: entry['score'])

In [57]:
# Show the first 15 records, i.e. the highest-scoring features after the descending sort.
scored_list[:15]

[{'feature': 'MI_dir_L1_weight', 'score': 0.7348305096553736},
 {'feature': 'H_L1_weight', 'score': 0.7348304560920709},
 {'feature': 'MI_dir_L3_weight', 'score': 0.7131694928565342},
 {'feature': 'H_L3_weight', 'score': 0.71316942192271},
 {'feature': 'MI_dir_L0.1_weight', 'score': 0.7045419811521229},
 {'feature': 'H_L0.1_weight', 'score': 0.7045419222081496},
 {'feature': 'MI_dir_L5_weight', 'score': 0.671857128701193},
 {'feature': 'H_L5_weight', 'score': 0.6718570543847242},
 {'feature': 'MI_dir_L0.01_weight', 'score': 0.5940252503261184},
 {'feature': 'H_L0.01_weight', 'score': 0.5940252001395452},
 {'feature': 'HH_jit_L1_weight', 'score': 0.4599394728842447},
 {'feature': 'HH_L1_weight', 'score': 0.4599394728842426},
 {'feature': 'HH_L3_weight', 'score': 0.4368453985947054},
 {'feature': 'HH_jit_L3_weight', 'score': 0.43684539859470534},
 {'feature': 'HH_L5_weight', 'score': 0.4132503122673671}]