# Network Intrusion Detection with Deep Learning

In [1]:
# For a broad introduction to the problem and dataset: https://arxiv.org/pdf/1701.02145.pdf
# For modern results using deep learning: http://ieeexplore.ieee.org/document/7777224/

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

## The Data

In [3]:
# For the original '99 KDD dataset: http://kdd.ics.uci.edu/databases/kddcup99/kddcup99.html
# For the NSL-KDD Train+/Test+ data: https://github.com/defcom17/NSL_KDD

In [4]:
with open('kddcup.names', 'r') as infile:
    kdd_names = infile.readlines()
kdd_cols = [x.split(':')[0] for x in kdd_names[1:]]

In [5]:
# The Train+/Test+ datasets include sample difficulty rating and the attack class

In [6]:
kdd_cols += ['class', 'difficulty']

In [7]:
kdd = pd.read_csv('KDDTrain+.txt', names=kdd_cols)
kdd_t = pd.read_csv('KDDTest+.txt', names=kdd_cols)

In [8]:
len(np.unique(kdd['flag'].values))

11

In [9]:
# Consult the linked references for attack categories: 
# https://www.researchgate.net/post/What_are_the_attack_types_in_the_NSL-KDD_TEST_set_For_example_processtable_is_a_attack_type_in_test_set_Im_wondering_is_it_prob_DoS_R2L_U2R
# The traffic can be grouped into 5 categories: Normal, DOS, U2R, R2L, Probe
# or more coarsely into Normal vs Anomalous for the binary classification task

In [10]:
kdd_cols = kdd.columns.tolist()
kdd_cols.remove('protocol_type')
kdd_cols.remove('service')
kdd_cols.remove('flag')
kdd_cols += ['protocol_type', 'service', 'flag']

In [11]:
attack_map = [x.strip().split() for x in open('training_attack_types', 'r')]
attack_map = {k:v for (k,v) in attack_map}

In [12]:
attack_map

{'apache2': 'dos',
 'arppoison': 'dos',
 'back': 'dos',
 'buffer_overflow': 'u2r',
 'casesen': 'u2r',
 'crashiis': 'dos',
 'desnuke': 'dos',
 'dict': 'r2l',
 'eject': 'u2r',
 'fdformat': 'u2r',
 'ffbconfig': 'u2r',
 'framespoof': 'r2l',
 'ftp_write': 'r2l',
 'ftpwrite': 'r2l',
 'guess_passwd': 'r2l',
 'guest': 'r2l',
 'httptunnel': 'r2l',
 'illegal-sniffer': 'probe',
 'imap': 'r2l',
 'ipsweep': 'probe',
 'land': 'dos',
 'loadmodule': 'u2r',
 'lsdomain': 'probe',
 'mailbomb': 'dos',
 'mscan': 'probe',
 'msscan': 'probe',
 'multihop': 'r2l',
 'named': 'r2l',
 'ncftp': 'r2l',
 'neptune': 'dos',
 'netbus': 'r2l',
 'netcat': 'r2l',
 'nmap': 'probe',
 'ntfsdos': 'u2r',
 'ntinfoscan': 'probe',
 'nukepw': 'u2r',
 'perl': 'u2r',
 'phf': 'r2l',
 'pod': 'dos',
 'portsweep': 'probe',
 'ppmacro': 'r2l',
 'processtable': 'dos',
 'ps': 'u2r',
 'queso': 'probe',
 'rootkit': 'u2r',
 'saint': 'probe',
 'satan': 'probe',
 'sechole': 'u2r',
 'secret': 'u2r',
 'selfping': 'dos',
 'sendmail': 'r2l',
 'smurf

In [13]:
# Here we opt for the 5-class problem

In [14]:
kdd['class'] = kdd['class'].replace(attack_map)
kdd_t['class'] = kdd_t['class'].replace(attack_map)

In [15]:
def ent_encode(df, col):
    vals = sorted(np.unique(df[col].values))
    val_dict = {val:idx for idx, val in enumerate(vals)}
    df[col] = df[col].map(val_dict)
    return df

In [16]:
def log_trns(df, col):
    return df[col].apply(np.log1p)

In [17]:
cat_lst = ['protocol_type', 'service', 'flag']
for col in cat_lst:
    kdd = ent_encode(kdd, col)
    kdd_t = ent_encode(kdd_t, col)

In [18]:
log_lst = ['duration', 'src_bytes', 'dst_bytes']
for col in log_lst:
    kdd[col] = log_trns(kdd, col)
    kdd_t[col] = log_trns(kdd_t, col)

In [19]:
kdd = kdd[kdd_cols]
for col in kdd_cols:
    if col not in kdd_t.columns:
        kdd_t[col] = 0
kdd_t = kdd_t[kdd_cols]

In [20]:
# Now we have used one-hot encoding and log scaling

In [21]:
kdd.head()

Unnamed: 0,duration,src_bytes,dst_bytes,land,wrong_fragment,urgent,hot,num_failed_logins,logged_in,num_compromised,...,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,class,difficulty,protocol_type,service,flag
0,0.0,6.198479,0.0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.05,0.0,normal,20,1,20,9
1,0.0,4.990433,0.0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,normal,15,2,44,9
2,0.0,0.0,0.0,0,0,0,0,0,0,0,...,0.0,1.0,1.0,0.0,0.0,dos,19,1,49,5
3,0.0,5.451038,9.006264,0,0,0,0,0,1,0,...,0.04,0.03,0.01,0.0,0.01,normal,21,1,24,9
4,0.0,5.298317,6.042633,0,0,0,0,0,1,0,...,0.0,0.0,0.0,0.0,0.0,normal,21,1,24,9


In [22]:
difficulty = kdd.pop('difficulty')
target = kdd.pop('class')
y_diff = kdd_t.pop('difficulty')
y_test = kdd_t.pop('class')

In [23]:
target = pd.get_dummies(target)
y_test = pd.get_dummies(y_test)

In [24]:
target

Unnamed: 0,dos,normal,probe,r2l,u2r
0,0,1,0,0,0
1,0,1,0,0,0
2,1,0,0,0,0
3,0,1,0,0,0
4,0,1,0,0,0
5,1,0,0,0,0
6,1,0,0,0,0
7,1,0,0,0,0
8,1,0,0,0,0
9,1,0,0,0,0


In [25]:
y_test

Unnamed: 0,dos,normal,probe,r2l,u2r
0,1,0,0,0,0
1,1,0,0,0,0
2,0,1,0,0,0
3,0,0,1,0,0
4,0,0,1,0,0
5,0,1,0,0,0
6,0,1,0,0,0
7,0,0,0,1,0
8,0,1,0,0,0
9,0,0,0,1,0


In [26]:
target = target.values
train = kdd.values
test = kdd_t.values
y_test = y_test.values
cat_mat = train[:,-3:]
train = train[:,:-3]
cat_tst = test[:,-3:]
test = test[:,:-3]

In [27]:
# We rescale features to [0, 1]

In [28]:
min_max_scaler = MinMaxScaler()
train = min_max_scaler.fit_transform(train)
test = min_max_scaler.transform(test)

In [29]:
for idx, col in enumerate(list(kdd.columns)):
    print(idx, col)

0 duration
1 src_bytes
2 dst_bytes
3 land
4 wrong_fragment
5 urgent
6 hot
7 num_failed_logins
8 logged_in
9 num_compromised
10 root_shell
11 su_attempted
12 num_root
13 num_file_creations
14 num_shells
15 num_access_files
16 num_outbound_cmds
17 is_host_login
18 is_guest_login
19 count
20 srv_count
21 serror_rate
22 srv_serror_rate
23 rerror_rate
24 srv_rerror_rate
25 same_srv_rate
26 diff_srv_rate
27 srv_diff_host_rate
28 dst_host_count
29 dst_host_srv_count
30 dst_host_same_srv_rate
31 dst_host_diff_srv_rate
32 dst_host_same_src_port_rate
33 dst_host_srv_diff_host_rate
34 dst_host_serror_rate
35 dst_host_srv_serror_rate
36 dst_host_rerror_rate
37 dst_host_srv_rerror_rate
38 protocol_type
39 service
40 flag


## The Model

In [30]:
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Dense, Activation, Merge, Reshape, Dropout
from keras.layers.embeddings import Embedding

Using TensorFlow backend.


In [31]:
# We apply entity embedding for the label encoded features
# The input must be a list of arrays for each categorical
# feature as well as the array of continuous normalized features

In [32]:
train = [train] + [col for col in cat_mat.T]
test = [test] + [col for col in cat_tst.T]

In [33]:
def build_network():

    models = []
    
    model_dens = Sequential()
    model_dens.add(Dense(36, input_dim=38))
    model_dens.add(Activation('relu'))
    model_dens.add(Dropout(.15))
    model_dens.add(Dense(16))
    models.append(model_dens)

    model_proto = Sequential()
    model_proto.add(Embedding(3, 2, input_length=1))
    model_proto.add(Reshape(target_shape=(2,)))
    models.append(model_proto)

    model_serv = Sequential()
    model_serv.add(Embedding(70, 4, input_length=1))
    model_serv.add(Reshape(target_shape=(4,)))
    models.append(model_serv)

    model_flag = Sequential()
    model_flag.add(Embedding(11, 3, input_length=1))
    model_flag.add(Reshape(target_shape=(3,)))
    models.append(model_flag)
    
    model = Sequential()
    model.add(Merge(models, mode='concat'))

    model.add(Dense(32))
    model.add(Activation('relu'))
    model.add(Dropout(.15))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dropout(.15))
    model.add(Dense(5))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model

In [34]:
# We use early stopping on a holdout validation set

In [35]:
NN = build_network()
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=3, verbose=0, mode='auto')



Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [36]:
NN.fit(x=train, y=target, epochs=100, validation_split=0.1, batch_size=32, callbacks=[early_stopping])

Train on 113375 samples, validate on 12598 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100


<keras.callbacks.History at 0x7f936af4a828>

## The Performance

In [37]:
from sklearn.metrics import confusion_matrix
preds = NN.predict(test)
pred_lbls = np.argmax(preds, axis=1)
true_lbls = np.argmax(y_test, axis=1)

In [38]:
NN.evaluate(test, y_test)



[2.5954216310028659, 0.7484918381831086]

In [39]:
# With the confusion matrix, we can aggregate model predictions
# This helps to understand the mistakes and refine the model

In [40]:
confusion_matrix(true_lbls, pred_lbls)

array([[5708,  979,  770,    1,    0],
       [  95, 9185,  424,    2,    5],
       [ 164,  549, 1708,    0,    0],
       [   5, 2468,  146,  265,    3],
       [   5,   33,   12,    9,    8]])

In [41]:
from sklearn.metrics import f1_score
f1_score(true_lbls, pred_lbls, average='weighted')

0.71522868038376597

In [42]:
# This model also performs similarly though slightly worse.
# Note that this model shows less bias to classify as 'normal'
# This architecture may perform well in binary classification.