In [1]:
import tensorflow as tf
import numpy as np
import scipy.io 
train = scipy.io.loadmat('train.mat') 
train_X = np.concatenate((train['x1'],train['x2']), axis=1)
train_y = train['y']
test = scipy.io.loadmat('test.mat') 
test_X = np.concatenate((test['x1'],test['x2']), axis=1)
test_y = test['y']

class DNN(tf.keras.Model):

  def __init__(self):
    super().__init__()
    self.dense1 = tf.keras.layers.Dense(10, activation=tf.nn.relu)
    self.dense2 = tf.keras.layers.Dense(10, activation=tf.nn.relu)
    self.output_layer = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)

  def call(self, inputs):
    x = self.dense1(inputs)
    return self.output_layer(x)


batch_size = 10
epochs = 500
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model = DNN()
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])
model.fit(train_X, train_y, batch_size=batch_size, epochs=epochs, verbose=0)

score = model.evaluate(test_X, test_y, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

Test loss: 0.14504672586917877
Test accuracy: 0.9666666388511658


In [5]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

train_data = pd.read_csv('train_DefenseSystem.csv', parse_dates=['event_time'], infer_datetime_format=True)
test_data = pd.read_csv('test_DefenseSystem.csv', parse_dates=['event_time'], infer_datetime_format=True)

train_y = LabelEncoder().fit_transform(train_data['event_rule_category'])
pre_train_x = train_data.drop(['event_rule_category'], axis = 1)
pre_test_x = test_data
display(pre_train_x.head())
display(pre_train_x.info())
display(pre_train_x.describe())
display(np.unique(train_y))

Unnamed: 0,device_dev_name,device_family_name,device_hashed_mac,device_os_name,device_type_name,device_vendor_name,event_protocol_id,event_flow_outbound_or_inbound,event_role_device_or_router,event_role_server_or_client,event_rule_id,event_rule_name,event_rule_reference,event_rule_severity,event_self_ipv4,event_time,router_ip
0,Windows XP,Computer,a01f58b5c4fdb3122d6797ada0a3ee71fbe134dd,Windows XP,Desktop/Laptop,Microsoft Corp.,6,outbound,device,client,1050015,WEB Cross-site Scripting -34,CVE-2011-2133; CVE-2014-4116,4,192.168.1.243,2016-12-15 03:57:00,161.139.40.242
1,Windows,Computer,7c16de1f49ef32f5354a33ccd2ec5d0d5ef9e100,Windows,Desktop/Laptop,Microsoft Corp.,6,outbound,device,client,1130593,WEB Microsoft IIS HTTP.sys Remote Code Executi...,CVE-2015-1635; MS15-034,5,192.168.1.133,2016-12-11 06:54:00,199.221.3.43
2,Windows 8/10,Computer,6f8cdf9d553e2e1cba4c3b0ad226e6c7df119005,Windows 8/10,Desktop/Laptop,Microsoft Corp.,6,outbound,device,client,1056078,WEB Hashtable Collisions,CVE-2011-3414; CVE-2011-5034; CVE-2011-5035; C...,5,192.168.1.100,2016-12-24 01:20:00,157.2.60.102
3,Windows,Computer,4ac93ab7639e8800779afd887d6df7a93deb5c62,Windows,Desktop/Laptop,Microsoft Corp.,6,outbound,device,client,1050015,WEB Cross-site Scripting -34,CVE-2011-2133; CVE-2014-4116,4,192.168.1.6,2016-12-31 07:22:00,51.20.54.170
4,Windows,Computer,02de4773b0099f5b0a873516d76a888c3f4c6508,Windows,Desktop/Laptop,Microsoft Corp.,6,outbound,device,client,1050015,WEB Cross-site Scripting -34,CVE-2011-2133; CVE-2014-4116,4,192.168.1.44,2016-12-19 12:22:00,116.95.157.235


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 17 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   device_dev_name                 5000 non-null   object        
 1   device_family_name              5000 non-null   object        
 2   device_hashed_mac               5000 non-null   object        
 3   device_os_name                  5000 non-null   object        
 4   device_type_name                5000 non-null   object        
 5   device_vendor_name              5000 non-null   object        
 6   event_protocol_id               5000 non-null   int64         
 7   event_flow_outbound_or_inbound  5000 non-null   object        
 8   event_role_device_or_router     5000 non-null   object        
 9   event_role_server_or_client     5000 non-null   object        
 10  event_rule_id                   5000 non-null   int64         
 11  even

None

Unnamed: 0,event_protocol_id,event_rule_id,event_rule_severity
count,5000.0,5000.0,5000.0
mean,6.4102,1080985.0,4.3636
std,2.089308,36260.82,0.481083
min,0.0,1049802.0,4.0
25%,6.0,1050015.0,4.0
50%,6.0,1057007.0,4.0
75%,6.0,1130593.0,5.0
max,17.0,1132896.0,5.0


array([0, 1])

In [6]:
for k in pre_train_x.keys().drop(['device_hashed_mac','event_time']):
    print(k, pre_train_x[k].nunique())

device_dev_name 44
device_family_name 7
device_os_name 23
device_type_name 16
device_vendor_name 26
event_protocol_id 3
event_flow_outbound_or_inbound 2
event_role_device_or_router 2
event_role_server_or_client 2
event_rule_id 53
event_rule_name 53
event_rule_reference 38
event_rule_severity 2
event_self_ipv4 1100
router_ip 3857


In [15]:
pre_train_x['event_time'].apply(lambda x : x.timestamp())

0       1.481774e+09
1       1.481439e+09
2       1.482542e+09
3       1.483169e+09
4       1.482150e+09
            ...     
4995    1.481238e+09
4996    1.482970e+09
4997    1.480551e+09
4998    1.482730e+09
4999    1.480724e+09
Name: event_time, Length: 5000, dtype: float64

In [16]:
a_pre_data = pd.concat([pre_train_x,pre_test_x], axis=0)
a_pre_data = a_pre_data.drop(['device_hashed_mac'], axis=1)
for k in pre_train_x.keys().drop(['device_hashed_mac']):
    if k == 'event_self_ipv4': 
        tmp = a_pre_data['event_self_ipv4'].str.extract(r'(.*?)\.(.*?)\.(.*?)\.(.*)')
        a_pre_data['self_ip_class_A'] = (tmp[0]=='10').astype(int)
        a_pre_data['self_ip_class_B'] = (tmp[0]=='172').astype(int)
        a_pre_data['self_ip_class_B'] = (tmp[0]=='192').astype(int)
        a_pre_data['self_ip_others'] = ((tmp[0] != '192') * (tmp[0] != '172') * (tmp[0] != '10') == True).astype(int) # ip not start with 10, 172,192 => convert to 1
        a_pre_data = a_pre_data.drop(['event_self_ipv4'], axis=1)
    elif k == 'router_ip':
        tmp = a_pre_data['router_ip'].str.extract(r'(.*?)\.(.*?)\.(.*?)\.(.*)')
        tmp[tmp[0].isnull()] = -1
        a_pre_data['router_ip_class_A'] = ((0<=tmp[0].astype(int)) * (tmp[0].astype(int)<128)).astype(int)
        a_pre_data['router_ip_class_B'] = ((128<=tmp[0].astype(int)) * (tmp[0].astype(int)<192)).astype(int)
        a_pre_data['router_ip_class_C'] = ((192<=tmp[0].astype(int)) * (tmp[0].astype(int)<224)).astype(int)
        a_pre_data['router_ip_class_D'] = (224<=tmp[0].astype(int)) * (tmp[0].astype(int)<240).astype(int)
        a_pre_data['router_ip_class_E'] = (240<=tmp[0].astype(int)) * (tmp[0].astype(int)<255).astype(int)
        a_pre_data['router_ip_others'] = (tmp[0].astype(int) == -1).astype(int)
        a_pre_data = a_pre_data.drop(['router_ip'], axis=1)
    elif k == 'event_time':
        tmp = a_pre_data['event_time'].apply(lambda x : x.timestamp())
        a_pre_data['event_time'] = tmp 
    else:
        a_pre_data_one_hot =  pd.get_dummies(a_pre_data[k])
        a_pre_data = pd.concat([a_pre_data, a_pre_data_one_hot],axis=1)
        a_pre_data = a_pre_data.drop([k], axis=1)

train_x = a_pre_data[:len(pre_train_x)]
test_x = a_pre_data[len(pre_test_x):]
display(train_x.head())
display(train_x.info())
display(train_x.describe())

  f"evaluating in Python space because the {repr(op_str)} "
  f"evaluating in Python space because the {repr(op_str)} "
  f"evaluating in Python space because the {repr(op_str)} "
  f"evaluating in Python space because the {repr(op_str)} "


Unnamed: 0,event_time,ASUS RT Router,Amazon Kindle,Android 3.0-4.1,Android 4.2-5.1,Android 6.0,Apple Watch/iPad Pro,Axis Network Camera,Belkin F5D8235-4 v2,BlackBerry,...,5,self_ip_class_A,self_ip_class_B,self_ip_others,router_ip_class_A,router_ip_class_B,router_ip_class_C,router_ip_class_D,router_ip_class_E,router_ip_others
0,1481774000.0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0
1,1481439000.0,0,0,0,0,0,0,0,0,0,...,1,0,1,0,0,0,1,0,0,0
2,1482542000.0,0,0,0,0,0,0,0,0,0,...,1,0,1,0,0,1,0,0,0,0
3,1483169000.0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
4,1482150000.0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 5000 entries, 0 to 4999
Columns: 292 entries, event_time to router_ip_others
dtypes: float64(1), int64(9), uint8(282)
memory usage: 1.8 MB


None

Unnamed: 0,event_time,ASUS RT Router,Amazon Kindle,Android 3.0-4.1,Android 4.2-5.1,Android 6.0,Apple Watch/iPad Pro,Axis Network Camera,Belkin F5D8235-4 v2,BlackBerry,...,5,self_ip_class_A,self_ip_class_B,self_ip_others,router_ip_class_A,router_ip_class_B,router_ip_class_C,router_ip_class_D,router_ip_class_E,router_ip_others
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,...,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,1481885000.0,0.0008,0.0006,0.0008,0.0606,0.0108,0.0034,0.0004,0.0008,0.002,...,0.3636,0.0384,0.9266,0.025,0.279,0.4952,0.1746,0.0124,0.011,0.0278
std,775420.9,0.028276,0.02449,0.028276,0.238619,0.103371,0.058216,0.019998,0.028276,0.044681,...,0.481083,0.192179,0.260818,0.156141,0.448552,0.500027,0.379663,0.110674,0.104313,0.164416
min,1480551000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1481210000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1481853000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1482570000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
max,1483229000.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [None]:
class DNN(tf.keras.Model):

  def __init__(self):
    super().__init__()
    self.dense1 = tf.keras.layers.Dense(10, activation=tf.nn.relu)
    self.dense2 = tf.keras.layers.Dense(10, activation=tf.nn.relu)
    self.output_layer = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)

  def call(self, inputs):
    x = self.dense1(inputs)
    return self.output_layer(x)


batch_size = 10
epochs = 500
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model = DNN()
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])
model.fit(train_x, train_y, batch_size=batch_size, epochs=epochs, validation_split=0.2, verbose=0)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

