# IoT Device Prediction from Network Data



Modified version created on Google drive

In [1]:
from pathlib import Path
import os
import re
import apsw
import pandas as pd
import time
import pprint

# The fun starts here...

In [21]:
cwd = Path.cwd()
db_path = os.path.join(cwd.parent, 'Data', 'NetCollector.sqlite')

In [22]:
import sqlite3
conn = sqlite3.connect(db_path)
cur = conn.cursor()
sql = """

select s.srcport,
       s.dstport,
       s.lensum,
       s.pktcount,
       s.endtime - s.starttime as durn, 
       d.manufacturer,
       d.device_type
from sessions s
         inner join v_session_mnf_devicetype d on s.sessionid = d.sessionid
where d.device_type is not null

"""

arr = []
for row in cur.execute(sql):
    arr.append(row)

    
cols = ['SourcePort',
        'DestnPort',
        'SessionPackets',
        'SessionPktLen',
        'SessionPktArrivalTime',
        'Manufacturer',
        'DeviceType']
import pandas as pd
df = pd.DataFrame(data=arr, columns=cols)


In [23]:
arr = []
for row in cur.execute(sql):
    arr.append(row)


In [24]:
cols = ['SourcePort',
        'DestnPort',
        'SessionPackets',
        'SessionPktLen',
        'SessionPktArrivalTime',
        'Manufacturer',
        'DeviceType']
import pandas as pd
df = pd.DataFrame(data=arr, columns=cols)

In [25]:
df.head()

Unnamed: 0,SourcePort,DestnPort,SessionPackets,SessionPktLen,SessionPktArrivalTime,Manufacturer,DeviceType
0,55630,63960,36384156,126998,-419.030715,Ubiquiti Networks Inc.,Other
1,63960,55630,9039457,125631,-419.074508,"Apple, Inc.",Other
2,49322,7550,306906857,228847,-419.074864,Ubiquiti Networks Inc.,UVC-G3-Flex Camera
3,49323,7550,84313180,83489,-419.072925,Ubiquiti Networks Inc.,UVC-G3-Flex Camera
4,443,58235,18288,127,-434.02574,Ubiquiti Networks Inc.,Other


In [26]:
import pandas_profiling
pandas_profiling.ProfileReport(df)



In [7]:
drop_columns = ['SourcePort', 'DestnPort']

In [8]:
df.drop(drop_columns, axis=1, inplace=True)

**Categorical data mapping**

For the deep learning algorithm to work, we need to get rid of all categorical data.  For the Manufacturer's we will create a 1:1 mapping of the manufacturer name as per the wireshark OUI lookup dataset and the relative position of that in our ordered array of unique entries

In [9]:
# This is based on the superset from all records in the database
# the result above is from limiting our dataset to only 10,000
# sessions worth of data

mans = """
Amazon Technologies Inc.
Apple, Inc.
AzureWave Technology Inc.
Beijing LT Honway Technology Co.,Ltd
Google, Inc.
Hewlett Packard
Intel Corporate
Murata Manufacturing Co., Ltd.
Raspberry Pi Foundation
Realtek Semiconductor Corp.
Rivet Networks
Samsung Electro-Mechanics(Thailand)
Technicolor CH USA Inc.
Topwell International Holdinds Limited
Ubiquiti Networks Inc.
""".splitlines()

mapping = {k: v for v, k in enumerate((x for x in mans if len(x)>0),1)}
df['Manufacturer'] = df['Manufacturer'].map(mapping)
df.head()

Unnamed: 0,SessionPackets,SessionPktLen,SessionPktArrivalTime,Manufacturer,DeviceType
0,36384156,126998,-419.030715,15,Other
1,9039457,125631,-419.074508,2,Other
2,306906857,228847,-419.074864,15,UVC-G3-Flex Camera
3,84313180,83489,-419.072925,15,UVC-G3-Flex Camera
4,18288,127,-434.02574,15,Other


In [10]:
df['SessionPackets'] = (df['SessionPackets'] - df['SessionPackets'].mean()) / \
    (df['SessionPackets'].max() - df['SessionPackets'].min())

df['SessionPktLen'] = (df['SessionPktLen'] - df['SessionPktLen'].mean()) / \
    (df['SessionPktLen'].max() - df['SessionPktLen'].min())
    
df['SessionPktArrivalTime'] = (df['SessionPktArrivalTime'] - df['SessionPktArrivalTime'].mean()) / \
    (df['SessionPktArrivalTime'].max() - df['SessionPktArrivalTime'].min())

In [11]:
df.head()

Unnamed: 0,SessionPackets,SessionPktLen,SessionPktArrivalTime,Manufacturer,DeviceType
0,0.04876,0.254945,-0.103186,15,Other
1,0.011819,0.252193,-0.103197,2,Other
2,0.414216,0.459968,-0.103197,15,UVC-G3-Flex Camera
3,0.113508,0.167361,-0.103196,15,UVC-G3-Flex Camera
4,-0.000368,-0.000447,-0.106819,15,Other


In [12]:
from pandas import read_csv
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder

Using TensorFlow backend.


In [13]:
features = df.drop('DeviceType', axis=1).values
labels = df['DeviceType'].values


In [14]:
features

array([[ 4.87599300e-02,  2.54945094e-01, -1.03186141e-01,
         1.50000000e+01],
       [ 1.18193007e-02,  2.52193312e-01, -1.03196750e-01,
         2.00000000e+00],
       [ 4.14215728e-01,  4.59967952e-01, -1.03196836e-01,
         1.50000000e+01],
       ...,
       [-3.89839273e-04, -6.90824970e-04, -1.63587142e-03,
         1.50000000e+01],
       [-3.92207444e-04, -7.00890010e-04, -1.67841770e-03,
         7.00000000e+00],
       [-3.91988594e-04, -7.00890010e-04, -1.67841770e-03,
         1.50000000e+01]])

In [15]:
encoder = LabelEncoder()
encoder.fit(labels)
encoded_labels = encoder.transform(labels)

In [16]:
dummy_labels = np_utils.to_categorical(encoded_labels)
dummy_labels.shape

(9688, 7)

In [17]:
def create_model():
  model = Sequential([
    Dense(32, input_dim=4, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(7, activation='softmax')
  ])
  model.compile(loss= 'categorical_crossentropy' , optimizer= 'adam' , metrics=['accuracy'])
  return model


In [18]:
estimator = KerasClassifier(build_fn=create_model, epochs=100, batch_size=5, verbose=2)
kfold = KFold(n_splits=10, shuffle=True)
results = cross_val_score(estimator, features, dummy_labels, cv=kfold)
print("Accuracy: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Epoch 1/100
 - 1s - loss: 0.6347 - accuracy: 0.8534
Epoch 2/100
 - 1s - loss: 0.5685 - accuracy: 0.8557
Epoch 3/100
 - 1s - loss: 0.5367 - accuracy: 0.8557
Epoch 4/100
 - 1s - loss: 0.5037 - accuracy: 0.8557
Epoch 5/100
 - 1s - loss: 0.4745 - accuracy: 0.8762
Epoch 6/100
 - 1s - loss: 0.4545 - accuracy: 0.8953
Epoch 7/100
 - 1s - loss: 0.4408 - accuracy: 0.8953
Epoch 8/100
 - 1s - loss: 0.4343 - accuracy: 0.8971
Epoch 9/100
 - 1s - loss: 0.4327 - accuracy: 0.8965
Epoch 10/100
 - 1s - loss: 0.4288 - accuracy: 0.8949
Epoch 11/100
 - 1s - loss: 0.4269 - accuracy: 0.8967
Epoch 12/100
 - 1s - loss: 0.4271 - accuracy: 0.8956
Epoch 13/100
 - 1s - loss: 0.4243 - accuracy: 0.8956
Epoch 14/100
 - 1s - loss: 0.4253 - accuracy: 0.8952
Epoch 15/100
 - 1s - loss: 0.4208 - accuracy: 0.8951
Epoch 16/100
 - 1s - loss: 0.4195 - accuracy: 0.8954
Epoch 17/100
 - 1s - loss: 0.4185 - accuracy: 0.8959
Epoch 18/100
 - 1s - loss: 0.4192 - accuracy: 0.8956
Epoch 19/100
 - 1s - loss: 0.4157 - accuracy: 0.8954
Ep

Epoch 56/100
 - 1s - loss: 0.4109 - accuracy: 0.8937
Epoch 57/100
 - 1s - loss: 0.4051 - accuracy: 0.8944
Epoch 58/100
 - 1s - loss: 0.4061 - accuracy: 0.8945
Epoch 59/100
 - 1s - loss: 0.4048 - accuracy: 0.8956
Epoch 60/100
 - 1s - loss: 0.4090 - accuracy: 0.8944
Epoch 61/100
 - 1s - loss: 0.4060 - accuracy: 0.8948
Epoch 62/100
 - 1s - loss: 0.4062 - accuracy: 0.8944
Epoch 63/100
 - 1s - loss: 0.4104 - accuracy: 0.8948
Epoch 64/100
 - 1s - loss: 0.4047 - accuracy: 0.8945
Epoch 65/100
 - 1s - loss: 0.4173 - accuracy: 0.8930
Epoch 66/100
 - 1s - loss: 0.4055 - accuracy: 0.8949
Epoch 67/100
 - 1s - loss: 0.4088 - accuracy: 0.8947
Epoch 68/100
 - 1s - loss: 0.4046 - accuracy: 0.8946
Epoch 69/100
 - 1s - loss: 0.4065 - accuracy: 0.8945
Epoch 70/100
 - 1s - loss: 0.4068 - accuracy: 0.8949
Epoch 71/100
 - 1s - loss: 0.4059 - accuracy: 0.8948
Epoch 72/100
 - 1s - loss: 0.4025 - accuracy: 0.8941
Epoch 73/100
 - 1s - loss: 0.4051 - accuracy: 0.8953
Epoch 74/100
 - 1s - loss: 0.4081 - accuracy: 

Epoch 11/100
 - 1s - loss: 0.4315 - accuracy: 0.8951
Epoch 12/100
 - 1s - loss: 0.4304 - accuracy: 0.8949
Epoch 13/100
 - 1s - loss: 0.4276 - accuracy: 0.8948
Epoch 14/100
 - 1s - loss: 0.4279 - accuracy: 0.8944
Epoch 15/100
 - 1s - loss: 0.4272 - accuracy: 0.8953
Epoch 16/100
 - 1s - loss: 0.4396 - accuracy: 0.8877
Epoch 17/100
 - 1s - loss: 0.4803 - accuracy: 0.8681
Epoch 18/100
 - 1s - loss: 0.4602 - accuracy: 0.8858
Epoch 19/100
 - 1s - loss: 0.4425 - accuracy: 0.8953
Epoch 20/100
 - 1s - loss: 0.4324 - accuracy: 0.8963
Epoch 21/100
 - 1s - loss: 0.4269 - accuracy: 0.8951
Epoch 22/100
 - 1s - loss: 0.4241 - accuracy: 0.8948
Epoch 23/100
 - 1s - loss: 0.4215 - accuracy: 0.8940
Epoch 24/100
 - 1s - loss: 0.4231 - accuracy: 0.8947
Epoch 25/100
 - 1s - loss: 0.4193 - accuracy: 0.8955
Epoch 26/100
 - 1s - loss: 0.4191 - accuracy: 0.8944
Epoch 27/100
 - 1s - loss: 0.4654 - accuracy: 0.8765
Epoch 28/100
 - 1s - loss: 0.4358 - accuracy: 0.8953
Epoch 29/100
 - 1s - loss: 0.4256 - accuracy: 

Epoch 66/100
 - 1s - loss: 0.4137 - accuracy: 0.8955
Epoch 67/100
 - 1s - loss: 0.4131 - accuracy: 0.8952
Epoch 68/100
 - 1s - loss: 0.4121 - accuracy: 0.8955
Epoch 69/100
 - 1s - loss: 0.4129 - accuracy: 0.8956
Epoch 70/100
 - 1s - loss: 0.4127 - accuracy: 0.8953
Epoch 71/100
 - 1s - loss: 0.4113 - accuracy: 0.8959
Epoch 72/100
 - 1s - loss: 0.4108 - accuracy: 0.8954
Epoch 73/100
 - 1s - loss: 0.4174 - accuracy: 0.8947
Epoch 74/100
 - 1s - loss: 0.4109 - accuracy: 0.8960
Epoch 75/100
 - 1s - loss: 0.4095 - accuracy: 0.8961
Epoch 76/100
 - 1s - loss: 0.4103 - accuracy: 0.8959
Epoch 77/100
 - 1s - loss: 0.4081 - accuracy: 0.8959
Epoch 78/100
 - 1s - loss: 0.4075 - accuracy: 0.8962
Epoch 79/100
 - 1s - loss: 0.4071 - accuracy: 0.8954
Epoch 80/100
 - 1s - loss: 0.4070 - accuracy: 0.8960
Epoch 81/100
 - 1s - loss: 0.4067 - accuracy: 0.8960
Epoch 82/100
 - 1s - loss: 0.4054 - accuracy: 0.8954
Epoch 83/100
 - 1s - loss: 0.4063 - accuracy: 0.8963
Epoch 84/100
 - 1s - loss: 0.4040 - accuracy: 

Epoch 21/100
 - 1s - loss: 0.4145 - accuracy: 0.8943
Epoch 22/100
 - 1s - loss: 0.4147 - accuracy: 0.8954
Epoch 23/100
 - 1s - loss: 0.4133 - accuracy: 0.8946
Epoch 24/100
 - 1s - loss: 0.4117 - accuracy: 0.8952
Epoch 25/100
 - 1s - loss: 0.4113 - accuracy: 0.8957
Epoch 26/100
 - 1s - loss: 0.4103 - accuracy: 0.8959
Epoch 27/100
 - 1s - loss: 0.4141 - accuracy: 0.8947
Epoch 28/100
 - 1s - loss: 0.4097 - accuracy: 0.8963
Epoch 29/100
 - 1s - loss: 0.4095 - accuracy: 0.8953
Epoch 30/100
 - 1s - loss: 0.4074 - accuracy: 0.8960
Epoch 31/100
 - 1s - loss: 0.4095 - accuracy: 0.8957
Epoch 32/100
 - 1s - loss: 0.4067 - accuracy: 0.8959
Epoch 33/100
 - 1s - loss: 0.4084 - accuracy: 0.8951
Epoch 34/100
 - 1s - loss: 0.4057 - accuracy: 0.8960
Epoch 35/100
 - 1s - loss: 0.4060 - accuracy: 0.8955
Epoch 36/100
 - 1s - loss: 0.4037 - accuracy: 0.8972
Epoch 37/100
 - 1s - loss: 0.4045 - accuracy: 0.8951
Epoch 38/100
 - 1s - loss: 0.4074 - accuracy: 0.8951
Epoch 39/100
 - 1s - loss: 0.4046 - accuracy: 

Epoch 76/100
 - 1s - loss: 0.4024 - accuracy: 0.8949
Epoch 77/100
 - 1s - loss: 0.3997 - accuracy: 0.8948
Epoch 78/100
 - 1s - loss: 0.3996 - accuracy: 0.8947
Epoch 79/100
 - 1s - loss: 0.4004 - accuracy: 0.8948
Epoch 80/100
 - 1s - loss: 0.3988 - accuracy: 0.8956
Epoch 81/100
 - 1s - loss: 0.3976 - accuracy: 0.8961
Epoch 82/100
 - 1s - loss: 0.4002 - accuracy: 0.8945
Epoch 83/100
 - 1s - loss: 0.3982 - accuracy: 0.8962
Epoch 84/100
 - 1s - loss: 0.4006 - accuracy: 0.8947
Epoch 85/100
 - 1s - loss: 0.3995 - accuracy: 0.8961
Epoch 86/100
 - 1s - loss: 0.3981 - accuracy: 0.8956
Epoch 87/100
 - 1s - loss: 0.3978 - accuracy: 0.8962
Epoch 88/100
 - 1s - loss: 0.3998 - accuracy: 0.8954
Epoch 89/100
 - 1s - loss: 0.3959 - accuracy: 0.8965
Epoch 90/100
 - 1s - loss: 0.3965 - accuracy: 0.8955
Epoch 91/100
 - 1s - loss: 0.3987 - accuracy: 0.8949
Epoch 92/100
 - 1s - loss: 0.3961 - accuracy: 0.8959
Epoch 93/100
 - 1s - loss: 0.4000 - accuracy: 0.8960
Epoch 94/100
 - 1s - loss: 0.3979 - accuracy: 

Epoch 31/100
 - 1s - loss: 0.4096 - accuracy: 0.8946
Epoch 32/100
 - 1s - loss: 0.4107 - accuracy: 0.8947
Epoch 33/100
 - 1s - loss: 0.4092 - accuracy: 0.8952
Epoch 34/100
 - 1s - loss: 0.4092 - accuracy: 0.8943
Epoch 35/100
 - 1s - loss: 0.4090 - accuracy: 0.8953
Epoch 36/100
 - 1s - loss: 0.4081 - accuracy: 0.8948
Epoch 37/100
 - 1s - loss: 0.4065 - accuracy: 0.8953
Epoch 38/100
 - 1s - loss: 0.4084 - accuracy: 0.8942
Epoch 39/100
 - 1s - loss: 0.4084 - accuracy: 0.8953
Epoch 40/100
 - 1s - loss: 0.4089 - accuracy: 0.8940
Epoch 41/100
 - 1s - loss: 0.4062 - accuracy: 0.8948
Epoch 42/100
 - 1s - loss: 0.4096 - accuracy: 0.8939
Epoch 43/100
 - 1s - loss: 0.4060 - accuracy: 0.8948
Epoch 44/100
 - 1s - loss: 0.4065 - accuracy: 0.8947
Epoch 45/100
 - 1s - loss: 0.4066 - accuracy: 0.8952
Epoch 46/100
 - 1s - loss: 0.4044 - accuracy: 0.8958
Epoch 47/100
 - 1s - loss: 0.4074 - accuracy: 0.8952
Epoch 48/100
 - 1s - loss: 0.4046 - accuracy: 0.8948
Epoch 49/100
 - 1s - loss: 0.4027 - accuracy: 