In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from matplotlib import pyplot

In [2]:
#Sample CSV files of IPCam
fnamesIPcam = ["D:/alarmClockCam_1_Flow.csv",
          "D:/canarycam_1_Flow.csv",
          "D:/d3dtraffic_1_Flow.csv",
          "D:/ezviz_1_Flow.csv",
          "D:/netatmocam1_Flow.csv",
          "D:/v380Cam1_1_Flow.csv",
          ]

In [None]:
dfList = list()
for fname in fnamesIPcam: 
    data = pd.read_csv(fname)
    print('file:', fname, ' data size: ',data.shape)
    dfList.append(data)
############## combine the data frames
ipcamDF = pd.concat(dfList)
print(ipcamDF.shape)

In [4]:
#see unique labels
print(list(ipcamDF['Label'].unique()))

['IPCam_AlarmClock', 'IPCam_Canary', 'IPCam_D3D', 'IPCam_Ezviz', 'IPCam_Netatmo', 'IPCam_V380']


In [5]:

print(list(ipcamDF['Label'].unique()))


['IPCam_AlarmClock', 'IPCam_Canary', 'IPCam_D3D', 'IPCam_Ezviz', 'IPCam_Netatmo', 'IPCam_V380']


In [6]:
######### creating master DF
dfListAll = [ipcamDF]
masterDF = pd.concat(dfListAll)
print(masterDF.shape)

(10000, 84)


In [7]:
uniqueLabels = list(masterDF['Label'].unique())
print(uniqueLabels)

['IPCam_AlarmClock', 'IPCam_Canary', 'IPCam_D3D', 'IPCam_Ezviz', 'IPCam_Netatmo', 'IPCam_V380']


In [8]:
#discard all those columns that helps in uniquely identifying a flow
masterPruned = masterDF.iloc[:,np.arange(7,84)]
print('pruneddata size: ', masterPruned.shape)
ftList = list(masterPruned.columns)
print(ftList)

pruneddata size:  (10000, 77)
['Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Avg', 'Bwd Byts/b Av

In [9]:
#discard those flows that has 0 flow duration
masterSelected = masterPruned.drop(masterPruned[masterPruned['Flow Duration']==0].index)
masterSelected.shape

(10000, 77)

In [10]:
#identify those columns that has non-numeric values
typesList = masterSelected.dtypes
ftCount = len(ftList)
nonNumericFtList = list()
for fi in range(ftCount):
    if ftList[fi] != 'Label' and typesList[fi] != 'int64' and typesList[fi] != 'float64':
        print(ftList[fi], '  ', typesList[fi])
        nonNumericFtList.append(ftList[fi])
print(nonNumericFtList)

[]


In [11]:
for ft in nonNumericFtList:
    masterSelected[ft] = pd.to_numeric(masterSelected[ft], errors='coerce')

In [12]:
print(nonNumericFtList)
for ft in nonNumericFtList:
    countNaN = masterSelected[ft].isna().sum()
    print('converted to numeric ft: ', ft, '  countNaN: ', countNaN)

[]


In [13]:
number_inf = masterSelected[masterSelected == np.inf].count().sum()
print('count inf: ',number_inf)

count inf:  0


In [14]:
typesList = masterSelected.dtypes
ftCount = len(ftList)
nonNumericFtList = list()
for fi in range(ftCount):
    if ftList[fi] != 'Label' and typesList[fi] != 'int64' and typesList[fi] != 'float64':
        print(ftList[fi], '  ', typesList[fi])
        nonNumericFtList.append(ftList[fi])
print(nonNumericFtList)

[]


In [15]:
#identify those columns that has 0 std
count = 0
staticFtList = list()
for ft in ftList:
    if ft == 'Label':
        continue
    #print('ft : ', ft)
    ftMin = masterSelected[ft].min()
    ftMax = masterSelected[ft].max()
    #std = masterPruned[ft].std()
    
    if ftMin == ftMax:
        count += 1
        print('count: ', count, ' ft: ', ft, ' ftmin: ', ftMin, ' ftMax: ', ftMax)
        staticFtList.append(ft)
print(staticFtList)

count:  1  ft:  Fwd PSH Flags  ftmin:  0  ftMax:  0
count:  2  ft:  Fwd URG Flags  ftmin:  0  ftMax:  0
count:  3  ft:  Bwd URG Flags  ftmin:  0  ftMax:  0
count:  4  ft:  URG Flag Cnt  ftmin:  0  ftMax:  0
count:  5  ft:  CWE Flag Count  ftmin:  0  ftMax:  0
count:  6  ft:  ECE Flag Cnt  ftmin:  0  ftMax:  0
count:  7  ft:  Fwd Byts/b Avg  ftmin:  0  ftMax:  0
count:  8  ft:  Fwd Pkts/b Avg  ftmin:  0  ftMax:  0
count:  9  ft:  Fwd Blk Rate Avg  ftmin:  0  ftMax:  0
count:  10  ft:  Bwd Byts/b Avg  ftmin:  0  ftMax:  0
count:  11  ft:  Bwd Pkts/b Avg  ftmin:  0  ftMax:  0
count:  12  ft:  Bwd Blk Rate Avg  ftmin:  0  ftMax:  0
count:  13  ft:  Init Fwd Win Byts  ftmin:  -1  ftMax:  -1
count:  14  ft:  Fwd Seg Size Min  ftmin:  0  ftMax:  0
['Fwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Avg', 'Bwd Byts/b Avg', 'Bwd Pkts/b Avg', 'Bwd Blk Rate Avg', 'Init Fwd Win Byts', 'Fwd Seg Size 

In [16]:
#remove those columns that have 0 std
print('before droping column: ', masterSelected.shape)
for ft in staticFtList:
    masterSelected.drop([ft], axis=1,inplace=True)
print('after droping column: ', masterSelected.shape)    

before droping column:  (10000, 77)
after droping column:  (10000, 63)


In [17]:
xCount = masterSelected.shape[1]
print(xCount)
finalFtList = list(masterSelected.columns)
print(finalFtList)

63
['Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Bwd PSH Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Subflow Fwd Pkts', 'Subflow Fwd Byts', 'Subflow Bwd Pkts', 'Subflow Bwd Byts', 'Init Bwd Win Byts', 'Fwd Act Data Pkts', 'Active Mean', 'Active Std', 'Active Max', 'Active Min', 'Idle Mean', 'Idle 

In [19]:
#report intermediate data frame to a csv
masterSelected.to_csv('D:/code/MLCode_output/ProcessedNPrunned_BITSDataset_missclass.csv', index = False)

In [20]:
df = pd.read_csv('D:/code/MLCode_output/ProcessedNPrunned_BITSDataset_missclass.csv')

In [21]:
# Shuffle the dataset
df_shuffled = df.sample(n=len(df))

In [22]:
# Save the shuffled dataset to a new CSV file
df_shuffled.to_csv('D:/code/MLCode_output/ProcessedNPrunned_BITSDataset_missclass_Shuffled.csv', index=False)

In [23]:
df_shuffled = df_shuffled.reset_index(drop=True)

In [24]:
#classification with all the features
masterDS = df_shuffled.values  #masterSelected.values
X = masterDS[:,:-1]
Y = masterDS[:,-1]
print("Xshape: ", X.shape)
print("Yshape: ", Y.shape)

Xshape:  (10000, 62)
Yshape:  (10000,)


Decision Tree Classifier

In [25]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

In [28]:
#max tree depth = 10
depth = 11 #run the cells of 19, 20, 21 for the other values of depth -- to generate plot
clf = DecisionTreeClassifier(max_depth = depth, random_state = 42)
#scores = cross_val_score(clf, X, Y, cv=10)
#print(scores)

In [29]:
score_list = cross_val_score(clf, X, Y, cv=10)

In [30]:
print('score list: ', score_list)
print('depth = ', depth, ' avg. score = ', np.average(score_list), ' std. score: ', np.std(score_list))

score list:  [0.972 0.972 0.974 0.964 0.975 0.984 0.973 0.98  0.968 0.983]
depth =  11  avg. score =  0.9745000000000001  std. score:  0.006004165220911235


In [31]:
#create train and test set 
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

In [32]:
depth = 11 #take final depth from above analysis
clf = DecisionTreeClassifier(max_depth = depth, random_state = 42)
clf.fit(X_train, y_train)

In [33]:
test_pred_decision_tree = clf.predict(X_test)

In [34]:
confusion_matrix = metrics.confusion_matrix(y_test, test_pred_decision_tree)
print(confusion_matrix)

[[ 141    0    0    0    0    1]
 [   0  125    2    1    6    5]
 [   1    0  554    2    1    3]
 [   0    0    0  839    0    1]
 [   1    3   20    0  295    4]
 [   1    5   10    2    5 1272]]


In [36]:
conf_mat = metrics.confusion_matrix(y_test, test_pred_decision_tree, labels=uniqueLabels)
#[i][j] ==> knwon to be in group i and predicted as group j
print(conf_mat)
print('\n')
print('Clock as Clock = ',conf_mat[0][0],' percent = ', (conf_mat[0][0]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Ezviz = ',conf_mat[0][1],' percent = ', (conf_mat[0][1]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as D3D = ',conf_mat[0][2],' percent = ', (conf_mat[0][2]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Netatmo = ',conf_mat[0][3],' percent = ', (conf_mat[0][3]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Canary = ',conf_mat[0][4],' percent = ', (conf_mat[0][4]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Bulb = ',conf_mat[0][5],' percent = ', (conf_mat[0][5]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('\n')

print('Ezviz as Clock = ',conf_mat[1][0],' percent = ', (conf_mat[1][0]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Ezviz = ',conf_mat[1][1],' percent = ', (conf_mat[1][1]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as D3D =',conf_mat[1][2],' percent = ', (conf_mat[1][2]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Netatmo = ',conf_mat[1][3],' percent = ', (conf_mat[1][3]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Canary = ',conf_mat[1][4],' percent = ', (conf_mat[1][4]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Bulb = ',conf_mat[1][5],' percent = ', (conf_mat[1][5]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('\n')

print('D3D as Clock = ',conf_mat[2][0],' percent = ', (conf_mat[2][0]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Ezviz = ',conf_mat[2][1],' percent = ', (conf_mat[2][1]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as D3D = ',conf_mat[2][2],' percent = ', (conf_mat[2][2]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Netatmo = ',conf_mat[2][3],' percent = ', (conf_mat[2][3]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Canary = ',conf_mat[2][4],' percent = ', (conf_mat[2][4]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Bulb = ',conf_mat[2][5],' percent = ', (conf_mat[2][5]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('\n')

print('Netatmo as Clock = ',conf_mat[3][0],' percent = ', (conf_mat[3][0]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Ezviz = ',conf_mat[3][1],' percent = ', (conf_mat[3][1]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as D3D = ',conf_mat[3][2],' percent = ', (conf_mat[3][2]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Netatmo = ',conf_mat[3][3],' percent = ', (conf_mat[3][3]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Canary = ',conf_mat[3][4],' percent = ', (conf_mat[3][4]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Bulb = ',conf_mat[3][5],' percent = ', (conf_mat[3][5]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('\n')

print('Canary as Clock = ',conf_mat[4][0],' percent = ', (conf_mat[4][0]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Ezviz = ',conf_mat[4][1],' percent = ', (conf_mat[4][1]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as D3D = ',conf_mat[4][2],' percent = ', (conf_mat[4][2]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Netatmo = ',conf_mat[4][3],' percent = ', (conf_mat[4][3]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Canary = ',conf_mat[4][4],' percent = ', (conf_mat[4][4]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Bulb = ',conf_mat[4][5],' percent = ', (conf_mat[4][5]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('\n')

print('Bulb as Clock = ',conf_mat[5][0],' percent = ', (conf_mat[5][0]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Ezviz = ',conf_mat[5][1],' percent = ', (conf_mat[5][1]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as D3D = ',conf_mat[5][2],' percent = ', (conf_mat[5][2]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Netatmo = ',conf_mat[5][3],' percent = ', (conf_mat[5][3]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Canary = ',conf_mat[5][4],' percent = ', (conf_mat[5][4]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Bulb = ',conf_mat[5][5],' percent = ', (conf_mat[5][5]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))

[[ 141    0    0    0    0    1]
 [   0  125    2    1    6    5]
 [   1    0  554    2    1    3]
 [   0    0    0  839    0    1]
 [   1    3   20    0  295    4]
 [   1    5   10    2    5 1272]]


Clock as Clock =  141  percent =  0.9929577464788732
Clock as Ezviz =  0  percent =  0.0
Clock as D3D =  0  percent =  0.0
Clock as Netatmo =  0  percent =  0.0
Clock as Canary =  0  percent =  0.0
Clock as Bulb =  1  percent =  0.007042253521126761


Ezviz as Clock =  0  percent =  0.0
Ezviz as Ezviz =  125  percent =  0.8992805755395683
Ezviz as D3D = 2  percent =  0.014388489208633094
Ezviz as Netatmo =  1  percent =  0.007194244604316547
Ezviz as Canary =  6  percent =  0.04316546762589928
Ezviz as Bulb =  5  percent =  0.03597122302158273


D3D as Clock =  1  percent =  0.0017825311942959
D3D as Ezviz =  0  percent =  0.0
D3D as D3D =  554  percent =  0.9875222816399287
D3D as Netatmo =  2  percent =  0.0035650623885918
D3D as Canary =  1  percent =  0.0017825311942959
D3D as Bulb = 

In [37]:
accuracy = metrics.accuracy_score(y_test, test_pred_decision_tree)
precision = metrics.precision_score(y_test, test_pred_decision_tree, average=None)
recall = metrics.recall_score(y_test, test_pred_decision_tree, average =None)
f1 = metrics.f1_score(y_test, test_pred_decision_tree, average=None)

In [38]:
print('accuracy = ', accuracy, ' precision: ', precision, ' recall: ', recall, '  f1: ',f1)

accuracy =  0.9775757575757575  precision:  [0.97916667 0.93984962 0.94539249 0.99407583 0.96091205 0.98911353]  recall:  [0.99295775 0.89928058 0.98752228 0.99880952 0.91331269 0.98223938]   f1:  [0.98601399 0.91911765 0.96599826 0.99643705 0.93650794 0.98566447]


Extra Tree Classifier

In [39]:
df = pd.read_csv('D:/code/MLCode_output/ProcessedNPrunned_BITSDataset_missclass.csv')

In [40]:
# Shuffle the dataset
df_shuffled = df.sample(n=len(df))

In [43]:
df = pd.read_csv('D:/code/MLCode_output/ProcessedNPrunned_BITSDataset_missclass.csv')

In [44]:
df_shuffled = df_shuffled.reset_index(drop=True)

In [45]:
#classification with all the features
masterDS = df_shuffled.values  #masterSelected.values
X = masterDS[:,:-1]
Y = masterDS[:,-1]
print("Xshape: ", X.shape)
print("Yshape: ", Y.shape)

Xshape:  (10000, 62)
Yshape:  (10000,)


In [46]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.25, random_state = 0)

In [47]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import KFold, cross_val_score
from numpy import mean

In [48]:
# Building the model
extra_tree_forest = ExtraTreesClassifier(n_estimators = 5, criterion ='entropy', max_features = xCount)

In [49]:
# Training the model
extra_tree_forest.fit(X, Y)

In [50]:
# Computing the importance of each feature
feature_importance = extra_tree_forest.feature_importances_

In [51]:
# Normalizing the individual importances
feature_importance_normalized = np.std([tree.feature_importances_ for tree in
                                        extra_tree_forest.estimators_],
                                        axis = 0)

In [52]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import ExtraTreesClassifier
from numpy import mean
from numpy import std

In [53]:
# evaluate the model
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(extra_tree_forest, X, Y, scoring='accuracy', cv=cv, n_jobs=1, error_score='raise')
# report performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))

Accuracy: 0.991 (0.003)


In [54]:
y_pred = extra_tree_forest.predict(X_test)

In [55]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[101   0   0   0   0   0]
 [  0 107   0   0   0   0]
 [  0   0 431   0   0   0]
 [  0   0   0 643   0   0]
 [  0   0   0   0 235   0]
 [  0   0   0   0   0 983]]


1.0

In [57]:
conf_mat = metrics.confusion_matrix(y_test, y_pred, labels=uniqueLabels)
#[i][j] ==> knwon to be in group i and predicted as group j
print(conf_mat) 
print('Clock as Clock = ',conf_mat[0][0],' percent = ', (conf_mat[0][0]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Ezviz = ',conf_mat[0][1],' percent = ', (conf_mat[0][1]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as D3D = ',conf_mat[0][2],' percent = ', (conf_mat[0][2]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Netatmo = ',conf_mat[0][3],' percent = ', (conf_mat[0][3]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Canary = ',conf_mat[0][4],' percent = ', (conf_mat[0][4]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Bulb = ',conf_mat[0][5],' percent = ', (conf_mat[0][5]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('\n')

print('Ezviz as Clock = ',conf_mat[1][0],' percent = ', (conf_mat[1][0]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Ezviz = ',conf_mat[1][1],' percent = ', (conf_mat[1][1]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as D3D =',conf_mat[1][2],' percent = ', (conf_mat[1][2]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Netatmo = ',conf_mat[1][3],' percent = ', (conf_mat[1][3]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Canary = ',conf_mat[1][4],' percent = ', (conf_mat[1][4]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Bulb = ',conf_mat[1][5],' percent = ', (conf_mat[1][5]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('\n')

print('D3D as Clock = ',conf_mat[2][0],' percent = ', (conf_mat[2][0]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Ezviz = ',conf_mat[2][1],' percent = ', (conf_mat[2][1]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as D3D = ',conf_mat[2][2],' percent = ', (conf_mat[2][2]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Netatmo = ',conf_mat[2][3],' percent = ', (conf_mat[2][3]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Canary = ',conf_mat[2][4],' percent = ', (conf_mat[2][4]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Bulb = ',conf_mat[2][5],' percent = ', (conf_mat[2][5]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('\n')

print('Netatmo as Clock = ',conf_mat[3][0],' percent = ', (conf_mat[3][0]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Ezviz = ',conf_mat[3][1],' percent = ', (conf_mat[3][1]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as D3D = ',conf_mat[3][2],' percent = ', (conf_mat[3][2]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Netatmo = ',conf_mat[3][3],' percent = ', (conf_mat[3][3]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Canary = ',conf_mat[3][4],' percent = ', (conf_mat[3][4]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Bulb = ',conf_mat[3][5],' percent = ', (conf_mat[3][5]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('\n')

print('Canary as Clock = ',conf_mat[4][0],' percent = ', (conf_mat[4][0]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Ezviz = ',conf_mat[4][1],' percent = ', (conf_mat[4][1]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as D3D = ',conf_mat[4][2],' percent = ', (conf_mat[4][2]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Netatmo = ',conf_mat[4][3],' percent = ', (conf_mat[4][3]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Canary = ',conf_mat[4][4],' percent = ', (conf_mat[4][4]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Bulb = ',conf_mat[4][5],' percent = ', (conf_mat[4][5]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('\n')

print('Bulb as Clock = ',conf_mat[5][0],' percent = ', (conf_mat[5][0]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Ezviz = ',conf_mat[5][1],' percent = ', (conf_mat[5][1]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as D3D = ',conf_mat[5][2],' percent = ', (conf_mat[5][2]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Netatmo = ',conf_mat[5][3],' percent = ', (conf_mat[5][3]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Canary = ',conf_mat[5][4],' percent = ', (conf_mat[5][4]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Bulb = ',conf_mat[5][5],' percent = ', (conf_mat[5][5]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))

[[101   0   0   0   0   0]
 [  0 107   0   0   0   0]
 [  0   0 431   0   0   0]
 [  0   0   0 643   0   0]
 [  0   0   0   0 235   0]
 [  0   0   0   0   0 983]]


Clock as Clock =  101  percent =  1.0
Clock as Ezviz =  0  percent =  0.0
Clock as D3D =  0  percent =  0.0
Clock as Netatmo =  0  percent =  0.0
Clock as Canary =  0  percent =  0.0
Clock as Bulb =  0  percent =  0.0


Ezviz as Clock =  0  percent =  0.0
Ezviz as Ezviz =  107  percent =  1.0
Ezviz as D3D = 0  percent =  0.0
Ezviz as Netatmo =  0  percent =  0.0
Ezviz as Canary =  0  percent =  0.0
Ezviz as Bulb =  0  percent =  0.0


D3D as Clock =  0  percent =  0.0
D3D as Ezviz =  0  percent =  0.0
D3D as D3D =  431  percent =  1.0
D3D as Netatmo =  0  percent =  0.0
D3D as Canary =  0  percent =  0.0
D3D as Bulb =  0  percent =  0.0


Netatmo as Clock =  0  percent =  0.0
Netatmo as Ezviz =  0  percent =  0.0
Netatmo as D3D =  0  percent =  0.0
Netatmo as Netatmo =  643  percent =  1.0
Netatmo as Canary =  0  percent = 

In [58]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

                 precision    recall  f1-score   support

        Dropcam       1.00      1.00      1.00      3793
          IPCam       1.00      1.00      1.00      2406
     NetatmoCam       1.00      1.00      1.00      9545
SamsungSmartCam       1.00      1.00      1.00     38501
      TPLinkCam       1.00      1.00      1.00      4226
    WithingsCam       1.00      1.00      1.00      6757

       accuracy                           1.00     65228
      macro avg       1.00      1.00      1.00     65228
   weighted avg       1.00      1.00      1.00     65228



Guassian Naive Baise Classifier

In [58]:
df = pd.read_csv('D:/code/MLCode_output/ProcessedNPrunned_BITSDataset_missclass.csv')

In [59]:
# Shuffle the dataset
df_shuffled = df.sample(n=len(df))

In [60]:
# Save the shuffled dataset to a new CSV file
df_shuffled.to_csv('D:/code/MLCode_output/ProcessedNPrunned_BITSDataset_missclass_gnb.csv', index=False)

In [61]:
df_shuffled = df_shuffled.reset_index(drop=True)

In [62]:
#classification with all the features
masterDS = df_shuffled.values  #masterSelected.values
X = masterDS[:,:-1]
Y = masterDS[:,-1]
print("Xshape: ", X.shape)
print("Yshape: ", Y.shape)

Xshape:  (10000, 62)
Yshape:  (10000,)


In [63]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=.25, random_state=50)

In [64]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np

In [65]:
clf = GaussianNB()
clf.fit(x_train, y_train.flatten())

In [66]:
predictions = clf.predict(x_test)
print('Accuracy Score: ', accuracy_score(predictions, y_test))

Accuracy Score:  0.3824


In [67]:
from sklearn.metrics import confusion_matrix 
cm = confusion_matrix(y_test, predictions) 
print(cm)

[[ 22   1   5   1  59   6]
 [  0  48  42   0  20   0]
 [ 37   0 258   2 144   1]
 [ 12   6  92 207 315   2]
 [  0   0   2   0 212   0]
 [  2 385 129  12 269 209]]


In [68]:
conf_mat = metrics.confusion_matrix(y_test, predictions, labels=uniqueLabels)
#[i][j] ==> knwon to be in group i and predicted as group j
print(conf_mat)
print('\n')
print('Clock as Clock = ',conf_mat[0][0],' percent = ', (conf_mat[0][0]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Ezviz = ',conf_mat[0][1],' percent = ', (conf_mat[0][1]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as D3D = ',conf_mat[0][2],' percent = ', (conf_mat[0][2]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Netatmo = ',conf_mat[0][3],' percent = ', (conf_mat[0][3]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Canary = ',conf_mat[0][4],' percent = ', (conf_mat[0][4]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Bulb = ',conf_mat[0][5],' percent = ', (conf_mat[0][5]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('\n')

print('Ezviz as Clock = ',conf_mat[1][0],' percent = ', (conf_mat[1][0]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Ezviz = ',conf_mat[1][1],' percent = ', (conf_mat[1][1]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as D3D =',conf_mat[1][2],' percent = ', (conf_mat[1][2]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Netatmo = ',conf_mat[1][3],' percent = ', (conf_mat[1][3]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Canary = ',conf_mat[1][4],' percent = ', (conf_mat[1][4]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Bulb = ',conf_mat[1][5],' percent = ', (conf_mat[1][5]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('\n')

print('D3D as Clock = ',conf_mat[2][0],' percent = ', (conf_mat[2][0]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Ezviz = ',conf_mat[2][1],' percent = ', (conf_mat[2][1]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as D3D = ',conf_mat[2][2],' percent = ', (conf_mat[2][2]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Netatmo = ',conf_mat[2][3],' percent = ', (conf_mat[2][3]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Canary = ',conf_mat[2][4],' percent = ', (conf_mat[2][4]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Bulb = ',conf_mat[2][5],' percent = ', (conf_mat[2][5]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('\n')

print('Netatmo as Clock = ',conf_mat[3][0],' percent = ', (conf_mat[3][0]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Ezviz = ',conf_mat[3][1],' percent = ', (conf_mat[3][1]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as D3D = ',conf_mat[3][2],' percent = ', (conf_mat[3][2]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Netatmo = ',conf_mat[3][3],' percent = ', (conf_mat[3][3]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Canary = ',conf_mat[3][4],' percent = ', (conf_mat[3][4]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Bulb = ',conf_mat[3][5],' percent = ', (conf_mat[3][5]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('\n')

print('Canary as Clock = ',conf_mat[4][0],' percent = ', (conf_mat[4][0]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Ezviz = ',conf_mat[4][1],' percent = ', (conf_mat[4][1]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as D3D = ',conf_mat[4][2],' percent = ', (conf_mat[4][2]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Netatmo = ',conf_mat[4][3],' percent = ', (conf_mat[4][3]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Canary = ',conf_mat[4][4],' percent = ', (conf_mat[4][4]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Bulb = ',conf_mat[4][5],' percent = ', (conf_mat[4][5]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('\n')

print('Bulb as Clock = ',conf_mat[5][0],' percent = ', (conf_mat[5][0]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Ezviz = ',conf_mat[5][1],' percent = ', (conf_mat[5][1]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as D3D = ',conf_mat[5][2],' percent = ', (conf_mat[5][2]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Netatmo = ',conf_mat[5][3],' percent = ', (conf_mat[5][3]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Canary = ',conf_mat[5][4],' percent = ', (conf_mat[5][4]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Bulb = ',conf_mat[5][5],' percent = ', (conf_mat[5][5]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))

[[ 22   1   5   1  59   6]
 [  0  48  42   0  20   0]
 [ 37   0 258   2 144   1]
 [ 12   6  92 207 315   2]
 [  0   0   2   0 212   0]
 [  2 385 129  12 269 209]]


Clock as Clock =  22  percent =  0.23404255319148937
Clock as Ezviz =  1  percent =  0.010638297872340425
Clock as D3D =  5  percent =  0.05319148936170213
Clock as Netatmo =  1  percent =  0.010638297872340425
Clock as Canary =  59  percent =  0.6276595744680851
Clock as Bulb =  6  percent =  0.06382978723404255


Ezviz as Clock =  0  percent =  0.0
Ezviz as Ezviz =  48  percent =  0.43636363636363634
Ezviz as D3D = 42  percent =  0.38181818181818183
Ezviz as Netatmo =  0  percent =  0.0
Ezviz as Canary =  20  percent =  0.18181818181818182
Ezviz as Bulb =  0  percent =  0.0


D3D as Clock =  37  percent =  0.083710407239819
D3D as Ezviz =  0  percent =  0.0
D3D as D3D =  258  percent =  0.583710407239819
D3D as Netatmo =  2  percent =  0.004524886877828055
D3D as Canary =  144  percent =  0.3257918552036199
D3D as Bulb = 

In [70]:
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions))

                  precision    recall  f1-score   support

IPCam_AlarmClock       0.30      0.23      0.26        94
    IPCam_Canary       0.11      0.44      0.17       110
       IPCam_D3D       0.49      0.58      0.53       442
     IPCam_Ezviz       0.93      0.33      0.48       634
   IPCam_Netatmo       0.21      0.99      0.34       214
      IPCam_V380       0.96      0.21      0.34      1006

        accuracy                           0.38      2500
       macro avg       0.50      0.46      0.36      2500
    weighted avg       0.74      0.38      0.40      2500



KNN Classifier

In [72]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.25, random_state = 0)

In [73]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test) #avoid data leakage

In [74]:
from math import sqrt
class KNN():
  def __init__(self,k):
    self.k=k
    print(self.k)
  def fit(self,X_train,y_train):
    self.x_train=X_train
    self.y_train=y_train
  def calculate_euclidean(self,sample1,sample2):
    distance=0.0
    for i in range(len(sample1)):
      distance+=(sample1[i]-sample2[i])**2 #Euclidean Distance = sqrt(sum i to N (x1_i – x2_i)^2)
    return sqrt(distance)
  def nearest_neighbors(self,test_sample):
    distances=[]#calculate distances from a test sample to every sample in a training set
    for i in range(len(self.x_train)):
      distances.append((self.y_train[i],self.calculate_euclidean(self.x_train[i],test_sample)))
    distances.sort(key=lambda x:x[1])#sort in ascending order, based on a distance value
    neighbors=[]
    for i in range(self.k): #get first k samples
      neighbors.append(distances[i][0])
    return neighbors
  def predict(self,test_set):
    predictions=[]
    for test_sample in test_set:
      neighbors=self.nearest_neighbors(test_sample)
      labels=[sample for sample in neighbors]
      prediction=max(labels,key=labels.count)
      predictions.append(prediction)
    return predictions

In [75]:
model=KNN(5) #our model
model.fit(X_train,y_train)

5


In [76]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)#The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric.
classifier.fit(X_train, y_train)

In [77]:
y_pred = classifier.predict(X_test)

In [78]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[ 91   0   9   3   0   2]
 [  2  97   1   1   2   9]
 [  4   0 400   0   1   6]
 [  1   2   2 644   8   4]
 [  0   1   0   3 218  11]
 [  3   0   3  11  21 940]]


0.956

In [79]:
conf_mat = metrics.confusion_matrix(y_test, y_pred, labels=uniqueLabels)
#[i][j] ==> knwon to be in group i and predicted as group j
print(conf_mat)
print('\n')
print('Clock as Clock = ',conf_mat[0][0],' percent = ', (conf_mat[0][0]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Ezviz = ',conf_mat[0][1],' percent = ', (conf_mat[0][1]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as D3D = ',conf_mat[0][2],' percent = ', (conf_mat[0][2]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Netatmo = ',conf_mat[0][3],' percent = ', (conf_mat[0][3]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Canary = ',conf_mat[0][4],' percent = ', (conf_mat[0][4]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Bulb = ',conf_mat[0][5],' percent = ', (conf_mat[0][5]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('\n')

print('Ezviz as Clock = ',conf_mat[1][0],' percent = ', (conf_mat[1][0]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Ezviz = ',conf_mat[1][1],' percent = ', (conf_mat[1][1]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as D3D =',conf_mat[1][2],' percent = ', (conf_mat[1][2]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Netatmo = ',conf_mat[1][3],' percent = ', (conf_mat[1][3]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Canary = ',conf_mat[1][4],' percent = ', (conf_mat[1][4]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Bulb = ',conf_mat[1][5],' percent = ', (conf_mat[1][5]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('\n')

print('D3D as Clock = ',conf_mat[2][0],' percent = ', (conf_mat[2][0]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Ezviz = ',conf_mat[2][1],' percent = ', (conf_mat[2][1]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as D3D = ',conf_mat[2][2],' percent = ', (conf_mat[2][2]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Netatmo = ',conf_mat[2][3],' percent = ', (conf_mat[2][3]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Canary = ',conf_mat[2][4],' percent = ', (conf_mat[2][4]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Bulb = ',conf_mat[2][5],' percent = ', (conf_mat[2][5]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('\n')

print('Netatmo as Clock = ',conf_mat[3][0],' percent = ', (conf_mat[3][0]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Ezviz = ',conf_mat[3][1],' percent = ', (conf_mat[3][1]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as D3D = ',conf_mat[3][2],' percent = ', (conf_mat[3][2]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Netatmo = ',conf_mat[3][3],' percent = ', (conf_mat[3][3]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Canary = ',conf_mat[3][4],' percent = ', (conf_mat[3][4]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Bulb = ',conf_mat[3][5],' percent = ', (conf_mat[3][5]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('\n')

print('Canary as Clock = ',conf_mat[4][0],' percent = ', (conf_mat[4][0]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Ezviz = ',conf_mat[4][1],' percent = ', (conf_mat[4][1]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as D3D = ',conf_mat[4][2],' percent = ', (conf_mat[4][2]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Netatmo = ',conf_mat[4][3],' percent = ', (conf_mat[4][3]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Canary = ',conf_mat[4][4],' percent = ', (conf_mat[4][4]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Bulb = ',conf_mat[4][5],' percent = ', (conf_mat[4][5]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('\n')

print('Bulb as Clock = ',conf_mat[5][0],' percent = ', (conf_mat[5][0]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Ezviz = ',conf_mat[5][1],' percent = ', (conf_mat[5][1]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as D3D = ',conf_mat[5][2],' percent = ', (conf_mat[5][2]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Netatmo = ',conf_mat[5][3],' percent = ', (conf_mat[5][3]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Canary = ',conf_mat[5][4],' percent = ', (conf_mat[5][4]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Bulb = ',conf_mat[5][5],' percent = ', (conf_mat[5][5]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))

[[ 91   0   9   3   0   2]
 [  2  97   1   1   2   9]
 [  4   0 400   0   1   6]
 [  1   2   2 644   8   4]
 [  0   1   0   3 218  11]
 [  3   0   3  11  21 940]]


Clock as Clock =  91  percent =  0.8666666666666667
Clock as Ezviz =  0  percent =  0.0
Clock as D3D =  9  percent =  0.08571428571428572
Clock as Netatmo =  3  percent =  0.02857142857142857
Clock as Canary =  0  percent =  0.0
Clock as Bulb =  2  percent =  0.01904761904761905


Ezviz as Clock =  2  percent =  0.017857142857142856
Ezviz as Ezviz =  97  percent =  0.8660714285714286
Ezviz as D3D = 1  percent =  0.008928571428571428
Ezviz as Netatmo =  1  percent =  0.008928571428571428
Ezviz as Canary =  2  percent =  0.017857142857142856
Ezviz as Bulb =  9  percent =  0.08035714285714286


D3D as Clock =  4  percent =  0.009732360097323601
D3D as Ezviz =  0  percent =  0.0
D3D as D3D =  400  percent =  0.9732360097323601
D3D as Netatmo =  0  percent =  0.0
D3D as Canary =  1  percent =  0.0024330900243309003
D3D as Bulb =

In [81]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

                  precision    recall  f1-score   support

IPCam_AlarmClock       0.90      0.87      0.88       105
    IPCam_Canary       0.97      0.87      0.92       112
       IPCam_D3D       0.96      0.97      0.97       411
     IPCam_Ezviz       0.97      0.97      0.97       661
   IPCam_Netatmo       0.87      0.94      0.90       233
      IPCam_V380       0.97      0.96      0.96       978

        accuracy                           0.96      2500
       macro avg       0.94      0.93      0.93      2500
    weighted avg       0.96      0.96      0.96      2500



LKSVM Classifier

In [82]:
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Load the dataset
icam = pd.read_csv('D:/code/MLCode_output/ProcessedNPrunned_BITSDataset_missclass_gnb.csv') #UNSW dataset

# Splitting the data into features and labels
X = icam.iloc[:, :-1]
y = icam.iloc[:, -1]

# Scaling the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Splitting the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=109)

# Create a svm Classifier
clf = SVC(kernel='linear') 

# Train the model using the training sets
clf.fit(X_train, y_train)

# Predict the response for test dataset
y_pred = clf.predict(X_test)

# Model Accuracy: how often is the classifier correct?Accuracy: 0.9476
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.8948


In [83]:
conf_mat = metrics.confusion_matrix(y_test, y_pred, labels=uniqueLabels)
#[i][j] ==> knwon to be in group i and predicted as group j
print(conf_mat)
print('\n')
print('Clock as Clock = ',conf_mat[0][0],' percent = ', (conf_mat[0][0]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Ezviz = ',conf_mat[0][1],' percent = ', (conf_mat[0][1]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as D3D = ',conf_mat[0][2],' percent = ', (conf_mat[0][2]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Netatmo = ',conf_mat[0][3],' percent = ', (conf_mat[0][3]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Canary = ',conf_mat[0][4],' percent = ', (conf_mat[0][4]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Bulb = ',conf_mat[0][5],' percent = ', (conf_mat[0][5]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('\n')

print('Ezviz as Clock = ',conf_mat[1][0],' percent = ', (conf_mat[1][0]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Ezviz = ',conf_mat[1][1],' percent = ', (conf_mat[1][1]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as D3D =',conf_mat[1][2],' percent = ', (conf_mat[1][2]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Netatmo = ',conf_mat[1][3],' percent = ', (conf_mat[1][3]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Canary = ',conf_mat[1][4],' percent = ', (conf_mat[1][4]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Bulb = ',conf_mat[1][5],' percent = ', (conf_mat[1][5]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('\n')

print('D3D as Clock = ',conf_mat[2][0],' percent = ', (conf_mat[2][0]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Ezviz = ',conf_mat[2][1],' percent = ', (conf_mat[2][1]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as D3D = ',conf_mat[2][2],' percent = ', (conf_mat[2][2]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Netatmo = ',conf_mat[2][3],' percent = ', (conf_mat[2][3]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Canary = ',conf_mat[2][4],' percent = ', (conf_mat[2][4]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Bulb = ',conf_mat[2][5],' percent = ', (conf_mat[2][5]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('\n')

print('Netatmo as Clock = ',conf_mat[3][0],' percent = ', (conf_mat[3][0]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Ezviz = ',conf_mat[3][1],' percent = ', (conf_mat[3][1]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as D3D = ',conf_mat[3][2],' percent = ', (conf_mat[3][2]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Netatmo = ',conf_mat[3][3],' percent = ', (conf_mat[3][3]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Canary = ',conf_mat[3][4],' percent = ', (conf_mat[3][4]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Bulb = ',conf_mat[3][5],' percent = ', (conf_mat[3][5]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('\n')

print('Canary as Clock = ',conf_mat[4][0],' percent = ', (conf_mat[4][0]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Ezviz = ',conf_mat[4][1],' percent = ', (conf_mat[4][1]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as D3D = ',conf_mat[4][2],' percent = ', (conf_mat[4][2]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Netatmo = ',conf_mat[4][3],' percent = ', (conf_mat[4][3]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Canary = ',conf_mat[4][4],' percent = ', (conf_mat[4][4]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Bulb = ',conf_mat[4][5],' percent = ', (conf_mat[4][5]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('\n')

print('Bulb as Clock = ',conf_mat[5][0],' percent = ', (conf_mat[5][0]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Ezviz = ',conf_mat[5][1],' percent = ', (conf_mat[5][1]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as D3D = ',conf_mat[5][2],' percent = ', (conf_mat[5][2]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Netatmo = ',conf_mat[5][3],' percent = ', (conf_mat[5][3]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Canary = ',conf_mat[5][4],' percent = ', (conf_mat[5][4]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Bulb = ',conf_mat[5][5],' percent = ', (conf_mat[5][5]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))

[[ 73   0   5   0   0  34]
 [  5  90   0   0   3  11]
 [  0   2 435   0   0  30]
 [ 22   1   7 575   1  12]
 [  2   2   0   0 173  48]
 [ 13   2  16   8  39 891]]


Clock as Clock =  73  percent =  0.6517857142857143
Clock as Ezviz =  0  percent =  0.0
Clock as D3D =  5  percent =  0.044642857142857144
Clock as Netatmo =  0  percent =  0.0
Clock as Canary =  0  percent =  0.0
Clock as Bulb =  34  percent =  0.30357142857142855


Ezviz as Clock =  5  percent =  0.045871559633027525
Ezviz as Ezviz =  90  percent =  0.8256880733944955
Ezviz as D3D = 0  percent =  0.0
Ezviz as Netatmo =  0  percent =  0.0
Ezviz as Canary =  3  percent =  0.027522935779816515
Ezviz as Bulb =  11  percent =  0.10091743119266056


D3D as Clock =  0  percent =  0.0
D3D as Ezviz =  2  percent =  0.004282655246252677
D3D as D3D =  435  percent =  0.9314775160599572
D3D as Netatmo =  0  percent =  0.0
D3D as Canary =  0  percent =  0.0
D3D as Bulb =  30  percent =  0.06423982869379015


Netatmo as Clock =  22  pe

Accuracy: 0.7034447396634503

Random Forest classifier

In [84]:
# split data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.33, random_state = 42)

In [85]:
# check the shape of X_train and X_test
X_train.shape, X_test.shape

((6700, 62), (3300, 62))

In [86]:
#from sklearn.preprocessing import CategoricalEncoder
import category_encoders as ce

# encode categorical variables with ordinal encoding
encoder = ce.OrdinalEncoder(cols=[])
X_train = encoder.fit_transform(X_train)
X_test = encoder.transform(X_test)

In [87]:
# import Random Forest classifier

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# instantiate the classifier 
rfc = RandomForestClassifier(random_state=0)

# fit the model
rfc.fit(X_train, y_train)

# Predict the Test set results
y_pred = rfc.predict(X_test)

# Check accuracy score 
print('Model accuracy score with 10 decision-trees : {0:0.4f}'. format(accuracy_score(y_test, y_pred)))

Model accuracy score with 10 decision-trees : 0.9821


In [85]:
# instantiate the classifier with n_estimators = 100
rfc_100 = RandomForestClassifier(n_estimators=100, random_state=0)

# fit the model to the training set
rfc_100.fit(X_train, y_train)

# Predict on the test set results
y_pred_100 = rfc_100.predict(X_test)

# Check accuracy score 
print('Model accuracy score with 100 decision-trees : {0:0.4f}'. format(accuracy_score(y_test, y_pred_100)))

Model accuracy score with 100 decision-trees : 0.9963


In [88]:
# create the classifier with n_estimators = 100
clf = RandomForestClassifier(n_estimators=100, random_state=0)
# fit the model to the training set
clf.fit(X_train, y_train)

In [89]:
# Print the Confusion Matrix and slice it into four pieces
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print('Confusion matrix\n\n', cm)

Confusion matrix

 [[ 125    1    6    0    0    5]
 [   2  122    0    1    3    6]
 [   2    3  550    0    2    3]
 [   0    0    2  840    1    4]
 [   0    2    1    2  283    6]
 [   1    2    0    1    3 1321]]


In [90]:
conf_mat = metrics.confusion_matrix(y_test, y_pred, labels=uniqueLabels)
#[i][j] ==> knwon to be in group i and predicted as group j
print(conf_mat)
print('\n')
print('Clock as Clock = ',conf_mat[0][0],' percent = ', (conf_mat[0][0]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Ezviz = ',conf_mat[0][1],' percent = ', (conf_mat[0][1]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as D3D = ',conf_mat[0][2],' percent = ', (conf_mat[0][2]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Netatmo = ',conf_mat[0][3],' percent = ', (conf_mat[0][3]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Canary = ',conf_mat[0][4],' percent = ', (conf_mat[0][4]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Bulb = ',conf_mat[0][5],' percent = ', (conf_mat[0][5]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('\n')

print('Ezviz as Clock = ',conf_mat[1][0],' percent = ', (conf_mat[1][0]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Ezviz = ',conf_mat[1][1],' percent = ', (conf_mat[1][1]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as D3D =',conf_mat[1][2],' percent = ', (conf_mat[1][2]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Netatmo = ',conf_mat[1][3],' percent = ', (conf_mat[1][3]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Canary = ',conf_mat[1][4],' percent = ', (conf_mat[1][4]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Bulb = ',conf_mat[1][5],' percent = ', (conf_mat[1][5]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('\n')

print('D3D as Clock = ',conf_mat[2][0],' percent = ', (conf_mat[2][0]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Ezviz = ',conf_mat[2][1],' percent = ', (conf_mat[2][1]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as D3D = ',conf_mat[2][2],' percent = ', (conf_mat[2][2]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Netatmo = ',conf_mat[2][3],' percent = ', (conf_mat[2][3]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Canary = ',conf_mat[2][4],' percent = ', (conf_mat[2][4]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Bulb = ',conf_mat[2][5],' percent = ', (conf_mat[2][5]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('\n')

print('Netatmo as Clock = ',conf_mat[3][0],' percent = ', (conf_mat[3][0]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Ezviz = ',conf_mat[3][1],' percent = ', (conf_mat[3][1]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as D3D = ',conf_mat[3][2],' percent = ', (conf_mat[3][2]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Netatmo = ',conf_mat[3][3],' percent = ', (conf_mat[3][3]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Canary = ',conf_mat[3][4],' percent = ', (conf_mat[3][4]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Bulb = ',conf_mat[3][5],' percent = ', (conf_mat[3][5]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('\n')

print('Canary as Clock = ',conf_mat[4][0],' percent = ', (conf_mat[4][0]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Ezviz = ',conf_mat[4][1],' percent = ', (conf_mat[4][1]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as D3D = ',conf_mat[4][2],' percent = ', (conf_mat[4][2]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Netatmo = ',conf_mat[4][3],' percent = ', (conf_mat[4][3]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Canary = ',conf_mat[4][4],' percent = ', (conf_mat[4][4]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Bulb = ',conf_mat[4][5],' percent = ', (conf_mat[4][5]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('\n')

print('Bulb as Clock = ',conf_mat[5][0],' percent = ', (conf_mat[5][0]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Ezviz = ',conf_mat[5][1],' percent = ', (conf_mat[5][1]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as D3D = ',conf_mat[5][2],' percent = ', (conf_mat[5][2]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Netatmo = ',conf_mat[5][3],' percent = ', (conf_mat[5][3]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Canary = ',conf_mat[5][4],' percent = ', (conf_mat[5][4]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Bulb = ',conf_mat[5][5],' percent = ', (conf_mat[5][5]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))

[[ 125    1    6    0    0    5]
 [   2  122    0    1    3    6]
 [   2    3  550    0    2    3]
 [   0    0    2  840    1    4]
 [   0    2    1    2  283    6]
 [   1    2    0    1    3 1321]]


Clock as Clock =  125  percent =  0.9124087591240876
Clock as Ezviz =  1  percent =  0.0072992700729927005
Clock as D3D =  6  percent =  0.043795620437956206
Clock as Netatmo =  0  percent =  0.0
Clock as Canary =  0  percent =  0.0
Clock as Bulb =  5  percent =  0.0364963503649635


Ezviz as Clock =  2  percent =  0.014925373134328358
Ezviz as Ezviz =  122  percent =  0.9104477611940298
Ezviz as D3D = 0  percent =  0.0
Ezviz as Netatmo =  1  percent =  0.007462686567164179
Ezviz as Canary =  3  percent =  0.022388059701492536
Ezviz as Bulb =  6  percent =  0.04477611940298507


D3D as Clock =  2  percent =  0.0035714285714285713
D3D as Ezviz =  3  percent =  0.005357142857142857
D3D as D3D =  550  percent =  0.9821428571428571
D3D as Netatmo =  0  percent =  0.0
D3D as Canary =  2  perce

In [91]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

                  precision    recall  f1-score   support

IPCam_AlarmClock       0.96      0.91      0.94       137
    IPCam_Canary       0.94      0.91      0.92       134
       IPCam_D3D       0.98      0.98      0.98       560
     IPCam_Ezviz       1.00      0.99      0.99       847
   IPCam_Netatmo       0.97      0.96      0.97       294
      IPCam_V380       0.98      0.99      0.99      1328

        accuracy                           0.98      3300
       macro avg       0.97      0.96      0.97      3300
    weighted avg       0.98      0.98      0.98      3300



XGBoost Classifier

In [97]:
ipcamDF['Label'].replace('IPCam_AlarmClock','0',inplace=True)
#uniqueLabels = list(ipcamDF['Label'].unique())
#print(uniqueLabels)
print(list(ipcamDF['Label'].unique()))

['0', 'IPCam_Canary', 'IPCam_D3D', 'IPCam_Ezviz', 'IPCam_Netatmo', 'IPCam_V380']


In [98]:
ipcamDF['Label'].replace('IPCam_Canary','1',inplace=True)
#uniqueLabels = list(ipcamDF['Label'].unique())
#print(uniqueLabels)
print(list(ipcamDF['Label'].unique()))

['0', '1', 'IPCam_D3D', 'IPCam_Ezviz', 'IPCam_Netatmo', 'IPCam_V380']


In [99]:
ipcamDF['Label'].replace('IPCam_D3D','2',inplace=True)
#uniqueLabels = list(ipcamDF['Label'].unique())
#print(uniqueLabels)
print(list(ipcamDF['Label'].unique()))

['0', '1', '2', 'IPCam_Ezviz', 'IPCam_Netatmo', 'IPCam_V380']


In [100]:
ipcamDF['Label'].replace('IPCam_Ezviz','3',inplace=True)
#uniqueLabels = list(ipcamDF['Label'].unique())
#print(uniqueLabels)
print(list(ipcamDF['Label'].unique()))

['0', '1', '2', '3', 'IPCam_Netatmo', 'IPCam_V380']


In [101]:
ipcamDF['Label'].replace('IPCam_Netatmo','4',inplace=True)
#uniqueLabels = list(ipcamDF['Label'].unique())
#print(uniqueLabels)
print(list(ipcamDF['Label'].unique()))

['0', '1', '2', '3', '4', 'IPCam_V380']


In [102]:
ipcamDF['Label'].replace('IPCam_V380','5',inplace=True)
#uniqueLabels = list(ipcamDF['Label'].unique())
#print(uniqueLabels)
print(list(ipcamDF['Label'].unique()))

['0', '1', '2', '3', '4', '5']


In [103]:
######### creating master DF
dfListAll = [ipcamDF]
masterDF = pd.concat(dfListAll)
print(masterDF.shape)

(10000, 84)


In [104]:
uniqueLabels = list(masterDF['Label'].unique())
print(uniqueLabels)

['0', '1', '2', '3', '4', '5']


In [105]:
#discard all those columns that helps in uniquely identifying a flow
masterPruned = masterDF.iloc[:,np.arange(7,84)]
print('pruneddata size: ', masterPruned.shape)
ftList = list(masterPruned.columns)
print(ftList)

pruneddata size:  (10000, 77)
['Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Avg', 'Bwd Byts/b Av

In [106]:
#discard those flows that has 0 flow duration
masterSelected = masterPruned.drop(masterPruned[masterPruned['Flow Duration']==0].index)
masterSelected.shape

(10000, 77)

In [107]:
#identify those columns that has non-numeric values
typesList = masterSelected.dtypes
ftCount = len(ftList)
nonNumericFtList = list()
for fi in range(ftCount):
    if ftList[fi] != 'Label' and typesList[fi] != 'int64' and typesList[fi] != 'float64':
        print(ftList[fi], '  ', typesList[fi])
        nonNumericFtList.append(ftList[fi])
print(nonNumericFtList)

[]


In [108]:
for ft in nonNumericFtList:
    masterSelected[ft] = pd.to_numeric(masterSelected[ft], errors='coerce')

In [109]:
print(nonNumericFtList)
for ft in nonNumericFtList:
    countNaN = masterSelected[ft].isna().sum()
    print('converted to numeric ft: ', ft, '  countNaN: ', countNaN)

[]


In [110]:
number_inf = masterSelected[masterSelected == np.inf].count().sum()
print('count inf: ',number_inf)

count inf:  0


In [111]:
typesList = masterSelected.dtypes
ftCount = len(ftList)
nonNumericFtList = list()
for fi in range(ftCount):
    if ftList[fi] != 'Label' and typesList[fi] != 'int64' and typesList[fi] != 'float64':
        print(ftList[fi], '  ', typesList[fi])
        nonNumericFtList.append(ftList[fi])
print(nonNumericFtList)

[]


In [112]:
#identify those columns that has 0 std
count = 0
staticFtList = list()
for ft in ftList:
    if ft == 'Label':
        continue
    #print('ft : ', ft)
    ftMin = masterSelected[ft].min()
    ftMax = masterSelected[ft].max()
    #std = masterPruned[ft].std()
    
    if ftMin == ftMax:
        count += 1
        print('count: ', count, ' ft: ', ft, ' ftmin: ', ftMin, ' ftMax: ', ftMax)
        staticFtList.append(ft)
print(staticFtList)

count:  1  ft:  Fwd PSH Flags  ftmin:  0  ftMax:  0
count:  2  ft:  Fwd URG Flags  ftmin:  0  ftMax:  0
count:  3  ft:  Bwd URG Flags  ftmin:  0  ftMax:  0
count:  4  ft:  URG Flag Cnt  ftmin:  0  ftMax:  0
count:  5  ft:  CWE Flag Count  ftmin:  0  ftMax:  0
count:  6  ft:  ECE Flag Cnt  ftmin:  0  ftMax:  0
count:  7  ft:  Fwd Byts/b Avg  ftmin:  0  ftMax:  0
count:  8  ft:  Fwd Pkts/b Avg  ftmin:  0  ftMax:  0
count:  9  ft:  Fwd Blk Rate Avg  ftmin:  0  ftMax:  0
count:  10  ft:  Bwd Byts/b Avg  ftmin:  0  ftMax:  0
count:  11  ft:  Bwd Pkts/b Avg  ftmin:  0  ftMax:  0
count:  12  ft:  Bwd Blk Rate Avg  ftmin:  0  ftMax:  0
count:  13  ft:  Init Fwd Win Byts  ftmin:  -1  ftMax:  -1
count:  14  ft:  Fwd Seg Size Min  ftmin:  0  ftMax:  0
['Fwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Avg', 'Bwd Byts/b Avg', 'Bwd Pkts/b Avg', 'Bwd Blk Rate Avg', 'Init Fwd Win Byts', 'Fwd Seg Size 

In [113]:
#remove those columns that have 0 std
print('before droping column: ', masterSelected.shape)
for ft in staticFtList:
    masterSelected.drop([ft], axis=1,inplace=True)
print('after droping column: ', masterSelected.shape)    

before droping column:  (10000, 77)
after droping column:  (10000, 63)


In [114]:
xCount = masterSelected.shape[1]
print(xCount)
finalFtList = list(masterSelected.columns)
print(finalFtList)

63
['Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Bwd PSH Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Subflow Fwd Pkts', 'Subflow Fwd Byts', 'Subflow Bwd Pkts', 'Subflow Bwd Byts', 'Init Bwd Win Byts', 'Fwd Act Data Pkts', 'Active Mean', 'Active Std', 'Active Max', 'Active Min', 'Idle Mean', 'Idle 

In [115]:
#report intermediate data frame to a csv
masterSelected.to_csv('D:/code/MLCode_output/ProcessedNPrunned_BITSDataset_XGBnew.csv', index = False)

In [116]:
print(list(masterSelected['Label'].unique()))

['0', '1', '2', '3', '4', '5']


In [117]:
df = pd.read_csv('D:/code/MLCode_output/ProcessedNPrunned_BITSDataset_XGBnew.csv')

In [118]:
# Shuffle the dataset
df_shuffled = df.sample(n=len(df))

In [119]:
# Save the shuffled dataset to a new CSV file
df_shuffled.to_csv('D:/code/MLCode_output/ProcessedNPrunned_BITSDataset_XGBnew.csv', index=False)

In [120]:
df_shuffled = df_shuffled.reset_index(drop=True)

In [121]:
#classification with all the features
masterDS = df_shuffled.values  #masterSelected.values
X = masterDS[:,:-1]
Y = masterDS[:,-1]
print("Xshape: ", X.shape)
print("Yshape: ", Y.shape)

Xshape:  (10000, 62)
Yshape:  (10000,)


In [122]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.25, random_state = 0)
#y_train_xgb = y_train.map({"IPCam": 0, "Other":1, "VConf":2, "VShare": 3})
#X_train, X_test, y_train, y_test = train_test_split(X, Y, Label, test_size=test_size, stratify = labels)

#xgb = XGBClassifier(n_estimators = 400, learning_rate = 0.1, max_depth = 3)
#xgb.fit(X_train.values, y_train)

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train = le.fit_transform(y_train)
print(y_train.shape)

(7500,)


In [123]:
#import XGBClassifier
from xgboost import XGBClassifier
# declare parameters
params = {
            'objective':'binary:logistic',
            'max_depth': 4,
            #'alpha': 0.1,
            'learning_rate': 1.0,
            'n_estimators':250
         }
                      
# instantiate the classifier 
xgb_clf = XGBClassifier(**params)

# fit the classifier to the training data
xgb_clf.fit(X_train, y_train)

In [124]:
#alternatively view the parameters of the xgb trained model
print(xgb_clf)

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=1.0, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=4, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=250, n_jobs=None,
              num_parallel_tree=None, objective='multi:softprob', ...)


In [125]:
# make predictions on test data
y_pred = xgb_clf.predict(X_test)
print(y_pred.shape)

(2500,)


In [126]:
# check accuracy score
from sklearn.metrics import accuracy_score
print('XGBoost model accuracy score: {0:0.4f}'. format(accuracy_score(y_test, y_pred)))

XGBoost model accuracy score: 0.9936


In [127]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred) #our model
print(cm)
accuracy_score(y_test, y_pred)

[[ 95   0   0   0   0   0]
 [  0 103   0   2   2   1]
 [  0   0 438   0   0   1]
 [  0   0   0 631   0   5]
 [  0   0   0   0 218   4]
 [  0   1   0   0   0 999]]


0.9936

In [129]:
conf_mat = metrics.confusion_matrix(y_test, y_pred)#, labels=uniqueLabels)
#[i][j] ==> knwon to be in group i and predicted as group j
print(conf_mat)
print('\n')
print('Clock as Clock = ',conf_mat[0][0],' percent = ', (conf_mat[0][0]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Ezviz = ',conf_mat[0][1],' percent = ', (conf_mat[0][1]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as D3D = ',conf_mat[0][2],' percent = ', (conf_mat[0][2]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Netatmo = ',conf_mat[0][3],' percent = ', (conf_mat[0][3]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Canary = ',conf_mat[0][4],' percent = ', (conf_mat[0][4]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('Clock as Bulb = ',conf_mat[0][5],' percent = ', (conf_mat[0][5]/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[0][2]+conf_mat[0][3]+conf_mat[0][4]+conf_mat[0][5])))
print('\n')

print('Ezviz as Clock = ',conf_mat[1][0],' percent = ', (conf_mat[1][0]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Ezviz = ',conf_mat[1][1],' percent = ', (conf_mat[1][1]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as D3D =',conf_mat[1][2],' percent = ', (conf_mat[1][2]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Netatmo = ',conf_mat[1][3],' percent = ', (conf_mat[1][3]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Canary = ',conf_mat[1][4],' percent = ', (conf_mat[1][4]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('Ezviz as Bulb = ',conf_mat[1][5],' percent = ', (conf_mat[1][5]/(conf_mat[1][0]+conf_mat[1][1]+conf_mat[1][2]+conf_mat[1][3]+conf_mat[1][4]+conf_mat[1][5])))
print('\n')

print('D3D as Clock = ',conf_mat[2][0],' percent = ', (conf_mat[2][0]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Ezviz = ',conf_mat[2][1],' percent = ', (conf_mat[2][1]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as D3D = ',conf_mat[2][2],' percent = ', (conf_mat[2][2]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Netatmo = ',conf_mat[2][3],' percent = ', (conf_mat[2][3]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Canary = ',conf_mat[2][4],' percent = ', (conf_mat[2][4]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('D3D as Bulb = ',conf_mat[2][5],' percent = ', (conf_mat[2][5]/(conf_mat[2][0]+conf_mat[2][1]+conf_mat[2][2]+conf_mat[2][3]+conf_mat[2][4]+conf_mat[2][5])))
print('\n')

print('Netatmo as Clock = ',conf_mat[3][0],' percent = ', (conf_mat[3][0]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Ezviz = ',conf_mat[3][1],' percent = ', (conf_mat[3][1]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as D3D = ',conf_mat[3][2],' percent = ', (conf_mat[3][2]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Netatmo = ',conf_mat[3][3],' percent = ', (conf_mat[3][3]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Canary = ',conf_mat[3][4],' percent = ', (conf_mat[3][4]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('Netatmo as Bulb = ',conf_mat[3][5],' percent = ', (conf_mat[3][5]/(conf_mat[3][0]+conf_mat[3][1]+conf_mat[3][2]+conf_mat[3][3]+conf_mat[3][4]+conf_mat[3][5])))
print('\n')

print('Canary as Clock = ',conf_mat[4][0],' percent = ', (conf_mat[4][0]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Ezviz = ',conf_mat[4][1],' percent = ', (conf_mat[4][1]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as D3D = ',conf_mat[4][2],' percent = ', (conf_mat[4][2]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Netatmo = ',conf_mat[4][3],' percent = ', (conf_mat[4][3]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Canary = ',conf_mat[4][4],' percent = ', (conf_mat[4][4]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('Canary as Bulb = ',conf_mat[4][5],' percent = ', (conf_mat[4][5]/(conf_mat[4][0]+conf_mat[4][1]+conf_mat[4][2]+conf_mat[4][3]+conf_mat[4][4]+conf_mat[4][5])))
print('\n')

print('Bulb as Clock = ',conf_mat[5][0],' percent = ', (conf_mat[5][0]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Ezviz = ',conf_mat[5][1],' percent = ', (conf_mat[5][1]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as D3D = ',conf_mat[5][2],' percent = ', (conf_mat[5][2]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Netatmo = ',conf_mat[5][3],' percent = ', (conf_mat[5][3]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Canary = ',conf_mat[5][4],' percent = ', (conf_mat[5][4]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))
print('Bulb as Bulb = ',conf_mat[5][5],' percent = ', (conf_mat[5][5]/(conf_mat[5][0]+conf_mat[5][1]+conf_mat[5][2]+conf_mat[5][3]+conf_mat[5][4]+conf_mat[5][5])))

[[ 95   0   0   0   0   0]
 [  0 103   0   2   2   1]
 [  0   0 438   0   0   1]
 [  0   0   0 631   0   5]
 [  0   0   0   0 218   4]
 [  0   1   0   0   0 999]]


Clock as Clock =  95  percent =  1.0
Clock as Ezviz =  0  percent =  0.0
Clock as D3D =  0  percent =  0.0
Clock as Netatmo =  0  percent =  0.0
Clock as Canary =  0  percent =  0.0
Clock as Bulb =  0  percent =  0.0


Ezviz as Clock =  0  percent =  0.0
Ezviz as Ezviz =  103  percent =  0.9537037037037037
Ezviz as D3D = 0  percent =  0.0
Ezviz as Netatmo =  2  percent =  0.018518518518518517
Ezviz as Canary =  2  percent =  0.018518518518518517
Ezviz as Bulb =  1  percent =  0.009259259259259259


D3D as Clock =  0  percent =  0.0
D3D as Ezviz =  0  percent =  0.0
D3D as D3D =  438  percent =  0.9977220956719818
D3D as Netatmo =  0  percent =  0.0
D3D as Canary =  0  percent =  0.0
D3D as Bulb =  1  percent =  0.002277904328018223


Netatmo as Clock =  0  percent =  0.0
Netatmo as Ezviz =  0  percent =  0.0
Netatmo as D3D 