# Initialization

In [1]:
import imp
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
import numpy as np
#from data_parser import DataParser
from pull_data import Pull
import os
from sklearn.metrics import confusion_matrix
from prettytable import PrettyTable
from statistics import mean

#from scikit_IsolatedForest import IsolatedForest
from sklearn.ensemble import IsolationForest
#from scikit_LOFNovelty import LOFNovelty
from sklearn.neighbors import LocalOutlierFactor
#from scikit_OneClassSVM import OCSVM
from sklearn.svm import OneClassSVM

In [43]:
# Maps a short dataset/device label to the directory that is handed to `Pull`
# when loading that dataset. Entries that are commented out are excluded from
# the evaluation sweep, which iterates over every key of this dict.
DATA_CLASS = {
    # CESNET DATASET
    "IKEA_APP" : "dev-annotated-datasets/ikea-app/train",
    # NOTE(review): this key says HOMEKIT but the path points into ikea-app/anomaly — confirm it is intended.
    "IKEA_HOMEKIT_CLEAR" : "dev-annotated-datasets/ikea-app/anomaly",
    "IKEA_HOMEKIT" : "dev-annotated-datasets/ikea-homekit/train",
    "IP_CAM" : "dev-annotated-datasets/ipcam/train",
    "IP_CAM_MISCONFIG" : "dev-annotated-datasets/ipcam/anomaly_cam",
    "NORMAL_USER" : "dev-annotated-datasets/normal-user/train",
    "VOICE_ASSISTANT" : "dev-annotated-datasets/voice-assistant/train",
    
    # UNSW DATASET
    ## HUBS
    "AMAZON_ECHO" : "../data-sets/unsw-traces-device/Amazon-Echo",
    "SMART_THINGS" : "../data-sets/unsw-traces-device/Smart-Things",
    ## CAMERAS
    # NOTE(review): the key is spelled NETAMO while the directory is Netatmo; the key is a
    # runtime lookup string, so it is left unchanged here.
    "NETAMO_WELCOME" : "../data-sets/unsw-traces-device/Netatmo-Welcome",
    #"TP-Link-Day-Night-Cloud-Camera" : "../data-sets/unsw-traces-device/TP-Link-Day-Night-Cloud-Camera",
    "Samsung-Smart-Cam" : "../data-sets/unsw-traces-device/Samsung-Smart-Cam",
    "INSTEON_CAM" : "../data-sets/unsw-traces-device/Insteon-Camera",
    #"DROP_CAM" : "../data-sets/unsw-traces-device/Dropcam",
    "WITHINGS_SMART_BABY_MONITOR" : "../data-sets/unsw-traces-device/Withings-Smart-Baby-Monitor",
    ## SWITCHES AND TRIGGERS
    "BELKIN_WEMO_SWITCH" : "../data-sets/unsw-traces-device/Belkin-Wemo-Switch",
    "TP-Link-Smart-Plug" : "../data-sets/unsw-traces-device/TP-Link-Smart-Plug",
    #"iHome" : "../data-sets/unsw-traces-device/iHome",
    "BELKIN_WEMO_MOTION_SENSOR" : "../data-sets/unsw-traces-device/Belkin-Wemo-Motion-Sensor",
    ## AIR QUALITY SENSORS
    #"NEST-Protect-Smoke-Alarm" : "../data-sets/unsw-traces-device/NEST-Protect-Smoke-Alarm",
    "Netatmo-Weather-Station" : "../data-sets/unsw-traces-device/Netatmo-Weather-Station",
    ## HEALTHCARE DEVICES
    #"Withings-Smart-Scale" : "../data-sets/unsw-traces-device/Withings-Smart-Scale",
    #"Blipcare-Blood-Pressure-Meter" : "../data-sets/unsw-traces-device/Blipcare-Blood-Pressure-Meter",
    "Withings-Aura-Smart-Sleep-Sensor" : "../data-sets/unsw-traces-device/Withings-Aura-Smart-Sleep-Sensor",
    ## LIGHT BULBS
    "Light-Bulbs-LiFX-Smart-Bulb" : "../data-sets/unsw-traces-device/Light-Bulbs-LiFX-Smart-Bulb",
    ## ELECTRONIC
    #"Triby-Speaker" : "../data-sets/unsw-traces-device/Triby-Speaker",
    #"PIX-STAR-Photo-Frame" : "../data-sets/unsw-traces-device/PIX-STAR-Photo-Frame",
    "HP-Printer" : "../data-sets/unsw-traces-device/HP-Printer",
    ## NON-IOT
    "Laptop" : "../data-sets/unsw-traces-device/Laptop",
    "ANDROID_PHONE" : "../data-sets/unsw-traces-device/Android-Phone",
    "Samsung-Galaxy-Tab" : "../data-sets/unsw-traces-device/Samsung-Galaxy-Tab",
    #"IPhone" : "../data-sets/unsw-traces-device/IPhone",
    
    # CTU13 BOTNET ATTACKS DATASET
    "BOTNET_SOGOU" : "../data-sets/botnet/sogou",
    "BOTNET_RBOT" : "../data-sets/botnet/rbot",
    "BOTNET_NERIS" : "../data-sets/botnet/neris",
}

# Function Definitions

In [34]:
class Metrics:
    """Accumulates per-fold accuracy / precision / recall / F1 for an outlier detector.

    Labels follow the convention used by the callers in this notebook:
    ``1`` marks a valid (inlier) sample and ``-1`` marks an anomaly.

    ``label`` selects which class is treated as the class of interest:
    ``"Valid"`` and ``"Mix"`` report precision/recall/F1 for the ``1`` class,
    any other label reports them for the ``-1`` class ("negative" metrics).
    """

    # Fixed label order so confusion_matrix always returns a 2x2 matrix laid out
    # as [[tn, fp], [fn, tp]], even when a fold contains only one class.
    _LABELS = [-1, 1]

    def __init__(self,label):
        """Create an empty accumulator; ``label`` is also used as the table title."""
        self.label = label
        self.accuracy = []
        self.precision = []
        self.recall = []
        self.f1 = []
        self.cnt = 0  # number of update() calls recorded so far

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator / denominator, or 0.0 when the denominator is zero."""
        return float(numerator) / float(denominator) if denominator else 0.0

    def update(self,y,pred,score):
        """Record the metrics of one evaluation fold.

        Args:
            y: true labels (``1`` valid, ``-1`` anomaly).
            pred: predicted labels, same length and encoding as ``y``.
            score: decision scores for the same samples; accepted for interface
                compatibility but not used in the computation.
        """
        # Passing `labels` pins the matrix shape. Without it a single-class fold
        # yields a 1x1 matrix and the whole count would be booked as TP, even
        # when every sample is a correctly detected anomaly (a true negative).
        matrix = confusion_matrix(y, pred, labels=self._LABELS)
        # Plain ints: numpy integer division by zero yields NaN with a warning
        # instead of raising, so it must be guarded explicitly.
        tn, fp, fn, tp = (int(v) for v in matrix.ravel())

        total = tp+tn+fp+fn
        accuracy = self._ratio(tp+tn, total)
        if self.label == "Valid" or self.label == "Mix":
            precision = self._ratio(tp, tp+fp)
            recall = self._ratio(tp, tp+fn)
        else:
            precision = self._ratio(tn, tn+fn)  # Negative precision
            recall = self._ratio(tn, tn+fp)     # Negative recall
        # Undefined F1 (precision + recall == 0) is reported as 0.0.
        f1 = self._ratio(2*(precision*recall), precision+recall)

        self.accuracy.append(accuracy)
        self.precision.append(precision)
        self.recall.append(recall)
        self.f1.append(f1)
        self.cnt += 1

    def print(self):
        """Print one table row per recorded fold plus an average row.

        Returns:
            The mean F1 score over all recorded folds, rounded to 3 decimals.

        Raises:
            statistics.StatisticsError: if update() has not been called yet,
                because the mean of an empty list is undefined.
        """
        table = PrettyTable()
        table.field_names = [self.label+" Data","Accuracy", "Precision", "Recall", "F1 score"]
        for i in range(len(self.accuracy)):
            table.add_row([i,round(self.accuracy[i],3),round(self.precision[i],3),round(self.recall[i],3),round(self.f1[i],3)])

        table.add_row(["Avg",round(mean(self.accuracy),3),round(mean(self.precision),3),round(mean(self.recall),3),round(mean(self.f1),3)])
        print(table)
        # Return F1-score
        return round(mean(self.f1),3)

In [4]:
def print_metrics(y, pred, thr_pred=0.5, label=""):
    """Print confusion-matrix counts and derived metrics for one prediction run.

    Labels are expected as ``1`` (valid) and ``-1`` (anomaly). For ``label``
    equal to ``"Valid"`` or ``"Mix"`` precision/recall/F1 are computed for the
    ``1`` class; for any other label they are computed for the ``-1`` class and
    printed with an "Anomaly" suffix.

    Args:
        y: true labels.
        pred: predicted labels, same length as ``y``.
        thr_pred: unused; kept so existing calls keep working.
        label: name printed in the header; also selects the metric variant.
    """
    def ratio(numerator, denominator):
        # Metrics with a zero denominator are reported as 0.0 rather than NaN.
        return float(numerator) / float(denominator) if denominator else 0.0

    print("### Metric",label,"###")
    # Pinning `labels` keeps the matrix 2x2 ([[tn, fp], [fn, tp]]) even when
    # only one class is present; otherwise a 1x1 result would be misread as TP.
    matrix = confusion_matrix(y, pred, labels=[-1, 1])
    # Plain ints so zero denominators are caught by `ratio` instead of
    # silently producing NaN through numpy integer division.
    tn, fp, fn, tp = (int(v) for v in matrix.ravel())

    total = tp+tn+fp+fn
    acc = ratio(tp+tn, total)
    positive_view = label == "Valid" or label == "Mix"
    if positive_view:
        prec = ratio(tp, tp+fp)
        rec = ratio(tp, tp+fn)
    else:
        prec = ratio(tn, tn+fn)
        rec = ratio(tn, tn+fp)
    f1 = ratio(2*(prec*rec), prec+rec)

    print("TP: {:7d} {:6.2f}%".format(tp, ratio(tp*100, total)))
    print("FN: {:7d} {:6.2f}%".format(fn, ratio(fn*100, total)))
    print("FP: {:7d} {:6.2f}%".format(fp, ratio(fp*100, total)))
    print("TN: {:7d} {:6.2f}%".format(tn, ratio(tn*100, total)))
    print("Accuracy:   {:6.2f}%".format(acc*100))
    if positive_view:
        print("Precision:  {:6.4f}".format(prec))
        print("Recall:     {:6.4f}".format(rec))
        print("F1 score:   {:6.4f}".format(f1))
    else:
        print("Precision Anomaly:  {:6.4f}".format(prec))
        print("Recall Anomaly:     {:6.4f}".format(rec))
        print("F1 score Anomaly:   {:6.4f}".format(f1))
   

In [60]:
def runModel(models, upper_limit=400, n_splits=5, random_state=None, valid=None, anomaly=None):
    """Cross-validate each detector on valid data and score it against anomaly data.

    For every model, the valid samples are split with a shuffled K-fold. The
    model is fitted on the training part of each fold and then asked to label
    the held-out valid samples (expected ``1``) and a set of anomaly samples
    (expected ``-1``). Both are scored together through a "Mix" ``Metrics``
    accumulator, whose table is printed once per model.

    Args:
        models: dict mapping a display name to an estimator exposing ``fit``,
            ``predict`` and ``decision_function``.
        upper_limit: maximum number of valid samples used (taken from the start).
        n_splits: number of cross-validation folds.
        random_state: seed for the fold shuffling; ``None`` keeps the previous
            non-deterministic behaviour.
        valid: object with a ``.data`` sequence of valid samples; defaults to
            the notebook-level global ``t``.
        anomaly: object with a ``.data`` sequence of anomaly samples; defaults
            to the notebook-level global ``a``.

    Returns:
        The rounded mean F1 score of the first model in ``models`` (unchanged
        from earlier behaviour), or ``None`` when ``models`` is empty. All
        models are evaluated and printed; previously an early ``return`` inside
        the loop skipped every model after the first.
    """
    # Fall back to the globals the notebook cells assign before calling this.
    valid_set = t if valid is None else valid
    anomaly_set = a if anomaly is None else anomaly
    t_data = np.array(valid_set.data[0:upper_limit])
    a_data = np.array(anomaly_set.data)

    first_f1 = None
    for position, (key, model) in enumerate(models.items()):
        print("### Model Name:",key," ###")
        m_mix = Metrics(label="Mix")
        # Keyword arguments: recent scikit-learn releases reject a positional `shuffle`.
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
        print(len(t_data),len(a_data))
        for train_index, test_index in kf.split(t_data):
            # Train on valid data only
            model.fit(t_data[train_index])

            # Valid evaluation on the held-out part of the fold
            valid_fold = t_data[test_index]
            y_pred_valid = model.predict(valid_fold)
            score_v = model.decision_function(valid_fold)

            # Anomaly evaluation: reuse the fold's test indices to pick an
            # equally sized anomaly subset when the anomaly set is large enough
            # to be indexed by them; otherwise use every anomaly sample.
            if len(a_data) <= max(test_index):
                anomaly_fold = a_data
            else:
                anomaly_fold = a_data[test_index]
            y_pred_outliers = model.predict(anomaly_fold)
            score_a = model.decision_function(anomaly_fold)

            # Add results to the metrics object
            y_true = [1]*len(y_pred_valid) + [-1]*len(y_pred_outliers)
            m_mix.update(
                y_true,
                np.concatenate((y_pred_valid,y_pred_outliers),axis=None),
                np.concatenate((score_v,score_a),axis=None),
            )

        f1score = m_mix.print()
        if position == 0:
            first_f1 = f1score
    return first_f1
        
        

# Pull Datasets

# MODELS

In [45]:
# Registry of candidate detectors: MODELS[<family>][<variant name>] -> estimator.
# Each family dict is what gets passed to runModel(). Commented-out lines are
# alternative hyper-parameter variants that are currently disabled.
MODELS = {}
MODELS["IsolatedForest"] = {}
MODELS["LOF"] = {}
MODELS["OneClassSVM"] = {}
# Seeded generator handed to the estimators that accept `random_state`.
rng = np.random.RandomState(12345)
# NOTE(review): the `behaviour` argument was deprecated in scikit-learn 0.22 and removed in
# 0.24, so this line is expected to raise TypeError on newer releases — confirm the target version.
MODELS["IsolatedForest"]["IF1"] = IsolationForest(n_estimators = 250, max_samples='auto',max_features=5,bootstrap=True , behaviour='new',random_state=rng, contamination='auto')
#MODELS["IsolatedForest"]["IF2"] = IsolationForest(n_estimators = 20, max_samples='auto',max_features=5,bootstrap=True ,random_state=rng)
# novelty=True is set on every LOF variant; runModel calls predict/decision_function on data not used in fit.
#MODELS["LOF"]["LOF1"] = LocalOutlierFactor(n_neighbors = 10, metric = "chebyshev", novelty=True, contamination=0.1)
MODELS["LOF"]["LOF2"] = LocalOutlierFactor(n_neighbors = 10, metric = "chebyshev", novelty=True, contamination='auto')
#MODELS["LOF"]["LOF3"] = LocalOutlierFactor(n_neighbors = 10, metric = "canberra", novelty=True, contamination='auto')
#MODELS["LOF"]["LOF4"] = LocalOutlierFactor(n_neighbors = 20, metric = "euclidean", novelty=True, contamination='auto')
#MODELS["LOF"]["LOF5"] = LocalOutlierFactor(n_neighbors = 20, metric = "minkowski", novelty=True, contamination='auto')
# NOTE(review): the disabled line below reuses the key "LOF4"; enabling both LOF4 lines would overwrite the first.
#MODELS["LOF"]["LOF4"] = LocalOutlierFactor(n_neighbors = 10, metric = "canberra", novelty=True, contamination='auto')
MODELS["OneClassSVM"]["OSVM1"] = OneClassSVM(kernel='poly',gamma="auto",coef0=1, nu=0.2)



In [65]:
# Single-pair experiment: fit on one device's traffic and test against another.
# `t` (valid data) and `a` (anomaly data) are globals that runModel() reads by default.
# NOTE(review): the meaning of the second Pull argument (1) is not visible in this notebook — confirm.
t = Pull(DATA_CLASS["INSTEON_CAM"],1)
a = Pull(DATA_CLASS["IP_CAM"],1)
print("Valid:",len(t.data)," Anomaly:",len(a.data))#," Valid:",len(v.data))
print("Number of features:",t.features_cnt)
# Last expression of the cell, so the returned mean F1 score is shown as the cell output.
runModel(MODELS["LOF"])

Valid: 23059  Anomaly: 323
Number of features: 32
### Model Name: LOF2  ###
400 323
+----------+----------+-----------+--------+----------+
| Mix Data | Accuracy | Precision | Recall | F1 score |
+----------+----------+-----------+--------+----------+
|    0     |  0.973   |    1.0    | 0.862  |  0.926   |
|    1     |  0.988   |    1.0    | 0.938  |  0.968   |
|    2     |   0.99   |    1.0    |  0.95  |  0.974   |
|    3     |  0.993   |    1.0    | 0.962  |  0.981   |
|    4     |   0.99   |    1.0    |  0.95  |  0.974   |
|   Avg    |  0.987   |    1.0    | 0.932  |  0.965   |
+----------+----------+-----------+--------+----------+


0.965

# Evaluation

In [84]:
# Sweep: train on SRC_CLASS and use every dataset in DATA_CLASS, in turn, as
# the anomaly set. The source class itself is still run (its table is printed)
# but its score is left out of the reported average.
SRC_CLASS = "Samsung-Galaxy-Tab"
src_f1_score = []
t = Pull(DATA_CLASS[SRC_CLASS],1)
for data_cl, data_path in DATA_CLASS.items():
    a = Pull(data_path,1)
    print("RESULST FOR CLASS:",data_cl)
    class_f1 = runModel(MODELS["LOF"])
    if data_cl != SRC_CLASS:
        src_f1_score.append(class_f1)
# Mean F1 over all non-source classes
print(round(mean(src_f1_score),3))

RESULST FOR CLASS: IKEA_APP
### Model Name: LOF2  ###
400 388
+----------+----------+-----------+--------+----------+
| Mix Data | Accuracy | Precision | Recall | F1 score |
+----------+----------+-----------+--------+----------+
|    0     |   0.72   |   0.368   | 0.888  |   0.52   |
|    1     |  0.549   |   0.247   |  0.8   |  0.378   |
|    2     |   0.69   |   0.337   | 0.838  |   0.48   |
|    3     |  0.707   |   0.344   | 0.788  |  0.479   |
|    4     |  0.532   |   0.249   | 0.862  |  0.387   |
|   Avg    |   0.64   |   0.309   | 0.835  |  0.449   |
+----------+----------+-----------+--------+----------+
RESULST FOR CLASS: IKEA_HOMEKIT_CLEAR
### Model Name: LOF2  ###
400 16
+----------+----------+-----------+--------+----------+
| Mix Data | Accuracy | Precision | Recall | F1 score |
+----------+----------+-----------+--------+----------+
|    0     |  0.719   |   0.921   | 0.725  |  0.811   |
|    1     |  0.854   |   0.934   | 0.888  |   0.91   |
|    2     |  0.854   |   0

RESULST FOR CLASS: BELKIN_WEMO_SWITCH
### Model Name: LOF2  ###
400 12823
+----------+----------+-----------+--------+----------+
| Mix Data | Accuracy | Precision | Recall | F1 score |
+----------+----------+-----------+--------+----------+
|    0     |  0.856   |   0.843   | 0.875  |  0.859   |
|    1     |  0.869   |   0.855   | 0.888  |  0.871   |
|    2     |  0.906   |   0.971   | 0.838  |  0.899   |
|    3     |  0.912   |    1.0    | 0.825  |  0.904   |
|    4     |  0.825   |   0.817   | 0.838  |  0.827   |
|   Avg    |  0.874   |   0.897   | 0.852  |  0.872   |
+----------+----------+-----------+--------+----------+
RESULST FOR CLASS: TP-Link-Smart-Plug
### Model Name: LOF2  ###
400 141
+----------+----------+-----------+--------+----------+
| Mix Data | Accuracy | Precision | Recall | F1 score |
+----------+----------+-----------+--------+----------+
|    0     |  0.376   |   0.352   | 0.862  |   0.5    |
|    1     |  0.348   |   0.332   | 0.788  |  0.467   |
|    2     |  

RESULST FOR CLASS: Samsung-Galaxy-Tab
### Model Name: LOF2  ###
400 12147
+----------+----------+-----------+--------+----------+
| Mix Data | Accuracy | Precision | Recall | F1 score |
+----------+----------+-----------+--------+----------+
|    0     |   0.5    |    0.5    |  0.9   |  0.643   |
|    1     |   0.5    |    0.5    |  0.8   |  0.615   |
|    2     |   0.5    |    0.5    |  0.8   |  0.615   |
|    3     |   0.5    |    0.5    | 0.838  |  0.626   |
|    4     |   0.5    |    0.5    | 0.812  |  0.619   |
|   Avg    |   0.5    |    0.5    |  0.83  |  0.624   |
+----------+----------+-----------+--------+----------+
RESULST FOR CLASS: BOTNET_SOGOU
### Model Name: LOF2  ###
400 47
+----------+----------+-----------+--------+----------+
| Mix Data | Accuracy | Precision | Recall | F1 score |
+----------+----------+-----------+--------+----------+
|    0     |  0.843   |   0.866   | 0.888  |  0.877   |
|    1     |  0.866   |   0.889   |  0.9   |  0.894   |
|    2     |  0.843  

# Single Models

## Isolation Forest

### LOF Novelty

### OneClassSVM