# Initialization

In [23]:
import imp
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
import numpy as np
from pull_data import Pull
import os
from sklearn.metrics import confusion_matrix
from prettytable import PrettyTable
from statistics import mean

#from scikit_IsolatedForest import IsolatedForest
from sklearn.ensemble import IsolationForest
#from scikit_LOFNovelty import LOFNovelty
from sklearn.neighbors import LocalOutlierFactor
#from scikit_OneClassSVM import OCSVM
from sklearn.svm import OneClassSVM

In [34]:
# List of available classess in dataset
# Uncomment any class to include it into evaluation
DATA_CLASS = {
    # CESNET DATASET
    "IKEA_APP" : "dev-annotated-datasets/ikea-app/train",
    "IKEA_HOMEKIT_CLEAR" : "dev-annotated-datasets/ikea-app/anomaly",
    "IKEA_HOMEKIT" : "dev-annotated-datasets/ikea-homekit/train",
    "IP_CAM" : "dev-annotated-datasets/ipcam/train",
    "IP_CAM_MISCONFIG" : "dev-annotated-datasets/ipcam/anomaly_cam",
    "NORMAL_USER" : "dev-annotated-datasets/normal-user/train",
    "VOICE_ASSISTANT" : "dev-annotated-datasets/voice-assistant/train",
}

# Function Definitions

In [35]:
class Metrics:
    def __init__(self,label):
        self.label = label
        self.accuracy = []
        self.precision = []
        self.recall = []
        self.f1 = []
        self.cnt = 0
    def update(self,y,pred,score):
        try:
            tn, fp, fn, tp = confusion_matrix(y, pred).ravel()
        except Exception as e:
            # TN in all cases
            tn = 0
            fp = 0
            fn = 0
            tp = confusion_matrix(y, pred).ravel()[0]
        
        total = tp+tn+fp+fn
        accuracy = (tp+tn)/total
        if self.label == "Valid" or self.label == "Mix":
            precision = tp/(tp+fp)
            recall = tp/(tp+fn)
            f1 = 2*(precision*recall)/(precision+recall)
        else:
            try:
                precision = tn/(tn+fn) # Negative precision
                recall = tn/(tn+fp) # Negative recall
                f1 = 2*(precision*recall)/(precision+recall) # Negative f1
            except Exception as e:
                print("ERROR",self.label,e)
                precision = 0
                f1 = 0
                recall = 0
        self.accuracy.append(accuracy)
        self.precision.append(precision)
        self.recall.append(recall)
        self.f1.append(f1)
        self.cnt += 1
        
    def print(self):
        table = PrettyTable()
        table.field_names = [self.label+" Data","Accuracy", "Precision", "Recall", "F1 score"]
        for i in range(len(self.accuracy)):
            table.add_row([i,round(self.accuracy[i],3),round(self.precision[i],3),round(self.recall[i],3),round(self.f1[i],3)])
        
        table.add_row(["Avg",round(mean(self.accuracy),3),round(mean(self.precision),3),round(mean(self.recall),3),round(mean(self.f1),3)])
        print(table)
        # Return F1-score
        return round(mean(self.f1),3)

In [36]:
def print_metrics(y, pred, thr_pred=0.5, label=""):
    print("### Metric",label,"###")
    try:
        tn, fp, fn, tp = confusion_matrix(y, pred).ravel()
    except Exception as e:
        # TP in all cases
        tn = 0
        fp = 0
        fn = 0
        tp = confusion_matrix(y, pred).ravel()[0]
    #print(tn, fp, fn, tp)
    
    total = tp+tn+fp+fn
    acc = (tp+tn)/total
    if label == "Valid" or label == "Mix":
        prec = tp/(tp+fp)
        rec = tp/(tp+fn)
        f1 = 2*(prec*rec)/(prec+rec)
    
        print("TP: {:7d} {:6.2f}%".format(tp, tp*100/total))
        print("FN: {:7d} {:6.2f}%".format(fn, fn*100/total))
        print("FP: {:7d} {:6.2f}%".format(fp, fp*100/total))
        print("TN: {:7d} {:6.2f}%".format(tn, tn*100/total))
        print("Accuracy:   {:6.2f}%".format(acc*100))
        print("Precision:  {:6.4f}".format(prec))
        print("Recall:     {:6.4f}".format(rec))
        print("F1 score:   {:6.4f}".format(f1))
    
    else:
        try:
            prec_n = tn/(tn+fn)
            rec_n = tn/(tn+fp)
            f1_n = 2*(prec_n*rec_n)/(prec_n+rec_n)
        except Exception as e:
            print(e)
            prec_n = 0
            f1_n = 0
            rec_n = 0
        
        print("TP: {:7d} {:6.2f}%".format(tp, tp*100/total))
        print("FN: {:7d} {:6.2f}%".format(fn, fn*100/total))
        print("FP: {:7d} {:6.2f}%".format(fp, fp*100/total))
        print("TN: {:7d} {:6.2f}%".format(tn, tn*100/total))
        print("Accuracy:   {:6.2f}%".format(acc*100))
        print("Precision Anomaly:  {:6.4f}".format(prec_n))
        print("Recall Anomaly:     {:6.4f}".format(rec_n))
        print("F1 score Anomaly:   {:6.4f}".format(f1_n))
   

In [37]:
def runModel(models):
    UPPER_LIMIT = 400 # set upper limit for flows training dataset
    for key, model in models.items():
        print("### Model Name:",key," ###")
        # Set label based on the type of dataset data.. 
        m_valid = Metrics(label="Valid")
        m_anomaly = Metrics(label="Anomaly")
        m_mix = Metrics(label="Mix")
        kf = KFold(5, True)
        
        # uniform random select for anomaly
        if len(a.data) < UPPER_LIMIT:
            max_limit = len(a.data)
        else:
            max_limit = UPPER_LIMIT
        a_idx = np.random.choice(len(a.data), max_limit, replace=False)
        
        # uniform random select for valid
        if len(t.data) < UPPER_LIMIT:
            max_limit = len(t.data)
        else:
            max_limit = UPPER_LIMIT
        t_idx = np.random.choice(len(t.data), max_limit, replace=False)
        
        t_data = np.array(t.data)[t_idx]
        a_data = np.array(a.data)[a_idx]
        print(len(t_data),len(a_data))
        iteration_cnt = 0
        for train_index, test_index in kf.split(t_data):
            iteration_cnt += 1
            #Train
            model.fit(t_data[train_index])
            #Evaluate 
            # Valid Evaluation
            y_pred_valid = model.predict(t_data[test_index])
            score_v = model.decision_function(t_data[test_index])
            
            #Anomaly Evaluation
            if len(a.data) <= max(test_index):
                y_pred_outliers = model.predict(a.data)
                score_a = model.decision_function(a.data)
            else:
                y_pred_outliers = model.predict(a_data[test_index])
                score_a = model.decision_function(a_data[test_index])
            # Uncomment valid/anomaly in case you would like to evaluate these type of traffic separately
            # Add results to the metrics object
            #m_valid.update([1]*len(y_pred_valid),y_pred_valid,score_v)
            #m_anomaly.update([-1]*len(y_pred_outliers),y_pred_outliers,score_a)
            m_mix.update([1]*len(y_pred_valid) + [-1]*len(y_pred_outliers),np.concatenate((y_pred_valid,y_pred_outliers),axis=None),np.concatenate((score_v,score_a),axis=None))
            
            #print_metrics([1]*len(y_pred_valid),y_pred_valid,label="Valid")
            #print_metrics([-1]*len(y_pred_outliers),y_pred_outliers,label="Anomaly")
        #m_valid.print()
        #m_anomaly.print()
        f1score = m_mix.print()
        return f1score
        
        

# Pull Datasets

# MODELS

In [38]:
MODELS = {}
MODELS["IsolatedForest"] = {}
MODELS["LOF"] = {}
MODELS["OneClassSVM"] = {}
rng = np.random.RandomState(12345)
MODELS["IsolatedForest"]["IF1"] = IsolationForest(n_estimators = 250, max_samples='auto',max_features=5,bootstrap=True , behaviour='new',random_state=rng, contamination='auto')
#MODELS["IsolatedForest"]["IF2"] = IsolationForest(n_estimators = 20, max_samples='auto',max_features=5,bootstrap=True ,random_state=rng)
#MODELS["LOF"]["LOF1"] = LocalOutlierFactor(n_neighbors = 10, metric = "chebyshev", novelty=True, contamination=0.1)
MODELS["LOF"]["LOF2"] = LocalOutlierFactor(n_neighbors = 10, metric = "chebyshev", novelty=True, contamination='auto')
#MODELS["LOF"]["LOF3"] = LocalOutlierFactor(n_neighbors = 10, metric = "canberra", novelty=True, contamination='auto')
#MODELS["LOF"]["LOF4"] = LocalOutlierFactor(n_neighbors = 20, metric = "euclidean", novelty=True, contamination='auto')
#MODELS["LOF"]["LOF5"] = LocalOutlierFactor(n_neighbors = 20, metric = "minkowski", novelty=True, contamination='auto')
#MODELS["LOF"]["LOF4"] = LocalOutlierFactor(n_neighbors = 10, metric = "canberra", novelty=True, contamination='auto')
MODELS["OneClassSVM"]["OSVM1"] = OneClassSVM(kernel='poly',gamma="auto",coef0=1, nu=0.2)



In [39]:
# Quickly compare two classes
t = Pull(DATA_CLASS["NORMAL_USER"],1)
a = Pull(DATA_CLASS["IP_CAM"],1)
print("Valid:",len(t.data)," Anomaly:",len(a.data))#," Valid:",len(v.data))
print("Number of features:",t.features_cnt)
runModel(MODELS["LOF"])

Valid: 1027  Anomaly: 323
Number of features: 32
### Model Name: LOF2  ###
400 323
+----------+----------+-----------+--------+----------+
| Mix Data | Accuracy | Precision | Recall | F1 score |
+----------+----------+-----------+--------+----------+
|    0     |  0.638   |   0.332   | 0.812  |  0.471   |
|    1     |  0.655   |   0.355   |  0.9   |  0.509   |
|    2     |  0.645   |   0.342   |  0.85  |  0.487   |
|    3     |  0.655   |   0.355   |  0.9   |  0.509   |
|    4     |  0.623   |   0.311   | 0.738  |  0.437   |
|   Avg    |  0.643   |   0.339   |  0.84  |  0.483   |
+----------+----------+-----------+--------+----------+




0.483

# Evaluation
Measurements for experiments section in the paper

In [40]:
SRC_CLASS = "IP_CAM"
src_f1_score = []
t = Pull(DATA_CLASS[SRC_CLASS],1)
for data_cl in DATA_CLASS:
   # if data_cl == SRC_CLASS:
   #     continue
    a = Pull(DATA_CLASS[data_cl],1)
    print("RESULST FOR CLASS:",data_cl)
    if data_cl == SRC_CLASS:
        runModel(MODELS["LOF"])
    else:
        src_f1_score.append(runModel(MODELS["LOF"]))
#print(src_f1_score)
print(round(mean(src_f1_score),3))

RESULST FOR CLASS: IKEA_APP
### Model Name: LOF2  ###
323 388
+----------+----------+-----------+--------+----------+
| Mix Data | Accuracy | Precision | Recall | F1 score |
+----------+----------+-----------+--------+----------+
|    0     |  0.815   |   0.902   | 0.708  |  0.793   |
|    1     |  0.877   |   0.889   | 0.862  |  0.875   |
|    2     |  0.869   |   0.944   | 0.785  |  0.857   |
|    3     |  0.867   |    0.98   |  0.75  |   0.85   |
|    4     |  0.875   |   0.929   | 0.812  |  0.867   |
|   Avg    |  0.861   |   0.929   | 0.783  |  0.848   |
+----------+----------+-----------+--------+----------+
RESULST FOR CLASS: IKEA_HOMEKIT_CLEAR
### Model Name: LOF2  ###
323 16
+----------+----------+-----------+--------+----------+
| Mix Data | Accuracy | Precision | Recall | F1 score |
+----------+----------+-----------+--------+----------+
|    0     |  0.864   |   0.982   | 0.846  |  0.909   |
|    1     |  0.765   |   0.979   | 0.723  |  0.832   |
|    2     |   0.84   |   0



RESULST FOR CLASS: IP_CAM
### Model Name: LOF2  ###
323 323
+----------+----------+-----------+--------+----------+
| Mix Data | Accuracy | Precision | Recall | F1 score |
+----------+----------+-----------+--------+----------+
|    0     |  0.477   |   0.486   | 0.785  |   0.6    |
|    1     |  0.531   |   0.519   | 0.862  |  0.647   |
|    2     |  0.531   |    0.52   | 0.785  |  0.626   |
|    3     |  0.461   |   0.475   |  0.75  |  0.582   |
|    4     |  0.492   |   0.495   | 0.812  |  0.615   |
|   Avg    |  0.498   |   0.499   | 0.799  |  0.614   |
+----------+----------+-----------+--------+----------+




RESULST FOR CLASS: IP_CAM_MISCONFIG
### Model Name: LOF2  ###
323 400
+----------+----------+-----------+--------+----------+
| Mix Data | Accuracy | Precision | Recall | F1 score |
+----------+----------+-----------+--------+----------+
|    0     |   0.8    |   0.855   | 0.723  |  0.783   |
|    1     |  0.831   |   0.852   |  0.8   |  0.825   |
|    2     |  0.815   |   0.836   | 0.785  |   0.81   |
|    3     |  0.797   |   0.806   | 0.781  |  0.794   |
|    4     |  0.773   |   0.807   | 0.719  |   0.76   |
|   Avg    |  0.803   |   0.831   | 0.762  |  0.794   |
+----------+----------+-----------+--------+----------+




RESULST FOR CLASS: NORMAL_USER
### Model Name: LOF2  ###
323 400
+----------+----------+-----------+--------+----------+
| Mix Data | Accuracy | Precision | Recall | F1 score |
+----------+----------+-----------+--------+----------+
|    0     |  0.862   |    0.98   | 0.738  |  0.842   |
|    1     |  0.846   |   0.941   | 0.738  |  0.828   |
|    2     |  0.892   |    1.0    | 0.785  |  0.879   |
|    3     |   0.75   |   0.722   | 0.812  |  0.765   |
|    4     |  0.906   |   0.981   | 0.828  |  0.898   |
|   Avg    |  0.851   |   0.925   |  0.78  |  0.842   |
+----------+----------+-----------+--------+----------+
RESULST FOR CLASS: VOICE_ASSISTANT
### Model Name: LOF2  ###
323 332
+----------+----------+-----------+--------+----------+
| Mix Data | Accuracy | Precision | Recall | F1 score |
+----------+----------+-----------+--------+----------+
|    0     |  0.838   |   0.907   | 0.754  |  0.824   |
|    1     |  0.831   |   0.906   | 0.738  |  0.814   |
|    2     |  0.885   |   



# Single Models
Used for independet testing with any part of provided dataset

## Isolated Forest

In [41]:
#Create Model
rng = np.random.RandomState(12345)
clf = IsolationForest(n_estimators = 100, max_samples="auto",max_features=1,bootstrap=False ,random_state=rng, behaviour='new', contamination='auto')

kf = KFold(3, True)
t_data = np.array(t.data)
a_data = np.array(a.data)
iteration_cnt = 0
for train_index, test_index in kf.split(t_data):
    iteration_cnt += 1
    #Train
    clf.fit(t_data[train_index])
    #Evaluate 
    y_pred_valid = clf.predict(t_data[test_index])
    y_pred_outliers = clf.predict(a.data)
    print("===== Iteration:",iteration_cnt,"=====")
    #print_metrics([1]*len(y_pred_valid),y_pred_valid,label="Valid")
    #print_metrics([-1]*len(y_pred_outliers),y_pred_outliers,label="Anomaly")
    #Concatenate above results from anomaly and valid dataset for clearer results
    print_metrics([1]*len(y_pred_valid)+[-1]*len(y_pred_outliers),np.concatenate((y_pred_valid,y_pred_outliers),axis=None),label="Mix")
    
    #m_valid.update([1]*len(y_pred_valid),y_pred_valid)
    #m_anomaly.update([-1]*len(y_pred_outliers),y_pred_outliers)
    #m_valid.print()
    #m_anomaly.print()
    

  warn(


===== Iteration: 1 =====
### Metric Mix ###
TP:      99  22.50%
FN:       9   2.05%
FP:     259  58.86%
TN:      73  16.59%
Accuracy:    39.09%
Precision:  0.2765
Recall:     0.9167
F1 score:   0.4249


  warn(


===== Iteration: 2 =====
### Metric Mix ###
TP:      96  21.82%
FN:      12   2.73%
FP:     173  39.32%
TN:     159  36.14%
Accuracy:    57.95%
Precision:  0.3569
Recall:     0.8889
F1 score:   0.5093
===== Iteration: 3 =====
### Metric Mix ###
TP:      98  22.32%
FN:       9   2.05%
FP:     243  55.35%
TN:      89  20.27%
Accuracy:    42.60%
Precision:  0.2874
Recall:     0.9159
F1 score:   0.4375


  warn(


### LOF Novelty

In [42]:
#Create Model
clf = LocalOutlierFactor(n_neighbors = 10, metric = "minkowski", novelty=True, contamination='auto')

kf = KFold(3, True)
t_data = np.array(t.data)
a_data = np.array(a.data)
iteration_cnt = 0
for train_index, test_index in kf.split(t_data):
    iteration_cnt += 1
    #Train
    clf.fit(t_data[train_index])
    #Evaluate 
    y_pred_valid = clf.predict(t_data[test_index])
    y_pred_outliers = clf.predict(a.data)
    print("===== Iteration:",iteration_cnt,"=====")
    #print_metrics([1]*len(y_pred_valid),y_pred_valid,label="Valid")
    #print_metrics([-1]*len(y_pred_outliers),y_pred_outliers,label="Anomaly")
    #Concatenate above results from anomaly and valid dataset for clearer results
    print_metrics([1]*len(y_pred_valid)+[-1]*len(y_pred_outliers),np.concatenate((y_pred_valid,y_pred_outliers),axis=None),label="Mix")
    score_v = clf.decision_function(t_data[test_index])
    score_a = clf.decision_function(a.data)
    cnt = 0
    for i in score_a:
        if i < -10 :
            cnt += 1
    #print(cnt)
    



===== Iteration: 1 =====
### Metric Mix ###
TP:      81  18.41%
FN:      27   6.14%
FP:      26   5.91%
TN:     306  69.55%
Accuracy:    87.95%
Precision:  0.7570
Recall:     0.7500
F1 score:   0.7535
===== Iteration: 2 =====
### Metric Mix ###
TP:      90  20.45%
FN:      18   4.09%
FP:      25   5.68%
TN:     307  69.77%
Accuracy:    90.23%
Precision:  0.7826
Recall:     0.8333
F1 score:   0.8072
===== Iteration: 3 =====
### Metric Mix ###
TP:      85  19.36%
FN:      22   5.01%
FP:      25   5.69%
TN:     307  69.93%
Accuracy:    89.29%
Precision:  0.7727
Recall:     0.7944
F1 score:   0.7834




### OneClassSVM

In [43]:
#Create Model
clf = OneClassSVM(kernel='sigmoid',gamma="auto",coef0=0.0, nu=0.1)

kf = KFold(3, True)
t_data = np.array(t.data)
a_data = np.array(a.data)
iteration_cnt = 0
for train_index, test_index in kf.split(t_data):
    iteration_cnt += 1
    #Train
    clf.fit(t_data[train_index])
    #Evaluate 
    y_pred_valid = clf.predict(t_data[test_index])
    y_pred_outliers = clf.predict(a.data)
    print("===== Iteration:",iteration_cnt,"=====")
    #print_metrics([1]*len(y_pred_valid),y_pred_valid,label="Valid")
    #print_metrics([-1]*len(y_pred_outliers),y_pred_outliers,label="Anomaly")
    #Concatenate above results from anomaly and valid dataset for clearer results
    print_metrics([1]*len(y_pred_valid)+[-1]*len(y_pred_outliers),np.concatenate((y_pred_valid,y_pred_outliers),axis=None),label="Mix")
    


===== Iteration: 1 =====
### Metric Mix ###
TP:       0   0.00%
FN:     108  24.55%
FP:       0   0.00%
TN:     332  75.45%
Accuracy:    75.45%
Precision:     nan
Recall:     0.0000
F1 score:      nan
===== Iteration: 2 =====
### Metric Mix ###
TP:       0   0.00%
FN:     108  24.55%
FP:       0   0.00%
TN:     332  75.45%
Accuracy:    75.45%
Precision:     nan
Recall:     0.0000
F1 score:      nan
===== Iteration: 3 =====
### Metric Mix ###
TP:       0   0.00%
FN:     107  24.37%
FP:       0   0.00%
TN:     332  75.63%
Accuracy:    75.63%
Precision:     nan
Recall:     0.0000
F1 score:      nan


  prec = tp/(tp+fp)
  prec = tp/(tp+fp)
  prec = tp/(tp+fp)
