In [1]:
import numpy as np
from numpy import linalg as LA
import pickle
import copy
import pandas as pd
import scipy
import time
import csv

In [2]:
def genRandomMatrix(D, feature_size):
    """Draw a random bipolar (+1 / -1) matrix of shape (D, feature_size).

    Entries are sampled with np.random.rand (global NumPy random state);
    draws strictly greater than 0.5 become +1, all others become -1.
    """
    uniform_draws = np.random.rand(D, feature_size)
    above_half = uniform_draws > 0.5
    return np.where(above_half, 1, -1)

In [3]:
def binarizeMatrix(m):
    """Map each element of `m` to +1 when strictly positive, otherwise -1.

    Zero is not positive, so zeros are mapped to -1 together with the
    negative entries.
    """
    is_positive = m > 0
    return np.where(is_positive, 1, -1)

In [4]:
def encodeData(inputData, random_matrix):
    """Encode every sample as a bipolar hypervector via random projection.

    For each row x of `inputData` the projection `random_matrix . x` is
    computed and binarized with binarizeMatrix (strictly positive -> +1,
    otherwise -1).

    Parameters
    ----------
    inputData : 2-D array-like, shape (num_samples, feature_size)
        One sample per row. An np.matrix, a plain 2-D ndarray, or a list of
        equal-length rows are all accepted.
    random_matrix : array-like, shape (D, feature_size)
        Projection matrix, e.g. the output of genRandomMatrix.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array of length D per input row, in input order.
    """
    # Convert once, outside the loop, instead of on every dot product.
    projection = np.asarray(random_matrix)
    encoded_data = []
    for data in inputData:
        # Iterating an np.matrix yields (1, feature_size) rows while an
        # ndarray yields 1-D rows; flattening makes both produce a 1-D
        # hypervector of length D instead of depending on the matrix type.
        sample = np.asarray(data).ravel()
        encoded_data.append(binarizeMatrix(np.dot(projection, sample)))
    return encoded_data

In [5]:

def train(encoded_data):
    """Bundle hypervectors into one class vector by element-wise summation.

    Parameters
    ----------
    encoded_data : iterable of array-like
        Equal-length hypervectors (e.g. the list returned by encodeData).

    Returns
    -------
    numpy.ndarray or list
        The element-wise sum of all hypervectors. An empty list is returned
        when `encoded_data` contains no items.

    The inputs are never modified: the accumulator starts from a copy of the
    first hypervector rather than from the hypervector itself.
    """
    train_out = None
    for hv in encoded_data:
        if train_out is None:
            # Copy, so the in-place additions below cannot write into the
            # caller's first hypervector.
            train_out = np.array(hv)
        else:
            train_out += hv
    return [] if train_out is None else train_out

In [6]:
def cosAngle(u, v):
    """Return the cosine of the angle between vectors `u` and `v`.

    Computed as the dot product divided by the product of the two norms
    (LA.norm with its default arguments).
    """
    inner = np.dot(u, v)
    norm_product = LA.norm(u) * LA.norm(v)
    return inner / norm_product

In [7]:
# Load the benign dataset: one sample per CSV row, every field parsed as float.
inData = []
label = 'benign'
fileAdr = '../iot_datasets/danmini/' + label + '.csv'
print('loading ' + fileAdr)
# newline='' is what the csv module asks for when it is handed a file object,
# so that line endings are interpreted by the csv reader itself.
with open(fileAdr, newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        if not row:
            # csv.reader yields [] for a blank line; skipping it keeps every
            # stored sample the same width.
            continue
        inData.append([float(item) for item in row])


loading ../iot_datasets/danmini/benign.csv


In [8]:
# Use the first 2/3 of the benign samples for training and the last 1/3 for testing.
# The split index is derived from the number of loaded rows instead of being
# hard-coded, so it stays a 2/3 split if the dataset changes
# (for 39549 rows this gives 26366 training and 13183 testing samples).
num_btrain = (2 * len(inData)) // 3
btrain = inData[:num_btrain]
print('samples for training: ' + str(len(btrain)))
btest = inData[num_btrain:]
print('samples for testing: ' + str(len(btest)))

# Rows become a 2-D np.matrix; its second dimension is the number of features.
trainData = np.matrix(btrain)
num_trainData, feature_size = trainData.shape
print('feature size: ' + str(feature_size))

samples for training: 26366
samples for testing: 13183
feature size: 115


In [9]:
# Encode the training samples and add them together to form the final benign
# class vector used for classification.
D = 500  # hypervector dimensionality (number of rows of the projection matrix)
SEED = 0
# genRandomMatrix draws from the global NumPy random state; seeding it makes
# the projection matrix, and everything derived from it, identical on every
# Restart & Run All.
np.random.seed(SEED)
random_matrix = genRandomMatrix(D, feature_size)
encoded_data = encodeData(trainData, random_matrix)

benignHV = train(encoded_data)


In [10]:
def test(benignHV, test_dict, threshold=0.5):
    """Classify hypervectors as benign or malicious traffic and count hits.

    A hypervector is predicted 'benign' when its cosine similarity to
    `benignHV` (via cosAngle) is strictly greater than `threshold`, and
    'malicious' otherwise.

    Parameters
    ----------
    benignHV : array-like
        Class hypervector representing benign traffic.
    test_dict : dict
        Maps a true label ('benign' or 'malicious') to an iterable of
        hypervectors carrying that label. Samples stored under any other
        label are counted in the total but can never be counted as correct.
    threshold : float, optional
        Similarity cut-off separating the two classes. Defaults to 0.5.

    Returns
    -------
    (correct, total) : tuple of int
        Number of correctly classified samples and number of samples seen.
    """
    correct = 0
    total = 0
    for label, hypervectors in test_dict.items():
        for hv in hypervectors:
            angle = cosAngle(hv, benignHV)
            # Every sample gets exactly one prediction. A similarity equal to
            # the threshold (or NaN, which compares False) is not "greater
            # than" it and therefore lands on the malicious side.
            predicted = 'benign' if angle > threshold else 'malicious'
            if predicted == label:
                correct += 1
            total += 1
    return correct, total
            

In [11]:
# Load the attack datasets; all of them are used for testing only.
attackLabels = ['ack', 'combo', 'gafScan', 'gafUdp', 'junk', 'scan', 'syn', 'tcp', 'udp', 'udpplain']
testData = []

for label in attackLabels:
    fileAdr = '../iot_datasets/danmini/' + label + '.csv'
    print('loading ' + fileAdr)
    # newline='' is what the csv module asks for when it is handed a file
    # object, so that line endings are interpreted by the csv reader itself.
    with open(fileAdr, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if not row:
                # csv.reader yields [] for a blank line; skipping it keeps
                # every stored sample the same width.
                continue
            testData.append([float(item) for item in row])

loading ../iot_datasets/danmini/ack.csv
loading ../iot_datasets/danmini/combo.csv
loading ../iot_datasets/danmini/gafScan.csv
loading ../iot_datasets/danmini/gafUdp.csv
loading ../iot_datasets/danmini/junk.csv
loading ../iot_datasets/danmini/scan.csv
loading ../iot_datasets/danmini/syn.csv
loading ../iot_datasets/danmini/tcp.csv
loading ../iot_datasets/danmini/udp.csv
loading ../iot_datasets/danmini/udpplain.csv


In [14]:
# Wrap the held-out benign samples and the attack samples as np.matrix ...
btestData = np.matrix(btest)
testData = np.matrix(testData)

# ... and encode both with the same projection matrix that was used for training.
encoded_test = encodeData(testData, random_matrix)
b_encoded_test = encodeData(btestData, random_matrix)

print('done')

done


In [13]:
# Group the encoded test hypervectors by their true label.
testDict = {
    'benign': b_encoded_test,
    'malicious': encoded_test,
}
# Total number of test hypervectors across both labels.
testlen = sum(len(hypervectors) for hypervectors in testDict.values())
print('done')

done


In [16]:
# Score every test hypervector against the benign class vector and report the
# share of correct decisions as a percentage.
correct, total = test(benignHV, testDict)
accuracy = (correct / total) * 100
print(f"accuracy: {accuracy}")

accuracy: 99.77157300956378
