In [7]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

In [8]:
# Location of the CSV dataset that read_data() loads (relative to the working directory).
data_path = './data/cic_araki_portmap.csv'

In [9]:
header = True # True when the CSV's first row is a header row; set to False for header-less files.
label_index = -1 # Column index of the label; -1 means the label is the last column.
train_size = 0.2 # Fraction of rows, taken from the top of the file, used as the training split.

In [10]:
# Data Read 
def read_data(data_path, has_header=None):
    """Load a CSV file and return its contents as a NumPy array.

    Parameters
    ----------
    data_path : str or file-like
        Location of the CSV data; handed straight to ``pandas.read_csv``.
    has_header : bool, optional
        Whether the first row of the file holds column names. When omitted,
        the notebook-level ``header`` setting is used, so existing
        ``read_data(path)`` calls keep working unchanged.

    Returns
    -------
    numpy.ndarray
        The table converted with ``DataFrame.to_numpy()``. A header row, if
        present, is consumed as column names and is not part of the array.
    """
    if has_header is None:
        # Fall back to the notebook configuration cell.
        has_header = header
    if has_header:
        frame = pd.read_csv(data_path)
    else:
        # header=None tells pandas that the first row is already data.
        frame = pd.read_csv(data_path, header=None)
    return frame.to_numpy()

In [11]:
# Count the rows whose label equals a given value
def count(string, data, label_col=None):
    """Count the rows of ``data`` whose label equals ``string``.

    Parameters
    ----------
    string : scalar
        Label value to look for (the callers in this notebook pass 0. or 1.).
    data : numpy.ndarray
        2-D array with one sample per row.
    label_col : int, optional
        Index of the label column. When omitted, the notebook-level
        ``label_index`` setting is used, so existing two-argument calls
        behave as before.

    Returns
    -------
    int
        Number of rows whose label column compares equal to ``string``.
    """
    if label_col is None:
        # Fall back to the notebook configuration cell.
        label_col = label_index
    # One vectorised comparison replaces the row-by-row Python loop.
    return int(np.count_nonzero(data[:, label_col] == string))

In [12]:
# See how many Normal traffic and attack data in the data.
def overview(data):
    """Print how many rows of ``data`` are labelled normal (0.) and attack (1.).

    The tallies come from ``count``; nothing is returned.
    """
    # Tally both classes first, then report them.
    totals = [count(label_value, data) for label_value in (0., 1.)]
    print("Whole normal instances :", totals[0])
    print("Whole attack instances :", totals[1])

In [13]:
# Pick the rows that carry a specific label
def pick(string, data, label_col=None):
    """Return a copy of the rows of ``data`` whose label equals ``string``.

    Parameters
    ----------
    string : scalar
        Label value to select (the callers in this notebook pass 0. or 1.).
    data : numpy.ndarray
        2-D array with one sample per row.
    label_col : int, optional
        Index of the label column. When omitted, the notebook-level
        ``label_index`` setting is used, so existing two-argument calls
        behave as before.

    Returns
    -------
    numpy.ndarray
        New array holding the matching rows in their original order; the
        input array is not modified and the result shares no memory with it.
    """
    if label_col is None:
        # Fall back to the notebook configuration cell.
        label_col = label_index
    print("Picking the samples of " + str(string) + "...")
    # A boolean mask replaces the two row-by-row passes (count, then copy).
    matches = np.asarray(data[:, label_col] == string, dtype=bool)
    # Some NumPy versions collapse an incomparable-type comparison to a single
    # False; expand it so the mask always has one entry per row.
    matches = np.broadcast_to(matches, (data.shape[0],))
    data_target = data[matches]  # boolean indexing already yields a fresh copy
    print("Picked : ", data_target.shape)
    return data_target

In [33]:
# Drop the constant-valued columns and the label column, then cast to float32
def reduct(data, label_index):
    """Remove constant columns and the label column, returning float32 data.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array with one sample per row.
    label_index : int or None
        Index of the label column to drop as well (negative indices are
        allowed); pass ``None`` to keep every non-constant column.

    Returns
    -------
    numpy.ndarray
        Copy of ``data`` without the dropped columns, cast to ``float32``.

    Notes
    -----
    A column is dropped when every entry equals the entry in the first row.
    With zero rows no column can be judged constant, so only the label column
    is removed instead of raising ``IndexError``.
    """
    if data.shape[0] > 0:
        # Compare every row with the first one, column by column, in one shot.
        is_constant = np.all(data == data[0], axis=0)
        all_same = np.flatnonzero(is_constant).tolist()
    else:
        all_same = []
    if label_index is not None:
        # May duplicate an index already listed (e.g. a constant label
        # column); np.delete tolerates repeated indices.
        all_same.append(label_index)

    data_target = np.delete(data, all_same, axis=1).astype('float32')
    print('Reducted :', data_target.shape)
    return data_target

In [None]:
# Identify the column indices that should be removed (constant columns and the label).
def reduct_index(data, label_index):
    """List the column indices that should be removed from ``data``.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array with one sample per row.
    label_index : int or None
        Index of the label column; it is appended to the result unless it is
        ``None``.

    Returns
    -------
    list of int
        Indices of the columns whose entries all equal the first row's entry,
        in ascending order, followed by ``label_index`` when given.

    Notes
    -----
    With zero rows no column can be judged constant, so only the label index
    (if any) is returned instead of raising ``IndexError``.
    """
    if data.shape[0] > 0:
        # Compare every row with the first one, column by column, in one shot.
        is_constant = np.all(data == data[0], axis=0)
        all_same = np.flatnonzero(is_constant).tolist()
    else:
        all_same = []
    if label_index is not None:
        all_same.append(label_index)
    return all_same

In [16]:
# Normalize
def normalize_data(data):
    """Standardise every column to zero mean and unit sample standard deviation.

    Parameters
    ----------
    data : numpy.ndarray
        2-D numeric array with one sample per row.

    Returns
    -------
    data_normalized : numpy.ndarray
        ``(data - mean) / std`` computed per column.
    mean : numpy.ndarray
        Per-column mean.
    std : numpy.ndarray
        Per-column sample standard deviation (``ddof=1``), returned exactly
        as computed.

    Notes
    -----
    Columns whose standard deviation is zero or undefined (constant column,
    single row) are divided by 1 instead, so they come out as zeros rather
    than NaN/inf.
    """
    mean = data.mean(axis = 0)
    std = data.std(axis = 0, ddof = 1)
    # `std > 0` is False for both 0 and NaN, so either case falls back to 1.
    divisor = np.where(std > 0, std, np.ones_like(std))
    data_normalized = (data - mean) / divisor
    return data_normalized, mean, std

In [17]:
# Cartesian coordinate
def coordinate_data(data):
    """Enumerate the (row, column) index pairs of the strictly lower triangle.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array; only its number of columns ``n`` is used.

    Returns
    -------
    coordinate : numpy.ndarray
        Integer array of shape ``(n * (n - 1) / 2, 2)``. Each row is a pair
        ``[k, j]`` with ``k > j``, ordered by ``j`` first and then ``k``:
        ``[1, 0], [2, 0], ..., [n-1, 0], [2, 1], ...``.
    cov_length : int
        Number of pairs, ``n * (n - 1) // 2``.

    Notes
    -----
    The indices use NumPy's native index dtype. An ``int8`` array would wrap
    around silently once the data has more than 127 columns.
    """
    area_length = data.shape[1]
    # Exact integer arithmetic; n * (n - 1) is always even.
    cov_length = area_length * (area_length - 1) // 2
    # triu_indices walks the upper triangle row by row; swapping its two
    # outputs gives the lower triangle column by column.
    upper_rows, upper_cols = np.triu_indices(area_length, k=1)
    coordinate = np.column_stack((upper_cols, upper_rows))
    return coordinate, cov_length

In [18]:
# TAM lower
def tam_lower(data, pairs=None):
    """Compute half the product of two features for every index pair and sample.

    For each row ``x`` of ``data`` and each pair ``[k, j]`` the result holds
    ``x[k] * x[j] / 2`` (presumably the lower-triangle entries of a triangle
    area map; confirm against the method description).

    Parameters
    ----------
    data : numpy.ndarray
        2-D numeric array with one sample per row.
    pairs : array-like of shape (m, 2), optional
        Column-index pairs to combine. When omitted, the notebook-level
        ``coordinate`` and ``cov_length`` produced by ``coordinate_data`` are
        used, so existing ``tam_lower(data)`` calls behave as before.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape ``(n_samples, m)``.
    """
    if pairs is None:
        # Fall back to the globals set up by the training cell.
        pairs = coordinate[:cov_length]
    pairs = np.asarray(pairs)
    # Fancy indexing gathers both operands for all samples at once, replacing
    # the pure-Python double loop over rows and pairs.
    first = data[:, pairs[:, 0]]
    second = data[:, pairs[:, 1]]
    return (first * second / 2).astype("float32")

In [19]:
# Calc Covariance of TAM using lower TAM.
def covariance(tam):
    """Sample covariance matrix of the TAM features.

    Parameters
    ----------
    tam : numpy.ndarray
        Array of shape ``(n_samples, n_features)`` as produced by
        ``tam_lower`` (one sample per row).

    Returns
    -------
    numpy.ndarray
        ``(n_features, n_features)`` covariance matrix with ``ddof=1``.

    Notes
    -----
    ``np.cov`` treats each *row* as a variable by default. Because the samples
    are stored in rows here, ``rowvar=False`` is required; without it the
    result is an ``n_samples x n_samples`` matrix between samples.
    """
    cov = np.cov(tam, rowvar = False, ddof = 1)
    return cov

In [46]:
# data picking
data = read_data(data_path)
# The first `train_size` fraction of rows, in file order, forms the training split.
train_data = data[:(int(data.shape[0]*train_size)), :]
overview(train_data)

# preprocess
# The model is fitted on normal (label 0.) traffic only.
train_normal = pick(0., train_data)
# Use the configured label column instead of a hard-coded -1 so this step
# stays consistent with count()/pick() if `label_index` is ever changed.
train_reducted = reduct(train_normal, label_index)
# `mean` and `std` are kept at notebook level alongside the normalized data.
train_reducted_normalized, mean, std = normalize_data(train_reducted)

# training
# `coordinate` and `cov_length` are read as globals by tam_lower().
coordinate, cov_length = coordinate_data(train_reducted_normalized)
tam_data = tam_lower(train_reducted_normalized)
cov = covariance(tam_data)

Whole normal instances : 4678
Whole attack instances : 11322
Picking the samples of 0.0...
Picked :  (4678, 41)
Reducted : (4678, 37)


In [44]:
# evaluation
# Everything after the training split, in file order, forms the test split.
test_data = data[(int(data.shape[0]*train_size)):, :]
overview(test_data)

# preprocess
test_normal = pick(0., test_data)
# Attack rows carry label 1.; picking 0. here would only duplicate test_normal.
test_attack = pick(1., test_data)
# Drop exactly the columns that reduct() removed from `train_normal` during
# training. The former `train` name is not defined anywhere in the notebook,
# so the cell failed on a fresh kernel.
test_reducted = np.delete(test_data, reduct_index(train_normal, label_index), 1)
print(test_reducted.shape)

Whole normal instances : 8
Whole attack instances : 63992
(64000, 37)
