In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import entropy
from matplotlib.collections import LineCollection
from sklearn.metrics import RocCurveDisplay
from pycaret.anomaly import *
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Restore the five saved anomaly detectors, in the same order as before, and
# bind each one to its own module-level name.
iforest, histogram, pca, svm, cluster = [
    load_model(model_path)
    for model_path in (
        'models/iforest',
        'models/histogram',
        'models/pca',
        'models/svm',
        'models/cluster',
    )
]

In [None]:
# Scenario names; each one maps to 'testsets/raw/raw_<name>.csv'.
files = ['evaluation','arp','combination','injection','readbomb','recon','synflood']
# Column labels used for the entropy tables (the last one is the attack flag).
col = ['eth.src', 'eth.dst', 'ip.src', 'ip.dst', 'ip.len', 'ip.ttl', 'ip.proto', 'attack']

# One raw DataFrame per scenario.
data = [pd.read_csv('testsets/raw/raw_'+name+'.csv') for name in files]

# Number of distinct values in each scenario's 'time' column.
time = [len(frame['time'].unique()) for frame in data]

# Zero-filled placeholders: one row per distinct time value; the feature table
# is as wide as the raw frame, the attack vector is one-dimensional.
file_entropy = [
    np.zeros((n_steps, frame.shape[1]))
    for n_steps, frame in zip(time, data)
]
attack_entropy = [np.zeros((n_steps,)) for n_steps in time]


In [None]:
def calc_entropy(value):
    """Return the entropy of the empirical distribution of `value`.

    The occurrences of each distinct element are counted with `np.unique`
    and the counts are handed to `scipy.stats.entropy`, which normalises
    them to probabilities and uses its default (natural) logarithm.
    """
    occurrences = np.unique(value, return_counts=True)[1]
    return entropy(occurrences)

def get_entropy(column,i,df):
    """Entropy of `column` over the rows of `df` whose 'time' equals `i`.

    Parameters
    ----------
    column : label of the column whose values are measured.
    i      : value matched against df['time'].
    df     : DataFrame holding a 'time' column and `column`.
    """
    # Row filter and column selection in a single .loc lookup.
    return calc_entropy(df.loc[df['time'] == i, column])

In [None]:
def smooth(y, box_pts):
    """Moving-average filter of width `box_pts` applied to `y`.

    `y` is convolved with a uniform kernel whose weights sum to one, using
    NumPy's 'same' mode; near both ends the window runs past the data, so
    those output values are pulled towards zero.
    """
    kernel = np.ones(box_pts)
    kernel /= box_pts  # uniform weights of 1/box_pts each
    return np.convolve(y, kernel, mode='same')

In [None]:
# for f in range(len(files)):
#     for i in range(time[f]):
#         result = [get_entropy(col[j],i,data[f]) for j in range(1,len(col)-1)]
#         file_entropy[f][i,1:-1] = result
#     for k in range(time[f]):
#         attack = data[f].loc[data[f]['time'] == k]['attack'].to_numpy()
#         value = 0
#         if np.sum(attack)>1:
#             value = 1
#         file_entropy[f][k,-1] = value
        

            
#     attack_entropy[f] = file_entropy[f][:,-1]
    
#     to_file = pd.DataFrame(file_entropy[f])
#     to_file.columns = col
#     to_file.to_csv('testsets/entropy/'+files[f]+'_entropy.csv', index=False)
#     attack_to_file = pd.DataFrame(attack_entropy[f])
#     attack_to_file.columns = ['attack']
#     attack_to_file.to_csv('testsets/entropy/'+files[f]+'_attack.csv', index=False)

# Load the precomputed entropy tables and attack labels for every scenario,
# replacing the zero-filled placeholders in file_entropy / attack_entropy.
for f in range(len(files)):
    entropy_frame = pd.read_csv('testsets/entropy/'+files[f]+'_entropy.csv')
    # Select (and order) the columns through the shared `col` list instead of a
    # second hard-coded copy, so the array layout cannot drift away from the
    # labels that are attached to it later on.
    file_entropy[f] = entropy_frame[col].to_numpy()
    # DataFrame.to_numpy() yields a 2-D array here (one column per CSV column).
    attack_entropy[f] = pd.read_csv('testsets/entropy/'+files[f]+'_attack.csv').to_numpy()

In [None]:
# For every scenario, the row indices at which the attack label is positive.
# np.where returns one index array per axis; element 0 holds the row indices.
attack_time = [
    np.where(attack_entropy[idx] > 0)[0]
    for idx in range(len(files))
]

In [None]:
# Smooth every column (features and the attack flag alike) with a 1000-point
# moving average, working on copies so file_entropy itself stays untouched.
smooth_entropy = [file_entropy[k].copy() for k in range(len(file_entropy))]
for f in range(len(files)):
    smoothed = smooth_entropy[f]
    for col_idx in range(smoothed.shape[1]):
        smoothed[:, col_idx] = smooth(smoothed[:, col_idx], 1000)

# Drop the first 1000 and the last 500 rows of each series, then turn the
# smoothed attack column back into a binary flag: any positive value becomes 1.
for f in range(len(files)):
    trimmed = smooth_entropy[f][1000:-500, :]
    attack_flag = trimmed[:, -1]  # view into `trimmed`, so the write below sticks
    attack_flag[attack_flag > 0] = 1
    smooth_entropy[f] = trimmed

In [None]:
# Row indices, within each trimmed and smoothed series, whose attack flag is 1.
#
# This stays a plain Python list of index arrays. The per-scenario arrays can
# differ in length, and calling np.asarray on such a ragged list raises
# ValueError on NumPy >= 1.24 (earlier releases emit a deprecation warning and
# build an object array). Later cells only ever index smooth_time[f], which
# works identically on a list.
smooth_time = [
    np.where(smooth_entropy[idx][:, -1] == 1)[0]
    for idx in range(len(files))
]

# Index into `files` of the scenario evaluated by the cells below
# (files[6] is 'synflood').
f = 6

In [None]:
# for i in range(1,file_entropy[f].shape[1]-1):
#     x = range(file_entropy[f].shape[0])
#     y = file_entropy[f][:,i]
#     points = np.array([x, y]).T.reshape(-1, 1, 2)
#     segments = np.concatenate([points[:-1], points[1:]], axis=1)

#     cm = dict(zip(range(0,2,1),list("br")))
#     colors = list( map( cm.get , np.isin(x,attack_time[f]) ))

#     lc = LineCollection(segments, colors=colors, linewidths= 1)
#     fig, ax = plt.subplots()
#     ax.add_collection(lc)

#     ax.autoscale()
#     ax.margins(y=1)
#     plt.xlabel('Time')
#     plt.ylabel(col[i])
#     plt.title(col[i])
#     plt.ylim(0.9,1)
#     plt.savefig('plot/'+files[f]+'/entropy/'+files[f]+'_'+col[i]+'_entropy.pdf')
#     plt.show()

In [None]:
# for i in range(0,len(col)-1):
# # for i in range(0,1):
#     x = range(smooth_entropy[f].shape[0])
#     y = smooth_entropy[f][:,i]
#     points = np.array([x, y]).T.reshape(-1, 1, 2)
#     segments = np.concatenate([points[:-1], points[1:]], axis=1)

#     cm = dict(zip(range(0,2,1),list("br")))
#     colors = list( map( cm.get , np.isin(x,smooth_time[f]) ))

#     lc = LineCollection(segments, colors=colors, linewidths=1)
#     fig, ax = plt.subplots()
#     ax.add_collection(lc)

#     ax.autoscale()
#     ax.margins(y=1)
#     plt.title(col[i])
#     plt.xlabel('Time (second)')
#     plt.ylabel('Entropy')
# #     plt.ylim(0.67,0.71)
#     plt.savefig('plot/'+files[f]+'/smooth/'+files[f]+'_'+col[i]+'_entropy.pdf')
#     plt.show()

In [None]:
# Wrap the selected scenario's smoothed array in a DataFrame labelled with the
# shared column names.
smooth_entropy[f] = pd.DataFrame(smooth_entropy[f], columns=col)

In [None]:
# plotcol = np.asarray([['eth.src', 'eth.dst', 'ip.src', 'ip.dst']
#           ,['ip.len', 'ip.id', 'ip.ttl', 'ip.proto']
#           ,['tcp.srcport', 'tcp.dstport', 'tcp.seq', 'tcp.ack']
#           ,['tcp.flags', 'tcp.window_size', 'tcp.time_delta', 'tcp.time_relative']])

# fig, axs = plt.subplots(4, 4,constrained_layout = True,figsize=(20,10))



# for i in range(len(plotcol)):
#     for j in range(len(plotcol[i,:])):
#         x = range(smooth_entropy[f].shape[0])
#         y = smooth_entropy[f][plotcol[i,j]]
#         points = np.array([x, y]).T.reshape(-1, 1, 2)
#         segments = np.concatenate([points[:-1], points[1:]], axis=1)
#         cm = dict(zip(range(0,2,1),list("br")))
#         colors = list( map( cm.get , np.isin(x,smooth_time[f]) ))

#         lc = LineCollection(segments, colors=colors, linewidths=1)
#         axs[i, j].plot(x, y)
#         axs[i, j].set_title(plotcol[i,j])
#         axs[i, j].add_collection(lc)
#         axs[i, j].autoscale()
#         axs[i, j].margins(y=1)
#         axs[i, j].set(xlabel='Time (second)', ylabel='Entropy')
        
# plt.savefig('plot/'+files[f]+'/smooth/'+files[f]+'_entropy.pdf')

In [None]:
# Model input: every column of the selected scenario except the attack label.
test = smooth_entropy[f].drop(columns=['attack'])


In [None]:
# Score the selected scenario with the clustering detector and persist both the
# predicted flags and the ground-truth attack column.
cluster_result = predict_model(cluster,test)
anomaly = cluster_result['Anomaly']
anomaly.to_csv('plot/'+files[f]+'/result/anomaly_value_cluster.csv', index=False)
pd.DataFrame(smooth_entropy[f]['attack']).to_csv('plot/'+files[f]+'/result/true_value_cluster.csv', index=False)

# Confusion counts via boolean masks. The previous per-row loop evaluated
# `i in smooth_time[f]` (a linear scan of an ndarray) for every row, which is
# quadratic in the series length; the masks below give the same counts.
n_rows = smooth_entropy[f].shape[0]
is_attack = np.isin(np.arange(n_rows), smooth_time[f])
# Positional access; equivalent to the former label lookup `anomaly[i]` as long
# as the prediction frame keeps a default 0..n-1 index -- TODO confirm.
predicted = anomaly.to_numpy()
flagged = predicted == 1
cleared = predicted == 0

TP = int(np.count_nonzero(is_attack & flagged))
FP = int(np.count_nonzero(~is_attack & flagged))
TN = int(np.count_nonzero(~is_attack & cleared))
FN = int(np.count_nonzero(is_attack & cleared))
# Row indices of the misclassified samples, as plain Python ints.
fp_time = np.flatnonzero(~is_attack & flagged).tolist()
fn_time = np.flatnonzero(is_attack & cleared).tolist()

print("True positive =",TP)
print("False positive =",FP)
print("True negative =",TN)
print("False negative =",FN)

In [None]:
# Score the selected scenario with the isolation-forest detector and persist
# both the predicted flags and the ground-truth attack column.
iforest_result = predict_model(iforest,test)
anomaly = iforest_result['Anomaly']
anomaly.to_csv('plot/'+files[f]+'/result/anomaly_value_iforest.csv', index=False)
pd.DataFrame(smooth_entropy[f]['attack']).to_csv('plot/'+files[f]+'/result/true_value_iforest.csv', index=False)

# Confusion counts via boolean masks. The previous per-row loop evaluated
# `i in smooth_time[f]` (a linear scan of an ndarray) for every row, which is
# quadratic in the series length; the masks below give the same counts.
n_rows = smooth_entropy[f].shape[0]
is_attack = np.isin(np.arange(n_rows), smooth_time[f])
# Positional access; equivalent to the former label lookup `anomaly[i]` as long
# as the prediction frame keeps a default 0..n-1 index -- TODO confirm.
predicted = anomaly.to_numpy()
flagged = predicted == 1
cleared = predicted == 0

TP = int(np.count_nonzero(is_attack & flagged))
FP = int(np.count_nonzero(~is_attack & flagged))
TN = int(np.count_nonzero(~is_attack & cleared))
FN = int(np.count_nonzero(is_attack & cleared))
# Row indices of the misclassified samples, as plain Python ints.
fp_time = np.flatnonzero(~is_attack & flagged).tolist()
fn_time = np.flatnonzero(is_attack & cleared).tolist()

print("True positive =",TP)
print("False positive =",FP)
print("True negative =",TN)
print("False negative =",FN)

In [None]:
# Score the selected scenario with the histogram-based detector and persist
# both the predicted flags and the ground-truth attack column.
histogram_result = predict_model(histogram,test)
anomaly = histogram_result['Anomaly']
anomaly.to_csv('plot/'+files[f]+'/result/anomaly_value_histogram.csv', index=False)
pd.DataFrame(smooth_entropy[f]['attack']).to_csv('plot/'+files[f]+'/result/true_value_histogram.csv', index=False)

# Confusion counts via boolean masks. The previous per-row loop evaluated
# `i in smooth_time[f]` (a linear scan of an ndarray) for every row, which is
# quadratic in the series length; the masks below give the same counts.
n_rows = smooth_entropy[f].shape[0]
is_attack = np.isin(np.arange(n_rows), smooth_time[f])
# Positional access; equivalent to the former label lookup `anomaly[i]` as long
# as the prediction frame keeps a default 0..n-1 index -- TODO confirm.
predicted = anomaly.to_numpy()
flagged = predicted == 1
cleared = predicted == 0

TP = int(np.count_nonzero(is_attack & flagged))
FP = int(np.count_nonzero(~is_attack & flagged))
TN = int(np.count_nonzero(~is_attack & cleared))
FN = int(np.count_nonzero(is_attack & cleared))
# Row indices of the misclassified samples, as plain Python ints.
fp_time = np.flatnonzero(~is_attack & flagged).tolist()
fn_time = np.flatnonzero(is_attack & cleared).tolist()

print("True positive =",TP)
print("False positive =",FP)
print("True negative =",TN)
print("False negative =",FN)

In [None]:
# Score the selected scenario with the PCA detector and persist both the
# predicted flags and the ground-truth attack column.
pca_result = predict_model(pca,test)
anomaly = pca_result['Anomaly']
anomaly.to_csv('plot/'+files[f]+'/result/anomaly_value_pca.csv', index=False)
pd.DataFrame(smooth_entropy[f]['attack']).to_csv('plot/'+files[f]+'/result/true_value_pca.csv', index=False)

# Confusion counts via boolean masks. The previous per-row loop evaluated
# `i in smooth_time[f]` (a linear scan of an ndarray) for every row, which is
# quadratic in the series length; the masks below give the same counts.
n_rows = smooth_entropy[f].shape[0]
is_attack = np.isin(np.arange(n_rows), smooth_time[f])
# Positional access; equivalent to the former label lookup `anomaly[i]` as long
# as the prediction frame keeps a default 0..n-1 index -- TODO confirm.
predicted = anomaly.to_numpy()
flagged = predicted == 1
cleared = predicted == 0

TP = int(np.count_nonzero(is_attack & flagged))
FP = int(np.count_nonzero(~is_attack & flagged))
TN = int(np.count_nonzero(~is_attack & cleared))
FN = int(np.count_nonzero(is_attack & cleared))
# Row indices of the misclassified samples, as plain Python ints.
fp_time = np.flatnonzero(~is_attack & flagged).tolist()
fn_time = np.flatnonzero(is_attack & cleared).tolist()

print("True positive =",TP)
print("False positive =",FP)
print("True negative =",TN)
print("False negative =",FN)

In [None]:
# Score the selected scenario with the SVM detector and persist both the
# predicted flags and the ground-truth attack column.
svm_result = predict_model(svm,data =test)
anomaly = svm_result['Anomaly']
anomaly.to_csv('plot/'+files[f]+'/result/anomaly_value_svm.csv', index=False)
pd.DataFrame(smooth_entropy[f]['attack']).to_csv('plot/'+files[f]+'/result/true_value_svm.csv', index=False)

# Confusion counts via boolean masks. The previous per-row loop evaluated
# `i in smooth_time[f]` (a linear scan of an ndarray) for every row, which is
# quadratic in the series length; the masks below give the same counts.
n_rows = smooth_entropy[f].shape[0]
is_attack = np.isin(np.arange(n_rows), smooth_time[f])
# Positional access; equivalent to the former label lookup `anomaly[i]` as long
# as the prediction frame keeps a default 0..n-1 index -- TODO confirm.
predicted = anomaly.to_numpy()
flagged = predicted == 1
cleared = predicted == 0

TP = int(np.count_nonzero(is_attack & flagged))
FP = int(np.count_nonzero(~is_attack & flagged))
TN = int(np.count_nonzero(~is_attack & cleared))
FN = int(np.count_nonzero(is_attack & cleared))
# Row indices of the misclassified samples, as plain Python ints.
fp_time = np.flatnonzero(~is_attack & flagged).tolist()
fn_time = np.flatnonzero(is_attack & cleared).tolist()

print("True positive =",TP)
print("False positive =",FP)
print("True negative =",TN)
print("False negative =",FN)