In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.metrics import classification_report, confusion_matrix
pd.options.mode.chained_assignment = None  # default='warn'

### 读取数据

In [2]:
# Path of the packet-capture CSV to analyse.
# Switch data sets by uncommenting exactly one of the assignments below.
# file_path = './data/portv3.csv'
# file_path = './data/new_portv3_p75.csv'
file_path = './data/new_portv3_p50.csv'

In [3]:
# Load the capture into a DataFrame; data_raw is kept as the untouched original
# and every later step works on a copy of it.
data_raw = pd.read_csv(file_path)
data_raw

Unnamed: 0,Time,Destination,Length,Source Port,Second
0,0.000000,10.0.0.6,230,27361.0,1
1,0.771891,10.0.0.11,306,0.0,1
2,0.771884,10.0.0.11,306,0.0,1
3,0.770577,10.0.0.5,244,19225.0,1
4,0.770571,10.0.0.5,244,19225.0,1
...,...,...,...,...,...
48613,96.960024,10.0.0.4,253,22208.0,150
48614,96.960031,10.0.0.7,253,0.0,150
48615,96.960039,10.0.0.7,253,0.0,150
48616,96.896038,10.0.0.9,262,12277.0,150


### 整理数据

In [4]:
# Drop the capture columns this analysis never uses.
# Work on a copy so data_raw stays available unchanged.
data = data_raw.copy()

# errors='ignore' drops whichever of these columns are present and silently
# skips the rest, so the cell works for exports with and without them.
# (The previous `if [list] in list(data.columns)` test was always False, so
# nothing was ever dropped.)
# NOTE(review): names are matched exactly; confirm the spelling/case of 'info'
# against the raw export.
unused_columns = ['No.', 'Protocol', 'info', 'Source', 'Destination Port']
data = data.drop(columns=unused_columns, errors='ignore')

# Packets without a source port are assigned port 0.
data['Source Port'] = data['Source Port'].fillna(0)
data

Unnamed: 0,Time,Destination,Length,Source Port,Second
0,0.000000,10.0.0.6,230,27361.0,1
1,0.771891,10.0.0.11,306,0.0,1
2,0.771884,10.0.0.11,306,0.0,1
3,0.770577,10.0.0.5,244,19225.0,1
4,0.770571,10.0.0.5,244,19225.0,1
...,...,...,...,...,...
48613,96.960024,10.0.0.4,253,22208.0,150
48614,96.960031,10.0.0.7,253,0.0,150
48615,96.960039,10.0.0.7,253,0.0,150
48616,96.896038,10.0.0.9,262,12277.0,150


In [5]:
# Discard packets stamped before t = 0 s.
# The mask is negated ("not less than zero") rather than written as ">= 0",
# so rows are only removed when the comparison is actually True.
is_negative_time = data['Time'].lt(0)
data = data.loc[~is_negative_time]
data

Unnamed: 0,Time,Destination,Length,Source Port,Second
0,0.000000,10.0.0.6,230,27361.0,1
1,0.771891,10.0.0.11,306,0.0,1
2,0.771884,10.0.0.11,306,0.0,1
3,0.770577,10.0.0.5,244,19225.0,1
4,0.770571,10.0.0.5,244,19225.0,1
...,...,...,...,...,...
48613,96.960024,10.0.0.4,253,22208.0,150
48614,96.960031,10.0.0.7,253,0.0,150
48615,96.960039,10.0.0.7,253,0.0,150
48616,96.896038,10.0.0.9,262,12277.0,150


In [6]:
# Number of one-second buckets needed to cover every packet.
# The buckets built from this value are half-open, [k-1, k), so the latest
# timestamp must be strictly below max_time. floor(t) + 1 guarantees that:
#   75.370354 -> 76   (same as rounding up)
#   150.0     -> 151  (rounding up would give 150 and leave t = 150.0 in no bucket)
#   0.0       -> 1    (rounding up would give 0, i.e. no bucket at all)
max_time = np.max(data['Time'])
max_time = int(np.floor(max_time)) + 1
max_time

150

In [7]:
# Assign every packet to the one-second interval that contains its timestamp.
second_edges = np.arange(0, max_time+1, 1)    # bin edges 0, 1, ..., max_time
second_labels = np.arange(1, max_time+1, 1)   # label k stands for the interval [k-1, k)
time_range = pd.cut(
    data['Time'],
    bins=second_edges,
    labels=second_labels,
    right=False,   # intervals are closed on the left, open on the right
)
# Store the bucket label of each row.
data['Second'] = time_range
data

Unnamed: 0,Time,Destination,Length,Source Port,Second
0,0.000000,10.0.0.6,230,27361.0,1
1,0.771891,10.0.0.11,306,0.0,1
2,0.771884,10.0.0.11,306,0.0,1
3,0.770577,10.0.0.5,244,19225.0,1
4,0.770571,10.0.0.5,244,19225.0,1
...,...,...,...,...,...
48613,96.960024,10.0.0.4,253,22208.0,97
48614,96.960031,10.0.0.7,253,0.0,97
48615,96.960039,10.0.0.7,253,0.0,97
48616,96.896038,10.0.0.9,262,12277.0,97


#### 注意！使用完pd.cut之后，Second这一列的data type已经变为Category了，需要转化为int否则后面groupby会出错

In [8]:
# Inspect the column dtypes right after pd.cut ('Second' is categorical here).
data.dtypes

Time            float64
Destination      object
Length            int64
Source Port     float64
Second         category
dtype: object

In [9]:
# Convert the categorical bucket into a plain integer second: category codes are
# 0-based positions in the label list, so +1 restores the 1-based second.
# NOTE(review): a row that fell into no bucket has code -1 and therefore becomes
# second 0 here — confirm that is acceptable.
# NOTE(review): this relies on 'Second' still being categorical, so the cell is
# presumably not safe to run twice in a row without re-running the pd.cut cell.
data['Second'] = data.Second.cat.codes + 1
data

Unnamed: 0,Time,Destination,Length,Source Port,Second
0,0.000000,10.0.0.6,230,27361.0,1
1,0.771891,10.0.0.11,306,0.0,1
2,0.771884,10.0.0.11,306,0.0,1
3,0.770577,10.0.0.5,244,19225.0,1
4,0.770571,10.0.0.5,244,19225.0,1
...,...,...,...,...,...
48613,96.960024,10.0.0.4,253,22208.0,97
48614,96.960031,10.0.0.7,253,0.0,97
48615,96.960039,10.0.0.7,253,0.0,97
48616,96.896038,10.0.0.9,262,12277.0,97


In [10]:
# Confirm that 'Second' is now an integer column.
data.dtypes

Time           float64
Destination     object
Length           int64
Source Port    float64
Second           int16
dtype: object

In [11]:
# # 指定每秒取前多少个packet
# # 如果实际每秒没有这么多packet，这里不会报错的
# top_n = 100

# # 每个区间(即每秒)取前top_n个packet
# # 因此原本整个数据有27779行，现在只剩3796行
# data = data.groupby('Second').head(top_n).reset_index(drop=True)
# data

In [12]:
# Maximum number of packets to keep, chosen at random, for every second.
# Seconds holding fewer packets are kept whole, so no error is raised.
random_n = 100
# Fixed seed: without it every run samples different packets, so the entropies
# and metrics further down would change on each Restart & Run All.
random_seed = 42

# Shuffle all rows once, then keep the first random_n rows of every second.
# The first n rows of a uniformly shuffled group are a uniform random sample
# without replacement, and head() never fails on short groups. This also avoids
# groupby.apply, whose treatment of the grouping column varies between pandas
# versions.
data = (
    data.sample(frac=1, random_state=random_seed)
        .groupby('Second')
        .head(random_n)
        .sort_values('Second', kind='stable')   # restore second-by-second order
        .reset_index(drop=True)
)
data

Unnamed: 0,Time,Destination,Length,Source Port,Second
0,0.511674,10.0.0.7,251,26737.0,1
1,0.720004,10.0.0.5,256,19385.0,1
2,0.164064,10.0.0.3,245,20484.0,1
3,0.352014,10.0.0.2,257,19497.0,1
4,0.363939,10.0.0.8,265,26396.0,1
...,...,...,...,...,...
14991,149.003807,10.0.0.6,224,12414.0,150
14992,149.772274,10.0.0.3,252,11645.0,150
14993,149.699791,10.0.0.3,275,0.0,150
14994,149.127848,10.0.0.3,262,12067.0,150


### 统计每个区间多少个source, destination, packet length

你要查第几秒的数据，就改`query_second`。查哪个就改哪个dataframe。

In [13]:
# Packets per (second, source port). 'Time' is only the column being counted
# and is renamed to 'count' afterwards.
source_statistics = (
    data[['Second', 'Source Port', 'Time']]
    .groupby(['Second', 'Source Port'])
    .count()
    .rename(columns={'Time': 'count'})
    .reset_index()
)
# Discard combinations that contain no packets.
has_packets = source_statistics['count'] != 0
source_statistics = source_statistics[has_packets].reset_index(drop=True)
source_statistics

Unnamed: 0,Second,Source Port,count
0,1,0.0,22
1,1,10027.0,2
2,1,10314.0,1
3,1,10460.0,2
4,1,10525.0,1
...,...,...,...
6745,150,28209.0,1
6746,150,28372.0,1
6747,150,29615.0,1
6748,150,29743.0,2


In [14]:
# Show the source-port counts of one second; change query_second to inspect another.
query_second = 1
source_statistics.loc[source_statistics['Second'].eq(query_second)]

Unnamed: 0,Second,Source Port,count
0,1,0.0,22
1,1,10027.0,2
2,1,10314.0,1
3,1,10460.0,2
4,1,10525.0,1
5,1,10569.0,2
6,1,11262.0,2
7,1,11548.0,2
8,1,11693.0,1
9,1,13632.0,2


In [15]:
# Packets per (second, destination address). 'Time' is only the column being
# counted and is renamed to 'count' afterwards.
destination_statistics = (
    data[['Second', 'Destination', 'Time']]
    .groupby(['Second', 'Destination'])
    .count()
    .rename(columns={'Time': 'count'})
    .reset_index()
)
# Discard combinations that contain no packets.
has_packets = destination_statistics['count'] != 0
destination_statistics = destination_statistics[has_packets].reset_index(drop=True)
destination_statistics

Unnamed: 0,Second,Destination,count
0,1,10.0.0.1,3
1,1,10.0.0.10,5
2,1,10.0.0.11,6
3,1,10.0.0.12,6
4,1,10.0.0.2,14
...,...,...,...
1529,150,10.0.0.5,4
1530,150,10.0.0.6,14
1531,150,10.0.0.7,2
1532,150,10.0.0.8,16


In [16]:
# Show the destination counts of one second; change query_second to inspect another.
query_second = 1
destination_statistics.loc[destination_statistics['Second'].eq(query_second)]

Unnamed: 0,Second,Destination,count
0,1,10.0.0.1,3
1,1,10.0.0.10,5
2,1,10.0.0.11,6
3,1,10.0.0.12,6
4,1,10.0.0.2,14
5,1,10.0.0.3,8
6,1,10.0.0.4,11
7,1,10.0.0.5,12
8,1,10.0.0.6,7
9,1,10.0.0.7,13


In [17]:
# Packets per (second, packet length). 'Time' is only the column being counted
# and is renamed to 'count' afterwards.
packet_len_statistics = (
    data[['Second', 'Length', 'Time']]
    .groupby(['Second', 'Length'])
    .count()
    .rename(columns={'Time': 'count'})
    .reset_index()
)
# Discard combinations that contain no packets.
has_packets = packet_len_statistics['count'] != 0
packet_len_statistics = packet_len_statistics[has_packets].reset_index(drop=True)
packet_len_statistics

Unnamed: 0,Second,Length,count
0,1,229,2
1,1,230,1
2,1,231,3
3,1,232,1
4,1,234,4
...,...,...,...
4681,150,273,2
4682,150,275,4
4683,150,285,2
4684,150,289,2


In [18]:
# Show the packet-length counts of one second; change query_second to inspect another.
query_second = 1
packet_len_statistics.loc[packet_len_statistics['Second'].eq(query_second)]

Unnamed: 0,Second,Length,count
0,1,229,2
1,1,230,1
2,1,231,3
3,1,232,1
4,1,234,4
5,1,235,2
6,1,239,4
7,1,240,4
8,1,243,7
9,1,244,5


In [19]:
# # 保存统计数据为一个多页的excel
# # Create a Pandas Excel writer using XlsxWriter as the engine.
# excel_writer = pd.ExcelWriter('statistics.xlsx', engine='xlsxwriter')

# # Write each dataframe to a different worksheet.
# source_statistics.to_excel(excel_writer, sheet_name='source')
# destination_statistics.to_excel(excel_writer, sheet_name='destination')
# packet_len_statistics.to_excel(excel_writer, sheet_name='packet_length')

# # Close the Pandas Excel writer and output the Excel file.
# excel_writer.save()

### 计算destination的单熵

In [20]:
# Packet count per (second, destination). The column is named 'p' because it is
# turned into a probability two statements below.
destination_temp = (
    destination_statistics
    .groupby(['Second', 'Destination'])
    .sum()
    .rename(columns={'count': 'p'})
)
# Remove rows without packets (log2(0) is undefined).
destination_temp = destination_temp[destination_temp['p'] != 0]
# p = share of the second's packets sent to this destination.
# transform('sum') broadcasts each second's total back onto its rows and keeps
# the (Second, Destination) index unchanged, unlike a group-wise apply whose
# result index depends on the pandas version.
packets_per_second = destination_temp.groupby(level='Second')['p'].transform('sum')
destination_temp['p'] = destination_temp['p'] / packets_per_second
# log2(p) is element-wise, so no grouping is needed.
destination_temp['log_p'] = np.log2(destination_temp['p'])
# Entropy term p * log2(p).
# NOTE(review): each term is rounded to 4 decimals before the terms are summed,
# as before; summing unrounded terms would be slightly more accurate.
destination_temp['multiply'] = (destination_temp['p'] * destination_temp['log_p']).round(4)
destination_temp

Unnamed: 0_level_0,Unnamed: 1_level_0,p,log_p,multiply
Second,Destination,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,10.0.0.1,0.03,-5.058894,-0.1518
1,10.0.0.10,0.05,-4.321928,-0.2161
1,10.0.0.11,0.06,-4.058894,-0.2435
1,10.0.0.12,0.06,-4.058894,-0.2435
1,10.0.0.2,0.14,-2.836501,-0.3971
...,...,...,...,...
150,10.0.0.5,0.04,-4.643856,-0.1858
150,10.0.0.6,0.14,-2.836501,-0.3971
150,10.0.0.7,0.02,-5.643856,-0.1129
150,10.0.0.8,0.16,-2.643856,-0.4230


In [21]:
# Entropy per second: add up the p * log2(p) terms of each second ...
destination_entropy = (
    destination_temp
    .groupby(level=0)[['multiply']]
    .sum()
    .reset_index()
)
# ... and flip the sign.
destination_entropy['multiply'] = -destination_entropy['multiply']
destination_entropy

Unnamed: 0,Second,multiply
0,1,3.4604
1,2,3.2796
2,3,3.3820
3,4,3.4548
4,5,3.5273
...,...,...
145,146,3.4676
146,147,3.4485
147,148,3.2925
148,149,3.4441


### 计算destination之于source port的条件熵

In [22]:
# Destination probabilities computed above, flattened to plain columns and
# stripped of the helper columns that are not needed for the conditional entropy.
dest_p = (
    destination_temp
    .reset_index()
    .drop(columns=['log_p', 'multiply'])
)
dest_p

Unnamed: 0,Second,Destination,p
0,1,10.0.0.1,0.03
1,1,10.0.0.10,0.05
2,1,10.0.0.11,0.06
3,1,10.0.0.12,0.06
4,1,10.0.0.2,0.14
...,...,...,...
1529,150,10.0.0.5,0.04
1530,150,10.0.0.6,0.14
1531,150,10.0.0.7,0.02
1532,150,10.0.0.8,0.16


In [23]:
%%time
dest_src_temp = data[['Second','Source Port','Destination','Time']].groupby(['Second','Destination','Source Port']).count().rename(columns={'Time': 'count'})
# remove zero count rows
dest_src_temp = dest_src_temp[dest_src_temp['count'] != 0]
# 统计相同destination的情况下，有多少个source
dest_src_temp = dest_src_temp.groupby(['Second','Destination']).apply(lambda grp: grp.groupby('Source Port').sum())
# 计算destination之于source的条件概率condition_p
dest_src_temp['condition_p'] = dest_src_temp.groupby(['Second','Destination']).apply(lambda grp: grp.groupby('Source Port').sum() / grp.sum())
# 计算log(condition_p)
dest_src_temp['log_condition_p'] = np.log2(dest_src_temp['condition_p'])
# 计算乘积
dest_src_temp['p_multiply'] = round(dest_src_temp['condition_p'] * dest_src_temp['log_condition_p'], 4)
dest_src_temp

CPU times: total: 2.12 s
Wall time: 2.11 s


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,condition_p,log_condition_p,p_multiply
Second,Destination,Source Port,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,10.0.0.1,23531.0,1,0.333333,-1.584963,-0.5283
1,10.0.0.1,26087.0,2,0.666667,-0.584963,-0.3900
1,10.0.0.10,0.0,2,0.400000,-1.321928,-0.5288
1,10.0.0.10,10569.0,2,0.400000,-1.321928,-0.5288
1,10.0.0.10,20178.0,1,0.200000,-2.321928,-0.4644
...,...,...,...,...,...,...
150,10.0.0.9,15572.0,1,0.100000,-3.321928,-0.3322
150,10.0.0.9,18269.0,2,0.200000,-2.321928,-0.4644
150,10.0.0.9,23581.0,1,0.100000,-3.321928,-0.3322
150,10.0.0.9,27083.0,1,0.100000,-3.321928,-0.3322


In [24]:
# Sum the entropy terms of every (second, destination) group.
# Work on a copy: a plain assignment would only alias dest_src_temp, so adding
# 'p_sum' would silently modify the table displayed by the previous cell.
dest_src_entropy = dest_src_temp.copy()
# transform('sum') returns one value per row, aligned with the existing
# three-level index, instead of relying on implicit alignment of a two-level
# result against it.
dest_src_entropy['p_sum'] = (
    dest_src_entropy
    .groupby(level=['Second', 'Destination'])['p_multiply']
    .transform('sum')
)
dest_src_entropy = dest_src_entropy.reset_index()
dest_src_entropy = dest_src_entropy.drop(columns=['count'])
dest_src_entropy

Unnamed: 0,Second,Destination,Source Port,condition_p,log_condition_p,p_multiply,p_sum
0,1,10.0.0.1,23531.0,0.333333,-1.584963,-0.5283,-0.9183
1,1,10.0.0.1,26087.0,0.666667,-0.584963,-0.3900,-0.9183
2,1,10.0.0.10,0.0,0.400000,-1.321928,-0.5288,-1.5220
3,1,10.0.0.10,10569.0,0.400000,-1.321928,-0.5288,-1.5220
4,1,10.0.0.10,20178.0,0.200000,-2.321928,-0.4644,-1.5220
...,...,...,...,...,...,...,...
7386,150,10.0.0.9,15572.0,0.100000,-3.321928,-0.3322,-2.7220
7387,150,10.0.0.9,18269.0,0.200000,-2.321928,-0.4644,-2.7220
7388,150,10.0.0.9,23581.0,0.100000,-3.321928,-0.3322,-2.7220
7389,150,10.0.0.9,27083.0,0.100000,-3.321928,-0.3322,-2.7220


In [25]:
# Attach the destination probability p to every conditional-probability row.
dest_src_entropy = dest_src_entropy.merge(dest_p, how='left', on=['Second','Destination'])
# Weight each destination's summed entropy terms by the destination probability.
weighted_sum = dest_src_entropy['p_sum'] * dest_src_entropy['p']
dest_src_entropy['total_multiply'] = weighted_sum.round(4)

# Keep one row per (second, destination): the selected columns repeat for every
# source port of the same destination, so the duplicates are dropped.
dest_src_entropy = (
    dest_src_entropy
    .loc[:, ['Second','Destination','total_multiply']]
    .drop_duplicates()
)
dest_src_entropy

Unnamed: 0,Second,Destination,total_multiply
0,1,10.0.0.1,-0.0275
2,1,10.0.0.10,-0.0761
5,1,10.0.0.11,-0.0951
8,1,10.0.0.12,-0.1151
12,1,10.0.0.2,-0.3931
...,...,...,...
7364,150,10.0.0.5,-0.0400
7366,150,10.0.0.6,-0.4131
7374,150,10.0.0.7,0.0000
7375,150,10.0.0.8,-0.4924


In [26]:
# Conditional entropy per second: add up the weighted terms of each second ...
dest_src_entropy = (
    dest_src_entropy
    .groupby('Second')[['total_multiply']]
    .sum()
    .reset_index()
)
# ... and flip the sign, leaving exact zeros untouched.
summed_terms = dest_src_entropy['total_multiply']
dest_src_entropy['total_multiply'] = summed_terms.where(summed_terms == 0, -summed_terms)
dest_src_entropy

Unnamed: 0,Second,total_multiply
0,1,2.2161
1,2,2.2931
2,3,2.2379
3,4,2.1930
4,5,2.2216
...,...,...
145,146,2.2253
146,147,2.1833
147,148,2.4316
148,149,2.2447


### 计算destination之于packet length的条件熵

In [27]:
%%time
dest_len_temp = data[['Second','Length','Destination','Time']].groupby(['Second','Destination','Length']).count().rename(columns={'Time': 'count'})
# remove zero count rows
dest_len_temp = dest_len_temp[dest_len_temp['count'] != 0]
# 统计相同destination的情况下，有多少个source
dest_len_temp = dest_len_temp.groupby(['Second','Destination']).apply(lambda grp: grp.groupby('Length').sum())
# 计算destination之于source的条件概率condition_p
dest_len_temp['condition_p'] = dest_len_temp.groupby(['Second','Destination']).apply(lambda grp: grp.groupby('Length').sum() / grp.sum())
# 计算log(condition_p)
dest_len_temp['log_condition_p'] = np.log2(dest_len_temp['condition_p'])
# 计算乘积
dest_len_temp['p_multiply'] = round(dest_len_temp['condition_p'] * dest_len_temp['log_condition_p'], 4)
dest_len_temp

CPU times: total: 2.02 s
Wall time: 2.02 s


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,condition_p,log_condition_p,p_multiply
Second,Destination,Length,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,10.0.0.1,243,2,0.666667,-0.584963,-0.3900
1,10.0.0.1,261,1,0.333333,-1.584963,-0.5283
1,10.0.0.10,232,1,0.200000,-2.321928,-0.4644
1,10.0.0.10,247,2,0.400000,-1.321928,-0.5288
1,10.0.0.10,283,2,0.400000,-1.321928,-0.5288
...,...,...,...,...,...,...
150,10.0.0.9,230,1,0.100000,-3.321928,-0.3322
150,10.0.0.9,243,1,0.100000,-3.321928,-0.3322
150,10.0.0.9,249,1,0.100000,-3.321928,-0.3322
150,10.0.0.9,254,2,0.200000,-2.321928,-0.4644


In [28]:
# Sum the entropy terms of every (second, destination) group.
# Work on a copy: a plain assignment would only alias dest_len_temp, so adding
# 'p_sum' would silently modify the table displayed by the previous cell.
dest_len_entropy = dest_len_temp.copy()
# transform('sum') returns one value per row, aligned with the existing
# three-level index, instead of relying on implicit alignment of a two-level
# result against it.
dest_len_entropy['p_sum'] = (
    dest_len_entropy
    .groupby(level=['Second', 'Destination'])['p_multiply']
    .transform('sum')
)
dest_len_entropy = dest_len_entropy.reset_index()
dest_len_entropy = dest_len_entropy.drop(columns=['count'])
dest_len_entropy

Unnamed: 0,Second,Destination,Length,condition_p,log_condition_p,p_multiply,p_sum
0,1,10.0.0.1,243,0.666667,-0.584963,-0.3900,-0.9183
1,1,10.0.0.1,261,0.333333,-1.584963,-0.5283,-0.9183
2,1,10.0.0.10,232,0.200000,-2.321928,-0.4644,-1.5220
3,1,10.0.0.10,247,0.400000,-1.321928,-0.5288,-1.5220
4,1,10.0.0.10,283,0.400000,-1.321928,-0.5288,-1.5220
...,...,...,...,...,...,...,...
6389,150,10.0.0.9,230,0.100000,-3.321928,-0.3322,-2.3220
6390,150,10.0.0.9,243,0.100000,-3.321928,-0.3322,-2.3220
6391,150,10.0.0.9,249,0.100000,-3.321928,-0.3322,-2.3220
6392,150,10.0.0.9,254,0.200000,-2.321928,-0.4644,-2.3220


In [29]:
# Attach the destination probability p to every conditional-probability row.
dest_len_entropy = dest_len_entropy.merge(dest_p, how='left', on=['Second','Destination'])
# Weight each destination's summed entropy terms by the destination probability.
weighted_sum = dest_len_entropy['p_sum'] * dest_len_entropy['p']
dest_len_entropy['total_multiply'] = weighted_sum.round(4)

# Keep one row per (second, destination): the selected columns repeat for every
# packet length of the same destination, so the duplicates are dropped.
dest_len_entropy = (
    dest_len_entropy
    .loc[:, ['Second','Destination','total_multiply']]
    .drop_duplicates()
)
dest_len_entropy

Unnamed: 0,Second,Destination,total_multiply
0,1,10.0.0.1,-0.0275
2,1,10.0.0.10,-0.0761
5,1,10.0.0.11,-0.0951
8,1,10.0.0.12,-0.1151
12,1,10.0.0.2,-0.3931
...,...,...,...
6367,150,10.0.0.5,-0.0400
6369,150,10.0.0.6,-0.4131
6377,150,10.0.0.7,0.0000
6378,150,10.0.0.8,-0.5200


In [30]:
# Conditional entropy per second: add up the weighted terms of each second ...
dest_len_entropy = (
    dest_len_entropy
    .groupby('Second')[['total_multiply']]
    .sum()
    .reset_index()
)
# ... and flip the sign, leaving exact zeros untouched.
summed_terms = dest_len_entropy['total_multiply']
dest_len_entropy['total_multiply'] = summed_terms.where(summed_terms == 0, -summed_terms)
dest_len_entropy

Unnamed: 0,Second,total_multiply
0,1,2.3160
1,2,2.4567
2,3,2.2668
3,4,2.3015
4,5,2.1568
...,...,...
145,146,2.2691
146,147,2.3355
147,148,2.3214
148,149,2.3798


### Old Attack Method 1

In [31]:
# Independent working copy, so the columns added for this method do not end up
# in destination_entropy.
destination_entropy1 = destination_entropy.copy()

In [32]:
# Baseline: sum of the first 10 entropy values divided by 10.
first_10_avg = destination_entropy1['multiply'][:10].sum() / 10
# Flag per second: 0 when the entropy is below the baseline, 1 otherwise.
destination_entropy1['old_attack_m1'] = np.where(destination_entropy1['multiply'] < first_10_avg, 0, 1)
# Rows with index labels 0-9 (the baseline window) get the sentinel -1.
# A single .loc[rows, column] assignment writes into the frame itself; the
# chained form df[column].loc[...] = value assigns to a temporary Series and is
# not guaranteed to reach the frame (it does not under pandas Copy-on-Write).
destination_entropy1.loc[:9, 'old_attack_m1'] = -1
destination_entropy1

Unnamed: 0,Second,multiply,old_attack_m1
0,1,3.4604,-1
1,2,3.2796,-1
2,3,3.3820,-1
3,4,3.4548,-1
4,5,3.5273,-1
...,...,...,...
145,146,3.4676,1
146,147,3.4485,1
147,148,3.2925,0
148,149,3.4441,1


### Old Attack Method 2

In [33]:
# Independent working copy, so the columns added for this method do not end up
# in destination_entropy.
destination_entropy2 = destination_entropy.copy()

In [34]:
# Cumulative entropy divided by the second number.
# NOTE(review): this equals the running mean only if 'Second' is contiguous and
# starts at 1 — confirm that no second is missing from the table.
destination_entropy2['cum_sum'] = destination_entropy2['multiply'].cumsum()
destination_entropy2['avg_entropy'] = destination_entropy2['cum_sum'] / destination_entropy2['Second']
# Flag per second: 0 when the entropy is below that average, 1 otherwise.
destination_entropy2['old_attack_m2'] = np.where(destination_entropy2['multiply'] < destination_entropy2['avg_entropy'], 0, 1)
# Rows with index labels 0-9 get the sentinel -1.
# A single .loc[rows, column] assignment writes into the frame itself; the
# chained form df[column].loc[...] = value assigns to a temporary Series and is
# not guaranteed to reach the frame (it does not under pandas Copy-on-Write).
destination_entropy2.loc[:9, 'old_attack_m2'] = -1
destination_entropy2

Unnamed: 0,Second,multiply,cum_sum,avg_entropy,old_attack_m2
0,1,3.4604,3.4604,3.460400,-1
1,2,3.2796,6.7400,3.370000,-1
2,3,3.3820,10.1220,3.374000,-1
3,4,3.4548,13.5768,3.394200,-1
4,5,3.5273,17.1041,3.420820,-1
...,...,...,...,...,...
145,146,3.4676,328.2360,2.248192,1
146,147,3.4485,331.6845,2.256357,1
147,148,3.2925,334.9770,2.263358,1
148,149,3.4441,338.4211,2.271283,1


### TQV Attack Method 1

In [35]:
# Independent working copies of the three entropy tables for method 1, so the
# flag columns added below do not end up in the shared originals.
tqv_dest_entropy_m1 = destination_entropy.copy()
tqv_dest_src_entropy_m1 = dest_src_entropy.copy()
tqv_dest_len_entropy_m1 = dest_len_entropy.copy()

In [36]:
# Baseline: sum of the first 10 destination-entropy values divided by 10.
tqv_dest_first_10_avg = tqv_dest_entropy_m1['multiply'][:10].sum() / 10
# Flag per second: 0 when the entropy is below the baseline, 1 otherwise.
tqv_dest_entropy_m1['dest_attack'] = np.where(tqv_dest_entropy_m1['multiply'] < tqv_dest_first_10_avg, 0, 1)
# Rows with index labels 0-9 (the baseline window) get the sentinel -1.
# .loc[rows, column] writes into the frame itself; the chained form
# df[column].loc[...] = value is not guaranteed to (pandas Copy-on-Write).
tqv_dest_entropy_m1.loc[:9, 'dest_attack'] = -1
tqv_dest_entropy_m1

Unnamed: 0,Second,multiply,dest_attack
0,1,3.4604,-1
1,2,3.2796,-1
2,3,3.3820,-1
3,4,3.4548,-1
4,5,3.5273,-1
...,...,...,...
145,146,3.4676,1
146,147,3.4485,1
147,148,3.2925,0
148,149,3.4441,1


In [37]:
# Baseline: sum of the first 10 destination|source-port entropy values divided by 10.
tqv_dest_src_first_10_avg = tqv_dest_src_entropy_m1['total_multiply'][:10].sum() / 10
# Flag per second: 0 when the entropy is below the baseline, 1 otherwise.
tqv_dest_src_entropy_m1['dest_src_attack'] = np.where(tqv_dest_src_entropy_m1['total_multiply'] < tqv_dest_src_first_10_avg, 0, 1)
# Rows with index labels 0-9 (the baseline window) get the sentinel -1.
# .loc[rows, column] writes into the frame itself; the chained form
# df[column].loc[...] = value is not guaranteed to (pandas Copy-on-Write).
tqv_dest_src_entropy_m1.loc[:9, 'dest_src_attack'] = -1
tqv_dest_src_entropy_m1

Unnamed: 0,Second,total_multiply,dest_src_attack
0,1,2.2161,-1
1,2,2.2931,-1
2,3,2.2379,-1
3,4,2.1930,-1
4,5,2.2216,-1
...,...,...,...
145,146,2.2253,0
146,147,2.1833,0
147,148,2.4316,1
148,149,2.2447,1


In [38]:
# Baseline: sum of the first 10 destination|packet-length entropy values divided by 10.
tqv_dest_len_first_10_avg = tqv_dest_len_entropy_m1['total_multiply'][:10].sum() / 10
# Flag per second: 0 when the entropy is below the baseline, 1 otherwise.
tqv_dest_len_entropy_m1['dest_len_attack'] = np.where(tqv_dest_len_entropy_m1['total_multiply'] < tqv_dest_len_first_10_avg, 0, 1)
# Rows with index labels 0-9 (the baseline window) get the sentinel -1.
# .loc[rows, column] writes into the frame itself; the chained form
# df[column].loc[...] = value is not guaranteed to (pandas Copy-on-Write).
tqv_dest_len_entropy_m1.loc[:9, 'dest_len_attack'] = -1
tqv_dest_len_entropy_m1

Unnamed: 0,Second,total_multiply,dest_len_attack
0,1,2.3160,-1
1,2,2.4567,-1
2,3,2.2668,-1
3,4,2.3015,-1
4,5,2.1568,-1
...,...,...,...
145,146,2.2691,0
146,147,2.3355,1
147,148,2.3214,1
148,149,2.3798,1


In [39]:
# Put the three per-second flags of method 1 side by side, joined on 'Second'.
tqv_method1 = (
    tqv_dest_entropy_m1[['Second','dest_attack']]
    .merge(tqv_dest_src_entropy_m1[['Second','dest_src_attack']], on='Second')
    .merge(tqv_dest_len_entropy_m1[['Second','dest_len_attack']], on='Second')
)
tqv_method1

Unnamed: 0,Second,dest_attack,dest_src_attack,dest_len_attack
0,1,-1,-1,-1
1,2,-1,-1,-1
2,3,-1,-1,-1
3,4,-1,-1,-1
4,5,-1,-1,-1
...,...,...,...,...
145,146,1,0,0
146,147,1,0,1
147,148,0,1,1
148,149,1,1,1


In [40]:
def tqv_attack_logic(row):
    """Combine the three per-second flags of one row into a single flag.

    Args:
        row: mapping (e.g. a DataFrame row) with the keys 'dest_attack',
            'dest_src_attack' and 'dest_len_attack'.

    Returns:
        1 when 'dest_attack' is 1, whatever the other two flags are.
        When 'dest_attack' is 0 and the other two flags are each 0 or 1, the
        result follows the table below (it equals 'dest_len_attack').
        -1 for every other combination, e.g. the warm-up rows marked -1.
    """
    dest_flag = row['dest_attack']
    if dest_flag == 1:
        return 1
    if dest_flag == 0:
        # (dest_src_attack, dest_len_attack) -> combined flag
        outcome_by_flags = {
            (1, 1): 1,
            (0, 0): 0,
            (1, 0): 0,
            (0, 1): 1,
        }
        flags = (row['dest_src_attack'], row['dest_len_attack'])
        return outcome_by_flags.get(flags, -1)
    # Any other 'dest_attack' value, such as the -1 of the first 10 rows.
    return -1

In [41]:
# Combine the three flags of every row (axis=1 passes one row at a time).
tqv_method1['tqv_attack_m1'] = tqv_method1.apply(tqv_attack_logic, axis=1)
tqv_method1

Unnamed: 0,Second,dest_attack,dest_src_attack,dest_len_attack,tqv_attack_m1
0,1,-1,-1,-1,-1
1,2,-1,-1,-1,-1
2,3,-1,-1,-1,-1
3,4,-1,-1,-1,-1
4,5,-1,-1,-1,-1
...,...,...,...,...,...
145,146,1,0,0,1
146,147,1,0,1,1
147,148,0,1,1,1
148,149,1,1,1,1


### TQV Attack Method 2

In [42]:
# Independent working copies of the three entropy tables for method 2, so the
# columns added below do not end up in the shared originals.
tqv_dest_entropy_m2 = destination_entropy.copy()
tqv_dest_src_entropy_m2 = dest_src_entropy.copy()
tqv_dest_len_entropy_m2 = dest_len_entropy.copy()

In [43]:
# Cumulative destination entropy divided by the second number.
# NOTE(review): this equals the running mean only if 'Second' is contiguous and
# starts at 1 — confirm that no second is missing from the table.
tqv_dest_entropy_m2['cum_sum'] = tqv_dest_entropy_m2['multiply'].cumsum()
tqv_dest_entropy_m2['avg_entropy'] = tqv_dest_entropy_m2['cum_sum'] / tqv_dest_entropy_m2['Second']
# Flag per second: 0 when the entropy is below that average, 1 otherwise.
tqv_dest_entropy_m2['dest_attack'] = np.where(tqv_dest_entropy_m2['multiply'] < tqv_dest_entropy_m2['avg_entropy'], 0, 1)
# Rows with index labels 0-9 get the sentinel -1.
# .loc[rows, column] writes into the frame itself; the chained form
# df[column].loc[...] = value is not guaranteed to (pandas Copy-on-Write).
tqv_dest_entropy_m2.loc[:9, 'dest_attack'] = -1
tqv_dest_entropy_m2

Unnamed: 0,Second,multiply,cum_sum,avg_entropy,dest_attack
0,1,3.4604,3.4604,3.460400,-1
1,2,3.2796,6.7400,3.370000,-1
2,3,3.3820,10.1220,3.374000,-1
3,4,3.4548,13.5768,3.394200,-1
4,5,3.5273,17.1041,3.420820,-1
...,...,...,...,...,...
145,146,3.4676,328.2360,2.248192,1
146,147,3.4485,331.6845,2.256357,1
147,148,3.2925,334.9770,2.263358,1
148,149,3.4441,338.4211,2.271283,1


In [44]:
# Cumulative destination|source-port entropy divided by the second number.
# NOTE(review): this equals the running mean only if 'Second' is contiguous and
# starts at 1 — confirm that no second is missing from the table.
tqv_dest_src_entropy_m2['cum_sum'] = tqv_dest_src_entropy_m2['total_multiply'].cumsum()
tqv_dest_src_entropy_m2['avg_entropy'] = tqv_dest_src_entropy_m2['cum_sum'] / tqv_dest_src_entropy_m2['Second']
# Flag per second: 0 when the entropy is below that average, 1 otherwise.
tqv_dest_src_entropy_m2['dest_src_attack'] = np.where(tqv_dest_src_entropy_m2['total_multiply'] < tqv_dest_src_entropy_m2['avg_entropy'], 0, 1)
# Rows with index labels 0-9 get the sentinel -1.
# .loc[rows, column] writes into the frame itself; the chained form
# df[column].loc[...] = value is not guaranteed to (pandas Copy-on-Write).
tqv_dest_src_entropy_m2.loc[:9, 'dest_src_attack'] = -1
tqv_dest_src_entropy_m2

Unnamed: 0,Second,total_multiply,cum_sum,avg_entropy,dest_src_attack
0,1,2.2161,2.2161,2.216100,-1
1,2,2.2931,4.5092,2.254600,-1
2,3,2.2379,6.7471,2.249033,-1
3,4,2.1930,8.9401,2.235025,-1
4,5,2.2216,11.1617,2.232340,-1
...,...,...,...,...,...
145,146,2.2253,400.9700,2.746370,0
146,147,2.1833,403.1533,2.742539,0
147,148,2.4316,405.5849,2.740439,0
148,149,2.2447,407.8296,2.737111,0


In [45]:
# Cumulative destination|packet-length entropy divided by the second number.
# NOTE(review): this equals the running mean only if 'Second' is contiguous and
# starts at 1 — confirm that no second is missing from the table.
tqv_dest_len_entropy_m2['cum_sum'] = tqv_dest_len_entropy_m2['total_multiply'].cumsum()
tqv_dest_len_entropy_m2['avg_entropy'] = tqv_dest_len_entropy_m2['cum_sum'] / tqv_dest_len_entropy_m2['Second']
# Flag per second: 0 when the entropy is below that average, 1 otherwise.
tqv_dest_len_entropy_m2['dest_len_attack'] = np.where(tqv_dest_len_entropy_m2['total_multiply'] < tqv_dest_len_entropy_m2['avg_entropy'], 0, 1)
# Rows with index labels 0-9 get the sentinel -1.
# .loc[rows, column] writes into the frame itself; the chained form
# df[column].loc[...] = value is not guaranteed to (pandas Copy-on-Write).
tqv_dest_len_entropy_m2.loc[:9, 'dest_len_attack'] = -1
tqv_dest_len_entropy_m2

Unnamed: 0,Second,total_multiply,cum_sum,avg_entropy,dest_len_attack
0,1,2.3160,2.3160,2.316000,-1
1,2,2.4567,4.7727,2.386350,-1
2,3,2.2668,7.0395,2.346500,-1
3,4,2.3015,9.3410,2.335250,-1
4,5,2.1568,11.4978,2.299560,-1
...,...,...,...,...,...
145,146,2.2691,290.2126,1.987758,1
146,147,2.3355,292.5481,1.990123,1
147,148,2.3214,294.8695,1.992361,1
148,149,2.3798,297.2493,1.994962,1


In [46]:
# Put the three per-second flags of method 2 side by side, joined on 'Second'.
tqv_method2 = (
    tqv_dest_entropy_m2[['Second','dest_attack']]
    .merge(tqv_dest_src_entropy_m2[['Second','dest_src_attack']], on='Second')
    .merge(tqv_dest_len_entropy_m2[['Second','dest_len_attack']], on='Second')
)
tqv_method2

Unnamed: 0,Second,dest_attack,dest_src_attack,dest_len_attack
0,1,-1,-1,-1
1,2,-1,-1,-1
2,3,-1,-1,-1
3,4,-1,-1,-1
4,5,-1,-1,-1
...,...,...,...,...
145,146,1,0,1
146,147,1,0,1
147,148,1,0,1
148,149,1,0,1


In [47]:
# Combine the three flags of every row (axis=1 passes one row at a time).
tqv_method2['tqv_attack_m2'] = tqv_method2.apply(tqv_attack_logic, axis=1)
tqv_method2

Unnamed: 0,Second,dest_attack,dest_src_attack,dest_len_attack,tqv_attack_m2
0,1,-1,-1,-1,-1
1,2,-1,-1,-1,-1
2,3,-1,-1,-1,-1
3,4,-1,-1,-1,-1
4,5,-1,-1,-1,-1
...,...,...,...,...,...
145,146,1,0,1,1
146,147,1,0,1,1
147,148,1,0,1,1
148,149,1,0,1,1


### TQV Attack Method 3

In [48]:
# Independent working copies of the three entropy tables for method 3, so the
# columns added below do not end up in the shared originals.
tqv_dest_entropy_m3 = destination_entropy.copy()
tqv_dest_src_entropy_m3 = dest_src_entropy.copy()
tqv_dest_len_entropy_m3 = dest_len_entropy.copy()

In [49]:
def check_attack(row, key, lower_, higher_):
    """Classify row[key] against the closed band [lower_, higher_].

    Args:
        row: mapping (e.g. a DataFrame row) holding the value to classify.
        key: name of the entry of ``row`` to look at.
        lower_: lower bound of the band (inclusive).
        higher_: upper bound of the band (inclusive).

    Returns:
        0 when the value is below ``lower_``, 1 when it is above ``higher_``,
        2 when it lies inside the band, and None when none of the comparisons
        holds (for example when the value is NaN).
    """
    value = row[key]
    if value < lower_:
        return 0
    if value > higher_:
        return 1
    if lower_ <= value <= higher_:
        return 2
    return None

In [94]:
# Cumulative destination entropy divided by the second number.
# NOTE(review): this equals the running mean only if 'Second' is contiguous and
# starts at 1 — confirm that no second is missing from the table.
tqv_dest_entropy_m3['cum_sum'] = tqv_dest_entropy_m3['multiply'].cumsum()
tqv_dest_entropy_m3['avg_entropy'] = tqv_dest_entropy_m3['cum_sum'] / tqv_dest_entropy_m3['Second']

# Baseline band: avg_entropy at index label 9 (the average over the first 10
# rows) plus/minus two standard deviations of those same 10 entropy values.
# np.std uses ddof=0 by default, i.e. the population standard deviation.
dest_baseline_mean = tqv_dest_entropy_m3['avg_entropy'][9]
dest_baseline_std = np.std(tqv_dest_entropy_m3['multiply'][:10])
dest_lower_ = dest_baseline_mean - 2 * dest_baseline_std
dest_higher_ = dest_baseline_mean + 2 * dest_baseline_std

# The message now describes the band that is actually computed; it previously
# still announced the abandoned "0.9 to 1.1 times the average" band.
print(f'dest 1到10s平均熵 ± 2倍标准差为: [{dest_lower_}, {dest_higher_}]')

# 2 = inside the band, 0 = below it, 1 = above it (see check_attack).
tqv_dest_entropy_m3['dest_attack'] = tqv_dest_entropy_m3.apply(lambda row: check_attack(row, 'multiply', dest_lower_, dest_higher_), axis=1)
# Rows with index labels 0-9 (the baseline window) get the sentinel -1.
# .loc[rows, column] writes into the frame itself; the chained form
# df[column].loc[...] = value is not guaranteed to (pandas Copy-on-Write).
tqv_dest_entropy_m3.loc[:9, 'dest_attack'] = -1
tqv_dest_entropy_m3

dest 1到10s平均熵的0.9至1.1倍为: [3.2882015565434024, 3.5605984434565974]


Unnamed: 0,Second,multiply,cum_sum,avg_entropy,dest_attack
0,1,3.4604,3.4604,3.460400,-1
1,2,3.2796,6.7400,3.370000,-1
2,3,3.3820,10.1220,3.374000,-1
3,4,3.4548,13.5768,3.394200,-1
4,5,3.5273,17.1041,3.420820,-1
...,...,...,...,...,...
145,146,3.4676,328.2360,2.248192,2
146,147,3.4485,331.6845,2.256357,2
147,148,3.2925,334.9770,2.263358,2
148,149,3.4441,338.4211,2.271283,2


In [101]:
# Cumulative destination|source-port entropy divided by the second number.
# NOTE(review): this equals the running mean only if 'Second' is contiguous and
# starts at 1 — confirm that no second is missing from the table.
tqv_dest_src_entropy_m3['cum_sum'] = tqv_dest_src_entropy_m3['total_multiply'].cumsum()
tqv_dest_src_entropy_m3['avg_entropy'] = tqv_dest_src_entropy_m3['cum_sum'] / tqv_dest_src_entropy_m3['Second']

# Baseline band: avg_entropy at index label 9 (the average over the first 10
# rows) plus/minus two standard deviations of those same 10 entropy values.
# np.std uses ddof=0 by default, i.e. the population standard deviation.
dest_src_baseline_mean = tqv_dest_src_entropy_m3['avg_entropy'][9]
dest_src_baseline_std = np.std(tqv_dest_src_entropy_m3['total_multiply'][:10])
dest_src_lower_ = dest_src_baseline_mean - 2 * dest_src_baseline_std
dest_src_higher_ = dest_src_baseline_mean + 2 * dest_src_baseline_std

# The message now describes the band that is actually computed; it previously
# still announced the abandoned "0.9 to 1.1 times the average" band.
print(f'平均为: {dest_src_baseline_mean}    dest_src 1到10s平均熵 ± 2倍标准差为: [{dest_src_lower_}, {dest_src_higher_}]')

# 2 = inside the band, 0 = below it, 1 = above it (see check_attack).
tqv_dest_src_entropy_m3['dest_src_attack'] = tqv_dest_src_entropy_m3.apply(lambda row: check_attack(row, 'total_multiply', dest_src_lower_, dest_src_higher_), axis=1)
# Rows with index labels 0-9 (the baseline window) get the sentinel -1.
# .loc[rows, column] writes into the frame itself; the chained form
# df[column].loc[...] = value is not guaranteed to (pandas Copy-on-Write).
tqv_dest_src_entropy_m3.loc[:9, 'dest_src_attack'] = -1
tqv_dest_src_entropy_m3

平均为: 2.2269200000000002    dest_src 1到10s平均熵的0.9至1.1倍为: [2.131205098338869, 2.3226349016611314]


Unnamed: 0,Second,total_multiply,cum_sum,avg_entropy,dest_src_attack
0,1,2.2161,2.2161,2.216100,-1
1,2,2.2931,4.5092,2.254600,-1
2,3,2.2379,6.7471,2.249033,-1
3,4,2.1930,8.9401,2.235025,-1
4,5,2.2216,11.1617,2.232340,-1
...,...,...,...,...,...
145,146,2.2253,400.9700,2.746370,2
146,147,2.1833,403.1533,2.742539,2
147,148,2.4316,405.5849,2.740439,1
148,149,2.2447,407.8296,2.737111,2


In [102]:
# Cumulative destination|packet-length entropy divided by the second number.
# NOTE(review): this equals the running mean only if 'Second' is contiguous and
# starts at 1 — confirm that no second is missing from the table.
tqv_dest_len_entropy_m3['cum_sum'] = tqv_dest_len_entropy_m3['total_multiply'].cumsum()
tqv_dest_len_entropy_m3['avg_entropy'] = tqv_dest_len_entropy_m3['cum_sum'] / tqv_dest_len_entropy_m3['Second']

# Baseline band: avg_entropy at index label 9 (the average over the first 10
# rows) plus/minus two standard deviations of those same 10 entropy values.
# np.std uses ddof=0 by default, i.e. the population standard deviation.
dest_len_baseline_mean = tqv_dest_len_entropy_m3['avg_entropy'][9]
dest_len_baseline_std = np.std(tqv_dest_len_entropy_m3['total_multiply'][:10])
dest_len_lower_ = dest_len_baseline_mean - 2 * dest_len_baseline_std
dest_len_higher_ = dest_len_baseline_mean + 2 * dest_len_baseline_std

# The message now describes the band that is actually computed; it previously
# still announced the abandoned "0.9 to 1.1 times the average" band.
print(f'dest_len 1到10s平均熵 ± 2倍标准差为: [{dest_len_lower_}, {dest_len_higher_}]')

# 2 = inside the band, 0 = below it, 1 = above it (see check_attack).
tqv_dest_len_entropy_m3['dest_len_attack'] = tqv_dest_len_entropy_m3.apply(lambda row: check_attack(row, 'total_multiply', dest_len_lower_, dest_len_higher_), axis=1)
# Rows with index labels 0-9 (the baseline window) get the sentinel -1.
# .loc[rows, column] writes into the frame itself; the chained form
# df[column].loc[...] = value is not guaranteed to (pandas Copy-on-Write).
tqv_dest_len_entropy_m3.loc[:9, 'dest_len_attack'] = -1
tqv_dest_len_entropy_m3

dest_len 1到10s平均熵的0.9至1.1倍为: [2.126927953409042, 2.455232046590958]


Unnamed: 0,Second,total_multiply,cum_sum,avg_entropy,dest_len_attack
0,1,2.3160,2.3160,2.316000,-1
1,2,2.4567,4.7727,2.386350,-1
2,3,2.2668,7.0395,2.346500,-1
3,4,2.3015,9.3410,2.335250,-1
4,5,2.1568,11.4978,2.299560,-1
...,...,...,...,...,...
145,146,2.2691,290.2126,1.987758,2
146,147,2.3355,292.5481,1.990123,2
147,148,2.3214,294.8695,1.992361,2
148,149,2.3798,297.2493,1.994962,2


In [78]:
# Put the three per-second flags of method 3 side by side, joined on 'Second'.
tqv_method3 = (
    tqv_dest_entropy_m3[['Second','dest_attack']]
    .merge(tqv_dest_src_entropy_m3[['Second','dest_src_attack']], on='Second')
    .merge(tqv_dest_len_entropy_m3[['Second','dest_len_attack']], on='Second')
)
tqv_method3

Unnamed: 0,Second,dest_attack,dest_src_attack,dest_len_attack
0,1,-1,-1,-1
1,2,-1,-1,-1
2,3,-1,-1,-1
3,4,-1,-1,-1
4,5,-1,-1,-1
...,...,...,...,...
145,146,2,2,2
146,147,2,2,2
147,148,2,1,2
148,149,2,2,2


In [79]:
def new_tqv_attack_logic(row):
    """Combine the three band flags (0 / 1 / 2) of one row into a single flag.

    Args:
        row: mapping (e.g. a DataFrame row) with the keys 'dest_attack',
            'dest_src_attack' and 'dest_len_attack'.

    Returns:
        1 when 'dest_attack' is 1.
        When 'dest_attack' is 2: 1 if both other flags are 2, otherwise 0.
        When 'dest_attack' is 0: 1 if both other flags are 1, otherwise 0.
        -1 for any other 'dest_attack' value, e.g. the warm-up rows marked -1.
    """
    dest_flag = row['dest_attack']
    if dest_flag == 1:
        return 1
    if dest_flag == 2:
        both_in_band = row['dest_src_attack'] == 2 and row['dest_len_attack'] == 2
        return 1 if both_in_band else 0
    if dest_flag == 0:
        both_above = row['dest_src_attack'] == 1 and row['dest_len_attack'] == 1
        return 1 if both_above else 0
    # Any other 'dest_attack' value, such as the -1 of the first 10 rows.
    return -1

In [80]:
# Combine the three band flags of every row (axis=1 passes one row at a time).
tqv_method3['tqv_attack_m3'] = tqv_method3.apply(new_tqv_attack_logic, axis=1)
tqv_method3

Unnamed: 0,Second,dest_attack,dest_src_attack,dest_len_attack,tqv_attack_m3
0,1,-1,-1,-1,-1
1,2,-1,-1,-1,-1
2,3,-1,-1,-1,-1
3,4,-1,-1,-1,-1
4,5,-1,-1,-1,-1
...,...,...,...,...,...
145,146,2,2,2,1
146,147,2,2,2,1
147,148,2,1,2,0
148,149,2,2,2,1


### F1, Recall, Precision

In [81]:
# Load the reference labels: only the interval start and the label column are
# read, the first data row is skipped, and the columns are renamed to match the
# prediction tables.
true_attack = (
    pd.read_csv('./data/port-ver3-packets.csv', usecols=['Interval start', 'Attack'])
    .iloc[1:]
    .reset_index(drop=True)
    .rename(columns={'Interval start': 'Second', 'Attack': 'true_attack'})
)
# '-' entries become the sentinel -1, after which the column is cast to integer.
true_attack['true_attack'] = true_attack['true_attack'].replace('-', -1).astype(int)
true_attack

Unnamed: 0,Second,true_attack
0,2,-1
1,3,-1
2,4,-1
3,5,-1
4,6,-1
...,...,...
144,146,1
145,147,1
146,148,1
147,149,1


In [82]:
# Line up the reference labels with the result column of every method, joined
# on 'Second' (merge's default inner join keeps only seconds present in all tables).
prediction_sources = [
    (destination_entropy1, 'old_attack_m1'),
    (destination_entropy2, 'old_attack_m2'),
    (tqv_method1, 'tqv_attack_m1'),
    (tqv_method2, 'tqv_attack_m2'),
    (tqv_method3, 'tqv_attack_m3'),
]
attack_compare = true_attack
for prediction_frame, prediction_column in prediction_sources:
    attack_compare = attack_compare.merge(prediction_frame[['Second', prediction_column]], on='Second')
attack_compare

Unnamed: 0,Second,true_attack,old_attack_m1,old_attack_m2,tqv_attack_m1,tqv_attack_m2,tqv_attack_m3
0,2,-1,-1,-1,-1,-1,-1
1,3,-1,-1,-1,-1,-1,-1
2,4,-1,-1,-1,-1,-1,-1
3,5,-1,-1,-1,-1,-1,-1
4,6,-1,-1,-1,-1,-1,-1
...,...,...,...,...,...,...,...
144,146,1,1,1,1,1,1
145,147,1,1,1,1,1,1
146,148,1,0,1,1,1,0
147,149,1,1,1,1,1,1


In [83]:
# Remove the warm-up / unlabeled seconds, i.e. every row in which the reference
# label or any method's result carries the sentinel -1.
# Filtering by value instead of slicing off the first 10 positions keeps the
# cell idempotent: running it again no longer discards another 10 rows, and it
# does not depend on which second the merged table happens to start with.
# NOTE(review): a second whose labels are all valid is now kept even if it sits
# within the first 10 rows of the merged table — confirm this is intended.
label_columns = attack_compare.columns.drop('Second')
attack_compare = attack_compare[(attack_compare[label_columns] != -1).all(axis=1)]

In [84]:
# Preview the first 20 rows that will be scored.
attack_compare.head(20)

Unnamed: 0,Second,true_attack,old_attack_m1,old_attack_m2,tqv_attack_m1,tqv_attack_m2,tqv_attack_m3
10,12,1,1,1,1,1,1
11,13,1,0,0,0,0,1
12,14,1,1,1,1,1,1
13,15,1,0,0,1,1,1
14,16,1,1,1,1,1,1
15,17,1,0,0,0,0,1
16,18,1,1,1,1,1,1
17,19,1,1,1,1,1,1
18,20,1,0,0,0,0,0
19,21,1,1,1,1,1,1


In [85]:
def metrics(attack_compare, predicted_label, true_label='true_attack'):
    """Print the scikit-learn classification report for one prediction column.

    Args:
        attack_compare: Table holding both the ground-truth column and the
            prediction column.
        predicted_label: Name of the column with the predicted labels; it is
            also shown in the printed header.
        true_label: Name of the column with the ground-truth labels.

    Returns:
        None. A header line, the report, and a trailing blank line are
        written to stdout.
    """
    print(f'================== The metrics of {predicted_label} ==================')
    y_true, y_pred = attack_compare[true_label], attack_compare[predicted_label]
    report = classification_report(y_true, y_pred)
    # Report followed by one empty line to separate consecutive reports.
    print(report, end='\n\n')

In [86]:
# Print the classification report of every detector, in the same order as the
# prediction columns of the comparison table.
for detector_column in (
    'old_attack_m1',
    'old_attack_m2',
    'tqv_attack_m1',
    'tqv_attack_m2',
    'tqv_attack_m3',
):
    metrics(attack_compare, detector_column)

              precision    recall  f1-score   support

           0       0.55      1.00      0.71        49
           1       1.00      0.56      0.71        90

    accuracy                           0.71       139
   macro avg       0.78      0.78      0.71       139
weighted avg       0.84      0.71      0.71       139


              precision    recall  f1-score   support

           0       0.60      1.00      0.75        49
           1       1.00      0.64      0.78        90

    accuracy                           0.77       139
   macro avg       0.80      0.82      0.77       139
weighted avg       0.86      0.77      0.77       139


              precision    recall  f1-score   support

           0       0.89      1.00      0.94        49
           1       1.00      0.93      0.97        90

    accuracy                           0.96       139
   macro avg       0.95      0.97      0.95       139
weighted avg       0.96      0.96      0.96       139


              pr

In [62]:
def f1(attack_compare, predicted_label, true_label='true_attack'):
    """Print the confusion matrix for one prediction column.

    Despite its name, this function does not compute an F1 score: it prints
    the result of scikit-learn's ``confusion_matrix`` (rows are the true
    labels, columns the predicted labels).

    Args:
        attack_compare: Table holding both the ground-truth column and the
            prediction column.
        predicted_label: Name of the column with the predicted labels; it is
            also shown in the printed header.
        true_label: Name of the column with the ground-truth labels.

    Returns:
        None. A header line, the matrix, and a trailing blank line are
        written to stdout.
    """
    print(f'==== The metrics of {predicted_label} ====')
    y_true, y_pred = attack_compare[true_label], attack_compare[predicted_label]
    matrix = confusion_matrix(y_true, y_pred)
    # Matrix followed by one empty line to separate consecutive matrices.
    print(matrix, end='\n\n')

In [87]:
# Print the confusion matrix of every detector, in the same order as the
# prediction columns of the comparison table.
for detector_column in (
    'old_attack_m1',
    'old_attack_m2',
    'tqv_attack_m1',
    'tqv_attack_m2',
    'tqv_attack_m3',
):
    f1(attack_compare, detector_column)

==== The metrics of old_attack_m1 ====
[[49  0]
 [40 50]]

==== The metrics of old_attack_m2 ====
[[49  0]
 [32 58]]

==== The metrics of tqv_attack_m1 ====
[[49  0]
 [ 6 84]]

==== The metrics of tqv_attack_m2 ====
[[49  0]
 [ 4 86]]

==== The metrics of tqv_attack_m3 ====
[[49  0]
 [17 73]]



### Plot

In [88]:
# Preview the destination entropy table (columns `Second` and `multiply`);
# it is the input of the "Entropy of Destination" line chart.
destination_entropy

Unnamed: 0,Second,multiply
0,1,3.4604
1,2,3.2796
2,3,3.3820
3,4,3.4548
4,5,3.5273
...,...,...
145,146,3.4676
146,147,3.4485
147,148,3.2925
148,149,3.4441


In [89]:
# Preview the table of destination entropy conditioned on source port
# (columns `Second` and `total_multiply`); it is the input of the
# "Entropy of Destination conditioned on Source Port" line chart.
dest_src_entropy

Unnamed: 0,Second,total_multiply
0,1,2.2161
1,2,2.2931
2,3,2.2379
3,4,2.1930
4,5,2.2216
...,...,...
145,146,2.2253
146,147,2.1833
147,148,2.4316
148,149,2.2447


In [90]:
# Preview the table of destination entropy conditioned on packet length
# (columns `Second` and `total_multiply`); it is the input of the
# "Entropy of Destination conditioned on Packet Length" line chart.
dest_len_entropy

Unnamed: 0,Second,total_multiply
0,1,2.3160
1,2,2.4567
2,3,2.2668
3,4,2.3015
4,5,2.1568
...,...,...
145,146,2.2691
146,147,2.3355
147,148,2.3214
148,149,2.3798


In [91]:
# Line chart of the destination entropy (`multiply`) over time (`Second`).
# The y axis is relabelled so it reads "Entropy" instead of the column name.
fig = px.line(
    destination_entropy,
    x='Second',
    y='multiply',
    title='Entropy of Destination',
).update_yaxes(title='Entropy')
fig.show()

In [92]:
# Line chart of the destination entropy conditioned on source port
# (`total_multiply`) over time (`Second`).
# The y axis is relabelled so it reads "Entropy" instead of the column name.
fig = px.line(
    dest_src_entropy,
    x='Second',
    y='total_multiply',
    title='Entropy of Destination conditioned on Source Port',
).update_yaxes(title='Entropy')
fig.show()

In [93]:
# Line chart of the destination entropy conditioned on packet length
# (`total_multiply`) over time (`Second`).
# The y axis is relabelled so it reads "Entropy" instead of the column name.
fig = px.line(
    dest_len_entropy,
    x='Second',
    y='total_multiply',
    title='Entropy of Destination conditioned on Packet Length',
).update_yaxes(title='Entropy')
fig.show()