<a href="https://colab.research.google.com/github/plant310/5UJob/blob/master/NSL_KDD_TRY_2_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 连接谷歌网盘，读取数据集

In [None]:
# Mount Google Drive inside the Colab runtime so the dataset files are reachable.
from google.colab import drive
drive.mount('/content/gdrive')
# Switch the working directory to the dataset folder on the mounted drive
# (the path points at an NSL_KDD directory).
import os
os.chdir("/content/gdrive/My Drive/Self_ANN/NSL_KDD")
# List the files in the current directory (IPython shell escape).
!ls

Mounted at /content/gdrive
KDDTest+.txt  KDDTrain+.txt  training_attack_types.txt


### 数据集读取并预处理

In [None]:
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from collections import Counter
from sklearn.preprocessing import MinMaxScaler 

In [None]:
# Build the lookup from fine-grained attack name to its coarse category.
from collections import defaultdict
category = defaultdict(list)
category['benign'].append('normal')

# Each non-empty line of the mapping file is "<attack_name> <category>".
with open('training_attack_types.txt', 'r') as f:
  for line in f:
    fields = line.split()
    if not fields:
      # Tolerate blank lines (e.g. a trailing newline at the end of the file).
      continue
    attack, cat = fields
    category[cat].append(attack)
attack_mapping = {attack: cat for cat, attacks in category.items() for attack in attacks}
print(attack_mapping)

# Column names for the header-less data files: 41 features followed by the
# fine-grained attack type and a trailing score column.
header_names = ['duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land', 'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised', 'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells', 'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count', 'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate', 'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate', 'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate', 'attack_type', 'success_pred']


def _load_nsl_kdd(path):
  """Read one data file and replace the attack type by its coarse category.

  Args:
    path: path of a header-less CSV file whose columns follow header_names.

  Returns:
    DataFrame with the feature columns plus a 'label' column; the
    'attack_type' and 'success_pred' columns are removed.

  Raises:
    KeyError: if the file contains an attack type missing from attack_mapping.
  """
  df = pd.read_csv(path, names=header_names)
  # Collapse the fine-grained attack types into the coarse categories.
  df['label'] = df['attack_type'].map(lambda name: attack_mapping[name])
  # Drop the fine-grained attack column and the trailing score column.
  return df.drop(['attack_type', 'success_pred'], axis=1)


# Training set and test set go through exactly the same preparation.
train_df = _load_nsl_kdd("KDDTrain+.txt")
test_df = _load_nsl_kdd("KDDTest+.txt")

{'normal': 'benign', 'apache2': 'dos', 'back': 'dos', 'mailbomb': 'dos', 'processtable': 'dos', 'snmpgetattack': 'dos', 'teardrop': 'dos', 'smurf': 'dos', 'land': 'dos', 'neptune': 'dos', 'pod': 'dos', 'udpstorm': 'dos', 'ps': 'u2r', 'buffer_overflow': 'u2r', 'perl': 'u2r', 'rootkit': 'u2r', 'loadmodule': 'u2r', 'xterm': 'u2r', 'sqlattack': 'u2r', 'httptunnel': 'u2r', 'ftp_write': 'r2l', 'guess_passwd': 'r2l', 'snmpguess': 'r2l', 'imap': 'r2l', 'spy': 'r2l', 'warezclient': 'r2l', 'warezmaster': 'r2l', 'multihop': 'r2l', 'phf': 'r2l', 'named': 'r2l', 'sendmail': 'r2l', 'xlock': 'r2l', 'xsnoop': 'r2l', 'worm': 'probe', 'nmap': 'probe', 'ipsweep': 'probe', 'portsweep': 'probe', 'satan': 'probe', 'mscan': 'probe', 'saint': 'probe'}


In [None]:
# Separate the feature columns from the label column for both data frames.
X_train, Y_train = train_df.drop(columns='label'), train_df['label']
X_test, Y_test = test_df.drop(columns='label'), test_df['label']

In [None]:
# Exploratory look at the categorical columns; not part of the preprocessing.

train_num = len(X_train)
total_data = pd.concat([X_train, X_test])  # stack train and test rows

# Distinct protocol types
protocol_type = total_data['protocol_type']
protocol_type_unique = [*set(protocol_type)]
print(protocol_type_unique)

# Distinct network services
service = total_data['service']
service_unique = [*set(service)]
print(service_unique)
print('网络服务类型的种类为',len(service_unique))

# Distinct connection status flags
flag = total_data['flag']
flag_unique = [*set(flag)]
print(flag_unique)
print('网络连接状态的种类为',len(flag_unique))


['udp', 'tcp', 'icmp']
['echo', 'http', 'gopher', 'sunrpc', 'courier', 'pop_3', 'nnsp', 'tim_i', 'finger', 'login', 'name', 'uucp_path', 'Z39_50', 'printer', 'ecr_i', 'eco_i', 'domain_u', 'nntp', 'netbios_ssn', 'klogin', 'ldap', 'netstat', 'urh_i', 'X11', 'private', 'urp_i', 'http_2784', 'remote_job', 'smtp', 'link', 'ftp', 'uucp', 'sql_net', 'pm_dump', 'red_i', 'domain', 'mtp', 'supdup', 'shell', 'imap4', 'auth', 'pop_2', 'whois', 'ftp_data', 'time', 'IRC', 'kshell', 'tftp_u', 'systat', 'ntp_u', 'discard', 'ctf', 'http_443', 'netbios_dgm', 'efs', 'exec', 'ssh', 'rje', 'http_8001', 'other', 'telnet', 'hostnames', 'aol', 'csnet_ns', 'harvest', 'iso_tsap', 'daytime', 'vmnet', 'netbios_ns', 'bgp']
网络服务类型的种类为 70
['SH', 'RSTO', 'S0', 'SF', 'RSTR', 'S2', 'RSTOS0', 'S3', 'S1', 'REJ', 'OTH']
网络连接状态的种类为 11


In [None]:
# Show (rows, columns) of the combined train + test feature frame.
print(total_data.shape)

(148517, 41)


In [None]:
# One-hot encode the three categorical feature columns.
# Train and test rows are encoded together so both share the same category set.

train_num = X_train.shape[0]
total_data = pd.concat([X_train, X_test], axis=0)  # row-wise concatenation

x = total_data.values[:,:]
# Step 1: turn the string categories into integer codes (columns 1, 2, 3).
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Encode the protocol type feature (column 1)
labelencoder_x_1 = LabelEncoder()
x[:, 1] = labelencoder_x_1.fit_transform(x[:, 1])
# Encode the service feature (column 2)
labelencoder_x_2 = LabelEncoder()
x[:, 2] = labelencoder_x_2.fit_transform(x[:, 2])
# Encode the flag feature (column 3)
labelencoder_x_3 = LabelEncoder()
x[:, 3] = labelencoder_x_3.fit_transform(x[:, 3])

print(x.shape)


# Step 2: expand each integer-coded column into one-hot columns, one column at a time.
# NOTE(review): the indices used below assume each ColumnTransformer emits the
# one-hot columns first and the passthrough columns after them, which shifts
# the remaining categorical columns on every step — TODO confirm against the
# ColumnTransformer documentation if the column layout ever changes.
from sklearn.compose import ColumnTransformer
# Protocol type sits at index 1 of the original layout.
ct = ColumnTransformer([("ProtocolType", OneHotEncoder(), [1])], remainder = 'passthrough')
x = ct.fit_transform(x)
print(x.shape)


# Service is addressed at index 4 after the previous step.
# The recorded run grows from 43 to 112 columns here, i.e. 70 service categories.
ct = ColumnTransformer([("Service", OneHotEncoder(), [4])], remainder = 'passthrough')
x = ct.fit_transform(x)
print(x.shape)

# Flag is addressed at index 74 after the previous step.
ct = ColumnTransformer([("Flag", OneHotEncoder(), [74])], remainder = 'passthrough')  # NOTE(review): author's note says the Service count differs from the reference implementation (71 changed to 70) because this dataset is smaller; this hard-coded index depends on that count — re-check it if the data changes
x = ct.fit_transform(x)
print(x.shape)

(148517, 41)
(148517, 43)
(148517, 112)
(148517, 122)


In [None]:
# Compare the number of training labels with the combined train + test label count.
print(Y_train.shape)
y = pd.concat([Y_train, Y_test], axis=0)  # row-wise concatenation
print(y.shape)

(125973,)
(148517,)


In [None]:
# Turn the string class labels of train + test into integer codes.
y = pd.concat([Y_train, Y_test], axis=0).values  # row-wise concatenation, as a NumPy array
print("编码前", y)
labelencoder_y = LabelEncoder()
labelencoder_y.fit(y)
y = labelencoder_y.transform(y)
print("编码后", y)

编码前 ['benign' 'benign' 'dos' ... 'dos' 'benign' 'probe']
编码后 [0 0 1 ... 1 0 2]


In [None]:
# Min-max normalisation of the feature matrix.
# The scaler is fitted on the training rows only (the first train_num rows of
# x) so that test-set minima/maxima do not leak into the preprocessing; the
# fitted transform is then applied to every row. Test rows may therefore fall
# slightly outside [0, 1].
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(x[:train_num])
x = scaler.transform(x)

In [None]:
# Undo the earlier row-wise concatenation: the first train_num rows belong to
# the training set, everything after them to the test set.
x_train, x_test = x[:train_num], x[train_num:]
y_train, y_test = y[:train_num], y[train_num:]

### 复现一篇文章自编码器降维算法----[NDAE降维](https://github.com/ngoctn-lqdtu/A-Deep-Learning-Approach-to-Network-Intrusion-Detection/blob/master/KDD99_5_class_NDAE_classifier.ipynb)

In [None]:
# Baseline: random-forest accuracy on the features before dimensionality reduction.
from sklearn.ensemble import RandomForestClassifier
rfc1 = RandomForestClassifier(n_estimators=10, n_jobs=-1).fit(x_train, y_train)
rfc1.score(x_test, y_test)

0.7457416607523066

In [None]:
import keras
from keras.layers import Input, Dense
from keras.models import Model
from keras import backend as K
import tensorflow as tf
# NOTE(review): this Colab magic is issued after TensorFlow has already been
# imported above; the recorded output reports that TensorFlow was already
# loaded and prints version 2.8.2, so the 1.x request did not take effect.
%tensorflow_version 1.x
import tensorflow as tf
print(tf.__version__)

TensorFlow is already loaded. Please restart the runtime to change versions.
2.8.2


In [None]:
class DenseTranspose(keras.layers.Layer):
  """Decoder layer that reuses the transposed kernel of an existing Dense layer.

  The layer owns only a bias vector; its kernel is the transpose of
  ``dense``'s kernel, so encoder and decoder share (tie) their weights.

  Args:
    dense: an already-built Dense layer whose kernel is reused.
    activation: name or callable of the activation applied to the output.
    **kwargs: forwarded to keras.layers.Layer.
  """
  def __init__(self, dense, activation=None, **kwargs):
    self.dense = dense
    self.activation = keras.activations.get(activation)
    super().__init__(**kwargs)
  def build(self, batch_input_shape):
    # One bias per output unit; the output width equals the input width of the tied Dense layer.
    self.biases = self.add_weight(name="bias", initializer="zeros",shape=[self.dense.input_shape[-1]])
    super().build(batch_input_shape)
  @property
  def W(self):
    """Transposed kernel of the tied Dense layer, read from its current value."""
    return tf.transpose(self.dense.weights[0])
  def compute_output_shape(self, input_shape):
    return (input_shape[0], self.dense.input_shape[-1])
  def call(self, inputs):
    # Transpose inside call rather than caching the result in build: a
    # transpose computed once in build can be captured as a fixed value under
    # eager execution (TF 2.x), which would silently untie the decoder from
    # the kernel that is being trained.
    z = tf.matmul(inputs, self.dense.weights[0], transpose_b=True)
    return self.activation(z + self.biases)

In [None]:
# First stacked autoencoder: 122 input features, three sigmoid Dense layers
# (10, 20, 20 units) mirrored by weight-tied DenseTranspose decoder layers.
K.clear_session()
num_hidden = (122, 10, 20, 20)
input_dim, *hidden_units = num_hidden

Dense_11, Dense_12, Dense_13 = [Dense(units=u, activation='sigmoid') for u in hidden_units]

inputs_1 = Input(shape=(input_dim,))

# Encoder path
encoded_11 = Dense_11(inputs_1)
encoded_12 = Dense_12(encoded_11)
encoded_13 = Dense_13(encoded_12)

# Decoder path (each layer is tied to the matching encoder layer, in reverse order)
decoded_11 = DenseTranspose(Dense_13, activation='sigmoid')(encoded_13)
decoded_12 = DenseTranspose(Dense_12, activation='sigmoid')(decoded_11)
outputs_1 = DenseTranspose(Dense_11, activation='sigmoid')(decoded_12)

# AE_1 reconstructs the input; Encoder_1 exposes the activation of the second
# decoder layer (decoded_12) as the reduced representation.
AE_1 = Model(inputs_1, outputs_1)
Encoder_1 = Model(inputs_1, decoded_12)

In [None]:
# Train the first autoencoder to reconstruct its own input
# (x_train is passed as both the input and the target).
AE_1.compile(optimizer='rmsprop', loss= 'mse')
AE_1.fit(x_train,x_train,epochs=50,batch_size=256,shuffle=True)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fe8e2c86890>

In [None]:
# Run the train and test features through Encoder_1 to obtain the
# first-stage reduced representations.
AE_1_encoded_train = Encoder_1.predict(x_train)
AE_1_encoded_test = Encoder_1.predict(x_test)

In [None]:
# Second stacked autoencoder: takes the 10-dimensional output of the first
# stage, two sigmoid Dense layers (20, 20 units) mirrored by weight-tied
# DenseTranspose decoder layers.
K.clear_session()
num_hidden = (10, 20, 20)
input_dim, *hidden_units = num_hidden

Dense_21, Dense_22 = [Dense(units=u, activation='sigmoid') for u in hidden_units]

inputs_2 = Input(shape=(input_dim,))

# Encoder path
encoded_21 = Dense_21(inputs_2)
encoded_22 = Dense_22(encoded_21)

# Decoder path (tied to the encoder layers in reverse order)
decoded_21 = DenseTranspose(Dense_22, activation='sigmoid')(encoded_22)
outputs_2 = DenseTranspose(Dense_21, activation='sigmoid')(decoded_21)

# AE_2 reconstructs its input; Encoder_2 exposes the activation of the first
# decoder layer (decoded_21) as the reduced representation.
AE_2 = Model(inputs_2, outputs_2)
Encoder_2 = Model(inputs_2, decoded_21)

In [None]:
# Train the second autoencoder to reconstruct the first-stage encodings
# (AE_1_encoded_train is passed as both the input and the target).
AE_2.compile(optimizer='rmsprop', loss= 'mse')
AE_2.fit(AE_1_encoded_train,AE_1_encoded_train,epochs=100,batch_size=256,shuffle=True)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7fe8e29b7f50>

In [None]:
# Run the first-stage encodings through Encoder_2 to obtain the final
# reduced representations of the train and test sets.
AE_2_encoded_train = Encoder_2.predict(AE_1_encoded_train)
AE_2_encoded_test = Encoder_2.predict(AE_1_encoded_test)

In [None]:
# Random-forest accuracy on the features after the two-stage reduction,
# for comparison with the baseline on the original features.
from sklearn.ensemble import RandomForestClassifier
rfc1 = RandomForestClassifier(n_estimators=10, n_jobs=-1).fit(AE_2_encoded_train, y_train)
rfc1.score(AE_2_encoded_test, y_test)

0.6958392476933996

### 免疫

In [None]:
import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time

In [None]:
# Split the encoded training set into self (normal) samples, known attacks
# and a held-back attack class that plays the role of an "unknown" attack,
# then carve train/test portions out of each pool.
#
# Rows are selected with boolean masks instead of a per-row Python loop over
# uninitialised buffers, and the integer train_num defined earlier in the
# notebook is left untouched so the earlier cells can still be re-run.
encoded_train = np.asarray(AE_2_encoded_train, dtype=np.float64)
labels_train = np.asarray(y_train)

known_attack_rows = encoded_train[np.isin(labels_train, [1, 3, 4])]  # known attacks
normal_rows = encoded_train[labels_train == 0]  # normal samples are label-encoded as 0
unknown_attack_rows = encoded_train[labels_train == 2]  # reserved as the unknown attack for testing

# Pool sizes (names kept from the original cell).
flag, flag2, flag3 = len(known_attack_rows), len(normal_rows), len(unknown_attack_rows)

test_NonSelf = known_attack_rows[int(flag*0.8):]  # last 20% of the known attacks, used for testing
test_Self = normal_rows[int(flag2*0.8):]  # last 20% of the self samples, used for testing

Abnormaldata = known_attack_rows[:int(flag*0.8)]  # first 80% of the known attacks, used for training
print("攻击数据量", Abnormaldata.shape)
Normaldata = normal_rows[:int(flag2*0.8)]  # first 80% of the self samples, used for training
print("正常样本数量", Normaldata.shape)
test_NonSelf2 = unknown_attack_rows[:int(flag3*0.1)]  # first 10% of the unknown attack, used for testing
print("测试的攻击样本数据量",test_NonSelf2.shape)

攻击数据量 (37579, 20)
正常样本数量 (53874, 20)
测试的攻击样本数据量 (1165, 20)


In [None]:
# Pairwise Euclidean distances computed with matrix operations.
def EuclideanDistance(A, B):
    """Return the matrix of Euclidean distances between the rows of A and B.

    Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b so that all
    pairs are handled by one matrix product.

    Args:
        A: 2-D torch tensor of shape (m, d).
        B: 2-D torch tensor of shape (n, d) with the same dtype as A.

    Returns:
        Tensor of shape (m, n); entry (i, j) is the distance between A[i] and
        B[j]. Squared distances are clamped to at least 1e-12 before the
        square root, so no returned value is smaller than 1e-6.
    """
    m, n = A.shape[0], B.shape[0]
    # Row-wise squared norms of A, broadcast to shape (m, n).
    AA = torch.pow(A, 2).sum(1, keepdim=True).expand(m, n)
    # Row-wise squared norms of B, broadcast to (n, m) and transposed to (m, n).
    BB = torch.pow(B, 2).sum(1, keepdim=True).expand(n, m).T
    dist = AA + BB
    # dist <- 1 * dist + (-2) * A @ B.T; beta/alpha are passed by keyword
    # because the positional (beta, alpha, mat1, mat2) overload is deprecated.
    dist.addmm_(A, B.t(), beta=1, alpha=-2)
    # Clamp away tiny negative values caused by rounding before taking the root.
    dist = dist.clamp(min=1e-12).sqrt()
    return dist


# Generate self detectors: each radius is derived from the distance between a
# self sample and its nearest non-self sample, reduced by the non-self radius.
def Gen_Self_Detector(Normaldata, Abnormaldata, Nonself_Radius, verbose=True):
    """Compute one detector radius per self (normal) sample.

    For every row of Normaldata the distance to the nearest row of
    Abnormaldata is taken. If that distance is smaller than
    2 * Nonself_Radius the radius is 0.4 times the distance; otherwise it is
    the distance minus Nonself_Radius.

    Args:
        Normaldata: 2-D array of self samples, one per row.
        Abnormaldata: 2-D array of non-self samples with the same number of
            columns; must contain at least one row.
        Nonself_Radius: radius assigned to every non-self sample.
        verbose: when True (the default) print the radius of every sample.

    Returns:
        1-D float64 NumPy array with one radius per row of Normaldata.
    """
    # Distances between every self sample and every non-self sample.
    process_Nor = torch.tensor(Normaldata)
    process_Abnor = torch.tensor(Abnormaldata)
    Self_Nonself_distance = EuclideanDistance(process_Nor, process_Abnor)

    # Nearest non-self distance per self sample, as one vectorised reduction
    # (the builtin min() would iterate over each row element by element).
    min_distance = Self_Nonself_distance.min(dim=1).values

    # Shrink the radius when the nearest non-self sample is very close,
    # otherwise subtract the non-self radius from the distance.
    radius = torch.where(
        min_distance < 2 * Nonself_Radius,
        min_distance * 0.4,
        min_distance - Nonself_Radius,
    )
    Radius = radius.numpy().astype(np.float64)

    if verbose:
        for i, sample_radius in enumerate(Radius):
            print("第{}个自体半径为 {}".format(i, sample_radius))
    return Radius


# Compute the detector radius of every self sample in chunks.
step = 200  # chunk size: a single distance matrix over all rows ran out of memory, so work in steps
nonself_radius = 0
Normaldata_num = len(Normaldata)
Radius = np.empty(shape=Normaldata_num)  # holds the radius of each self detector
for i in range(0, Normaldata_num, step):
  print("实验进行{}/{}.".format(i, Normaldata_num))
  # Slicing clips at the end of the array, so the final (shorter) chunk is
  # handled by the same code path and no separate tail pass is needed.
  local_Normaldata = Normaldata[i:i+step,:]
  curr_radius = Gen_Self_Detector(local_Normaldata, Abnormaldata, nonself_radius)  # radii for this chunk
  Radius[i:i+step] = curr_radius
print("实验进行{}/{}.".format(Normaldata_num, Normaldata_num))
# Radius is one-dimensional, so no transpose is required before it is used.

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
第174个自体半径为 0.00012434151790227487
第175个自体半径为 9.878246068763309e-05
第176个自体半径为 4.50388613911515e-05
第177个自体半径为 0.0005009454381943175
第178个自体半径为 5.946208438136866e-05
第179个自体半径为 0.0002577854466416567
第180个自体半径为 0.0002844522658728871
第181个自体半径为 0.00037311493961754807
第182个自体半径为 8.174983596564826e-05
第183个自体半径为 0.000807066805321463
第184个自体半径为 0.0008975372792352921
第185个自体半径为 0.0004328767053748291
第186个自体半径为 0.03482896922770052
第187个自体半径为 0.0002844522658728871
第188个自体半径为 0.001243988672376603
第189个自体半径为 0.002502444581715462
第190个自体半径为 0.0011964086905180906
第191个自体半径为 1e-06
第192个自体半径为 0.00046114034288027633
第193个自体半径为 6.53129666699619e-05
第194个自体半径为 4.526131743229988e-05
第195个自体半径为 1e-06
第196个自体半径为 0.00015619346909918915
第197个自体半径为 0.0005916121006505406
第198个自体半径为 0.0009460185033841882
第199个自体半径为 0.0004567501128971317
实验进行49000/53874.
第0个自体半径为 8.545465169074817e-05
第1个自体半径为 0.0006152591888566627
第2个自体半径为 0.0022550961930785356
第3个自体半径为 0.000736000979929

### 未知攻击测试

In [None]:
def test(normal_sample, attack_sample, self_detector, detector_radius, verbose=True):
    """Evaluate the self detectors on normal and attack samples.

    A sample counts as covered when its Euclidean distance to some detector
    is smaller than that detector's radius; the first such detector (lowest
    index) is the one reported.

    Args:
        normal_sample: 2-D array of normal samples, one per row.
        attack_sample: 2-D array of attack samples, one per row.
        self_detector: 2-D array of detector centres, one per row.
        detector_radius: 1-D array with one radius per detector.
        verbose: when True (the default) print one line per covered attack.

    Returns:
        (error_nonself, error_self): error_nonself lists every attack sample
        covered by a detector (columns attack_sample, self_detector,
        detector_radius; index starts at 1). error_self lists the index of
        every normal sample that no detector covers (column normal_sample).
    """
    self_detector = np.asarray(self_detector)
    detector_radius = np.asarray(detector_radius)

    def first_covering_detector(sample):
        # Distances from this sample to all detectors in one vectorised call.
        distances = np.linalg.norm(self_detector - sample, axis=1)
        hits = np.flatnonzero(distances < detector_radius)
        if hits.size == 0:
            return None, None
        k = int(hits[0])
        return k, distances[k]

    # Normal samples that fall inside no self detector.
    uncovered_normals = []
    for i in range(len(normal_sample)):
        k, _ = first_covering_detector(normal_sample[i])
        if k is None:
            uncovered_normals.append(i)

    # Attack samples that are wrongly covered by a self detector.
    covered_attacks = []
    for i in range(len(attack_sample)):
        k, distance = first_covering_detector(attack_sample[i])
        if k is None:
            continue
        covered_attacks.append([i, k, detector_radius[k]])
        if verbose:
            print("第{}个异常样本，被第{}个自体检测器错误覆盖，距离{}，该检测器半径为{}".format(i, k, distance, detector_radius[k]))

    false_self = len(uncovered_normals)
    true_self = len(normal_sample) - false_self
    false_nonself = len(covered_attacks)
    true_nonself = len(attack_sample) - false_nonself

    # Guard the rates against empty sample sets instead of dividing by zero.
    self_rate = true_self / len(normal_sample) if len(normal_sample) else float('nan')
    nonself_rate = true_nonself / len(attack_sample) if len(attack_sample) else float('nan')
    print('true_self:{}, false_self:{}, true_nonself:{}, false_nonself:{}'.format(true_self, false_self, true_nonself, false_nonself))
    print('自体准确率：{}， 非自体识别准确率：{}'.format(self_rate, nonself_rate))

    error_self = pd.DataFrame({'normal_sample': uncovered_normals})
    error_nonself = pd.DataFrame(covered_attacks, columns=["attack_sample", "self_detector", "detector_radius"])
    # Keep the 1-based row labels callers have seen so far.
    error_nonself.index = range(1, len(error_nonself) + 1)
    return error_nonself, error_self


# The name `test` matches pytest's default collection pattern; mark the helper
# so that test runners do not try to execute it as a test case.
test.__test__ = False




# Pairwise Euclidean distances computed with matrix operations.
def EuclideanDistance(A, B):
    """Return the matrix of Euclidean distances between the rows of A and B.

    Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b so that all
    pairs are handled by one matrix product.

    Args:
        A: 2-D torch tensor of shape (m, d).
        B: 2-D torch tensor of shape (n, d) with the same dtype as A.

    Returns:
        Tensor of shape (m, n); entry (i, j) is the distance between A[i] and
        B[j]. Squared distances are clamped to at least 1e-12 before the
        square root, so no returned value is smaller than 1e-6.
    """
    m, n = A.shape[0], B.shape[0]
    # Row-wise squared norms of A, broadcast to shape (m, n).
    AA = torch.pow(A, 2).sum(1, keepdim=True).expand(m, n)
    # Row-wise squared norms of B, broadcast to (n, m) and transposed to (m, n).
    BB = torch.pow(B, 2).sum(1, keepdim=True).expand(n, m).T
    dist = AA + BB
    # dist <- 1 * dist + (-2) * A @ B.T; beta/alpha are passed by keyword
    # because the positional (beta, alpha, mat1, mat2) overload is deprecated.
    dist.addmm_(A, B.t(), beta=1, alpha=-2)
    # Clamp away tiny negative values caused by rounding before taking the root.
    dist = dist.clamp(min=1e-12).sqrt()
    return dist



# Self samples for this evaluation: encoded test-set rows whose label code is 0.
test_Self = AE_2_encoded_test[y_test==0,:]
# The string literal below is disabled code kept by the author (an alternative
# way of assembling the non-self test set); it is evaluated and discarded.
'''test_NonSelf1 = AE_2_encoded_test[y_test==1,:]
test_NonSelf = np.r_[test_NonSelf1, test_NonSelf2]'''
# Evaluate the generated detectors against the held-back ("unknown") attack samples.
error_nonself, error_self = test(test_Self, test_NonSelf2, Normaldata, Radius)


第0个异常样本，被第3092个自体检测器错误覆盖，距离6.502202920131574e-05，该检测器半径为0.0001537197552241818
第1个异常样本，被第23262个自体检测器错误覆盖，距离0.00011859021146393898，该检测器半径为0.00017410687188262108
第2个异常样本，被第793个自体检测器错误覆盖，距离7.638934657640018e-07，该检测器半径为1e-06
第3个异常样本，被第6559个自体检测器错误覆盖，距离0.0007402637738442873，该检测器半径为0.0007474388805345792
第4个异常样本，被第66个自体检测器错误覆盖，距离7.760057923318025e-07，该检测器半径为1e-06
第6个异常样本，被第25103个自体检测器错误覆盖，距离9.958337103959735e-05，该检测器半径为0.00010613917468223686
第9个异常样本，被第1886个自体检测器错误覆盖，距离0.00016199306123011547，该检测器半径为0.000299043979499568
第11个异常样本，被第1503个自体检测器错误覆盖，距离5.990192700378956e-07，该检测器半径为1e-06
第12个异常样本，被第4262个自体检测器错误覆盖，距离1.566950468963324e-05，该检测器半径为1.6306378209415943e-05
第14个异常样本，被第87个自体检测器错误覆盖，距离2.2891598193950898e-07，该检测器半径为1e-06
第15个异常样本，被第23262个自体检测器错误覆盖，距离0.00016306907632314472，该检测器半径为0.00017410687188262108
第16个异常样本，被第295个自体检测器错误覆盖，距离0.00028328861183709263，该检测器半径为0.00040246646445255186
第17个异常样本，被第23262个自体检测器错误覆盖，距离0.00012611306189751374，该检测器半径为0.00017410687188262108
第21个异常样本，被第154个自体检测器错误覆盖，距离9.185692

### 测试已知攻击检测率

In [None]:
# Evaluate the detectors generated above on known attack samples.

def test(normal_sample, attack_sample, self_detector, detector_radius, verbose=True):
    """Evaluate the self detectors on normal and attack samples.

    A sample counts as covered when its Euclidean distance to some detector
    is smaller than that detector's radius; the first such detector (lowest
    index) is the one reported.

    Args:
        normal_sample: 2-D array of normal samples, one per row.
        attack_sample: 2-D array of attack samples, one per row.
        self_detector: 2-D array of detector centres, one per row.
        detector_radius: 1-D array with one radius per detector.
        verbose: when True (the default) print one line per covered attack.

    Returns:
        (error_nonself, error_self): error_nonself lists every attack sample
        covered by a detector (columns attack_sample, self_detector,
        detector_radius; index starts at 1). error_self lists the index of
        every normal sample that no detector covers (column normal_sample).
    """
    self_detector = np.asarray(self_detector)
    detector_radius = np.asarray(detector_radius)

    def first_covering_detector(sample):
        # Distances from this sample to all detectors in one vectorised call.
        distances = np.linalg.norm(self_detector - sample, axis=1)
        hits = np.flatnonzero(distances < detector_radius)
        if hits.size == 0:
            return None, None
        k = int(hits[0])
        return k, distances[k]

    # Normal samples that fall inside no self detector.
    uncovered_normals = []
    for i in range(len(normal_sample)):
        k, _ = first_covering_detector(normal_sample[i])
        if k is None:
            uncovered_normals.append(i)

    # Attack samples that are wrongly covered by a self detector.
    covered_attacks = []
    for i in range(len(attack_sample)):
        k, distance = first_covering_detector(attack_sample[i])
        if k is None:
            continue
        covered_attacks.append([i, k, detector_radius[k]])
        if verbose:
            print("第{}个异常样本，被第{}个自体检测器错误覆盖，距离{}，该检测器半径为{}".format(i, k, distance, detector_radius[k]))

    false_self = len(uncovered_normals)
    true_self = len(normal_sample) - false_self
    false_nonself = len(covered_attacks)
    true_nonself = len(attack_sample) - false_nonself

    # Guard the rates against empty sample sets instead of dividing by zero.
    self_rate = true_self / len(normal_sample) if len(normal_sample) else float('nan')
    nonself_rate = true_nonself / len(attack_sample) if len(attack_sample) else float('nan')
    print('true_self:{}, false_self:{}, true_nonself:{}, false_nonself:{}'.format(true_self, false_self, true_nonself, false_nonself))
    print('自体准确率：{}， 非自体识别准确率：{}'.format(self_rate, nonself_rate))

    error_self = pd.DataFrame({'normal_sample': uncovered_normals})
    error_nonself = pd.DataFrame(covered_attacks, columns=["attack_sample", "self_detector", "detector_radius"])
    # Keep the 1-based row labels callers have seen so far.
    error_nonself.index = range(1, len(error_nonself) + 1)
    return error_nonself, error_self


# The name `test` matches pytest's default collection pattern; mark the helper
# so that test runners do not try to execute it as a test case.
test.__test__ = False




# Pairwise Euclidean distances computed with matrix operations.
def EuclideanDistance(A, B):
    """Return the matrix of Euclidean distances between the rows of A and B.

    Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b so that all
    pairs are handled by one matrix product.

    Args:
        A: 2-D torch tensor of shape (m, d).
        B: 2-D torch tensor of shape (n, d) with the same dtype as A.

    Returns:
        Tensor of shape (m, n); entry (i, j) is the distance between A[i] and
        B[j]. Squared distances are clamped to at least 1e-12 before the
        square root, so no returned value is smaller than 1e-6.
    """
    m, n = A.shape[0], B.shape[0]
    # Row-wise squared norms of A, broadcast to shape (m, n).
    AA = torch.pow(A, 2).sum(1, keepdim=True).expand(m, n)
    # Row-wise squared norms of B, broadcast to (n, m) and transposed to (m, n).
    BB = torch.pow(B, 2).sum(1, keepdim=True).expand(n, m).T
    dist = AA + BB
    # dist <- 1 * dist + (-2) * A @ B.T; beta/alpha are passed by keyword
    # because the positional (beta, alpha, mat1, mat2) overload is deprecated.
    dist.addmm_(A, B.t(), beta=1, alpha=-2)
    # Clamp away tiny negative values caused by rounding before taking the root.
    dist = dist.clamp(min=1e-12).sqrt()
    return dist



# NOTE(review): the assignment below is disabled, so test_Self keeps whatever
# value an earlier cell gave it — which one depends on the order in which the
# notebook cells were executed; confirm before comparing results.
#test_Self = AE_2_encoded_test[y_test==0,:]
# The string literal below is disabled code kept by the author (an alternative
# way of assembling the non-self test set); it is evaluated and discarded.
'''test_NonSelf1 = AE_2_encoded_test[y_test==1,:]
test_NonSelf = np.r_[test_NonSelf1, test_NonSelf2]'''

# Evaluate the generated detectors against the known attack samples in test_NonSelf.
error_nonself, error_self = test(test_Self, test_NonSelf, Normaldata, Radius)

第0个异常样本，被第7525个自体检测器错误覆盖，距离4.4901726189945825e-07，该检测器半径为1e-06
第1个异常样本，被第447个自体检测器错误覆盖，距离3.513641989991544e-07，该检测器半径为1e-06
第2个异常样本，被第694个自体检测器错误覆盖，距离5.204751899010116e-07，该检测器半径为1e-06
第6个异常样本，被第1722个自体检测器错误覆盖，距离0.00025863200422715873，该检测器半径为0.0004888416564472845
第7个异常样本，被第8288个自体检测器错误覆盖，距离0.007609855068089561，该检测器半径为0.008435844708329329
第8个异常样本，被第8087个自体检测器错误覆盖，距离5.306152672612581e-07，该检测器半径为1e-06
第12个异常样本，被第694个自体检测器错误覆盖，距离5.289387549789351e-07，该检测器半径为1e-06
第15个异常样本，被第958个自体检测器错误覆盖，距离6.906169826307093e-07，该检测器半径为1e-06
第17个异常样本，被第2392个自体检测器错误覆盖，距离0.0093395897657163，该检测器半径为0.011158385693387961
第19个异常样本，被第958个自体检测器错误覆盖，距离6.041865205982132e-07，该检测器半径为1e-06
第21个异常样本，被第22091个自体检测器错误覆盖，距离0.0003047538075124706，该检测器半径为0.00041392263496978205
第23个异常样本，被第2095个自体检测器错误覆盖，距离7.232821679658152e-07，该检测器半径为1e-06
第35个异常样本，被第447个自体检测器错误覆盖，距离9.381816631436724e-07，该检测器半径为1e-06
第39个异常样本，被第7525个自体检测器错误覆盖，距离4.56860951320298e-07，该检测器半径为1e-06
第46个异常样本，被第11807个自体检测器错误覆盖，距离1.1279106198866622e-05，该检测器半径为1.56141275