In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from decimal import Decimal
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
#设置print格式
np.set_printoptions(precision=3, suppress=True)
np.set_printoptions(threshold=20)
np.set_printoptions(formatter={'float': '{: 0.3f}'.format})
pd.set_option('display.max_rows', 500)  #最大行数
pd.set_option('display.max_columns', 500)    #最大列数
pd.set_option('display.width', 4000)        #页面宽度

def convert_num(x, fill=None):
    x = str(x)
    try:
        res = Decimal(x)/ Decimal(10000)
    except Exception:
        res = fill if fill else np.NaN
    return res

#数据路径
import os
url_base = os.getcwd()

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
#引入数据
# /CSE-CIC-IDS2018 all Data
url_data = f"{url_base}/all.csv"

# info data (column names, col types)
url_info = f"{url_base}/all_names.names"

# Import info data
df_info = pd.read_csv(url_info, sep=":", skiprows=1, index_col=False, names=["colname", "type"])

colnames = df_info.colname.values
coltypes = np.where(df_info["type"].str.contains("continuous"), "float", "str")
# print(df_info)

# Import data
df = pd.read_csv(url_data, names=colnames, index_col=False, dtype=dict(zip(colnames, coltypes)))
# print(df)

# Dumminize
X = pd.get_dummies(df.iloc[:,:-1]).values
# print(X)
# X1 = pd.get_dummies(df.iloc[:,:-1])
# print(X1)

for x1 in X:
    if np.isinf(x1).any():
        print(x1)
        for i in range(len(x1)):
            if np.isinf(x1[i]):
                print(i)
        # print(np.isinf(x1).any())

# Create Traget Flag
# Anomaly data when status is normal, Otherwise, Not anomaly.
y = np.where(df.Label == "BENIGN", 1, 0)
print(y)
# Split Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=123)

[1 1 1 ... 1 1 1]


In [3]:
#数据预处理
#标准化函数
scaler = StandardScaler()
X_soc = X_train
X_soc_test = X_test
X_train = scaler.fit_transform(X_train)
print(X_train.shape)
X_test = scaler.transform(X_test)

X_train = np.reshape(X_train,[-1,78,1])
X_test = np.reshape(X_test,[-1,78,1])

def dense_to_one_hot(labels_dense, num_classes):
    """Convert class labels from scalars to one-hot vectors."""
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset+labels_dense.ravel()] = 1
    return labels_one_hot
 
num_classes  = 2
one_hot_train = dense_to_one_hot(y_train,num_classes)
one_hot_test = dense_to_one_hot(y_test,num_classes)
print(one_hot_train.shape)
print(one_hot_test)

(524284, 78)
(524284, 2)
[[ 0.000  1.000]
 [ 0.000  1.000]
 [ 0.000  1.000]
 ...
 [ 0.000  1.000]
 [ 0.000  1.000]
 [ 1.000  0.000]]


In [4]:
#建立模型结构，就是几个全连接
#import tflearn
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_1d, max_pool_1d,global_max_pool
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.estimator import regression

# Building convolutional network
network = input_data(shape=[None,78,1], name='input')
# network = fully_connected(network, 64, activation='tanh')
network = fully_connected(network, 32, activation='tanh')
network = fully_connected(network, 10, activation='tanh')
network = dropout(network, 0.5)

network = fully_connected(network, 2, activation='softmax')
network = regression(network, optimizer='adam', learning_rate=0.01,
                     loss='categorical_crossentropy', name='target')

Instructions for updating:
non-resource variables are not supported in the long term
curses is not supported on this machine (please install/reinstall curses for an optimal experience)
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [5]:
# Training
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit({'input': X_train}, {'target': one_hot_train}, n_epoch=1,
          validation_set=({'input': X_test}, {'target': one_hot_test}),
          show_metric=True, run_id='cnn_demo')

Training Step: 8191  | total loss: [1m[32m0.05029[0m[0m | time: 27.956s
| Adam | epoch: 001 | loss: 0.05029 - acc: 0.9924 -- iter: 524224/524284
Training Step: 8192  | total loss: [1m[32m0.05268[0m[0m | time: 34.720s
| Adam | epoch: 001 | loss: 0.05268 - acc: 0.9916 | val_loss: 0.03165 - val_acc: 0.9931 -- iter: 524284/524284
--


In [6]:
#提取tflearn生成会话中的变量

input_ = model.session.graph.get_tensor_by_name('input/X:0')
output_ = model.session.graph.get_tensor_by_name('FullyConnected_3/BiasAdd:0')
softmax_ = model.session.graph.get_tensor_by_name('FullyConnected_3/Softmax:0')
target_ =model.session.graph.get_tensor_by_name('target/Y:0')
#构造损失函数
loss_ = tf.nn.softmax_cross_entropy_with_logits(logits=softmax_, labels=target_)#output_
#loss梯度
gradw, = tf.gradients(loss_,input_)
#向前梯度
derivative, = tf.gradients(output_[0,1], input_)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [14]:
#将生成的对抗样本合法化
def x1_transform(x1):
    x1 = scaler.inverse_transform(x1)
    x1x = []
    x1x1 = np.int32(x1[0][0:17])
    x1x  =  np.append(x1x,x1x1)
    x1x1 = np.float32(x1[0][17:24])
    x1x  =  np.append(x1x,x1x1)
    x1x1 = np.int32(x1[0][24:26])
    x1x  =  np.append(x1x,x1x1)
    x1x1 = np.float32(x1[0][26:34])
    x1x  =  np.append(x1x,x1x1)
    x1x1 = np.int32(np.rint(x1[0][34:78]))
    x1x  =  np.append(x1x,x1x1)
    x1x = np.around(x1x, decimals=2)
    x1 = scaler.transform(np.reshape(x1x,[1,78]))
    return x1,x1x
    #x1是归一化的，x1x才是生成对抗样本

In [15]:
def Deepfool(sample_n): 
    ok = True
    targets = [0,1]
    sample = np.array(X_train[sample_n:sample_n+1])
    x1 = np.reshape(sample,[1,78])
    #最大最小值
    x1_d = np.min(X_soc,axis=0)
    x1_u = np.max(X_soc,axis=0)
    
    #初始参数
    r_tot = np.zeros([1,78])
    c = np.zeros(x1.shape[1]-34)

    for epoch in range(1000):
        #计算梯度
        x_ = np.reshape(x1,[-1,78,1])
        y1,_,gg_ = model.session.run([softmax_,loss_,gradw],{input_:x_,target_:[targets]})

        if np.argmax(y1) == np.argmax(targets):
            #如果生成了对抗样本。
            x1,x1x = x1_transform(x1)
            #将生成的对抗样本合法化
            x_ = np.reshape(x1,[-1,78,1])
            y1,_,gg_ = model.session.run([softmax_,loss_,gradw],{input_:x_,target_:[targets]})
            if np.argmax(y1) == np.argmax(targets):
                #如果生成的对抗样本合法化后依然是对抗样本。
                d_d = x1x-X_soc_test[sample_n]
                L0 = np.linalg.norm(d_d,ord=0)
                print('L0=',L0)
                break#生成成功退出循环

        #用deepflow的方式计算更改样本
        gg=[]
        gg = np.append(gg_[0][0:34],c)
        gg = np.reshape(gg,[1,78])
        pert = np.abs(y1[0][np.argmax(targets)])/np.linalg.norm(gg)
        r_i = (pert+1e-8) * gg/np.linalg.norm(gg)
        r_tot = np.float32(r_tot+r_i)

        x1 = x1-1.1*r_tot
        

        #对x1的特征进行一个限制，限制在所有样本最大值内。
        x1 = scaler.inverse_transform(x1)
        x1x = []
        for i in range(78):
            x1x1 = np.float32(np.clip(x1[0][i:i+1],x1_d[i],x1_u[i]))
            x1x  =  np.append(x1x,x1x1)
        x1  = np.reshape(x1x,[1,78])
        x1 = scaler.transform(np.reshape(x1x,[1,78]))

    if epoch == 999:
        print('对抗样本生成失败，迭代次数 =',epoch)
        return False
    else:
        print('对抗样本生成成功，迭代次数 =',epoch,' , L0 =',L0)
        return x1x

In [16]:
#实现saliency_map
def saliency_map(derivative, mask):
        mask = np.reshape(mask,[78])
        #print('mask',mask[0],mask[0,21,0])
        alphas = derivative * mask
        
        # pixel influence on sum of residual classes
        betas = -np.ones_like(alphas)
        
        sal_map = np.abs(alphas) * np.abs(betas) * np.sign(alphas * betas)
        # find optimal pixel & direction of perturbation
        #print(sal_map*1000)
        ''''''
        for sa in range(len(sal_map)):
            if mask[sa] ==0:
                sal_map[sa] = float("inf")#10000#
                
        idx = np.argmin(sal_map)
        #print('sal_map=', sal_map)
        #转换成(p1,p2)格式      
        idx = np.unravel_index(idx, mask.shape)
        pix_sign = np.sign(alphas)[idx]
        
        return idx, pix_sign

In [None]:
def JSMA(sample_n):
    ok = True
    targets = [0,1]
    sample = np.array(X_train[sample_n:sample_n+1])
    x1 = np.reshape(sample,[1,78])

    mask = np.ones_like(x1)
    #最大最小值
    x1_d = np.min(X_soc,axis=0)
    x1_u = np.max(X_soc,axis=0)
    #定义边界
    max_=scaler.transform([x1_u])
    min_=scaler.transform([x1_d])
    max_ = np.reshape(max_,[78])
    min_ = np.reshape(min_,[78])

    c = np.zeros(x1.shape[1]-34)
    L0 = 0

    for epoch in range(1000):

        x_ = np.reshape(x1,[-1,78,1])
        y1,loss1,gg_,d = model.session.run([softmax_,loss_,gradw,derivative],{input_:x_,target_:[targets]})
        
        if np.argmax(y1) == np.argmax(targets):
            x1,x1x = x1_transform(x1)
            #将生成的对抗样本合法化

            x1 = scaler.transform(np.reshape(x1x,[1,78]))
            x_ = np.reshape(x1,[-1,78,1])
            y1,loss1,gg_ = model.session.run([softmax_,loss_,gradw],{input_:x_,target_:[targets]})

            if np.argmax(y1) == np.argmax(targets):
                #如果生成的对抗样本合法化后依然是对抗样本。
                d_d = x1x-X_soc_test[sample_n]
                L0 = np.linalg.norm(d_d,ord=0)
                print('L0=',L0)
                break#生成成功退出循环
    
    
        #JSMA更新样本
        gg=[]
        gg = np.append(d[0][0:34],c)
        idx, pix_sign=saliency_map(gg, mask)
        x1 = np.reshape(x1,[78])
        x1[idx]+=pix_sign * 0.1 * (max_[idx] - min_[idx]) 
        
        #达到极限的点不再参与更新
        if (x1[idx]<=min_[idx]) or (x1[idx]>=max_[idx]):
            mask[0][idx]=0
            x1[idx]=np.clip(x1[idx], min_[idx], max_[idx])
        
        #对x1的特征进行一个限制，限制在所有样本最大值内。
        x1 = np.reshape(x1,[1,78])
        x1 = scaler.inverse_transform(x1)
        x1x = []
        for i in range(78):
            x1x1 = np.float32(np.clip(x1[0][i:i+1],x1_d[i],x1_u[i]))
            x1x  =  np.append(x1x,x1x1)
        x1  = np.reshape(x1x,[1,78])
        x1 = scaler.transform(np.reshape(x1x,[1,78]))

    x1 = scaler.inverse_transform(x1)
    if epoch == 999:
        print('对抗样本生成失败，迭代次数 =',epoch)
        return False
    else:
        print('对抗样本生成成功，迭代次数 =',epoch,' , L0 =',L0)
        return x1x

In [17]:
#数据特征的前34是数值，只用改变前34个特征。
positive = np.int32(np.where(y_train==1)).reshape(-1)#正样本位置
negative = np.int32(np.where(y_train==0)).reshape(-1)#负样本位置
print('positive',positive,len(positive))        
print('negative',negative,len(negative))
#输入负样本的位置，生成对抗样本。不是所有样本都能生成对抗样本，大部分可以。
aes = negative[100]
ae = Deepfool(aes)
print(ae)
# ae = JSMA(aes)

positive [     0      2      5 ... 524280 524282 524283] 379722
negative [     1      3      4 ... 524273 524279 524281] 144562
L0= 57.0
对抗样本生成成功，迭代次数 = 1  , L0 = 57.0
[ 0.000  18704.000  17.000 ...  0.000  0.000  0.000]


In [23]:
print(len(ae))
for i in ae:
    print(i,',',end='')

78
0.0 ,18704.0 ,17.0 ,119999984.0 ,1.0 ,54115.0 ,94248.0 ,12529669.0 ,16668.0 ,0.0 ,2720.0 ,4850.0 ,7827.0 ,533.0 ,3340.0 ,0.0 ,-13.0 ,0.0 ,120000000.0 ,102562944.0 ,120000000.0 ,70756264.0 ,0.0 ,25408514.0 ,119999999.0 ,119999999.0 ,120000000.0 ,56220776.0 ,120000000.0 ,77722072.0 ,1.0 ,0.0 ,0.0 ,0.0 ,40.0 ,20.0 ,20408.0 ,20408.0 ,0.0 ,6.0 ,2.0 ,3.0 ,12.0 ,0.0 ,0.0 ,0.0 ,1.0 ,0.0 ,0.0 ,0.0 ,0.0 ,1.0 ,3.0 ,0.0 ,6.0 ,40.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,1.0 ,0.0 ,1.0 ,6.0 ,29200.0 ,0.0 ,0.0 ,40.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,

In [24]:
print(len(X_soc_test[negative[0]]))
for i in X_soc_test[negative[0]]:
    print(i,',',end='')

78
60368.0 ,53.0 ,17.0 ,23695.0 ,1.0 ,1.0 ,43.0 ,139.0 ,43.0 ,43.0 ,43.0 ,0.0 ,139.0 ,139.0 ,139.0 ,0.0 ,23695.0 ,0.0 ,23695.0 ,23695.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,20.0 ,20.0 ,42.20299641 ,42.20299641 ,43.0 ,139.0 ,75.0 ,55.42562584 ,3072.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,1.0 ,112.5 ,43.0 ,139.0 ,20.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,1.0 ,43.0 ,1.0 ,139.0 ,-1.0 ,-1.0 ,0.0 ,20.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,0.0 ,

In [None]:
# 