In [16]:
import numpy as np
import pandas as pd
# from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import torch
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset


class KddData(object):
    """Load a training CSV and a test CSV and expose them as PyTorch datasets.

    Both files must contain a ``label`` column; every other column is used as
    a feature. Each feature row is zero-padded on the right to
    ``FEATURE_WIDTH`` values and reshaped to ``(1, FEATURE_WIDTH)``, so the
    feature tensor of a dataset has shape ``(n_rows, 1, FEATURE_WIDTH)``.
    Labels are mapped to integer class ids by a single ``LabelEncoder`` that
    is shared by both datasets.

    Attributes set by ``__init__``:
        batch_size: the batch size handed to both data loaders.
        train_dataset / test_dataset: ``TensorDataset`` of (features, labels).
        train_dataloader / test_dataloader: shuffling ``DataLoader`` objects.
    """

    # Width every feature row is zero-padded to before it is reshaped.
    FEATURE_WIDTH = 100

    def __init__(self, batch_size, file_name1, file_name2):
        """Read both CSV files and build the datasets and data loaders.

        Args:
            batch_size: number of samples per batch for both loaders.
            file_name1: path of the training CSV.
            file_name2: path of the test CSV.
        """
        train_frame = pd.read_csv(file_name1)
        test_frame = pd.read_csv(file_name2)

        self._encoder = {
            'label':    LabelEncoder()
        }
        self.batch_size = batch_size

        # Fit the label encoder exactly once, on the labels of BOTH files.
        # Refitting per file would give the train and test sets different
        # integer ids whenever their label sets are not identical.
        self._encoder['label'].fit(np.concatenate((
            np.array(train_frame['label']),
            np.array(test_frame['label']),
        )))

        self.train_dataset = self.__build_dataset(train_frame)
        self.test_dataset = self.__build_dataset(test_frame)

        self.train_dataloader = DataLoader(self.train_dataset, self.batch_size, shuffle=True)
        # The test loader is shuffled as well; kept as in the original code.
        self.test_dataloader = DataLoader(self.test_dataset, self.batch_size, shuffle=True)

    def __build_dataset(self, frame):
        """Turn one data frame into a ``TensorDataset`` of (features, labels)."""
        target = np.array(frame['label'])
        features = np.array(frame.drop('label', axis=1))
        data_X, data_y = self.__encode_data(features, target, fit=False)
        return TensorDataset(
            torch.from_numpy(data_X.astype(np.float32)),
            torch.from_numpy(data_y.astype(np.int64))
        )

    def __encode_data(self, data_X, data_y, fit=True):
        """Encode the labels and pad/reshape the feature matrix.

        Args:
            data_X: 2-D feature array, one row per sample.
            data_y: 1-D array of raw labels.
            fit: when true (the default) the label encoder is (re)fitted on
                the distinct values of ``data_y`` before transforming; pass
                ``False`` to reuse an encoder that has already been fitted.

        Returns:
            ``(features, labels)`` where ``features`` has shape
            ``(n_rows, 1, FEATURE_WIDTH)`` and ``labels`` holds the encoded
            class ids.

        Raises:
            ValueError: if a row has more than ``FEATURE_WIDTH`` features.
        """
        if fit:
            self._encoder['label'].fit(list(set(data_y)))
        data_X = np.asarray(data_X)
        # Use shape[1] rather than len(data_X[0]) so an empty matrix works.
        missing = self.FEATURE_WIDTH - data_X.shape[1]
        if missing < 0:
            raise ValueError(
                'expected at most {} feature columns, got {}'.format(
                    self.FEATURE_WIDTH, data_X.shape[1]))
        data_X = np.pad(data_X, ((0, 0), (0, missing)), 'constant')
        data_X = data_X.reshape(-1, 1, self.FEATURE_WIDTH)
        data_y = self._encoder['label'].transform(data_y)
        return data_X, data_y

    def __split_data_to_tensor(self, data_X, data_y):
        """Split the data 70/30 into train and test ``TensorDataset`` objects."""
        X_train, X_test, y_train, y_test = train_test_split(data_X, data_y, test_size=0.3)
        train_dataset = TensorDataset(
            torch.from_numpy(X_train.astype(np.float32)),
            torch.from_numpy(y_train.astype(np.int64))
        )
        test_dataset = TensorDataset(
            torch.from_numpy(X_test.astype(np.float32)),
            torch.from_numpy(y_test.astype(np.int64))
        )
        return train_dataset, test_dataset

    def decode(self, data, label=False):
        """Decode an encoded feature row, or an array of encoded labels.

        Args:
            data: with ``label=False`` a single feature row whose positions
                1, 2 and 3 are decoded with the ``'protocal'``, ``'service'``
                and ``'flag'`` encoders; with ``label=True`` an array of
                encoded labels.
            label: selects label decoding instead of feature-row decoding.

        Returns:
            A new list with positions 1-3 decoded, or the decoded labels.

        Raises:
            KeyError: for ``label=False`` when the three feature encoders have
                not been added to ``self._encoder`` (``__init__`` only
                registers ``'label'``).
        """
        if not label:
            _data = list(data)
            _data[1] = self._encoder['protocal'].inverse_transform([_data[1]])[0]
            _data[2] = self._encoder['service'].inverse_transform([_data[2]])[0]
            # The flag lives at position 3; it must not overwrite the service.
            _data[3] = self._encoder['flag'].inverse_transform([_data[3]])[0]
            return _data
        return self._encoder['label'].inverse_transform(data)

    def encode(self, data, label=False):
        """Encode a raw feature row, or a single raw label.

        Args:
            data: with ``label=False`` a single feature row whose positions
                1, 2 and 3 are encoded with the ``'protocal'``, ``'service'``
                and ``'flag'`` encoders; with ``label=True`` one raw label.
            label: selects label encoding instead of feature-row encoding.

        Returns:
            A new list with positions 1-3 encoded, or the encoded label.

        Raises:
            KeyError: for ``label=False`` when the three feature encoders have
                not been added to ``self._encoder`` (``__init__`` only
                registers ``'label'``).
        """
        if not label:
            _data = list(data)
            _data[1] = self._encoder['protocal'].transform([_data[1]])[0]
            _data[2] = self._encoder['service'].transform([_data[2]])[0]
            _data[3] = self._encoder['flag'].transform([_data[3]])[0]
            return _data
        return self._encoder['label'].transform([data])[0]

# Mini-batch size. The training cell further down reassigns batch_size
# before the KddData object is constructed.
batch_size = 64

In [17]:
import torch.nn as nn

class CNN(nn.Module):
    """1-D convolutional classifier: two conv stages plus a dense head.

    The network expects single-channel input of shape ``(batch, 1, length)``.
    Each conv stage keeps the sequence length (kernel 5, padding 2) and then
    divides it by three with max pooling. The first dense layer takes 704
    inputs, i.e. 64 channels x 11 positions, which is what an input length
    of 100 yields after the two pooling steps.
    """

    def __init__(self, in_dim, num_class):
        """Build the layers.

        Args:
            in_dim: accepted for interface compatibility; not used by the
                layers, whose input channel count is fixed at 1.
            num_class: number of output logits.
        """
        super().__init__()

        # Stage 1 uses ELU activations, stage 2 uses ReLU.
        self.conv1 = self._conv_stage(1, 32, nn.ELU)
        self.conv2 = self._conv_stage(32, 64, nn.ReLU)

        dense_layers = [
            nn.Flatten(),
            nn.Linear(704, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(p=0.1),
        ]
        self.out1 = nn.Sequential(*dense_layers)
        self.out2 = nn.Sequential(nn.Linear(256, num_class))

    @staticmethod
    def _conv_stage(in_channels, out_channels, activation):
        """Return conv-BN-act, conv-BN-act, max-pool(3), dropout(0.1)."""
        stage = [
            nn.Conv1d(in_channels, out_channels, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm1d(out_channels),
            activation(),
            nn.Conv1d(out_channels, out_channels, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm1d(out_channels),
            activation(),
            nn.MaxPool1d(kernel_size=3, stride=3, padding=0),
            nn.Dropout(p=0.1),
        ]
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the class logits for a batch ``x``."""
        features = self.conv2(self.conv1(x))
        hidden = self.out1(features)
        return self.out2(hidden)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
# from ignite.metrics import Precision, Recall,

# Neural-network hyper-parameters (read as globals by train()).
batch_size = 32        # samples per mini-batch
learning_rate = 1e-2   # SGD step size
num_epoches = 40       # number of passes over the training set
USE_GPU = False        # when True, model and batches are moved with .cuda()
num_class = 2          # number of output logits of the CNN

# Input CSV files. Only trainFile and testFile2 are passed to KddData below;
# testFile is defined but not used in this code.
trainFile = 'train-wired.csv'
testFile = 'test-wired.csv'
testFile2 = 'test-kr.csv'
dataset = KddData(batch_size, trainFile, testFile2)
model = CNN(1, num_class)

def train():
    """Train the module-level ``model`` on ``dataset.train_dataloader``.

    Uses cross-entropy loss and plain SGD. Reads the module-level names
    ``model``, ``dataset``, ``USE_GPU``, ``learning_rate`` and
    ``num_epoches``. After every epoch it prints the mean loss and the
    accuracy over the training set. The model is left in evaluation mode
    after each epoch, and therefore on return. Returns ``None``.
    """
    global model

    if USE_GPU:
        model = model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(num_epoches):
        print('epoch {}'.format(epoch + 1))
        print('*' * 10)
        # Re-enable training mode every epoch: the eval() call at the end of
        # the previous epoch would otherwise leave Dropout disabled and
        # BatchNorm frozen on its running statistics for all later epochs.
        model.train()
        running_loss = 0.0
        running_acc = 0.0
        for img, label in dataset.train_dataloader:
            if USE_GPU:
                img = img.cuda()
                label = label.cuda()
            # Forward pass
            out = model(img)
            loss = criterion(out, label)
            # Weight the batch-mean loss by the batch size so the epoch
            # figure is a per-sample mean even when the last batch is short.
            running_loss += loss.item() * label.size(0)
            _, pred = torch.max(out, 1)
            running_acc += (pred == label).sum().item()
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print('Finish {} epoch, Loss: {:.6f}, Acc: {:.6f}'.format(
            epoch + 1, running_loss / (len(dataset.train_dataset)), running_acc / (len(
                dataset.train_dataset))))
        model.eval()
            
# Run the training loop when this cell is executed.
train()