In [None]:
# pytorch
import torch
from torch import nn
import pytorch_lightning as pl
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
from torch import optim
from torch.autograd import Variable

# Helper libraries
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
import seaborn as sns
from qqdm.notebook import qqdm
import random
from sklearn.decomposition import IncrementalPCA
#sys
import os
import glob
import math

In [None]:
def same_seeds(seed):
    """Seed every random number generator used in this notebook.

    Args:
        seed: Integer seed handed to Python's ``random``, NumPy and PyTorch
            (the CUDA generators are seeded only when CUDA is available).

    Also disables cuDNN benchmarking and enables its deterministic mode.
    """
    # Python, NumPy and the PyTorch default generator all get the same seed.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # CUDA generators are only touched when a CUDA runtime is present.
    if torch.cuda.is_available():
        for cuda_seed_fn in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            cuda_seed_fn(seed)

    # cuDNN flags are set unconditionally, exactly as before.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Fix the seed of every RNG up front so that results are reproducible
Seed = 42
same_seeds(Seed)

In [None]:
# Load the data: read the CSV, keep only rows labelled 'Normal', drop both
# label columns, then draw a fixed-seed random sample of 3000 rows.
raw_data = pd.read_csv('/home/jsm/code/python/IoT-botnet/data/UNSW-NB15 - CSV Files/unsw15_train.csv')
temp = (
    raw_data[raw_data['attack_cat'] == 'Normal']
    .drop(columns=['attack_cat', 'label'])
    .sample(n=1000 * 3, random_state=Seed)
)

In [None]:
# Reduce the sampled rows to 128 components with incremental PCA,
# processing the rows in batches of 150.
# NOTE(review): presumably `temp` holds at least 128 numeric columns, which
# n_components=128 would require — confirm against the CSV schema.
ipca = IncrementalPCA(n_components=128, batch_size=150)
tr_data = ipca.fit_transform(temp.values)
# Show the shape of the reduced array as a sanity check
print(tr_data.shape)

In [None]:
class MyDataset(Dataset):
    """Dataset over a tensor, with an extra axis of size 1 inserted at position 1."""

    def __init__(self, data):
        """Store ``data`` with a new axis at dimension 1.

        Args:
            data: Tensor whose first axis indexes samples,
                e.g. (64, 197) is stored as (64, 1, 197).
        """
        self.data = data.unsqueeze(dim=1)

    def __len__(self):
        """Return the number of samples, i.e. the size of the first axis."""
        return self.data.shape[0]

    def __getitem__(self, index):
        """Return the stored entry at ``index``."""
        return self.data[index]

# Convert the PCA output to a float tensor and wrap it in the dataset
dataset = MyDataset(torch.from_numpy(tr_data).float())

In [None]:
# Defined once the data has been prepared
# InputLength = 64
# Root directory; the checkpoint folder is created beneath it
workspace_dir = '.'

In [None]:
# Network parameter initialisation
def weights_init(m):
    """Re-initialise a layer in place, chosen by its class name.

    Layers whose class name contains 'Conv' get weights drawn from N(0, 0.02).
    Layers whose class name contains 'BatchNorm' get weights drawn from
    N(1, 0.02) and a zero bias. Every other layer is left untouched.

    Args:
        m: The module to initialise (one call per submodule when used with
            ``Module.apply``).
    """
    layer_type = type(m).__name__

    if 'Conv' in layer_type:
        m.weight.data.normal_(0.0, 0.02)
        return

    if 'BatchNorm' in layer_type:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

In [None]:
# Generator
class Generator(nn.Module):
    """
    Maps a noise sequence to an output sequence of the same length.

    Input shape: (N, 1, in_dim)
    Output shape: (N, 1, in_dim)

    Args:
        in_dim: Size of the last axis of the input; the first linear layer
            maps in_dim -> in_dim.
        dim: Number of channels used by the hidden Conv1d layers. Defaults
            to 32, which reproduces the previously hard-coded architecture.
    """
    def __init__(self, in_dim, dim=32):
        super(Generator, self).__init__()
        self.in_dim = in_dim
        self.dim = dim
        self.inlayer = nn.Sequential(
            nn.Linear(self.in_dim, self.in_dim, bias=False),
            # Explicit slope: per the original note, TF defaults to 0.3 and torch to 0.01
            nn.LeakyReLU(negative_slope=0.2)
        )
        self.midlayer = nn.Sequential(
            # `filters` of a TF 1-D convolution is the number of kernels,
            # which corresponds to out_channels in torch.
            # The channel width follows `dim` instead of a hard-coded 32.
            nn.Conv1d(1, self.dim, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.2),

            nn.Conv1d(self.dim, self.dim, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.2),

            nn.Conv1d(self.dim, self.dim, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.2)
        )
        self.outlayer = nn.Sequential(
            # Collapse back to a single channel; Tanh bounds the output to [-1, 1]
            nn.Conv1d(self.dim, 1, kernel_size=3, padding=1),
            nn.Tanh()
        )
        # Run weights_init on every submodule
        self.apply(weights_init)

    def forward(self, x):
        """Run x of shape (N, 1, in_dim) through the linear, conv and output stages."""
        y = self.inlayer(x)
        y = self.midlayer(y)
        y = self.outlayer(y)
        return y

In [None]:
# Discriminator
class Discriminator(nn.Module):
    """
    Scores each input sequence with a single scalar (no output activation).

    Input shape: (N, in_channel, in_dim)
    Output shape: (N, )

    Args:
        in_dim: Size of the last axis of the input.
        in_channel: Number of input channels (default 1).
        batch: Stored on the instance; not used by any layer defined here.
    """
    def __init__(self, in_dim, in_channel=1, batch=32):
        super().__init__()
        self.in_channel, self.in_dim, self.batch = in_channel, in_dim, batch

        hidden_channels = 32
        leaky_slope = 0.2

        # Conv -> LeakyReLU -> pool: (N, 32, in_dim // 2)
        self.inlayer = nn.Sequential(
            nn.Conv1d(in_channel, hidden_channels, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=leaky_slope),
            nn.MaxPool1d(kernel_size=2),
        )
        # Conv -> LeakyReLU -> pool -> flatten: (N, 32 * (in_dim // 2 // 2))
        self.midlayer1 = nn.Sequential(
            nn.Conv1d(hidden_channels, hidden_channels, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=leaky_slope),
            nn.MaxPool1d(kernel_size=2),
            nn.Flatten(),
        )
        # Flattened width after the two halving pools (each rounds down).
        pooled_len = (self.in_dim // 2) // 2
        self.temp_dim = hidden_channels * pooled_len
        self.midlayer2 = nn.Sequential(
            nn.Linear(self.temp_dim, 64),
            nn.Dropout(0.4),
            nn.LeakyReLU(negative_slope=leaky_slope),
        )
        self.outlayer = nn.Linear(64, 1)

    def forward(self, x):
        """Return one score per sample as a 1-D tensor of length N."""
        features = self.midlayer1(self.inlayer(x))
        score = self.outlayer(self.midlayer2(features))
        return score.view(-1)

In [None]:
# Training hyperparameters
batch_size = 128
in_dim = tr_data.shape[-1]
# The noise fed to the generator has the same width as the data
z_dim = in_dim
# Fixed noise batch used to sample the generator during training.
# (torch.autograd.Variable is deprecated; plain tensors behave the same.)
z_sample = torch.randn(batch_size, 1, z_dim).cuda()
lr = 1e-4

n_epoch = 1000
# The generator is updated once every n_critic discriminator steps
n_critic = 10
# TODO: tune this value
# Discriminator weights are clamped to [-clip_value, clip_value] after each step
clip_value = 0.1

ckpt_dir = os.path.join(workspace_dir, 'checkpoints')
os.makedirs(ckpt_dir, exist_ok=True)

# Model
G = Generator(in_dim).cuda()
D = Discriminator(in_dim, 1).cuda()
G.train()
D.train()

# Loss
# Only referenced by the commented-out BCE variant of the training loop;
# the active WGAN loss does not use it.
criterion = nn.BCELoss()

# Optimizer
opt_D = torch.optim.RMSprop(D.parameters(), lr=lr)
# Must optimise the generator's parameters. It was previously built from
# D.parameters(), so the generator was never updated.
opt_G = torch.optim.RMSprop(G.parameters(), lr=lr)

# DataLoader
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=1)

In [None]:
# WGAN training loop with weight clipping.
# Every batch updates the discriminator; the generator is updated once every
# n_critic discriminator steps.
steps = 0
for epoch in range(n_epoch):
    progress_bar = qqdm(dataloader)
    for data in progress_bar:
        r_data = data.cuda()
        bs = r_data.size(0)

        # ============================================
        #  Train D
        # ============================================
        z = torch.randn(bs, 1, z_dim).cuda()
        # Detach the fake batch: the generator is not updated in this step,
        # so there is no need to backpropagate through it.
        f_data = G(z).detach()

        # One forward pass per batch (previously D was run twice on each
        # batch and the first results were discarded).
        r_logit = D(r_data)
        f_logit = D(f_data)

        # WGAN loss: raise the score of real data, lower it for fake data
        loss_D = -torch.mean(r_logit) + torch.mean(f_logit)

        # Model backwarding
        D.zero_grad()
        loss_D.backward()

        # Update the discriminator
        opt_D.step()

        # Clip the discriminator weights to [-clip_value, clip_value]
        with torch.no_grad():
            for p in D.parameters():
                p.clamp_(-clip_value, clip_value)

        # ============================================
        #  Train G
        # ============================================
        # steps starts at 0, so this branch runs on the very first batch and
        # loss_G is always defined before it is logged below.
        if steps % n_critic == 0:
            # Generate some fake data
            z = torch.randn(bs, 1, z_dim).cuda()
            f_data = G(z)

            # Model forwarding (single pass, reused for the loss)
            f_logit = D(f_data)

            # WGAN loss: raise the discriminator's score on generated data
            loss_G = -torch.mean(f_logit)

            # Model backwarding
            G.zero_grad()
            loss_G.backward()

            # Update the generator
            opt_G.step()

        steps += 1

        # Set the info of the progress bar
        #   Note that the value of the GAN loss is not directly related to
        #   the quality of the generated data.
        progress_bar.set_infos({
            'Loss_D': round(loss_D.item(), 4),
            'Loss_G': round(loss_G.item(), 4),
            'Epoch': epoch+1,
            'Step': steps,
        })

    # ---- end of epoch ----
    # Sample the generator on the fixed noise batch without building a graph.
    # This ran after every batch before, but its result was never read inside
    # the loop, so once per epoch leaves the same final value.
    G.eval()
    with torch.no_grad():
        f_data_sample = G(z_sample)
    G.train()

    # Save the checkpoints once per qualifying epoch (first epoch and every
    # fifth one) instead of after every batch of those epochs.
    if (epoch+1) % 5 == 0 or epoch == 0:
        torch.save(G.state_dict(), os.path.join(ckpt_dir, 'G.pth'))
        torch.save(D.state_dict(), os.path.join(ckpt_dir, 'D.pth'))