In [78]:
# pytorch
import torch
from torch import nn
# import pytorch_lightning as pl
from pytorch_lightning import LightningDataModule, LightningModule, Trainer
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
from torch import optim
from torch.autograd import Variable

# Helper libraries
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
import random
from sklearn.decomposition import IncrementalPCA
#sys
import os
from collections import OrderedDict
import glob
import math

In [79]:
def same_seeds(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (including all CUDA devices when CUDA is available), then switches cuDNN
    to deterministic mode with autotuning disabled.

    Args:
        seed: integer seed passed unchanged to every generator.
    """
    # Python, NumPy and Torch (CPU) generators, in that order.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # CUDA generators are only touched when a device is actually present.
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

# Fix the seed so that results are reproducible
Seed = 42
same_seeds(Seed)

In [80]:
# Use at most one GPU; 0 means training runs on the CPU.
AVAIL_GPUS = min(1, torch.cuda.device_count())
# Larger batches when a GPU is available, smaller ones on CPU.
BATCH_SIZE = 256 if AVAIL_GPUS else 64
# One DataLoader worker per 7 logical CPUs (0 => load in the main process).
# os.cpu_count() may return None when the count cannot be determined, so fall
# back to 1 instead of raising a TypeError on `None / 7`.
NUM_WORKERS = (os.cpu_count() or 1) // 7

In [81]:
# Load the data and keep only the rows whose attack category is 'Normal'
raw_data = pd.read_csv('/home/jsm/code/python/IoT-botnet/data/UNSW-NB15 - CSV Files/unsw15_train.csv')
temp = raw_data.loc[raw_data['attack_cat'].eq('Normal')]

In [82]:
# Remove the '196', 'attack_cat' and 'label' columns (returns a new frame),
# then draw a seeded random sample of 1024*60 rows.
temp_drop = temp.drop(columns=['196', 'attack_cat', 'label'])
temp_sameple = temp_drop.sample(n=1024*60, random_state=Seed)

In [83]:
# The sampled frame is the training data used by the rest of the notebook.
tr_data = temp_sameple
# Only a cell's final expression is displayed, so the shape check goes last;
# placed first, it was evaluated and silently discarded.
tr_data.shape

In [84]:
class MyDataset(Dataset):
    """Dataset over a tensor, with an extra axis inserted at dim 1.

    Args:
        data: tensor whose first axis indexes the samples.
    """

    def __init__(self, data):
        # Insert a new axis at dim 1, e.g. (64, 197) --> (64, 1, 197)
        self.data = data.unsqueeze(dim=1)

    def __getitem__(self, index):
        """Return the entry of the stored tensor at ``index``."""
        sample = self.data[index]
        return sample

    def __len__(self):
        """Return the number of samples (length of the first axis)."""
        return len(self.data)

# Convert the training frame to a float32 tensor and wrap it in the dataset.
dataset = MyDataset(torch.from_numpy(tr_data.to_numpy()).to(torch.float32))

In [85]:
# Defined after the data has been prepared
# InputLength = 64
workspace_dir = '.'

In [86]:
# Network parameter initialisation
def weights_init(m):
    """Initialise one module in place, chosen by its class name.

    Intended to be passed to ``nn.Module.apply``. Modules whose class name
    contains 'Conv' get weights drawn from N(0.0, 0.02). Modules whose class
    name contains 'BatchNorm' get weights drawn from N(1.0, 0.02) and a zero
    bias. Every other module is left untouched.

    Args:
        m: the module to initialise.
    """
    layer_name = m.__class__.__name__

    if 'Conv' in layer_name:
        m.weight.data.normal_(mean=0.0, std=0.02)
        return

    if 'BatchNorm' in layer_name:
        m.weight.data.normal_(mean=1.0, std=0.02)
        m.bias.data.zero_()

In [87]:
# Generator
class Generator(nn.Module):
    """Generator: a linear layer followed by a stack of 1-D convolutions.

    Input shape: (N, 1, in_dim)
    Output shape: (N, 1, in_dim)

    Args:
        in_dim: size of the last axis of the input (and of the output).
        dim: number of channels of the hidden convolution layers. This
            argument used to be stored but ignored (the width was hard-coded
            to 32); the default of 32 reproduces the previous architecture.
    """
    def __init__(self, in_dim, dim=32):
        super(Generator, self).__init__()
        self.in_dim = in_dim
        self.dim = dim
        self.inlayer = nn.Sequential(
            nn.Linear(self.in_dim, self.in_dim, bias=False),
            # The TF default slope is 0.3, the torch default is 0.01
            nn.LeakyReLU(negative_slope=0.2)
        )
        self.midlayer = nn.Sequential(
            # In TF, a 1-D convolution's `filters` is the number of kernels,
            # which is the same thing as out_channels in torch
            nn.Conv1d(1, self.dim, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.2),

            nn.Conv1d(self.dim, self.dim, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.2),

            nn.Conv1d(self.dim, self.dim, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.2)
        )
        self.outlayer = nn.Sequential(
            # Collapse back to a single channel; Tanh bounds the output to (-1, 1)
            nn.Conv1d(self.dim, 1, kernel_size=3, padding=1),
            nn.Tanh()
        )
        # Re-initialise the sub-modules with the notebook's custom scheme
        self.apply(weights_init)

    def forward(self, x):
        """Run ``x`` through the input, middle and output stages in order."""
        y = self.inlayer(x)
        y = self.midlayer(y)
        y = self.outlayer(y)
        return y

In [88]:
# Discriminator
class Discriminator(nn.Module):
    """Critic made of two conv/pool stages followed by two linear layers.

    Input shape: (N, 1, in_dim)
    Output shape: (N, )

    Args:
        in_dim: size of the last axis of the input.
        in_channel: number of input channels of the first convolution.
        batch: stored on the instance as ``self.batch``; not used by the layers.
    """
    def __init__(self, in_dim, in_channel=1, batch=32):
        super(Discriminator, self).__init__()
        self.in_channel = in_channel
        self.in_dim = in_dim
        self.batch = batch

        def conv_pool(channels_in):
            # Conv -> LeakyReLU -> MaxPool; each pooling halves the length (floored)
            return [
                nn.Conv1d(channels_in, 32, kernel_size=3, padding=1),
                nn.LeakyReLU(negative_slope=0.2),
                nn.MaxPool1d(kernel_size=2),
            ]

        # shape after inlayer: (N, 32, in_dim/2)
        self.inlayer = nn.Sequential(*conv_pool(in_channel))
        # shape after midlayer1: (N, 32, in_dim/2/2) flattened to (N, 32*in_dim/2/2)
        self.midlayer1 = nn.Sequential(*conv_pool(32), nn.Flatten())

        # Flattened feature count after the two floored halvings
        pooled_once = math.floor(self.in_dim / 2)
        pooled_twice = math.floor(pooled_once / 2)
        self.temp_dim = 32 * pooled_twice

        self.midlayer2 = nn.Sequential(
            nn.Linear(self.temp_dim, 64),
            nn.Dropout(0.4),
            nn.LeakyReLU(negative_slope=0.2)
        )
        self.outlayer = nn.Linear(64, 1)

    def forward(self, x):
        """Apply the four stages in order and flatten the result to 1-D."""
        y = x
        for stage in (self.inlayer, self.midlayer1, self.midlayer2, self.outlayer):
            y = stage(y)
        return y.view(-1)

In [131]:
class WGAN(LightningModule):
    """Weight-clipped Wasserstein GAN built from ``Generator`` and ``Discriminator``.

    Two RMSprop optimizers are used: index 0 updates the generator, index 1
    the discriminator (critic). The critic is updated on every batch; the
    generator only on batches whose index is a non-zero multiple of
    ``n_critic``.

    Args:
        in_dim: size of the last axis of both the data and the noise.
        in_channels: number of channels of the data and the noise.
        lr: learning rate shared by both optimizers.
        n_critic: the generator is updated once every ``n_critic`` batches.
        clip_value: critic parameters are clamped to ``[-clip_value, clip_value]``.
        batch_size: kept as a hyperparameter; noise is sized from the actual
            batch instead, so it no longer has to match the DataLoader.
        **kwargs: extra values, stored as hyperparameters.
    """
    def __init__(
        self,
        in_dim: int = 16,
        in_channels: int = 1,
        lr: float = 1e-4,
        n_critic: int = 5,
        clip_value: float = 0.1,
        batch_size: int = BATCH_SIZE,
        **kwargs
    ):
        super().__init__()
        self.save_hyperparameters()

        # networks
        self.generator = Generator(in_dim=self.hparams.in_dim)
        self.discriminator = Discriminator(in_dim=self.hparams.in_dim, in_channel=self.hparams.in_channels)
        # fixed noise for sampling from the generator
        self.validation_z = torch.randn(10, self.hparams.in_channels, self.hparams.in_dim)

    def forward(self, z):
        """Map noise ``z`` to generated samples."""
        return self.generator(z)

    def adversarial_loss(self, y_hat, y):
        """Critic loss: ``-mean(D(y)) + mean(D(y_hat))`` (``y`` real, ``y_hat`` generated)."""
        return -torch.mean(self.discriminator(y)) + torch.mean(self.discriminator(y_hat))

    def _sample_noise(self, like):
        """Draw standard-normal noise matching ``like`` in batch size, device and dtype."""
        return torch.randn(
            like.size(0),
            self.hparams.in_channels,
            self.hparams.in_dim,
            device=like.device,
            dtype=like.dtype,
        )

    def training_step(self, batch, batch_idx, optimizer_idx):
        """Compute the loss for the optimizer selected by ``optimizer_idx``.

        Returns:
            ``{"loss": tensor}`` for a step that should be optimised, or
            ``None`` when the generator is not due for an update on this batch.
        """
        data = batch
        z = self._sample_noise(data)

        # train generator
        if optimizer_idx == 0:
            if batch_idx == 0 or batch_idx % self.hparams.n_critic != 0:
                # Not a generator batch: returning None skips this optimizer.
                return None

            # One forward pass only; the stored copy is detached so the
            # autograd graph is not kept alive between steps.
            generated = self(z)
            self.generated_data = generated.detach()

            # generator of WGAN loss
            g_loss = -torch.mean(self.discriminator(generated))
            # self.log records against the global step, so each batch gets its
            # own point instead of all batches of an epoch sharing one.
            self.log("g_loss", g_loss, prog_bar=True)
            return {"loss": g_loss}

        # train discriminator
        if optimizer_idx == 1:
            # Detach the fake samples: the critic update needs no generator gradients.
            fake = self(z).detach()
            # discriminator of WGAN loss
            d_loss = self.adversarial_loss(fake, data)
            self.log("d_loss", d_loss, prog_bar=True)
            return {"loss": d_loss}

        return None

    def on_train_batch_end(self, *args, **kwargs):
        """Clamp the critic weights once the batch's optimizer steps are done.

        Clamping inside ``training_step`` changed the parameters between the
        forward pass and the backward pass, so gradients were computed with
        different weights than the loss. Doing it here keeps the two
        consistent and leaves the critic clipped for the next batch.
        """
        bound = self.hparams.clip_value
        with torch.no_grad():
            for p in self.discriminator.parameters():
                p.clamp_(-bound, bound)

    def configure_optimizers(self):
        """Return ``[generator optimizer, discriminator optimizer]`` and no schedulers."""
        lr = self.hparams.lr

        opt_g = torch.optim.RMSprop(self.generator.parameters(), lr=lr)
        opt_d = torch.optim.RMSprop(self.discriminator.parameters(), lr=lr)

        return [opt_g, opt_d], []

In [132]:
# The model's input width is the number of columns of the training frame.
in_dim = tr_data.shape[-1]
# Batched loader over the training dataset.
tr_dataloder = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)
wgan = WGAN(in_dim=in_dim)

In [133]:
# Configure the Lightning trainer: AVAIL_GPUS GPUs (0 or 1), 10 epochs,
# and a progress bar that refreshes every 20 batches.
trainer = Trainer(
    gpus = AVAIL_GPUS,
    max_epochs=10,
    progress_bar_refresh_rate = 20
)
# Train the WGAN on the training loader.
trainer.fit(wgan, tr_dataloder)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type          | Params
------------------------------------------------
0 | generator     | Generator     | 44.8 K
1 | discriminator | Discriminator | 103 K 
------------------------------------------------
148 K     Trainable params
0         Non-trainable params
148 K     Total params
0.594     Total estimated model params size (MB)


Training: -1it [00:00, ?it/s]