In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, random_split, DataLoader, Subset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from tqdm import tqdm
import random
import numpy as np
import os
import joblib

In [44]:
class Preprocessing:
    """Per-column feature scaling backed by a persistable registry of scalers.

    ``self.transformer`` maps a column name to the scaler fitted for it.
    In training mode (``is_train=True``) calling the instance fits a fresh
    scaler for every column of the frame it receives.  In inference mode the
    registry is read from disk inside ``__init__`` and calling the instance
    only applies the stored scalers.
    """

    def __init__(self, is_train=True):
        """
        Args:
            is_train: True starts with an empty registry.  False fills the
                registry immediately through ``load()`` (default filename),
                so the dump file must already exist.
        """
        self.transformer = dict()
        self.is_train = is_train
        if not self.is_train:
            self.transformer = self.load()

    def __call__(self, df, is_timeseries=False):
        """Scale ``df``, fitting first when the instance is in training mode.

        Args:
            df: DataFrame whose columns are scaled one by one.
            is_timeseries: Kept for call compatibility; it is not used.

        Returns:
            A new DataFrame with the same index and columns as ``df``.
        """
        if self.is_train:
            return self.fit_transform(df)
        return self.transform(df)

    def fit_transform(self, df):
        """Fit one scaler per column and return the scaled copy of ``df``.

        Columns whose name contains ``'cpu-util'`` get a ``MinMaxScaler``;
        every other column gets a ``StandardScaler``.  Each fitted scaler is
        stored in ``self.transformer`` under the column name, replacing any
        previous entry for that name.

        The input frame is left untouched.  The scaled values are assigned as
        whole new float columns instead of being written through ``.loc``:
        recent pandas releases deprecate writing floats into an integer
        column that way.

        Args:
            df: DataFrame of numeric columns (column names assumed unique).

        Returns:
            A new DataFrame holding the scaled values.
        """
        scaled = df.copy()
        for column in df.columns:
            if 'cpu-util' in column:
                self.transformer[column] = MinMaxScaler()
            else:
                self.transformer[column] = StandardScaler()
            value = self.transformer[column].fit_transform(df[[column]])
            # Scalers hand back an (n, 1) array; flatten it to one column.
            scaled[column] = np.asarray(value, dtype=float).reshape(-1)
        return scaled

    def transform(self, df):
        """Apply the already-fitted scalers and return the scaled copy.

        Args:
            df: DataFrame whose column names all have a fitted scaler.

        Returns:
            A new DataFrame holding the scaled values; ``df`` is not modified.

        Raises:
            KeyError: If a column of ``df`` has no entry in the registry.
        """
        scaled = df.copy()
        for column in df.columns:
            if column not in self.transformer:
                raise KeyError(
                    f"no fitted scaler for column {column!r}; "
                    f"known columns: {sorted(map(str, self.transformer))}"
                )
            value = self.transformer[column].transform(df[[column]])
            scaled[column] = np.asarray(value, dtype=float).reshape(-1)
        return scaled

    def dump(self, filename='/tmp/mlp_transfomer.bin'):
        """Serialise the scaler registry to ``filename`` with joblib."""
        with open(filename, 'wb') as f:
            joblib.dump(self.transformer, f)

    def load(self, filename='/tmp/mlp_transfomer.bin'):
        """Read a scaler registry previously written by ``dump``.

        NOTE(review): joblib files are pickle based, so loading one that
        somebody else could have written can execute arbitrary code.  The
        default lives in /tmp, which is typically shared between users --
        consider pointing ``filename`` at a directory you control.

        Returns:
            The deserialised object (expected: dict of column name -> scaler).
        """
        with open(filename, 'rb') as f:
            data = joblib.load(f)
        return data


In [45]:
class NetworkMetricsDataset(Dataset):
    """Dataset built from per-metric TSV files plus a ``label.tsv`` file.

    For every name in ``metrics`` the file ``<path>/<name>.tsv`` is read,
    NaNs are replaced by 0, rows are ordered by their ``timestamp`` column,
    and every remaining column is prefixed with ``<name>-``.  The per-metric
    frames are concatenated column-wise into ``self.dataframe``.  Labels come
    from ``<path>/label.tsv`` and are ordered by ``timestamp`` as well, so row
    ``i`` of the features and row ``i`` of the labels belong together.
    """

    def __init__(self, path, metrics, device, transformer=None):
        """
        Args:
            path: Directory containing the metric TSV files and ``label.tsv``.
            metrics: Iterable of metric names (file stems).
            device: Device on which ``__getitem__`` creates its tensors.
            transformer: Optional callable applied to each per-metric frame
                (after renaming); it must return the frame to use.

        Raises:
            ValueError: If the number of label rows differs from the number
                of feature rows.
        """
        self.path = path
        self.metrics = metrics
        self.device = device
        self.transformer = transformer

        data = []
        for metric in tqdm(self.metrics):
            df = pd.read_csv(os.path.join(self.path, metric + '.tsv'), sep="\t", index_col=0)
            df = df.fillna(0)
            # Stable sort: rows sharing a timestamp keep their file order,
            # exactly like the labels below.
            df = df.sort_values("timestamp", kind="mergesort")
            df = df.set_index("timestamp")
            df = df.rename(columns={name: metric + '-' + name for name in df.columns})
            if self.transformer is not None:
                df = self.transformer(df)
            data.append(df)
        self.dataframe = pd.concat(data, axis=1)
        self.data = self.dataframe.values
        self.data_size = len(self.dataframe)

        labels = pd.read_csv(os.path.join(self.path, 'label.tsv'), sep="\t", index_col=0)
        # The features were reordered by timestamp, so the labels must be
        # reordered the same way or they would pair up with the wrong rows.
        labels = labels.sort_values("timestamp", kind="mergesort").set_index("timestamp")
        if len(labels) != self.data_size:
            raise ValueError(
                f"label.tsv has {len(labels)} rows but the metric files "
                f"yield {self.data_size} rows"
            )
        self.labels = labels.values

    def __len__(self):
        """Number of feature rows."""
        return self.data_size

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx`` as float tensors on ``self.device``."""
        ret = self.data[idx]
        ret = torch.tensor(ret, dtype=torch.float, device=self.device)
        label = self.labels[idx]
        label = torch.tensor(label, dtype=torch.float, device=self.device)

        return ret, label

In [46]:
class MLPClassifier(nn.Module):
    """Perceptron with one 128-unit hidden layer that returns raw class scores.

    Layer order inside ``self.mlp``: Linear -> ReLU -> Dropout(p=0.2) -> Linear.
    """

    def __init__(self, input_dim, output_dim):
        """
        Args:
            input_dim: Number of input features per sample.
            output_dim: Number of output scores per sample.
        """
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        hidden_width = 128
        # No nn.Softmax(dim=1) at the end: it is not needed because softmax
        # is built into the loss function.
        stages = [
            nn.Linear(self.input_dim, hidden_width),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(hidden_width, self.output_dim),
        ]
        self.mlp = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the unnormalised scores."""
        scores = self.mlp(x)
        return scores

In [47]:
# --- Run configuration -------------------------------------------------
device = 'cpu'
path = "../dataset/train"
# One TSV file per metric is expected below `path`.
metrics = [
    "cpu-util",
    "tx-pps",
    "rx-pps",
    "network-incoming-packets-rate",
    "network-outgoing-packets-rate",
    "prefix-activity-received-current-prefixes",
]
# Event name -> integer class id (ids follow the listing order, from 0).
events = {
    event_name: class_id
    for class_id, event_name in enumerate((
        'normal',
        'ixnetwork-bgp-hijacking-start',
        'ixnetwork-bgp-injection-start',
        'node-down',
        'interface-down',
        'packet-loss-delay',
    ))
}

# --- Reproducibility: seed every RNG source used by the notebook --------
seed = 1
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)

# --- Training hyper-parameters and checkpoint directory -----------------
batchsize = 16
max_epoch = 100
model_dir = "models/mlp"
os.makedirs(model_dir, exist_ok=True)

# --- Fit the scalers while loading the training set, then persist them --
# NOTE(review): the scalers are fitted on every row loaded here, i.e. before
# the train/validation split performed in a later cell -- confirm that this
# is acceptable for the validation metrics.
transformer = Preprocessing(is_train=True)
dataset = NetworkMetricsDataset(
    path=path,
    metrics=metrics,
    device=device,
    transformer=transformer,
)
transformer.dump()

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 19.12it/s]


In [39]:
# Stratified 80/20 split over row indices; `seed` keeps it reproducible.
train_indices, val_indices = train_test_split(
    list(range(len(dataset))),
    test_size=0.2,
    stratify=dataset.labels,
    random_state=seed,
)
train_dataset = Subset(dataset, train_indices)
train_size = len(train_dataset)
val_dataset = Subset(dataset, val_indices)
val_size = len(val_dataset)
print(f'train size : {train_size} val size: {val_size}')
# NOTE(review): no shuffle=True, so every epoch visits the batches in the
# same order (the indices themselves were shuffled once by the split).
train_dataloader = DataLoader(train_dataset, batch_size=batchsize)
# One batch that spans the whole validation set.
val_dataloader = DataLoader(val_dataset, batch_size=val_size)

# Materialise the single validation batch once for reuse in every epoch.
# The built-in next() is used because loader iterators in current PyTorch
# do not provide a `.next()` method.
val_data, val_labels = next(iter(val_dataloader))
val_data = val_data.float().to(device)
# Flatten the labels to a 1-D tensor of integer class ids.
val_labels = val_labels.long().to(device).view(-1)

train size : 4775 val size: 1194


In [40]:

# Feature width = size of the last axis of the first training sample;
# one output score per entry of `events`.
input_dim = train_dataset[0][0].shape[-1]
output_dim = len(events)

model = MLPClassifier(input_dim, output_dim)
model = model.to(device)

# Cross-entropy on the raw scores, optimised with Adam (learning rate 1e-3).
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [41]:
# Train for `max_epoch` epochs.  After each epoch: evaluate once on the
# cached validation batch, print the metrics, and checkpoint every 10th epoch.
for epoch in range(1, max_epoch+1):
    running_loss = 0
    correct = 0
    total = 0
    model = model.train()
    # Training
    for train_data, train_labels in train_dataloader:
        train_data = train_data.float().to(device)
        # Flatten the labels to a 1-D tensor of integer class ids.
        train_labels = train_labels.long().to(device).view(-1)

        optimizer.zero_grad()
        train_scores = model(train_data)
        loss = loss_function(train_scores, train_labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Predicted class = index of the highest score; detach() keeps the
        # bookkeeping out of the autograd graph.
        predict = train_scores.detach().argmax(dim=1)
        correct += (predict == train_labels).sum().item()
        total += train_labels.size(0)

    # Mean of the per-batch losses.
    train_loss = running_loss / len(train_dataloader)
    train_acc = correct / total

    # Check model validation
    model = model.eval()
    with torch.no_grad():
        val_scores = model(val_data)
        val_loss = loss_function(val_scores, val_labels)

        # .cpu() before .numpy(): NumPy conversion only works for CPU
        # tensors, so this also holds when `device` is not 'cpu'.
        bi_scores = torch.argmax(val_scores, dim=1).cpu().numpy()
        y_val_scores = val_labels.cpu().numpy()
        val_acc = accuracy_score(y_val_scores, bi_scores)

    print(f'EPOCH: [{epoch}/{max_epoch}] train loss: {train_loss:.4f} train acc: {train_acc:.4f} val loss: {val_loss:.4f} val acc: {val_acc:.4f}')
    # Export model
    if epoch % 10 == 0:
        # os.path.join also copes with an absolute `model_dir` and matches
        # how the checkpoint path is built when the model is loaded again.
        torch.save(model.state_dict(), os.path.join(model_dir, f"mlp_{epoch}.mdl"))


EPOCH: [1/100] train loss: 0.7817 train acc: 0.7516 val loss: 0.6498 val acc: 0.793132
EPOCH: [2/100] train loss: 0.6390 train acc: 0.8132 val loss: 0.6022 val acc: 0.818258
EPOCH: [3/100] train loss: 0.6041 train acc: 0.8237 val loss: 0.5756 val acc: 0.825796
EPOCH: [4/100] train loss: 0.5733 train acc: 0.8329 val loss: 0.5453 val acc: 0.839196
EPOCH: [5/100] train loss: 0.5538 train acc: 0.8392 val loss: 0.5352 val acc: 0.831658
EPOCH: [6/100] train loss: 0.5356 train acc: 0.8448 val loss: 0.5095 val acc: 0.841709
EPOCH: [7/100] train loss: 0.5142 train acc: 0.8494 val loss: 0.4941 val acc: 0.850921
EPOCH: [8/100] train loss: 0.4998 train acc: 0.8540 val loss: 0.4866 val acc: 0.852596
EPOCH: [9/100] train loss: 0.4931 train acc: 0.8565 val loss: 0.4812 val acc: 0.849246
EPOCH: [10/100] train loss: 0.4844 train acc: 0.8576 val loss: 0.4767 val acc: 0.849246
EPOCH: [11/100] train loss: 0.4806 train acc: 0.8565 val loss: 0.4747 val acc: 0.850084
EPOCH: [12/100] train loss: 0.4700 train 

EPOCH: [95/100] train loss: 0.3560 train acc: 0.8850 val loss: 0.4724 val acc: 0.876884
EPOCH: [96/100] train loss: 0.3600 train acc: 0.8825 val loss: 0.4538 val acc: 0.876884
EPOCH: [97/100] train loss: 0.3638 train acc: 0.8840 val loss: 0.4638 val acc: 0.878559
EPOCH: [98/100] train loss: 0.3600 train acc: 0.8838 val loss: 0.4623 val acc: 0.876884
EPOCH: [99/100] train loss: 0.3606 train acc: 0.8831 val loss: 0.4641 val acc: 0.876047
EPOCH: [100/100] train loss: 0.3530 train acc: 0.8840 val loss: 0.4619 val acc: 0.876884


In [42]:
# Evaluate the epoch-100 checkpoint on the held-out test directory.
# NOTE(review): this cell rebinds `path`, `transformer` and `dataset`;
# re-running earlier cells afterwards would operate on the test data.
model_path = os.path.join(model_dir, "mlp_100.mdl")
path = '../dataset/test'
# is_train=False: reuse the scalers dumped during training, do not refit.
transformer = Preprocessing(is_train=False)
dataset = NetworkMetricsDataset(path, metrics, device, transformer)

input_dim = list(dataset[0][0].shape)[-1]
output_dim = len(events.keys())

# One batch that spans the whole test set.  The built-in next() is used
# because loader iterators in current PyTorch do not provide `.next()`.
test_dataloader = DataLoader(dataset, batch_size=len(dataset))
test_data, test_label = next(iter(test_dataloader))
test_data = test_data.float().to(device)
test_label = test_label.long().to(device).view(-1)

model = MLPClassifier(input_dim, output_dim).to(device)
# map_location lets a checkpoint saved on another device load onto `device`.
model.load_state_dict(torch.load(model_path, map_location=device))
model = model.eval()
loss_function = nn.CrossEntropyLoss()
with torch.no_grad():
    test_scores = model(test_data)
    loss = loss_function(test_scores, test_label)
    bi_scores = torch.argmax(test_scores, dim=1).to('cpu').numpy()
    y_test_scores = test_label.to('cpu').numpy()
print(accuracy_score(y_test_scores, bi_scores))
# Passing `labels` pins each class id to its name, so the report still works
# when some event class does not occur in the test data.
print(classification_report(
    y_test_scores,
    bi_scores,
    labels=list(events.values()),
    target_names=list(events.keys()),
))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 27.72it/s]


0.9034314644665291
                               precision    recall  f1-score   support

                       normal       0.88      0.99      0.93      3505
ixnetwork-bgp-hijacking-start       0.99      0.71      0.82       377
ixnetwork-bgp-injection-start       0.97      0.95      0.96       329
                    node-down       1.00      1.00      1.00       140
               interface-down       0.90      0.60      0.72       157
            packet-loss-delay       0.98      0.64      0.77       825

                     accuracy                           0.90      5333
                    macro avg       0.95      0.81      0.87      5333
                 weighted avg       0.91      0.90      0.90      5333

