In [1]:
import numpy as np
import pandas as pd

from trace_process import *

import xgboost
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split

  from pandas import MultiIndex, Int64Index


In [2]:
network_out_path = f"{DEFAULT_PREFIX}/{TEST_PATH}/{NETWORK_OUT}"
memory_path = f"{DEFAULT_PREFIX}/{TEST_PATH}/{MEMORY}"
network_out_raw_path = f"../{network_out_path}"
memory_raw_path = f"../{memory_path}"

In [9]:
time_delta = 100 * NANO_TO_MICRO

In [4]:
memory = build_array(memory_raw_path, limit=2)
network_out_np = build_array(network_out_raw_path, limit=2)

In [5]:
packets_df = save_as_pd(network_out_np, ['time', 'packet_size'], f"{DEFAULT_PREFIX}/{TEST_PATH}/{NETWORK_OUT}.csv")
memory_df = save_as_pd(memory, ['time', 'memory'], f"{DEFAULT_PREFIX}/{TEST_PATH}/{MEMORY}.csv")

In [10]:
flow_times, flow_sizes = get_flow(network_out_np, time_delta)

In [7]:
flows_df = pd.DataFrame({"s": flow_times[:,0], "e": flow_times[:,1], "size": flow_sizes})
flows_df.to_csv(f"{DEFAULT_PREFIX}/{TEST_PATH}/flows.csv")

In [5]:
memory_normalized = normalize(memory, axis=0, norm='max')
network_out_np_normalized = normalize(network_out_np, axis=0, norm='max')

In [11]:
flow_trace_packet, flow_sizes = get_flow_trace(network_out_np, time_delta, network_out_np_normalized)
# flow_trace_packet, flow_sizes = get_flow_trace(network_out_np, 500, network_out_np)

In [7]:
flow_sizes = flow_sizes / max(flow_sizes)

In [8]:
memory_of_flows = np.searchsorted(memory[:,0], flow_times)

NameError: name 'flow_times' is not defined

In [None]:
all_memory = [np.sum(memory_normalized[f[0]:f[1] + 1,0]) / (1 + f[1] - f[0]) for f in memory_of_flows]

In [12]:
EM_THRESHOLD = np.median(flow_sizes)
MAX_LEN = max([len(f) for f in flow_trace_packet])

In [13]:
MAX_LEN

446

In [None]:
padded_flows = np.array([np.pad(f, (MAX_LEN - len(f), 0), 'constant') for f in flow_trace_packet])
flows_size_classes = (flow_sizes > EM_THRESHOLD) * 1.
flow_size_classes = np.roll(flows_size_classes, -1)

In [11]:
columns = ['p' + str(i) for i in range(9)] + ['m']

In [12]:
dd = np.c_[padded_flows, all_memory] # TO ADD MEMORY

In [13]:
df = pd.DataFrame(dd, columns=columns)

In [14]:
data_train, data_test, labels_train, labels_test = train_test_split(df, flow_size_classes, test_size=0.20, random_state=42)

In [15]:
import torch
from torch.utils.data import TensorDataset, DataLoader

from tqdm.notebook import tqdm

In [51]:
tensor_x = torch.Tensor(data_train.values) 
tensor_y = torch.Tensor(labels_train)

train_dataset = TensorDataset(tensor_x[:,None,], tensor_y) 
train_dataloader = DataLoader(train_dataset, batch_size=64) 

In [80]:
from torch import nn
from torch.functional import F

class CNN2Model(nn.Module):
    def __init__(self, filters=(8, 4, 2)):
        super().__init__()
        self.filters = filters

        self.conv1 = nn.Conv1d(1, filters[0], 2)
        self.bn1 = nn.BatchNorm1d(filters[0])
        self.linear = nn.Linear(filters[0] * 4, 1)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool1d(x, 2)
        x = self.bn1(x)

        x = x.view(-1, self.filters[0] * 4)

        x = self.linear(x)
        x = F.sigmoid(x)

        
        return x


In [81]:
N_EPOCHS = 10
BATCH_SIZE = 64

criterion = nn.BCELoss()
model = CNN2Model()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.005, momentum=0.5)

if torch.cuda.is_available():
    model = model.cuda()
    criterion = criterion.cuda()
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")


def get_loss_and_correct(model, batch, criterion, device):
    data, target = batch
    data, target = data.to(device, dtype=torch.float), target.to(device, dtype=torch.float)
    output = model(data)
    output = torch.squeeze(output)

    loss = criterion(output, target)

    pred = torch.round(output)
    true_num = pred.eq(target.data.view_as(pred)).sum()

    return loss, true_num

def step(loss, optimizer):
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


train_losses = []
train_accuracies = []

pbar = tqdm(range(N_EPOCHS))

for i in pbar:
  total_train_loss = 0.0
  total_train_correct = 0.0

  model.train()

  for batch in tqdm(train_dataloader, leave=False):
    loss, correct = get_loss_and_correct(model, batch, criterion, device)
    step(loss, optimizer)
    total_train_loss += loss.item()
    total_train_correct += correct.item()

  mean_train_loss = total_train_loss / len(train_dataset)
  train_accuracy = total_train_correct / len(train_dataset)

  train_losses.append(mean_train_loss)

  train_accuracies.append(train_accuracy)

  pbar.set_postfix({'train_loss': mean_train_loss, 'train_accuracy': train_accuracy})


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/615262 [00:00<?, ?it/s]

  0%|          | 0/615262 [00:00<?, ?it/s]

  0%|          | 0/615262 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [64]:
param = {'eta': 0.05, 'objective': 'binary:logistic', 'predictor': 'gpu_predictor', 'max_depth': 10, 'tree_method': 'gpu_hist', 'eval_metric': 'error'}
evallist = [(dtest, 'eval'), (dtrain, 'train')]

NameError: name 'dtest' is not defined

In [19]:
model = XGBClassifier(**param)
model.fit(data_train, labels_train)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              eta=0.05, eval_metric='error', gamma=0, gpu_id=0,
              importance_type=None, interaction_constraints='',
              learning_rate=0.0500000007, max_delta_step=0, max_depth=10,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=24, num_parallel_tree=1,
              predictor='gpu_predictor', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='gpu_hist', validate_parameters=1, verbosity=None)

In [20]:
print('test score: ', np.sum((model.predict(data_test) == labels_test) * 1) / len(labels_test))
print('train score: ', np.sum((model.predict(data_train) == labels_train) * 1) / len(labels_train))

test score:  0.7051902813304949
train score:  0.7052437138790638
