In [None]:
# data format:  timestamp, src, dest, length, src port, dst port

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd


# Progress marker: printed once every import in this cell has succeeded.
print('done.')

In [None]:
# load data
# Column layout (see the format note in the first cell):
#   0 timestamp, 1 src, 2 dest, 3 length, 4 src port, 5 dst port
SPLIT_SEED = 0  # seed for the shuffle below; change it to draw a different split

print('Loading data...')
# Keep the raw table in float64 while the timestamp deltas are computed.
# float32 has a 24-bit mantissa, so if the timestamps are large absolute values
# (e.g. epoch seconds -- TODO confirm) differencing after the float32 cast
# would lose most of the inter-arrival resolution.
raw = np.asarray(np.load('data_correct.npy'), dtype=np.float64)
nsamples = raw.shape[0]
max_src = raw[:,1].max()
print("Samples:", nsamples,"\n")

# prepare timestamps
# Replace each absolute timestamp by the gap to the previous row of the same
# source; the first row of every source gets 0.
# NOTE(review): this assumes rows are already in chronological order -- confirm.
print('Pre-processing timestamps...\n')
src_column = raw[:, 1]
# Iterating over the ids that actually occur covers the largest id as well
# (range(int(max_src)) stopped one short) and never yields an empty selection.
for src_id in np.unique(src_column):
    rows = src_column == src_id
    stamps = raw[rows, 0]
    # prepend=stamps[0] makes the first delta exactly 0 and keeps the length.
    raw[rows, 0] = np.diff(stamps, prepend=stamps[0])

data = torch.tensor(raw).float()

### shuffle the data
# A dedicated generator makes the split reproducible without touching the
# global RNG that is used later for weight initialisation.
print('Shuffle the bit...\n')
shuffle_rng = torch.Generator().manual_seed(SPLIT_SEED)
data = data[ torch.randperm(data.shape[0], generator=shuffle_rng) ]

# split in training, validation and test set (60% / 20% / 20%)
print('Creating dataset...')
train_end = int(nsamples*0.6)
validation_end = int(nsamples*0.8)
train = data[:train_end]
validation = data[train_end:validation_end]
test = data[validation_end:]

# The three parts must cover every sample exactly once.
assert train.shape[0] + validation.shape[0] + test.shape[0] == nsamples

print("Samples train:", train.shape[0])
print("Samples validation:", validation.shape[0])
print("Samples test:", test.shape[0])
print("Total samples:", data.shape[0])

print('\ndone.')
test.size()

In [None]:
# define network
class Net(nn.Module):
    """Fully connected classifier: Linear/ReLU hidden layers and a linear output.

    With the default arguments the architecture is
    ``Linear(5, 30) -> ReLU -> Linear(30, 90) -> ReLU -> Linear(90, 90)``.

    Args:
        n_inputs: number of input features per sample. The training cells feed
            columns (0, 2, 3, 4, 5) of the data table, i.e. timestamp,
            destination, length, source port and destination port.
        hidden_sizes: width of each hidden layer, in order.
        n_classes: number of output logits. The target is the source id
            (column 1), so this must be larger than the biggest source id.
    """

    def __init__(self, n_inputs=5, hidden_sizes=(30, 90), n_classes=90):
        super().__init__()
        # CrossEntropyLoss applies LogSoftmax + NLLLoss, so forward() returns
        # raw logits and targets are plain integer class indices.
        self.loss_fn = nn.CrossEntropyLoss()

        widths = [n_inputs, *hidden_sizes]
        modules = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            modules.append(nn.Linear(fan_in, fan_out))
            modules.append(nn.ReLU())
        # Output layer: one logit per class ("user"), no activation.
        modules.append(nn.Linear(widths[-1], n_classes))
        self.layers = nn.Sequential(*modules)

    def forward(self,x):
        """Return the class logits for a batch ``x`` of shape (batch, n_inputs)."""
        return self.layers(x)

    def loss_function(self, net_out, target):
        """Cross-entropy between logits ``net_out`` and integer labels ``target``."""
        return self.loss_fn(net_out, target)

print('done.')

In [None]:
# params
# Upper bound on the number of samples visited per pass: the training,
# validation and test loops all stop once `it * nbatch` exceeds it.
# (A previous `stop_at = train.shape[0] - 1` was overwritten right away and
# never took effect, so it has been dropped.)
stop_at = 100000
nbatch = 64       # mini-batch size
lear_rate = 1e-4  # Adam learning rate
n = 21 #epochs
stamp = 100       # progress is printed every `stamp` epochs and on the last one

# network preparation
print("Creating the model...\n")
net = Net()
# Use the first CUDA device when one is available, otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)
optimizer = optim.Adam(params=net.parameters(), lr=lear_rate)
print("Ready to go!")

In [None]:
# train
# Runs `n` epochs. Each epoch does one optimisation pass over `train` followed
# by an accuracy measurement on `validation`. Both passes visit at most about
# `stop_at` samples. Per-epoch mean loss and validation accuracy are stored as
# plain Python floats for every epoch except epoch 0.
print("Start training...")
dateTimeObj_start_0 = datetime.now()
print(dateTimeObj_start_0)

# Column 1 (src) is the label; every other column is an input feature.
feature_cols = [0, 2, 3, 4, 5]

listLoss_a = []
listAccuracy_val_a = []


for epoch in range(n):

    # ---- optimisation pass ----
    net.train()
    loss_sum = 0.
    batches_done = 0
    for it,batch in enumerate(train.split(nbatch)):
        if it*nbatch > stop_at: break
        batch = batch.to(device) # move the batch to the model's device
        targets = batch[:,1].long()
        net_input = batch[:,feature_cols]
        optimizer.zero_grad()

        output = net(net_input)
        loss = net.loss_function(output, targets)  # targets are class indices, no one-hot needed
        loss.backward()   # back propagation
        optimizer.step()  # update the weights
        loss_sum += loss.item()
        batches_done += 1
    # Divide by the number of batches actually processed; the loop index alone
    # is one short whenever the loop ends without hitting `break`.
    mean_loss = loss_sum / batches_done if batches_done else float('nan')

    # ---- validation pass ----
    net.eval()
    correct_eval = 0
    seen_eval = 0
    with torch.no_grad(): # no gradients are needed for evaluation
        for it_eval,batch_eval in enumerate(validation.split(nbatch)):
            if it_eval*nbatch > stop_at: break
            # The batch has to live on the same device as the network.
            batch_eval = batch_eval.to(device)
            targets_eval = batch_eval[:,1].long()
            output_eval = net(batch_eval[:,feature_cols])
            _, predicted_eval = output_eval.max(dim=1)
            correct_eval += (predicted_eval == targets_eval).sum().item()
            # Count real samples so a short final batch is weighted correctly.
            seen_eval += targets_eval.shape[0]

    accuracy_eval = correct_eval / seen_eval if seen_eval else 0.0

    # Epoch 0 is deliberately left out of the recorded curves.
    if epoch != 0:
        listLoss_a.append(mean_loss)
        listAccuracy_val_a.append(accuracy_eval)

    if epoch%stamp == 0 or epoch == n-1:
        print(f"Epoch {epoch}, MeanLoss: {mean_loss} and Accuracy val: {100.*accuracy_eval}%")

#time taken
dateTimeObj_end = datetime.now()
print("Trained in: ", dateTimeObj_end-dateTimeObj_start_0)

#best accuracy
if listAccuracy_val_a:
    max_val_a = max(listAccuracy_val_a)
    # index() returns the first occurrence; +1 because list entry 0 is epoch 1.
    best_epoch = listAccuracy_val_a.index(max_val_a) + 1
    print("\nBest value of validation is: ", max_val_a*100,"% at", best_epoch, "epoch")
else:
    print("\nNo validation accuracy recorded (needs at least 2 epochs).")

In [None]:
# Graphic
# Plot the per-epoch training loss and validation accuracy recorded by the
# training cell.
# float() accepts Python numbers, NumPy scalars and 0-dim tensors (including
# tensors on a GPU), so the conversion works whatever the lists contain.
listLoss_a = np.asarray([float(v) for v in listLoss_a], dtype=np.float32)
listAccuracy_val_a = np.asarray([float(v) for v in listAccuracy_val_a], dtype=np.float32)

# The training loop does not record epoch 0, so the first entry is epoch 1.
epochs = np.arange(1, len(listLoss_a) + 1)

df_train_a=pd.DataFrame({'Epochs': epochs, 'Loss': listLoss_a})
df_acc_val_a=pd.DataFrame({'Epochs': epochs, 'Accuracy': listAccuracy_val_a*100})

# plot: training loss
fig_loss, ax_loss = plt.subplots()
ax_loss.plot( 'Epochs', 'Loss', data = df_train_a, linestyle='-', marker='o', color = 'skyblue' )
ax_loss.set(title='Loss', xlabel='Epochs', ylabel='Value')
plt.show()

# plot: validation accuracy (single curve, so no legend is drawn)
fig_acc, ax_acc = plt.subplots()
ax_acc.plot( 'Epochs', 'Accuracy', data = df_acc_val_a, linestyle = '-', marker='o', color = 'black', label = "Val")
ax_acc.set(title='Accuracy', xlabel='Epochs', ylabel='Value')
plt.show()

In [None]:
# test
# Measure classification accuracy on the held-out test split.
net.eval() #evaluation
correct = 0
seen = 0
with torch.no_grad(): # no gradients are needed, the network is not modified
    for it,batch in enumerate(test.split(nbatch)):
        if it*nbatch > stop_at: break # visit at most about `stop_at` samples

        batch = batch.to(device)
        targets = batch[:,1].long()          # column 1 (src) is the label
        net_input = batch[:,(0,2,3,4,5)]     # remaining columns are the features

        output = net(net_input)
        _, predicted = output.max(dim=1)
        correct += (predicted == targets).sum().item()
        # Count the samples really evaluated: `it*64` was one batch short when
        # the loop ran to the end and ignored a short final batch.
        seen += targets.shape[0]

if seen:
    print(f"Accuracy test: {100. * correct / seen} %")
else:
    print("Accuracy test: no samples evaluated")