In [1]:
import json
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
from tqdm import trange, tqdm
import joblib

from pcap_processor import calculate_features



In [2]:
# Report whether PyTorch can see a CUDA device, and describe it when it can
if not torch.cuda.is_available():
    print("CUDA is not available. PyTorch cannot use the GPU.")
else:
    print("CUDA is available. PyTorch can use the GPU.")
    print("Number of GPUs available:", torch.cuda.device_count())
    # Only the device at index 0 is named
    print("GPU Name:", torch.cuda.get_device_name(0))

CUDA is available. PyTorch can use the GPU.
Number of GPUs available: 1
GPU Name: NVIDIA GeForce RTX 3070 Ti Laptop GPU


In [3]:
# Read the saved samples and their targets back from the two .pt files
data_tensors, target_tensors = (
    torch.load(tensor_path)
    for tensor_path in ('data_tensors_15s_0.2s.pt', 'target_tensors_15s_0.2s.pt')
)

print("Data tensors and target tensors loaded successfully!")

# Quick sanity check of what was loaded: sample count and shapes
for summary_line in (
    f"Number of data samples: {len(data_tensors)}",
    f"Shape of first data sample: {data_tensors[0].shape}",
    f"Shape of target tensor: {target_tensors.shape}",
):
    print(summary_line)

Data tensors and target tensors loaded successfully!
Number of data samples: 3000
Shape of first data sample: torch.Size([75, 22])
Shape of target tensor: torch.Size([3000])


In [4]:

# Define the RNN model
class RNNModel(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the final hidden state.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the hidden state of every LSTM layer.
        output_size: Number of values produced per input sequence.
        num_layers: Number of stacked LSTM layers. Defaults to 3, the value
            that used to be hard-coded, so existing callers and saved
            state dicts keep working unchanged.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=3):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, sequence, features)
        self.rnn = nn.LSTM(
            input_size, hidden_size, batch_first=True, num_layers=num_layers
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return one prediction per sequence.

        Args:
            x: Batch of sequences shaped (batch, sequence, input_size).

        Returns:
            Tensor shaped (batch, output_size).
        """
        # The per-step outputs and the cell state are not needed; only the
        # final hidden state of each layer is kept.
        _, (h_n, _) = self.rnn(x)
        # h_n[-1] is the final hidden state of the last (top) LSTM layer
        return self.fc(h_n[-1])


# Define custom dataset
class PacketCaptureDataset(Dataset):
    """Pairs each data sample with the target stored at the same index.

    Args:
        data: Indexable, sized collection of samples.
        targets: Indexable, sized collection of targets, one per sample.

    Raises:
        ValueError: If ``data`` and ``targets`` differ in length.
    """

    def __init__(self, data, targets):
        # Fail early: a length mismatch would otherwise surface only as an
        # IndexError mid-epoch, or as trailing targets that are never used.
        if len(data) != len(targets):
            raise ValueError(
                f"data and targets must have the same length, "
                f"got {len(data)} and {len(targets)}"
            )
        self.data = data
        self.targets = targets

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, target)`` pair stored at ``idx``."""
        return self.data[idx], self.targets[idx]



In [5]:
# Hold out 20% of the samples for testing; the fixed random_state pins the split
train_data, test_data, train_targets, test_targets = train_test_split(
    data_tensors,
    target_tensors,
    random_state=42,
    test_size=0.2,
)

# Wrap each split in a dataset, then batch it (only the training batches are shuffled)
train_dataset, test_dataset = (
    PacketCaptureDataset(train_data, train_targets),
    PacketCaptureDataset(test_data, test_targets),
)
train_loader, test_loader = (
    DataLoader(train_dataset, shuffle=True, batch_size=32),
    DataLoader(test_dataset, shuffle=False, batch_size=32),
)


In [6]:
# Feature count is read from the first row of the first training sample
# (assumes all packet captures have the same structure)
first_sample_row = train_dataset[0][0][0]
input_size = len(first_sample_row)
hidden_size, output_size = 64, 1

# Network, loss function and optimizer used by the training cell
model = RNNModel(input_size, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [7]:
# Resume from the saved weights when the checkpoint file exists. On a clean
# checkout the file is absent (it is only written by the save cell), so fall
# back to the freshly initialised weights instead of failing.
weights_path = 'model_weights_15s_0.2s.pt'
if os.path.exists(weights_path):
    model.load_state_dict(torch.load(weights_path))
else:
    print(f"No checkpoint found at {weights_path}; training from scratch.")
# NOTE(review): only the model weights are checkpointed; the optimizer is
# created fresh, so its internal state is not carried over between sessions.

# started with 20 000 epochs
num_epochs = 3850
for epoch in range(num_epochs):
    model.train()
    # Sum of the per-batch loss values for this epoch (not an average)
    total_loss = 0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        # squeeze(-1) drops only the trailing output dimension. A bare
        # squeeze() would also drop the batch dimension for a batch holding a
        # single sample, leaving a 0-d prediction against a 1-d target.
        loss = criterion(outputs.squeeze(-1), targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss}")

Epoch 1, Loss: 3888.2604990005493
Epoch 2, Loss: 10134.598386764526
Epoch 3, Loss: 20618.627663612366
Epoch 4, Loss: 21512.75691986084
Epoch 5, Loss: 19950.727451324463
Epoch 6, Loss: 14292.935881137848
Epoch 7, Loss: 7124.20839881897
Epoch 8, Loss: 4558.89302444458
Epoch 9, Loss: 7817.175944328308
Epoch 10, Loss: 5025.222065925598
Epoch 11, Loss: 3586.1453046798706
Epoch 12, Loss: 3462.0882720947266
Epoch 13, Loss: 3097.9479117393494
Epoch 14, Loss: 3189.0981950759888
Epoch 15, Loss: 2856.250958442688
Epoch 16, Loss: 2533.362078666687
Epoch 17, Loss: 2136.2343578338623
Epoch 18, Loss: 1958.3404068946838
Epoch 19, Loss: 1841.0504546165466
Epoch 20, Loss: 1760.5934977531433
Epoch 21, Loss: 1723.6164917945862
Epoch 22, Loss: 1687.2137327194214
Epoch 23, Loss: 1566.0098400115967
Epoch 24, Loss: 1451.7284712791443
Epoch 25, Loss: 1480.6423621177673
Epoch 26, Loss: 1325.4556608200073
Epoch 27, Loss: 1242.0047464370728
Epoch 28, Loss: 1280.794306755066
Epoch 29, Loss: 1195.9625072479248
Epoc

KeyboardInterrupt: 

In [8]:
# Write the model's current parameters to the checkpoint file
model_state = model.state_dict()
torch.save(model_state, 'model_weights_15s_0.2s.pt')

In [9]:
# Mean absolute error of the model over the test split
total_absolute_error = 0
num_samples = len(test_targets)

# Evaluation only: put the model in eval mode and skip autograd bookkeeping,
# which is pure overhead when no backward pass follows.
model.eval()
with torch.no_grad():
    for i in range(num_samples):
        # unsqueeze(0) adds the batch dimension the model expects
        predicted_value = model(test_data[i].unsqueeze(0)).item()
        actual_value = test_targets[i].item()
        absolute_error = abs(predicted_value - actual_value)
        total_absolute_error += absolute_error

# An empty test split has no defined mean; report NaN instead of dividing by zero
average_absolute_error = (
    total_absolute_error / num_samples if num_samples else float("nan")
)
print(f"Average Absolute Error: {average_absolute_error}")


Average Absolute Error: 27.45367503210902


In [10]:
# get the median absolute error
absolute_errors = []

# Evaluation only: eval mode, and no autograd graph is needed for predictions
model.eval()
with torch.no_grad():
    for i in range(num_samples):
        # unsqueeze(0) adds the batch dimension the model expects
        predicted_value = model(test_data[i].unsqueeze(0)).item()
        actual_value = test_targets[i].item()
        absolute_error = abs(predicted_value - actual_value)
        absolute_errors.append(absolute_error)

median_absolute_error = np.median(absolute_errors)
print(f"Median Absolute Error: {median_absolute_error}")

Median Absolute Error: 4.619607176631689


In [11]:
# print each actual vs prediction value
# Evaluation only: eval mode, and no autograd graph is needed for predictions
model.eval()
with torch.no_grad():
    for i in range(num_samples):
        # unsqueeze(0) adds the batch dimension the model expects
        predicted_value = model(test_data[i].unsqueeze(0)).item()
        actual_value = test_targets[i].item()
        print(f"Actual: {actual_value}, Predicted: {predicted_value}")

Actual: 0.23467867076396942, Predicted: -7.8198089599609375
Actual: 23.574888229370117, Predicted: 15.40676498413086
Actual: 0.3150703012943268, Predicted: 3.9375572204589844
Actual: 0.8776228427886963, Predicted: 1.3719329833984375
Actual: 5.211883068084717, Predicted: 17.052579879760742
Actual: 8.56942367553711, Predicted: 4.5892181396484375
Actual: 17.285400390625, Predicted: 92.34220123291016
Actual: 1.7512636184692383, Predicted: 7.202430725097656
Actual: 6.08070182800293, Predicted: 0.291717529296875
Actual: 2.4700632095336914, Predicted: 10.773662567138672
Actual: 0.4156245291233063, Predicted: 1.2748374938964844
Actual: 0.6440844535827637, Predicted: 2.397686004638672
Actual: 0.1825038343667984, Predicted: -2.919464111328125
Actual: 20.93109893798828, Predicted: 10.031332015991211
Actual: 43.1968879699707, Predicted: 17.75193214416504
Actual: 11.367669105529785, Predicted: 17.715579986572266
Actual: 13.284963607788086, Predicted: 5.312580108642578
Actual: 0.8706076741218567, Pr

In [12]:
# calculate the average percentage error of the model
total_percentage_error = 0
# Number of samples that actually contribute to the average
valid_samples = 0

# Evaluation only: eval mode, and no autograd graph is needed for predictions
model.eval()
with torch.no_grad():
    for i in range(num_samples):
        # unsqueeze(0) adds the batch dimension the model expects
        predicted_value = model(test_data[i].unsqueeze(0)).item()
        actual_value = test_targets[i].item()
        if actual_value == 0:
            # Relative error is undefined for a zero target; skip the sample
            # rather than raise ZeroDivisionError.
            continue
        # Divide by the magnitude so a negative target cannot yield a
        # negative "error" that cancels out others in the sum.
        percentage_error = abs(predicted_value - actual_value) / abs(actual_value)
        total_percentage_error += percentage_error
        valid_samples += 1

# With no usable sample the average is undefined; report NaN
average_percentage_error = (
    (total_percentage_error / valid_samples) * 100 if valid_samples else float("nan")
)
print(f"Average Percentage Error: {average_percentage_error}%")

Average Percentage Error: 804.5827477861416%


In [13]:
# calculate the median percentage error of the model
percentage_errors = []

# Evaluation only: eval mode, and no autograd graph is needed for predictions
model.eval()
with torch.no_grad():
    for i in range(num_samples):
        # unsqueeze(0) adds the batch dimension the model expects
        predicted_value = model(test_data[i].unsqueeze(0)).item()
        actual_value = test_targets[i].item()
        if actual_value == 0:
            # Relative error is undefined for a zero target; skip the sample
            # rather than raise ZeroDivisionError.
            continue
        # Divide by the magnitude so a negative target cannot yield a
        # negative "error".
        percentage_error = abs(predicted_value - actual_value) / abs(actual_value)
        percentage_errors.append(percentage_error)

# With no usable sample the median is undefined; report NaN
median_percentage_error = (
    np.median(percentage_errors) * 100 if percentage_errors else float("nan")
)
print(f"Median Percentage Error: {median_percentage_error}%")


Median Percentage Error: 106.59714811308665%
