In [1]:
import json
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
from tqdm import trange, tqdm
import joblib

from pcap_processor import calculate_features



In [2]:
# Report whether PyTorch can see a CUDA-capable GPU and, if so, which one
if not torch.cuda.is_available():
    print("CUDA is not available. PyTorch cannot use the GPU.")
else:
    print("CUDA is available. PyTorch can use the GPU.")
    print("Number of GPUs available:", torch.cuda.device_count())
    # Index 0 is the first visible device
    print("GPU Name:", torch.cuda.get_device_name(0))

CUDA is available. PyTorch can use the GPU.
Number of GPUs available: 1
GPU Name: NVIDIA GeForce RTX 3070 Ti Laptop GPU


In [3]:
# Restore the serialized inputs and regression targets from their .pt files
data_tensors = torch.load(f='data_tensors_15s_0.2s.pt')
target_tensors = torch.load(f='target_tensors_15s_0.2s.pt')

print("Data tensors and target tensors loaded successfully!")

# Sanity check: sample count, shape of one input sample, and shape of the targets
print(f"Number of data samples: {len(data_tensors)}")
print(f"Shape of first data sample: {data_tensors[0].shape}")
print(f"Shape of target tensor: {target_tensors.shape}")

Data tensors and target tensors loaded successfully!
Number of data samples: 3000
Shape of first data sample: torch.Size([75, 22])
Shape of target tensor: torch.Size([3000])


In [4]:

# Define the RNN model
class RNNModel(nn.Module):
    """Three-layer LSTM followed by a linear head.

    Args:
        input_size: number of features per time step fed to the LSTM.
        hidden_size: width of each LSTM layer's hidden state.
        output_size: number of values produced by the linear head.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Attribute names `rnn` and `fc` are part of the state_dict keys; keep them stable.
        self.rnn = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=3,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Run `x` through the LSTM and map the last entry of h_n through the linear head.

        The per-step LSTM outputs and the cell state are discarded; only the
        final hidden state of the last stacked layer is used.
        """
        _, (final_hidden, _) = self.rnn(x)
        last_layer_state = final_hidden[-1]
        return self.fc(last_layer_state)


# Define custom dataset
class PacketCaptureDataset(Dataset):
    """Pairs each input sample with its target by position.

    Args:
        data: indexable, sized collection of input samples.
        targets: indexable, sized collection of targets, one per sample.

    Raises:
        ValueError: if `data` and `targets` do not have the same length.
    """

    def __init__(self, data, targets):
        # Fail fast here rather than with an IndexError in the middle of an epoch
        # (or with trailing targets silently ignored).
        if len(data) != len(targets):
            raise ValueError(
                f"data and targets must have the same length, "
                f"got {len(data)} and {len(targets)}"
            )
        self.data = data
        self.targets = targets

    def __len__(self):
        """Number of (sample, target) pairs."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the `(sample, target)` pair stored at position `idx`."""
        return self.data[idx], self.targets[idx]



In [5]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the split stable
train_data, test_data, train_targets, test_targets = train_test_split(
    data_tensors, target_tensors, test_size=0.2, random_state=42
)

# Create datasets and data loaders
train_dataset = PacketCaptureDataset(train_data, train_targets)
test_dataset = PacketCaptureDataset(test_data, test_targets)

# Seed the shuffling so the sequence of training batches is reproducible after a
# kernel restart (the split above is already seeded; the shuffle was not).
shuffle_generator = torch.Generator().manual_seed(42)
train_loader = DataLoader(
    train_dataset, batch_size=32, shuffle=True, generator=shuffle_generator
)
# Evaluation order does not matter, so the test loader is left unshuffled
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [6]:
# Feature count per time step, read off the first row of the first training sample
# (assumes every sample shares that structure)
input_size = len(train_dataset[0][0][0])
hidden_size, output_size = 64, 1

# Single-output regressor trained with mean squared error and Adam
model = RNNModel(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [23]:
# Resume from the saved checkpoint when one exists. On a first run there is no
# weights file yet, so training starts from the freshly initialised model instead
# of failing on the load.
checkpoint_path = 'model_weights_15s_0.2s.pt'
if os.path.exists(checkpoint_path):
    model.load_state_dict(torch.load(checkpoint_path))
else:
    print(f"No checkpoint found at {checkpoint_path}; training from scratch.")

# started with 20 000 epochs
num_epochs = 6190
for epoch in range(num_epochs):
    model.train()
    total_loss = 0  # sum of the per-batch mean losses over this epoch
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        # Drop only the trailing output dimension. A bare squeeze() would also
        # collapse a batch of size 1 to a 0-d tensor and rely on broadcasting
        # against `targets` inside the loss.
        loss = criterion(outputs.squeeze(-1), targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss}")

Epoch 1, Loss: 2718.2787556648254
Epoch 2, Loss: 2672.2286052703857
Epoch 3, Loss: 2519.68243265152
Epoch 4, Loss: 2298.9113380908966
Epoch 5, Loss: 2243.2686359882355
Epoch 6, Loss: 2177.1760363578796
Epoch 7, Loss: 2117.3608889579773
Epoch 8, Loss: 1860.6713120937347
Epoch 9, Loss: 1993.2802453041077
Epoch 10, Loss: 1590.422622680664
Epoch 11, Loss: 1623.5845110416412
Epoch 12, Loss: 1513.4116195440292
Epoch 13, Loss: 1393.7480030059814
Epoch 14, Loss: 1444.4676005840302
Epoch 15, Loss: 1168.7459495067596
Epoch 16, Loss: 1343.8467333316803
Epoch 17, Loss: 1463.784267425537
Epoch 18, Loss: 1195.314344882965
Epoch 19, Loss: 1183.565724849701
Epoch 20, Loss: 912.4654130935669
Epoch 21, Loss: 1186.6335334777832
Epoch 22, Loss: 1720.0611164569855
Epoch 23, Loss: 1756.1033506393433
Epoch 24, Loss: 1084.12655544281
Epoch 25, Loss: 742.5547494888306
Epoch 26, Loss: 682.0947470664978
Epoch 27, Loss: 714.0762040615082
Epoch 28, Loss: 566.5037317276001
Epoch 29, Loss: 518.5030407905579
Epoch 30

KeyboardInterrupt: 

In [24]:
# Persist the current parameters (state_dict only, not the module object) so a
# later session can reload them
torch.save(obj=model.state_dict(), f='model_weights_15s_0.2s.pt')

In [25]:
# Mean absolute error of the model over the held-out test samples
model.eval()  # the training loop leaves the model in train mode; switch to inference mode
total_absolute_error = 0
num_samples = len(test_targets)

# Evaluation needs no gradients, so skip building the autograd graph
with torch.no_grad():
    for i in range(num_samples):
        # unsqueeze(0) adds the batch dimension the model expects
        predicted_value = model(test_data[i].unsqueeze(0)).item()
        actual_value = test_targets[i].item()
        absolute_error = abs(predicted_value - actual_value)
        total_absolute_error += absolute_error

average_absolute_error = total_absolute_error / num_samples
print(f"Average Absolute Error: {average_absolute_error}")


Average Absolute Error: 26.46669761021932


In [26]:
# get the median absolute error
model.eval()  # make sure the model is in inference mode
absolute_errors = []

# Evaluation needs no gradients, so skip building the autograd graph
with torch.no_grad():
    for i in range(num_samples):
        # unsqueeze(0) adds the batch dimension the model expects
        predicted_value = model(test_data[i].unsqueeze(0)).item()
        actual_value = test_targets[i].item()
        absolute_error = abs(predicted_value - actual_value)
        absolute_errors.append(absolute_error)

median_absolute_error = np.median(absolute_errors)
print(f"Median Absolute Error: {median_absolute_error}")

Median Absolute Error: 2.4673261642456055


In [27]:
# print each actual vs prediction value
model.eval()  # make sure the model is in inference mode

# Evaluation needs no gradients, so skip building the autograd graph
with torch.no_grad():
    for i in range(num_samples):
        # unsqueeze(0) adds the batch dimension the model expects
        predicted_value = model(test_data[i].unsqueeze(0)).item()
        actual_value = test_targets[i].item()
        print(f"Actual: {actual_value}, Predicted: {predicted_value}")

Actual: 0.23467867076396942, Predicted: -0.805694580078125
Actual: 23.574888229370117, Predicted: 19.429845809936523
Actual: 0.3150703012943268, Predicted: -0.038890838623046875
Actual: 0.8776228427886963, Predicted: 0.8409538269042969
Actual: 5.211883068084717, Predicted: 23.717283248901367
Actual: 8.56942367553711, Predicted: 61.35693359375
Actual: 17.285400390625, Predicted: 5.500743865966797
Actual: 1.7512636184692383, Predicted: 3.8577423095703125
Actual: 6.08070182800293, Predicted: 0.45185089111328125
Actual: 2.4700632095336914, Predicted: 7.936801910400391
Actual: 0.4156245291233063, Predicted: 1.7829513549804688
Actual: 0.6440844535827637, Predicted: -0.5613517761230469
Actual: 0.1825038343667984, Predicted: -0.018268585205078125
Actual: 20.93109893798828, Predicted: 15.576160430908203
Actual: 43.1968879699707, Predicted: 3.8442726135253906
Actual: 11.367669105529785, Predicted: 37.989070892333984
Actual: 13.284963607788086, Predicted: 7.895626068115234
Actual: 0.8706076741218

In [28]:
# calculate the average percentage error of the model
model.eval()  # make sure the model is in inference mode
total_percentage_error = 0
num_scored = 0  # samples that actually contribute to the average

# Evaluation needs no gradients, so skip building the autograd graph
with torch.no_grad():
    for i in range(num_samples):
        predicted_value = model(test_data[i].unsqueeze(0)).item()
        actual_value = test_targets[i].item()
        if actual_value == 0:
            # Relative error is undefined for a zero target; leave the sample out
            # instead of raising ZeroDivisionError.
            continue
        # Divide by the magnitude of the target so a negative target cannot
        # produce a negative "error" that cancels out others in the sum.
        percentage_error = abs(predicted_value - actual_value) / abs(actual_value)
        total_percentage_error += percentage_error
        num_scored += 1

if num_scored < num_samples:
    print(f"Skipped {num_samples - num_scored} sample(s) with a zero target.")

average_percentage_error = (
    (total_percentage_error / num_scored) * 100 if num_scored else float("nan")
)
print(f"Average Percentage Error: {average_percentage_error}%")

Average Percentage Error: 643.042097776565%


In [29]:
# calculate the median percentage error of the model
model.eval()  # make sure the model is in inference mode
percentage_errors = []

# Evaluation needs no gradients, so skip building the autograd graph
with torch.no_grad():
    for i in range(num_samples):
        predicted_value = model(test_data[i].unsqueeze(0)).item()
        actual_value = test_targets[i].item()
        if actual_value == 0:
            # Relative error is undefined for a zero target; leave the sample out
            # instead of raising ZeroDivisionError.
            continue
        # Divide by the magnitude of the target so the relative error is never negative
        percentage_error = abs(predicted_value - actual_value) / abs(actual_value)
        percentage_errors.append(percentage_error)

if len(percentage_errors) < num_samples:
    print(f"Skipped {num_samples - len(percentage_errors)} sample(s) with a zero target.")

# With no scorable sample the median is undefined; report NaN explicitly
median_percentage_error = (
    np.median(percentage_errors) * 100 if percentage_errors else float("nan")
)
print(f"Median Percentage Error: {median_percentage_error}%")


Median Percentage Error: 77.33287797964378%
