In [1]:
import json
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
from tqdm import trange, tqdm
import joblib

from pcap_processor import calculate_features



In [2]:
# Report whether PyTorch can see a CUDA device and, when it can, how many
# devices are visible and the name of device 0.
if not torch.cuda.is_available():
    print("CUDA is not available. PyTorch cannot use the GPU.")
else:
    print("CUDA is available. PyTorch can use the GPU.")
    print("Number of GPUs available:", torch.cuda.device_count())
    print("GPU Name:", torch.cuda.get_device_name(0))

CUDA is available. PyTorch can use the GPU.
Number of GPUs available: 1
GPU Name: NVIDIA GeForce RTX 3070 Ti Laptop GPU


In [3]:
# Read the serialized inputs and their targets back from the .pt files.
data_tensors = torch.load(f='data_tensors_15s_0.2s.pt')
target_tensors = torch.load(f='target_tensors_15s_0.2s.pt')

print("Data tensors and target tensors loaded successfully!")

# Sanity check: sample count, per-sample shape, and target shape.
print(f"Number of data samples: {len(data_tensors)}")
print(f"Shape of first data sample: {data_tensors[0].shape}")
print(f"Shape of target tensor: {target_tensors.shape}")

Data tensors and target tensors loaded successfully!
Number of data samples: 3000
Shape of first data sample: torch.Size([75, 22])
Shape of target tensor: torch.Size([3000])


In [4]:

# Define the RNN model
class RNNModel(nn.Module):
    """Stacked LSTM followed by a linear projection of its final hidden state.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the hidden state of every LSTM layer.
        output_size: Number of values produced per input sequence.
        num_layers: Number of stacked LSTM layers. Defaults to 3, the depth
            that used to be hard-coded, so existing call sites and saved
            state dicts are unaffected.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=3):
        super().__init__()
        # The attribute names `rnn` and `fc` become the state_dict key
        # prefixes; renaming them would break loading of saved weights.
        self.rnn = nn.LSTM(
            input_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the sequence(s) in `x` through the LSTM and project the result.

        Args:
            x: Input tensor. Because the LSTM is built with batch_first=True,
                batched input is laid out as (batch, seq_len, input_size).

        Returns:
            The linear projection of the top LSTM layer's final hidden state;
            for batched input this has shape (batch, output_size).
        """
        _, (h_n, _) = self.rnn(x)
        # h_n stacks the final hidden state of every layer along dim 0;
        # index -1 selects the top layer.
        return self.fc(h_n[-1])


# Define custom dataset
class PacketCaptureDataset(Dataset):
    """Map-style dataset that pairs each sample with the target at the same index.

    Args:
        data: Sized, indexable collection of samples.
        targets: Sized, indexable collection of targets, aligned with `data`
            by position.

    Raises:
        ValueError: If `data` and `targets` do not have the same length.
    """

    def __init__(self, data, targets):
        # Fail at construction time: with mismatched lengths, extra targets
        # would be silently ignored, or indexing would fail mid-epoch.
        if len(data) != len(targets):
            raise ValueError(
                "data and targets must have the same length, "
                f"got {len(data)} and {len(targets)}"
            )
        self.data = data
        self.targets = targets

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the `(sample, target)` pair stored at position `idx`."""
        return self.data[idx], self.targets[idx]



In [5]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical from run to run.
(train_data, test_data, train_targets, test_targets) = train_test_split(
    data_tensors,
    target_tensors,
    test_size=0.2,
    random_state=42,
)

# Wrap each split in a dataset, then batch it. Only the training loader
# shuffles; the test loader keeps the split's order.
train_dataset, test_dataset = (
    PacketCaptureDataset(train_data, train_targets),
    PacketCaptureDataset(test_data, test_targets),
)

train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


In [6]:
# Model dimensions.
hidden_size = 64
output_size = 1
# Feature count per time step, read from the first row of the first training
# sample (assumes every packet capture has the same structure).
input_size = len(train_dataset[0][0][0])

# Loss, network, and optimizer used by the training loop.
criterion = nn.MSELoss()
model = RNNModel(input_size, hidden_size, output_size)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [7]:
# Warm-start from the checkpoint written by the save cell when it exists. On a
# fresh checkout the file has not been created yet, so fall back to the
# model's current weights instead of crashing.
weights_path = 'model_weights_15s_0.2s.pt'
if os.path.exists(weights_path):
    model.load_state_dict(torch.load(weights_path))
else:
    print(f"No checkpoint found at {weights_path}; training from current weights.")

num_epochs = 1000
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        # Drop only the trailing output dimension. A bare squeeze() would also
        # collapse the batch dimension whenever a batch holds a single sample,
        # leaving the prediction and target shapes mismatched.
        loss = criterion(outputs.squeeze(-1), targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # total_loss is the sum of the per-batch losses over the epoch, not a mean.
    print(f"Epoch {epoch+1}, Loss: {total_loss}")

Epoch 1, Loss: 958787.7364959717
Epoch 2, Loss: 952933.1342010498
Epoch 3, Loss: 951325.9551696777
Epoch 4, Loss: 947402.8665161133
Epoch 5, Loss: 941630.0093536377
Epoch 6, Loss: 939139.8631439209
Epoch 7, Loss: 929769.0181884766
Epoch 8, Loss: 931986.7373504639
Epoch 9, Loss: 929539.2409057617
Epoch 10, Loss: 938487.1340713501


KeyboardInterrupt: 

In [8]:
# Mean absolute error of the model over the held-out test split.
total_absolute_error = 0
num_samples = len(test_targets)

# Switch to inference mode and skip autograd bookkeeping: no gradients are
# needed here, and the training loop leaves the model in train() mode.
model.eval()
with torch.no_grad():
    for i in range(num_samples):
        # unsqueeze(0) adds the leading batch dimension the model is fed
        # during training.
        predicted_value = model(test_data[i].unsqueeze(0)).item()
        actual_value = test_targets[i].item()
        absolute_error = abs(predicted_value - actual_value)
        total_absolute_error += absolute_error

average_absolute_error = total_absolute_error / num_samples
print(f"Average Absolute Error: {average_absolute_error}")


Average Absolute Error: 25.510194551000993


In [9]:
# Persist the model's parameters (its state_dict) to disk.
torch.save(obj=model.state_dict(), f='model_weights_15s_0.2s.pt')

In [10]:
# Print every test target next to the model's prediction for that sample.
# Iterating over the split directly keeps this cell independent of counters
# defined in other cells.
model.eval()
with torch.no_grad():  # inference only; no gradients needed
    for sample, target in zip(test_data, test_targets):
        predicted_value = model(sample.unsqueeze(0)).item()
        actual_value = target.item()
        print(f"Actual: {actual_value}, Predicted: {predicted_value}")

Actual: 0.23467867076396942, Predicted: 9.718567848205566
Actual: 23.574888229370117, Predicted: 6.268156051635742
Actual: 0.3150703012943268, Predicted: 6.061835289001465
Actual: 0.8776228427886963, Predicted: 16.11424446105957
Actual: 5.211883068084717, Predicted: 57.77309036254883
Actual: 8.56942367553711, Predicted: 9.253083229064941
Actual: 17.285400390625, Predicted: 61.15926742553711
Actual: 1.7512636184692383, Predicted: 65.57344055175781
Actual: 6.08070182800293, Predicted: 10.383735656738281
Actual: 2.4700632095336914, Predicted: 15.669567108154297
Actual: 0.4156245291233063, Predicted: 9.92772102355957
Actual: 0.6440844535827637, Predicted: 7.059261322021484
Actual: 0.1825038343667984, Predicted: 3.98478364944458
Actual: 20.93109893798828, Predicted: 9.94721508026123
Actual: 43.1968879699707, Predicted: 10.941052436828613
Actual: 11.367669105529785, Predicted: 14.838638305664062
Actual: 13.284963607788086, Predicted: 5.016214370727539
Actual: 0.8706076741218567, Predicted: 3