In [1]:
import json
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
from tqdm import trange, tqdm
import joblib

from pcap_processor import calculate_features



In [2]:
# Report whether PyTorch can see a CUDA-capable GPU, and if so how many
# devices are visible and the name of device 0.
if not torch.cuda.is_available():
    print("CUDA is not available. PyTorch cannot use the GPU.")
else:
    print("CUDA is available. PyTorch can use the GPU.")
    print("Number of GPUs available:", torch.cuda.device_count())
    print("GPU Name:", torch.cuda.get_device_name(0))

CUDA is available. PyTorch can use the GPU.
Number of GPUs available: 1
GPU Name: NVIDIA GeForce RTX 3070 Ti Laptop GPU


In [3]:
# Read the pre-built feature sequences and their regression targets from disk.
# NOTE: torch.load unpickles the file contents, so only point it at .pt files
# that come from a trusted source.
data_tensors = torch.load("data_tensors.pt")
target_tensors = torch.load("target_tensors.pt")

print("Data tensors and target tensors loaded successfully!")

# Sanity check: sample count, per-sample shape, and overall target shape.
print(f"Number of data samples: {len(data_tensors)}")
print(f"Shape of first data sample: {data_tensors[0].shape}")
print(f"Shape of target tensor: {target_tensors.shape}")

Data tensors and target tensors loaded successfully!
Number of data samples: 3000
Shape of first data sample: torch.Size([300, 22])
Shape of target tensor: torch.Size([3000])


In [4]:

# Define the RNN model
class RNNModel(nn.Module):
    """Stacked LSTM followed by a linear head applied to the final hidden state.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of values produced per input sequence.
        num_layers: Number of stacked LSTM layers. Defaults to 3, the depth
            that was previously hard-coded.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=3):
        super().__init__()
        self.rnn = nn.LSTM(
            input_size, hidden_size, batch_first=True, num_layers=num_layers
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of sequences to one prediction vector per sequence.

        Args:
            x: Batch-first input of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        # h_n holds the final hidden state of every layer; the per-step
        # outputs and the cell state are not needed.
        _, (h_n, _) = self.rnn(x)
        # h_n[-1] is the top layer's final hidden state, used as the
        # sequence summary fed to the linear head.
        out = self.fc(h_n[-1])
        return out


# Define custom dataset
class PacketCaptureDataset(Dataset):
    """Pairs each data sample with the target stored at the same index.

    Args:
        data: Indexable collection of samples.
        targets: Indexable collection of targets, one per sample.

    Raises:
        ValueError: If ``data`` and ``targets`` differ in length.
    """

    def __init__(self, data, targets):
        # Fail fast: a length mismatch would otherwise show up only as an
        # IndexError mid-epoch or as silently ignored trailing targets.
        if len(data) != len(targets):
            raise ValueError(
                f"data and targets must have the same length, "
                f"got {len(data)} and {len(targets)}"
            )
        self.data = data
        self.targets = targets

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, target)`` pair stored at ``idx``."""
        return self.data[idx], self.targets[idx]



In [5]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps the
# split reproducible between runs.
train_data, test_data, train_targets, test_targets = train_test_split(
    data_tensors,
    target_tensors,
    test_size=0.2,
    random_state=42,
)

# Wrap each split in a dataset and batch it. Only the training batches are
# shuffled; the test loader keeps the split's order.
train_dataset = PacketCaptureDataset(train_data, train_targets)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = PacketCaptureDataset(test_data, test_targets)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [6]:
# The feature width is read off the first time step of the first training
# sample (assumes every packet capture shares the same structure).
input_size = len(train_dataset[0][0][0])
hidden_size = 64
output_size = 1  # one predicted value per sequence

model = RNNModel(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)
# Mean-squared-error objective, optimised with Adam.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [7]:
# Train for a fixed number of epochs, printing the summed batch loss per epoch.
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        # squeeze(-1) removes only the trailing output dimension. A bare
        # squeeze() would also drop the batch dimension when a batch holds a
        # single sample, leaving a 0-d prediction against a 1-d target.
        loss = criterion(outputs.squeeze(-1), targets)
        loss.backward()
        optimizer.step()
        # Each batch loss is already a mean over its samples, so this sum
        # grows with the number of batches.
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss}")

Epoch 1, Loss: 1105627.8437461853
Epoch 2, Loss: 1085449.924797058
Epoch 3, Loss: 1076405.9253616333
Epoch 4, Loss: 1070121.9141540527
Epoch 5, Loss: 1065475.9992523193
Epoch 6, Loss: 1062240.065170288
Epoch 7, Loss: 1052815.7422027588
Epoch 8, Loss: 1047536.7873916626
Epoch 9, Loss: 1041054.8440246582
Epoch 10, Loss: 1036360.2265930176
Epoch 11, Loss: 1030330.341003418
Epoch 12, Loss: 1024835.4910583496
Epoch 13, Loss: 1019212.2686309814
Epoch 14, Loss: 1016227.6970977783
Epoch 15, Loss: 1014130.1175384521
Epoch 16, Loss: 1010405.3761444092
Epoch 17, Loss: 1002005.8225860596
Epoch 18, Loss: 1003288.1115112305
Epoch 19, Loss: 991330.7876281738
Epoch 20, Loss: 983684.4919586182
Epoch 21, Loss: 977376.2052383423
Epoch 22, Loss: 974710.0755310059
Epoch 23, Loss: 967834.6111297607
Epoch 24, Loss: 964372.5303039551
Epoch 25, Loss: 959663.8687591553
Epoch 26, Loss: 951919.1328125
Epoch 27, Loss: 956297.2208709717
Epoch 28, Loss: 948366.291923523
Epoch 29, Loss: 947921.2465057373
Epoch 30, Lo

In [10]:
# Mean absolute percentage error of the model over the held-out test split.
total_percent_error = 0
num_samples = len(test_targets)
num_scored = 0  # samples that actually contribute to the average

model.eval()
# Inference only: skip autograd bookkeeping for every forward pass.
with torch.no_grad():
    for i in range(num_samples):
        predicted_value = model(test_data[i].unsqueeze(0)).item()
        actual_value = test_targets[i].item()
        if actual_value == 0:
            # Percent error is undefined for a zero target; skip the sample
            # instead of raising ZeroDivisionError.
            continue
        # Divide by the magnitude so a negative target cannot produce a
        # negative "error" that cancels out real errors.
        percent_error = abs(predicted_value - actual_value) / abs(actual_value) * 100
        total_percent_error += percent_error
        num_scored += 1

if num_scored < num_samples:
    print(f"Skipped {num_samples - num_scored} sample(s) with a zero target.")

# NaN signals "nothing to average" for an empty or all-zero-target test set.
average_percent_error = (
    total_percent_error / num_scored if num_scored else float("nan")
)
print(f"Average Percent Error: {average_percent_error}%")


Average Percent Error: 1770.4961760531432%


In [11]:
# Display the raw model outputs for a single batch. NOTE(review): `outputs` is
# only assigned inside the training loop, so this presumably shows the last
# training batch -- confirm no other cell rebinds it before relying on this.
outputs

tensor([[  9.7930],
        [  7.0405],
        [ 23.6834],
        [  6.3497],
        [  7.3142],
        [  7.3228],
        [147.7668],
        [  5.3759],
        [  6.3505],
        [ 12.9467],
        [  6.0732],
        [  5.7586],
        [ 10.5661],
        [  8.2382],
        [ 12.5062],
        [ 12.8790],
        [  7.9381],
        [  5.4403],
        [  7.2841],
        [ 60.4897],
        [ 30.3898],
        [ 65.9363],
        [ 25.9893],
        [ 17.4231],
        [ 14.1098],
        [ 17.6980],
        [ 14.6423],
        [  7.2882],
        [ 13.9150],
        [  5.8500],
        [  9.5164],
        [  9.6665]], grad_fn=<AddmmBackward0>)