In [1]:
import json
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
from tqdm import trange, tqdm
import joblib

from pcap_processor import calculate_features



In [2]:
# Report whether PyTorch can see a CUDA device; when it can, also print
# the device count and the name of device 0.
if not torch.cuda.is_available():
    print("CUDA is not available. PyTorch cannot use the GPU.")
else:
    print("CUDA is available. PyTorch can use the GPU.")
    print("Number of GPUs available:", torch.cuda.device_count())
    print("GPU Name:", torch.cuda.get_device_name(0))

CUDA is available. PyTorch can use the GPU.
Number of GPUs available: 1
GPU Name: NVIDIA GeForce RTX 3070 Ti Laptop GPU


In [3]:
# Read the pre-built inputs and regression targets back from disk.
# NOTE(review): torch.load unpickles the file contents, so these .pt files
# should only ever come from a trusted source.
data_tensors = torch.load(f='data_tensors.pt')
target_tensors = torch.load(f='target_tensors.pt')

print("Data tensors and target tensors loaded successfully!")

# Quick sanity check: sample count, per-sample shape and target shape.
print("Number of data samples:", len(data_tensors))
print("Shape of first data sample:", data_tensors[0].shape)
print("Shape of target tensor:", target_tensors.shape)

Data tensors and target tensors loaded successfully!
Number of data samples: 3000
Shape of first data sample: torch.Size([300, 22])
Shape of target tensor: torch.Size([3000])


In [4]:

# Define the RNN model
class RNNModel(nn.Module):
    """Stacked-LSTM sequence regressor with a linear output head.

    The input sequence is run through a multi-layer LSTM; the final hidden
    state of the top LSTM layer is then projected to ``output_size`` values
    by a fully connected layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of values predicted per sequence.
        num_layers: Number of stacked LSTM layers. Defaults to 3, the depth
            that was previously hard-coded, so existing callers and saved
            state dicts are unaffected.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=3):
        super().__init__()
        # batch_first=True: batched inputs are laid out (batch, seq_len, input_size).
        self.rnn = nn.LSTM(
            input_size, hidden_size, batch_first=True, num_layers=num_layers
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Predict from the last hidden state of the top LSTM layer.

        Args:
            x: Batched input of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        # h_n stacks the final hidden state of every layer; index -1 is the top layer.
        _, (h_n, _) = self.rnn(x)
        out = self.fc(h_n[-1])
        return out


# Define custom dataset
class PacketCaptureDataset(Dataset):
    """Map-style dataset pairing each data sample with its target.

    Args:
        data: Indexable collection of samples.
        targets: Indexable collection of targets, aligned with ``data``.

    Raises:
        ValueError: If ``data`` and ``targets`` differ in length. Without
            this check a mismatch would either be silently ignored or
            surface as an IndexError part-way through iteration.
    """

    def __init__(self, data, targets):
        if len(data) != len(targets):
            raise ValueError(
                f"data and targets must have the same length, "
                f"got {len(data)} and {len(targets)}"
            )
        self.data = data
        self.targets = targets

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, target)`` pair at position ``idx``."""
        return self.data[idx], self.targets[idx]



In [5]:
# Hold out 20% of the samples for evaluation; the fixed random_state makes
# the split itself repeatable from run to run.
train_data, test_data, train_targets, test_targets = train_test_split(
    data_tensors,
    target_tensors,
    random_state=42,
    test_size=0.2,
)

# Training side: batches of 32, reshuffled every epoch.
train_dataset = PacketCaptureDataset(train_data, train_targets)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)

# Evaluation side: batches of 32 in a fixed order.
test_dataset = PacketCaptureDataset(test_data, test_targets)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)


In [6]:
# Seed PyTorch's global RNG before the model is built so that weight
# initialisation (and any later draws from the global RNG, such as batch
# shuffling) are repeatable across kernel restarts.
torch.manual_seed(42)

# Feature width is read from the first time step of the first training sample.
input_size = len(
    train_dataset[0][0][0]
)  # Assuming all packet captures have the same structure
hidden_size = 64
output_size = 1  # one regression value per capture
model = RNNModel(input_size, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [17]:
# Train the model with mini-batch gradient descent.
num_epochs = 1000
for epoch in range(num_epochs):
    model.train()
    # Running sum of the per-batch mean losses for this epoch (not a
    # per-sample average, so its magnitude scales with the batch count).
    total_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        # Drop only the trailing output dimension: (batch, 1) -> (batch,).
        # A bare squeeze() would also collapse a batch of size 1 to a 0-dim
        # tensor, which no longer matches the (1,) target shape.
        loss = criterion(outputs.squeeze(-1), targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss}")

Epoch 1, Loss: 904488.94190979
Epoch 2, Loss: 882696.6627731323
Epoch 3, Loss: 852020.382232666
Epoch 4, Loss: 820885.4600219727
Epoch 5, Loss: 825455.1453704834
Epoch 6, Loss: 815107.8433837891
Epoch 7, Loss: 785213.9485931396
Epoch 8, Loss: 774749.9514312744
Epoch 9, Loss: 779210.4373703003
Epoch 10, Loss: 782630.1522521973
Epoch 11, Loss: 774057.358833313
Epoch 12, Loss: 762152.7754211426
Epoch 13, Loss: 756797.2387084961
Epoch 14, Loss: 753030.9103546143
Epoch 15, Loss: 747701.8036804199
Epoch 16, Loss: 735576.3759613037
Epoch 17, Loss: 749037.3171310425
Epoch 18, Loss: 721434.1661376953
Epoch 19, Loss: 725805.8415489197
Epoch 20, Loss: 719593.089805603
Epoch 21, Loss: 717773.7697753906
Epoch 22, Loss: 717704.3955192566
Epoch 23, Loss: 719785.527557373
Epoch 24, Loss: 743465.7851333618
Epoch 25, Loss: 758287.2177696228
Epoch 26, Loss: 720463.9990158081
Epoch 27, Loss: 708377.4208221436
Epoch 28, Loss: 718780.7085113525
Epoch 29, Loss: 721716.3757629395
Epoch 30, Loss: 715324.848419

KeyboardInterrupt: 

In [18]:
# Mean absolute percent error of the model over the held-out test samples.
model.eval()  # switch any train/eval-sensitive layers to inference behaviour

total_percent_error = 0
num_samples = len(test_targets)
num_skipped = 0  # targets equal to 0 have no defined percent error

with torch.no_grad():  # evaluation needs no autograd graph
    for i in range(num_samples):
        # unsqueeze(0) adds the batch dimension the model expects.
        predicted_value = model(test_data[i].unsqueeze(0)).item()
        actual_value = test_targets[i].item()
        if actual_value == 0:
            num_skipped += 1
            continue
        # Divide by the magnitude so a negative target cannot yield a
        # negative "error" that cancels out others in the average.
        percent_error = abs(predicted_value - actual_value) / abs(actual_value) * 100
        total_percent_error += percent_error

# Average only over the samples that were actually scored; NaN if none were.
num_scored = num_samples - num_skipped
average_percent_error = (
    total_percent_error / num_scored if num_scored else float("nan")
)
print(f"Average Percent Error: {average_percent_error}%")
if num_skipped:
    print(f"Skipped {num_skipped} sample(s) with a target of 0.")


Average Percent Error: 204.87137841853746%


In [19]:
# Persist only the model's state dict (its parameters and buffers) to disk.
torch.save(obj=model.state_dict(), f='model_weights.pt')

In [14]:
# Display `outputs`, the variable assigned inside the training loop: the
# model's predictions for the most recently processed training batch.
outputs

tensor([[ 17.3065],
        [  4.8220],
        [ 17.4890],
        [ 17.7287],
        [  6.5869],
        [  4.8088],
        [  7.2169],
        [  5.1928],
        [ 30.0326],
        [  4.6693],
        [ 26.4676],
        [  4.8073],
        [  5.0645],
        [ 89.1381],
        [  5.0451],
        [ 10.7334],
        [ 26.0784],
        [ 17.4757],
        [  5.4479],
        [ 52.9089],
        [  6.9590],
        [  5.2951],
        [ 12.9902],
        [  5.1598],
        [ 17.4171],
        [ 19.3253],
        [ 17.4863],
        [ 56.1411],
        [ 53.0042],
        [ 17.5029],
        [ 53.7434],
        [163.5647]], grad_fn=<AddmmBackward0>)