In [1]:
import json
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from csv_to_vec import calculate_features
import os
from tqdm import trange


In [2]:

# Define the RNN model
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(RNNModel, self).__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        _, (h_n, _) = self.rnn(x)
        out = self.fc(h_n[-1])
        return out


# Define custom dataset
class PacketCaptureDataset(Dataset):
    def __init__(self, data, targets):
        self.data = data
        self.targets = targets

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]



In [3]:
packet_capture_data = []  # List of lists of dicts
for i in trange(100):
    filename = f"../dfs/out{i}.csv"
    df = pd.read_csv(filename)
    features = calculate_features(df, 1)
    packet_capture_data.append(features)


speed_test_results = json.load(open("../dfs/ground_truths.json"))[:100]


# Convert data to tensors
data_tensors = []
for packet_capture in packet_capture_data:
    packet_capture_tensor = torch.tensor(
        [list(timeslice.values()) for timeslice in packet_capture], dtype=torch.float32
    )
    data_tensors.append(packet_capture_tensor)
target_tensors = torch.tensor(speed_test_results, dtype=torch.float32)

100%|██████████| 100/100 [00:11<00:00,  8.35it/s]


In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [5]:
train_data, test_data, train_targets, test_targets = train_test_split(
    data_tensors, target_tensors, test_size=0.2, random_state=42
)


train_data = [data.to(device) for data in train_data]
train_targets = train_targets.to(device)
test_data = [data.to(device) for data in test_data]
test_targets = test_targets.to(device)

# Create datasets and data loaders
train_dataset = PacketCaptureDataset(train_data, train_targets)
test_dataset = PacketCaptureDataset(test_data, test_targets)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [6]:
input_size = len(
    packet_capture_data[0][0]
)  # Assuming all packet captures have the same structure
hidden_size = 768
output_size = 1
num_layers = 4
model = RNNModel(input_size, hidden_size, output_size, num_layers).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [7]:
num_epochs = 1000
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss: {total_loss}")

Epoch 10, Loss: 6100.870697021484
Epoch 20, Loss: 9225.327545166016
Epoch 30, Loss: 7803.260772705078
Epoch 40, Loss: 6037.5985107421875
Epoch 50, Loss: 5992.712020874023
Epoch 60, Loss: 5988.865737915039
Epoch 70, Loss: 6006.6195068359375
Epoch 80, Loss: 6135.953643798828
Epoch 90, Loss: 7933.091796875
Epoch 100, Loss: 7805.915222167969
Epoch 110, Loss: 5856.771942138672
Epoch 120, Loss: 7619.424697875977
Epoch 130, Loss: 7398.637298583984
Epoch 140, Loss: 5342.2714920043945
Epoch 150, Loss: 5108.782562255859
Epoch 160, Loss: 5335.70751953125
Epoch 170, Loss: 4927.200592041016
Epoch 180, Loss: 4797.329238891602
Epoch 190, Loss: 4572.948623657227
Epoch 200, Loss: 6628.154102325439
Epoch 210, Loss: 4256.708694458008
Epoch 220, Loss: 4150.814239501953
Epoch 230, Loss: 3948.1390419006348
Epoch 240, Loss: 7422.523239135742
Epoch 250, Loss: 3671.971363067627
Epoch 260, Loss: 5572.993408203125
Epoch 270, Loss: 3356.458786010742
Epoch 280, Loss: 3082.106185913086
Epoch 290, Loss: 3935.2801532

In [8]:
model.eval()
test_loss = 0
num_examples = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        # print(inputs)
        # print(inputs.size())
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        test_loss += loss.item()
        num_examples += inputs.size(0)

average_test_loss = test_loss / num_examples
print(f"Average Test Loss per Example: {average_test_loss}")

Average Test Loss per Example: 600.16220703125
