Necessary Imports

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


Data Preprocessing

In [None]:
df1 = pd.read_csv('Syn.csv',usecols=[' Total Backward Packets',' Down/Up Ratio', 'Fwd Packets/s',' Bwd Packets/s','Total Length of Fwd Packets',' Total Length of Bwd Packets',' Fwd Packet Length Mean',' Label'])
# df2 = pd.read_csv('UDP.csv',usecols=[' Total Backward Packets',' Down/Up Ratio', 'Fwd Packets/s',' Bwd Packets/s','Total Length of Fwd Packets',' Total Length of Bwd Packets',' Fwd Packet Length Mean',' Label'])
df3 = pd.read_csv('UDPLag.csv',usecols=[' Total Backward Packets',' Down/Up Ratio', 'Fwd Packets/s',' Bwd Packets/s','Total Length of Fwd Packets',' Total Length of Bwd Packets',' Fwd Packet Length Mean',' Label'])



In [None]:
unique_labels = df3[' Label'].unique()
print(unique_labels)

['UDP' 'BENIGN' 'UDPLag' 'Syn']


In [None]:
df_merged = pd.concat([df1, df3], ignore_index=True)
df_merged.head()

Unnamed: 0,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Mean,Fwd Packets/s,Bwd Packets/s,Down/Up Ratio,Label
0,2,42.0,12.0,6.0,0.1940999,0.055457,0.0,Syn
1,4,48.0,24.0,6.0,0.1783669,0.089183,0.0,Syn
2,0,12.0,0.0,6.0,2000000.0,0.0,0.0,Syn
3,0,12.0,0.0,6.0,0.0,0.0,0.0,Syn
4,2,48.0,12.0,6.0,0.2238923,0.055973,0.0,Syn


In [None]:
unique_labels = df_merged[' Label'].unique()
print(unique_labels)

['Syn' 'BENIGN' 'UDP' 'UDPLag']


In [None]:
df_cleaned = df_merged.dropna(subset=[' Label'])

In [None]:
unique_labels = df_cleaned[' Label'].unique()
print(unique_labels)

['Syn' 'BENIGN' 'UDP' 'UDPLag']


In [None]:
label_map = {'BENIGN': 0, 'UDPLag': 1, 'UDP': 2, 'Syn': 3}
df_cleaned.loc[:, ' Label'] = df_cleaned[' Label'].map(label_map).astype(float)

In [None]:
df_cleaned.head()

Unnamed: 0,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Mean,Fwd Packets/s,Bwd Packets/s,Down/Up Ratio,Label
0,2,42.0,12.0,6.0,0.1940999,0.055457,0.0,3.0
1,4,48.0,24.0,6.0,0.1783669,0.089183,0.0,3.0
2,0,12.0,0.0,6.0,2000000.0,0.0,0.0,3.0
3,0,12.0,0.0,6.0,0.0,0.0,0.0,3.0
4,2,48.0,12.0,6.0,0.2238923,0.055973,0.0,3.0


Preprocessing using dataloader

In [None]:

class LargeCSVDataset(Dataset):
    def __init__(self, dataframe):
        # self.data1 = pd.read_csv(csv_path)
        # self.data2 = pd.read_csv(csv1_path)

        # # Merge on a common column
        # self.data = pd.merge(self.data1, self.data1, on='id', how='outer')

        self.data = dataframe
        self.X = self.data.iloc[:, :-1].values
        self.y = self.data.iloc[:, -1].values
        self.input_size = self.X.shape[1]
        self.num_classes = len(set(self.y))

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        X = torch.tensor(self.X[idx], dtype=torch.float32)
        y = torch.tensor(self.y[idx], dtype=torch.long)
        return X, y



In [None]:
# Load the full dataset
dataset = LargeCSVDataset(df_cleaned)


train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size

train_set, val_set, test_set = random_split(dataset, [train_size, val_size, test_size])

train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
val_loader = DataLoader(val_set, batch_size=64)
test_loader = DataLoader(test_set, batch_size=64)


 Model definition

In [None]:
class Classifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(Classifier, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        return self.net(x)

model = Classifier(input_size=dataset.input_size, num_classes=dataset.num_classes)
model.to(device)

Classifier(
  (net): Sequential(
    (0): Linear(in_features=7, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=128, bias=True)
    (7): ReLU()
    (8): Linear(in_features=128, out_features=64, bias=True)
    (9): ReLU()
    (10): Linear(in_features=64, out_features=4, bias=True)
  )
)

Criterion and Optimizer

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

Training with validation

In [None]:
for epoch in range(20):
    model.train()
    total_loss = 0

    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Validation
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            outputs = model(X_batch)
            _, preds = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (preds == y_batch).sum().item()

    val_acc = 100 * correct / total
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}, Val Acc: {val_acc:.2f}%")

Epoch 1, Loss: 59491.6782, Val Acc: 96.94%
Epoch 2, Loss: 9897.4804, Val Acc: 96.94%
Epoch 3, Loss: 9849.7510, Val Acc: 96.94%
Epoch 4, Loss: 9849.7360, Val Acc: 96.94%
Epoch 5, Loss: 9849.7329, Val Acc: 96.94%
Epoch 6, Loss: 9849.7340, Val Acc: 96.94%
Epoch 7, Loss: 9849.8407, Val Acc: 96.94%
Epoch 8, Loss: 9849.9251, Val Acc: 96.94%
Epoch 9, Loss: 9849.8482, Val Acc: 96.94%
Epoch 10, Loss: 9849.7325, Val Acc: 96.94%
Epoch 11, Loss: 9849.5978, Val Acc: 96.94%
Epoch 12, Loss: 9849.9424, Val Acc: 96.94%
Epoch 13, Loss: 9850.0206, Val Acc: 96.94%
Epoch 14, Loss: 9849.8428, Val Acc: 96.94%
Epoch 15, Loss: 9849.7338, Val Acc: 96.94%
Epoch 16, Loss: 9849.6868, Val Acc: 96.94%
Epoch 17, Loss: 9849.6746, Val Acc: 96.94%
Epoch 18, Loss: 9849.8571, Val Acc: 96.94%
Epoch 19, Loss: 9849.8517, Val Acc: 96.94%
Epoch 20, Loss: 9850.0466, Val Acc: 96.94%


Final Test Accuracy

In [None]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score,classification_report


model.eval()
y_true = []
y_pred = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        outputs = model(X_batch)
        _, preds = torch.max(outputs, 1)
        y_true.extend(y_batch.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())

test_acc = accuracy_score(y_true, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='macro')

print(f"\nTest Accuracy: {test_acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")
print(classification_report(y_true, y_pred, output_dict=True))

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Test Accuracy: 0.9697, Precision: 0.2424, Recall: 0.2500, F1: 0.2462
{'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 3948.0}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 192.0}, '2': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 11148.0}, '3': {'precision': 0.9697010535661907, 'recall': 1.0, 'f1-score': 0.9846174898576856, 'support': 489284.0}, 'accuracy': 0.9697010535661907, 'macro avg': {'precision': 0.24242526339154769, 'recall': 0.25, 'f1-score': 0.2461543724644214, 'support': 504572.0}, 'weighted avg': {'precision': 0.9403201332873804, 'recall': 0.9697010535661907, 'f1-score': 0.9547846172746959, 'support': 504572.0}}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
