In [1]:
import kagglehub
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler, LabelEncoder
import scapy.all as scapy
import time

In [2]:
datasets = {
    'Benign-Monday': 'cicids2017/Benign-Monday-no-metadata.parquet',
    'Botnet-Friday': 'cicids2017/Botnet-Friday-no-metadata.parquet',
    'Bruteforce-Tuesday': 'cicids2017/Bruteforce-Tuesday-no-metadata.parquet',
    'DDoS-Friday': 'cicids2017/DDoS-Friday-no-metadata.parquet',
    'DoS-Wednesday': 'cicids2017/DoS-Wednesday-no-metadata.parquet',
    'Infiltration-Thursday': 'cicids2017/Infiltration-Thursday-no-metadata.parquet',
    'Portscan-Friday': 'cicids2017/Portscan-Friday-no-metadata.parquet',
    'WebAttacks-Thursday': 'cicids2017/WebAttacks-Thursday-no-metadata.parquet'
}

# Read the datasets into DataFrames
df_data = {key: pd.read_parquet(path) for key, path in datasets.items()}

In [3]:
frames = [pd.read_parquet(path) for path in datasets.values()]
df = pd.concat(frames, ignore_index=True)

In [4]:
features = df.drop(columns=['Label'])  # Exclude label column
labels = df['Label']

In [5]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels)


In [6]:
scaler = StandardScaler()
X = scaler.fit_transform(features)


In [7]:
X_tensor = torch.tensor(X, dtype=torch.float32).unsqueeze(2)  # Add channel dimension
y_tensor = torch.tensor(y, dtype=torch.long)

In [8]:
dataset = TensorDataset(X_tensor, y_tensor)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [9]:
class CNN_LSTM(nn.Module):
    def __init__(self, input_size, num_classes):
        super(CNN_LSTM, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
        self.batch_norm = nn.BatchNorm1d(64)
        
        # The new LSTM input size should match the Conv1d output size
        self.lstm = nn.LSTM(input_size=64, hidden_size=64, num_layers=2, batch_first=True)
        
        self.fc1 = nn.Linear(64, 32)
        self.fc2 = nn.Linear(32, num_classes)
        self.dropout = nn.Dropout(0.3)
    
    def forward(self, x):
        x = x.permute(0, 2, 1)  # Change shape for CNN: [batch_size, 1, input_size]
        x = torch.relu(self.conv1(x))  # Conv1D reduces feature dim
        x = self.batch_norm(x)

        x = x.permute(0, 2, 1)  # Change back to [batch_size, sequence_length, channels]
        
        # Ensure LSTM receives the correct input size
        x, _ = self.lstm(x)
        x = self.dropout(torch.relu(self.fc1(x[:, -1, :])))
        x = self.fc2(x)
        return x


In [10]:
num_classes = len(np.unique(y))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN_LSTM(input_size=X.shape[1], num_classes=num_classes).to(device)


In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
for epoch in range(10):
    for batch_X, batch_y in dataloader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}/10, Loss: {loss.item():.4f}")

Epoch 1/10, Loss: 0.0234


In [None]:
print(X_tensor.shape)  


In [None]:
torch.save(model.state_dict(), "ids_model.pth")
