## Load in data

In [1]:
import pandas as pd

# Read each capture file into its own DataFrame. The target names on the
# left line up one-to-one, in order, with the CSV paths on the right.
(
    monday_data,
    tuesday_data,
    wednesday_data,
    thursday_web_attacks_data,
    thursday_infiltration_data,
    friday_morning_data,
    friday_port_scan_data,
    friday_ddos_data,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/Monday-WorkingHours.pcap_ISCX.csv',
        'data/Tuesday-WorkingHours.pcap_ISCX.csv',
        'data/Wednesday-workingHours.pcap_ISCX.csv',
        'data/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv',
        'data/Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv',
        'data/Friday-WorkingHours-Morning.pcap_ISCX.csv',
        'data/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv',
        'data/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv',
    )
)

In [2]:
# Stack the per-file frames row-wise into a single table with a fresh 0..n-1 index.
daily_frames = (
    monday_data,
    tuesday_data,
    wednesday_data,
    thursday_web_attacks_data,
    thursday_infiltration_data,
    friday_morning_data,
    friday_port_scan_data,
    friday_ddos_data,
)
full_data = pd.concat(daily_frames, ignore_index=True)

# Report (rows, columns), then preview the first rows as the cell output.
print(full_data.shape)
full_data.head()

(2830743, 79)


Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,49188,4,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
1,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
2,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
3,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
4,49486,3,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN


: 

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import numpy as np

# Normalise the column headers first so that the 'Label' lookup below works
# even if the raw headers carry leading/trailing whitespace.
full_data.columns = full_data.columns.str.strip()

# Mark infinities as missing with NaN, not None. Passing None as the
# replacement value is ambiguous in pandas: depending on the version it is
# either interpreted as "no value supplied" (which pad-fills from the previous
# row instead of marking the cell missing) or stored literally (which may turn
# float columns into object dtype). NaN is unambiguous and keeps dtypes numeric.
full_data.replace([np.inf, -np.inf], np.nan, inplace=True)

# Drop every row that has at least one missing value (including the former infinities).
full_data.dropna(inplace=True)

# Map the string class names to integer ids; keep the encoder so that
# label_encoder.classes_ / inverse_transform can recover the original names.
label_encoder = LabelEncoder()

full_data['Label'] = label_encoder.fit_transform(full_data['Label'])

In [None]:
feature_columns = full_data.columns.difference(['Label'])

scaler = StandardScaler()

full_data[feature_columns] = scaler.fit_transform(full_data[feature_columns])

full_data.head()

In [None]:
from sklearn.model_selection import train_test_split

X = full_data[feature_columns]
y = full_data['Label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print(f"Training set size: {X_train.shape}")
print(f"Test set size: {X_test.shape}")

#### Hybrid Autoencoder

In [20]:
from qiskit import QuantumCircuit, transpile
from qiskit_aer import Aer
from qiskit_machine_learning.connectors import TorchConnector
from qiskit.circuit import Parameter
import torch
import torch.nn as nn


class HybridAutoencoder(nn.Module):
    """Autoencoder whose 4-wide latent code is passed through a quantum layer.

    Layout:
        encoder:  input_dim -> 256 -> 128 -> 64 -> 4   (ReLU between layers)
        quantum:  layer returned by ``create_quantum_bottleneck()``
        decoder:  4 -> 64 -> 128 -> 256 -> input_dim   (ReLU between layers)

    Args:
        input_dim: number of features in each input row.
    """

    def __init__(self, input_dim):
        super().__init__()

        hidden_widths = (256, 128, 64)
        latent_width = 4  # size of the compressed representation

        # Classical encoder: widths shrink down to the latent size.
        self.encoder = self._mlp((input_dim, *hidden_widths, latent_width))

        # Quantum layer sitting between encoder and decoder.
        self.quantum_circuit = create_quantum_bottleneck()

        # Classical decoder: the encoder's widths in reverse.
        self.decoder = self._mlp((latent_width, *reversed(hidden_widths), input_dim))

    @staticmethod
    def _mlp(widths):
        """Build Linear layers for consecutive width pairs, with ReLU between them (none after the last)."""
        layers = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        # Drop the trailing activation so the final Linear output is returned as-is.
        return nn.Sequential(*layers[:-1])

    def forward(self, x):
        """Return ``(latent, reconstruction)``.

        ``latent`` is the encoder output (before the quantum layer);
        ``reconstruction`` is the decoder applied to the quantum layer's output.
        """
        latent = self.encoder(x)
        reconstruction = self.decoder(self.quantum_circuit(latent))
        return latent, reconstruction

# Example quantum circuit acting as the bottleneck (PQC)
def create_quantum_bottleneck():
    """Build the 4-qubit bottleneck circuit and expose it as a torch layer.

    Circuit: one RX rotation per qubit (the four rotation angles are the
    layer's inputs), followed by a CNOT chain 0->1->2->3.

    TorchConnector wraps a qiskit-machine-learning neural network, not a bare
    QuantumCircuit, so the circuit is first wrapped in an EstimatorQNN. One
    Pauli-Z observable per qubit is measured, so the layer maps a
    (batch, 4) tensor to a (batch, 4) tensor, which is what the decoder's
    first Linear(4, ...) layer consumes.

    Returns:
        TorchConnector: torch module wrapping the quantum neural network.
    """
    # Imported locally because the enclosing cell's import list does not include them.
    from qiskit.quantum_info import SparsePauliOp
    from qiskit_machine_learning.neural_networks import EstimatorQNN

    num_qubits = 4
    qc = QuantumCircuit(num_qubits)
    params = [Parameter(f'θ{i}') for i in range(num_qubits)]

    # Angle-encode one latent component per qubit.
    for qubit, theta in enumerate(params):
        qc.rx(theta, qubit)

    # Linear entanglement chain.
    qc.cx(0, 1)
    qc.cx(1, 2)
    qc.cx(2, 3)

    # <Z> on each qubit. Pauli labels are written with qubit 0 as the
    # right-most character, hence the padding order.
    observables = [
        SparsePauliOp.from_list([("I" * (num_qubits - 1 - qubit) + "Z" + "I" * qubit, 1.0)])
        for qubit in range(num_qubits)
    ]

    # The RX angles are data inputs (fed by the encoder), not trainable weights.
    # input_gradients=True makes the network return gradients with respect to
    # its inputs, which is needed for backpropagation to reach the encoder.
    qnn = EstimatorQNN(
        circuit=qc,
        observables=observables,
        input_params=params,
        input_gradients=True,
    )

    quantum_layer = TorchConnector(qnn)

    return quantum_layer

#### Train the Autoencoder

In [29]:
# Seed torch so that weight initialisation (and therefore the loss curve) is repeatable.
torch.manual_seed(42)

# Initialize the hybrid autoencoder
input_dim = X_train.shape[1]
hybrid_autoencoder = HybridAutoencoder(input_dim=input_dim)

# Define the loss function and optimizer
criterion = nn.MSELoss()  # Reconstruction loss
optimizer = torch.optim.Adam(hybrid_autoencoder.parameters(), lr=0.001)

# torch.tensor() cannot ingest a pandas DataFrame directly, so go through a
# float32 NumPy array. The conversion does not change between epochs, so it is
# done once here instead of twice per iteration.
train_inputs = torch.as_tensor(np.asarray(X_train, dtype=np.float32))

# NOTE(review): every epoch pushes the whole training set through the model
# (including the simulated quantum layer) in a single batch. This is likely
# very slow on the full dataset; consider mini-batches or a subsample.

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    # Zero out the gradients
    optimizer.zero_grad()

    # Forward pass through the hybrid autoencoder (classical + quantum parts)
    encoded, decoded = hybrid_autoencoder(train_inputs)

    # Reconstruction loss: the decoder output should match the input
    loss = criterion(decoded, train_inputs)

    # Backpropagation and optimization
    loss.backward()  # Calculate gradients
    optimizer.step()  # Update parameters

    # Print loss every 10 epochs
    if epoch % 10 == 0:
        print(f'Epoch {epoch}/{num_epochs}, Loss: {loss.item()}')

In [None]:
# Extract latent space representation (encoder output only; the quantum layer
# and decoder are not applied here).
# - torch.tensor() cannot ingest a pandas DataFrame directly, so the inputs are
#   converted through a float32 NumPy array.
# - no_grad() skips building an autograd graph, which is not needed for inference.
with torch.no_grad():
    latent_train = hybrid_autoencoder.encoder(
        torch.as_tensor(np.asarray(X_train, dtype=np.float32))
    ).numpy()
    latent_test = hybrid_autoencoder.encoder(
        torch.as_tensor(np.asarray(X_test, dtype=np.float32))
    ).numpy()

#### QSVM for Anomaly Detection

In [None]:
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.algorithms import QSVC
# FidelityQuantumKernel replaces the legacy QuantumKernel / quantum_instance
# API, which is deprecated and no longer available in newer
# qiskit-machine-learning releases.
from qiskit_machine_learning.kernels import FidelityQuantumKernel

# Create a ZZ Feature Map with 4 qubits and depth of 2
# (4 matches the width of the autoencoder's latent code)
feature_map = ZZFeatureMap(feature_dimension=4, reps=2, entanglement='linear')

# Define the quantum kernel using the feature map. With no explicit fidelity
# argument the kernel uses the library's default fidelity implementation.
quantum_kernel = FidelityQuantumKernel(feature_map=feature_map)

# Initialize QSVC with the quantum kernel
qsvc = QSVC(quantum_kernel=quantum_kernel)

# NOTE(review): a kernel SVM evaluates the kernel between pairs of training
# rows, so the cost grows roughly quadratically with the training-set size.
# Fitting on the full latent_train is likely infeasible on a simulator;
# consider fitting on a (stratified) subsample.

# Fit the QSVC model on the latent space from the hybrid autoencoder
qsvc.fit(latent_train, y_train)

# Predict on the test set using the QSVC
y_pred_svm = qsvc.predict(latent_test)

#### Evaluation

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Score the QSVC predictions against the held-out labels.
# Accuracy takes no averaging mode; the three per-class metrics are averaged
# with weights proportional to each class's support.
accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm, recall_svm, f1_svm = (
    metric(y_test, y_pred_svm, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

summary = f"SVM - Accuracy: {accuracy_svm}, Precision: {precision_svm}, Recall: {recall_svm}, F1: {f1_svm}"
print(summary)