## Load in data

In [1]:
import pandas as pd

monday_data = pd.read_csv('data/Monday-WorkingHours.pcap_ISCX.csv')
tuesday_data = pd.read_csv('data/Tuesday-WorkingHours.pcap_ISCX.csv')
wednesday_data = pd.read_csv('data/Wednesday-workingHours.pcap_ISCX.csv')
thursday_web_attacks_data = pd.read_csv('data/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv')
thursday_infiltration_data = pd.read_csv('data/Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv')
friday_morning_data = pd.read_csv('data/Friday-WorkingHours-Morning.pcap_ISCX.csv')
friday_port_scan_data = pd.read_csv('data/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv')
friday_ddos_data = pd.read_csv('data/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv')

In [2]:
full_data = pd.concat([monday_data, tuesday_data, wednesday_data,
                       thursday_web_attacks_data, thursday_infiltration_data,
                       friday_morning_data, friday_port_scan_data, friday_ddos_data], ignore_index=True)

print(full_data.shape)
full_data.head()

(2830743, 79)


Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,49188,4,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
1,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
2,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
3,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
4,49486,3,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN


In [3]:
full_data.columns = full_data.columns.str.strip()

In [4]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import numpy as np

full_data.replace([np.inf, -np.inf], None, inplace=True)
full_data.dropna(inplace=True)

label_encoder = LabelEncoder()

full_data['Label'] = label_encoder.fit_transform(full_data['Label'])

In [5]:
feature_columns = full_data.columns.difference(['Label'])

scaler = StandardScaler()

full_data[feature_columns] = scaler.fit_transform(full_data[feature_columns])

full_data.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,2.250506,-0.439612,-0.009825,-0.010426,-0.053793,-0.007145,-0.281253,-0.210906,-0.280686,-0.245193,...,0.002547,-0.125799,-0.104619,-0.149404,-0.101068,-0.352126,-0.109516,-0.357072,-0.339184,0
1,2.250506,-0.439612,-0.009825,-0.010426,-0.053793,-0.007145,-0.281253,-0.210906,-0.280686,-0.245193,...,0.002547,-0.125799,-0.104619,-0.149404,-0.101068,-0.352126,-0.109516,-0.357072,-0.339184,0
2,2.250506,-0.439612,-0.009825,-0.010426,-0.053793,-0.007145,-0.281253,-0.210906,-0.280686,-0.245193,...,0.002547,-0.125799,-0.104619,-0.149404,-0.101068,-0.352126,-0.109516,-0.357072,-0.339184,0
3,2.250506,-0.439612,-0.009825,-0.010426,-0.053793,-0.007145,-0.281253,-0.210906,-0.280686,-0.245193,...,0.002547,-0.125799,-0.104619,-0.149404,-0.101068,-0.352126,-0.109516,-0.357072,-0.339184,0
4,2.266813,-0.439612,-0.009825,-0.010426,-0.053793,-0.007145,-0.281253,-0.210906,-0.280686,-0.245193,...,0.002547,-0.125799,-0.104619,-0.149404,-0.101068,-0.352126,-0.109516,-0.357072,-0.339184,0


In [6]:
from sklearn.model_selection import train_test_split

X = full_data[feature_columns]
y = full_data['Label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print(f"Training set size: {X_train.shape}")
print(f"Test set size: {X_test.shape}")

Training set size: (2262300, 78)
Test set size: (565576, 78)


In [20]:
# Import necessary libraries
import numpy as np
from qiskit import transpile
from qiskit_aer import Aer
from qiskit.circuit import QuantumCircuit, Parameter
from qiskit_algorithms.optimizers import COBYLA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from qiskit_machine_learning.circuit.library import QNNCircuit
from qiskit_machine_learning.connectors import TorchConnector
from torch.optim import Adam
import torch
from sklearn.preprocessing import MinMaxScaler

In [21]:
# Build the quantum autoencoder circuit
def build_qae_circuit(n_input_qubits, n_latent_qubits):
    qc = QuantumCircuit(n_input_qubits)
    params = [Parameter(f'theta_{i}') for i in range(n_input_qubits)]

    # Apply parameterized gates (RX, RY, RZ)
    for i in range(n_input_qubits):
        qc.rx(params[i], i)
        qc.ry(params[i], i)
        qc.rz(params[i], i)

    # Entanglement layer
    for i in range(n_input_qubits - 1):
        qc.cx(i, i + 1)

    # Return circuit and parameters
    return qc, params

In [22]:
# Define the cost function for quantum autoencoder optimization
def cost_function(params, qae_circuit, n_input_qubits, simulator):
    # Update circuit with current parameters
    qae_circuit = qae_circuit.bind_parameters(params)
    
    # Simulate the quantum circuit
    job = transpile(qae_circuit, simulator, shots=1024)
    result = job.result()
    counts = result.get_counts()
    
    # Loss function: target state is '00..0'
    target_state = '0' * n_input_qubits
    loss = 1 - (counts.get(target_state, 0) / 1024)
    
    return loss

In [23]:
# Train the Quantum Autoencoder
def train_qae(X_train):
    n_input_qubits = X_train.shape[1]
    n_latent_qubits = n_input_qubits // 2  # Compress to half the qubits

    # Build the quantum autoencoder circuit
    qae_circuit, params = build_qae_circuit(n_input_qubits, n_latent_qubits)

    # Use the Aer simulator
    simulator = Aer.get_backend('qasm_simulator')

    # COBYLA optimizer
    optimizer = COBYLA(maxiter=100)
    initial_params = np.random.rand(len(params))

    # Optimize the QAE circuit
    result = optimizer.optimize(num_vars=len(initial_params),
                                objective_function=lambda p: cost_function(p, qae_circuit, n_input_qubits, simulator),
                                initial_point=initial_params)

    optimal_params = result[0]
    print("Optimal Parameters:", optimal_params)

    # Return the optimized quantum circuit
    return qae_circuit.bind_parameters(optimal_params), optimal_params


In [24]:
# Quantum Neural Network (QNN) for Latent Space Extraction
def get_qnn(qae_circuit, n_input_qubits, n_latent_qubits):
    # Define the Quantum Neural Network (QNN)
    qnn = QNNCircuit(qae_circuit, input_params=qae_circuit.parameters, 
                     output_params=qae_circuit.parameters[:n_latent_qubits], 
                     quantum_instance=Aer.get_backend('statevector_simulator'))
    return qnn


In [25]:
# Convert the input data into Torch tensors and process using the QNN
def apply_qae_and_extract_latent_space(X, qnn):
    X_torch = torch.tensor(X, dtype=torch.float32)

    # Convert the QNN to a PyTorch connector
    qnn_torch = TorchConnector(qnn)

    # Forward pass through the QNN to get the latent space representation
    latent_space = qnn_torch(X_torch).detach().numpy()

    return latent_space


In [26]:
# Prepare the data for quantum encoding and training
def preprocess_data(X_train, X_test):
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    return X_train_scaled, X_test_scaled

In [27]:

# Assume X_train and X_test are prepared from previous cells (loaded and split)
X_train_scaled, X_test_scaled = preprocess_data(X_train, X_test)

In [28]:

# Train the Quantum Autoencoder and extract the compressed representation
qae_circuit, optimal_params = train_qae(X_train_scaled)

AttributeError: 'COBYLA' object has no attribute 'optimize'

In [None]:

# Define the Quantum Neural Network (QNN) for the autoencoder
n_input_qubits = X_train_scaled.shape[1]
n_latent_qubits = n_input_qubits // 2
qnn = get_qnn(qae_circuit, n_input_qubits, n_latent_qubits)

In [None]:

# Extract the latent space representation using the trained QAE
X_train_latent = apply_qae_and_extract_latent_space(X_train_scaled, qnn)
X_test_latent = apply_qae_and_extract_latent_space(X_test_scaled, qnn)

In [None]:

# Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

In [None]:

# Train on the compressed dataset from QAE
rf_classifier.fit(X_train_latent, y_train)

In [None]:

# Predictions on the test set
y_pred = rf_classifier.predict(X_test_latent)

In [None]:

# Model Evaluation
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

In [None]:

# Classification report
print("\nClassification Report:\n", classification_report(y_test, y_pred))

In [None]:

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", conf_matrix)