# Detecting network intrusions & anomalies with higher-order topological kernels via quantum computation notebook

## 0. Imports

0.0. Dependencies

In [29]:
import os
from quask.core_implementation.qiskit_kernel import QiskitKernel
from quask.core import KernelType, Ansatz, KernelFactory
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler   # added
import numpy as np

0.1. Establish creds if running on IBM hardware (assumes IBM Quantum Cloud account exists and credentials saved with save_account())

In [30]:
# service = QiskitRuntimeService(instance="crn:v1:bluemix:public:quantum-computing:us-east:a/b8ff6077c08a4ea9871560ccb827d457:d3452110-b228-4c79-8959-15ea8cfd435d::") # assuming creds saved with save_account()
# backend = service.backend("ibm_rensselaer")
print("Running on Qiskit simulator (noiseless mode).")

Running on Qiskit simulator (noiseless mode).


In [31]:
from qiskit_aer import Aer

print("âœ… qiskit imported successfully!")


âœ… qiskit imported successfully!


## 1. Create Kernels

### 1.0. Configure for either quantum simulator or hardware backend

Simulator (noiseless) backend

In [32]:
def create_qiskit_noiseless(ansatz, measurement: str, type: KernelType):
    return QiskitKernel(
        ansatz,
        measurement,
        type,
        platform="Aer",
        n_shots=None
    )

try:
    KernelFactory.add_implementation('qiskit_noiseless', create_qiskit_noiseless)
except ValueError:
    pass

KernelFactory.set_current_implementation('qiskit_noiseless')
print("KernelFactory set to Qiskit noiseless simulator.")

KernelFactory set to Qiskit noiseless simulator.


Hardware (NISQ) backend

Note: only one implementation can be selected (e.g., qiskit_noiseless OR qiskit_ibm)

### 1.1. Load Dataset

In [33]:
benign_path = "500-benign.npy"
attack_path = "500-attack.npy"

qX1 = np.load(benign_path)
qX2 = np.load(attack_path)

attack_name = os.path.basename(attack_path).replace("attack", "").replace(".npy", "").upper()

print("âœ… Loaded datasets:", qX1.shape, qX2.shape)

âœ… Loaded datasets: (500, 5) (500, 5)


In [34]:
n_features = qX1.shape[1]
print("âœ… Number of features:", n_features)

âœ… Number of features: 5


### 1.2. Build Quantum Ansatz & Kernel

Config modified from QuASK iris dataset anomaly detection example; see [QuASK: How to optimize a quantum kernel](https://quask.readthedocs.io/en/latest/tutorials_quask/quask_2_optimizers.html) for alternate optimization techniques

In [35]:
ansatz = Ansatz(n_features=n_features, n_qubits=4, n_operations=4)
ansatz.initialize_to_identity()

ansatz.change_operation(0, 0, [0,1], "XX", 3.0)
ansatz.change_operation(1, 1, [1,2], "XY", 3.0)
ansatz.change_operation(2, 2, [2,3], "XZ", 3.0)
ansatz.change_operation(3, 3, [3,0], "YY", 3.0)

In [36]:
kernel = KernelFactory.create_kernel(ansatz, "ZZZZ", KernelType.FIDELITY)
print("âœ… Kernel built successfully!")

âœ… Kernel built successfully!


### 1.2. Instantiate machine learning model

In [37]:
model = SVC(kernel='precomputed', C=10, class_weight='balanced')  # CHANGED
print("âœ… SVM upgraded.")

âœ… SVM upgraded.


## 2. Fit quantum kernels to SVM model and test on BCCC-CIC-CSE-IDS2018

2.0. Load modified datasets (see KERNELSCRIPT.py for dataset cleaning and reduction)

2.1. Create testing and training sets

In [39]:
qX1 = qX1[:50]   # âœ… CHANGE TO 30, 50, 80, 100 to experiment
qX2 = qX2[:50]

qX = np.vstack([qX1, qX2])
qy = np.array([-1]*len(qX1) + [1]*len(qX2))

qX_train, qX_test, qy_train, qy_test = train_test_split(
    qX, qy, test_size=0.3, random_state=42, stratify=qy   # âœ… ADDED stratify
)

print("âœ… Data split.")

âœ… Data split.


2.2. Normalize data

In [40]:
scaler = StandardScaler()   # âœ… CHANGED
samples = np.append(qX_train, qX_test, axis=0)
scaler.fit(samples)

qX_train = scaler.transform(qX_train)
qX_test = scaler.transform(qX_test)

print("âœ… Data standardized.")

âœ… Data standardized.


### 2.3. Build training matrix using quantum kernel

In [41]:
K_train = kernel.build_kernel(qX_train, qX_train)
model.fit(K_train, qy_train)
print("âœ… Model trained.")

âœ… Model trained.


### 2.4. Predict the labels for the test data

In [42]:
# Predict the labels for the test data
K_test = kernel.build_kernel(qX_test, qX_train)
y_pred = model.predict(K_test)

### 2.5. Calculate and output QML model accuracy

In [43]:
accuracy = np.sum(qy_test == y_pred) / len(qy_test)
print("ðŸŽ¯ BASELINE ACCURACY:", accuracy)

# Optional additional metrics
# from sklearn.metrics import classification_report
# cr = classification_report(qy_test, y_pred)
# print(cr) 

Accuracy for 500- is 0.7


## 3. Further notes

The above demo handles a singular network attack, split for improved readability. Below is our testing across all attacks.

In [44]:
data_dir = '.'

for fname in os.listdir(data_dir):
    if fname.endswith('.npy') and 'benign' in fname:
        benign_path = os.path.join(data_dir, fname)
        
        # Construct corresponding attack file name
        attack_fname = fname.replace('benign', 'attack')
        attack_path = os.path.join(data_dir, attack_fname)

        attack_label = attack_fname.replace("attack", "").replace("500", "").replace(".npy", "").upper()

        if os.path.exists(attack_path):
            # Load both arrays
            qX1 = np.load(benign_path)
            qX2 = np.load(attack_path)

            # select first 30 samples
            qX1 = qX1[:30]
            qX2 = qX2[:30]

            # Create testing/training sets
            qX = np.vstack([qX1, qX2])
            qy = np.array([-1] * len(qX1) + [1] * len(qX2))

            qX_train, qX_test, qy_train, qy_test = train_test_split(
                qX, qy, test_size=0.3, random_state=42
            )
            
            # normalize data
            samples = np.append(qX_train, qX_test, axis=0)
            minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
            qX_train = minmax_scale.transform(qX_train)
            qX_test = minmax_scale.transform(qX_test)

            # Train
            K_train = kernel.build_kernel(qX_train, qX_train)
            model.fit(K_train, qy_train)
        
            # Test
            K_test = kernel.build_kernel(qX_test, qX_train)
            y_pred = model.predict(K_test)

            # Calculate accuracy
            accuracy = np.mean(qy_test == y_pred)
            print(f"âœ… Accuracy for {attack_label}: {accuracy:.3f}")


âœ… Accuracy for -: 0.722
