<a href="https://colab.research.google.com/github/sojak-c4/Covert-Comm/blob/main/QSVM_Thesis_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Thesis QSVM Code

In [1]:
#Installs
!pip -q install pennylane scikit-learn pandas numpy matplotlib seaborn kagglehub pennylane-lightning[gpu] dask[dataframe]

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.1/57.1 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m84.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m930.8/930.8 kB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m60.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m85.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m101.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m167.9/167.9 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m65.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Import dataset via kagglehub
import kagglehub


# Download the latest version of the dataset (skipped automatically if it is already present locally)
data_path = kagglehub.dataset_download("solarmainframe/ids-intrusion-csv")

print("Path to dataset files:", data_path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/solarmainframe/ids-intrusion-csv?dataset_version_number=1...


 20%|██        | 331M/1.60G [00:16<01:04, 21.4MB/s]


KeyboardInterrupt: 

In [None]:
import pandas as pd
import os

# Reset the name before loading so that a failed load cannot be masked by a
# stale `df` left in the kernel by an earlier run of this cell.
df = None

try:
    # Path to the single CSV file used in this notebook.
    # Replace the file name with your actual file if needed.
    file_path = os.path.join(data_path, '02-20-2018.csv')

    print(f"Loading file: '{os.path.basename(file_path)}'")

    # Read the single CSV file directly into a DataFrame.
    df = pd.read_csv(file_path)

    print("File loaded successfully.")

except FileNotFoundError:
    print(f"Error: The file was not found at '{file_path}'. Please check the path.")
except Exception as e:
    # Best-effort load: report the problem and let the check below decide.
    print(f"An error occurred: {e}")

# Only report on the DataFrame if this run of the cell actually produced one
if df is not None and not df.empty:
    print(f"\n--- Dataset Info ---")
    print(f"Total number of rows: {df.shape[0]}")
    print(f"Total number of columns: {df.shape[1]}")
    # Example of how to access data, e.g., view unique labels
    if 'Label' in df.columns:
        print("Unique labels found:", df['Label'].unique())
    else:
        print("Warning: no 'Label' column found in the dataset.")
else:
    print("Could not load or process the dataset.")

In [None]:
# Dataset Cleaning

import numpy as np

print(f"Number of rows before initial cleaning: {df.shape[0]}")
print(f"Number of columns before initial cleaning: {df.shape[1]}")

## Drop constant columns, turn +/-infinity into NaN, drop rows with NaN,
## then drop exact duplicate rows. The chain builds a new DataFrame, so no
## in-place mutation is needed.
df = (
    df.loc[:, df.nunique() > 1]
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
    .drop_duplicates()
)

## Separating features and labels
y = df['Label']
X = df.drop(columns='Label')

## Drop timestamps (errors='ignore' so the cell still runs when the column
## is absent, e.g. because it was already removed above)
X = X.drop(columns='Timestamp', errors='ignore')

## Select only numeric columns to remove any remaining non-numeric identifiers
X = X.select_dtypes(include=np.number)

## Converting labels to a binary format (0 = Benign, 1 = Attack).
## Vectorised comparison instead of a per-row Python lambda.
y = (y != 'Benign').astype(int)

print(f"Number of rows after initial cleaning: {X.shape[0]}")
print(f"Number of columns after initial cleaning: {X.shape[1]}")


In [None]:
# Feature Selection and dimensionality reduction

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

## NOTE(review): the scaler and PCA are fitted on the full dataset, i.e. before
## the train/test split performed later in the notebook, so test rows influence
## the transform. For a leakage-free evaluation, fit both on the training rows
## only and apply `.transform` to the test rows.

## It's crucial to scale the data before PCA
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

## Reduce to a manageable number of features (e.g., 4-8)
## TUNE THIS FOR ANALYSIS
n_components = 4
## random_state fixes the randomized SVD solver so the components (and all
## downstream results) are reproducible across runs.
pca = PCA(n_components=n_components, random_state=42)
X_pca = pca.fit_transform(X_scaled)

# Convert X_pca NumPy array back to a Pandas DataFrame
# Use the index from the features DataFrame `X` so rows stay aligned with `y`
X_pca_df = pd.DataFrame(X_pca, index=X.index)

In [None]:
# Split data into training and test set
from sklearn.model_selection import train_test_split
import numpy as np

# Stage 1: draw a stratified working subset of 1000 rows; the remainder is discarded.
X_subset, _, y_subset, _ = train_test_split(X_pca_df, y, stratify=y, train_size=1000, random_state=42)

# Stage 2: stratified 70/30 split of that subset into training and test data.
X_train, X_test, y_train, y_test = train_test_split(X_subset, y_subset, stratify=y_subset, test_size=0.3, random_state=42)



In [None]:
# Undersampling to balance the training dataset

benign_indices = y_train[y_train == 0].index
attack_indices = y_train[y_train == 1].index
n_attacks = len(attack_indices)

# Each class contributes as many rows as the smaller class has, so the draw
# without replacement can never ask for more rows than exist.
n_per_class = min(n_attacks, len(benign_indices))
if n_per_class == 0:
    raise ValueError("Cannot balance the training set: it contains only one class.")

# Seeded generator so the balanced set is identical on every run.
rng = np.random.default_rng(42)
random_benign_indices = rng.choice(benign_indices, n_per_class, replace=False)
if n_attacks > n_per_class:
    # Attacks are the majority class here: undersample them instead.
    kept_attack_indices = rng.choice(attack_indices, n_per_class, replace=False)
else:
    kept_attack_indices = attack_indices
under_sample_indices = np.concatenate([kept_attack_indices, random_benign_indices])

# Use .loc for robust, label-based indexing with Pandas
X_train_balanced = X_train.loc[under_sample_indices]
y_train_balanced = y_train.loc[under_sample_indices]

print("Successfully created balanced training set.")
print(f"Shape of X_train_balanced: {X_train_balanced.shape}")

In [None]:
# Quantum Feature Encoding via Angle Encoding
import pennylane as qml
from pennylane import numpy as pnp

n_qubits = n_components # Use one qubit per feature after PCA

# Prefer the GPU simulator, but fall back to the CPU simulator when it cannot
# be created (for example, no GPU runtime or the plugin is not installed), so
# the notebook still runs end to end.
try:
    dev = qml.device("lightning.gpu", wires=n_qubits)   ## GPU based computation
except Exception as exc:
    print(f"lightning.gpu unavailable ({exc}); falling back to default.qubit.")
    dev = qml.device("default.qubit", wires=n_qubits)   ## CPU based computation

def angle_encoding_feature_map(x):
    """Angle-encode the feature vector ``x`` as Z rotations on a superposition.

    A Hadamard is applied to every qubit before the Z rotations. Without it
    the register stays in |0...0>, on which an RZ rotation only contributes a
    phase, so the overlap between any two encoded samples would always be 1
    and the resulting kernel would carry no information.

    Args:
        x: Sequence of rotation angles, one per qubit (at most ``n_qubits``).
    """
    wires = range(n_qubits)
    for wire in wires:
        qml.Hadamard(wires=wire)
    qml.AngleEmbedding(features=x, wires=wires, rotation='Z')

In [None]:
# Computing the quantum kernel

## Select the feature map used by the kernel circuit (e.g., angle_encoding_feature_map).
## The kernel circuit looks this name up when it runs, so rebind it here to switch encodings.
feature_map_to_use = angle_encoding_feature_map

@qml.qnode(dev)
def kernel_circuit(x1, x2):
    """Apply the feature map for ``x1``, then its adjoint for ``x2``.

    Returns the probabilities of all computational basis states over the
    ``n_qubits`` wires; the first entry is the probability of |0...0>.
    """
    encode = feature_map_to_use
    all_wires = range(n_qubits)

    encode(x1)                 # forward encoding of the first sample
    qml.adjoint(encode)(x2)    # inverse encoding of the second sample
    return qml.probs(wires=all_wires)

import tqdm

def quantum_kernel(X1, X2, desc="Computing kernel matrix"):
    """Compute the quantum kernel matrix between two sample sets, with a progress bar.

    Args:
        X1: Samples for the rows of the matrix (DataFrame, 2-D array or list of
            rows), one sample per row.
        X2: Samples for the columns of the matrix, same layout as ``X1``.
        desc: Label shown on the progress bar.

    Returns:
        Array of shape ``(len(X1), len(X2))`` where entry ``[i, j]`` is the
        |0...0> probability returned by ``kernel_circuit`` for row ``i`` of
        ``X1`` and row ``j`` of ``X2``.
    """
    # When both arguments are the same object the matrix is symmetric, so only
    # the upper triangle needs to be evaluated on the device.
    symmetric = X1 is X2

    # Iterating a DataFrame yields its column labels, not its rows, so convert
    # both inputs to plain 2-D arrays before looping over samples.
    rows_1 = np.asarray(X1, dtype=float)
    rows_2 = rows_1 if symmetric else np.asarray(X2, dtype=float)

    kernel_matrix = np.zeros((len(rows_1), len(rows_2)))
    for i, x1 in tqdm.tqdm(enumerate(rows_1), total=len(rows_1), desc=desc):
        for j, x2 in enumerate(rows_2):
            if symmetric and j < i:
                # Already computed as entry [j, i].
                kernel_matrix[i, j] = kernel_matrix[j, i]
                continue
            # The first element of the probs is the probability of the |0...0> state
            kernel_matrix[i, j] = kernel_circuit(x1, x2)[0]
    return kernel_matrix

In [None]:
# Compute kernel matrices

## Compute the training kernel matrix
print("Computing training kernel matrix...")
kernel_train = quantum_kernel(X_train, X_train, desc="Training Kernel")

## Compute the testing kernel matrix
print("Computing testing kernel matrix...")
kernel_test = quantum_kernel(X_test, X_train, desc="Testing Kernel")

In [None]:
# Train

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

## Create and train the SVM
svm = SVC(kernel='precomputed')
svm.fit(kernel_train, y_train)

## Make predictions
y_pred = svm.predict(kernel_test)

## Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

## Display the confusion matrix
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()