In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score
from timeit import default_timer as timer
from typing import Callable, Optional, Union, List, Dict, Any, Sequence
import json
from functools import partial
from qiskit.circuit.library import ZZFeatureMap, TwoLocal
from qiskit.circuit.library import EfficientSU2, RealAmplitudes


# Qiskit imports
from qiskit import QuantumCircuit
from qiskit.circuit import ParameterVector

# Qiskit Machine Learning requirements
from qiskit_machine_learning.kernels import (
    FidelityStatevectorKernel,
    TrainableKernel,
    TrainableFidelityStatevectorKernel,
)
from qiskit_machine_learning.kernels.algorithms import QuantumKernelTrainer
from qiskit_machine_learning.utils.loss_functions import SVCLoss
from qiskit_machine_learning.algorithms.classifiers import QSVC

from qiskit_algorithms.optimizers import SPSA

In [2]:
from sklearn.model_selection import train_test_split
# Load the pre-selected feature table. 'Label_Malicious' is the target column;
# every other column is kept as a model input.
df = pd.read_csv('Datasets/dataset_selected_features.csv')
y = df['Label_Malicious']
X = df.drop(columns=['Label_Malicious'])

# Stratified hold-out split (33% test) on the underlying NumPy arrays.
# The fixed random_state keeps the partition the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X.values,
    y.values,
    test_size=0.33,
    stratify=y.values,
    random_state=42,
)

In [3]:
import numpy as np

# Sanity check: list the distinct label values found in each split
# (y_train / y_test are the label arrays produced by the train/test split).
unique_train_classes, unique_test_classes = (
    np.unique(split_labels) for split_labels in (y_train, y_test)
)

print(f"Unique classes in training labels: {unique_train_classes}")
print(f"Unique classes in test labels: {unique_test_classes}")
print(f"Number of classes in training labels: {len(unique_train_classes)}")
print(f"Number of classes in test labels: {len(unique_test_classes)}")

Unique classes in training labels: [0. 1.]
Unique classes in test labels: [0. 1.]
Number of classes in training labels: 2
Number of classes in test labels: 2


In [4]:


# The circuit width is taken from the number of feature columns in X.
num_qubits = X.shape[1]

# Data-encoding circuit: a ZZFeatureMap over all features, two repetitions,
# linear entanglement.
feature_map = ZZFeatureMap(feature_dimension=num_qubits, reps=2, entanglement='linear')

# Variational layer appended after the encoding: RY+RZ rotation blocks with
# CZ entanglers in a linear pattern, one repetition.
var_layer = TwoLocal(
    num_qubits=num_qubits,
    rotation_blocks=['ry', 'rz'],
    entanglement_blocks='cz',
    entanglement='linear',
    reps=1,
    insert_barriers=True,
)

# Full kernel circuit = encoding followed by the variational layer.
var_feature_map = feature_map.compose(var_layer)

feature_params = feature_map.parameters               # dataset-dependent
variational_params = var_layer.parameters             # trainable

# Trainable parameters are read from var_layer itself (not from the composed
# circuit) so that only the variational layer's parameters are listed.
trainable_params = list(var_layer.parameters)

# Initial values for the trainable parameters: uniform in [-pi, pi), scaled by 0.25.
# A dedicated, seeded generator is used instead of the global NumPy RNG so the
# starting point is the same after every kernel restart. This matches the fixed
# random_state=42 used for the train/test split and keeps runs comparable.
INIT_SEED = 42
init_rng = np.random.default_rng(INIT_SEED)
init_p = 0.25 * init_rng.uniform(-np.pi, np.pi, len(trainable_params))


In [5]:
# Hyper-parameter grid iterated over by the training cell (one value per axis here).
optimizers: List[str] = ["SPSA"]
sub_kernel_sizes: List[Optional[int]] = [1000]
batch_sizes: List[int] = [1000]
batch_types: List[bool] = [True]

# Container for the result records, keyed by optimizer name.
results: Dict[str, List[Dict[str, Any]]] = dict()

# Show the initial values chosen for the trainable kernel parameters.
print(init_p)

[ 0.32397058  0.12001553 -0.14784499 -0.18021386 -0.06911982 -0.70494077
 -0.13842304 -0.19646906 -0.75135399  0.7616688  -0.07084935  0.01155279
 -0.65692906 -0.44676391  0.22926334  0.48693151 -0.58154689  0.06384499
 -0.20945453 -0.52071531 -0.69055224  0.28409528  0.02863196  0.33058916
  0.01521392 -0.73991161 -0.70880685 -0.25111838  0.75596863 -0.06299543
  0.75583834 -0.15382136]


In [1]:



# from utils.Batches import BatchedSVCLoss
# from utils.opt import get_optimizer_options
# from utils.pegasosqsvc import run_PegasosQSVC
# from utils.plots import plot_average_loss_with_variance
# from utils.qktcallback import QKTCallback


# for opt in optimizers:
#     opt_results = []
#     for sub_kernel_size in sub_kernel_sizes:
#         for batch_size in batch_sizes:
#             if sub_kernel_size == None:
#                 if batch_size != 1:
#                     continue
#             for batch_type in batch_types:
#                 if sub_kernel_size == None:
#                     sk_size = len(X_train)
#                     if batch_type == True:
#                         continue
#                 else:
#                     sk_size = int(sub_kernel_size)
#                 print("=" * 50)
#                 print("Training with ", opt, " optimizer.")
#                 print(
#                     "Currently using subkernel size: ",
#                     sub_kernel_size,
#                     " where loss is averaged over ",
#                     batch_size,
#                     " loss.",
#                 )
#                 print("Sub-kernels prepared according to Balanced:", batch_type)
#                 print("=" * 50)
#                 cb, optimizer = get_optimizer_options(opt)
#                 if cb == "SPSACallback":
#                     callb = QKTCallback()
#                     optimizer = optimizer(callback=callb.callback)
#                 else:
#                     optimizer = optimizer()
               
#                 qk = TrainableFidelityStatevectorKernel(
#                     feature_map=var_feature_map, training_parameters=var_layer.parameters,
#                 )
#                 # Instantiate Sub-kernel loss
#                 loss = BatchedSVCLoss(
#                     X_train,
#                     y_train,
#                     minibatch_size=batch_size,
#                     sub_kernel_size=sub_kernel_size,
#                     balanced_batch=batch_type,
#                     shuffle=True,
#                     encoder=None,
#                 )
#                 # Instantiate a quantum kernel trainer.
#                 qkt = QuantumKernelTrainer(
#                     quantum_kernel=qk,
#                     loss=loss,
#                     optimizer=optimizer,
#                     initial_point=init_p
#                 )
#                 # Train the kernel
#                 start = timer()
#                 qka_results = qkt.fit(X_train, y_train)
#                 end = timer()
#                 train_time = end - start
#                 print(f" Training Runtime: {train_time} secs. Results: ")
#                 print()
#                 # print('-'*80)
#                 # print(qka_results)
#                 # print('-'*80)
#                 # print()
#                 # print("Evaluating optimized kernel with the optimal parameters...")
#                 optimized_kernel = qka_results.quantum_kernel

#                 start = timer()

#                 # Train the QSVC using optimized quantum fidelity kernel
#                 qsvc, auc, f1, accuracy = run_PegasosQSVC(
#                     X_train,
#                     y_train,
#                     X_test,
#                     y_test,
#                     optimized_kernel,
#                 )
#                 end = timer()
#                 qsvc_runtime = end - start
#                 num_support_vectors = len(qsvc.support_)

#                 print(f"QSVC Training Runtime: {qsvc_runtime} secs")

#                 # Print results
#                 print("-" * 50)
#                 print("F1 Score = %.3f" % (f1))
#                 print("ROC AUC = %.3f" % (auc))
#                 print("Accuracy Score = %.3f" % (accuracy))
#                 print()
#                 print()

#                 # Get the training loss
#                 plot_data = (
#                     len(X_train) * np.array(loss.loss_arr) / (sk_size * num_support_vectors)
#                 )

#                 # Plotting
#                 plot_average_loss_with_variance(plot_data, N=20)

#                 # Append the results
#                 opt_results.append(
#                     {
#                         "ROC": auc,
#                         "F1": f1,
#                         "accuracy": accuracy,
#                         "sub_kernel_size": sub_kernel_size,
#                         "batch_size": batch_size,
#                         "Balanced": batch_type,
#                         "train_time": train_time,
#                         "qsvc_runtime": qsvc_runtime,
#                         "training_loss": plot_data.tolist(),
#                         "loss": loss.loss_arr,
#                         "opt_params": qka_results.optimal_point.tolist(),
#                     }
#                 )
#     results.update({opt: opt_results})

# with open("Subkernel_results.json", "w", encoding="utf-8") as f:
#     json.dump(results, f, ensure_ascii=False, indent=4)

In [None]:
import os
import numpy as np
from timeit import default_timer as timer
from utils.Batches import BatchedSVCLoss
from utils.opt import get_optimizer_options
from utils.pegasosqsvc import run_PegasosQSVC
from utils.plots import plot_average_loss_with_variance
from utils.qktcallback import QKTCallback

# Result records per optimizer name; replaced by the checkpoint contents below
# when a checkpoint file is present.
results = {}

# JSON file holding the partial results of the sweep.
checkpoint_file = "checkpoint_subkernel_results.json"

if not os.path.exists(checkpoint_file):
    print("No checkpoint found, starting fresh.")
else:
    # Resume: restore everything the checkpoint file contains.
    with open(checkpoint_file, "r", encoding="utf-8") as f:
        results = json.load(f)
    print("Loaded checkpoint from", checkpoint_file)

    # Report how many records the checkpoint holds relative to the grid size.
    total_combinations = (
        len(optimizers) * len(sub_kernel_sizes) * len(batch_sizes) * len(batch_types)
    )
    completed = sum(len(finished_runs) for finished_runs in results.values())
    print(f"Progress: {completed}/{total_combinations} combinations completed ({completed/total_combinations*100:.1f}%)")

# Sweep over every (optimizer, sub-kernel size, batch size, balanced flag)
# combination: train the quantum kernel, fit the QSVC with it, record the
# metrics and checkpoint after each finished combination.

# Size of the full grid. It does not change during the sweep, so it is
# computed once here rather than after every run.
total_combinations = len(optimizers) * len(sub_kernel_sizes) * len(batch_sizes) * len(batch_types)

for opt in optimizers:
    # Reuse the records restored from a checkpoint, or start an empty list.
    opt_results = results.setdefault(opt, [])

    for sub_kernel_size in sub_kernel_sizes:
        for batch_size in batch_sizes:
            # With no sub-kernel size (None) only batch_size == 1 is run.
            if sub_kernel_size is None and batch_size != 1:
                continue

            for batch_type in batch_types:
                if sub_kernel_size is None:
                    # No sub-kernel: the whole training set is the kernel size,
                    # and the balanced variant is skipped.
                    sk_size = len(X_train)
                    if batch_type:
                        continue
                else:
                    sk_size = int(sub_kernel_size)

                # Skip combinations already present in the (possibly restored) results.
                combination_exists = any(
                    res["sub_kernel_size"] == sub_kernel_size
                    and res["batch_size"] == batch_size
                    and res["Balanced"] == batch_type
                    for res in opt_results
                )
                if combination_exists:
                    print(f"Skipping already processed: {opt}, sub_kernel_size={sub_kernel_size}, batch_size={batch_size}, batch_type={batch_type}")
                    continue

                print("=" * 50)
                print(f"Training with {opt} optimizer.")
                print(
                    f"Currently using subkernel size: {sub_kernel_size}, "
                    f"where loss is averaged over {batch_size} loss."
                )
                print(f"Sub-kernels prepared according to Balanced: {batch_type}")
                print("=" * 50)

                # get_optimizer_options returns a callback tag and an optimizer
                # factory; the factory gets a QKTCallback only for the SPSA tag.
                cb, optimizer = get_optimizer_options(opt)
                if cb == "SPSACallback":
                    callb = QKTCallback()
                    optimizer = optimizer(callback=callb.callback)
                else:
                    optimizer = optimizer()

                qk = TrainableFidelityStatevectorKernel(
                    feature_map=var_feature_map, training_parameters=var_layer.parameters,
                )
                # Instantiate Sub-kernel loss
                loss = BatchedSVCLoss(
                    X_train,
                    y_train,
                    minibatch_size=batch_size,
                    sub_kernel_size=sub_kernel_size,
                    balanced_batch=batch_type,
                    shuffle=True,
                    encoder=None,
                )
                # Instantiate a quantum kernel trainer
                qkt = QuantumKernelTrainer(
                    quantum_kernel=qk,
                    loss=loss,
                    optimizer=optimizer,
                    initial_point=init_p
                )
                # Train the kernel
                start = timer()
                qka_results = qkt.fit(X_train, y_train)
                end = timer()
                train_time = end - start
                print(f"Training Runtime: {train_time} secs. Results: ")
                print()

                optimized_kernel = qka_results.quantum_kernel

                start = timer()
                # Train the QSVC using optimized quantum fidelity kernel
                qsvc, auc, f1, accuracy = run_PegasosQSVC(
                    X_train,
                    y_train,
                    X_test,
                    y_test,
                    optimized_kernel,
                )
                end = timer()
                qsvc_runtime = end - start
                num_support_vectors = len(qsvc.support_)

                print(f"QSVC Training Runtime: {qsvc_runtime} secs")

                # Print results
                print("-" * 50)
                print(f"F1 Score = {f1:.3f}")
                print(f"ROC AUC = {auc:.3f}")
                print(f"Accuracy Score = {accuracy:.3f}")
                print()
                print()

                # Recorded loss values, rescaled by
                # len(X_train) / (sk_size * num_support_vectors) for plotting.
                # NOTE(review): num_support_vectors == 0 would yield inf values here.
                raw_loss = np.array(loss.loss_arr)
                plot_data = len(X_train) * raw_loss / (sk_size * num_support_vectors)

                # Plotting
                plot_average_loss_with_variance(plot_data, N=20)

                # Append the results. Everything is converted to plain Python
                # floats/lists so json.dump cannot fail on NumPy values after
                # the expensive training has already finished.
                opt_results.append(
                    {
                        "ROC": float(auc),
                        "F1": float(f1),
                        "accuracy": float(accuracy),
                        "sub_kernel_size": sub_kernel_size,
                        "batch_size": batch_size,
                        "Balanced": batch_type,
                        "train_time": train_time,
                        "qsvc_runtime": qsvc_runtime,
                        "training_loss": plot_data.tolist(),
                        "loss": raw_loss.tolist(),
                        "opt_params": qka_results.optimal_point.tolist(),
                    }
                )

                # Save checkpoint after each iteration. The JSON is written to a
                # temporary file and then swapped in with os.replace, so an
                # interruption during the dump cannot leave a truncated
                # checkpoint that would break the next resume.
                tmp_checkpoint_file = checkpoint_file + ".tmp"
                with open(tmp_checkpoint_file, "w", encoding="utf-8") as f:
                    json.dump(results, f, ensure_ascii=False, indent=4)
                os.replace(tmp_checkpoint_file, checkpoint_file)
                print(f"Checkpoint saved to {checkpoint_file}")

                # Calculate and display updated progress
                completed = sum(len(finished_runs) for finished_runs in results.values())
                print(f"Progress: {completed}/{total_combinations} combinations completed ({completed/total_combinations*100:.1f}%)")

# Save final results.
# The file name is held in one variable so the path that is written and the
# path that is reported are always the same. The previous literal had a stray
# backslash inside the name, which is an invalid escape sequence and produced
# a different file name from the one printed.
final_results_file = "Subkernel_results.json"
with open(final_results_file, "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=4)
print(f"Final results saved to {final_results_file}")

No checkpoint found, starting fresh.
Training with SPSA optimizer.
Currently using subkernel size: 1000, where loss is averaged over 1000 loss.
Sub-kernels prepared according to Balanced: True
