# Qiskit Quantum Kernel Alignment (QKA) Test

This notebook demonstrates Quantum Kernel Alignment using Qiskit Machine Learning.
It performs regression on cardiac data using a Quantum Support Vector Regressor (QSVR) with an optimized quantum kernel.

In [2]:
!pip install qiskit qiskit-machine-learning qiskit-algorithms pandas scikit-learn tqdm seaborn matplotlib

Collecting qiskit-machine-learning
  Downloading qiskit_machine_learning-0.9.0-py3-none-any.whl.metadata (13 kB)
Collecting qiskit-algorithms
  Using cached qiskit_algorithms-0.4.0-py3-none-any.whl.metadata (4.7 kB)
Downloading qiskit_machine_learning-0.9.0-py3-none-any.whl (263 kB)
Using cached qiskit_algorithms-0.4.0-py3-none-any.whl (327 kB)
Installing collected packages: qiskit-machine-learning, qiskit-algorithms
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [qiskit-algorithms]qiskit-algorithms]
[1A[2KSuccessfully installed qiskit-algorithms-0.4.0 qiskit-machine-learning-0.9.0


In [3]:
import itertools
from functools import partial

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, r2_score

from qiskit.circuit import QuantumCircuit, ParameterVector
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.algorithms import QSVR
from qiskit_machine_learning.kernels.algorithms import QuantumKernelTrainer
from qiskit_machine_learning.kernels import TrainableFidelityQuantumKernel
from qiskit_machine_learning.state_fidelities import ComputeUncompute
from qiskit_machine_learning.utils.loss_functions import KernelLoss
from qiskit_machine_learning.optimizers import SPSA

from qiskit.primitives import StatevectorSampler as Sampler
from qiskit_algorithms.utils import algorithm_globals

# Set random seeds.
# SPSA is imported from qiskit_machine_learning.optimizers, which is expected to draw
# its randomness from qiskit_machine_learning.utils.algorithm_globals. That is a
# separate object from the qiskit_algorithms one imported above, so seeding only the
# latter would leave the optimizer's perturbations unseeded. Both are seeded here.
from qiskit_machine_learning.utils import algorithm_globals as qml_algorithm_globals

SEED = 42
algorithm_globals.random_seed = SEED
qml_algorithm_globals.random_seed = SEED
np.random.seed(SEED)

In [4]:
# ==========================
# Helper Functions
# ==========================
def get_upsampled_df(input_df: pd.DataFrame,
                     total_n: int,
                     add_label: bool = False) -> pd.DataFrame:
    """
    Pad a dataframe with bootstrap rows until it holds ``total_n`` rows.

    Parameters
    ----------
    input_df : pd.DataFrame
        Source frame. It is copied first and never modified.
    total_n : int
        Desired number of rows in the result. When it does not exceed
        ``len(input_df)`` no rows are added (and none are removed).
    add_label : bool, default False
        If True, a ``data_type`` column is added marking each row as
        ``"original"`` or ``"bootstrap"``.

    Returns
    -------
    pd.DataFrame
        The original rows followed by any rows drawn with replacement
        (fixed ``random_state=0``). Index labels of drawn rows repeat those
        of the rows they were sampled from.
    """
    original = input_df.copy()
    shortfall = total_n - len(original)

    # Nothing to add: hand back the (optionally labelled) copy as-is.
    if shortfall <= 0:
        if add_label:
            original["data_type"] = "original"
        return original

    # Draw the missing rows before labelling so the sample carries no label yet.
    bootstrap = original.sample(n=shortfall, replace=True, random_state=0)
    if add_label:
        original["data_type"] = "original"
        bootstrap["data_type"] = "bootstrap"

    return pd.concat([original, bootstrap], axis=0)


def get_hold_out_test_split(modelling_df,
                            split_col="sample",
                            mode="hold_out",
                            upsample_n=0):
    """
    Build one train/test pair per unique value of ``split_col``.

    For each unique value ``v`` the test set is every row where
    ``split_col == v``. With ``mode="hold_out"`` the training set is every row
    where ``split_col != v``; with any other ``mode`` the training set is the
    same selection as the test set. If ``upsample_n > 0`` the training set is
    passed through ``get_upsampled_df`` with ``total_n=upsample_n``.

    Returns
    -------
    (dict, dict)
        ``training_datasets`` and ``testing_datasets``, both keyed by
        ``f"test_{v}"``.
    """
    training_datasets, testing_datasets = {}, {}
    use_hold_out = mode == "hold_out"

    for split_value in tqdm(modelling_df[split_col].unique(), desc="Generating splits"):
        fold_key = f"test_{split_value}"
        test_part = modelling_df[modelling_df[split_col] == split_value]

        if use_hold_out:
            train_part = modelling_df[modelling_df[split_col] != split_value]
        else:
            # Non-hold-out modes train on the very rows that are tested on.
            train_part = modelling_df[modelling_df[split_col] == split_value]

        if upsample_n > 0:
            train_part = get_upsampled_df(train_part, total_n=upsample_n)

        training_datasets[fold_key] = train_part
        testing_datasets[fold_key] = test_part

    return training_datasets, testing_datasets


def generate_combinations(genes, size=6):
    """Return every ``size``-element combination of ``genes`` as a list of tuples.

    The number of combinations is also printed.
    """
    combos = [combo for combo in itertools.combinations(genes, size)]
    n_combos = len(combos)
    print(f"Number of combinations of size {size}: {n_combos}")
    return combos

In [5]:
# ==========================
# Custom Loss Function used for QKA
# ==========================
from qiskit_machine_learning.utils.loss_functions import KernelLoss
from sklearn.svm import SVR

class SVRLoss(KernelLoss):
    """Kernel loss for regression: in-sample MSE of an SVR fitted on the kernel matrix.

    Keyword arguments given to the constructor are stored and forwarded
    unchanged to ``SVR`` on every evaluation.
    """

    def __init__(self, **kwargs):
        super().__init__()
        self.kwargs = kwargs
        # Running count of evaluate() calls; only used to throttle logging.
        self.eval_count = 0

    def evaluate(self, parameter_values, quantum_kernel, data, labels):
        """Return the training-set MSE obtained with the given kernel parameters.

        The kernel's training parameters are set to ``parameter_values``, the
        kernel matrix of ``data`` against itself is computed, an SVR with a
        precomputed kernel is fitted on it, and the mean squared error of its
        predictions on that same matrix is returned as a float. Every fifth
        call prints the current loss.
        """
        self.eval_count += 1

        # Bind the candidate parameters, then build the Gram matrix.
        quantum_kernel.assign_training_parameters(parameter_values)
        gram = quantum_kernel.evaluate(data)

        # Fit and score on the same (precomputed) kernel matrix.
        regressor = SVR(kernel="precomputed", **self.kwargs)
        regressor.fit(gram, labels)
        residuals = labels - regressor.predict(gram)
        mse = float(np.mean(residuals ** 2))

        report_now = self.eval_count % 5 == 0
        if report_now:
            print(f"[SVR loss] call #{self.eval_count:3d}  MSE = {mse:.6f}")

        return mse

In [8]:
# ==========================
# Data Loading & Config
# ==========================
# Target column and the column(s) that identify a sample rather than a feature.
y_col = "Beat count per min"
meta_cols = ["sample"]

# Load data. The CSV's first column is read as the index and then restored as
# an ordinary column, which leaves a fresh, unique 0..N-1 row index.
modelling_df = pd.read_csv("data/processed/cardiac_formatted_dataset-001.csv", index_col=0)
modelling_df = modelling_df.reset_index(drop=False)

# Split data: one hold-out fold per unique value of "sample".
training_datasets, testing_datasets = get_hold_out_test_split(
    modelling_df, split_col="sample"
)

# Features
optimal_genes = [
    "H19", "MYL7", "NPPB", "MYL9", "TNNC1", "MYL4",
    "TPM1", "UBC", "UBB", "TIMP1", "HSPB1", "HSPA8", "FTL", "FTH1"
]
# trial_combinations = generate_combinations(optimal_genes, size=6)
trial_gene_set = optimal_genes[:4]  # Use first 4 for demo

# Fail early, with a readable message, if the dataset lacks a required column.
missing_cols = [c for c in trial_gene_set + [y_col] if c not in modelling_df.columns]
if missing_cols:
    raise KeyError(f"Columns missing from dataset: {missing_cols}")

# Pool the training folds for kernel training.
# In hold-out mode each row sits in every fold except its own, so simply
# stacking the folds would repeat every row (n_folds - 1) times. That leaves
# the scaler/PCA fit unchanged but inflates the kernel matrix evaluated at
# every optimizer step by (n_folds - 1)^2. Rows are therefore de-duplicated
# by their (unique) row index before use.
training_pool = pd.concat(list(training_datasets.values()), axis=0)
training_pool = training_pool[~training_pool.index.duplicated(keep="first")].sort_index()

X_all = training_pool[trial_gene_set].to_numpy()
y_all = training_pool[y_col].to_numpy()

# Global Standardization & PCA
# NOTE: the pooled folds together contain rows that later act as held-out test
# data, so the scaler, PCA and trained kernel have all seen them. Scores from
# the cross-validation below are optimistic for that reason.
global_scaler = StandardScaler()
X_all_sc = global_scaler.fit_transform(X_all)

# At most 8 components, and never more than the data can support.
global_pca = PCA(
    n_components=min(8, X_all_sc.shape[0], X_all_sc.shape[1])
)
X_all_pca = global_pca.fit_transform(X_all_sc)

# One qubit per retained principal component.
n_qubits = X_all_pca.shape[1]
print(f"[Info] Qiskit QKA: Using {n_qubits} qubits.")

Generating splits: 100%|██████████| 24/24 [00:00<00:00, 271.67it/s]

[Info] Qiskit QKA: Using 4 qubits.





In [9]:
# ==========================
# Build Quantum Kernel
# ==========================
# Trainable Layer + Feature Map
# Data-encoding circuit: one feature per qubit.
feature_map = ZZFeatureMap(feature_dimension=n_qubits)

# Trainable single-qubit rotations: one RX angle and one RY angle per qubit.
params_rx = ParameterVector("θx", n_qubits)
params_ry = ParameterVector("θy", n_qubits)
training_params = list(params_rx) + list(params_ry)

qka_layer = QuantumCircuit(n_qubits)
for i in range(n_qubits):
    qka_layer.rx(params_rx[i], i)
for i in range(n_qubits):
    qka_layer.ry(params_ry[i], i)

# The trainable layer must act BEFORE the data encoding.
# The fidelity kernel is |<0| W(x)^dagger W(y) |0>|^2 for the full circuit W.
# If the trainable unitary U(theta) were applied last (W = U(theta) * Phi(x)),
# U^dagger U would cancel and the kernel would not depend on theta at all,
# leaving the optimizer nothing to train. Placing U(theta) first
# (W = Phi(x) * U(theta)) keeps the parameters inside the overlap.
qka_circuit = qka_layer.compose(feature_map)

# Trainable Kernel
# Seed the shot sampling so kernel values (and hence training) are repeatable.
# A Generator is passed rather than an int so successive circuits draw
# different random numbers from one reproducible stream.
sampler = Sampler(seed=np.random.default_rng(42))
fidelity = ComputeUncompute(sampler=sampler)

quant_kernel = TrainableFidelityQuantumKernel(
    fidelity=fidelity,
    feature_map=qka_circuit,
    training_parameters=training_params,
)

# Optimizer & Loss
spsa_opt = SPSA(maxiter=10, learning_rate=0.1, perturbation=0.05)
mse_loss = SVRLoss(C=1.0, epsilon=0.1)

  feature_map = ZZFeatureMap(feature_dimension=n_qubits)


In [None]:
# ==========================
# Train Kernel
# ==========================
print("=== Quantum Kernel Training Start ===")

# Every trainable angle starts at pi/2.
start_angles = [np.pi / 2 for _ in training_params]

qkt = QuantumKernelTrainer(
    quantum_kernel=quant_kernel,
    loss=mse_loss,
    optimizer=spsa_opt,
    initial_point=start_angles,
)

# Optimize the kernel parameters against the pooled training data.
qka_results = qkt.fit(X_all_pca, y_all)
optimized_kernel = qka_results.quantum_kernel

print("Optimal point:", qka_results.optimal_point)
print("=== Quantum Kernel Training Done ===")

=== Quantum Kernel Training Start ===


In [None]:
# ==========================
# Evaluation (LOOCV)
# ==========================
all_true, all_pred = [], []
fold_results = []  # (fold id, RMSE, R²) per held-out fold

for hold_out_id in tqdm(training_datasets.keys(), desc="Running QKA-QSVR LOOCV"):
    # Prepare Data
    Xtr = training_datasets[hold_out_id][trial_gene_set].values
    ytr = training_datasets[hold_out_id][y_col].values
    Xte = testing_datasets[hold_out_id][trial_gene_set].values
    yte = testing_datasets[hold_out_id][y_col].values

    # Standardize & PCA using Global Scaler/PCA (the same transforms the
    # kernel was trained with, so coordinates match what the kernel has seen).
    Xtr_sc = global_scaler.transform(Xtr)
    Xte_sc = global_scaler.transform(Xte)

    Xtr_pca = global_pca.transform(Xtr_sc)
    Xte_pca = global_pca.transform(Xte_sc)

    # QSVR with Optimized Kernel
    qsvr = QSVR(quantum_kernel=optimized_kernel)
    qsvr.fit(Xtr_pca, ytr)
    y_pred = qsvr.predict(Xte_pca)

    # Evaluate
    rmse = float(np.sqrt(mean_squared_error(yte, y_pred)))
    # R² is undefined for fewer than two test points; record NaN instead of
    # letting the metric emit a warning for single-row folds.
    r2 = float(r2_score(yte, y_pred)) if len(yte) > 1 else float("nan")
    fold_results.append((hold_out_id, rmse, r2))

    all_true.extend(list(yte))
    all_pred.extend(list(y_pred))

    print(f"[{hold_out_id}] RMSE = {rmse:.4f}, R² = {r2:.4f}")

# ==========================
# Summary
# ==========================
# Aggregate metrics are computed over the concatenated predictions of all folds.
global_rmse = float(np.sqrt(mean_squared_error(all_true, all_pred)))
global_r2 = float(r2_score(all_true, all_pred))

print("\n========== QKA-QSVR LOOCV Results ==========")
for hid, rmse, r2 in fold_results:
    print(f"{hid:15s}  RMSE = {rmse:.4f},  R² = {r2:.4f}")

print(f"\n[LOOCV Aggregate] RMSE = {global_rmse:.4f},  R² = {global_r2:.4f}")