# 03 - Training Qiskit VQC with a Simple SPSA-style Optimizer

This notebook trains a small Qiskit variational classifier using a
gradient-free SPSA-style optimizer with the `Sampler` primitive.

In [1]:
import numpy as np
import matplotlib.pyplot as plt

from qiskit.circuit import QuantumCircuit, ParameterVector
from qiskit.primitives import Sampler

from quantumuq import ShotBootstrap, wrap_qiskit_sampler
from quantumuq.datasets.toy import make_moons

# Seeded generator; it drives the train/test shuffle below.
rng = np.random.default_rng(0)

# Toy two-moons dataset: 200 samples, features and labels exposed as attributes.
dataset = make_moons(n_samples=200, noise=0.1, random_state=0)
X = dataset.X
y = dataset.y

# Shuffle the sample indices once and cut at 150: the first part is used for
# training, the remainder for testing.
perm = rng.permutation(len(X))
train_idx, test_idx = np.split(perm, [150])
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

In [2]:
# Circuit dimensions: one qubit driven by two symbolic rotation angles.
n_qubits, n_params = 1, 2
theta = ParameterVector("theta", length=n_params)

# Ansatz: RY(theta[0]) followed by RZ(theta[1]) on qubit 0, then measure all qubits.
# NOTE(review): RZ only adds phases in the computational basis, so with the
# measurement immediately after it theta[1] presumably has no effect on the
# sampled outcome probabilities - confirm this is intended.
qc = QuantumCircuit(n_qubits)
qc.ry(theta[0], qubit=0)
qc.rz(theta[1], qubit=0)
qc.measure_all()

def feature_map(X: np.ndarray):
    """Turn input samples into per-sample lists of two parameter values.

    For every sample the first feature ``x0`` is mapped to ``[x0, -x0]``
    (a simple linear function of the first feature).

    Parameters
    ----------
    X : array-like
        Either a batch of shape ``(n_samples, n_features)`` or a single
        sample given as a 1-D sequence of features.

    Returns
    -------
    list of list of float
        One ``[x0, -x0]`` entry per sample; an empty list for empty input.
    """
    X_arr = np.asarray(X)
    if X_arr.ndim == 1:
        if X_arr.size == 0:
            return []
        # A 1-D input is one sample: promote it to a batch with a single row.
        # (Slicing with [0:1] would keep it 1-D and make x[0] below fail on a
        # scalar.)
        X_arr = X_arr[np.newaxis, :]
    # Map first feature to a simple linear function.
    return [[float(x[0]), -float(x[0])] for x in X_arr]

# Sampler primitive instance; the wrapped predictor below is built on it.
sampler = Sampler()

# Two-class classifier that wraps the circuit and derives the circuit
# parameter values from the input data via `feature_map`.
predictor = wrap_qiskit_sampler(
    circuit=qc,
    sampler=sampler,
    feature_map=feature_map,
    task="classification",
    n_classes=2,
)

  sampler = Sampler()


In [3]:
def spsa_step(params, a, c):
    """Perform one SPSA-style update of the circuit parameters.

    A random perturbation with entries drawn from {-1, +1} is sampled from the
    notebook-level ``rng``. The training loss is evaluated at
    ``params + c * perturbation`` and ``params - c * perturbation``, and the
    difference of the two losses yields a per-component gradient estimate.

    Parameters
    ----------
    params : current parameter vector (must support ``.shape`` and array
        arithmetic, e.g. a NumPy array).
    a : step size applied to the gradient estimate.
    c : perturbation magnitude.

    Returns
    -------
    The updated parameter vector ``params - a * g_hat``.
    """
    perturbation = rng.choice([-1.0, 1.0], size=params.shape)

    def evaluate_loss(candidate):
        """Mean cross-entropy on the training set with the circuit bound to `candidate`."""

        # The predictor receives circuit parameter values through its feature
        # map, so this map ignores the contents of X_batch and hands the same
        # candidate vector to every row.
        def constant_binding(X_batch):
            rows = np.atleast_2d(X_batch)
            return [list(candidate) for _ in rows]

        candidate_predictor = wrap_qiskit_sampler(
            sampler=sampler,
            circuit=qc,
            task="classification",
            n_classes=2,
            feature_map=constant_binding,
        )
        class_probs = np.clip(
            candidate_predictor.predict_proba(X_train, shots=1000),
            1e-12,  # keep log() finite when a class has probability zero
            1.0,
        )
        targets = np.eye(2)[y_train]  # one-hot rows selected by label
        per_sample_loglik = np.sum(targets * np.log(class_probs), axis=1)
        return -np.mean(per_sample_loglik)

    # Evaluate the "plus" point first, then the "minus" point.
    loss_plus = evaluate_loss(params + c * perturbation)
    loss_minus = evaluate_loss(params - c * perturbation)

    # Simultaneous-perturbation estimate: one loss difference, divided
    # element-wise by each component's own perturbation.
    gradient_estimate = (loss_plus - loss_minus) / (2.0 * c * perturbation)
    return params - a * gradient_estimate

# Short SPSA run from the all-zero starting point; only ten updates so the
# notebook stays quick to execute.
params_vec = np.zeros(n_params)
c = 0.1  # perturbation size, the same for every iteration
for k in range(10):
    a = 0.1 / (k + 1)  # step size shrinks as 1 / (k + 1)
    params_vec = spsa_step(params_vec, a=a, c=c)
    print(f"Iter {k+1}, params={params_vec}")

def trained_feature_map(X_batch):
    """Return one copy of the current ``params_vec`` (as a list) per input row.

    The values in ``X_batch`` are not used; only the number of rows after
    ``np.atleast_2d`` matters. ``params_vec`` is looked up at call time.
    """
    n_rows = len(np.atleast_2d(X_batch))
    return [list(params_vec) for _ in range(n_rows)]

# Predictor whose feature map always supplies the trained parameter values.
trained_predictor = wrap_qiskit_sampler(
    circuit=qc,
    sampler=sampler,
    feature_map=trained_feature_map,
    task="classification",
    n_classes=2,
)

# Attach the ShotBootstrap configuration (8 samples, 1000 shots, seed 0) and
# request the predictive distribution for the held-out test points.
uq = ShotBootstrap(n_samples=8, shots=1000, seed=0)
uq_model = trained_predictor.with_uq(uq)
dist = uq_model.predict_dist(X_test)
print("Predictive mean shape:", dist.mean.shape)

Iter 1, params=[0.08382779 0.08382779]
Iter 2, params=[2.76816958 2.76816958]
Iter 3, params=[2.68309031 2.85324885]
Iter 4, params=[2.63318114 2.80333968]
Iter 5, params=[2.59919249 2.76935103]
Iter 6, params=[2.57171491 2.74187344]
Iter 7, params=[2.5497678  2.76382055]
Iter 8, params=[2.5311114  2.74516415]
Iter 9, params=[2.51648865 2.73054141]
Iter 10, params=[2.50320384 2.7172566 ]
Predictive mean shape: (50, 2)


## What the SPSA training loop does

- `params_vec` collects all trainable circuit parameters.
- At each iteration, we sample a random perturbation `delta` and evaluate
  the loss at `params + c * delta` and `params - c * delta`.
- These two evaluations give a noisy finite-difference estimate of the gradient,
  `g_hat = (l_plus - l_minus) / (2 * c * delta)`, which we use to update the
  parameters: `params_vec <- params_vec - a * g_hat`.
- This is a simple, gradient-free way to optimize Qiskit circuits using only
  `Sampler` evaluations (no analytic gradients required).