# Encoding Smoke Tests

Goal: validate that each encoding (Basis, Angle, Re-uploading Angle, Amplitude) produces the expected circuit behavior on small, interpretable inputs.
We keep these tests deterministic and lightweight so we can reuse them later when comparing encodings in VQE/QNN experiments.

In [58]:
import os
import sys
from time import perf_counter

import numpy as np

# The notebook is expected to run one level below the repository root, so the
# parent of the current working directory is put on sys.path (once) to make
# the project's `src` package importable.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(PROJECT_ROOT) == 0:
    sys.path.insert(0, PROJECT_ROOT)

from src.encodings import BasisEncoding, AngleEncoding, AmplitudeEncoding, circuit_stats
from src.encodings import ReuploadingAngleEncoding
from src.data import make_linear, make_xor, train_test_split, standardize_fit_transform

## Helper functions

We use two validation modes:
- **Circuit inspection** (text drawing + gate counts)
- **Sampling** on a simulator backend (quick probabilistic sanity checks)

Optionally, we can also use **statevector** simulation for exact checks (small circuits only).

In [59]:
from qiskit import transpile
from qiskit_aer import AerSimulator

def sample_counts(qc, shots=2048, seed=0):
    """Sample ``qc`` on a seeded ``AerSimulator`` and return the counts.

    The caller's circuit is left untouched: the work is done on a copy, and
    if that copy has no classical bits, ``measure_all()`` is applied to it
    before execution.

    Args:
        qc: Circuit to sample.
        shots: Number of shots passed to ``backend.run``.
        seed: Value used for both ``seed_simulator`` and ``seed_transpiler``.

    Returns:
        Whatever ``get_counts()`` reports for the executed job.
    """
    measured = qc.copy()
    lacks_clbits = measured.num_clbits == 0
    if lacks_clbits:
        measured.measure_all()

    simulator = AerSimulator(seed_simulator=seed)
    compiled = transpile(
        measured,
        backend=simulator,
        optimization_level=0,
        seed_transpiler=seed,
    )
    return simulator.run(compiled, shots=shots).result().get_counts()

def top_k(counts, k=5):
    """Return the ``k`` highest-valued ``(key, value)`` pairs of ``counts``.

    Pairs are ordered by value, largest first; pairs with equal values keep
    the order in which they appear in ``counts``.

    Args:
        counts: Mapping from outcome label to its count.
        k: Upper bound of the slice taken from the ranked pairs.

    Returns:
        A list of ``(key, value)`` tuples.
    """
    def count_of(entry):
        _, count = entry
        return count

    ranked = list(counts.items())
    ranked.sort(key=count_of, reverse=True)
    return ranked[:k]

# Optional exact statevector checks (small circuits only).
# This requires circuits without measurements.
from qiskit.quantum_info import Statevector

def statevector_probs(qc):
    """Return the exact outcome probabilities of ``qc``, most likely first.

    The probabilities come from ``Statevector.from_instruction(qc)`` followed
    by ``probabilities_dict()``; the resulting entries are re-inserted into a
    new dict in descending order of probability.

    Args:
        qc: Circuit to evaluate.

    Returns:
        A dict mapping each outcome label to its probability.
    """
    outcome_probs = Statevector.from_instruction(qc).probabilities_dict()

    ordered = {}
    for label, prob in sorted(
        outcome_probs.items(), key=lambda entry: entry[1], reverse=True
    ):
        ordered[label] = prob
    return ordered


## Encoders under test

All encoders expose the same minimal API: `encoder.circuit(x)` returns a *pure encoding* circuit (no trainable ansatz).

In [60]:
# One instance of every encoder compared in this notebook.
encoders = [
    BasisEncoding(threshold=0.0),
    AngleEncoding(),
    ReuploadingAngleEncoding(reps=3, entangle=True),
    AmplitudeEncoding(),
]

# List the encoder names, one per line.
print(*(encoder.name for encoder in encoders), sep="\n")

basis
angle
angle_reupload
amplitude


## 1) Basis encoding tests

We expect a direct mapping: bit `1` $\rightarrow$ an `X` gate on that qubit.
We verify (a) gate placement via circuit text drawing, and (b) sampling results.

In [None]:
from json import dumps

# Basis-encoding smoke test: for each handcrafted 2-bit input, print the
# circuit statistics and text drawing, save an image of the circuit, and
# sample it on the simulator.
basis = BasisEncoding(threshold=0.0)

basis_inputs = [
    (np.array([0, 0]), "Expect |00> (no X gates)"),
    (np.array([1, 0]), "Expect X on qubit 0 -> mostly '01' or '10' depending on bit ordering"),
    (np.array([0, 1]), "Expect X on qubit 1 -> mostly the other single-1 string"),
    (np.array([1, 1]), "Expect X on both -> mostly '11'"),
]

# savefig does not create missing directories, so make sure the target
# exists before the loop (e.g. on a fresh checkout without results/).
basis_dir = os.path.join(PROJECT_ROOT, "results", "smoke_test", "circuits", "basis")
os.makedirs(basis_dir, exist_ok=True)

for x, note in basis_inputs:
    qc = basis.circuit(x)
    print("\nInput:", x, "|", note)
    print("Stats:", dumps(circuit_stats(qc), indent=4))
    print(qc.draw(output="text"))

    # Save circuit to file; the tag is built from every input component so
    # each input gets its own file.
    tag = "".join(str(int(bit)) for bit in x)
    fig = qc.draw(output="mpl", fold=-1)
    fig.savefig(os.path.join(basis_dir, f"basis_input_{tag}.png"))

    counts = sample_counts(qc, shots=2048, seed=123)
    print("Top counts:", top_k(counts, k=3))


Input: [0 0] | Expect |00> (no X gates)
Stats: {
    "n_qubits": 2,
    "depth": 0,
    "size": 0,
    "ops": {}
}
     
q_0: 
     
q_1: 
     
Top counts: [('00', 2048)]

Input: [1 0] | Expect X on qubit 0 -> mostly '01' or '10' depending on bit ordering
Stats: {
    "n_qubits": 2,
    "depth": 1,
    "size": 1,
    "ops": {
        "x": 1
    }
}
     ┌───┐
q_0: ┤ X ├
     └───┘
q_1: ─────
          
Top counts: [('01', 2048)]

Input: [0 1] | Expect X on qubit 1 -> mostly the other single-1 string
Stats: {
    "n_qubits": 2,
    "depth": 1,
    "size": 1,
    "ops": {
        "x": 1
    }
}
          
q_0: ─────
     ┌───┐
q_1: ┤ X ├
     └───┘
Top counts: [('10', 2048)]

Input: [1 1] | Expect X on both -> mostly '11'
Stats: {
    "n_qubits": 2,
    "depth": 1,
    "size": 2,
    "ops": {
        "x": 2
    }
}
     ┌───┐
q_0: ┤ X ├
     ├───┤
q_1: ┤ X ├
     └───┘
Top counts: [('11', 2048)]


## 2) Angle encoding tests (RY)

We test simple inputs:
- `x = [0, 0]` should produce identity rotations (`Ry(0)`) $\rightarrow$ all shots `00`
- a large-magnitude input should change the measurement distribution noticeably

We validate by looking at circuit structure and sampling counts.

In [None]:
# Angle-encoding smoke test: for each handcrafted input, print the circuit
# statistics and text drawing, save an image of the circuit, then compare
# exact statevector probabilities against sampled counts.
angle = AngleEncoding()

# Use a few handcrafted points to probe scaling behavior.
angle_inputs = [
    (np.array([0.0, 0.0]), "Near identity -> mostly '00'"),
    (np.array([1.0, 0.0]), "First qubit rotates -> distribution changes"),
    (np.array([0.0, 1.0]), "Second qubit rotates -> distribution changes"),
    (np.array([2.0, -1.0]), "Both rotate with different magnitudes -> mixed outcomes"),
]

# savefig does not create missing directories, so make sure the target
# exists before the loop (e.g. on a fresh checkout without results/).
angle_dir = os.path.join(PROJECT_ROOT, "results", "smoke_test", "circuits", "angle")
os.makedirs(angle_dir, exist_ok=True)

for x, note in angle_inputs:
    qc = angle.circuit(x)
    print("\nInput:", x, "|", note)
    print("Stats:", circuit_stats(qc))
    print(qc.draw(output="text"))

    # Save circuit to file; the tag concatenates the integer part of every
    # input component (e.g. [2.0, -1.0] -> "2-1").
    tag = "".join(str(int(value)) for value in x)
    fig = qc.draw(output="mpl", fold=-1)
    fig.savefig(os.path.join(angle_dir, f"angle_input_{tag}.png"))

    # Optional exact probabilities (no measurements in the encoding circuit)
    probs = statevector_probs(qc)
    print("Top probs:", list(probs.items())[:4])

    counts = sample_counts(qc, shots=4096, seed=123)
    print("Top counts:", top_k(counts, k=4))


Input: [0. 0.] | Near identity -> mostly '00'
Stats: {'n_qubits': 2, 'depth': 1, 'size': 2, 'ops': OrderedDict([('ry', 2)])}
     ┌───────┐
q_0: ┤ Ry(0) ├
     ├───────┤
q_1: ┤ Ry(0) ├
     └───────┘
Top probs: [(np.str_('00'), np.float64(1.0))]
Top counts: [('00', 4096)]

Input: [1. 0.] | First qubit rotates -> distribution changes
Stats: {'n_qubits': 2, 'depth': 1, 'size': 2, 'ops': OrderedDict([('ry', 2)])}
     ┌───────┐
q_0: ┤ Ry(π) ├
     ├───────┤
q_1: ┤ Ry(0) ├
     └───────┘
Top probs: [(np.str_('01'), np.float64(1.0)), (np.str_('00'), np.float64(3.749399456654644e-33))]
Top counts: [('01', 4096)]

Input: [0. 1.] | Second qubit rotates -> distribution changes
Stats: {'n_qubits': 2, 'depth': 1, 'size': 2, 'ops': OrderedDict([('ry', 2)])}
     ┌───────┐
q_0: ┤ Ry(0) ├
     ├───────┤
q_1: ┤ Ry(π) ├
     └───────┘
Top probs: [(np.str_('10'), np.float64(1.0)), (np.str_('00'), np.float64(3.749399456654644e-33))]
Top counts: [('10', 4096)]

Input: [ 2. -1.] | Both rotate with differ

## 3) Amplitude encoding tests

We validate:
- padding/truncation rule (implicitly)
- L2 normalization (implicitly)
- known vectors: one-hot should map to a computational basis state (up to bit order conventions)

We use 2 qubits (dimension 4) for clarity.

In [None]:
# Amplitude-encoding smoke test: for each 4-component input, print the
# circuit statistics and text drawing, save an image of the circuit, then
# compare exact statevector probabilities against sampled counts.
amp = AmplitudeEncoding()

amp_inputs = [
    (np.array([1.0, 0.0, 0.0, 0.0]), "One-hot -> should concentrate on one bitstring"),
    (np.array([0.0, 1.0, 0.0, 0.0]), "One-hot -> should concentrate on another bitstring"),
    (np.array([0.0, 0.0, 1.0, 0.0]), "One-hot -> should concentrate on another bitstring"),
    (np.array([0.0, 0.0, 0.0, 1.0]), "One-hot -> should concentrate on another bitstring"),
    (np.array([0.0, 0.0, 0.0, 0.0]), "Zero vector -> fallback to |00> by our preprocess rule"),
]

# savefig does not create missing directories, so make sure the target
# exists before the loop (e.g. on a fresh checkout without results/).
amp_dir = os.path.join(PROJECT_ROOT, "results", "smoke_test", "circuits", "amplitude")
os.makedirs(amp_dir, exist_ok=True)

for x, note in amp_inputs:
    qc = amp.circuit(x)
    print("\nInput:", x, "|", note)
    print("Stats:", circuit_stats(qc))
    print(qc.draw(output="text"))

    # Save circuit to file. The tag must cover ALL components: using only
    # x[0] and x[1] maps [0,0,1,0], [0,0,0,1] and [0,0,0,0] to the same
    # "00" name, so later images would overwrite earlier ones.
    tag = "".join(str(int(value)) for value in x)
    fig = qc.draw(output="mpl", fold=-1)
    fig.savefig(os.path.join(amp_dir, f"amp_input_{tag}.png"))

    # Exact probs (initialize supports statevector evaluation)
    probs = statevector_probs(qc)
    print("Top probs:", list(probs.items())[:4])

    counts = sample_counts(qc, shots=4096, seed=123)
    print("Top counts:", top_k(counts, k=4))


Input: [1. 0. 0. 0.] | One-hot -> should concentrate on one bitstring
Stats: {'n_qubits': 2, 'depth': 1, 'size': 1, 'ops': OrderedDict([('initialize', 1)])}
     ┌──────────────────────┐
q_0: ┤0                     ├
     │  Initialize(1,0,0,0) │
q_1: ┤1                     ├
     └──────────────────────┘
Top probs: [(np.str_('00'), np.float64(1.0))]
Top counts: [('00', 4096)]

Input: [0. 1. 0. 0.] | One-hot -> should concentrate on another bitstring
Stats: {'n_qubits': 2, 'depth': 1, 'size': 1, 'ops': OrderedDict([('initialize', 1)])}
     ┌──────────────────────┐
q_0: ┤0                     ├
     │  Initialize(0,1,0,0) │
q_1: ┤1                     ├
     └──────────────────────┘
Top probs: [(np.str_('01'), np.float64(1.0))]
Top counts: [('01', 4096)]

Input: [0. 0. 1. 0.] | One-hot -> should concentrate on another bitstring
Stats: {'n_qubits': 2, 'depth': 1, 'size': 1, 'ops': OrderedDict([('initialize', 1)])}
     ┌──────────────────────┐
q_0: ┤0                     ├
     │  Init

## 4) Data Re-uploading (Angle)

We repeat the same data-dependent rotations multiple times (reps), optionally separated by a light entangling pattern.
This increases expressivity but also increases circuit depth.

In [None]:
# Re-uploading smoke test: for each handcrafted input, print the circuit
# statistics and text drawing, save an image of the circuit, then compare
# exact statevector probabilities against sampled counts.
reup = ReuploadingAngleEncoding(reps=3, entangle=True)

reup_inputs = [
    (np.array([0.0, 0.0]), "Should behave close to identity (still mostly '00')"),
    (np.array([1.0, 0.0]), "Repeated rotations + entangling -> stronger mixing vs plain AngleEncoding"),
    (np.array([2.0, -1.0]), "More pronounced changes; check depth increase"),
]

# savefig does not create missing directories, so make sure the target
# exists before the loop (e.g. on a fresh checkout without results/).
reup_dir = os.path.join(PROJECT_ROOT, "results", "smoke_test", "circuits", "reupload")
os.makedirs(reup_dir, exist_ok=True)

for x, note in reup_inputs:
    qc = reup.circuit(x)
    print("\nInput:", x, "|", note)
    print("Stats:", circuit_stats(qc))
    print(qc.draw(output="text"))

    # Save circuit to file; the tag concatenates the integer part of every
    # input component (e.g. [2.0, -1.0] -> "2-1").
    tag = "".join(str(int(value)) for value in x)
    fig = qc.draw(output="mpl", fold=-1)
    fig.savefig(os.path.join(reup_dir, f"reup_input_{tag}.png"))

    probs = statevector_probs(qc)
    print("Top probs:", list(probs.items())[:4])

    counts = sample_counts(qc, shots=4096, seed=123)
    print("Top counts:", top_k(counts, k=4))


Input: [0. 0.] | Should behave close to identity (still mostly '00')
Stats: {'n_qubits': 2, 'depth': 5, 'size': 8, 'ops': OrderedDict([('ry', 6), ('cx', 2)])}
     ┌───────┐     ┌───────┐     ┌───────┐
q_0: ┤ Ry(0) ├──■──┤ Ry(0) ├──■──┤ Ry(0) ├
     ├───────┤┌─┴─┐├───────┤┌─┴─┐├───────┤
q_1: ┤ Ry(0) ├┤ X ├┤ Ry(0) ├┤ X ├┤ Ry(0) ├
     └───────┘└───┘└───────┘└───┘└───────┘
Top probs: [(np.str_('00'), np.float64(1.0))]
Top counts: [('00', 4096)]

Input: [1. 0.] | Repeated rotations + entangling -> stronger mixing vs plain AngleEncoding
Stats: {'n_qubits': 2, 'depth': 5, 'size': 8, 'ops': OrderedDict([('ry', 6), ('cx', 2)])}
     ┌───────┐     ┌───────┐     ┌───────┐
q_0: ┤ Ry(π) ├──■──┤ Ry(π) ├──■──┤ Ry(π) ├
     ├───────┤┌─┴─┐├───────┤┌─┴─┐├───────┤
q_1: ┤ Ry(0) ├┤ X ├┤ Ry(0) ├┤ X ├┤ Ry(0) ├
     └───────┘└───┘└───────┘└───┘└───────┘
Top probs: [(np.str_('11'), np.float64(1.0)), (np.str_('10'), np.float64(1.4997597826618576e-32)), (np.str_('00'), np.float64(3.749399456654644e-33)), (np.

## Quick comparison on a tiny toy dataset sample

We generate a toy dataset (2D), standardize it, pick one point, and compare the encoding circuits.
This is not a performance benchmark yet - just a quick end-to-end sanity check.

In [65]:
# Build one toy dataset, split it, and standardize the two splits.
X, y = make_xor(n=200, seed=0, noise=0.2)
split = train_test_split(X, y, test_ratio=0.3, seed=0)
X_train, X_test = standardize_fit_transform(split.X_train, split.X_test)

# Probe every encoder with the first standardized training point.
x0 = X_train[0]
print("Example x0:", x0, "label:", split.y_train[0])

# One summary tuple per encoder: (name, n_qubits, depth, size, ops).
rows = []
for e in encoders:
    stats = circuit_stats(e.circuit(x0))
    summary = (
        e.name,
        stats["n_qubits"],
        stats["depth"],
        stats["size"],
        dict(stats["ops"]),
    )
    rows.append(summary)

for name, n_qubits, depth, size, ops in rows:
    print(name, "| qubits:", n_qubits, "| depth:", depth, "| size:", size, "| ops:", ops)

Example x0: [-1.31449296  0.21619321] label: 0
basis | qubits: 2 | depth: 1 | size: 1 | ops: {'x': 1}
angle | qubits: 2 | depth: 1 | size: 2 | ops: {'ry': 2}
angle_reupload | qubits: 2 | depth: 5 | size: 8 | ops: {'ry': 6, 'cx': 2}
amplitude | qubits: 1 | depth: 1 | size: 1 | ops: {'initialize': 1}


### Angle vs Reupload

In [66]:
# Encode the same point with the plain and the re-uploading angle encoders
# and print their circuit statistics and leading exact probabilities.
x = np.array([2.0, -1.0])

plain = AngleEncoding().circuit(x)
reup3 = ReuploadingAngleEncoding(reps=3, entangle=True).circuit(x)

for label, circuit in (
    ("Plain Angle stats:", plain),
    ("Reupload reps=3 stats:", reup3),
):
    print(label, circuit_stats(circuit))

for label, circuit in (
    ("\nPlain Angle top probs:", plain),
    ("Reupload top probs:", reup3),
):
    leading = list(statevector_probs(circuit).items())[:4]
    print(label, leading)

Plain Angle stats: {'n_qubits': 2, 'depth': 1, 'size': 2, 'ops': OrderedDict([('ry', 2)])}
Reupload reps=3 stats: {'n_qubits': 2, 'depth': 5, 'size': 8, 'ops': OrderedDict([('ry', 6), ('cx', 2)])}

Plain Angle top probs: [(np.str_('01'), np.float64(0.5000000000000001)), (np.str_('11'), np.float64(0.4999999999999999)), (np.str_('00'), np.float64(1.8746997283273227e-33)), (np.str_('10'), np.float64(1.8746997283273213e-33))]
Reupload top probs: [(np.str_('11'), np.float64(0.5000000000000001)), (np.str_('01'), np.float64(0.4999999999999999)), (np.str_('10'), np.float64(1.874699728327323e-33)), (np.str_('00'), np.float64(1.8746997283273227e-33))]


## Micro-timing: circuit build time

This is a lightweight proxy for encoding overhead. It can be useful later when discussing practical trade-offs.

In [67]:
def time_encoder(encoder, X, n_reps=200):
    """Return the mean wall-clock seconds per ``encoder.circuit`` call.

    ``encoder.circuit`` is invoked ``n_reps`` times, cycling through ``X`` by
    index (call ``i`` uses ``X[i % len(X)]``); the elapsed ``perf_counter``
    time is divided by ``n_reps``.

    Args:
        encoder: Object exposing a ``circuit(x)`` method.
        X: Indexable, sized collection of inputs.
        n_reps: Number of timed calls.

    Returns:
        Average duration of one call, in seconds.
    """
    started = perf_counter()
    rep = 0
    while rep < n_reps:
        sample = X[rep % len(X)]
        encoder.circuit(sample)
        rep += 1
    elapsed = perf_counter() - started
    return elapsed / n_reps

# Use a small batch
X_small = X_train[:50]

# Pad the name column to the longest encoder name (never below the original
# width of 10) so the timings line up; a fixed width of 10 is too short for
# names such as "angle_reupload".
name_width = max([10] + [len(encoder.name) for encoder in encoders])

for e in encoders:
    avg_s = time_encoder(e, X_small, n_reps=200)
    # avg_s is seconds per call; report it in milliseconds.
    print(f"{e.name:{name_width}s} avg circuit build: {avg_s*1e3:.3f} ms")

basis      avg circuit build: 0.115 ms
angle      avg circuit build: 0.311 ms
angle_reupload avg circuit build: 0.579 ms
amplitude  avg circuit build: 0.417 ms
