##### Copyright 2020 The TensorFlow Authors.

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# MNIST classification

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/quantum/tutorials/mnist"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/quantum/blob/master/docs/tutorials/mnist.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/quantum/blob/master/docs/tutorials/mnist.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
  <td>
    <a href="https://storage.googleapis.com/tensorflow_docs/quantum/docs/tutorials/mnist.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
  </td>
</table>

This tutorial builds a quantum neural network (QNN) to classify a simplified version of MNIST, similar to the approach used in <a href="https://arxiv.org/pdf/1802.06002.pdf" class="external">Farhi et al</a>. The performance of the quantum neural network on this classical data problem is compared with a classical neural network.

## Setup

In [None]:
!pip install tensorflow==2.15.0

Install TensorFlow Quantum:

In [None]:
!pip install tensorflow-quantum==0.7.3

In [None]:
# Update package resources to account for version changes.
import importlib, pkg_resources

importlib.reload(pkg_resources)

Now import TensorFlow and the module dependencies:

In [None]:
import tensorflow as tf

# 1) Switch on memory growth for every physical GPU that TensorFlow reports.
#    Nothing is changed (and nothing is printed) when no GPU is found.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    print(f"{len(gpus)} GPU(s) detected; memory growth enabled.")

# 2) Turn on XLA JIT compilation in the graph optimizer.
tf.config.optimizer.set_jit(True)
print("XLA JIT enabled.")

# 3) Pin the global Keras dtype policy to float32 (i.e. no mixed precision).
#    NOTE(review): the stated motivation is TFQ compatibility (keeping the
#    dtypes of TFQ's tf.cond branches in agreement) -- confirm it is needed.
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('float32')
print("Mixed-precision policy:", mixed_precision.global_policy())

import tensorflow_quantum as tfq

import cirq
import sympy
import numpy as np
import seaborn as sns
import collections

# visualization tools
%matplotlib inline
import matplotlib.pyplot as plt
from cirq.contrib.svg import SVGCircuit

## 1. Load the data

In this tutorial you will build a binary classifier to distinguish between the digits 3 and 6, following <a href="https://arxiv.org/pdf/1802.06002.pdf" class="external">Farhi et al.</a> This section covers the data handling that:

- Loads the raw data from Keras.
- Filters the dataset to only 3s and 6s.
- Downscales the images so they can fit in a quantum computer.
- Removes any contradictory examples.
- Converts the binary images to Cirq circuits.
- Converts the Cirq circuits to TensorFlow Quantum circuits.

### 1.1 Load the raw data

Load the MNIST dataset distributed with Keras.

In [None]:
# 1) Load the clean MNIST training set; the test split shipped with Keras is
#    discarded because a noisy test set is loaded below instead.
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()

# 2) Load the AWGN-corrupted MNIST test set from a .mat file. The path is
#    relative, so the file must sit in the notebook's working directory.
import scipy.io as sio
mat = sio.loadmat('mnist-with-awgn.mat')

# 3) Print the variable names stored in the file so the keys used in step 4
#    can be checked against what the file actually contains.
print(mat.keys())

# 4) Pull out the test images and labels. The code reads the keys 'test_x'
#    and 'test_y'; change both here if the file uses different names.
#    NOTE(review): the reshape assumes each entry of 'test_x' is a flattened
#    28x28 image, and argmax(axis=1) assumes 'test_y' is one-hot encoded --
#    confirm both against the file.
x_test = mat['test_x'].reshape(-1, 28, 28)
y_test = mat['test_y'].argmax(axis=1)  # column index of the largest entry per row

# 5) Add a trailing channel axis and divide by 255.
#    NOTE(review): dividing x_test by 255 assumes the noisy images are stored
#    on a 0-255 scale like the Keras data -- confirm, otherwise the train and
#    test inputs end up on different scales.
x_train, x_test = (
    x_train[..., np.newaxis] / 255.0,
    x_test[..., np.newaxis] / 255.0
)

print("Number of original training examples:", len(x_train))
print("Number of noisy  test   examples:", len(x_test))

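The original tutorial filters the dataset to keep just the 3s and 6s, removing the other classes, and at the same time converts the label, `y`, to boolean: `True` for `3` and `False` for `6`. In this notebook that filter is commented out (see the next cell), so all ten digit classes are kept and the labels remain the integers 0–9.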

In [None]:
# def filter_36(x, y):
#     keep = (y == 3) | (y == 6)
#     x, y = x[keep], y[keep]
#     y = y == 3
#     return x, y

In [None]:
# No longer filtering to 3 vs 6 – use full MNIST 0–9
print("Number of training examples:", len(x_train))
print("Number of test     examples:", len(x_test))


Show the first example:

In [None]:
print(y_train[0])

plt.imshow(x_train[0, :, :, 0])
plt.colorbar()

### 1.2 Downscale the images

An image size of 28x28 is much too large for current quantum computers. Resize the image down to 4x4:

In [None]:
x_train_small = tf.image.resize(x_train, (4, 4)).numpy()
x_test_small = tf.image.resize(x_test, (4, 4)).numpy()

Again, display the first training example—after resize:

In [None]:
print(y_train[0])

plt.imshow(x_train_small[0, :, :, 0], vmin=0, vmax=1)
plt.colorbar()

### 1.3 Remove contradictory examples

From section *3.3 Learning to Distinguish Digits* of <a href="https://arxiv.org/pdf/1802.06002.pdf" class="external">Farhi et al.</a>, filter the dataset to remove images that are labeled as belonging to both classes.

This is not a standard machine-learning procedure, but is included in the interest of following the paper.

In [None]:
def remove_contradicting(xs, ys):
    """Deduplicate images and drop every image that carries conflicting labels.

    Images are compared by their flattened pixel values. Identical images that
    always share a single label are collapsed into one example; identical
    images seen with two or more different labels are removed entirely.

    Works for any hashable label type (booleans for a binary task, integers
    for a multi-class task). A short summary is printed as a side effect.

    Args:
        xs: Iterable of image arrays (each must support `.flatten()`).
        ys: Iterable of labels, aligned with `xs`.

    Returns:
        A tuple `(new_x, new_y)` of NumPy arrays holding the surviving unique
        images and their labels, in order of first appearance.
    """
    mapping = collections.defaultdict(set)
    orig_x = {}
    # Determine the set of labels seen for each unique image.
    for x, y in zip(xs, ys):
        key = tuple(x.flatten())
        orig_x[key] = x
        mapping[key].add(y)

    new_x = []
    new_y = []
    for key, labels in mapping.items():
        # Images that match more than one label are thrown out.
        if len(labels) == 1:
            new_x.append(orig_x[key])
            new_y.append(next(iter(labels)))

    # Per-label tally of the surviving images. Counting by actual label value
    # (rather than testing for True/False) keeps the report correct when the
    # labels are digits instead of booleans.
    per_label = collections.Counter(new_y)
    # Everything that was not kept had two *or more* labels.
    num_contradicting = len(mapping) - len(new_x)

    print("Number of unique images:", len(mapping))
    for label in sorted(per_label):
        print(f"Number of unique images labeled {label}: ", per_label[label])
    print("Number of unique images with contradicting labels: ",
          num_contradicting)
    print()
    print("Initial number of images: ", len(xs))
    print("Remaining non-contradicting unique images: ", len(new_x))

    return np.array(new_x), np.array(new_y)

The resulting counts do not closely match the reported values, but the exact procedure is not specified.

It is also worth noting that filtering out contradictory examples at this point does not totally prevent the model from receiving contradictory training examples: the next step binarizes the data, which will cause more collisions.

In [None]:
x_train_nocon, y_train_nocon = remove_contradicting(x_train_small, y_train)

### 1.4 Encode the data as quantum circuits

To process images using a quantum computer, <a href="https://arxiv.org/pdf/1802.06002.pdf" class="external">Farhi et al.</a> proposed representing each pixel with a qubit, with the state depending on the value of the pixel. The first step is to convert to a binary encoding.

In [None]:
THRESHOLD = 0.5

x_train_bin = np.array(x_train_nocon > THRESHOLD, dtype=np.float32)
x_test_bin = np.array(x_test_small > THRESHOLD, dtype=np.float32)

If you were to remove contradictory images at this point you would be left with only 193, likely not enough for effective training.

In [None]:
_ = remove_contradicting(x_train_bin, y_train_nocon)

The qubits at pixel indices whose values exceed the threshold are flipped with an $X$ gate.

In [None]:
def convert_to_circuit(image):
    """Encode a binarized image as a circuit of X gates.

    The image is flattened and pixel `i` is paired with the `i`-th qubit of a
    4x4 `cirq.GridQubit` grid. Every truthy pixel contributes one X gate on its
    qubit; falsy pixels contribute nothing.
    """
    grid = cirq.GridQubit.rect(4, 4)
    pixels = np.ndarray.flatten(image)

    encoded = cirq.Circuit()
    # Lazily pick out the qubits whose pixel is set, then flip each of them.
    lit_qubits = (grid[idx] for idx, pixel in enumerate(pixels) if pixel)
    for qubit in lit_qubits:
        encoded.append(cirq.X(qubit))
    return encoded


x_train_circ = [convert_to_circuit(x) for x in x_train_bin]
x_test_circ = [convert_to_circuit(x) for x in x_test_bin]

Here is the circuit created for the first example (circuit diagrams do not show qubits with zero gates):

In [None]:
SVGCircuit(x_train_circ[0])

Compare this circuit to the indices where the image value exceeds the threshold:

In [None]:
bin_img = x_train_bin[0, :, :, 0]
indices = np.array(np.where(bin_img)).T
indices

Convert these `Cirq` circuits to tensors for `tfq`:

In [None]:
x_train_tfcirc = tfq.convert_to_tensor(x_train_circ)
x_test_tfcirc = tfq.convert_to_tensor(x_test_circ)

## 2. Quantum neural network

There is little guidance for a quantum circuit structure that classifies images. Since the classification is based on the expectation of the readout qubit, <a href="https://arxiv.org/pdf/1802.06002.pdf" class="external">Farhi et al.</a> propose using two qubit gates, with the readout qubit always acted upon. This is similar in some ways to running a small <a href="https://arxiv.org/abs/1511.06464" class="external">Unitary RNN</a> across the pixels.

### 2.1 Build the model circuit

The following example shows this layered approach. Each layer uses *n* instances of the same gate, with each of the data qubits acting on the readout qubit.

Start with a simple class that will add a layer of these gates to a circuit:

In [None]:
class CircuitLayerBuilder():
    """Adds layers of parametrized two-qubit gates between data qubits and a readout qubit."""

    def __init__(self, data_qubits, readout):
        # Qubits carrying the encoded data, and the single qubit every gate
        # in a layer also acts on.
        self.data_qubits = data_qubits
        self.readout = readout

    def add_layer(self, circuit, gate, prefix):
        """Append one `gate(data_qubit, readout)**symbol` per data qubit to `circuit`.

        Each gate gets its own sympy symbol named `<prefix>-<index>`, where
        index is the position of the data qubit in `self.data_qubits`.
        """
        for index, data_qubit in enumerate(self.data_qubits):
            exponent = sympy.Symbol('-'.join((prefix, str(index))))
            parametrized_gate = gate(data_qubit, self.readout)**exponent
            circuit.append(parametrized_gate)

Build an example circuit layer to see how it looks:

In [None]:
demo_builder = CircuitLayerBuilder(data_qubits=cirq.GridQubit.rect(4, 1),
                                   readout=cirq.GridQubit(-1, -1))

circuit = cirq.Circuit()
demo_builder.add_layer(circuit, gate=cirq.XX, prefix='xx')
SVGCircuit(circuit)

Now build a two-layered model, matching the data-circuit size, and include the preparation and readout operations.

In [None]:
def create_quantum_model():
    """Build the QNN model circuit and its readout operator.

    Returns:
        A tuple `(circuit, readout_op)`: the parametrized circuit acting on a
        4x4 grid of data qubits plus one readout qubit at (-1, -1), and the
        `cirq.Z` operator on that readout qubit.
    """
    readout = cirq.GridQubit(-1, -1)
    data_qubits = cirq.GridQubit.rect(4, 4)

    circuit = cirq.Circuit()

    # Prepare the readout qubit: X followed by H.
    for prep_gate in (cirq.X, cirq.H):
        circuit.append(prep_gate(readout))

    # Parametrized layers, applied in order (experiment by adding more).
    builder = CircuitLayerBuilder(data_qubits=data_qubits, readout=readout)
    for gate, prefix in ((cirq.XX, "xx1"), (cirq.ZZ, "zz1")):
        builder.add_layer(circuit, gate, prefix)

    # Close with a Hadamard on the readout qubit before it is read out with Z.
    circuit.append(cirq.H(readout))

    readout_op = cirq.Z(readout)
    return circuit, readout_op

In [None]:
model_circuit, model_readout = create_quantum_model()

### 2.2 Wrap the model-circuit in a tfq-keras model

Build the Keras model with the quantum components. This model is fed the "quantum data", from `x_train_circ`, that encodes the classical data. It uses a *Parametrized Quantum Circuit* layer, `tfq.layers.PQC`, to train the model circuit, on the quantum data.

To classify these images, <a href="https://arxiv.org/pdf/1802.06002.pdf" class="external">Farhi et al.</a> proposed taking the expectation of a readout qubit in a parameterized circuit. The expectation returns a value between 1 and -1.

In [None]:
# Build the Keras model.
model = tf.keras.Sequential([
    # The input is the data-circuit, encoded as a tf.string
    tf.keras.layers.Input(shape=(), dtype=tf.string),
    # The PQC layer returns the expected value of the readout gate, range [-1,1].
    tfq.layers.PQC(model_circuit, model_readout),
])

Next, describe the training procedure to the model, using the `compile` method.

Since the expected readout is in the range `[-1,1]`, optimizing the hinge loss is a somewhat natural fit.

Note: Another valid approach would be to shift the output range to `[0,1]`, and treat it as the probability the model assigns to class `3`. This could be used with a standard `tf.losses.BinaryCrossentropy` loss.

To use the hinge loss here you need to make two small adjustments. First convert the labels, `y_train_nocon`, from boolean to `[-1,1]`, as expected by the hinge loss.

In [None]:
# Map labels to the hinge-loss convention via y -> 2*y - 1, which sends 0 to
# -1 and 1 to +1.
# NOTE(review): this only produces {-1, +1} for labels in {0, 1}. The 3-vs-6
# filter earlier in the notebook is commented out, so the labels reaching this
# cell appear to be the raw digits 0-9, which would map to -1, 1, 3, ..., 17.
# Confirm this is intended before training a single-readout model with hinge
# loss on these targets.
y_train_hinge = 2.0 * y_train_nocon - 1.0
y_test_hinge = 2.0 * y_test - 1.0

Second, use a custom `hinge_accuracy` metric that correctly handles `[-1, 1]` as the `y_true` labels argument
(`tf.keras.metrics.BinaryAccuracy(threshold=0.0)` expects `y_true` to be a boolean, and so can't be used with hinge loss).

In [None]:
def hinge_accuracy(y_true, y_pred):
    """Accuracy for targets and predictions that use a sign convention.

    Both tensors are squeezed and thresholded at 0.0; the result is the mean
    (as float32) of the element-wise agreement between the two boolean masks.
    """
    label_is_positive = tf.greater(tf.squeeze(y_true), 0.0)
    pred_is_positive = tf.greater(tf.squeeze(y_pred), 0.0)
    agreement = tf.cast(label_is_positive == pred_is_positive, tf.float32)
    return tf.reduce_mean(agreement)

In [None]:
model.compile(loss=tf.keras.losses.Hinge(),
              optimizer=tf.keras.optimizers.Adam(),
              metrics=[hinge_accuracy, 'accuracy'])

In [None]:
print(model.summary())

### Train the quantum model

Now train the model—this takes about 45 min. If you don't want to wait that long, use a small subset of the data (set `NUM_EXAMPLES=500`, below). This doesn't really affect the model's progress during training (it only has 32 parameters, and doesn't need much data to constrain these). Using fewer examples just ends training earlier (5min), but runs long enough to show that it is making progress in the validation logs.

In [None]:
# Number of epochs passed to model.fit for the quantum model.
EPOCHS = 10
# NOTE(review): the tf.data pipeline built further down defines its own
# batch_size and model.fit is not given a batch size, so this constant looks
# unused -- confirm and either wire it in or remove it.
BATCH_SIZE = 32

# Train on every available circuit; lower this to train on a subset.
NUM_EXAMPLES = len(x_train_tfcirc)

In [None]:
x_train_tfcirc_sub = x_train_tfcirc[:NUM_EXAMPLES]
y_train_hinge_sub = y_train_hinge[:NUM_EXAMPLES]

In [None]:
# 4) Build efficient tf.data pipelines for quantum data
batch_size = 512   # bump up until you hit OOM for max throughput

import tensorflow as tf

# Shuffle over the whole training subset (the buffer must be at least 1).
shuffle_buffer = max(len(y_train_hinge_sub), 1)

# Training pipeline. Order matters: cache() comes BEFORE shuffle() so that only
# the raw examples are cached and a fresh shuffle/batching is produced on every
# epoch. Caching after shuffle+batch would replay the first epoch's batches
# unchanged for all later epochs.
train_ds = (
    tf.data.Dataset
      .from_tensor_slices((x_train_tfcirc_sub, y_train_hinge_sub))
      .cache()
      .shuffle(buffer_size=shuffle_buffer, reshuffle_each_iteration=True)
      .batch(batch_size)
      .prefetch(tf.data.AUTOTUNE)
)

# Validation pipeline. There is no shuffling here, so caching the already
# batched data is safe and avoids re-batching every epoch.
val_ds = (
    tf.data.Dataset
      .from_tensor_slices((x_test_tfcirc, y_test_hinge))
      .batch(batch_size)
      .cache()
      .prefetch(tf.data.AUTOTUNE)
)
print("Training pipeline ready:", train_ds, "\nValidation pipeline:", val_ds)

Training this model to convergence should achieve >85% accuracy on the test set.

In [None]:
# Train the quantum model on the prepared pipelines; validation runs on the
# noisy test set at the end of every epoch.
qnn_history = model.fit(
    train_ds,
    epochs=EPOCHS,
    verbose=1,
    validation_data=val_ds
)

# Final evaluation on the raw circuit tensor. Use the hinge-encoded labels --
# the same targets the model was trained and validated against -- so that the
# reported loss and metrics are comparable with the validation logs.
qnn_results = model.evaluate(x_test_tfcirc, y_test_hinge)

Note: The training accuracy reports the average over the epoch. The validation accuracy is evaluated at the end of each epoch.

## 3. Classical neural network

While the quantum neural network works for this simplified MNIST problem, a basic classical neural network can easily outperform a QNN on this task. After a single epoch, a classical neural network can achieve >98% accuracy on the holdout set.

In the following example, a classical neural network is used for the 3-6 classification problem using the entire 28x28 image instead of subsampling the image. This easily converges to nearly 100% accuracy on the test set.

In [None]:
def create_classical_model():
    """Build the full-resolution convolutional classifier.

    Takes 28x28x1 images and ends in a 10-unit softmax layer. The returned
    model is not compiled.
    """
    cnn = tf.keras.Sequential()

    # Convolutional feature extractor.
    cnn.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu',
                                   input_shape=(28, 28, 1)))
    cnn.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
    cnn.add(tf.keras.layers.MaxPooling2D((2, 2)))
    cnn.add(tf.keras.layers.Dropout(0.25))

    # Dense classification head.
    cnn.add(tf.keras.layers.Flatten())
    cnn.add(tf.keras.layers.Dense(128, activation='relu'))
    cnn.add(tf.keras.layers.Dropout(0.5))
    cnn.add(tf.keras.layers.Dense(10, activation='softmax'))
    return cnn

model = create_classical_model()
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy']
)
model.summary()


In [None]:
fullcnn_history = model.fit(x_train,
          y_train,
          batch_size=128,
          epochs=10,
          verbose=1,
          validation_data=(x_test, y_test))

cnn_results = model.evaluate(x_test, y_test)

The above model has nearly 1.2M parameters. For a more fair comparison, try a 64-parameter model (a 2-unit hidden layer followed by a 10-way output layer) on the subsampled images:

In [None]:
def create_fair_classical_model():
    """Build a tiny dense classifier for the 4x4 downscaled images.

    The 4x4x1 input is flattened, passed through a 2-unit ReLU layer, and then
    through a 10-unit output layer with no activation, so the outputs are
    logits. The returned model is not compiled.
    """
    return tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(4, 4, 1)),
        tf.keras.layers.Dense(2, activation='relu'),
        # 10-way output; no activation here, so these are raw logits.
        tf.keras.layers.Dense(10),
    ])

model = create_fair_classical_model()
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy']
)

model.summary()

In [None]:
cnn_history = model.fit(
    x_train_nocon,         # your 4×4 train images (0–9)
    y_train_nocon,         # the matching digit labels 0–9
    batch_size=128,
    epochs=20,
    verbose=2,
    validation_data=(x_test_small, y_test)  # 4×4 test images & labels
)

fair_nn_results = model.evaluate(x_test_small, y_test)


## 4. Comparison

Higher resolution input and a more powerful model make this problem easy for the CNN, while a classical model of similar size to the QNN (a few dozen parameters) trains to a similar accuracy in a fraction of the time. One way or the other, the classical neural network easily outperforms the quantum neural network. For classical data, it is difficult to beat a classical neural network.

In [None]:
qnn_accuracy = qnn_results[1]
cnn_accuracy = cnn_results[1]
fair_nn_accuracy = fair_nn_results[1]

sns.barplot(x=["Quantum", "Classical, full", "Classical, fair"],
            y=[qnn_accuracy, cnn_accuracy, fair_nn_accuracy])

In [None]:
# 4) 3D contour map of training loss over (epoch, validation loss)
import numpy as np
import matplotlib.pyplot as plt
# Not referenced directly below; presumably kept so that the '3d' projection is
# registered on older Matplotlib versions.
from mpl_toolkits.mplot3d import Axes3D
from scipy.interpolate import griddata

history = qnn_history

# Per-epoch series recorded by model.fit.
epochs   = np.arange(1, len(history.history['loss']) + 1)
val_loss = np.array(history.history['val_loss'])
loss     = np.array(history.history['loss'])

# Regular 60x60 grid spanning the observed epoch and validation-loss ranges.
grid_x = np.linspace(epochs.min(),   epochs.max(),   60)
grid_y = np.linspace(val_loss.min(), val_loss.max(), 60)
X, Y   = np.meshgrid(grid_x, grid_y)

# Interpolate the scattered (epoch, val_loss) -> loss points onto that grid.
Z = griddata(
    points=(epochs, val_loss),
    values=loss,
    xi=(X, Y),
    method='cubic'      # try 'linear' if cubic is too slow or creates NaNs
)

# Plot the contour.
fig = plt.figure(figsize=(8,6))
ax  = fig.add_subplot(111, projection='3d')

ax.contour3D(X, Y, Z, 30, cmap='viridis')
ax.set_xlabel('Epoch')
ax.set_ylabel('Validation Loss')
# labelpad moves the z label clear of the tick labels. The previous
# set_label_coords(1.10, 0.5, 0) call was dropped: its third positional
# argument is the transform, and 0 is not a valid transform.
ax.set_zlabel('Loss', rotation=90, labelpad=20)
# The title names the quantities actually on the axes.
ax.set_title('3D Contour: Loss over Epoch & Validation Loss')

fig.subplots_adjust(left=0.1, right=0.85, bottom=0.1, top=0.9)
plt.show()




In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 1) Pull the per-epoch series out of qnn_history.
metrics    = qnn_history.history
loss       = np.array(metrics['loss'])
hinge_acc  = np.array(metrics['hinge_accuracy'])
val_loss   = np.array(metrics.get('val_loss', loss))   # falls back to the training loss if val_loss was not logged

# 2) Compute the "volcano" axes:
#    - X: log2 of (hinge accuracy / mean hinge accuracy)
#    - Y: -log10 of (validation loss / max validation loss)
#    NOTE(review): an epoch with hinge accuracy 0 would make log2 return -inf.
log2_fc         = np.log2(hinge_acc / hinge_acc.mean())
norm_val_loss   = val_loss / val_loss.max()
neg_log10_vloss = -np.log10(norm_val_loss + 1e-8)       # small offset avoids log10(0)

# 3) Color by epoch index (1-based).
epochs = np.arange(1, len(loss) + 1)

# 4) Scatter plot, one point per epoch.
plt.figure(figsize=(8,6))
sc = plt.scatter(
    log2_fc, neg_log10_vloss,
    c=epochs, cmap='plasma',
    s=80, edgecolors='k', linewidth=0.5, alpha=0.9
)
plt.colorbar(sc, label='Epoch')

# 5) Guide lines: vertical at x = +1 and x = -1, horizontal at -log10(0.5).
plt.axvline(+1, linestyle='--', color='gray')
plt.axvline(-1, linestyle='--', color='gray')
plt.axhline(-np.log10(0.5), linestyle='--', color='gray')

plt.xlabel('Log₂ Fold Change (Hinge Accuracy)')
plt.ylabel('-Log₁₀ Normalized Val Loss')
plt.title('Volcano Plot from qnn_history')
plt.grid(alpha=0.3, linestyle='--')
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import griddata

# 1) Per-epoch series from the QNN run; val_loss falls back to the training
#    loss when it was not logged.
metrics   = qnn_history.history
epochs    = np.arange(1, len(metrics['loss']) + 1)
val_loss  = np.array(metrics.get('val_loss', metrics['loss']))
loss      = np.array(metrics['loss'])

# 2) Regular 100x100 grid spanning the observed epoch / validation-loss ranges.
grid_x = np.linspace(epochs.min(),    epochs.max(),    100)
grid_y = np.linspace(val_loss.min(),  val_loss.max(),  100)
X, Y   = np.meshgrid(grid_x, grid_y)

# 3) Interpolate the scattered (epoch, val_loss) -> loss points onto the grid.
Z = griddata(
    points=(epochs, val_loss),
    values=loss,
    xi=(X, Y),
    method='cubic'    # or 'linear' if you prefer
)

# 4) 2D contour plot: filled contours with labeled contour lines on top.
plt.figure(figsize=(8,6))
# filled contours
cf = plt.contourf(X, Y, Z, levels=20, alpha=0.8)
# contour lines
c  = plt.contour( X, Y, Z, levels=10, colors='k', linewidths=0.5)
plt.clabel(c, inline=True, fmt="%.2f", fontsize=8)

plt.xlabel('Epoch')
plt.ylabel('Validation Loss')
plt.title('Contour Plot of Loss over Epoch & Validation Loss')
plt.colorbar(cf, label='Loss')
plt.grid(alpha=0.3, linestyle='--')
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Per-epoch series from the QNN run. Validation series fall back to their
# training counterparts when they were not logged.
m   = qnn_history.history
epochs = np.arange(1, len(m['loss']) + 1)
loss      = np.array(m['loss'])
val_loss  = np.array(m.get('val_loss', loss))
hinge_acc = np.array(m['hinge_accuracy'])
# Read the validation *hinge* accuracy so the curve matches its
# 'Val Hinge Acc' label (the plain 'val_accuracy' entry is a different metric).
val_acc   = np.array(m.get('val_hinge_accuracy', hinge_acc))

# start the figure
fig, ax1 = plt.subplots(figsize=(8,5))

# left axis: Loss
ax1.plot(epochs, loss,      label='Train Loss', color='tab:blue',   linewidth=2)
ax1.plot(epochs, val_loss,  label='Val Loss',   color='tab:blue',   linestyle='--')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.tick_params(axis='y', labelcolor='tab:blue')

# twin axis for Accuracy
ax2 = ax1.twinx()
ax2.plot(epochs, hinge_acc, label='Train Hinge Acc', color='tab:orange', linewidth=2)
ax2.plot(epochs, val_acc,   label='Val Hinge Acc',   color='tab:orange', linestyle='--')
ax2.set_ylabel('Hinge Accuracy')
ax2.tick_params(axis='y', labelcolor='tab:orange')

# One combined legend for the lines of both axes.
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc='center right')

# Highlight the epoch with the best training hinge accuracy (1-based epoch).
best_epoch = hinge_acc.argmax() + 1
best_acc   = hinge_acc.max()
ax2.scatter([best_epoch], [best_acc], s=100, edgecolors='k', zorder=5)
ax2.annotate(f'Best @ {best_epoch}\n{best_acc:.3f}',
             xy=(best_epoch, best_acc),
             xytext=(best_epoch+0.5, best_acc-0.05),
             arrowprops=dict(arrowstyle='->', color='k'),
             color='tab:orange')

# styling
ax1.grid(True, linestyle='--', alpha=0.3)
plt.title('Training & Validation Loss / Hinge Accuracy vs Epoch')
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 0) Per-epoch training loss and hinge accuracy from the QNN run.
metrics    = qnn_history.history
epochs     = np.arange(1, len(metrics['loss']) + 1)
loss       = np.array(metrics['loss'])
hinge_acc  = np.array(metrics['hinge_accuracy'])

# 1) Loss gradient over epochs (np.gradient finite differences).
grad_loss = np.gradient(loss)
plt.figure()
plt.plot(epochs, grad_loss, marker='o')
plt.xlabel('Epoch')
plt.ylabel('d(Loss)/d(Epoch)')
plt.title('Loss Gradient')
plt.grid(True, linestyle='--', alpha=0.5)
plt.show()

# 2) Hinge-accuracy gradient over epochs.
grad_acc = np.gradient(hinge_acc)
plt.figure()
plt.plot(epochs, grad_acc, marker='o')
plt.xlabel('Epoch')
plt.ylabel('d(Hinge Accuracy)/d(Epoch)')
plt.title('Hinge Accuracy Gradient')
plt.grid(True, linestyle='--', alpha=0.5)
plt.show()

# 3) Hexbin of loss vs. hinge accuracy; only bins with at least one point
#    are drawn (mincnt=1). There is one point per epoch.
plt.figure()
hb = plt.hexbin(loss, hinge_acc, gridsize=20, mincnt=1)
plt.xlabel('Loss')
plt.ylabel('Hinge Accuracy')
plt.title('Hexbin: Loss vs. Hinge Accuracy')
plt.colorbar(hb, label='Count')
plt.show()

# 4) Phase plot: the same points joined in epoch order.
plt.figure()
plt.plot(loss, hinge_acc, '-o')
plt.xlabel('Loss')
plt.ylabel('Hinge Accuracy')
plt.title('Phase Plot: Loss vs. Hinge Accuracy')
plt.grid(True, linestyle='--', alpha=0.5)
plt.show()

# 5) Stacked area chart of min-max normalized loss and hinge accuracy.
# Both series are rescaled to [0, 1].
# NOTE(review): if either series is constant across epochs, max - min is 0 and
# the division below is a divide-by-zero -- confirm that cannot happen here.
loss_norm = (loss - loss.min()) / (loss.max() - loss.min())
acc_norm  = (hinge_acc - hinge_acc.min()) / (hinge_acc.max() - hinge_acc.min())

plt.figure()
plt.stackplot(epochs, loss_norm, acc_norm, labels=['Loss', 'Hinge Accuracy'])
plt.xlabel('Epoch')
plt.ylabel('Normalized Value')
plt.title('Stacked Area: Loss & Hinge Accuracy over Epoch')
plt.legend(loc='upper right')
plt.grid(True, linestyle='--', alpha=0.3)
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Keep only the candidate metric names that are present in the QNN history,
# then stack them into a (metric, epoch) array -- one row per metric.
m       = qnn_history.history
keys    = [k for k in ('loss','val_loss','accuracy','val_accuracy','hinge_accuracy') if k in m]
data    = np.vstack([m[k] for k in keys])

# Draw the heatmap: rows are metrics, columns are epochs. All rows share one
# color scale, so metrics with larger magnitudes dominate the colormap.
fig, ax = plt.subplots(figsize=(10,3))
im = ax.imshow(
    data,
    aspect='auto',
    cmap='magma',
    interpolation='nearest'
)
ax.set_yticks(np.arange(len(keys)))
ax.set_yticklabels(keys)
# One tick per epoch column, labeled with the 1-based epoch number.
ax.set_xticks(np.arange(len(data[0])))
ax.set_xticklabels(np.arange(1, len(data[0]) + 1), rotation=90)
ax.set_xlabel('Epoch')
ax.set_title('Heatmap of Metrics per Epoch')
cbar = plt.colorbar(im, ax=ax, pad=0.02)
cbar.set_label('Metric Value', rotation=270, labelpad=15)
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt


def plot_validation_curve(values, title, ylabel, color):
    """Plot one per-epoch validation series on its own 6x4 figure.

    Args:
        values: Sequence with one metric value per epoch.
        title: Figure title.
        ylabel: Label for the y axis.
        color: Matplotlib color for the line and markers.
    """
    epochs = range(1, len(values) + 1)  # 1-based epoch numbers
    plt.figure(figsize=(6,4))
    plt.plot(epochs, values, marker='o', color=color)
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.grid(True)
    plt.tight_layout()
    plt.show()


# (display name, History object, key of the validation accuracy series).
# The QNN's accuracy is logged under its custom hinge metric.
validation_runs = [
    ('QNN',      qnn_history,     'val_hinge_accuracy'),
    ('Fair CNN', cnn_history,     'val_accuracy'),
    ('Full CNN', fullcnn_history, 'val_accuracy'),
]

# For every model: one accuracy figure followed by one loss figure.
for model_name, run_history, accuracy_key in validation_runs:
    plot_validation_curve(run_history.history[accuracy_key],
                          f'{model_name} Accuracy (Testing)',
                          'Testing Accuracy',
                          color='C0')
    plot_validation_curve(run_history.history['val_loss'],
                          f'{model_name} Loss (Testing)',
                          'Testing Loss',
                          color='C1')


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 1) Collect the weight arrays of `model`, flattened, with a label for each.
#    NOTE(review): `model` is rebound several times in this notebook (quantum
#    model, full CNN, small dense model); this cell plots whichever one was
#    assigned last -- confirm that is the model you intend to inspect.
#    NOTE(review): the labels assume the weights alternate kernel, bias,
#    kernel, bias, ... with exactly two arrays per layer -- confirm.
all_weights = model.get_weights()
labels = []
data   = []
for idx, w in enumerate(all_weights):
    layer   = idx // 2
    ptype   = 'Kernel' if idx % 2 == 0 else 'Bias'
    labels.append(f'Layer {layer} {ptype}')
    data.append(w.flatten())

# 2) Z-score each array separately so the distributions are comparable;
#    the 1e-8 guards against a zero standard deviation.
data_z = [(d - d.mean()) / (d.std() + 1e-8) for d in data]

# 3) One color per weight array.
colors = plt.cm.Set2(np.linspace(0, 1, len(data_z)))

# 4) Violin plot, one violin per weight array.
fig, ax = plt.subplots(figsize=(12, 5))
parts = ax.violinplot(
    data_z,
    showmedians=True,
    showextrema=False,
    widths=0.7
)

# Style each violin body.
for body, c in zip(parts['bodies'], colors):
    body.set_facecolor(c)
    body.set_edgecolor('black')
    body.set_alpha(0.8)

# Mark the mean of each standardized array with a diamond
# (violin positions are 1-based).
for i, d in enumerate(data_z, start=1):
    ax.scatter(i, np.mean(d), color='k', marker='D', s=30, zorder=3)

ax.set_xticks(np.arange(1, len(labels)+1))
ax.set_xticklabels(labels, rotation=45, ha='right')
ax.set_ylabel('Z‑score of Weights')
ax.set_title('Standardized Violin Plot of Model Weights')
ax.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import pandas as pd

# History objects used below, as named in this notebook:
#   qnn_history     - History from training the quantum model
#   fullcnn_history - History from training the full 28x28 CNN
#   cnn_history     - History from training the small model on 4x4 images

# Per-model averages over all epochs of the validation loss and validation
# accuracy. The QNN row uses its custom hinge accuracy; the two classical rows
# use the built-in 'accuracy' metric.
summary = {
    'Model':      ['QNN', 'full_CNN', 'fair_CNN'],
    'Avg Loss':   [
        np.mean(qnn_history.history['val_loss']),
        np.mean(fullcnn_history.history['val_loss']),
        np.mean(cnn_history.history['val_loss']),
    ],
    'Avg Accuracy': [
        np.mean(qnn_history.history['val_hinge_accuracy']),
        np.mean(fullcnn_history.history['val_accuracy']),
        np.mean(cnn_history.history['val_accuracy']),
    ]
}

df = pd.DataFrame(summary)
# Round to four decimals for readability.
df['Avg Loss']      = df['Avg Loss'].round(4)
df['Avg Accuracy']  = df['Avg Accuracy'].round(4)

display(df)


In [None]:
import matplotlib.pyplot as plt

# Map a human-readable name to each History object.
histories = {
    'Quantum QNN':   qnn_history,
    'CNN (28×28)':   fullcnn_history,
    'CNN (4×4)':     cnn_history,
}

# History keys holding each model's (train, validation) accuracy. The QNN's
# accuracy lives under its custom hinge metric; it is looked up by key here
# instead of being copied over the 'accuracy' / 'val_accuracy' entries, so the
# recorded history is never modified and the cell can be re-run safely.
default_accuracy_keys = ('accuracy', 'val_accuracy')
accuracy_keys = {
    'Quantum QNN': ('hinge_accuracy', 'val_hinge_accuracy'),
}

# 1) Training (solid) and validation (dashed) accuracies.
plt.figure(figsize=(10, 6))
for name, h in histories.items():
    train_key, val_key = accuracy_keys.get(name, default_accuracy_keys)
    plt.plot(h.history[train_key], label=f'{name} ▶ train')
    plt.plot(h.history[val_key], linestyle='--', label=f'{name} ▶ val')
plt.title('Model Accuracies Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.grid(alpha=0.3)
plt.show()

# 2) Training (solid) and validation (dashed) losses.
plt.figure(figsize=(10, 6))
for name, h in histories.items():
    plt.plot(h.history['loss'],           label=f'{name} ▶ train')
    plt.plot(h.history['val_loss'],       linestyle='--', label=f'{name} ▶ val')
plt.title('Model Losses Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper right')
plt.grid(alpha=0.3)
plt.show()
