# A QNN based on Parametric Quantum Circuit for classification of MNIST images
## Tutorial - Hands-on ##

**Version:** V2.0 <p>
**Authors:** Stefano Giagu <stefano.giagu@uniroma1.it>


**Scope:** learn how to design a simple variational PQC (Parametric Quantum Circuit) and train it for a binary classification task using the [pennylane](https://pennylane.ai/) platform with a [pytorch](https://pytorch.org/) backend

**Libraries:** numpy, matplotlib, pennylane, torch

In [None]:
# only needed on google colab
#!pip install pennylane-lightning-gpu

In [None]:
# import needed libraries
import numpy as np
import matplotlib.pyplot as plt
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0" 
os.environ["WORLD_SIZE"] = "1"

from time import time
from tqdm import tqdm

import torch
import pennylane as qml
print('Torch version: ', torch.__version__)
print('Pennylane version: ', qml.__version__)

In [None]:
# check if GPU is available
# Note: the example can also be run on CPU w/o problems

if torch.cuda.is_available():
  print('Number of available GPUs: ',torch.cuda.device_count())
  for i in range(0,torch.cuda.device_count()):
    print(torch.cuda.get_device_name(i))
  !nvidia-smi
else:
  print('No GPU available')

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# Check the PennyLane software stack: qml.about() reports information about the installed PennyLane setup
qml.about()

In [None]:
# Download the MNIST training set (from the torchvision repository) and keep only the digits 0 and 1

import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Preprocessing applied to every image when it is read from the dataset.
# MNIST images are downsampled from 28x28 to 16x16 pixels in order to reduce the number of
# qubits needed to represent each image.
mnist_train = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.Resize((16, 16)),                 # resize to 16x16
        transforms.ToTensor(),                       # convert to torch tensors
        transforms.Normalize((0.1307,), (0.3081,)),  # standardize
    ]),
)

# Binary task: keep only the (image, label) pairs whose label is 0 or 1
binary_samples = [(image.squeeze(), label) for image, label in mnist_train if label in (0, 1)]
data = torch.stack([image for image, _ in binary_samples])
targets = torch.tensor([label for _, label in binary_samples])

# Split the selected images by class
zeros_indices = targets == 0
ones_indices = targets == 1
zeros = data[zeros_indices]
ones = data[ones_indices]

# Use a subset to limit the training time: per class, the first 1024 images go to the
# training set and the following 1024 images to the test set
zeros_train, zeros_test = zeros[:1024], zeros[1024:2048]
ones_train, ones_test = ones[:1024], ones[1024:2048]

In [None]:
# normalize images in [0,1]

def normalize(imgs):
  """Rescale every image of a batch independently to the [0, 1] range.

  Args:
    imgs: tensor whose first dimension indexes the images, e.g. (N, H, W).
      All remaining dimensions are treated as the pixels of one image, so
      any image size is accepted (not only 16x16).

  Returns:
    A tensor with the same shape as `imgs` in which each image has been
    mapped through (x - min) / (max - min), using that image's own minimum
    and maximum. An image whose pixels are all equal is mapped to all zeros
    instead of producing NaN from a 0/0 division.
  """
  pixels = imgs.flatten(start_dim=1)

  # reshape the per-image statistics to (N, 1, ..., 1) so that they broadcast over the pixels
  stat_shape = (-1,) + (1,) * (imgs.dim() - 1)
  mins = pixels.min(dim=1).values.reshape(stat_shape)
  maxes = pixels.max(dim=1).values.reshape(stat_shape)

  # constant images have max == min: replace the zero range by 1 to avoid dividing by zero
  span = maxes - mins
  span = torch.where(span == 0, torch.ones_like(span), span)

  return (imgs - mins) / span

# Bring the images of the four splits (train/test for each class) into the [0, 1] range
zeros_train, ones_train = normalize(zeros_train), normalize(ones_train)
zeros_test, ones_test = normalize(zeros_test), normalize(ones_test)

In [None]:
# Show one randomly chosen training image per class

# Pick a random index inside each training split
zero_idx = np.random.randint(0, zeros_train.shape[0])
one_idx = np.random.randint(0, ones_train.shape[0])

# Fetch the corresponding images
sample_zero, sample_one = zeros_train[zero_idx], ones_train[one_idx]

# Draw the two images side by side, in gray scale and without axes
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(4, 2))
for axis, picture, caption in ((ax1, sample_zero, 'Zero'), (ax2, sample_one, 'One')):
    axis.imshow(picture, cmap='gray')
    axis.set_title(caption)
    axis.axis('off')

plt.show()

In [None]:
# Sanity check: after normalization EVERY image must have min 0 and max 1 (within an error of 1e-5).
# The check is done per image: a min/max taken over a whole split would still pass
# when only some of the images are correctly scaled.
for split_name, split in (('zeros_train', zeros_train), ('ones_train', ones_train),
                          ('zeros_test', zeros_test), ('ones_test', ones_test)):
    pixels = split.flatten(start_dim=1)
    per_image_min = pixels.min(dim=1).values
    per_image_max = pixels.max(dim=1).values
    assert torch.allclose(per_image_min, torch.zeros_like(per_image_min), atol=1e-5), \
        f'{split_name}: some images do not have minimum 0'
    assert torch.allclose(per_image_max, torch.ones_like(per_image_max), atol=1e-5), \
        f'{split_name}: some images do not have maximum 1'

# flatten each image to a feature vector and concatenate zeros and ones in one training dataset
zeros_train = zeros_train.flatten(start_dim=1)
ones_train = ones_train.flatten(start_dim=1)
dataset_train = torch.cat((zeros_train, ones_train), dim=0)

# same for test
zeros_test = zeros_test.flatten(start_dim=1)
ones_test = ones_test.flatten(start_dim=1)
dataset_test = torch.cat((zeros_test, ones_test), dim=0)

# add labels (0. for zeros, 1. for ones) in the same order used for the concatenation above.
# The label vectors are built directly as 1-D tensors, so they keep one entry per image
# even when a split contains a single sample.
labels_train = torch.cat((torch.zeros(zeros_train.shape[0]), torch.ones(ones_train.shape[0])), dim=0)
labels_test = torch.cat((torch.zeros(zeros_test.shape[0]), torch.ones(ones_test.shape[0])), dim=0)

In [None]:
# Wrap the tensors in torch datasets and create the mini-batch loaders
BATCH_SIZE = 32

# pair every feature vector with its label
dataset_train = torch.utils.data.TensorDataset(dataset_train, labels_train)
dataset_test = torch.utils.data.TensorDataset(dataset_test, labels_test)

# both loaders use the same settings: shuffled batches, incomplete last batch dropped
loader_options = {"batch_size": BATCH_SIZE, "shuffle": True, "drop_last": True}
dataloader_train = torch.utils.data.DataLoader(dataset_train, **loader_options)
dataloader_test = torch.utils.data.DataLoader(dataset_test, **loader_options)

In [None]:
# Define the circuit size and the quantum device (e.g. the number of qubits in the circuit)


NUM_QUBITS = 8 # we need 8 qubits to encode the 16x16 = 256 = 2^8 features of one image
NUM_LAYERS = 3 # number of layers in the QNN (each layer is a parametric unitary transformation given by 3 rotations per qubit, plus CNOT gates to entangle the qubits)

# Definition of the quantum parametric circuit:
# the circuit ansatz is made of NUM_LAYERS repetitions of the same unitary block. Each block applies 3 parametric
# rotation gates (along X, Y and Z) in the Bloch sphere to each qubit, followed by a ladder of CNOT gates to entangle the qubits

dev = qml.device("default.qubit", wires=NUM_QUBITS) # "default.qubit" is the default pennylane differentiable quantum device simulator, capable of backprop derivatives

@qml.qnode(dev, interface="torch")
def circuit_block(params, state=None):
    """Parametric quantum circuit: optional amplitude encoding followed by NUM_LAYERS variational layers.

    Args:
        params: PQC parameters, indexed as params[layer, qubit, rotation] with
            rotation 0/1/2 -> RX/RY/RZ, i.e. size (NUM_LAYERS, NUM_QUBITS, 3)
            (in our case 3*8*3 = 72 parameters).
        state: classical features to load into the qubits before the
            variational layers; when None no encoding gate is applied.

    Returns:
        The result of qml.probs(wires=0): the probabilities of measuring each
        basis state of the first qubit.
    """
    # Quantum encoding of the input, if provided.
    # Amplitude encoding stores 2^n features in the amplitude vector of n qubits. To represent a valid
    # quantum state vector the L2-norm of the features must be one: this can be achieved either by manually
    # normalizing the data (ex: state = state / torch.linalg.norm(state, dim=1).view(-1, 1)), or
    # automatically by setting the argument normalize=True (done here).
    if state is not None:
        qml.AmplitudeEmbedding(features=state, wires=range(NUM_QUBITS), normalize=True)

    # Alternative, valid in simulation only: load the input directly as a state vector, skipping the quantum
    # encoding of the classical data. It requires the input to be pre-normalized as a quantum state
    # (L2 norm = 1.0: state = state / torch.linalg.norm(state, dim=1).view(-1, 1)):
    # if state is not None: qml.QubitStateVector(state, wires=range(NUM_QUBITS))
    # NOTE(review): recent PennyLane releases appear to expose this operation as qml.StatePrep - confirm
    # against the installed version before enabling the line above.

    rotation_gates = (qml.RX, qml.RY, qml.RZ)

    for layer in range(NUM_LAYERS):

        # Rotation layer: RX, RY, RZ (in this order) on every qubit
        for wire in range(NUM_QUBITS):
            for axis, gate in enumerate(rotation_gates):
                gate(params[layer, wire, axis], wires=wire)

        # Entangling layer: a closed ladder of CNOT gates (the last qubit controls qubit 0,
        # since (wire + 1) % NUM_QUBITS = 0 when wire = NUM_QUBITS - 1)
        for wire in range(NUM_QUBITS):
            qml.CNOT(wires=[wire, (wire + 1) % NUM_QUBITS])

    # Probabilities of the basis states of the first qubit
    return qml.probs(wires=0)

  # define general circuit
def circuit(params, state):
    """Run the PQC on a batch of inputs and return P(|0>) of the first qubit for each sample.

    Args:
        params: PQC parameters, forwarded unchanged to circuit_block.
        state: batch of input features, forwarded unchanged to circuit_block.

    Returns:
        Column 0 of the 2-D probabilities returned by circuit_block, i.e. the
        probability of measuring |0> in the first qubit, one value per row.
    """
    return circuit_block(params, state)[:, 0]

In [None]:
# visualize the qnn
from torch.autograd import Variable  # no longer used here: the Variable wrapper is deprecated, tensors track gradients directly

# small random parameters, only used to draw the circuit
parameters = torch.normal(mean=0., std=0.1, size=(NUM_LAYERS, NUM_QUBITS, 3)).requires_grad_()

# one batch of training images as example input (labels are not needed for drawing)
state, _ = next(iter(dataloader_train))
#state = state / torch.linalg.norm(state, dim=1).view(-1, 1) #needed in case the qml.QubitStateVector encoding is used

qml.drawer.use_style("black_white")
fig, ax = qml.draw_mpl(circuit_block)(parameters, state)
plt.show()

In [None]:
def run_exp(batch_size, num_epochs, dataloader, loss_fn):
    """Train the PQC for binary classification and print per-epoch statistics.

    The function relies on the notebook-level globals `params` (trainable
    tensor), `optimizer` (torch optimizer built on `params`), `circuit` and
    `device`.

    Args:
        batch_size: nominal batch size. Kept for backward compatibility: the
            accuracy is computed from the actual number of labels of each
            batch, so it stays correct for incomplete batches.
        num_epochs: number of passes over `dataloader`.
        dataloader: iterable yielding (data, labels) batches.
        loss_fn: callable taking (output, labels) and returning a scalar loss tensor.

    Returns:
        Tuple (avg_time_per_epoch, loss_history): the average wall-clock time
        of one epoch in seconds (0.0 when no epoch is run) and the list with
        the loss of the LAST batch of each epoch.

    Raises:
        ValueError: if `dataloader` yields no batch during an epoch.
    """
    loss_history = []
    total_time = 0.0

    # training loop for classification
    for epoch in range(num_epochs):

        t0 = time()
        batch_loss = None
        batch_accuracy = None

        # progress bar with a description showing the current epoch
        with tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}") as tqdm_epoch:
            for data, labels in tqdm_epoch:

                #needed in case the qml.QubitStateVector encoding is used
                #data = data / torch.linalg.norm(data, dim=1).view(-1, 1)

                # zero the parameter gradients
                optimizer.zero_grad()

                data = data.type(torch.float).to(device=device)
                labels = labels.type(torch.float).to(device=device)

                # forward pass
                output = circuit(params, data).to(device=device)

                # compute the loss, back-propagate and update the parameters
                loss = loss_fn(output, labels)
                loss.backward()
                optimizer.step()

                # statistics of the current batch (fraction of predictions, thresholded at 0.5, equal to the label)
                batch_loss = loss.item()
                batch_accuracy = torch.sum((output > 0.5) == labels).item() / labels.numel()
                tqdm_epoch.set_postfix(loss=batch_loss, accuracy=batch_accuracy)

        if batch_loss is None:
            raise ValueError("dataloader yielded no batches: nothing to train on")

        # measure the epoch time once, so that the printed and the accumulated values agree
        epoch_time = time() - t0
        total_time += epoch_time

        loss_history.append(batch_loss)

        print("Time per epoch: ", epoch_time)
        print("Epoch: ", epoch, "Loss: ", batch_loss)
        print("Accuracy: ", batch_accuracy)

        print("--------------------------------------------------------------------------")

    # average over the epochs actually run (the argument, not the NUM_EPOCHS global)
    avg_time_per_epoch = total_time / num_epochs if num_epochs > 0 else 0.0

    return avg_time_per_epoch, loss_history

In [None]:
def run_test(batch_size, dataloader, loss_fn):
    """Evaluate the trained PQC on a dataset and print the average loss and the accuracy.

    The function relies on the notebook-level globals `params`, `circuit`
    and `device`.

    Args:
        batch_size: nominal batch size. Kept for backward compatibility: the
            accuracy is computed from the actual number of evaluated samples.
        dataloader: iterable yielding (data, labels) batches.
        loss_fn: callable taking (output, labels) and returning a scalar loss tensor.

    Returns:
        None. The results are printed; when `dataloader` yields no batch a
        message is printed instead of the statistics.
    """
    total_loss = 0.0
    num_correct = 0
    num_samples = 0
    num_batches = 0

    # evaluation only: no gradient is needed, so avoid building the autograd graph
    with torch.no_grad():
        for data, labels in dataloader:

            # normalize
            #data = data / torch.linalg.norm(data, dim=1).view(-1, 1)

            data = data.type(torch.float).to(device=device)
            labels = labels.type(torch.float).to(device=device)

            # forward pass
            output = circuit(params, data)

            # accumulate the batch loss and the number of correct predictions (threshold at 0.5)
            total_loss += loss_fn(output, labels).item()
            num_correct += torch.sum((output > 0.5) == labels).item()
            num_samples += labels.numel()
            num_batches += 1

    if num_batches == 0:
        print("Test set is empty: nothing to evaluate")
        return

    print("Test loss: ", total_loss / num_batches)
    print("Test accuracy: ", num_correct / num_samples)

    return

In [None]:
# hyper-parameters
NUM_EPOCHS = 2
LEARNING_RATE = 1e-3

# define the cost function (binary cross-entropy)
loss_fn = torch.nn.BCELoss()

# initialize the parameters randomly.
# They are created directly on `device`, the same device the training loop moves the data to,
# so that the circuit never mixes CPU and GPU tensors; creating them there (instead of moving
# them afterwards) keeps `params` a leaf tensor that the optimizer can update.
params = torch.randn((NUM_LAYERS, NUM_QUBITS, 3), device=device, requires_grad=True)

# define the optimizer
optimizer = torch.optim.Adam([params], lr=LEARNING_RATE)

# training
print(f'\nRunning experiment with batch size {BATCH_SIZE} and layers {NUM_LAYERS}\n')

time_per_epoch, loss_history = run_exp(BATCH_SIZE, NUM_EPOCHS, dataloader_train, loss_fn)

print(f'Average time per epoch: {time_per_epoch} - BS: {BATCH_SIZE} - LAYERS: {NUM_LAYERS}\n')
print('='*50)

In [None]:
# Plot the loss history returned by run_exp (one value per training epoch)
plt.plot(loss_history)
plt.show()

In [None]:
# Test performance: run_test evaluates the circuit with the current `params` on the test dataloader and prints loss and accuracy

run_test(BATCH_SIZE, dataloader_test, loss_fn)

## Suggested Exercises:

* play with the model: change the number of layers, reduce the entanglement, ... and check the effect on the classification performance
* extend the model to multi-class MNIST classification: you can try for example to use 4 classes, that can be described by reading out 2 output qubits (00,01,10,11), or 8 classes, that can be described by reading out 3 output qubits (000,100,010,001,110,101,011,111)
* try to use other data encoding algorithms, like angular encoding: AngleEmbedding(features, wires[, rotation, id]) that encodes 𝑁 features into the rotation angles of 𝑛 qubits, where 𝑁≤𝑛. NOTE: due to this limitation and the limited amount of qubits that can be used in simulation, it is better to downsample the images to max (4,4) pixels --> 16 qubits (will be quite slow)