# Task 2 Encoding and Classifier

## Problem 
<br>
Encoding the following files in a quantum circuit mock_train_set.csv and mock_test_set.csv in at least two different ways (these could be basis, angle, amplitude, kernel or random encoding
<br>
<br>
● Design a variational quantum circuit for each of the encodings, uses the column 4 as the target, this is a binary class 0 and 1.<br>
● You must use the data from column0 to column3 for your proposed classifier.<br>
● Consider the ansatz you are going to design as a layer and find out how many layers are
necessary to reach the best performance. <br>

### Analyze and discuss the results

Feel free to use existing frameworks (e.g. PennyLane, Qiskit) for creating and training the circuits.<br><br>
This PennyLane demo can be useful: Training a quantum circuit with Pytorch,<br>
This Quantum Tensorflow tutorial can be useful: Training a quantum circuit with Tensorflow.<br>
For the variational circuit, you can try any circuit you want. You can start from one with a layer of RX, RZ and CNOTs.<br>
<br>
## References
* https://pennylane.ai/qml/demos/tutorial_state_preparation.html
* https://www.tensorflow.org/quantum/tutorials/mnist

<br>

## Clasification Using Amplitude Encoding on 2 Qubits

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from ipywidgets import widgets
from IPython.display import display, HTML

In [2]:
import pennylane as qml
from pennylane import numpy as np
from pennylane.optimize import NesterovMomentumOptimizer

import sys
from math import sqrt, pi

import pandas as pd
import scipy
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
# supress a warning that is not useful here
pd.options.mode.chained_assignment = None

# references:
# pandas dataframe:
# https://pandas.pydata.org/docs/reference/frame.html
# numpy math routines:
# https://numpy.org/doc/stable/reference/routines.math.html
# pandas quick shortcuts:
# https://www.listendata.com/2017/12/python-pandas-tutorial.html
# https://www.listendata.com/2019/06/pandas-read-csv.html#Example-How-to-read-CSV-file-without-using-Pandas-package

In [3]:
# sklearn StandardScaler
# https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html

path_train = '/_jupyter/QC/QOSF-challenge-md-2022/task-02/mock_train_set.csv'
path_test = '/_jupyter/QC/QOSF-challenge-md-2022/task-02/mock_test_set.csv'

df = pd.read_csv(path_train)
df_c = df.copy(deep=True)
df['1'] = np.log10(df_c['1'])
df['2'] = np.log10(df_c['2'])

f = lambda x: -1.0 if x==0 else 1.0
df['4'] = df_c['4'].map(f)

# npdf = df2.to_numpy()
npdf = df.to_numpy()
data = np.array(npdf)

print("Train data standardised:\n", data)

X = data[:, 0:4]
Y = data[:, -1]

# scale the data using sklearn StandardScaler
std_slc = StandardScaler(with_mean=False)
std_slc.fit(X)
X_std = std_slc.transform(X)

# normalize data using sklearn StandardScaler
normalizer = Normalizer().fit(X_std)  # fit does nothing.
X_norm = normalizer.transform(X_std)


# features will be applitudes vector
features = np.array(X_norm, requires_grad=False)
print("Train data normalized:\n", features)




Train data standardised:
 [[ 2.78926e+03  3.00000e+00  1.00000e+00  2.00000e+01 -1.00000e+00]
 [ 4.04001e+03  6.00000e+00  0.00000e+00  1.00000e+00  1.00000e+00]
 [ 2.93120e+03  4.00000e+00  4.00000e+00  4.00000e+01  1.00000e+00]
 ...
 [ 4.18281e+03  0.00000e+00  0.00000e+00  6.50000e+01 -1.00000e+00]
 [ 3.11375e+03  4.00000e+00  2.00000e+00  1.00000e+00  1.00000e+00]
 [ 4.56757e+03  4.00000e+00  5.00000e+00  9.00000e+01  1.00000e+00]]


StandardScaler(with_mean=False)

Train data normalized:
 [[0.78842549 0.50347726 0.20019498 0.29123507]
 [0.7500201  0.66134589 0.         0.00956384]
 [0.56936298 0.46130771 0.55028199 0.40026331]
 ...
 [0.78066018 0.         0.         0.62495574]
 [0.74765323 0.57024753 0.34011673 0.01236968]
 [0.5870369  0.30522994 0.45512608 0.5958881 ]]


Circuit coding for angle encoding follows pennylane technique, whch is also following the scheme in in Schuld and Petruccione (2018).<br>
""We had to also decompose controlled Y-axis rotations into more basic circuits following Nielsen and Chuang (2010).""<br>

* https://link.springer.com/book/10.1007/978-3-319-96424-9
* 

In [4]:
num_qubits = 2
num_layers = 6

dev = qml.device("default.qubit", wires=num_qubits)

### test angle encoding

In [5]:
def layer(W):
    qml.Rot(W[0, 0], W[0, 1], W[0, 2], wires=0)
    qml.Rot(W[1, 0], W[1, 1], W[1, 2], wires=1)
    qml.CNOT(wires=[0, 1])


@qml.qnode(dev)
def circuit(weights, data):

    qml.AmplitudeEmbedding(features=data, wires=range(num_qubits))

    for W in weights:
        layer(W)

    return qml.expval(qml.PauliZ(0))

draw_flag = 0
def variational_classifier(weights, bias, data):
    
    global draw_flag

    if draw_flag:
        draw_flag=0
        # qml.draw

    return circuit(weights, data) + bias

# ###############################################################################
# standard square loss
def square_loss(labels, predictions):
    
    loss = 0
    for l, p in zip(labels, predictions):
        loss = loss + (l - p) ** 2

    loss = loss / len(labels)
    return loss

# ###############################################################################
# goal: maximize accuracy
def accuracy(labels, predictions):

    loss = 0
    for l, p in zip(labels, predictions):
        if abs(l - p) < 1e-5:
            loss = loss + 1
    loss = loss / len(labels)

    return loss


def cost(weights, bias, features, labels):
    
    predictions = [variational_classifier(weights, bias, f) for f in features]
    return square_loss(labels, predictions)





In [6]:
# import matplotlib.pyplot as plt

# plt.figure()
# plt.scatter(X[:, 0][Y == 1], X[:, 1][Y == 1], c="b", marker="o", edgecolors="k")
# plt.scatter(X[:, 0][Y == -1], X[:, 1][Y == -1], c="r", marker="o", edgecolors="k")
# plt.title("Original data")
# plt.show()

# plt.figure()
# plt.scatter(X[:, 0][Y == 1], X[:, 2][Y == 1], c="b", marker="o", edgecolors="k")
# plt.scatter(X[:, 0][Y == -1], X[:, 2][Y == -1], c="r", marker="o", edgecolors="k")
# plt.title("Original data")
# plt.show()

# plt.figure()
# plt.scatter(X[:, 0][Y == 1], X[:, 3][Y == 1], c="b", marker="o", edgecolors="k")
# plt.scatter(X[:, 0][Y == -1], X[:, 3][Y == -1], c="r", marker="o", edgecolors="k")
# plt.title("Original data")
# plt.show()


# plt.figure()
# plt.scatter(X[:, 1][Y == 1], X[:, 2][Y == 1], c="b", marker="o", edgecolors="k")
# plt.scatter(X[:, 1][Y == -1], X[:, 2][Y == -1], c="r", marker="o", edgecolors="k")
# plt.title("Original data")
# plt.show()

# plt.figure()
# plt.scatter(X[:, 1][Y == 1], X[:, 3][Y == 1], c="b", marker="o", edgecolors="k")
# plt.scatter(X[:, 1][Y == -1], X[:, 3][Y == -1], c="r", marker="o", edgecolors="k")
# plt.title("Original data")
# plt.show()

# plt.figure()
# plt.scatter(X[:, 2][Y == 1], X[:, 3][Y == 1], c="b", marker="o", edgecolors="k")
# plt.scatter(X[:, 2][Y == -1], X[:, 3][Y == -1], c="r", marker="o", edgecolors="k")
# plt.title("Original data")
# plt.show()





In [7]:
# plt.figure()
# dim1 = 0
# dim2 = 2
# plt.scatter(
#     X_norm[:, dim1][Y == 1], X_norm[:, dim2][Y == 1], c="b", marker="o", edgecolors="k"
# )
# plt.scatter(
#     X_norm[:, dim1][Y == -1], X_norm[:, dim2][Y == -1], c="r", marker="o", edgecolors="k"
# )
# plt.title("Padded and normalised data (dims {} and {})".format(dim1, dim2))
# plt.show()


# plt.figure()
# dim1 = 0
# dim2 = 1
# plt.scatter(
#     features[:, dim1][Y == 1], features[:, dim2][Y == 1], c="b", marker="o", edgecolors="k"
# )
# plt.scatter(
#     features[:, dim1][Y == -1], features[:, dim2][Y == -1], c="r", marker="o", edgecolors="k"
# )
# plt.title("Feature vectors (dims {} and {})".format(dim1, dim2))
# plt.show()



# plt.figure()
# dim1 = 1
# dim2 = 2
# plt.scatter(
#     features[:, dim1][Y == 1], features[:, dim2][Y == 1], c="b", marker="o", edgecolors="k"
# )
# plt.scatter(
#     features[:, dim1][Y == -1], features[:, dim2][Y == -1], c="r", marker="o", edgecolors="k"
# )
# plt.title("Feature vectors (dims {} and {})".format(dim1, dim2))
# plt.show()

In [8]:
np.random.seed(0)
num_data = len(Y)

# num_train = int(0.75 * num_data)
num_train = int(0.75 * num_data)
index = np.random.permutation(range(num_data))

feats_train = features[index[:num_train]]
Y_train = Y[index[:num_train]]

feats_val = features[index[num_train:]]
Y_val = Y[index[num_train:]]

# We need these later for plotting
X_train = X[index[:num_train]]
X_val = X[index[num_train:]]


In [9]:
weights_init = 0.01 * np.random.randn(num_layers, num_qubits, 3, requires_grad=True)
bias_init = np.array(0.0, requires_grad=True)

# # start with learned step
# w = np.load('/_jupyter/QC/QOSF-challenge-md-2022/task-02/temp-data/variational_classifier/data/mock_train_numpy_wights_01.npy', allow_pickle=True)
# weights_init = np.array(w, requires_grad=True)
# bias_init = np.array(-0.483902119474, requires_grad=True)


opt = NesterovMomentumOptimizer(0.01)
# opt = NesterovMomentumOptimizer(0.1)
# opt = NesterovMomentumOptimizer(0.1)
batch_size = 5
batch_size = 15
batch_size = 20
batch_size = 10


# train the variational classifier
weights = weights_init
bias = bias_init
# for it in range(60):
steps = 50
# steps = 5
for it in range(steps):
# for it in range(2):

    # Update the weights by one optimizer step
    batch_index = np.random.randint(0, num_train, (batch_size,))
    feats_train_batch = feats_train[batch_index]
    Y_train_batch = Y_train[batch_index]
    weights, bias, _, _ = opt.step(cost, weights, bias, feats_train_batch, Y_train_batch)

    # Compute predictions on train and validation set
    predictions_train = [np.sign(variational_classifier(weights, bias, f)) for f in feats_train]
    predictions_val = [np.sign(variational_classifier(weights, bias, f)) for f in feats_val]

    # Compute accuracy on train and validation set
    acc_train = accuracy(Y_train, predictions_train)
    acc_val = accuracy(Y_val, predictions_val)

    print(
        "Iter: {:5d} | Cost: {:0.7f} | Acc train: {:0.7f} | Acc validation: {:0.7f} "
        "".format(it + 1, cost(weights, bias, features, Y), acc_train, acc_val)
    )
    if acc_train >= 0.93 and acc_val >= 0.93:
        # early stop
        break

print('___: ', _)    
print('bias: ', bias)    
# print('weights:\n', weights)
# np.save('/_jupyter/QC/QOSF-challenge-md-2022/task-02/temp-data/variational_classifier/data/mock_train_numpy_wights_02.npy', weights, allow_pickle=True)



Iter:     1 | Cost: 1.1942728 | Acc train: 0.5333333 | Acc validation: 0.5066667 
Iter:     2 | Cost: 1.1980613 | Acc train: 0.5111111 | Acc validation: 0.5333333 
Iter:     3 | Cost: 1.2161650 | Acc train: 0.5022222 | Acc validation: 0.5200000 
Iter:     4 | Cost: 1.2344483 | Acc train: 0.4977778 | Acc validation: 0.5333333 
Iter:     5 | Cost: 1.2549363 | Acc train: 0.4933333 | Acc validation: 0.4933333 
Iter:     6 | Cost: 1.2873434 | Acc train: 0.4933333 | Acc validation: 0.4933333 
Iter:     7 | Cost: 1.3183583 | Acc train: 0.5022222 | Acc validation: 0.4933333 
Iter:     8 | Cost: 1.3291061 | Acc train: 0.4933333 | Acc validation: 0.4800000 
Iter:     9 | Cost: 1.3015293 | Acc train: 0.4977778 | Acc validation: 0.4933333 
Iter:    10 | Cost: 1.2507147 | Acc train: 0.4844444 | Acc validation: 0.4933333 
Iter:    11 | Cost: 1.1958381 | Acc train: 0.5066667 | Acc validation: 0.5333333 
Iter:    12 | Cost: 1.1622816 | Acc train: 0.5244444 | Acc validation: 0.5333333 
Iter:    13 | Co

Load the test dataset and apply same transformations as we did with the train dataset.


In [10]:

# Load the test dataset and apply same transformations as we did with the train dataset.
df = pd.read_csv(path_test)
df_c = df.copy(deep=True)
df['1'] = np.log10(df_c['1'])
df['2'] = np.log10(df_c['2'])

f = lambda x: -1.0 if x==0 else 1.0
df['4'] = df_c['4'].map(f)
npdf = df.to_numpy()
data = np.array(npdf)

X = data[:, 0:4]
Y_test = data[:, -1]

# scale data using sklearn StandardScaler
std_slc = StandardScaler(with_mean=False)
std_slc.fit(X)
X_std = std_slc.transform(X)

# normalize data using sklearn StandardScaler
normalizer = Normalizer().fit(X_std)  # fit does nothing.
X_norm = normalizer.transform(X_std)

# convert to a pennylane numpy array
X_test = np.array(X_norm, requires_grad=False)

# apply the variational clasifier circuit on test dataset
# using the learned weights
predictions_test = [np.sign(variational_classifier(weights, bias, f)) for f in X_test]

acc_test = accuracy(Y_test, predictions_test)
print('Accuracy on test data: {:0.4f}'.format(acc_test))


StandardScaler(with_mean=False)

Accuracy on test data: 0.9583


In [11]:
# We can plot the continuous output of the variational classifier for the first two dimensions of the Iris data set.
plt.figure()
cm = plt.cm.RdBu

# make data for decision regions
xx, yy = np.meshgrid(np.linspace(0.0, 1.5, 300), np.linspace(0.0, 1.5, 300))
X_grid = [np.array([x, y]) for x, y in zip(xx.flatten(), yy.flatten())]

pd.DataFrame(xx).shape
pd.DataFrame(yy).shape
pd.DataFrame(X_grid).shape

NameError: name 'plt' is not defined

In [None]:

# # # preprocess grid points like data inputs above
# # padding = 0.3 * np.ones((len(X_grid), 1))
# # X_grid = np.c_[np.c_[X_grid, padding], np.zeros((len(X_grid), 1))]  # pad each input

# # normalization = np.sqrt(np.sum(X_grid ** 2, -1))
# # X_grid = (X_grid.T / normalization).T  # normalize each input

# # features_grid = np.array(
# #     [get_angles(x) for x in X_grid]
# # )  # angles for state preparation are new features

# X_grid = X_norm
# features_grid = X_grid
# predictions_grid = [variational_classifier(weights, bias, f) for f in features_grid]
# # Z = np.reshape(predictions_grid, xx.shape)
# Z = predictions_grid 

# # plot decision regions
# cnt = plt.contourf(
#     xx, yy, Z, levels=np.arange(-1, 1.1, 0.1), cmap=cm, alpha=0.8, extend="both"
# )
# plt.contour(
#     xx, yy, Z, levels=[0.0], colors=("black",), linestyles=("--",), linewidths=(0.8,)
# )
# plt.colorbar(cnt, ticks=[-1, 0, 1])

# # plot data
# plt.scatter(
#     X_train[:, 0][Y_train == 1],
#     X_train[:, 1][Y_train == 1],
#     c="b",
#     marker="o",
#     edgecolors="k",
#     label="class 1 train",
# )
# plt.scatter(
#     X_val[:, 0][Y_val == 1],
#     X_val[:, 1][Y_val == 1],
#     c="b",
#     marker="^",
#     edgecolors="k",
#     label="class 1 validation",
# )
# # plt.scatter(
# #     X_train[:, 0][Y_train == -1],
# #     X_train[:, 1][Y_train == -1],
# #     c="r",
# #     marker="o",
# #     edgecolors="k",
# #     label="class -1 train",
# # )
# # plt.scatter(
# #     X_val[:, 0][Y_val == -1],
# #     X_val[:, 1][Y_val == -1],
# #     c="r",
# #     marker="^",
# #     edgecolors="k",
# #     label="class -1 validation",
# # )

# plt.legend()
# plt.show()