# Amplitude Encoding - 2 qubits - using qml.AmplitudeEmbedding

In [15]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from ipywidgets import widgets
from IPython.display import display, HTML

In [16]:
import pennylane as qml
from pennylane import numpy as np
from pennylane.optimize import NesterovMomentumOptimizer

import sys
from math import sqrt, pi
import time

import pandas as pd
import scipy
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
# supress a warning that is not useful here
pd.options.mode.chained_assignment = None


## Data Loading and Exploration

Second and third columns looks better as decimal logarithms. Also it seems they are highly corelated and eventually can be compressed to one feature. 

In [17]:
## #################################################################################
# globals

path_train = '/_jupyter/QC/QOSF-challenge-md-2022/task-02/mock_train_set.csv'
path_test = '/_jupyter/QC/QOSF-challenge-md-2022/task-02/mock_test_set.csv'

num_qubits = 2
num_layers = 4

dev = qml.device("default.qubit", wires=num_qubits)

df = pd.read_csv(path_train)
df_c = df.copy(deep=True)
df['1'] = np.log10(df_c['1'])
df['2'] = np.log10(df_c['2'])

f = lambda x: -1.0 if x==0 else 1.0
df['4'] = df_c['4'].map(f)

# npdf = df2.to_numpy()
npdf = df.to_numpy()
data = np.array(npdf)

print("Train data standardised:\n", data)

X = data[:, 0:4]
Y = data[:, -1]

# scale the data using sklearn StandardScaler
std_slc = StandardScaler(with_mean=False)
std_slc.fit(X)
X_std = std_slc.transform(X)

# normalize data using sklearn StandardScaler
normalizer = Normalizer().fit(X_std)  # fit does nothing.
X_norm = normalizer.transform(X_std)


# features will be applitudes vector
features = np.array(X_norm, requires_grad=False)
print("Train data normalized:\n", features)




Train data standardised:
 [[ 2.78926e+03  3.00000e+00  1.00000e+00  2.00000e+01 -1.00000e+00]
 [ 4.04001e+03  6.00000e+00  0.00000e+00  1.00000e+00  1.00000e+00]
 [ 2.93120e+03  4.00000e+00  4.00000e+00  4.00000e+01  1.00000e+00]
 ...
 [ 4.18281e+03  0.00000e+00  0.00000e+00  6.50000e+01 -1.00000e+00]
 [ 3.11375e+03  4.00000e+00  2.00000e+00  1.00000e+00  1.00000e+00]
 [ 4.56757e+03  4.00000e+00  5.00000e+00  9.00000e+01  1.00000e+00]]


StandardScaler(with_mean=False)

Train data normalized:
 [[0.78842549 0.50347726 0.20019498 0.29123507]
 [0.7500201  0.66134589 0.         0.00956384]
 [0.56936298 0.46130771 0.55028199 0.40026331]
 ...
 [0.78066018 0.         0.         0.62495574]
 [0.74765323 0.57024753 0.34011673 0.01236968]
 [0.5870369  0.30522994 0.45512608 0.5958881 ]]


## Data Encoding, Circuit Preparation and Cost Functions

In [18]:
# ###################################################################
# layer circuit - this is where the circuit learns
def layer(W):
    qml.Rot(W[0, 0], W[0, 1], W[0, 2], wires=0)
    qml.Rot(W[1, 0], W[1, 1], W[1, 2], wires=1)
    qml.CNOT(wires=[0, 1])

# ###################################################################
# the circuit - where the action happens
@qml.qnode(dev)
def circuit(weights, data):

    qml.AmplitudeEmbedding(features=data, wires=range(num_qubits))

    for W in weights:
        layer(W)

    return qml.expval(qml.PauliZ(0))

draw_flag = 0

# ###############################################################################
# variational classifier
# this will be called on each optimization step by the cost evaluation
def variational_classifier(weights, bias, data):
    
    global draw_flag

    if draw_flag:
        draw_flag=0
        # qml.draw

    return circuit(weights, data) + bias

# ###############################################################################
# standard square loss
def square_loss(labels, predictions):
    
    loss = 0
    for l, p in zip(labels, predictions):
        loss = loss + (l - p) ** 2

    loss = loss / len(labels)
    return loss

# ###############################################################################
# goal: maximize accuracy
def accuracy(labels, predictions):

    loss = 0
    for l, p in zip(labels, predictions):
        if abs(l - p) < 1e-5:
            loss = loss + 1
    loss = loss / len(labels)

    return loss

# ###############################################################################
# goal: maximize accuracy
def cost(weights, bias, features, labels):
    
    predictions = [variational_classifier(weights, bias, f) for f in features]
    return square_loss(labels, predictions)



## Datasets Preparation

In [19]:
np.random.seed(0)
num_data = len(Y)

num_train = int(0.75 * num_data)
index = np.random.permutation(range(num_data))

feats_train = features[index[:num_train]]
Y_train = Y[index[:num_train]]

feats_val = features[index[num_train:]]
Y_val = Y[index[num_train:]]

# We need these later for plotting
X_train = X[index[:num_train]]
X_val = X[index[num_train:]]

# ######################################################################
# Load the test dataset and apply same transformations as we did with the train dataset.
df_test = pd.read_csv(path_test)
df_test_c = df_test.copy(deep=True)
df_test['1'] = np.log10(df_test_c['1'])
df_test['2'] = np.log10(df_test_c['2'])

f = lambda x: -1.0 if x==0 else 1.0
df_test['4'] = df_test_c['4'].map(f)
data_test = df_test.to_numpy()

X_test_ini = data_test[:, 0:4]
Y_test = data_test[:, -1]

# scale data using sklearn StandardScaler
std_slc = StandardScaler(with_mean=False)
std_slc.fit(X_test_ini)
X_test_std = std_slc.transform(X_test_ini)

# normalize data using sklearn Normalizer
normalizer = Normalizer().fit(X_test_std)  # fit does nothing.
X_test_norm = normalizer.transform(X_test_std)

# convert to a pennylane numpy array
X_test = np.array(X_test_norm, requires_grad=False)


# ######################################################################
# accuracy for test dtaset
def test_accuracy(weights, bias):
    # apply the variational clasifier circuit on test dataset
    # using the learned weights
    predictions_test = [np.sign(variational_classifier(weights, bias, f)) for f in X_test]

    return accuracy(Y_test, predictions_test)

StandardScaler(with_mean=False)

## Training

In [20]:
weights_init = 0.01 * np.random.randn(num_layers, num_qubits, 3, requires_grad=True)
bias_init = np.array(0.0, requires_grad=True)

# # start with learned step
# w = np.load('/_jupyter/QC/QOSF-challenge-md-2022/task-02/temp-data/variational_classifier/data/mock_train_numpy_wights_01.npy', allow_pickle=True)
# weights_init = np.array(w, requires_grad=True)
# bias_init = np.array(-0.483902119474, requires_grad=True)


opt = NesterovMomentumOptimizer(0.01)
# opt = NesterovMomentumOptimizer(0.1)
# opt = NesterovMomentumOptimizer(0.1)
# batch_size = 5
# batch_size = 15
# batch_size = 20
batch_size = 10


# train the variational classifier
weights = weights_init
bias = bias_init
# for it in range(60):
steps = 50

toc = time.time()
for it in range(steps):

    # Update the weights by one optimizer step
    batch_index = np.random.randint(0, num_train, (batch_size,))
    feats_train_batch = feats_train[batch_index]
    Y_train_batch = Y_train[batch_index]
    weights, bias, _, _ = opt.step(cost, weights, bias, feats_train_batch, Y_train_batch)

    # Compute predictions on train and validation set
    predictions_train = [np.sign(variational_classifier(weights, bias, f)) for f in feats_train]
    predictions_val = [np.sign(variational_classifier(weights, bias, f)) for f in feats_val]

    # Compute accuracy on train and validation set
    acc_train = accuracy(Y_train, predictions_train)
    acc_val = accuracy(Y_val, predictions_val)
    acc_test = test_accuracy(weights, bias)

    print(
        "Iter: {:5d} | Cost: {:0.7f} | Acc train: {:0.7f} | Acc validation: {:0.7f} | Acc test: {:0.7f} "
        "".format(it + 1, cost(weights, bias, features, Y), acc_train, acc_val, acc_test)
    )

    if acc_train >= 0.93 and acc_val >= 0.93:
        # early stop
        break

tic = time.time()

print('\n\ntime in sec. for {} steps: {}'.format(it+1, tic-toc))
print('\nbias: ', bias)    
print('\nweights:\n', weights)



Iter:     1 | Cost: 1.1970236 | Acc train: 0.5288889 | Acc validation: 0.5066667 | Acc test: 0.4333333 
Iter:     2 | Cost: 1.1949626 | Acc train: 0.5288889 | Acc validation: 0.5066667 | Acc test: 0.4333333 
Iter:     3 | Cost: 1.1951781 | Acc train: 0.5288889 | Acc validation: 0.4933333 | Acc test: 0.4250000 
Iter:     4 | Cost: 1.1949684 | Acc train: 0.5200000 | Acc validation: 0.5333333 | Acc test: 0.4083333 
Iter:     5 | Cost: 1.2056649 | Acc train: 0.5111111 | Acc validation: 0.4933333 | Acc test: 0.3916667 
Iter:     6 | Cost: 1.2253642 | Acc train: 0.4888889 | Acc validation: 0.5066667 | Acc test: 0.3666667 
Iter:     7 | Cost: 1.2349040 | Acc train: 0.4933333 | Acc validation: 0.5333333 | Acc test: 0.3250000 
Iter:     8 | Cost: 1.2546892 | Acc train: 0.4977778 | Acc validation: 0.4933333 | Acc test: 0.3333333 
Iter:     9 | Cost: 1.2799436 | Acc train: 0.4888889 | Acc validation: 0.4933333 | Acc test: 0.3500000 
Iter:    10 | Cost: 1.3077180 | Acc train: 0.4933333 | Acc valid

## Final Result

In [23]:
acc_test = test_accuracy(weights, bias)
print('Accuracy on test data: {:0.4f}'.format(acc_test))

Accuracy on test data: 0.9333


## Analyze Results