# Multiclass Margin Classifier

This tutorial shows usage of Multiclass Margin Classifier to classify Iris data of 4 features into 3 classes. It uses multiple one vs all classifier to do the task. 

In [1]:
import pennylane as qml
import torch
import numpy as np
from torch.autograd import Variable
import torch.optim as optim

np.random.seed(0)
torch.manual_seed(0)

num_classes = 3
margin = 0.15
feature_size = 4
batch_size = 10
lr_adam = 0.01
train_split = 0.75

num_qubits = int(np.ceil(np.log2(feature_size)))
num_layers = 6
total_iterations = 100

dev = qml.device("default.qubit", wires = num_qubits)

We start off by loading iris dataset.

In [13]:
from sklearn.datasets import load_iris

def load_and_process_data():
    iris = load_iris()
    X, Y = iris.data, iris.target
    X_torch = torch.tensor(X, requires_grad = False)
    Y_torch = torch.tensor(Y, requires_grad = False)
    print("First X Sample: ", X_torch[0])
    
    normalization = torch.sqrt(torch.sum(X_torch ** 2, dim = 1))
    X_norm = X_torch / normalization.reshape(len(X_torch), 1)
    print("First X norm sample: ", X_norm[0])

    return X_norm, Y_torch

In [20]:
X, Y = load_and_process_data()
print(Y)

First X Sample:  tensor([5.1000, 3.5000, 1.4000, 0.2000], dtype=torch.float64)
First X norm sample:  tensor([0.8038, 0.5516, 0.2206, 0.0315], dtype=torch.float64)
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2])


Now we construct a multiclass classifier.

In [14]:
def layer(W):
    for i in range(num_qubits):
        qml.Rot(*W[i], wires = i)
    for j in range(num_qubits - 1):
        qml.CNOT(wires = [j, j + 1])
    if num_qubits >= 2:
        qml.CNOT(wires = [num_qubits - 1, 0])

In [15]:
from pennylane.templates.embeddings import AmplitudeEmbedding

def circuit(weights, feat = None):
    AmplitudeEmbedding(feat, [0,1], pad = 0.0, normalize = True)
    for W in weights:
        layer(W)
    return qml.expval(qml.PauliZ(0))

qnodes = []
# we create variational classifiers for each classes
for iq in range(num_classes):
    qnode = qml.QNode(circuit, dev, interface = "torch")
    qnodes.append(qnode)
    
    
def variational_classifier(q_circuit, params, feat):
    weights = params[0]
    bias = params[1]
    return q_circuit(weights, feat = feat) + bias

In [21]:
def multiclass_svm_loss(q_circuits, all_params, feature_vecs, true_labels):
    loss = 0
    num_samples = len(true_labels)
    for i, feature_vec in enumerate(feature_vecs):
        s_true = variational_classifier(
            q_circuits[int(true_labels[i])], (all_params[0][int(true_labels[i])], all_params[1][int(true_labels[i])]), feature_vec)
        s_true = s_true.float()
        li = 0
        
        for j in range(num_classes):
            if j != int(true_labels[i]):
                s_j = variational_classifier(
                q_circuits[j], (all_params[0][j], all_params[1][j]), feature_vec)
                s_j = s_j.float()
                li += torch.max(torch.zeros(1).float(), s_j - s_true + margin)
        loss += li
    return loss / num_samples

In [23]:
def classify(q_circuits, all_params, feature_vecs, labels):
    predicted_labels = []
    for i, feature_vec in enumerate(feature_vecs):
        scores = np.zeros(num_classes)
        for c in range(num_classes):
            score = variational_classifier(
                q_circuits[c], (all_params[0][c], all_params[1][c]), feature_vec)
            scores[c] = float(score)
        pred_class = np.argmax(scores)
        predicted_labels.append(pred_class)
    return predicted_labels

def accuracy(labels, hard_predictions):
    loss = 0
    for l,p in zip(labels, hard_predictions):
        if np.abs(1 - p) < 1e-3:
            loss = loss + 1
    loss = loss / labels.shape[0]
    return loss

In [18]:
from sklearn.model_selection import train_test_split

def training(features, Y):
    num_data = Y.shape[0]
    feat_vecs_train, feat_vecs_test, Y_train, Y_test = train_test_split(features, Y)
    num_train = Y_train.shape[0]
    q_circuits = qnodes
    
    all_weights = [
        Variable(0.1 * torch.randn(num_layers, num_qubits, 3), requires_grad = True)
        for i in range(num_classes)
    ]
    all_bias = [Variable(0.1 * torch.ones(1), requires_grad = True) for i in range (num_classes)]
    
    optimizer = optim.Adam(all_weights + all_bias, lr = lr_adam)
    params = (all_weights, all_bias)
    print("Num params: ", 3 * num_layers * num_qubits * 3 + 3)
    
    costs, train_acc, test_acc = [], [], []
    
    for it in range(total_iterations):
        batch_index = np.random.randint(0, num_train, (batch_size,))
        feat_vecs_train_batch = feat_vecs_train[batch_index]
        Y_train_batch = Y_train[batch_index]
        
        optimizer.zero_grad()
        curr_cost = multiclass_svm_loss(q_circuits, params, feat_vecs_train_batch, Y_train_batch)
        curr_cost.backward()
        optimizer.step()
        
        predictions_train = classify(q_circuits, params, feat_vecs_train, Y_train)
        predictions_test = classify(q_circuits, params, feat_vecs_test, Y_test)
        acc_train = accuracy(Y_train, predictions_train)
        acc_test = accuracy(Y_test, predictions_test)
        
        if it % 10 == 0:
            print("Iteration: ", it)
            print("cost: ", curr_cost)
            print("acc_train: ", acc_train)
            print("acc_test: ", acc_test)
    
    return costs, train_acc, test_acc

In [19]:
features, Y = load_and_process_data()
costs, train_acc, test_acc = training(features, Y)

First X Sample:  tensor([5.1000, 3.5000, 1.4000, 0.2000], dtype=torch.float64)
First X norm sample:  tensor([0.8038, 0.5516, 0.2206, 0.0315], dtype=torch.float64)
Num params:  111
Iteration:  0
cost:  tensor([0.3649], grad_fn=<DivBackward0>)
acc_train:  0.32142857142857145
acc_test:  0.34210526315789475
Iteration:  10
cost:  tensor([0.1440], grad_fn=<DivBackward0>)
acc_train:  0.008928571428571428
acc_test:  0.02631578947368421
Iteration:  20
cost:  tensor([0.1479], grad_fn=<DivBackward0>)
acc_train:  0.6517857142857143
acc_test:  0.6052631578947368
Iteration:  30
cost:  tensor([0.0290], grad_fn=<DivBackward0>)
acc_train:  0.0
acc_test:  0.0
Iteration:  40
cost:  tensor([0.0501], grad_fn=<DivBackward0>)
acc_train:  0.6696428571428571
acc_test:  0.6578947368421053
Iteration:  50
cost:  tensor([0.0668], grad_fn=<DivBackward0>)
acc_train:  0.26785714285714285
acc_test:  0.2894736842105263
Iteration:  60
cost:  tensor([0.0576], grad_fn=<DivBackward0>)
acc_train:  0.026785714285714284
acc_t