In [None]:
import pennylane as qml
from pennylane import numpy as np
from pennylane.optimize import NesterovMomentumOptimizer
import random

In [None]:
# Four-wire simulator device that the `circuit` QNode is bound to.
# Alternative backend, kept for reference:
#dev = qml.device("default.qubit", wires=4)
dev = qml.device("lightning.qubit", wires=4)

In [None]:
def check_np(list):
    """Count the parameterised gates in a gate stream.

    Args:
        list: Iterable of gate descriptors; a descriptor whose first element
            is the string "Rot" counts as one trainable parameter.

    Returns:
        The number of "Rot" descriptors as an int (0 for an empty stream).
    """
    return sum(1 for descriptor in list if descriptor[0] == "Rot")

In [None]:
def ansatz(W, gatestream):
    """Apply the gates described by ``gatestream`` in order.

    Args:
        W: Indexable collection of rotation angles. The k-th "Rot" descriptor
            in the stream consumes ``W[k]``.
        gatestream: Iterable of gate descriptors, either
            ``["Rot", pauli_word, wire]`` or ``["CNOT", control, target]``.
            Descriptors with any other first element are skipped.
    """
    angles_used = 0
    for op in gatestream:
        kind = op[0]
        if kind == "CNOT":
            qml.CNOT(wires=[op[1], op[2]])
        elif kind == "Rot":
            # Angles are handed out strictly in stream order.
            qml.PauliRot(W[angles_used], op[1], wires=op[2])
            angles_used += 1

In [None]:
def statepreparation(x):
    """Prepare wires 0-3 in the basis state described by ``x``.

    Args:
        x: Basis-state specification forwarded unchanged to
            ``qml.BasisState`` (presumably one bit per wire -- confirm
            against the data that is fed in).
    """
    target_wires = [0, 1, 2, 3]
    qml.BasisState(x, wires=target_wires)

In [None]:
@qml.qnode(dev, interface="autograd")
def circuit(weights, x, gatestream):
    """QNode: prepare a basis state, apply the gate stream, measure wire 0.

    Args:
        weights: Rotation angles, consumed in order by ``ansatz``.
        x: Basis-state specification passed to ``statepreparation``.
        gatestream: Gate descriptors passed to ``ansatz``.

    Returns:
        The expectation value of Pauli-Z on wire 0.
    """

    statepreparation(x)
    ansatz(weights, gatestream)

    return qml.expval(qml.PauliZ(0))

In [None]:
def variational_classifier(weights, bias, x, gatestream):
    """Return the circuit output for ``x`` shifted by a trainable bias.

    Args:
        weights: Rotation angles forwarded to ``circuit``.
        bias: Offset added to the circuit's expectation value.
        x: Input forwarded to ``circuit``.
        gatestream: Gate descriptors forwarded to ``circuit``.

    Returns:
        ``circuit(weights, x, gatestream) + bias``.
    """
    expectation = circuit(weights, x, gatestream)
    return expectation + bias

In [None]:
def square_loss(labels, predictions):
    """Mean squared difference between labels and predictions.

    Pairs are formed with ``zip``, so surplus entries of the longer sequence
    are ignored, while the divisor is always ``len(labels)``.

    Raises:
        ZeroDivisionError: If ``labels`` is empty.
    """
    squared_error_sum = 0
    for target, predicted in zip(labels, predictions):
        residual = target - predicted
        # Plain left-to-right accumulation keeps the result differentiable
        # and the rounding order fixed.
        squared_error_sum = squared_error_sum + residual ** 2
    return squared_error_sum / len(labels)

In [None]:
def accuracy(labels, predictions):
    """Fraction of predictions that match their label to within 1e-5.

    Pairs are formed with ``zip``; the divisor is ``len(labels)``.

    Raises:
        ZeroDivisionError: If ``labels`` is empty.
    """
    tolerance = 1e-5
    hits = sum(
        1
        for target, predicted in zip(labels, predictions)
        if abs(target - predicted) < tolerance
    )
    return hits / len(labels)

In [None]:
def cost(weights, bias, X, Y, gatestream):
    """Training objective: mean squared error of the classifier over ``X``.

    Args:
        weights: Rotation angles for the circuit.
        bias: Classifier offset.
        X: Iterable of inputs, each passed to ``variational_classifier``.
        Y: Labels, compared against the classifier outputs by ``square_loss``.
        gatestream: Gate descriptors defining the circuit.

    Returns:
        ``square_loss(Y, outputs)`` where ``outputs`` holds one classifier
        value per element of ``X``.
    """
    outputs = []
    for sample in X:
        outputs.append(variational_classifier(weights, bias, sample, gatestream))
    return square_loss(Y, outputs)

---

In [None]:
def opt_classifier(gatestream, data_path="parity.txt", n_iters=15, batch_size=5,
                   stepsize=0.5, seed=0):
    """Train the classifier defined by ``gatestream`` and report its progress.

    With the default keyword values this reproduces the previously
    hard-coded setup (``parity.txt``, 15 iterations, batches of 5, step size
    0.5, seed 0).

    Args:
        gatestream: Gate descriptors defining the circuit; the number of
            "Rot" entries fixes the number of trainable weights.
        data_path: Text file readable by ``np.loadtxt``; the last column is
            the label, all preceding columns form the input.
        n_iters: Number of optimiser steps.
        batch_size: Number of samples drawn (with replacement) per step.
        stepsize: Step size given to ``NesterovMomentumOptimizer``.
        seed: Value passed to ``np.random.seed`` before initialising the
            weights. Note that this reseeds the global NumPy generator on
            every call.

    Returns:
        A tuple ``(out_list, draw_p)``. ``out_list`` has one
        ``[iteration, cost, accuracy]`` entry per step, both metrics measured
        on the full data set as Python floats. ``draw_p`` is the text drawing
        of the trained circuit produced by ``qml.draw``.
    """
    data = np.loadtxt(data_path)
    X = np.array(data[:, :-1], requires_grad=False)
    Y = np.array(data[:, -1], requires_grad=False)
    # Affine shift 2*y - 1, so labels stored as 0/1 become -1/+1.
    Y = Y * 2 - np.ones(len(Y))

    np.random.seed(seed)
    weights = 0.01 * np.random.randn(check_np(gatestream), requires_grad=True)
    bias = np.array(0.0, requires_grad=True)

    opt = NesterovMomentumOptimizer(stepsize)

    out_list = []
    for it in range(n_iters):
        batch_index = np.random.randint(0, len(X), (batch_size,))
        X_batch = X[batch_index]
        Y_batch = Y[batch_index]
        # step() hands back a value for every positional argument; only the
        # first two (weights, bias) are trainable, the rest are discarded.
        weights, bias, _, _, _ = opt.step(cost, weights, bias, X_batch, Y_batch, gatestream)

        # Simulate the circuit once per sample and reuse the raw outputs for
        # both the accuracy and the loss (previously the full data set was
        # simulated twice per iteration).
        outputs = [variational_classifier(weights, bias, x, gatestream) for x in X]
        predictions = [np.sign(value) for value in outputs]
        acc = accuracy(Y, predictions)

        out_list.append([it + 1, float(square_loss(Y, outputs)), float(acc)])

    draw_p = qml.draw(circuit)(weights, [0, 0, 0, 0], gatestream)

    return out_list, draw_p

In [None]:
def gate_to_obs(gate):
    """Encode a gate descriptor as a four-element observation row.

    Row layout:
        * ``["Rot", axis, wire]``  -> ``[1, 0, axis_code, wire]`` with
          axis_code 1/2/3 for 'X'/'Y'/'Z' and 0 for any other axis value.
        * ``["CNOT", control, target]`` -> ``[0, 1, control, target]``.
        * anything else -> ``[0, 0, 0, 0]``.
    """
    kind = gate[0]

    if kind == 'Rot':
        axis_code = 0
        for code, axis in enumerate(('X', 'Y', 'Z'), start=1):
            if gate[1] == axis:
                axis_code = code
                break
        return [1, 0, axis_code, gate[2]]

    if kind == 'CNOT':
        return [0, 1, gate[1], gate[2]]

    return [0, 0, 0, 0]

In [None]:
def update_obs(act, step, obs, gatestream, gates):
    """Record action ``act`` in the gate stream and the observation table.

    ``gatestream`` and ``obs`` are modified in place and also returned.

    Args:
        act: Index into ``gates`` selecting the gate to add.
        step: Row of ``obs`` to overwrite with the gate's encoding.
        obs: Observation table (indexable by ``step``).
        gatestream: List of gate descriptors chosen so far.
        gates: Lookup table of available gate descriptors.

    Returns:
        ``(step + 1, obs, gatestream)``.
    """
    chosen = gates[act]
    gatestream.append(chosen)
    obs[step] = gate_to_obs(chosen)
    return step + 1, obs, gatestream

In [None]:
def cal_reward(steps, obs, outs, max_steps=32):
    """Score the circuit after its latest gate was added.

    Args:
        steps: Number of gates placed so far; ``obs[steps - 1]`` is treated as
            the most recent gate.
        obs: Observation rows ``[is_rot, is_cnot, a, b]``. For a rotation row
            ``b`` is the wire; for a CNOT row ``a``/``b`` are control/target.
            Rows with neither flag set are ignored.
        outs: Training log rows ``[iteration, cost, accuracy]``.
        max_steps: Circuit-length horizon used for the step bonus. Defaults
            to 32, the value that used to be hard-coded.

    Returns:
        A tuple ``(components, total)`` where ``components`` is
        ``[acc_m, cost_m, gate_r, rot_r, cnot_r, steps_r, pop_r, dup_r]`` and
        ``total`` is their weighted sum.

    Raises:
        ZeroDivisionError: If ``outs`` is empty, if the mean cost is zero, or
            if the latest gate is a CNOT whose control equals its target.
    """

    def rows_on_wire(wire):
        """Collect, in order, the rows whose gate acts on ``wire``."""
        touching = []
        for row in obs:
            if row[1] == 1:
                if row[2] == wire or row[3] == wire:
                    touching.append(row)
            elif row[0] == 1:
                if row[3] == wire:
                    touching.append(row)
        return touching

    def repeats_previous(rows):
        """True when the two most recent rows on a wire are identical."""
        return len(rows) > 1 and rows[-1] == rows[-2]

    ## accuracy: mean over all logged iterations
    acc = [row[2] for row in outs]
    acc_m = sum(acc) / len(acc)

    ## cost: reciprocal of the mean logged cost
    # (named `costs` so the module-level `cost` function is not shadowed)
    costs = [row[1] for row in outs]
    cost_m = 1 / (sum(costs) / len(costs))

    ## variance: how evenly the gates are spread over the four wires
    pop_list = [0, 0, 0, 0]
    for row in obs:
        if row[1] == 1:
            pop_list[row[2]] += 1
            pop_list[row[3]] += 1
        elif row[0] == 1:
            pop_list[row[3]] += 1
    # Equals 1 for perfectly even usage and shrinks as usage becomes uneven.
    pop_r = (2 - np.var(pop_list)) / 2

    last = obs[steps - 1]
    last_is_rot = last[0] == 1
    last_is_cnot = last[1] == 1

    ## duplicate: penalise repeating the previous gate on the same wire(s)
    dup_r = 0
    if last_is_rot:
        if repeats_previous(rows_on_wire(last[3])):
            dup_r = -10
    elif last_is_cnot:
        # A CNOT only counts as a repeat if it is one on control AND target.
        if repeats_previous(rows_on_wire(last[2])) and repeats_previous(rows_on_wire(last[3])):
            dup_r = -10

    ## gate type: bonus for a rotation as the latest gate
    if last_is_rot:
        gate_r = 1
        rot_r = 1
    else:
        gate_r = 0
        rot_r = 0

    ## CNOT distance: favour CNOTs between nearby wires
    if last_is_cnot:
        cnot_r = 1 / abs(last[2] - last[3])
    else:
        cnot_r = 0

    ## circuit steps: favour shorter circuits
    steps_r = (max_steps - steps) / max_steps

    # Weighted sum; accuracy is first rescaled from [0.5, 1] to [0, 1].
    total = (acc_m - 0.5)*2 * 15 + cost_m * 2 + gate_r * 3 + rot_r + cnot_r + steps_r * 5 + pop_r * 3 + dup_r

    return [acc_m, cost_m, gate_r, rot_r, cnot_r, steps_r, pop_r, dup_r], total

In [None]:
class qc:
    """Gate-by-gate circuit-building environment.

    Each action index selects one entry of ``gates`` (12 Pauli rotations and
    12 CNOTs on four wires). ``step`` appends that gate to the circuit,
    retrains the classifier through ``opt_classifier`` and stores the reward
    computed by ``cal_reward``.

    Attributes:
        gates: Lookup table from action index to gate descriptor.
        len_qc: Maximum number of gates per episode.
        act_space: Number of available actions.
        steps: Number of gates placed in the current episode.
        obs: ``len_qc`` observation rows, one per placed gate.
        gatestream: Gate descriptors placed so far.
        reward: Reward of the latest step (-1 right after ``reset``).
        term: 1 once ``len_qc`` gates are placed, 0 after any earlier step,
            -1 right after ``reset``.
        done: 1 once any logged training accuracy reached 1.
        outs, draw, rlist: Training log, circuit drawing and reward
            components of the latest successful step; ``None`` until then.
    """

    def __init__(self):
        self.gates = [['Rot','X', 0], ['Rot','X', 1], ['Rot','X', 2], ['Rot','X', 3],
         ['Rot','Y', 0], ['Rot','Y', 1], ['Rot','Y', 2], ['Rot','Y', 3],
         ['Rot','Z', 0], ['Rot','Z', 1], ['Rot','Z', 2], ['Rot','Z', 3],
         ['CNOT', 0, 1],  ['CNOT', 0, 2],  ['CNOT', 0, 3],
         ['CNOT', 1, 0],  ['CNOT', 1, 2],  ['CNOT', 1, 3],
         ['CNOT', 2, 0],  ['CNOT', 2, 1],  ['CNOT', 2, 3],
         ['CNOT', 3, 0],  ['CNOT', 3, 1],  ['CNOT', 3, 2]]
        self.len_qc = 32
        self.act_space = len(self.gates)

        # Declared up front so they can be read before the first step.
        self.outs = None
        self.draw = None
        self.rlist = None

        # Establish episode state immediately; without this, step() on a
        # fresh instance fails because self.steps does not exist yet.
        self.reset()

    def reset(self):
        """Start a new episode with an empty circuit."""
        self.steps = 0
        self.obs = [[0] * 4 for _ in range(self.len_qc)]
        self.gatestream = []
        self.reward = -1
        self.term = -1
        self.done = 0
        return

    def step(self, act):
        """Add gate ``act`` to the circuit, retrain and update the reward.

        Returns:
            1 if the gate was applied; 0 (after printing a message) if
            ``act`` is outside the action space or the circuit is already
            ``len_qc`` gates long.
        """
        if act > self.act_space-1 or act < 0:
            print("out of action space")
            return 0
        if self.steps > self.len_qc-1:
            print("out of qc length")
            return 0

        self.steps, self.obs, self.gatestream = update_obs(act, self.steps, self.obs, self.gatestream, self.gates)
        self.outs, self.draw = opt_classifier(self.gatestream)

        self.rlist, self.reward = cal_reward(self.steps, self.obs, self.outs)

        self.term = 1 if self.steps == self.len_qc else 0

        # Reaching accuracy 1 in any logged iteration marks the episode done;
        # it does not set `term` (that assignment was disabled in the
        # original code).
        if max(row[2] for row in self.outs) == 1:
            self.done = 1

        return 1

    def sample(self):
        """Return a uniformly random valid action index."""
        return random.randint(0, self.act_space-1)

---