In [1]:
import pennylane as qml
import gym
from pennylane import numpy as np
from gym import spaces
import inspect
import pickle

In [2]:
# Amplitudes of the three-qubit target state: equal weight 1/sqrt(3) on the
# basis states with index 1, 2 and 4 (i.e. 001, 010 and 100), zero elsewhere.
sq = 1/np.sqrt(3)
w_state = np.array([0, sq, sq, 0, sq, 0, 0, 0])


def projector(ket):
    """Return the projector |ket><ket| as an (n, n) array.

    Entry (i, j) is ``ket[i] * conj(ket[j])``.

    The result is built with an outer product instead of being written
    element by element into a float buffer, so a complex-valued ``ket`` keeps
    its imaginary parts rather than having them discarded.

    Args:
        ket: one-dimensional sequence or array of n amplitudes.

    Returns:
        Array of shape (n, n); complex if ``ket`` is complex.
    """
    return np.outer(ket, np.conjugate(ket))


# Projector onto the W state; used as the observable whose expectation value
# measures how close a circuit's output is to the W state.
W = projector(w_state)

In [4]:
# Inspect how many parameters the PauliX gate expects (none, per the output).
qml.PauliX.num_params

0

In [3]:
# Inspect how many parameters the Rot gate expects (three, per the output).
qml.Rot.num_params

3

In [138]:
# Gate alphabet for the three-qubit circuit. Each key is an action index
# (stored as a string); each entry names the PennyLane gate class, the wires it
# acts on and, for parametrized gates, the fixed values passed as "params".
# NOTE(review): 0.608*np.pi and 0.78476 are unexplained constants — presumably
# angles found by an earlier optimization; confirm and record their origin.
alphabet_w = {"0":{"gate": qml.PauliX, "wires": [2]}, 
            "1":{"gate": qml.RZ, "wires": [0], "params":[np.pi]},
            "2":{"gate": qml.RY, "wires": [1], "params":[0.608*np.pi]},
            "3":{"gate": qml.CNOT, "wires": [1,2]},  # no "params" entry for CNOT
            "4":{"gate": qml.CNOT, "wires": [1,0]},  # no "params" entry for CNOT
            "5":{"gate": qml.RY, "wires": [0], "params":[0.78476]},
            "6":{"gate":qml.Rot, "wires":[0], "params": np.array([-2.31419177e-03,  5.49757776e+00,  3.14314062e+00])}, # three Rot parameters, borrowed from another optimization
            "7":{"gate": qml.CNOT, "wires": [0,1]},  # no "params" entry for CNOT
           }

In [None]:
# Parameter-free variant of the gate alphabet: the same gates and wires as
# ``alphabet_w`` but without any "params" entries.
# It is bound to its own name on purpose: this cell sits between the
# definition of the parametrized ``alphabet_w`` and the cell that pickles it,
# so reusing the name would make a top-to-bottom run pickle the version
# without angles.
alphabet_w_unparametrized = {
    "0": {"gate": qml.PauliX, "wires": [2]},
    "1": {"gate": qml.RZ, "wires": [0]},
    "2": {"gate": qml.RY, "wires": [1]},
    "3": {"gate": qml.CNOT, "wires": [1, 2]},
    "4": {"gate": qml.CNOT, "wires": [1, 0]},
    "5": {"gate": qml.RY, "wires": [0]},
    "6": {"gate": qml.Rot, "wires": [0]},
    "7": {"gate": qml.CNOT, "wires": [0, 1]},
}

In [139]:
# Persist the gate alphabet to disk; the Ansatz environment reads this file
# back in its constructor. The context manager closes the file even if
# pickling raises.
with open("alphabet_w.pickle", "wb") as f:
    pickle.dump(alphabet_w, f, protocol=pickle.HIGHEST_PROTOCOL)

In [94]:
# A gate sequence expressed as indices into the gate alphabet
# (the same sequence is replayed through the environment as `optimal_policy`).
state_indexed = [0,1,2,3,4,5,4,6,7]

In [95]:
dev = qml.device("default.qubit", wires=3)


@qml.qnode(dev)
def circuit(state_indexed):
    """Apply the gates selected by ``state_indexed`` and measure the W projector.

    Args:
        state_indexed: sequence of indices into the gate alphabet
            ``alphabet_w``, applied in order.

    Returns:
        Expectation value of the Hermitian observable ``W`` on wires 0, 1, 2.

    Notes:
        The alphabet is looked up under its actual notebook name,
        ``alphabet_w``; no global called ``alphabet`` is defined in the cells
        shown here.
        ``append_gate`` is not defined in the visible part of the notebook and
        must be in scope before this QNode is called.
        NOTE(review): ``ind.val`` assumes the QNode hands the circuit wrapped
        arguments exposing a ``.val`` attribute — confirm against the
        installed PennyLane version.
    """
    for ind in state_indexed:
        append_gate(alphabet_w, ind.val)
    return qml.expval(qml.Hermitian(W, wires=[0, 1, 2]))

In [116]:
# Scratch check: draw 30 random integers from 0..9 and count how many equal 8.
# No seed is set, so the count differs from run to run.
np.count_nonzero(np.random.choice(range(10),30)==8)

5

In [102]:
# Scratch check: a discrete Gym space with 8 elements, one per entry of the
# eight-gate alphabet.
spaces.Discrete(8)

Discrete(8)

In [169]:
class Ansatz(gym.Env):
    """Gym environment that builds a three-qubit circuit one gate at a time.

    An action is an integer index into a gate alphabet loaded from
    ``alphabet_w.pickle``. The environment state is the sequence of actions
    taken so far, and the reward after each step is the expectation value of
    the W-state projector for the circuit built from that sequence.

    NOTE(review): no ``observation_space`` is declared, and the observation
    returned by ``step`` grows by one entry per step; confirm that the agent
    consuming this environment copes with variable-length observations.
    """

    def __init__(self, maximum_number_of_gates=15):
        """Load the gate alphabet and set up the action space and target.

        Args:
            maximum_number_of_gates: number of gates after which an episode
                is reported as finished.
        """
        super(Ansatz, self).__init__()

        self.state_indexed = np.array([])
        self.maximum_number_of_gates = maximum_number_of_gates

        # NOTE: pickle.load can execute arbitrary code; only load an alphabet
        # file that was produced by this notebook.
        with open('alphabet_w.pickle', 'rb') as alphabet:
            self.alphabet = pickle.load(alphabet)

        self.n_actions = len(self.alphabet)
        self.action_space = spaces.Discrete(self.n_actions)

        # Target state: amplitude 1/sqrt(3) on basis indices 1, 2 and 4.
        sq = 1/np.sqrt(3)
        w_state = np.array([0, sq, sq, 0, sq, 0, 0, 0])
        # Use the class's own method so the environment does not depend on a
        # notebook-level ``projector`` function being defined.
        self.W = self.projector(w_state)
        self.reward_history = np.array([])

    def projector(self, ket):
        """Return the projector |ket><ket| as an (n, n) array.

        Entry (i, j) is ``ket[i] * conj(ket[j])``. The outer product keeps the
        imaginary parts of a complex ``ket`` instead of writing them into a
        float buffer.
        """
        return np.outer(ket, np.conjugate(ket))

    def reset(self):
        """Clear the gate sequence and the reward history.

        Returns:
            The empty gate sequence wrapped in an outer array and cast to
            float32, i.e. an array of shape (1, 0).

        NOTE(review): ``step`` returns the one-dimensional gate sequence,
        whereas this method wraps it in an extra dimension; confirm which
        shape the agent expects before unifying them.
        """
        self.state_indexed = np.array([])
        self.reward_history = np.array([])
        return np.array([self.state_indexed]).astype(np.float32)

    def check_if_finish(self):
        """Return True once the circuit holds ``maximum_number_of_gates`` gates.

        The comparison is inclusive so that an episode stops when the limit is
        reached rather than one gate past it.
        """
        return len(self.state_indexed) >= self.maximum_number_of_gates

    def step(self, action):
        """Append one gate to the circuit and evaluate it.

        Args:
            action: integer between 0 and ``len(self.alphabet) - 1``.

        Returns:
            Tuple ``(observation, reward, done, info)`` where the observation
            is the gate sequence so far as float32, the reward comes from
            ``give_reward``, ``done`` comes from ``check_if_finish`` and
            ``info`` is an empty dict.
        """
        self.state_indexed = np.append(self.state_indexed, action)
        done = self.check_if_finish()
        reward = self.give_reward()
        self.reward_history = np.append(self.reward_history, reward)
        info = {}
        return self.state_indexed.astype(np.float32), reward, done, info

    def give_reward(self):
        """Return the W-projector expectation value of the current circuit.

        A fresh device and QNode are created on every call.
        NOTE(review): presumably this is so the circuit structure can change
        between calls — confirm before hoisting them into ``__init__``.
        ``append_gate`` is not defined in this class and must be in scope
        when this method runs.
        """
        dev = qml.device("default.qubit", wires=3)

        @qml.qnode(dev)
        def circuit(state_indexed):
            for ind in state_indexed:
                # The stored sequence is a float array, hence the int cast.
                # NOTE(review): ``.val`` assumes the QNode passes wrapped
                # arguments exposing that attribute — confirm against the
                # installed PennyLane version.
                append_gate(self.alphabet, int(ind.val))
            return qml.expval(qml.Hermitian(self.W, wires=[0, 1, 2]))
        return circuit(self.state_indexed)

In [170]:
# Replay a known-good gate sequence through the environment, one action per step.
# Ansatz() reads "alphabet_w.pickle", so that file must exist before this cell runs.
env = Ansatz()
env.reset()
optimal_policy = [0,1,2,3,4,5,4,6,7]
for k in optimal_policy:
    env.step(k)

In [171]:
# Reward recorded after each step of the rollout; in the recorded output the
# final value is close to 1.
env.reward_history

array([0.33333333, 0.33333333, 0.11119674, 0.64766351, 0.11119674,
       0.01635693, 0.55296147, 0.44462534, 0.99999948])

In [173]:
# A sub-optimal policy: eight actions drawn at random from the action set.
# It is stored under its own name so it neither contradicts this comment nor
# overwrites the `optimal_policy` sequence defined earlier.
# NOTE: no random seed is set, so the rewards shown below change from run to run.
env.reset()
random_policy = np.random.choice(range(env.n_actions), 8)
for k in random_policy:
    env.step(k)
env.reward_history

array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.87403522e-02,
       2.82910636e-07, 0.00000000e+00, 0.00000000e+00, 4.88401737e-02])