# Install and import packages
--------

In [1]:
# install/import quantum gym environments
#!pip install git+https://github.com/qdevpsi3/quantum-arch-search.git

# install/import stable baselines 3
#!pip install stable_baselines3

In [2]:
import gym
import numpy as np
import torch.optim as optim
from stable_baselines3 import A2C, PPO
from stable_baselines3.common.evaluation import evaluate_policy
import qas_gym
from scipy.stats import unitary_group


# Basic Environment
------
Create your gym environment :

In [3]:
from qulacs import QuantumState
from scipy.linalg import expm

# Parameters 
env_name = 'BasicTwoQubit-v0' # 两比特环境-无噪声
fidelity_threshold = 0.9
reward_penalty = 0.1
max_timesteps = 20

# x = unitary_group.rvs(2**2)
# initial = np.dot(x , np.asarray([1, 0,0,0]))
# print(initial)
n = 2
state = QuantumState(n)
state.set_Haar_random_state(seed=0)
initial = state.get_vector()

# define Hamiltonian for evolution
from qulacs import Observable
observable = Observable(n)
observable.add_operator(1.0, "X 0")
observable.add_operator(1.0, "Z 1")
observable.add_operator(0.5, "X 0 X 1")
# ref to https://pennylane.ai/qml/demos/tutorial_qaoa_intro/

obs = observable.get_matrix()
# target = QuantumState(n)
dt = 0.001
target = expm(1j *obs.todense() * dt) @ initial 
print(target)

# # target = np.asarray([0.70710678+0.j,0. +0.j,0. +0.j, 0.70710678+0.j])
# # Environment
# env = gym.make(env_name, target = target,
#                fidelity_threshold=fidelity_threshold,
#                reward_penalty=reward_penalty,
#                max_timesteps=max_timesteps,
#                initial = state)

[ 0.42500695+0.14316491j  0.30726487+0.10966392j -0.25658306-0.63684443j
  0.30199583-0.36031632j]


Diplay the action gates : 

In [4]:
# # target = np.asarray([0.70710678+0.j,0. +0.j,0. +0.j, 0.70710678+0.j])
# Environment
env = gym.make(env_name, target = target,
               fidelity_threshold=fidelity_threshold,
               reward_penalty=reward_penalty,
               max_timesteps=max_timesteps,
               initial = initial)

Diplay the state observables : 

In [5]:
for idx, observable in enumerate(env.state_observables):
    print('State({:02d}) --> {}'.format(idx, observable))

# for idx, observable in enumerate(env.pauli_observables):
#     print('State({:02d}) --> {}'.format(idx, observable))   

State(00) --> 1 X 0
State(01) --> 1 Y 0
State(02) --> 1 Z 0
State(03) --> 1 X 1
State(04) --> 1 Y 1
State(05) --> 1 Z 1


In [6]:
from qulacs import QuantumState, QuantumCircuit, QuantumCircuitSimulator, Observable
n = 2
state = QuantumState(n)
print(state)

circuit = QuantumCircuit(n)
for i in range(n):
   circuit.add_H_gate(i)

sim = QuantumCircuitSimulator(circuit, state)

sim.initialize_state(0)

print("gate_count: ", sim.get_gate_count())

sim.simulate()

print(state)


 *** Quantum State ***
 * Qubit Count : 2
 * Dimension   : 4
 * State vector : 
(1,0)
(0,0)
(0,0)
(0,0)

gate_count:  2
 *** Quantum State ***
 * Qubit Count : 2
 * Dimension   : 4
 * State vector : 
(0.5,0)
(0.5,0)
(0.5,0)
(0.5,0)



In [7]:
observable = Observable(2)
observable.add_operator(1.0, "Z 1")
# print("expectation_value: ", sim.get_expectation_value(observable))

state = QuantumState(2)
state.load([.65328148-0.27059805j, 0.-0.j, 0.65328148-0.27059805j, 0.-0.j])

observable.get_expectation_value(state)

0.0

In [8]:
a = observable.get_term(0)
a.get_pauli_string()

'Z 1'

In [9]:
# from cirq.work.observable_measurement import measure_observables, RepetitionsStoppingCriteria
# import cirq
# # circuit = env._get_cirq(maybe_add_noise=True)
# # sample_pauli_obs = env.pauli_observables[0]
# circuit = cirq.Circuit(cirq.I(qubit) for qubit in env.qubits)
# # result = measure_observables(
# #                 circuit, env.pauli_observables, cirq.Simulator(), stopping_criteria=RepetitionsStoppingCriteria(100))
# K=10
# # sample_pauli_idx = np.random.choice(len(env.pauli_observables), K)
# sample_pauli = [env.pauli_observables[i] for i in np.random.choice(len(env.pauli_observables), K)]
# result = measure_observables(
#     circuit, sample_pauli, cirq.Simulator(), stopping_criteria=RepetitionsStoppingCriteria(100))
  
# # print(result[15].mean, result[15].variance)

In [10]:
# for observable in env.pauli_observables:
#    print(type(observable))
#    for qubit, pauli in observable.items():
#       print(qubit, pauli)

# print(result[1].mean, result[1].variance)

# A2C Agent
------

In [11]:
# # Parameters
# gamma = 0.99
# learning_rate = 0.0001
# policy_kwargs = dict(optimizer_class=optim.Adam)

# # Agent
# a2c_model = A2C("MlpPolicy",
#                 env,
#                 gamma=gamma,
#                 learning_rate=learning_rate,
#                 policy_kwargs=policy_kwargs,
#                 tensorboard_log='logs/')

In [12]:
# a2c_model.learn(total_timesteps=20000)

In [13]:
# # 从初始态开始，每一步用训练好的agent对当前state选择添加的门（action）
# state = env.reset()
# print(state)
# done = False
# while not done:
#     action = a2c_model.predict(state)
#     state, reward, done, info = env.step(action[0])
#     #展示当前的线路 和 state
#     # env.render()
#     print(state)

# print(info['circuit'])
# print(info['fidelity'])

# PPO Model
------

In [14]:
# Parameters
gamma = 0.99
n_epochs = 4
clip_range = 0.2
learning_rate = 0.0001
policy_kwargs = dict(optimizer_class=optim.Adam)


# Agent
ppo_model = PPO("MlpPolicy",
                env,
                gamma=gamma,
                n_epochs=n_epochs,
                clip_range=clip_range,
                learning_rate=learning_rate,
                policy_kwargs=policy_kwargs,
                tensorboard_log='logs/')



In [15]:
ppo_model.learn(total_timesteps=2000)

  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


state:  [1.+0.j 0.+0.j 0.+0.j 0.+0.j]
state:  [1.+0.j 0.+0.j 0.+0.j 0.+0.j]
number of gates:1,  number of param gates:0
Parameters:  None
state:  [0.70710678+0.j 0.        +0.j 0.70710678+0.j 0.        +0.j]
number of gates:2,  number of param gates:0
Parameters:  None
state:  [0.62329486+0.j         0.        +0.33392142j 0.62329486+0.j
 0.        +0.33392142j]
number of gates:3,  number of param gates:1
Parameters:  [1.87431798]
state:  [0.24025464+0.j         0.        +0.66503963j 0.24025464+0.j
 0.        +0.66503963j]
number of gates:4,  number of param gates:2
Parameters:  [1.22961405 0.64466046]
state:  [0.41867611+0.j         0.        +0.56983358j 0.41867611+0.j
 0.        +0.56983358j]
number of gates:5,  number of param gates:2
Parameters:  [1.22966013 0.6446144 ]
state:  [0.37340862+0.18935598j 0.25772045+0.50822287j 0.37340862+0.18935598j
 0.25772045+0.50822287j]
number of gates:6,  number of param gates:3
Parameters:  [1.10613947 0.69605076 0.71908166]
state:  [0.4108714

<stable_baselines3.ppo.ppo.PPO at 0x252f68ead50>

In [16]:
c = env.ansatz
c.get_p

AttributeError: 'qulacs_core.ParametricQuantumCircuit' object has no attribute 'get_p'

In [17]:
# 从初始态开始，每一步用训练好的agent对当前state选择添加的门（action）
state = env.reset()
done = False
while not done:
    action = ppo_model.predict(state)
    state, reward, done, info = env.step(action[0])
    #展示当前的线路 和 state
    env.render()
    # print(state)
    print(info['fidelity'])
print("Final circuit: ")    
env.render()


state:  [1.+0.j 0.+0.j 0.+0.j 0.+0.j]
state:  [0.93848844+0.j         0.        +0.34531066j 0.        +0.j
 0.        +0.j        ]
number of gates:1,  number of param gates:1
Parameters:  [0.05522675]


  logger.warn(


UnboundLocalError: cannot access local variable 'gate' where it is not associated with a value

In [None]:
print(info['fidelity'])

0.9053736304476916


# Results
------

In [None]:
import os
os.environ['TENSORBOARD_BINARY'] = 'c:\\users\\mac\\appdata\\roaming\\python\\python311\\site-packages\\tensorboard'
%load_ext tensorboard
%tensorboard --logdir=logs/

ERROR: Failed to start
'c:\\users\\mac\\appdata\\roaming\\python\\python311\\site-
packages\\tensorboard' (set by the `TENSORBOARD_BINARY` environment
variable): [WinError 5] 拒绝访问。

In [None]:
# 从初始态开始，每一步用训练好的agent对当前state选择添加的门（action）
state = env.reset()
done = False
while not done:
    action = ppo_model.predict(state)
    state, reward, done, info = env.step(action[0])
    #展示当前的线路 和 state
    env.render()
    print(state)


state:  [1.+0.j 0.+0.j 0.+0.j 0.+0.j]
state:  [1.+0.j 0.+0.j 0.+0.j 0.+0.j]
number of gates:1,  number of param gates:0
Parameters:  None

0: ───I───@───
          │
1: ───I───X───
[0. 0. 1. 0. 0. 1.]
state:  [0.94505639+0.j         0.        +0.j         0.        +0.32690735j
 0.        +0.j        ]
number of gates:2,  number of param gates:1
Parameters:  [-1.11022302e-15]

0: ───I───@───────────────
          │
1: ───I───X───Rx(0.25π)───
[0.         0.         1.         0.         0.61789177 0.78626317]
state:  [9.67021266e-01+0.00000000e+00j 0.00000000e+00+2.54695642e-01j
 0.00000000e+00-3.14498422e-05j 8.28331084e-06+0.00000000e+00j]
number of gates:3,  number of param gates:2
Parameters:  [-0.00011896  0.00013967]

0: ───I───@───Rx(0.25π)───
          │
1: ───I───X───Rx(0.25π)───
[ 0.00000000e+00  4.92592204e-01  8.70260260e-01  0.00000000e+00
 -6.50447788e-05  9.99999998e-01]
state:  [ 9.86931960e-01+1.61137513e-01j -1.12533748e-05+6.89244548e-05j
 -9.58447100e-06-5.87027850e-

In [None]:
a = env.action_gates[5]
a.get_name()

NameError: name 'env' is not defined

In [None]:
a.get_control_index_list()

[]

In [None]:
a.get_target_index_list()

[1]

In [None]:
which_angles=[]
if list(which_angles):
    print("OK")