# Install and import packages
--------

In [1]:
# install/import quantum gym environments
#!pip install git+https://github.com/qdevpsi3/quantum-arch-search.git

# install/import stable baselines 3
#!pip install stable_baselines3

In [2]:
import gym
import numpy as np
import torch.optim as optim
from stable_baselines3 import A2C, PPO
from stable_baselines3.common.evaluation import evaluate_policy
import qas_gym

# Basic Environment
------
Create your gym environment :

In [3]:
# Parameters 
env_name = 'BasicTwoQubit-v0' # 两比特环境-无噪声
fidelity_threshold = 0.9
reward_penalty = 0.1
max_timesteps = 20
target = np.asarray([0.70710678+0.j,0. +0.j,0. +0.j, 0.70710678+0.j])
# Environment
env = gym.make(env_name, target = target,
               fidelity_threshold=fidelity_threshold,
               reward_penalty=reward_penalty,
               max_timesteps=max_timesteps)

Diplay the action gates : 

In [4]:
for idx, gate in enumerate(env.action_gates):
    print('Action({:02d}) --> {}'.format(idx, gate))

Action(00) -->  *** gate info *** 
 * gate name : X-rotation
 * target    : 
 0 : commute X     
 * control   : 
 * Pauli     : no
 * Clifford  : no
 * Gaussian  : no
 * Parametric: no
 * Diagonal  : no

Action(01) -->  *** gate info *** 
 * gate name : Y-rotation
 * target    : 
 0 : commute   Y   
 * control   : 
 * Pauli     : no
 * Clifford  : no
 * Gaussian  : no
 * Parametric: no
 * Diagonal  : no

Action(02) -->  *** gate info *** 
 * gate name : Z-rotation
 * target    : 
 0 : commute     Z 
 * control   : 
 * Pauli     : no
 * Clifford  : no
 * Gaussian  : no
 * Parametric: no
 * Diagonal  : yes

Action(03) -->  *** gate info *** 
 * gate name : H
 * target    : 
 0 : commute       
 * control   : 
 * Pauli     : no
 * Clifford  : yes
 * Gaussian  : no
 * Parametric: no
 * Diagonal  : no

Action(04) -->  *** gate info *** 
 * gate name : CNOT
 * target    : 
 1 : commute X     
 * control   : 
 0 : value 1
 * Pauli     : no
 * Clifford  : yes
 * Gaussian  : no
 * Parametric: n

Diplay the state observables : 

In [5]:
for idx, observable in enumerate(env.state_observables):
    print('State({:02d}) --> {}'.format(idx, observable))

# for idx, observable in enumerate(env.pauli_observables):
#     print('State({:02d}) --> {}'.format(idx, observable))   

State(00) --> 1 X 0
State(01) --> 1 Y 0
State(02) --> 1 Z 0
State(03) --> 1 X 1
State(04) --> 1 Y 1
State(05) --> 1 Z 1


In [6]:
from qulacs import QuantumState, QuantumCircuit, QuantumCircuitSimulator, Observable
n = 2
state = QuantumState(n)
print(state)

circuit = QuantumCircuit(n)
for i in range(n):
   circuit.add_H_gate(i)

sim = QuantumCircuitSimulator(circuit, state)

sim.initialize_state(0)

print("gate_count: ", sim.get_gate_count())

sim.simulate()

print(state)


 *** Quantum State ***
 * Qubit Count : 2
 * Dimension   : 4
 * State vector : 
(1,0)
(0,0)
(0,0)
(0,0)

gate_count:  2
 *** Quantum State ***
 * Qubit Count : 2
 * Dimension   : 4
 * State vector : 
(0.5,0)
(0.5,0)
(0.5,0)
(0.5,0)



In [7]:
observable = Observable(2)
observable.add_operator(1.0, "Z 1")
# print("expectation_value: ", sim.get_expectation_value(observable))

state = QuantumState(2)
state.load([.65328148-0.27059805j, 0.-0.j, 0.65328148-0.27059805j, 0.-0.j])

observable.get_expectation_value(state)

0.0

In [8]:
observable = Observable(2)
observable.add_operator(1.0, "Z 0")
obs = observable.get_matrix()
print(obs.todense())

[[ 1.+0.j  0.+0.j  0.+0.j  0.+0.j]
 [ 0.+0.j -1.+0.j  0.+0.j  0.+0.j]
 [ 0.+0.j  0.+0.j  1.+0.j  0.+0.j]
 [ 0.+0.j  0.+0.j  0.+0.j -1.+0.j]]


In [9]:
observable.get_term(0)

<qulacs_core.PauliOperator at 0x1794726d1b0>

In [10]:
a = observable.get_term(0)
a.get_pauli_string()

'Z 0'

In [11]:
a.get_pauli_id_list()

[3]

In [12]:
# from cirq.work.observable_measurement import measure_observables, RepetitionsStoppingCriteria
# import cirq
# # circuit = env._get_cirq(maybe_add_noise=True)
# # sample_pauli_obs = env.pauli_observables[0]
# circuit = cirq.Circuit(cirq.I(qubit) for qubit in env.qubits)
# # result = measure_observables(
# #                 circuit, env.pauli_observables, cirq.Simulator(), stopping_criteria=RepetitionsStoppingCriteria(100))
# K=10
# # sample_pauli_idx = np.random.choice(len(env.pauli_observables), K)
# sample_pauli = [env.pauli_observables[i] for i in np.random.choice(len(env.pauli_observables), K)]
# result = measure_observables(
#     circuit, sample_pauli, cirq.Simulator(), stopping_criteria=RepetitionsStoppingCriteria(100))
  
# # print(result[15].mean, result[15].variance)

In [13]:
# for observable in env.pauli_observables:
#    print(type(observable))
#    for qubit, pauli in observable.items():
#       print(qubit, pauli)

# print(result[1].mean, result[1].variance)

# A2C Agent
------

In [14]:
# # Parameters
# gamma = 0.99
# learning_rate = 0.0001
# policy_kwargs = dict(optimizer_class=optim.Adam)

# # Agent
# a2c_model = A2C("MlpPolicy",
#                 env,
#                 gamma=gamma,
#                 learning_rate=learning_rate,
#                 policy_kwargs=policy_kwargs,
#                 tensorboard_log='logs/')

In [15]:
# a2c_model.learn(total_timesteps=20000)

In [16]:
# # 从初始态开始，每一步用训练好的agent对当前state选择添加的门（action）
# state = env.reset()
# print(state)
# done = False
# while not done:
#     action = a2c_model.predict(state)
#     state, reward, done, info = env.step(action[0])
#     #展示当前的线路 和 state
#     # env.render()
#     print(state)

# print(info['circuit'])
# print(info['fidelity'])

# PPO Model
------

In [17]:
# Parameters
gamma = 0.99
n_epochs = 4
clip_range = 0.2
learning_rate = 0.0001
policy_kwargs = dict(optimizer_class=optim.Adam)


# Agent
ppo_model = PPO("MlpPolicy",
                env,
                gamma=gamma,
                n_epochs=n_epochs,
                clip_range=clip_range,
                learning_rate=learning_rate,
                policy_kwargs=policy_kwargs,
                tensorboard_log='logs/')



In [28]:
ppo_model.learn(total_timesteps=2000)

state:  [1.+0.j 0.+0.j 0.+0.j 0.+0.j]
state:  [ 0.88745254+0.j  0.        +0.j -0.46089911+0.j  0.        +0.j]
number of gates:1,  number of param gates:1
Parameters:  [2.49800181e-15]
state:  [9.79855437e-01+9.34197123e-06j 0.00000000e+00+0.00000000e+00j
 4.58358067e-05+1.99708088e-01j 0.00000000e+00+0.00000000e+00j]
number of gates:2,  number of param gates:2
Parameters:  [-1.11244119e-04  1.30707689e-05]
state:  [8.85844323e-01+2.58076667e-05j 0.00000000e+00+0.00000000e+00j
 4.92724858e-05+4.63982578e-01j 0.00000000e+00+0.00000000e+00j]
number of gates:3,  number of param gates:3
Parameters:  [-1.43388018e-04  1.57804994e-05  1.58887922e-05]
state:  [0.70715747+1.11975881e-05j 0.        +0.00000000e+00j
 0.70705608-1.11959826e-05j 0.        +0.00000000e+00j]
number of gates:4,  number of param gates:3
Parameters:  [-1.50557419e-04  1.57804994e-05  1.58887922e-05]
state:  [0.70715747+1.14784851e-05j 0.        +0.00000000e+00j
 0.        +0.00000000e+00j 0.70705608-1.14768393e-05j]
n

<stable_baselines3.ppo.ppo.PPO at 0x1793631af10>

In [26]:
# 从初始态开始，每一步用训练好的agent对当前state选择添加的门（action）
state = env.reset()
done = False
while not done:
    action = ppo_model.predict(state)
    state, reward, done, info = env.step(action[0])
    #展示当前的线路 和 state
    env.render()
    # print(state)
    print(info['fidelity'])
    
env.render()


state:  [1.+0.j 0.+0.j 0.+0.j 0.+0.j]
state:  [1.+0.j 0.+0.j 0.+0.j 0.+0.j]
number of gates:1,  number of param gates:0
Parameters:  None

0: ───I───X───
          │
1: ───I───@───
0.49999999832196845
state:  [0.96242549+0.2715459j 0.        +0.j        0.        +0.j
 0.        +0.j       ]
number of gates:2,  number of param gates:1
Parameters:  [0.54999785]

0: ───I───X───────────────
          │
1: ───I───@───Rz(0.25π)───
0.49999999832196856
state:  [0.96243278+0.27152006j 0.        +0.j         0.        +0.j
 0.        +0.j        ]
number of gates:3,  number of param gates:1
Parameters:  [0.56197417]

0: ───I───X───────────────@───
          │               │
1: ───I───@───Rz(0.25π)───X───
0.49999999832196856
state:  [0.91731236+0.39816835j 0.        +0.j         0.        +0.j
 0.        +0.j        ]
number of gates:4,  number of param gates:2
Parameters:  [0.55500252 0.27307374]

0: ───I───X───────────────@───────────────
          │               │
1: ───I───@───Rz(0.25π)───

In [27]:
print(info['fidelity'])

0.4999999983069439


# Results
------

In [21]:
import os
os.environ['TENSORBOARD_BINARY'] = 'c:\\users\\mac\\appdata\\roaming\\python\\python311\\site-packages\\tensorboard'
%load_ext tensorboard
%tensorboard --logdir=logs/

ERROR: Failed to start
'c:\\users\\mac\\appdata\\roaming\\python\\python311\\site-
packages\\tensorboard' (set by the `TENSORBOARD_BINARY` environment
variable): [WinError 5] 拒绝访问。

In [22]:
# 从初始态开始，每一步用训练好的agent对当前state选择添加的门（action）
state = env.reset()
done = False
while not done:
    action = ppo_model.predict(state)
    state, reward, done, info = env.step(action[0])
    #展示当前的线路 和 state
    env.render()
    print(state)


state:  [1.+0.j 0.+0.j 0.+0.j 0.+0.j]
state:  [1.+0.j 0.+0.j 0.+0.j 0.+0.j]
number of gates:1,  number of param gates:0
Parameters:  None

0: ───I───@───
          │
1: ───I───X───
[0. 0. 1. 0. 0. 1.]
state:  [0.94505639+0.j         0.        +0.j         0.        +0.32690735j
 0.        +0.j        ]
number of gates:2,  number of param gates:1
Parameters:  [-1.11022302e-15]

0: ───I───@───────────────
          │
1: ───I───X───Rx(0.25π)───
[0.         0.         1.         0.         0.61789177 0.78626317]
state:  [9.67021266e-01+0.00000000e+00j 0.00000000e+00+2.54695642e-01j
 0.00000000e+00-3.14498422e-05j 8.28331084e-06+0.00000000e+00j]
number of gates:3,  number of param gates:2
Parameters:  [-0.00011896  0.00013967]

0: ───I───@───Rx(0.25π)───
          │
1: ───I───X───Rx(0.25π)───
[ 0.00000000e+00  4.92592204e-01  8.70260260e-01  0.00000000e+00
 -6.50447788e-05  9.99999998e-01]
state:  [ 9.86931960e-01+1.61137513e-01j -1.12533748e-05+6.89244548e-05j
 -9.58447100e-06-5.87027850e-

In [23]:
from qulacs import QuantumCircuit, QuantumCircuitSimulator
a = QuantumCircuitSimulator()
help(a)

TypeError: __init__(): incompatible constructor arguments. The following argument types are supported:
    1. qulacs_core.QuantumCircuitSimulator(circuit: qulacs_core.QuantumCircuit, state: qulacs_core.QuantumStateBase)

Invoked with: 

In [None]:
env.action_gates



[<qulacs_core.ClsOneQubitRotationGate at 0x1f74870ccb0>,
 <qulacs_core.ClsOneQubitRotationGate at 0x1f74870cbb0>,
 <qulacs_core.ClsOneQubitRotationGate at 0x1f74870cc70>,
 <qulacs_core.ClsOneQubitGate at 0x1f74870cd30>,
 <qulacs_core.ClsOneControlOneTargetGate at 0x1f74870cbf0>,
 <qulacs_core.ClsOneQubitRotationGate at 0x1f74870d030>,
 <qulacs_core.ClsOneQubitRotationGate at 0x1f74870cdf0>,
 <qulacs_core.ClsOneQubitRotationGate at 0x1f74870d0b0>,
 <qulacs_core.ClsOneQubitGate at 0x1f74870cd70>,
 <qulacs_core.ClsOneControlOneTargetGate at 0x1f74870d070>]

In [None]:
a = env.action_gates[5]
a.__str__()
a.get_name()

'X-rotation'

In [None]:
a.get_control_index_list()

[]

In [None]:
a.get_target_index_list()

[1]

In [None]:
which_angles=[]
if list(which_angles):
    print("OK")