# Install and import packages
--------

In [1]:
# install/import quantum gym environments
#!pip install git+https://github.com/qdevpsi3/quantum-arch-search.git

# install/import stable baselines 3
#!pip install stable_baselines3

In [2]:
import gym
import numpy as np
import torch.optim as optim
from stable_baselines3 import A2C, PPO
from stable_baselines3.common.evaluation import evaluate_policy
import qas_gym

# Basic Environment
------
Create your gym environment :

In [3]:
# Environment configuration
env_name = 'BasicTwoQubit-v0'  # two-qubit environment, noiseless
fidelity_threshold = 0.9
reward_penalty = 0.1
max_timesteps = 20
# Target state vector passed to the environment: equal amplitudes on the
# first and last basis states, zero on the two in between.
target = np.array([0.70710678, 0.0, 0.0, 0.70710678], dtype=complex)

# Build the environment from the settings above
env = gym.make(
    env_name,
    target=target,
    fidelity_threshold=fidelity_threshold,
    reward_penalty=reward_penalty,
    max_timesteps=max_timesteps,
)

Display the action gates:

In [4]:
# List every entry of env.action_gates, labelled with its zero-padded index.
for idx, gate in enumerate(env.action_gates):
    print(f'Action({idx:02d}) --> {gate}')

Action(00) -->  *** gate info *** 
 * gate name : Z-rotation
 * target    : 
 0 : commute     Z 
 * control   : 
 * Pauli     : no
 * Clifford  : no
 * Gaussian  : no
 * Parametric: no
 * Diagonal  : yes

Action(01) -->  *** gate info *** 
 * gate name : X
 * target    : 
 0 : commute X     
 * control   : 
 * Pauli     : yes
 * Clifford  : yes
 * Gaussian  : no
 * Parametric: no
 * Diagonal  : no

Action(02) -->  *** gate info *** 
 * gate name : Y
 * target    : 
 0 : commute   Y   
 * control   : 
 * Pauli     : yes
 * Clifford  : yes
 * Gaussian  : no
 * Parametric: no
 * Diagonal  : no

Action(03) -->  *** gate info *** 
 * gate name : Z
 * target    : 
 0 : commute     Z 
 * control   : 
 * Pauli     : yes
 * Clifford  : yes
 * Gaussian  : yes
 * Parametric: no
 * Diagonal  : yes

Action(04) -->  *** gate info *** 
 * gate name : H
 * target    : 
 0 : commute       
 * control   : 
 * Pauli     : no
 * Clifford  : yes
 * Gaussian  : no
 * Parametric: no
 * Diagonal  : no

Action

Display the state observables:

In [5]:
# List every entry of env.state_observables, labelled with its zero-padded index.
for idx, observable in enumerate(env.state_observables):
    print(f'State({idx:02d}) --> {observable}')

# Disabled alternative: the same listing over env.pauli_observables.
# for idx, observable in enumerate(env.pauli_observables):
#     print('State({:02d}) --> {}'.format(idx, observable))

State(00) --> 1 X 0
State(01) --> 1 Y 0
State(02) --> 1 Z 0
State(03) --> 1 X 1
State(04) --> 1 Y 1
State(05) --> 1 Z 1


In [6]:
from qulacs import QuantumState, QuantumCircuit, QuantumCircuitSimulator, Observable
# Standalone qulacs check: apply a Hadamard gate to each qubit of a 2-qubit
# register and print the resulting state vector.
n = 2
state = QuantumState(n)

# One H gate per qubit index 0..n-1.
circuit = QuantumCircuit(n)
for i in range(n):
   circuit.add_H_gate(i)

# The simulator is constructed from both the circuit and the state object.
sim = QuantumCircuitSimulator(circuit, state)

# Presumably resets the state to computational-basis index 0 before the run
# (consistent with the all-0.5 amplitudes in the recorded output).
sim.initialize_state(0)

print("gate_count: ", sim.get_gate_count())

sim.simulate()

# `state` is printed after simulate(); the recorded output shows four
# amplitudes of 0.5.
print(state)


gate_count:  2
 *** Quantum State ***
 * Qubit Count : 2
 * Dimension   : 4
 * State vector : 
(0.5,0)
(0.5,0)
(0.5,0)
(0.5,0)



In [7]:
# Expectation value of a single-term observable ("Z 1", coefficient 1.0)
# on a hand-written 2-qubit state vector.
observable = Observable(2)
observable.add_operator(1.0, "Z 1")
# print("expectation_value: ", sim.get_expectation_value(observable))

# Rebind `state` to a fresh 2-qubit state and load explicit amplitudes:
# equal complex values at indices 0 and 2, zero at indices 1 and 3.
state = QuantumState(2)
state.load([.65328148-0.27059805j, 0.-0.j, 0.65328148-0.27059805j, 0.-0.j])

# Last expression of the cell, so the notebook displays the value
# (0.0 in the recorded output).
observable.get_expectation_value(state)

0.0

In [28]:
# Rebind `observable` to a single-term "Z 0" operator and print its matrix.
observable = Observable(2)
observable.add_operator(1.0, "Z 0")
# get_matrix() returns an object with .todense(); the recorded output is a
# 4x4 diagonal matrix with entries (1, -1, 1, -1).
obs = observable.get_matrix()
print(obs.todense())

[[ 1.+0.j  0.+0.j  0.+0.j  0.+0.j]
 [ 0.+0.j -1.+0.j  0.+0.j  0.+0.j]
 [ 0.+0.j  0.+0.j  1.+0.j  0.+0.j]
 [ 0.+0.j  0.+0.j  0.+0.j -1.+0.j]]


In [9]:
# Print whatever object the global `state` is currently bound to.
print(state)

 *** Quantum State ***
 * Qubit Count : 2
 * Dimension   : 4
 * State vector : 
(0.653281,-0.270598)
               (0,0)
(0.653281,-0.270598)
               (0,0)



In [10]:
# Display the observable's state dimension (4 in the recorded output).
observable.get_state_dim()

4

In [11]:
# Fetch term 0 of the observable and display its coefficient
# ((1+0j) in the recorded output).
a = observable.get_term(0)
a.get_coef()

(1+0j)

In [12]:
# Exploratory lookup: show the docstring of get_pauli_id_list for term `a`.
help(a.get_pauli_id_list)

Help on method get_pauli_id_list in module qulacs_core:

get_pauli_id_list(...) method of qulacs_core.PauliOperator instance
    get_pauli_id_list(self: qulacs_core.PauliOperator) -> List[int]
    
    Get list of Pauli IDs (I,X,Y,Z) = (0,1,2,3)



In [13]:
# from cirq.work.observable_measurement import measure_observables, RepetitionsStoppingCriteria
# import cirq
# # circuit = env._get_cirq(maybe_add_noise=True)
# # sample_pauli_obs = env.pauli_observables[0]
# circuit = cirq.Circuit(cirq.I(qubit) for qubit in env.qubits)
# # result = measure_observables(
# #                 circuit, env.pauli_observables, cirq.Simulator(), stopping_criteria=RepetitionsStoppingCriteria(100))
# K=10
# # sample_pauli_idx = np.random.choice(len(env.pauli_observables), K)
# sample_pauli = [env.pauli_observables[i] for i in np.random.choice(len(env.pauli_observables), K)]
# result = measure_observables(
#     circuit, sample_pauli, cirq.Simulator(), stopping_criteria=RepetitionsStoppingCriteria(100))
  
# # print(result[15].mean, result[15].variance)

In [14]:
# for observable in env.pauli_observables:
#    print(type(observable))
#    for qubit, pauli in observable.items():
#       print(qubit, pauli)

# print(result[1].mean, result[1].variance)

# A2C Agent
------

In [15]:
# A2C hyperparameters
gamma = 0.99
learning_rate = 1e-4
policy_kwargs = {'optimizer_class': optim.Adam}

# A2C agent with an MLP policy on the environment created earlier;
# training metrics are written under logs/ for TensorBoard.
a2c_model = A2C(
    "MlpPolicy",
    env,
    gamma=gamma,
    learning_rate=learning_rate,
    policy_kwargs=policy_kwargs,
    tensorboard_log='logs/',
)



In [16]:
# Train the A2C agent with total_timesteps=20000. The call's return value is
# the cell's displayed result (the A2C object in the recorded output).
a2c_model.learn(total_timesteps=20000)

  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


[0.+0.j 0.+0.j 1.+0.j 0.+0.j]
[ 0.+0.j -0.-0.j  1.+0.j -0.-0.j]
[ 1.+0.j -0.-0.j  0.+0.j -0.-0.j]
[ 0.92387953+0.38268343j  0.        -0.j          0.        +0.j
 -0.        +0.j        ]
[ 0.92387953+0.38268343j -0.        +0.j          0.        +0.j
  0.        -0.j        ]
[ 0.70710678+0.70710678j -0.        +0.j          0.        +0.j
  0.        -0.j        ]
[ 0.        +0.j          0.        -0.j          0.70710678+0.70710678j
 -0.        +0.j        ]
[ 0.        -0.j          0.        +0.j         -0.        +0.j
  0.70710678+0.70710678j]
[ 0.        +0.j          0.        +0.j         -0.        +0.j
  0.92387953+0.38268343j]
[ 0.        +0.j          0.        +0.j          0.65328148+0.27059805j
 -0.65328148-0.27059805j]
[ 0.        +0.00000000e+00j  0.        +0.00000000e+00j
  0.5       +5.00000000e-01j -0.70710678-2.77555756e-17j]
[ 0.        +0.00000000e+00j  0.        +0.00000000e+00j
 -0.70710678-2.77555756e-17j  0.5       +5.00000000e-01j]
[ 0.        +0.0000

<stable_baselines3.a2c.a2c.A2C at 0x1e2be708f50>

In [17]:
# Roll out one episode from the initial state: at every step the trained A2C
# agent chooses which gate (action) to add for the current state.
state = env.reset()
print(state)
while True:
    # `action` keeps the full tuple returned by predict(); element 0 is the
    # value handed to the environment.
    action = a2c_model.predict(state)
    state, reward, done, info = env.step(action[0])
    # Show the current circuit and state
    # env.render()
    print(state)
    if done:
        break

# Summary taken from the info dict of the final step.
print(info['circuit'])
print(info['fidelity'])

[0. 0. 1. 0. 0. 1.]
[0.92387953+0.38268343j 0.        +0.j         0.        +0.j
 0.        +0.j        ]
[0. 0. 1. 0. 0. 1.]
[0.92387953+0.38268343j 0.        +0.j         0.        +0.j
 0.        +0.j        ]
[0. 0. 1. 0. 0. 1.]
[0.92387953+0.38268343j 0.        +0.j         0.        +0.j
 0.        +0.j        ]
[0. 0. 1. 0. 0. 1.]
[0.92387953+0.38268343j 0.        +0.j         0.        +0.j
 0.        +0.j        ]
[0. 0. 1. 0. 0. 1.]
[0.70710678+0.70710678j 0.        +0.j         0.        +0.j
 0.        +0.j        ]
[0. 0. 1. 0. 0. 1.]
[0.70710678+0.70710678j 0.        +0.j         0.        +0.j
 0.        +0.j        ]
[0. 0. 1. 0. 0. 1.]
[0.70710678+0.70710678j 0.        +0.j         0.        +0.j
 0.        +0.j        ]
[0. 0. 1. 0. 0. 1.]
[ 0.70710678+0.70710678j -0.        -0.j          0.        +0.j
 -0.        -0.j        ]
[0. 0. 1. 0. 0. 1.]
[ 0.        +0.j         -0.        -0.j          0.70710678+0.70710678j
 -0.        -0.j        ]
[ 0.  0.  1.  0.  0. 

In [18]:
# Display the last value assigned to `action` by a rollout cell: the tuple
# returned by predict() ((array(0), None) in the recorded output).
action

(array(0, dtype=int64), None)

# PPO Model
------

In [29]:
# PPO hyperparameters (these rebind gamma, learning_rate and policy_kwargs)
gamma = 0.99
n_epochs = 4
clip_range = 0.2
learning_rate = 1e-4
policy_kwargs = {'optimizer_class': optim.Adam}


# PPO agent with an MLP policy on the same environment; training metrics are
# written under logs/ for TensorBoard.
ppo_model = PPO(
    "MlpPolicy",
    env,
    gamma=gamma,
    n_epochs=n_epochs,
    clip_range=clip_range,
    learning_rate=learning_rate,
    policy_kwargs=policy_kwargs,
    tensorboard_log='logs/',
)

In [30]:
# Train the PPO agent with total_timesteps=20000. The call's return value is
# the cell's displayed result (the PPO object in the recorded output).
ppo_model.learn(total_timesteps=20000)

[0.+0.j 1.+0.j 0.+0.j 0.+0.j]
[0.-0.j 0.-0.j 0.+0.j 0.+1.j]
[0.-0.j 0.+1.j 0.+0.j 0.-0.j]
[0.+0.j 0.-0.j 0.-0.j 0.+1.j]
[-0.+0.j  1.-0.j  0.+0.j  0.+0.j]
[-0.        +0.j          0.92387953-0.38268343j  0.        +0.j
  0.        +0.j        ]
[ 0.        -0.j          0.        -0.j         -0.        +0.j
  0.38268343+0.92387953j]
[-0.        +0.j          0.        +0.j          0.92387953-0.38268343j
 -0.        +0.j        ]
[-0.        +0.j          0.        +0.j         -0.        +0.j
  0.92387953-0.38268343j]
[ 0.        +0.j         -0.        +0.j          0.92387953-0.38268343j
 -0.        +0.j        ]
[0.+0.j 0.+0.j 1.+0.j 0.+0.j]
[0.        +0.j 0.        +0.j 0.70710678+0.j 0.70710678+0.j]
[ 0.        +0.j  0.        +0.j -0.70710678-0.j -0.70710678-0.j]
[0.        +0.j 0.        +0.j 0.70710678+0.j 0.70710678+0.j]
[0.70710678+0.j 0.70710678+0.j 0.        +0.j 0.        +0.j]
[ 0.70710678+0.j -0.70710678-0.j  0.        +0.j -0.        -0.j]
[ 0.65328148+0.27059805j -0

<stable_baselines3.ppo.ppo.PPO at 0x1e2bf79e810>

In [21]:
# Roll out one episode from the initial state: at every step the trained PPO
# agent chooses which gate (action) to add for the current state.
state = env.reset()
while True:
    # `action` keeps the full tuple returned by predict(); element 0 is the
    # value handed to the environment.
    action = ppo_model.predict(state)
    state, reward, done, info = env.step(action[0])
    # Show the current circuit and state
    env.render()
    print(state)
    if done:
        break

# Fidelity reported in the info dict of the final step.
print(info['fidelity'])

[0.70710678+0.j 0.70710678+0.j 0.        +0.j 0.        +0.j]

*** Quantum Circuit Info ***
# of qubit: 2
# of step : 1
# of gate : 1
# of 1 qubit gate: 1
Clifford  : yes
Gaussian  : no


[1. 0. 0. 0. 0. 1.]
[0.70710678+0.j 0.        +0.j 0.        +0.j 0.70710678+0.j]

*** Quantum Circuit Info ***
# of qubit: 2
# of step : 2
# of gate : 2
# of 1 qubit gate: 1
# of 2 qubit gate: 1
Clifford  : yes
Gaussian  : no


[0. 0. 0. 0. 0. 0.]


  logger.warn(


In [35]:
# Print the 'circuit' entry of the most recent step's info dict.
# NOTE(review): the recorded output for this cell does not look like a
# circuit printout; re-run after a rollout cell to confirm.
print(info['circuit'])
# print(info['fidelity'])

array(5, dtype=int64)

# Results
------

In [22]:
import os
os.environ['TENSORBOARD_BINARY'] = 'c:\\users\\mac\\appdata\\roaming\\python\\python311\\site-packages\\tensorboard'
%load_ext tensorboard
%tensorboard --logdir=logs/

ERROR: Failed to start
'c:\\users\\mac\\appdata\\roaming\\python\\python311\\site-
packages\\tensorboard' (set by the `TENSORBOARD_BINARY` environment
variable): [WinError 5] 拒绝访问。

In [23]:
# Roll out one episode from the initial state: at every step the trained PPO
# agent chooses which gate (action) to add for the current state.
state = env.reset()
while True:
    # `action` keeps the full tuple returned by predict(); element 0 is the
    # value handed to the environment.
    action = ppo_model.predict(state)
    state, reward, done, info = env.step(action[0])
    # Show the current circuit and state
    env.render()
    print(state)
    if done:
        break


[1.+0.j 0.+0.j 0.+0.j 0.+0.j]

*** Quantum Circuit Info ***
# of qubit: 2
# of step : 1
# of gate : 1
# of 1 qubit gate: 0
# of 2 qubit gate: 1
Clifford  : yes
Gaussian  : no


[0. 0. 1. 0. 0. 1.]
[0.92387953+0.38268343j 0.        +0.j         0.        +0.j
 0.        +0.j        ]

*** Quantum Circuit Info ***
# of qubit: 2
# of step : 2
# of gate : 2
# of 1 qubit gate: 1
# of 2 qubit gate: 1
Clifford  : no
Gaussian  : no


[0. 0. 1. 0. 0. 1.]
[0.70710678+0.70710678j 0.        +0.j         0.        +0.j
 0.        +0.j        ]

*** Quantum Circuit Info ***
# of qubit: 2
# of step : 3
# of gate : 3
# of 1 qubit gate: 2
# of 2 qubit gate: 1
Clifford  : no
Gaussian  : no


[0. 0. 1. 0. 0. 1.]
[0.5+0.5j 0.5+0.5j 0. +0.j  0. +0.j ]

*** Quantum Circuit Info ***
# of qubit: 2
# of step : 3
# of gate : 4
# of 1 qubit gate: 3
# of 2 qubit gate: 1
Clifford  : no
Gaussian  : no


[1. 0. 0. 0. 0. 1.]
[0.5+0.5j 0.5+0.5j 0. +0.j  0. +0.j ]

*** Quantum Circuit Info ***
# of qubit: 2
# of step :

In [25]:
from qulacs import QuantumCircuit

# Rebind the global `circuit` to a newly constructed QuantumCircuit(2);
# no gates are added in this cell.
circuit = QuantumCircuit(2)


In [26]:
from qulacs.gate import H
# Construct a standalone H gate object with argument 2; it is not added to
# any circuit in this cell.
# NOTE(review): presumably the argument is the target qubit index, and a
# 2-qubit register only has indices 0 and 1. Confirm the intended index.
h_gate = H(2)