# This notebook demonstrates the indicating problem

Generate $|z'\rangle$ from the quantum part

A 2-regular graph with 4 nodes (a ring, with $Z_4 = Z_0$):
    \begin{eqnarray}
        H_c=-\sum_{i=0}^3\frac{Z_iZ_{i+1}}{2},\quad  H_b =\sum_{i=0}^3 X_i .
    \end{eqnarray}

In [1]:
import numpy as np
from scipy.optimize import minimize
from scipy.linalg import expm
from qulacs import QuantumCircuitSimulator

# Tensor product function
def tensor_product(*args):
    """
    Kronecker product of all operands, taken left to right.

    Parameters:
    *args (array-like): Vectors or matrices to combine, in order.

    Returns:
    np.ndarray: kron(...kron(kron([1], args[0]), args[1])..., args[-1]);
                np.array([1]) when called without arguments.
    """
    factors = iter(args)
    product = np.array([1])  # neutral element of the Kronecker product
    while True:
        try:
            factor = next(factors)
        except StopIteration:
            return product
        product = np.kron(product, factor)

# Unitary evolution operators
def unitary(H, theta):
    """
    Matrix exponential exp(-i * theta * H).

    Parameters:
    H (np.ndarray): Square matrix used as the generator.
    theta (float): Scalar multiplying H in the exponent.

    Returns:
    np.ndarray: The matrix expm(-1j * H * theta).
    """
    exponent = -1j * H * theta
    return expm(exponent)


# Full circuit
def apply_circuit(gamma, beta, p, initial_state, H_c,H_b):
    """
    Apply p identical layers exp(-i*beta*H_b) exp(-i*gamma*H_c) to a state.

    Parameters:
    gamma (float): Angle used with H_c in every layer.
    beta (float): Angle used with H_b in every layer.
    p (int): Number of layers.
    initial_state (np.ndarray): State vector the layers act on.
    H_c (np.ndarray): Matrix exponentiated first in each layer.
    H_b (np.ndarray): Matrix exponentiated second in each layer.

    Returns:
    np.ndarray: The state after p layers; initial_state itself when p <= 0.
    """
    state = initial_state
    if p <= 0:
        # No layers to apply: skip the matrix exponentials altogether.
        return state

    # Every layer reuses the same angles, so both exponentials are
    # loop-invariant; compute them once instead of once per layer.
    cost_layer = unitary(H_c, gamma)
    mixer_layer = unitary(H_b, beta)

    for _ in range(p):
        state = cost_layer @ state
        state = mixer_layer @ state
    return state


# Cost function
def cost_function(params, p, initial_state, H_c, H_b):
    """
    Expectation value of H_c in the state produced by apply_circuit.

    Parameters:
    params (sequence of 2 floats): (gamma, beta) forwarded to apply_circuit.
    p (int): Number of layers forwarded to apply_circuit.
    initial_state (np.ndarray): Input state of the circuit.
    H_c (np.ndarray): Matrix whose expectation value is returned.
    H_b (np.ndarray): Second matrix forwarded to apply_circuit.

    Returns:
    float: Real part of <psi| H_c |psi> for the circuit output psi.

    Side effects:
    Increments the module-level ``iteration_counter`` and prints a progress
    line on every 10th call.
    """
    global iteration_counter

    gamma, beta = params
    psi = apply_circuit(gamma, beta, p, initial_state, H_c, H_b)
    expectation_value = np.real(np.vdot(psi, H_c @ psi))

    # Progress report on every 10th evaluation
    iteration_counter += 1
    if not iteration_counter % 10:
        print(f'Iteration: {iteration_counter}, gamma: {gamma:.4f}, beta: {beta:.4f}, cost: {expectation_value:.4f}')

    return expectation_value


def find_ground_energy(hamiltonian):
    """
    Lowest eigenvalue of a Hamiltonian matrix.

    Parameters:
    hamiltonian (np.ndarray): The Hamiltonian matrix, passed to
                              np.linalg.eigvalsh.

    Returns:
    float: The smallest eigenvalue returned by np.linalg.eigvalsh.
    """
    spectrum = np.linalg.eigvalsh(hamiltonian)
    return spectrum.min()

Pre-train with an initial PQC whose circuit has the form $[e^{-i H_b \beta} e^{-i H_c \gamma}]$

In [2]:
# Define Pauli matrices
I = np.eye(2)
X = np.array([[0, 1], [1, 0]])
Y = np.array([[0, -1j], [1j, 0]])
Z = np.array([[1, 0], [0, -1]])

# Initial state |+>^{⊗4}, with |+> = (|0> + |1>)/sqrt(2)
plus_state = np.array([1, 1]) / np.sqrt(2)
initial_state = tensor_product(*[plus_state] * 4)

# Define the Hamiltonians H_c and H_b on the 4-qubit ring.
# NOTE(review): the formula in the introduction carries a factor 1/2 on each
# Z_i Z_{i+1} term that is not applied here; with this definition the ground
# energy printed below is -4 — confirm which normalisation is intended.
H_c = -(tensor_product(Z, Z, I, I)+tensor_product(I, Z, Z, I)+tensor_product(I, I, Z, Z)+tensor_product(Z, I, I, Z))
H_b = tensor_product(X, I, I, I)+tensor_product(I, X, I, I)+tensor_product(I, I, X, I)+tensor_product(I, I, I, X)

# Global iteration counter (incremented inside cost_function)
iteration_counter = 0

# Optimize using scipy
SEED = 0  # fixed seed so that Restart & Run All reproduces the same optimisation path
p = 1  # Number of layers
# Initial guess for gamma and beta, uniform in [0, 1); a local generator is
# used so the global numpy random state is left untouched.
initial_params = np.random.RandomState(SEED).rand(2)

result = minimize(cost_function, initial_params, args=(p,initial_state, H_c, H_b,), method='COBYLA')
optimal_gamma, optimal_beta = result.x

print(f'Optimal gamma: {optimal_gamma}, Optimal beta: {optimal_beta}')


# Exact ground energy of H_c, for comparison with the optimised cost
temp_eig3 = find_ground_energy(H_c)
print(temp_eig3)

Iteration: 10, gamma: 0.2995, beta: 1.3990, cost: -1.1816
Iteration: 20, gamma: 0.3939, beta: 1.1865, cost: -1.9988
Iteration: 30, gamma: 0.3925, beta: 1.1776, cost: -2.0000
Iteration: 40, gamma: 0.3926, beta: 1.1781, cost: -2.0000
Optimal gamma: 0.3926177463928006, Optimal beta: 1.1781227514385042
-4.0


Then, using the quantum gradient algorithm, we can obtain an updated $|z'\rangle$.

In [8]:
def fidelity(state1, state2):
    """
    Squared overlap |<state1|state2>|^2 of two state vectors.

    Both inputs are converted to arrays and rescaled to unit norm before the
    overlap is taken, so they do not need to be normalised by the caller.

    Parameters:
    state1 (array-like): First state vector.
    state2 (array-like): Second state vector.

    Returns:
    float: |<state1|state2>|^2 of the normalised vectors.
    """
    unit_vectors = []
    for raw in (state1, state2):
        vec = np.asarray(raw)
        unit_vectors.append(vec / np.linalg.norm(vec))

    bra, ket = unit_vectors
    # np.vdot conjugates its first argument
    return np.abs(np.vdot(bra, ket))**2


def gradient(hamiltonian, xi):
    """
    Scale a Hamiltonian matrix by the factor xi.

    Parameters:
    hamiltonian (numpy.ndarray): The Hamiltonian matrix.
    xi (float): Scalar multiplying the Hamiltonian.

    Returns:
    numpy.ndarray: The matrix xi * hamiltonian.
    """
    return xi * hamiltonian

def PQC_RL(env):
    """
    Train a PPO agent on ``env`` and roll out one episode with the trained policy.

    Parameters:
        env: the environment to train on (built by the caller with the
             corresponding initial and target state).

    Returns:
        info (dict): the info dict returned by the last ``env.step`` of the
                     roll-out; the caller reads its 'fidelity' and 'state' keys.
        ppo_model: the trained PPO model (the learned policy).
    """
    # PPO hyper-parameters
    ppo_settings = dict(
        gamma=0.99,
        n_epochs=4,
        clip_range=0.2,
        learning_rate=0.0001,
        policy_kwargs=dict(optimizer_class=optim.Adam),
        tensorboard_log='logs/',
    )

    # Agent
    ppo_model = PPO("MlpPolicy", env, **ppo_settings)
    ppo_model.learn(total_timesteps=10000)

    # Roll out one episode with the trained policy to obtain the final info
    state = env.reset()
    print(state)
    while True:
        action = ppo_model.predict(state)
        # predict returns a tuple; its first element is the action passed to the env
        state, reward, done, info = env.step(action[0])
        # To show the current circuit and state while debugging, call
        # env.render() or print state / info['fidelity'] here.
        if done:
            break

    return info,ppo_model



In [9]:
import gym
import numpy as np
import torch.optim as optim
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
import qas_gym
import warnings

# Loop alternating a quantum gradient step with classical (RL) circuit learning

# Scalar multiplying H_c in the update state_out = state_in - xi * H_c @ state_in
xi=0.2
# Starting state: output of the pre-trained circuit with the optimised angles
state_in=apply_circuit(optimal_gamma, optimal_beta, p, initial_state, H_c, H_b)

env_name = 'BasicFourQubit-v0' # four-qubit environment, noiseless
reward_penalty = 0.01
# max_timesteps = 5
max_depth = 10  # passed to the environment as max_timesteps
max_iter = 5    # number of gradient-step / RL rounds
for i in range(max_iter):
    print("\n------------------ Iteration {}: ----------------------".format(i))
    # From the second round on, start from the state reported by the previous RL run
    if i>0: 
        state_in = info['state']

    # Gradient step: subtract xi * H_c applied to the current state ...
    state_out=state_in-gradient(H_c, xi)@state_in

    # ... and renormalise the result, which becomes the RL target state
    state_out = state_out/np.linalg.norm(state_out)

    # # print(state_out)

    # Fidelity between the target and the starting state, before any learning
    f_ini = fidelity(state_out, state_in)
    print("Initial fidelity {:.4f}: ".format(f_ini))
    # Threshold handed to the environment: 20% above the initial fidelity, capped at 0.95
    fidelity_threshold = min(0.95, f_ini*1.2)


    # fidelity(state_out, state_out_pqc)

    # expectation_value = np.real(np.vdot(state_out, H_c @ state_out))
    # print(expectation_value)

    # Warnings raised while building the environment and training are suppressed
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        env = gym.make(env_name, target = state_out,
                fidelity_threshold=fidelity_threshold,
                reward_penalty=reward_penalty,
                max_timesteps=max_depth,
                initial = state_in)

        # Train PPO on this environment and roll out one episode
        info,ppo_model = PQC_RL(env)
    
    print("Fidelity after classical learning PQC: {:.4f}".format(info['fidelity']))


------------------Iteration 0: -----------------------

initial fidelity 0.9074: 


  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


[ 5.00162671e-01  2.08166817e-17  0.00000000e+00  5.00162671e-01
  6.93889390e-17  2.77555756e-16  5.00162671e-01  3.46944695e-17
  5.55111512e-17  5.00162671e-01 -2.49800181e-16 -5.55111512e-17]
Fidelity after classical learning PQC: 0.9707
------------------Iteration 1: -----------------------

initial fidelity 0.9457: 


  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


[ 4.69817148e-01 -1.04083409e-17  5.55111512e-17  3.89146886e-01
 -5.55111512e-17 -5.55111512e-17  5.04238901e-01 -9.02056208e-17
 -5.55111512e-17  4.23220471e-01  2.84494650e-16  5.55111512e-17]
Fidelity after classical learning PQC: 0.9611
------------------Iteration 2: -----------------------

initial fidelity 0.9546: 


  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


[ 4.47249426e-01 -2.08166817e-17 -5.55111512e-17  3.89146886e-01
 -2.77555756e-17 -1.66533454e-16  4.82289360e-01 -1.17961196e-16
 -1.66533454e-16  4.25729484e-01  2.63677968e-16 -5.55111512e-17]
Fidelity after classical learning PQC: 0.9602
------------------Iteration 3: -----------------------

initial fidelity 0.9575: 


  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


[ 4.34599395e-01  2.07240193e-07  3.35387570e-08  3.89146886e-01
 -1.19538741e-06  5.91287881e-06  4.73954197e-01 -6.71056874e-06
  8.19165448e-06  4.21896822e-01 -5.41175624e-07  9.05492046e-07]
Fidelity after classical learning PQC: 0.9602
------------------Iteration 4: -----------------------

initial fidelity 0.9595: 


  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


[ 4.25841515e-01  2.07240193e-07  2.05647103e-08  3.63372481e-01
 -1.19538741e-06  6.49119209e-06  4.14559812e-01 -5.86194966e-06
  8.72669595e-06  3.79651250e-01 -1.91454457e-06  9.06546040e-07]
Fidelity after classical learning PQC: 0.9601


In [None]:
# Visualize the final circuit (env is the environment created in the last loop iteration)
env.render()


0: ───I───YY───XX───YY─────────────XX─────────────
          │    │    │              │
1: ───I───YY───┼────┼────XX───XX───┼────XX────────
               │    │    │    │    │    │
2: ───I────────┼────┼────┼────┼────┼────┼────XX───
               │    │    │    │    │    │    │
3: ───I────────XX───YY───XX───XX───XX───XX───XX───
