In [1]:
!pip install pennylane
!pip isntall torch

Collecting pennylane
  Downloading PennyLane-0.40.0-py3-none-any.whl.metadata (10 kB)
Collecting rustworkx>=0.14.0 (from pennylane)
  Downloading rustworkx-0.15.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.9 kB)
Collecting tomlkit (from pennylane)
  Downloading tomlkit-0.13.2-py3-none-any.whl.metadata (2.7 kB)
Collecting appdirs (from pennylane)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting autoray>=0.6.11 (from pennylane)
  Downloading autoray-0.7.0-py3-none-any.whl.metadata (5.8 kB)
Collecting pennylane-lightning>=0.40 (from pennylane)
  Downloading PennyLane_Lightning-0.40.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (27 kB)
Collecting diastatic-malt (from pennylane)
  Downloading diastatic_malt-2.15.2-py3-none-any.whl.metadata (2.6 kB)
Collecting scipy-openblas32>=0.3.26 (from pennylane-lightning>=0.40->pennylane)
  Downloading scipy_openblas32-0.3.29.0.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (

In [190]:
def remove_zero_diagonal_nodes(matrix):
    """
    행렬에서 대각선의 행과 열이 모두 0인 경우 해당 행과 열을 제거.

    Args:
        matrix (np.array): 초기 해밀토니안 행렬

    Returns:
        np.array: 축소된 해밀토니안 행렬
    """
    n = matrix.shape[0]

    # 제거할 인덱스를 추적
    keep_indices = []
    for i in range(n):
        row_sum = np.sum(matrix[i, :])  # i번째 행의 합
        col_sum = np.sum(matrix[:, i])  # i번째 열의 합

        # 대각 원소에 해당하는 행과 열이 모두 0이 아니면 유지
        if row_sum != 0 or col_sum != 0:
            keep_indices.append(i)

    # 행과 열이 모두 0인 노드를 제거하고 새로운 행렬 생성
    reduced_matrix = matrix[np.ix_(keep_indices, keep_indices)]

    return reduced_matrix

def reduce_hamiltonian(J, Q_action,k, l,i,j, sign):
    """
    주어진 제약 조건 Z_k = sign * Z_l을 적용하여 해밀토니안을 축소하는 함수

    Args:
    - J (np.array): 초기 해밀토니안 행렬 (대각 항 포함)
    - k (int): 제거할 변수 인덱스
    - l (int): 대체할 변수 인덱스
    - sign (int): 관계 (1이면 동일, -1이면 반대)

    Returns:
    - np.array: 축소된 해밀토니안 행렬
    """
    # 새로운 상호작용 업데이트: J[i,l] = J[i,l] + sign * J[i,k]
    for i in range(J.shape[0]):
        if i != k and i != l:
            J[i, l] += sign * J[i, k]
            J[l, i] += sign * J[k, i]
    # 대각선 업데이트 (자기 항 Z_k = sign * Z_l 적용)
    J[l, l] = sign * J[k, k] + J[l, l]  # 자기 항 반영
    R = copy.deepcopy(J)
    R[:,i] = 0
    R[i,:] = 0
    # 변수 제거 (행 및 열 삭제)
    J = remove_zero_diagonal_nodes(R)

    return J,R





In [191]:
def signed_softmax_rewards(rewards, beta=15.0):
    """
    Apply softmax transformation to absolute values of rewards
    while preserving their original sign.

    Args:
        rewards (np.ndarray): Array of reward values.
        beta (float): Temperature parameter to control sharpness of softmax.

    Returns:
        np.ndarray: Transformed reward values with preserved sign.
    """
    rewards = np.array(rewards)

    # Step 1: Compute absolute values and apply softmax
    abs_rewards = np.abs(rewards)
    scaled_rewards = beta * abs_rewards
    exp_rewards = np.exp(scaled_rewards - np.max(scaled_rewards))  # Numerical stability
    softmax_vals = exp_rewards / np.sum(exp_rewards)

    # Step 2: Restore original sign
    signed_rewards = np.sign(rewards) * softmax_vals
    return signed_rewards

In [210]:
class UnionFind:
    def __init__(self, n):
        self.parent = list(range(n))
        self.rank = [0] * n
        self.value = [0] * n  # 값 할당을 위한 배열 (-1 or 1)

    def find(self, x):
        if self.parent[x] != x:
            self.parent[x] = self.find(self.parent[x])  # 경로 압축
        return self.parent[x]

    def union(self, x, y):
        rootX = self.find(x)
        rootY = self.find(y)
        if rootX != rootY:
            if self.rank[rootX] > self.rank[rootY]:
                self.parent[rootY] = rootX
            elif self.rank[rootX] < self.rank[rootY]:
                self.parent[rootX] = rootY
            else:
                self.parent[rootY] = rootX
                self.rank[rootX] += 1

def find_valid_combinations_fast(n, same_constraints, diff_constraints):
    uf = UnionFind(n)

    # 1. 같은 값을 가져야 하는 노드를 그룹화
    for i, j in same_constraints:
        uf.union(i, j)

    # 2. 그룹핑을 기반으로 대표 노드를 찾기
    groups = {}
    for i in range(n):
        root = uf.find(i)
        if root not in groups:
            groups[root] = []
        groups[root].append(i)

    # 3. 다른 값을 가져야 하는 관계 반영
    diff_relations = {}
    for i, j in diff_constraints:
        root_i = uf.find(i)
        root_j = uf.find(j)
        if root_i == root_j:
            return []  # 서로 다른 값이어야 하는데 같은 그룹이면 불가능
        diff_relations[root_i] = root_j
        diff_relations[root_j] = root_i

    # 4. 가능한 조합 생성
    def assign_values(node, value):
        if uf.value[node] == 0:  # 값이 아직 설정되지 않은 경우
            uf.value[node] = value
            if node in diff_relations:
                assign_values(diff_relations[node], -value)
            for member in groups[node]:
                uf.value[member] = value

    # 5. 각 그룹에 대해 값을 설정 (-1, 1)
    solutions = []
    for root in groups:
        if uf.value[root] == 0:
            assign_values(root, 1)
            solution1 = uf.value[:]
            assign_values(root, -1)
            solution2 = uf.value[:]
            solutions.append(solution1)
            solutions.append(solution2)

    return solutions

# 테스트 케이스
n = 5
same_constraints = [(0, 1), (2, 3)]  # Z1=Z2, Z3=Z4
diff_constraints = [(1, 2), (3, 4)]  # Z2≠Z3, Z4≠Z5

# 가능한 조합 찾기
solutions = find_valid_combinations_fast(n, same_constraints, diff_constraints)

# 결과 출력
print("가능한 해들:")
for sol in solutions:
    print(sol)


가능한 해들:
[1, 1, -1, -1, 1]
[1, 1, -1, -1, 1]


In [207]:
find_valid_combinations_fast(8,rl_qaoa.same_list,rl_qaoa.diff_list)

[[1, 0, 0, 0, -1, 1, 0, 0],
 [1, 0, 0, 0, -1, 1, 0, 0],
 [1, 1, -1, 1, -1, 1, 1, 1],
 [1, 1, -1, 1, -1, 1, 1, 1]]

In [197]:
def find_valid_combinations_bt(n, same_constraints, diff_constraints):
    """
    백트래킹을 사용하여 조건을 만족하는 가능한 조합을 찾는 함수.

    Args:
        n (int): 변수의 개수.
        same_constraints (list of tuples): 같은 값을 가져야 하는 쌍.
        diff_constraints (list of tuples): 다른 값을 가져야 하는 쌍.

    Returns:
        list of list: 모든 가능한 조합.
    """
    solutions = []
    assignment = [0] * n  # 초기 상태 (0은 아직 결정되지 않은 상태)

    def is_valid(index):
        # 같은 값을 가져야 하는 조건 확인
        for (i, j) in same_constraints:
            if assignment[i] != 0 and assignment[j] != 0:
                if assignment[i] != assignment[j]:
                    return False

        # 다른 값을 가져야 하는 조건 확인
        for (i, j) in diff_constraints:
            if assignment[i] != 0 and assignment[j] != 0:
                if assignment[i] == assignment[j]:
                    return False

        return True

    def backtrack(index):
        if index == n:
            solutions.append(assignment[:])  # 유효한 조합 저장
            return

        for value in [-1, 1]:  # 가능한 값들 탐색 (-1 또는 1)
            assignment[index] = value
            if is_valid(index):
                backtrack(index + 1)
            assignment[index] = 0  # 백트래킹

    backtrack(0)
    return solutions



In [192]:
from itertools import product

def find_valid_combinations(n, same_constraints, diff_constraints):
    """
    주어진 조건을 만족하는 가능한 조합을 찾는 함수.

    Args:
        n (int): 변수의 개수.
        same_constraints (list of tuples): 같은 값을 가져야 하는 쌍 (e.g. [(1, 2), (3, 4)]).
        diff_constraints (list of tuples): 다른 값을 가져야 하는 쌍 (e.g. [(2, 3), (4, 5)]).

    Returns:
        list of list: 모든 가능한 조합
    """
    valid_solutions = []

    # 가능한 모든 2^n 조합 생성 (각 변수는 -1 또는 1을 가짐)
    for candidate in product([-1, 1], repeat=n):
        assignment = list(candidate)
        is_valid = True

        # 같은 값을 가져야 하는 조건 확인
        for (i, j) in same_constraints:
            if assignment[i] != assignment[j]:
                is_valid = False
                break

        # 다른 값을 가져야 하는 조건 확인
        for (i, j) in diff_constraints:
            if assignment[i] == assignment[j]:
                is_valid = False
                break

        if is_valid:
            valid_solutions.append(assignment)

    return valid_solutions




In [212]:
import pennylane as qml  # Importing PennyLane for quantum computing
from pennylane import numpy as np  # Importing PennyLane's NumPy for compatibility
import torch  # Importing PyTorch for potential machine learning applications
import torch.nn as nn  # Importing PyTorch's neural network module
import copy  # Importing copy module for deep copying objects
import itertools  # Importing itertools for combinatorial operations
from tqdm import tqdm  # Importing tqdm for progress tracking

class AdamOptimizer():
	"""Stochastic gradient descent optimizer with Adam
	Note: All default values are from the original Adam paper
	Parameters
	----------
	params : list, length = len(coefs_) + len(intercepts_)
		The concatenated list containing coefs_ and intercepts_ in MLP model.
		Used for initializing velocities and updating params
	learning_rate_init : float, default=0.001
		The initial learning rate used. It controls the step-size in updating
		the weights
	beta_1 : float, default=0.9
		Exponential decay rate for estimates of first moment vector, should be
		in [0, 1)
	beta_2 : float, default=0.999
		Exponential decay rate for estimates of second moment vector, should be
		in [0, 1)
	epsilon : float, default=1e-8
		Value for numerical stability
	Attributes
	----------
	learning_rate : float
		The current learning rate
	t : int
		Timestep
	ms : list, length = len(params)
		First moment vectors
	vs : list, length = len(params)
		Second moment vectors
	References
	----------
	Kingma, Diederik, and Jimmy Ba.
	"Adam: A method for stochastic optimization."
	arXiv preprint arXiv:1412.6980 (2014).
	"""

	def __init__(self, params, learning_rate_init=0.001, beta_1=0.9,
				 beta_2=0.999, epsilon=1e-8, amsgrad = False):

		self.beta_1 = beta_1
		self.beta_2 = beta_2
		self.epsilon = epsilon
		if type(learning_rate_init) == float:
			self.learning_rate_init = np.ones(len(params))*learning_rate_init
		else:
			self.learning_rate_init = np.array(learning_rate_init)
		self.t = 0
		self.ms = [np.zeros_like(param) for param in params]
		self.vs = [np.zeros_like(param) for param in params]
		self.amsgrad = amsgrad
		self.max_vs = [np.zeros_like(param) for param in params]

	def get_updates(self, grads):
		"""Get the values used to update params with given gradients
		Parameters
		----------
		grads : list, length = len(coefs_) + len(intercepts_)
			Containing gradients with respect to coefs_ and intercepts_ in MLP
			model. So length should be aligned with params
		Returns
		-------
		updates : list, length = len(grads)
			The values to add to params
		"""
		self.t += 1
		self.ms = [self.beta_1 * m + (1 - self.beta_1) * grad
					 for m, grad in zip(self.ms, grads)]
		self.vs = [self.beta_2 * v + (1 - self.beta_2) * (grad ** 2)
					 for v, grad in zip(self.vs, grads)]
		self.max_vs = [np.maximum(v, max_v) for v, max_v in zip(self.vs, self.max_vs)]
		self.learning_rate = (self.learning_rate_init *
								np.sqrt(1 - self.beta_2 ** self.t) /
								(1 - self.beta_1 ** self.t))
		if self.amsgrad:
			updates = [lr * m / (np.sqrt(max_v) + self.epsilon)
			           for lr, m, max_v in zip(self.learning_rate, self.ms, self.max_vs)]
		else:
			updates = [lr * m / (np.sqrt(v) + self.epsilon)
					 for lr, m, v in zip(self.learning_rate, self.ms, self.vs)]
		return updates



class QAOA_layer():
    def __init__(self, depth, Q):
        """
        Initializes the QAOA layer with a given depth and QUBO matrix.

        Args:
            depth (int): The depth of the QAOA circuit (number of layers).
            Q (np.ndarray): QUBO matrix representing the quadratic unconstrained binary optimization problem.
        """
        self.Q = Q  # Store the QUBO matrix
        self.p = depth  # Store the QAOA depth
        self.ham = self.prepare_cost_hamiltonian()  # Prepare the cost Hamiltonian based on QUBO matrix
        self.dev = qml.device("default.qubit", wires=Q.shape[0])  # Define a quantum device with qubits equal to the size of Q

    def qaoa_circuit(self, params):
        """
        Constructs the QAOA circuit with the given parameters.

        Args:
            params (list): A list containing gamma and beta values for parameterized QAOA layers.
        """
        n = self.Q.shape[0]  # Number of qubits based on QUBO matrix size
        gammas = params[:self.p]  # Extract gamma parameters for cost Hamiltonian evolution
        betas = params[self.p:]  # Extract beta parameters for mixer Hamiltonian
        # Apply Hadamard gates to all qubits to initialize in uniform superposition
        for i in range(n):
            qml.Hadamard(wires=i)

        # Apply QAOA layers consisting of cost and mixer Hamiltonians
        for layer in range(self.p):
            self.qubo_cost(gammas[layer])  # Apply cost Hamiltonian with corresponding gamma
            self.mixer(betas[layer])  # Apply mixer Hamiltonian with corresponding beta

    def qubo_cost(self, gamma):
        """
        Implements the cost Hamiltonian evolution for the QUBO problem.

        Args:
            gamma (float): Parameter for cost Hamiltonian evolution.
        """
        n = self.Q.shape[0]  # Get number of qubits
        for i in range(n):
            for j in range(n):
                if self.Q[i, j] != 0:  # If the QUBO coefficient is non-zero
                    if i == j:
                        qml.RZ(2 * gamma * float(self.Q[i, j]), wires=i)  # Apply single-qubit phase rotation
                    else:
                        qml.CNOT(wires=[i, j])  # Apply CNOT before controlled rotation
                        qml.RZ(2 * gamma * float(self.Q[i, j]), wires=j)  # Apply controlled phase rotation
                        qml.CNOT(wires=[i, j])  # Undo entanglement with another CNOT

    def mixer(self, beta):
        """
        Implements the mixer Hamiltonian for QAOA.

        Args:
            beta (float): Parameter for mixer Hamiltonian evolution.
        """
        for i in range(self.Q.shape[0]):
            qml.RX(2 * beta, wires=i)  # Apply X-rotation to all qubits

    def prepare_cost_hamiltonian(self):
        """
        Constructs the cost Hamiltonian for the QUBO problem.

        Returns:
            qml.Hamiltonian: The constructed cost Hamiltonian.
        """
        n = self.Q.shape[0]  # Get the size of the QUBO matrix
        coeffs = []  # Store the coefficients of the Hamiltonian terms
        ops = []  # Store the corresponding Pauli operators

        for i in range(n):
            for j in range(n):
                if self.Q[i, j] != 0:  # Only consider non-zero QUBO elements
                    if i == j:
                        coeffs.append(self.Q[i, j])  # Add coefficient for single-qubit term
                        ops.append(qml.PauliZ(i))  # Add Pauli-Z operator
                    else:
                        coeffs.append(self.Q[i, j])  # Add coefficient for two-qubit interaction
                        ops.append(qml.PauliZ(i) @ qml.PauliZ(j))  # Add ZZ interaction term

        return qml.Hamiltonian(coeffs, ops)  # Return the constructed Hamiltonian



class RL_QAOA():
    def __init__(self, Q, n_c,init_paramter,b_vector, QAOA_depth,gamma=0.99,learning_rate_init=0.001):
        self.Q = Q
        self.n_c = n_c
        self.param = init_paramter
        self.b = b_vector
        self.p = QAOA_depth
        self.qaoa_layer = QAOA_layer(QAOA_depth,Q)
        self.gamma = gamma
        self.optimzer = AdamOptimizer([init_paramter,b_vector],learning_rate_init=learning_rate_init)




    def RL_QAOA(self,episodes,epochs,correc_ans = None):
        ## RL_QAOA실행 episodes : 몬테카를로를 진행할 숫자
        ## epochs : parameter 최적화를 진행할 숫자

        for j in range(epochs):
          value_list = []
          QAOA_diff_list = []
          beta_diff_list = []
          if correc_ans is not None:
            prob = 0


          for i in tqdm(range(episodes)):
            QAOA_diff,beta_diff,value = self.rqaoa_execute()
            value_list.append(value)
            QAOA_diff_list.append(QAOA_diff)
            beta_diff_list.append(beta_diff)

            if correc_ans is not None:
              if value <= correc_ans+0.01 and value >= correc_ans-0.01:
                prob+=1


          batch_maen = np.array(value_list) - np.mean(np.array(value_list),axis=0)
          batch_plus = np.where(batch_maen < 0, batch_maen, 0)
          softmaxed_rewards = signed_softmax_rewards(np.array(batch_plus),beta=1)*episodes
          print(batch_plus)

          for index,val in enumerate(softmaxed_rewards):
            QAOA_diff_list[index] *= -batch_maen[index]
            beta_diff_list[index] *= -batch_maen[index]


          QAOA_diff_sum = np.mean(QAOA_diff_list,axis=0)
          beta_diff_sum = np.mean(beta_diff_list,axis=0)
          value_sum = np.mean(value_list)
          print(f'QAOA diff : {QAOA_diff_sum}')
          print(f'cost : {value_sum}')
          if correc_ans is not None:
              prob = prob/episodes
              print(f'prob : {prob}')
          update = self.optimzer.get_updates([QAOA_diff_sum,beta_diff_sum])
          print(f'QAOA update : {update[0]}')
          self.param += np.array(update[0])
          self.b += np.reshape(update[1],-1)
          print(f'QAOA param : {self.param}')
          print(f'b param : {self.b}')



    def rqaoa_execute(self,cal_grad = True):
        ## RQAOA를 확률적 뽑기로 진행해 주는 함수
        Q_init = copy.deepcopy(self.Q)
        Q_action = copy.deepcopy(self.Q)
        self.same_list = []
        self.diff_list = []
        self.node_assignments = {}  # Keep track of node assignments
        self.edge_expectations = [] # Edge들의 expectation value 저장(Gradient 계산에 필요)
        self.edge_expectations_grad = [] # Edge들의 expectation value gradient 저장(Gradient 계산에 필요)
        self.policys = [] #각 edge가 지정될 확률을 저장(Gradient 계산에 필요)




        QAOA_diff_list = [] ##QAOA layer parameter gradient
        beta_diff_list = [] ##beta parameter gradient
        index = 0
        while Q_init.shape[0] > self.n_c:
            # Select and cut an edge,cal grad
            edge_expectations = self._qaoa_edge_expectations(Q_init,[i for i in range(self.p*index*2,self.p*index*2+2*self.p)])
            selected_edge_idx, policy, edge_res = self._select_edge_to_cut(Q_action,Q_init,edge_expectations)
            if cal_grad:
              #edge_res_grad = self._qaoa_edge_expectations_gradients(Q_init, [i for i in range(self.p*index*2,self.p*index*2+2*self.p)])
              edge_res_grad = self._qaoa_edge_expectations_gradient(Q_init, [i for i in range(self.p*index*2,self.p*index*2+2*self.p)],selected_edge_idx)
            if cal_grad:
              #QAOA_diff = self._compute_log_pol_diff(selected_edge_idx,Q_action,edge_res,edge_res_grad ,policy)
              QAOA_diff = self._compute_log_pol_diff_idx(selected_edge_idx,Q_action,edge_res,edge_res_grad ,policy)
              beta_diff = self._compute_grad_beta(selected_edge_idx,Q_action,policy,edge_res)


              QAOA_diff_list.append(QAOA_diff)
              beta_diff_list.append(beta_diff)
            Q_init,Q_action = self._cut_edge(selected_edge_idx,edge_res,Q_action,Q_init)
            index+=1


        self.QAOA_result = copy.deepcopy(self.node_assignments)
        # 그래프의 크기가 n_c보다 작으면 전체 space를 다 계산해서 최종값 도출
        self._brute_force_optimal(Q_init)


        Value = self._state_energy(np.array(self.node_assignments),self.Q)

        ## 최근에 얻은 값이 최종 변화율에 영향을 크게 미치도록 함수 변경(큰 그래프에서는 뭘 선택하든 중요하지 않을때가 많음)


        ## 정책에 대한 미분과 panalty 받은 보상을 곱해서 최종 기울기 도출
        if cal_grad:

          beta_diff_res = None
          for beta_diff in beta_diff_list:
            if beta_diff_res is None:
              beta_diff_res = beta_diff
            else:
              beta_diff_res += beta_diff

          QAOA_diff_res = None
          for QAOA_diff in QAOA_diff_list:
            if QAOA_diff_res is None:
              QAOA_diff_res = QAOA_diff
            else:
              QAOA_diff_res += QAOA_diff

          return QAOA_diff_res,beta_diff_res,Value


        else:
          return Value




    def _compute_log_pol_diff(self, idx,Q_action,edge_expectations,edge_expectations_grad,policy):
        ## 특정 edge가 선택 됐을 때 그 정책에 대한 paramter의 log기울기 값 계산
        action_space = self._action_space(Q_action)
        betas = self.b[action_space]
        gather = np.zeros_like(policy)
        for i in range(len(edge_expectations_grad)):
            gather[i] += policy[i] * betas[i]

        diff_log_pol = betas[idx] *np.sign(edge_expectations[idx])* edge_expectations_grad[idx]
        for i in range(len(gather)):
            if gather[i]:
                diff_log_pol-= gather[i] * np.sign(edge_expectations[i]) * (
                    (edge_expectations_grad[i]))

        return np.array(diff_log_pol)

    def _compute_log_pol_diff_idx(self, idx,Q_action,edge_expectations,grad,policy):
        ## 특정 edge가 선택 됐을 때 그 정책에 대한 paramter의 log기울기 값 계산
        action_space = self._action_space(Q_action)
        betas = self.b[action_space]
        diff_log_pol = betas[idx] *np.sign(edge_expectations[idx])* grad - policy[idx] * betas[idx] *np.sign(edge_expectations[idx])* grad
        return np.array(diff_log_pol)

    def _compute_grad_beta(self,idx,Q_action,policy,edge_expectations):

        grad_beta = []
        abs_expectations = abs(np.array(edge_expectations))
        action_space = self._action_space(Q_action)

        betas_idx = action_space

        grad = np.zeros(len(self.b))
        grad[betas_idx[idx]] += abs_expectations[idx]
        for i in range(len(action_space)):
            grad[betas_idx[i]] -= policy[i] * abs_expectations[i]


        return np.array(grad)



    def _cut_edge(self, selected_edge_idx,expectations,Q_action,Q_init):
        ## 선택된 edge에 해당하는 index와 그에 해당하는 expectaion value를 받아 1 혹은 -1 를 edge의 노드에 부여하는 메소드
        ## 또한 Q_action,줄어들어진 Q도 여기서 갱신해준다
        edge_list = [(i, j) for i in range(Q_init.shape[0]) for j in range(Q_init.shape[0]) if Q_init[i, j] != 0 and i!=j]
        edge_to_cut = edge_list[selected_edge_idx]
        edge_to_cut = sorted(edge_to_cut,reverse=True)

        expectation = expectations[selected_edge_idx]
        ###수정


        i = edge_to_cut[1]
        j = edge_to_cut[0]

        for key in dict(sorted(self.node_assignments.items(), key=lambda item: item[0])):
          if i >= key:
            i+=1
          if j >= key:
            j+=1


        new_Q,Q_action = reduce_hamiltonian(Q_init,Q_action,edge_to_cut[0],edge_to_cut[1],i,j,int(np.sign(expectation)))
        self.node_assignments[i] = 1
        if expectation > 0:
          self.same_list.append((i,j))
        else:
          self.diff_list.append((i,j))



        return new_Q,Q_action

    def _select_edge_to_cut(self,Q_action,Q_init,edge_expectations):
          ## ZZ를 측정하여 beta와 곱한 후 softmax를 진행하여 자를 edge를 확률적으로 고르는 method

          action_space = self._action_space(Q_action)
          try:
            interactions = abs(np.array((edge_expectations))) * self.b[action_space]
          except:
            print(abs(np.array((edge_expectations))),self.b[action_space])
            raise ValueError("Invalid input",action_space,Q_init)
          probabilities = torch.softmax(torch.tensor(interactions), dim=0).numpy()
          selected_edge_idx = np.random.choice(len(probabilities), p=probabilities)
          return selected_edge_idx, probabilities, edge_expectations




    def _action_space(self,Q_action):
        ## 현재 살아있는 edge들이 기존 그래프에서 몇번째 edge였는지 표시해주는 action space뽑아내주는 함수
        ## Q_action : 죽은 노드들이 있는 행과 열에 대하여 0으로 대체된 살아있는 노드들만 표시되는 매트릭스
        action_space_list = []
        index = 0
        for i in range(Q_action.shape[0]):
            for j in range(Q_action.shape[0]):
                if i!=j:
                    if Q_action[i,j] != 0:
                        action_space_list.append(index)
                    index+=1
        return action_space_list

    def _qaoa_edge_expectations(self, Q,idx):
        ## 각 edge별 ZZ interaction을 도출해주는 함수
        self.qaoa_layer = QAOA_layer(self.p,Q)
        @qml.qnode(self.qaoa_layer.dev)
        def circuit(param):
            self.qaoa_layer.qaoa_circuit(param)
            return [qml.expval(qml.PauliZ(i) @ qml.PauliZ(j)) for i in range(Q.shape[0]) for j in range(Q.shape[0]) if Q[i, j] != 0 and i!=j]

        return circuit(self.param[idx])

    def _qaoa_edge_expectations_gradients(self, Q,idx):
      ## 각 edge별 ZZ interaction의 기울기를 도출해주는 함수
        self.qaoa_layer = QAOA_layer(self.p,Q)
        res = []
        @qml.qnode(self.qaoa_layer.dev)
        def circuit(params,i,j):
            self.qaoa_layer.qaoa_circuit(params[idx])
            return qml.expval(qml.PauliZ(i) @ qml.PauliZ(j))
        for i in range(Q.shape[0]) :
          for j in range(Q.shape[0]):
             if Q[i, j] != 0 and i != j:
                # Compute gradients for each parameter
                gradients = qml.grad(circuit)(self.param,i,j)
                res.append(gradients)
        return res


    def _qaoa_edge_expectations_gradient(self, Q,idx,index):
      ## 각 edge별 ZZ interaction의 기울기를 도출해주는 함수
      self.qaoa_layer = QAOA_layer(self.p,Q)
      res = []
      number = 0
      @qml.qnode(self.qaoa_layer.dev)
      def circuit(params,i,j):
          self.qaoa_layer.qaoa_circuit(params[idx])
          return qml.expval(qml.PauliZ(i) @ qml.PauliZ(j))
      for i in range(Q.shape[0]) :
        for j in range(Q.shape[0]):
            if Q[i, j] != 0 and i != j:
              if number == index:
                # Compute gradients for each parameter
                gradients = qml.grad(circuit)(self.param,i,j)
                return gradients
              number+=1




    def _brute_force_optimal(self,Q):


        n = self.Q.shape[0]
        configs = list(itertools.product([-1, 1], repeat=n))
        best_value = np.inf
        best_config = None
        res_node = None
        comb_list = find_valid_combinations(n,self.same_list,self.diff_list)

        if len(comb_list) > 2**(self.n_c):
          print('something happened ... ' ,len(comb_list),self.n_c)
          raise
        for comb in comb_list:
          value = self._state_energy(np.array(comb),self.Q)
          if value < best_value:
              best_value = value
              res_node = copy.copy(comb)

        self.node_assignments = res_node





    def _state_energy(self,state,Q):
        ## State가 주어질 때 그 에너지를 계산해주는 함수


        # 동일한 크기의 단위 행렬 생성
        identity_matrix = np.eye(Q.shape[0], dtype=bool)

        # 대각선 요소를 제거한 새로운 행렬 생성
        interaction = np.where(identity_matrix, 0, Q)
        diagonal_elements = np.diag(Q)
        value = diagonal_elements@state + state.T@interaction@state
        return value

    def _dict_to_list(self,node_assign):
        ##보조 함수 assign된 node들의 0 과 1을 list 형태로 순서대로 출력해주는 함수
        node_assignments_list = [0]*len(node_assign)
        for key,value in node_assign.items(): # Change: Iterate through key-value pairs using .items()
            node_assignments_list[key]=value
        return np.array(node_assignments_list)

# RL-QAOA setup
rl_qaoa = RL_QAOA(Q,Q.shape[0],init_params,b_vector = np.array([25.]*70) ,QAOA_depth=depth,gamma = 0.99,learning_rate_init=[0.02,0.000])
#final_config = rl_qaoa.rqaoa_execute()
rl_qaoa.n_c = 2
#print(f"classical_result : {final_config},best : {rl_qaoa.node_assignments}" )


# Execute RQAOA
final_config = rl_qaoa.RL_QAOA(episodes=150,epochs=60,correc_ans=float(final_config[2]))

100%|██████████| 150/150 [01:33<00:00,  1.61it/s]


[-15.17333333   0.         -31.17333333  -9.17333333   0.
   0.           0.          -5.17333333 -11.17333333   0.
   0.           0.           0.         -39.17333333   0.
  -5.17333333   0.           0.           0.           0.
   0.           0.         -39.17333333 -39.17333333   0.
   0.           0.           0.         -29.17333333   0.
 -15.17333333  -7.17333333   0.           0.         -25.17333333
 -31.17333333   0.           0.           0.           0.
 -29.17333333   0.           0.           0.           0.
   0.         -15.17333333   0.         -39.17333333  -7.17333333
   0.         -13.17333333  -1.17333333   0.           0.
 -39.17333333  -1.17333333 -29.17333333 -31.17333333   0.
 -13.17333333   0.          -1.17333333   0.           0.
 -11.17333333   0.         -23.17333333   0.         -25.17333333
   0.         -15.17333333   0.         -11.17333333   0.
  -1.17333333   0.         -39.17333333   0.           0.
   0.         -23.17333333 -39.17333333 -39.1733

 37%|███▋      | 55/150 [00:33<00:57,  1.67it/s]


[0.06120805 0.0208614  0.59153926 0.13189349 0.24921469 0.15389739
 0.1514178  0.06237413 0.03856118 0.00557559 0.03013348 0.03303431
 0.02599635] [25. 25. 25. 25. 25. 25. 25. 25. 25. 25. 25. 25. 25. 25. 25. 25. 25. 25.
 25. 25.]


ValueError: ('Invalid input', tensor([[ -6,   6,  -1, -17,   9,  -2,  11,   0],
        [  6,  -2,  -6,   0,   5,   3,   5,   0],
        [  0,   0,  -2,   9,   0,   5,  -4,   0],
        [  0,   0,   0,  -2,   7,   5,   6,   0],
        [  0,   0,   0,   0, -10,  -5,   1,   0],
        [  0,   0,   0,   0,   0,  -7,  -4,   0],
        [  0,   0,   0,   0,   0,   0,  -2,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0]], requires_grad=True), tensor([[ -2,  -6,   0,   5,   3,   5],
        [  0,  -2,   9,   0,   5,  -4],
        [  0,   0,  -2,   7,   5,   6],
        [  0,   0,   0, -10,  -5,   1],
        [  0,   0,   0,   0,  -7,  -4],
        [  0,   0,   0,   0,   0,  -2]], requires_grad=True))

In [204]:
rl_qaoa.rqaoa_execute()

(tensor([   3.42128821,    1.15604338,  111.2022278 ,   11.64212479,
          170.36880341,   -6.87780669,   11.60568981,    0.46636549,
          206.10011713,   -2.73938896, -104.69025636,  -12.29783324], requires_grad=True),
 tensor([ 2.93035951e-01,  1.81439731e-02,  1.91231773e-01,
         -2.52315734e-02, -4.17973811e-01, -1.20652274e-01,
         -1.91603669e-03, -1.38212958e-03, -4.27245806e-04,
         -1.19438850e-03, -5.82505315e-02, -5.78304487e-03,
         -8.51660137e-02, -4.24185334e-03, -4.04468189e-04,
          0.00000000e+00, -2.85625825e-02, -1.85476789e-05,
         -3.49013546e-03, -3.85219442e-02,  7.66413887e-02,
         -5.08591549e-04, -2.73962942e-02, -1.77158378e-03,
         -3.46209425e-02, -5.82794176e-03, -1.31033296e-03,
         -1.01806074e-03,  1.45924872e-01,  0.00000000e+00,
          0.00000000e+00,  0.00000000e+00, -2.08304891e-03,
         -2.81195199e-04,  0.00000000e+00,  0.00000000e+00,
          0.00000000e+00, -3.30175585e-03,  0.00000

In [187]:
find_valid_combinations(8,rl_qaoa.same_list,rl_qaoa.diff_list)

[[-1, -1, -1, 1, 1, -1, 1, 1],
 [-1, -1, -1, 1, 1, 1, 1, 1],
 [-1, -1, 1, -1, -1, -1, 1, -1],
 [-1, -1, 1, -1, -1, 1, 1, -1],
 [-1, 1, -1, 1, 1, -1, -1, 1],
 [-1, 1, -1, 1, 1, 1, -1, 1],
 [-1, 1, 1, -1, -1, -1, -1, -1],
 [-1, 1, 1, -1, -1, 1, -1, -1],
 [1, -1, -1, 1, 1, -1, 1, 1],
 [1, -1, -1, 1, 1, 1, 1, 1],
 [1, -1, 1, -1, -1, -1, 1, -1],
 [1, -1, 1, -1, -1, 1, 1, -1],
 [1, 1, -1, 1, 1, -1, -1, 1],
 [1, 1, -1, 1, 1, 1, -1, 1],
 [1, 1, 1, -1, -1, -1, -1, -1],
 [1, 1, 1, -1, -1, 1, -1, -1]]

In [205]:
rl_qaoa.same_list

[(3, 7), (6, 7), (1, 7)]

In [206]:
rl_qaoa.diff_list

[(2, 7), (0, 4), (4, 5)]

In [147]:
def generate_upper_triangular_qubo(size, low=-10, high=10, integer=True, seed=None):
    """
    Generates an upper-triangular QUBO (Quadratic Unconstrained Binary Optimization) matrix.

    Args:
        size (int): The number of variables (size of the QUBO matrix).
        low (int/float): Minimum value of the random elements.
        high (int/float): Maximum value of the random elements.
        integer (bool): If True, generates integer values; otherwise, generates float values.
        seed (int, optional): Random seed for reproducibility.

    Returns:
        np.ndarray: An upper-triangular QUBO matrix of the specified size.
    """
    if seed is not None:
        np.random.seed(seed)

    # Generate random values for the upper triangular part including diagonal
    if integer:
        Q = np.random.randint(low, high, (size, size))
    else:
        Q = np.random.uniform(low, high, (size, size))

    # Keep only the upper triangle values (including diagonal), set lower triangle to zero
    Q = np.triu(Q)

    # Ensure diagonal values are positive (bias terms)
    np.fill_diagonal(Q,np.diagonal(Q))

    return Q

# Example usage
qubo_matrix = generate_upper_triangular_qubo(size=3, low=-5, high=5, integer=True, seed=70)
print("Randomly generated upper-triangular QUBO matrix:\n", qubo_matrix)

import numpy as np

# Original QUBO matrix
qubo_matrix = np.array([
    [0.0658492, -0.024853287,  0.064008761,  0.058160105,  0.058757735],
    [-0.024853287, 0.080128677, -0.025012106, -0.023698929, -0.013849719],
    [0.064008761, -0.025012106,  0.063201845,  0.053540045,  0.055350148],
    [0.058160105, -0.023698929,  0.053540045,  0.066177288,  0.055601435],
    [0.058757735, -0.013849719,  0.055350148,  0.055601435,  0.059764565]
])

diagonal = np.diag(np.diag(qubo_matrix)) + np.eye(qubo_matrix.shape[0])

# Combine results: upper triangle + modified diagonal






# Create an upper triangular matrix by doubling the upper triangle values and keeping diagonal
modified_qubo = np.triu(qubo_matrix, k=1) * 2 + np.diag(np.diag(qubo_matrix))
modified_qubo =  modified_qubo + diagonal

print("Modified QUBO Matrix:\n", modified_qubo)
qubo_matrix = modified_qubo


Randomly generated upper-triangular QUBO matrix:
 [[ 1 -3  3]
 [ 0 -1  0]
 [ 0  0 -1]]
Modified QUBO Matrix:
 [[ 1.1316984  -0.04970657  0.12801752  0.11632021  0.11751547]
 [ 0.          1.16025735 -0.05002421 -0.04739786 -0.02769944]
 [ 0.          0.          1.12640369  0.10708009  0.1107003 ]
 [ 0.          0.          0.          1.13235458  0.11120287]
 [ 0.          0.          0.          0.          1.11952913]]


In [148]:
import numpy as np
from itertools import combinations
import random
def cut_k_matrices_randomly(matrix, m, k):
    n = matrix.shape[0]
    if m >= n:
        raise ValueError("m must be smaller than the size of the matrix")

    selected_subsets = []
    remaining_indices = list(range(n))
    combinations_list = generate_combinations(n, m)
    random_indices = random.sample(range(len(combinations_list)), k)
    indices = [combinations_list[i] for i in random_indices]
    for indice in indices:
        matrix_copy = copy.deepcopy(matrix)
        remaining_indices = list(range(n))
        remaining_indices = sorted(set(remaining_indices) - set(indice))
        reduced_matrix = matrix_copy[np.ix_(remaining_indices, remaining_indices)]
        selected_subsets.append(reduced_matrix)

    return selected_subsets

def generate_combinations(n, m):
    return list(combinations(range(n), m))

# Example usage
n = 5  # Matrix size
m = 2  # Number of indices to remove per step
k = 10  # Number of unique reduced matrices
matrix = np.random.randint(1, 100, (n, n))
print("Original Matrix:")
print(matrix)

cut_matrices = cut_k_matrices_randomly(matrix, m, k)
for i, cut_matrix in enumerate(cut_matrices):
    print(f"Reduced Matrix {i+1}:")
    print(cut_matrix)

combinations_list = generate_combinations(n, m)
print("All combinations:")
print(combinations_list)


Original Matrix:
[[73 97 24 53 98]
 [85  2 17 80 42]
 [78 91 57 22 48]
 [61 99 16 11 64]
 [12 94 36 79 88]]
Reduced Matrix 1:
[[ 2 17 80]
 [91 57 22]
 [99 16 11]]
Reduced Matrix 2:
[[57 22 48]
 [16 11 64]
 [36 79 88]]
Reduced Matrix 3:
[[73 97 53]
 [85  2 80]
 [61 99 11]]
Reduced Matrix 4:
[[ 2 17 42]
 [91 57 48]
 [94 36 88]]
Reduced Matrix 5:
[[73 53 98]
 [61 11 64]
 [12 79 88]]
Reduced Matrix 6:
[[ 2 80 42]
 [99 11 64]
 [94 79 88]]
Reduced Matrix 7:
[[73 97 24]
 [85  2 17]
 [78 91 57]]
Reduced Matrix 8:
[[73 97 98]
 [85  2 42]
 [12 94 88]]
Reduced Matrix 9:
[[73 24 53]
 [78 57 22]
 [61 16 11]]
Reduced Matrix 10:
[[73 24 98]
 [78 57 48]
 [12 36 88]]
All combinations:
[(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]


In [149]:
class QAOA_pretrain():
  def __init__(self,Q,size,depth,number):
    self.Q = Q
    self.size = size
    self.p = depth
    self.number = number


  def qaoa_exp(self,param):
    Q_action = copy.deepcopy(self.Q)
    Q_list = cut_k_matrices_randomly(Q_action, self.size,self.number)
    res = 0
    for Q in Q_list:
      qaoa = QAOA_layer(self.p, Q)
      @qml.qnode(qaoa.dev)
      def qaoa_expectation(param):
          """
          Quantum node that runs the QAOA circuit and measures the expectation value
          of the cost Hamiltonian.

          Args:
              params (np.ndarray): Array of QAOA parameters (gammas and betas).

          Returns:
              float: Expectation value of the cost Hamiltonian.
          """
          qaoa.qaoa_circuit(param)
          return qml.expval(qaoa.ham)
      res += qaoa_expectation(param)
    return res/len(Q_list)
  # Define the cost function for optimization
  def cost_function(self,params):
      """
      Computes the expectation value of the cost Hamiltonian.

      Args:
          params (np.ndarray): Array of QAOA parameters (gammas and betas).

      Returns:
          float: Expectation value of the cost Hamiltonian.
      """
      return self.qaoa_exp(params)


In [150]:
import pennylane as qml
from pennylane import numpy as np
from scipy.optimize import minimize

# Define QAOA depth

''' Q = qubo_matrix
print(Q)
depth = 2 '''
depth = 1
size = 8
Q = generate_upper_triangular_qubo(size,-10,10,integer=True)

param_list = []

start = 0
while size>2:
    if len(generate_combinations(Q.shape[0], start))>5:
        reps = 5
    else:
        reps = len(generate_combinations(Q.shape[0], start))
    QAOA_pre = QAOA_pretrain(Q,start,depth,reps)
    init_params = np.random.uniform(0, np.pi, 2 * depth)


    # Optimize the QAOA parameters using classical optimizer (COBYLA)
    result = minimize(QAOA_pre.cost_function, init_params, method="COBYLA", options={'maxiter': 200})

    # Extract optimized parameters and minimum cost
    param = result.x
    min_cost = result.fun
    param_list.append(param)
    # Print the results
    print("Optimized parameters (gammas and betas):", param)
    print("Minimum cost achieved:", min_cost)
    size = size-2
    start +=2

Optimized parameters (gammas and betas): [2.03430697 3.79264059]
Minimum cost achieved: -2.974199560983268
Optimized parameters (gammas and betas): [1.82457401 0.57245894]
Minimum cost achieved: 1.5749409358645052
Optimized parameters (gammas and betas): [2.14200351 2.55636641]
Minimum cost achieved: 2.0219856739084063


In [151]:
import pennylane as qml
from pennylane import numpy as np

# Define the QUBO matrix for the problem
def generate_upper_triangular_qubo(size, low=-10, high=10, integer=True, seed=None):
    """
    Generates an upper-triangular QUBO (Quadratic Unconstrained Binary Optimization) matrix.

    Args:
        size (int): The number of variables (size of the QUBO matrix).
        low (int/float): Minimum value of the random elements.
        high (int/float): Maximum value of the random elements.
        integer (bool): If True, generates integer values; otherwise, generates float values.
        seed (int, optional): Random seed for reproducibility.

    Returns:
        np.ndarray: An upper-triangular QUBO matrix of the specified size.
    """
    if seed is not None:
        np.random.seed(seed)

    # Generate random values for the upper triangular part including diagonal
    if integer:
        Q = np.random.randint(low, high, (size, size))
    else:
        Q = np.random.uniform(low, high, (size, size))

    # Keep only the upper triangle values (including diagonal), set lower triangle to zero
    Q = np.triu(Q)

    # Ensure diagonal values are positive (bias terms)
    np.fill_diagonal(Q, np.abs(np.diagonal(Q)))

    return Q



# Initialize the QAOA layer
qaoa = QAOA_layer(depth, Q)

# Define the QNode for optimization
@qml.qnode(qaoa.dev)
def qaoa_expectation(params):
    """
    Quantum node that runs the QAOA circuit and measures the expectation value
    of the cost Hamiltonian.

    Args:
        params (np.ndarray): Array of QAOA parameters (gammas and betas).

    Returns:
        float: Expectation value of the cost Hamiltonian.
    """
    qaoa.qaoa_circuit(params)
    return qml.expval(qaoa.ham)

# Define the cost function for optimization
def cost_function(params):
    """
    Computes the expectation value of the cost Hamiltonian.

    Args:
        params (np.ndarray): Array of QAOA parameters (gammas and betas).

    Returns:
        float: Expectation value of the cost Hamiltonian.
    """
    return qaoa_expectation(params)





'''

opt = qml.AdamOptimizer(stepsize=0.1)

# Initial random parameters for QAOA (gammas and betas)
init_params = optimized_params

# Optimization loop
max_iterations = 200
params = init_params
for i in range(max_iterations):
    params = opt.step(cost_function, params)
    if i % 20 == 0:
        cost = cost_function(params)
        print(f"Iteration {i}: Cost = {cost}")

# Final optimized parameters
final_cost = cost_function(params)
print("Optimized parameters (gammas and betas):", params)
print("Final minimum cost achieved:", final_cost) '''





'\n\nopt = qml.AdamOptimizer(stepsize=0.1)\n\n# Initial random parameters for QAOA (gammas and betas)\ninit_params = optimized_params\n\n# Optimization loop\nmax_iterations = 200\nparams = init_params\nfor i in range(max_iterations):\n    params = opt.step(cost_function, params)\n    if i % 20 == 0:\n        cost = cost_function(params)\n        print(f"Iteration {i}: Cost = {cost}")\n\n# Final optimized parameters\nfinal_cost = cost_function(params)\nprint("Optimized parameters (gammas and betas):", params)\nprint("Final minimum cost achieved:", final_cost) '

In [152]:
params = param_list[0]
dev = qml.device("default.qubit", shots=1500)
@qml.qnode(dev)
def sample_solution(params):
    """Runs the optimized QAOA circuit and samples the output state."""
    qaoa.qaoa_circuit(params)
    return qml.sample()

# Get multiple samples from the optimized circuit
num_samples = 1000
samples = sample_solution(params)

# Convert measurement results from {-1,1} to {0,1}
binary_solutions = samples*(-2)+1

# Compute the cost for each sampled solution
def evaluate_solution(sample):
    # 동일한 크기의 단위 행렬 생성
    identity_matrix = np.eye(Q.shape[0], dtype=bool)

    # 대각선 요소를 제거한 새로운 행렬 생성
    interaction = np.where(identity_matrix, 0, Q)
    diagonal_elements = np.diag(Q)
    value = diagonal_elements@sample + sample.T@interaction@sample
    return value

# Evaluate the cost for all sampled solutions and find the best
costs = np.array([evaluate_solution(sample) for sample in binary_solutions])
best_solution = binary_solutions[np.argmin(costs)]
best_cost = np.min(costs)

print("Best solution found:", best_solution)
print("Cost of best solution:", best_cost)

Best solution found: [-1 -1 -1 -1  1  1  1  1]
Cost of best solution: -82


In [153]:
dev = qml.device("default.qubit", shots=1500)
@qml.qnode(dev)
def sample_solution(params):
    """Runs the optimized QAOA circuit and samples the output state."""
    qaoa.qaoa_circuit(params)
    return qml.sample()

In [154]:
# Initial parameters for QAOA
init_params = params
init_params = np.reshape(init_params,(-1))

# RL-QAOA setup
rl_qaoa = RL_QAOA(Q,Q.shape[0],init_params,b_vector = np.array([25.]*30) ,QAOA_depth=1,gamma = 0.99,learning_rate_init=[0.01,0.05])
final_config = rl_qaoa.rqaoa_execute()
rl_qaoa.n_c = 1
print(f"classical_result : {final_config},best : {rl_qaoa.node_assignments}" )

classical_result : (None, None, tensor(-82, requires_grad=True)),best : [-1, -1, -1, -1, 1, 1, 1, 1]


In [156]:
param_list

[array([2.03430697, 3.79264059]),
 array([1.82457401, 0.57245894]),
 array([2.14200351, 2.55636641])]

In [161]:
### Single optimization
# Example QUBO matrix
#Q = Q_onsite

# Initial parameters for QAOA
init_params = np.array(param_list*2)


#init_params = np.random.uniform(0, np.pi, 2 * depth*5)
init_params = np.reshape(init_params,(-1))

# RL-QAOA setup
rl_qaoa = RL_QAOA(Q,Q.shape[0],init_params,b_vector = np.array([25.]*int((Q.shape[0]**2))) ,QAOA_depth=depth,gamma = 0.99,learning_rate_init=[0.02,0.000])
final_config = rl_qaoa.rqaoa_execute()
rl_qaoa.n_c = 2
print(f"classical_result : {final_config},best : {rl_qaoa.node_assignments}" )


# Execute RQAOA
final_config = rl_qaoa.RL_QAOA(episodes=150,epochs=60,correc_ans=float(final_config[2]))

classical_result : (None, None, tensor(-82, requires_grad=True)),best : [-1, -1, -1, -1, 1, 1, 1, 1]


  0%|          | 0/150 [00:00<?, ?it/s]


ValueError: matmul: Input operand 1 does not have enough dimensions (has 0, gufunc core with signature (n?,k),(k,m?)->(n?,m?) requires 1)

In [117]:
rl_qaoa.rqaoa_execute()

IndexError: index 2 is out of bounds for axis 0 with size 2

In [None]:
from pennylane import numpy as np
from scipy.optimize import minimize


# Initial parameters for QAOA
init_params = np.array(param_list)


#init_params = np.random.uniform(0, np.pi, 2 * depth*5)
init_params = np.reshape(init_params,(-1))

# RL-QAOA setup
rl_qaoa = RL_QAOA(Q,2,init_params,b_vector = np.array([25.]*70) ,QAOA_depth=depth,gamma = 0.99,learning_rate_init=[0.02,0.000])
# 최적화할 목적 함수 정의 (x만 최적화하고, 일부 값은 고정)
def objective_function(params):
    rl_qaoa.param = np.array(params[:len(rl_qaoa.param)])
    rl_qaoa.b = np.array(params[len(rl_qaoa.param):])
    result = 0
    for i in range(150):
      res = rl_qaoa.rqaoa_execute(False)
      result += float(res)
    result/=150
    return result

# 두 번째와 세 번째 인수 고정
fixed_val1 = 3.0
fixed_val2 = 2.0

# 진행 상태 출력을 위한 콜백 함수
iteration_count = 0

def callback(xk):
    global iteration_count
    iteration_count += 1
    if iteration_count % 3 == 0:
        print(f"Iteration {iteration_count}: x = {xk}, objective = {objective_function(xk)}")

# 초기 추정값 (x만 최적화)
x0 = np.array(list(rl_qaoa.param)+list(rl_qaoa.b))
x0 = np.reshape(x0,-1)

# 최적화 수행 (COBYLA 사용)
result = minimize(
    objective_function,  # 고정된 인수 적용
    x0,
    method='COBYLA',
    callback=callback,
    options={'maxiter': 16}  # 최대 100번 반복
)

# 최적화 결과 출력
print("\n최적화 완료!")
print("최적화된 x:", result.x)
print("최적화된 함수 값:", result.fun)
print("성공 여부:", result.success)
print("메시지:", result.message)


Iteration 3: x = [ 2.01768174  2.41262379  5.2386235  -1.02456235  1.95411415  0.83626581
 25.         25.         25.         25.         25.         25.
 25.         25.         25.         25.         25.         25.
 25.         25.         25.         25.         25.         25.
 25.         25.         25.         25.         25.         25.
 25.         25.         25.         25.         25.         25.
 25.         25.         25.         25.         25.         25.
 25.         25.         25.         25.         25.         25.
 25.         25.         25.         25.         25.         25.
 25.         25.         25.         25.         25.         25.
 25.         25.         25.         25.         25.         25.
 25.         25.         25.         25.         25.         25.
 25.         25.         25.         25.        ], objective = -16.213333333333335
Iteration 6: x = [ 2.01768174  2.41262379  4.2386235  -1.02456235  2.95411415  1.83626581
 25.         25.      

KeyboardInterrupt: 

In [None]:
# RL-QAOA setup
#rl_qaoa = RL_QAOA(Q,Q.shape[0],init_params,b_vector = np.array([25.]*70) ,QAOA_depth=depth,gamma = 0.99,learning_rate_init=[0.02,0.000])
#final_config = rl_qaoa.rqaoa_execute()
#rl_qaoa.n_c = 2
#print(f"classical_result : {final_config},best : {rl_qaoa.node_assignments}" )


# Execute RQAOA
final_config = rl_qaoa.RL_QAOA(episodes=150,epochs=60,correc_ans=float(final_config[2]))

100%|██████████| 150/150 [00:36<00:00,  4.07it/s]


[-16. -16.   0. -16. -16. -16.   0. -16.   0. -16.   0. -20.   0. -16.
   0. -16. -16. -16. -16. -16. -16.   0. -16.   0. -16.   0. -16.   0.
   0.   0. -16. -16. -16. -16. -16. -16. -16. -16. -16.   0. -16.   0.
 -16.   0. -16.   0. -16.   0.   0. -16.   0. -16.   0. -16. -16.   0.
 -16. -16. -16.   0. -16. -16. -16. -16. -16. -16.   0. -16. -16. -16.
 -16. -16. -16. -16. -16. -16. -16.   0. -16. -16.   0. -16.   0. -16.
 -16.   0. -16. -16. -16.   0. -16. -16. -16. -16. -16. -16.   0.   0.
   0. -16. -16. -16.   0.   0.   0. -16. -16.   0.   0. -16. -16. -16.
 -16. -16.   0. -16.   0. -16. -16. -16. -16. -16. -16. -16.   0. -16.
   0. -16.   0. -16.   0.   0. -16.   0. -16.   0. -16. -16. -16. -16.
 -16. -16.   0.   0.   0. -16.   0. -16.   0. -16.]
QAOA diff : [256.09063851 -36.19425484  65.8344376   17.98613328  -3.00731666
  -1.42662768]
cost : -53.0
prob : 0.0
QAOA update : [ 0.02 -0.02  0.02  0.02 -0.02 -0.02]
QAOA param : [ 2.03768174  2.39262379  4.2586235  -1.00456235  2.9341

100%|██████████| 150/150 [00:35<00:00,  4.26it/s]


[-21.98666667   0.           0.         -21.98666667   0.
 -21.98666667 -21.98666667   0.         -21.98666667   0.
 -21.98666667 -21.98666667 -21.98666667 -21.98666667   0.
   0.         -21.98666667   0.         -21.98666667 -21.98666667
 -21.98666667   0.         -21.98666667 -21.98666667 -21.98666667
   0.         -21.98666667   0.           0.           0.
 -21.98666667   0.         -21.98666667 -21.98666667 -21.98666667
 -21.98666667 -21.98666667 -21.98666667   0.           0.
   0.         -21.98666667   0.         -21.98666667 -21.98666667
 -21.98666667 -21.98666667   0.         -21.98666667 -21.98666667
 -21.98666667   0.           0.         -21.98666667 -21.98666667
 -21.98666667   0.           0.         -21.98666667 -21.98666667
 -21.98666667   0.           0.         -21.98666667 -21.98666667
   0.         -21.98666667   0.           0.           0.
 -21.98666667 -21.98666667 -21.98666667   0.           0.
 -21.98666667 -21.98666667 -21.98666667   0.         -21.98666667


 17%|█▋        | 26/150 [00:06<00:32,  3.80it/s]


KeyboardInterrupt: 