## QAOA
Given a classical cost function $C(z)$ defined on $n$-bit strings $z = (z_1, z_2, \ldots, z_n) \in \{+1, -1\}^{n}$, the QAOA is a quantum algorithm that aims to find a string $z$ such that $C(z)$ is close to its absolute minimum. The cost function $C$ can be written as an operator that is diagonal in the computational basis, defined as

$$
C|z\rangle = C(z)|z\rangle
$$

The QAOA consists of two components that are repeatedly applied. The first component is parameterized evolution under the diagonal problem
Hamiltonian C,
$$
U_{C}(\gamma) = e^{-i\gamma C}
$$

The second component is parameterized evolution under a local transverse field driver Hamiltonian B,

$$
U_{B}(\beta) = e^{-i\beta B} = \prod_{j=1}^{n}e^{-i\beta X_{j}} \;;\;   B = \sum_{j=1}^{n}X_{j}
$$

The QAOA is a variational algorithm that uses repeated application of these unitaries to prepare a parameterized
state that is then optimized. The depth of the variational algorithm is usually denoted as “p” in the QAOA literature.
Specifically, for depth $p$ we prepare a state parameterized by $\gamma = (\gamma_1, \ldots, \gamma_p)$ and $\beta = (\beta_1, \ldots, \beta_p)$:

$$
|\gamma, \beta\rangle = U_{B}(\beta_p) U_{C}(\gamma_p) \cdots U_{B}(\beta_1) U_{C}(\gamma_1) |+\rangle^{\otimes n}
$$

For a given p, we attempt to find parameters that minimize the expectation value of the cost
$$
\langle C \rangle = \langle \gamma, \beta | C | \gamma, \beta \rangle
$$

## SK Model

The Sherrington-Kirkpatrick (SK) model describes a classical spin system with all-to-all couplings between the $n$ spins. The classical cost function $C$ is defined as:
$$
C(z) = \sum_{j < k}^{n} w_{j, k} z_{j} z_{k} \; \text{where}\;  w_{j, k} \in \{+1, -1\}
$$

Since the $Z_{j}Z_{k}$ operators all commute with each other, we can write $U_{C}(\gamma)$ as follows:

$$
U_{C}(\gamma) = e^{-i\gamma C} = \prod_{j < k}^{n}e^{-i \gamma w_{j, k} Z_{j} Z_{k}}
$$

Thus, we can decompose $U_{C}(\gamma)$ into $\frac{n(n-1)}{2}$ 2-qubit operations. Each of the 2-qubit unitaries can be implemented in Cirq as a `ZZPowGate` as follows:

```python
    def rzz(gamma):
        return cirq.ZZPowGate(exponent=2 * gamma / np.pi, global_shift=-0.5)
```

A sample QAOA circuit for the SK model is constructed in the following cell, using Qualtran bloqs together with Cirq.

In [None]:
import attrs
import sympy
import numpy as np
import cirq
from typing import Sequence, Union, Tuple, Set
from qualtran import GateWithRegisters, Signature, QFxp
from qualtran.bloqs.basic_gates.rotation import ZZPowGate, Rx
from qualtran.bloqs.basic_gates import Hadamard
from qualtran.bloqs.on_each import OnEach
from qualtran.bloqs.utils import SymbolicInt, SymbolicFloat
from qualtran.drawing import show_bloq, show_call_graph, show_counts_sigma
from sympy.functions.combinatorial.factorials import binomial 

def rzz(gamma, eps: SymbolicFloat):
    """Return the two-qubit ZZ rotation used for a single coupling term.

    Args:
        gamma: Rotation parameter; the gate exponent is `2 * gamma / pi`.
        eps: Precision forwarded unchanged to `ZZPowGate`.

    Returns:
        A `ZZPowGate` with `global_shift=-0.5`, which the notebook text uses
        as the implementation of each 2-qubit factor of $U_C(γ)$.
    """
    zz_exponent = 2 * gamma / np.pi
    return ZZPowGate(exponent=zz_exponent, global_shift=-0.5, eps=eps)

@attrs.frozen
class PhaseOracle(GateWithRegisters):
    r"""Implements the problem-dependent unitary $U_{C}(γ)=\exp(-i γ C)$

    For the SK-model, the phase oracle $U_{C}(γ)$ can be expressed as
    $$
       U_{C}(γ) =\prod_{j<k}\exp(-i γ w_{j,k}Z_jZ_k)
    $$

    Args:
        bitsize: Number of qubits `n` in the `x` register.
        gamma: The angle γ shared by every two-qubit rotation.
        weights: Flat tuple of couplings $w_{j,k}$, read in the order the pairs
            `(j, k)` with `j < k` are visited (j outer, k inner), so it must hold
            at least `n * (n - 1) / 2` entries.
        eps: Total precision budget, split evenly over the `n * (n - 1) / 2` rotations.
    """
    bitsize: SymbolicInt
    gamma: SymbolicFloat
    weights: Tuple[int, ...]
    eps: SymbolicFloat

    @property
    def signature(self) -> Signature:
        return Signature.build_from_dtypes(x=QFxp(self.bitsize, self.bitsize))

    def decompose_from_registers(self, *, context, x):
        """Yield one `rzz` rotation per qubit pair `(i, j)` with `i < j`.

        Requires a concrete integer `bitsize`, because the pairs are enumerated
        with `range`.
        """
        n = self.bitsize
        # n * (n - 1) / 2 rotations share the budget equally.
        eps_per_rotation = 2 * self.eps / (n * (n - 1))
        w_idx = 0
        for i in range(n):
            for j in range(i + 1, n):
                # Use this instance's own weights and angle, not notebook-level globals.
                angle = self.weights[w_idx] * self.gamma
                yield rzz(angle, eps_per_rotation).on(x[i], x[j])
                w_idx += 1

    def build_call_graph(self, ssa: 'SympySymbolAllocator') -> Set['BloqCountT']:
        """Report `binomial(n, 2)` rotations, each given an equal share of `eps`.

        All rotations are counted as `rzz(gamma, ...)`; the per-pair weight is not
        distinguished here, so this also works for a symbolic `bitsize`.
        """
        n = self.bitsize
        num_rotations = binomial(n, 2)
        return {(rzz(self.gamma, self.eps / num_rotations), num_rotations)}

    def __str__(self):
        return f'PhaseOracle[{self.gamma}]'

@attrs.frozen
class DriverOracle(GateWithRegisters):
    r"""Implements the problem-independent driver unitary $U_{B}(β)=\prod_j \exp(-i β X_j)$

    Args:
        bitsize: Number of qubits in the `x` register.
        beta: The angle β; each qubit gets an `Rx` with angle `2 * beta`.
        eps: Total precision budget, divided evenly by `bitsize` for each `Rx`.
    """
    bitsize: SymbolicInt
    beta: SymbolicFloat
    eps: SymbolicFloat

    @property
    def signature(self) -> Signature:
        num_qubits = self.bitsize
        return Signature.build_from_dtypes(x=QFxp(num_qubits, num_qubits))

    def decompose_from_registers(self, *, context, x):
        """Yield a single moment applying the same `Rx` to every qubit of `x`."""
        single_qubit_rx = Rx(angle=2 * self.beta, eps=self.eps / self.bitsize)
        yield cirq.Moment(single_qubit_rx.on_each(*x))

    def build_call_graph(self, ssa: 'SympySymbolAllocator') -> Set['BloqCountT']:
        """Report `bitsize` copies of the per-qubit `Rx` rotation."""
        single_qubit_rx = Rx(angle=2 * self.beta, eps=self.eps / self.bitsize)
        return {(single_qubit_rx, self.bitsize)}

    def __str__(self):
        return f'DriverOracle[{self.beta}]'


@attrs.frozen
class QAOA(GateWithRegisters):
    """Naive QAOA circuit: a Hadamard layer, then alternating phase and driver oracles.

    Args:
        bitsize: Number of qubits in the `x` register.
        weights: Couplings forwarded to every `PhaseOracle`.
        gammas: One phase-oracle angle per layer.
        betas: One driver-oracle angle per layer; paired with `gammas` via `zip`.
        eps: Total precision budget; every oracle receives `eps / (2 * len(gammas))`.
    """
    bitsize: SymbolicInt
    weights: Tuple[int, ...]
    gammas: Tuple[SymbolicFloat, ...]
    betas: Tuple[SymbolicFloat, ...]
    eps: SymbolicFloat

    @property
    def signature(self) -> Signature:
        num_qubits = self.bitsize
        return Signature.build_from_dtypes(x=QFxp(num_qubits, num_qubits))

    def _layers(self):
        """Yield a `(PhaseOracle, DriverOracle)` pair for each `(gamma, beta)` layer."""
        for layer_gamma, layer_beta in zip(self.gammas, self.betas):
            # Two oracles per layer, so the budget is split 2 * depth ways.
            oracle_eps = self.eps / (2 * len(self.gammas))
            yield (
                PhaseOracle(self.bitsize, layer_gamma, self.weights, oracle_eps),
                DriverOracle(self.bitsize, layer_beta, oracle_eps),
            )

    def decompose_from_registers(self, *, context, x):
        """Yield the Hadamard layer followed by each layer's phase and driver oracle."""
        yield OnEach(len(x), Hadamard()).on_registers(q=x)
        for phase_oracle, driver_oracle in self._layers():
            yield phase_oracle.on(*x)
            yield driver_oracle.on(*x)

    def build_call_graph(self, ssa: 'SympySymbolAllocator') -> Set['BloqCountT']:
        """Report every per-layer oracle with a count of one."""
        return {(oracle, 1) for layer in self._layers() for oracle in layer}

# Concrete SK instance: one random ±1 coupling for each of the nC2 spin pairs.
n = 50
rng = np.random.default_rng(12345)
w = (-1) ** rng.integers(low=0, high=1, size=(n * (n - 1)) // 2, endpoint=True) # nC2 weights for the cost function.
weights = tuple(w.tolist())
p = 3

# From here on `n` is rebound to a symbol so that costs are reported symbolically.
n = sympy.Symbol('n')
# Raw string: '\e' is not a valid escape sequence in a normal string literal.
eps = sympy.Symbol(r'\epsilon')
# Build the tuples explicitly: sympy.symbols('gamma_0') would return a bare
# Symbol (not a 1-tuple) when p == 1, which breaks len()/zip() downstream.
gamma = tuple(sympy.Symbol(f'gamma_{i}') for i in range(p))
beta = tuple(sympy.Symbol(f'beta_{i}') for i in range(p))


qaoa_naive = QAOA(n, weights, gamma, beta, eps)
call_graph_naive, sigma_naive = qaoa_naive.call_graph()
show_call_graph(call_graph_naive)
show_counts_sigma(sigma_naive)

## Optimized Phase Oracles for FT Resource Estimation
The phase oracle $O^{\text{phase}}(\gamma)$ is defined as 
$$
    O^{\text{phase}}(\gamma) |z\rangle = e^{-i \gamma C(z)} |z\rangle
$$

In the naive Cirq implementation shown above, the phase oracle $O^{\text{phase}}(\gamma)$ is implemented simply as the unitary $U_{C}(\gamma)$ defined as
$$
U_{C}(\gamma) = e^{-i\gamma C} = \prod_{j < k}^{n}e^{-i \gamma w_{j, k} Z_{j} Z_{k}}
$$


However, in the fault-tolerant setting, we aim to come up with optimized implementations of $O^{\text{phase}}(\gamma)$ that reduce the Toffoli count. One trick we can use is to compute the cost $C(z)$ for each bitstring $|z\rangle$ in a separate register and then apply a phasing operation to phase each state $|z\rangle |c(z)\rangle$ with a value proportional to $\gamma \cdot C(z)$. Phasing can be implemented by preparing an initial phase gradient state and then adding $\gamma \cdot C(z)$ to the phase gradient register in order to apply a phase $e^{-i \gamma C(z)}$ to state $|z\rangle$.

### Cost Function Evaluation Oracle
Let $O^{\text{direct}}$ represent an oracle that evaluates the cost function $C(z)$ and stores the output in a new clean register. 

$$
    O^{\text{direct}}|z\rangle |0\rangle^{\otimes b_{\text{dir}}}  = |z\rangle |c(z)\rangle
$$

Here $b_{\text{dir}}$ is the bitsize of the newly allocated ancilla register; in the case of the SK model, it is equal to $2\lceil\log_2{n}\rceil$, where $n$ is the number of spins.

### Phase Evaluation via Addition
#### Rotations via addition
Let $|\phi_{\text{grad}}\rangle$ be a phase gradient state defined on a register of bitsize $b_{\text{grad}}$ as

$$
        |\phi_{\text{grad}}\rangle = \frac{1}{\sqrt{2^{b_{\text{grad}}}}} \sum_{k=0}^{2^{b_{\text{grad}}} - 1} \omega_{b_{\text{grad}}}^{-k} |k\rangle 
$$
where
$$
        \omega_{b_{\text{grad}}} = \exp\left(\frac{2\pi i}{2^{b_{\text{grad}}}}\right)
$$

Note that 

$$
\begin{aligned}
  |\phi_{\text{grad}} + l\rangle &= \omega_{b_{\text{grad}}}^{l} |\phi_{\text{grad}}\rangle \\
  |\phi_{\text{grad}} - l\rangle &= \omega_{b_{\text{grad}}}^{-l} |\phi_{\text{grad}}\rangle \\
  \text{where}\quad \omega_{b_{\text{grad}}}^{l} &= \exp\left(\frac{2\pi i l}{2^{b_{\text{grad}}}}\right)
\end{aligned}
$$

Thus, by adding $l$ into the phase register, we've applied a phase to the state that's proportional to $l$.

Therefore, to apply a phase of $\exp\left(-i\gamma c(z)\right)$ to state $|z\rangle|c(z)\rangle$, we need

$$
U^{\text{phase}}(\gamma) = \sum_{k=0}^{2^{b_{\text{dir}}} - 1} \omega_{b_{\text{dir}}}^{-\widetilde{\gamma} k} |k\rangle\langle k|
$$
where
$$
\widetilde{\gamma} = \gamma \times \frac{2^{b_{\text{dir}}}}{2\pi}
$$
and we assume $\widetilde{\gamma} \le 1$
#### Determining $\gamma_{\text{bitsize}}$
Let the phase applied by the phasing oracle be $e^{-i\widetilde{\gamma C(z)}}$ s.t. $||\widetilde{\gamma C(z)} - \gamma C(z)|| \le \frac{1}{2^{b_{\text{pha}}}}$, then 

$$
\gamma_{\text{bitsize}} = \log{\widetilde{\gamma}} + b_{\text{pha}} + O(1)
$$

#### Determining $b_{\text{grad}}$
$$
b_{\text{grad}} = \left\lceil \log{\frac{(\gamma_{\text{bitsize}} + 2)\pi}{\epsilon}}\right\rceil
$$ 

Here $b_{\text{grad}}$ represents the size of the phase gradient register. Given a phase gradient state prepared on a register of size $b_{\text{grad}}$, the cost to apply a phase $e^{-i \gamma c(z)}$ to a state $|z\rangle |c(z)\rangle$ is $(b_{\text{grad}} - 2)(\gamma_{\text{bitsize}} + 2)$ Toffoli gates. This is because the multiplication of $c(z)$ by $\gamma$ and the addition into the phase gradient register can be reduced to $\gamma_{\text{bitsize}} + 2$ additions / subtractions of a power of 2 into the phase gradient register of size $b_{\text{grad}}$, and each addition into the phase gradient register takes $(b_{\text{grad}} - 2)$ Toffoli gates.


In [None]:
from qualtran import Bloq, GateWithRegisters, Signature, Register, Side, SoquetT, BloqBuilder
from qualtran.bloqs.rotations.phase_gradient import PhaseGradientState, AddScaledValIntoPhaseReg
from qualtran.bloqs.basic_gates import TGate, Hadamard, Rx
from qualtran.bloqs.on_each import OnEach
from qualtran.bloqs.qaoa.sk_model import SKModelCostEval
from qualtran.bloqs.rotations.phasing_via_cost_function import PhaseOracleZPow, PhaseOraclePhaseGradient, PhasingViaCostFunction
from qualtran.bloqs import utils
from typing import Dict, Optional, Set

@attrs.frozen
class QAOA_SKModel(GateWithRegisters):
    """QAOA for the SK model whose gamma layers use `PhasingViaCostFunction`.

    Args:
        bitsize: Number of qubits in the `x` register.
        weights: Couplings forwarded to `SKModelCostEval`.
        gammas: One phase angle per layer.
        betas: One driver angle per layer; paired with `gammas` via `zip`.
        eps: Total precision budget; each oracle receives `eps / (2 * len(gammas))`.
        use_phase_gradient: If True, phase with `PhaseOraclePhaseGradient` and wrap
            the layers in a `PhaseGradientState` and its adjoint; otherwise phase
            with `PhaseOracleZPow`.
    """
    bitsize: SymbolicInt
    weights: Tuple[int, ...]
    gammas: Tuple[SymbolicFloat, ...]
    betas: Tuple[SymbolicFloat, ...]
    eps: SymbolicFloat
    use_phase_gradient: bool = False

    @property
    def signature(self) -> Signature:
        num_qubits = self.bitsize
        return Signature.build_from_dtypes(x=QFxp(num_qubits, num_qubits))

    @property
    def _oracle_eps(self):
        """Precision share handed to each individual oracle."""
        return self.eps / (2 * len(self.gammas))

    @property
    def cost_reg(self) -> Register:
        """Unsigned register of `2 * ceil(log2(bitsize))` bits holding the cost value."""
        cost_bits = 2 * utils.ceil(utils.log2(self.bitsize))
        return Register('cost_reg', QFxp(cost_bits, cost_bits, signed=False))

    @property
    def cost_eval_oracle(self) -> Bloq:
        """The `SKModelCostEval` bloq for this instance's size and weights."""
        return SKModelCostEval(self.bitsize, self.weights)

    def phase_oracle(self, gamma: SymbolicFloat) -> Bloq:
        """Return the phasing bloq for `gamma`, selected by `use_phase_gradient`."""
        oracle_cls = PhaseOraclePhaseGradient if self.use_phase_gradient else PhaseOracleZPow
        return oracle_cls(self.cost_reg, gamma, self._oracle_eps)

    def build_composite_bloq(self, bb: 'BloqBuilder', **soqs: 'SoquetT') -> Dict[str, 'SoquetT']:
        """Wire up the alternating gamma / beta layers on the `x` register."""
        grad_state = None
        if self.use_phase_gradient:
            # The gradient register size is taken from the phase oracle itself.
            grad_state = PhaseGradientState(self.phase_oracle(0).b_grad, eps=self._oracle_eps)
            soqs['phase_grad'] = bb.add(grad_state)

        # Alternately apply a gamma layer and a beta layer.
        for layer_gamma, layer_beta in zip(self.gammas, self.betas):
            # 1. Gamma layer: phasing via the cost function.
            gamma_layer = PhasingViaCostFunction(
                self.cost_eval_oracle, self.phase_oracle(layer_gamma)
            )
            soqs = bb.add_d(gamma_layer, **soqs)
            # 2. Beta layer: the driver oracle on `x`.
            beta_layer = DriverOracle(self.bitsize, layer_beta, self._oracle_eps)
            soqs['x'] = bb.add(beta_layer, x=soqs['x'])

        if self.use_phase_gradient:
            # Undo the gradient-state preparation; `phase_grad` is not returned.
            bb.add(grad_state.adjoint(), phase_grad=soqs.pop('phase_grad'))
        return soqs

In [None]:
# Phasing-via-cost-function variant with `use_phase_gradient` turned off.
qaoa_phase_via_cost_zpow = QAOA_SKModel(
    n, weights, gamma, beta, eps, use_phase_gradient=False
)
show_bloq(qaoa_phase_via_cost_zpow.decompose_bloq())
(
    call_graph_phase_via_cost_zpow,
    sigma_phase_via_cost_zpow,
) = qaoa_phase_via_cost_zpow.call_graph()
show_call_graph(call_graph_phase_via_cost_zpow)
show_counts_sigma(sigma_phase_via_cost_zpow)

In [None]:
# Phasing-via-cost-function variant with `use_phase_gradient` turned on.
qaoa_phase_via_cost_qvr = QAOA_SKModel(
    n, weights, gamma, beta, eps, use_phase_gradient=True
)
show_bloq(qaoa_phase_via_cost_qvr.decompose_bloq())
(
    call_graph_phase_via_cost_qvr,
    sigma_phase_via_cost_qvr,
) = qaoa_phase_via_cost_qvr.call_graph()
show_call_graph(call_graph_phase_via_cost_qvr)
show_counts_sigma(sigma_phase_via_cost_qvr)

In [None]:
def get_sigma_naive(n, eps):
    """Return the second element of `call_graph()` for the naive `QAOA` bloq.

    Uses the notebook-level `weights`, `gamma` and `beta`.
    """
    naive_bloq = QAOA(n, weights, gamma, beta, eps)
    _, sigma = naive_bloq.call_graph()
    return sigma

def get_sigma_zpow(n, eps):
    """Return the second element of `call_graph()` for `QAOA_SKModel` without phase gradient.

    Uses the notebook-level `weights`, `gamma` and `beta`.
    """
    zpow_bloq = QAOA_SKModel(n, weights, gamma, beta, eps, False)
    _, sigma = zpow_bloq.call_graph()
    return sigma

def get_sigma_qvr(n, eps):
    """Return the second element of `call_graph()` for `QAOA_SKModel` with phase gradient.

    Uses the notebook-level `weights`, `gamma` and `beta`.
    """
    qvr_bloq = QAOA_SKModel(n, weights, gamma, beta, eps, True)
    _, sigma = qvr_bloq.call_graph()
    return sigma

In [None]:
# Symbolic T-count difference between the ZPow and phase-gradient variants.
t_count_zpow = sigma_phase_via_cost_zpow[TGate()]
t_count_qvr = sigma_phase_via_cost_qvr[TGate()]
t_count_zpow - t_count_qvr

In [None]:
# Plot costs for Naive, ZPow, QVR
import matplotlib.pyplot as plt

def plot(eps_val, cost_naive, cost_zpow, cost_qvr, *, show_naive=False):
    """Plot QAOA T-counts against precision for the different phase-oracle variants.

    Args:
        eps_val: Precision values; plotted on the x-axis as `-log10(eps)`.
        cost_naive: T-counts for the naive construction; drawn only when
            `show_naive` is True.
        cost_zpow: T-counts for the ZPow variant.
        cost_qvr: T-counts for the phase-gradient ("qvr") variant.
        show_naive: Also draw the naive curve. Defaults to False, which keeps
            the previous two-curve output.
    """
    x = [-np.log10(v) for v in eps_val]
    curves = [('zpow', cost_zpow), ('qvr', cost_qvr)]
    if show_naive:
        curves.insert(0, ('naive', cost_naive))
    for label, costs in curves:
        plt.plot(x, costs, label=label)
    plt.xlabel("-log10(eps)")
    plt.ylabel("QAOA T-count")
    plt.legend()
    plt.show()

eps_vals = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9]
n_val = 10_000


def _t_count_at(sigma, eps_value):
    """Substitute `eps_value` for `eps`, then `n_val` for `n`, in the T-count of `sigma`."""
    return sigma[TGate()].subs(eps, eps_value).subs(n, n_val)


cost_naive = [_t_count_at(sigma_naive, val) for val in eps_vals]
cost_zpow = [_t_count_at(sigma_phase_via_cost_zpow, val) for val in eps_vals]
cost_qvr = [_t_count_at(sigma_phase_via_cost_qvr, val) for val in eps_vals]
# Per-precision T-count gap between the two phasing variants.
print(np.array(cost_zpow) - np.array(cost_qvr))
plot(eps_vals, cost_naive, cost_zpow, cost_qvr)

In [None]:
# Same sweep as before, but rebuilding each bloq with concrete `n_val` and eps values.
eps_vals = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9]
cost_naive, cost_zpow, cost_qvr = (
    [sigma_getter(n_val, val)[TGate()] for val in eps_vals]
    for sigma_getter in (get_sigma_naive, get_sigma_zpow, get_sigma_qvr)
)
plot(eps_vals, cost_naive, cost_zpow, cost_qvr)