# Optimal variational unitary solver
Created 16/09/2025

Objectives:
* Based off [previous notebook](quimb_first_variational_unitary_solver.ipynb), explore possible solvers and see if there's one that gives a clear advantage.
* May need to explore hyperparamters too.
* Compare gradient methods vs non-gradient methods.
* Work off of "hardest case", i.e. $b=0.9$.
    * Figure out required depth.

# Package imports

In [1]:
import sys

In [2]:
sys.path.append("../../../")

In [3]:
from itertools import chain
import re

In [4]:
import h5py
from tenpy.tools import hdf5_io
import tenpy
import tenpy.linalg.np_conserved as npc

import os
import pickle

In [5]:
import numpy as np
import jax.numpy as jnp

import matplotlib.pyplot as plt

In [6]:
import quimb as qu
import quimb.tensor as qtn
from quimb.tensor.optimize import TNOptimizer



# Load data

In [7]:
DATA_DIR = r"../../../data/transverse_cluster_200_site_dmrg"

In [8]:
loaded_data = list()

for local_file_name in os.listdir(DATA_DIR):
    f_name = r"{}/{}".format(DATA_DIR, local_file_name, ignore_unknown=False)
    with h5py.File(f_name, 'r') as f:
        data = hdf5_io.load_from_hdf5(f)
        loaded_data.append(data)

In [9]:
b_parameters = sorted(list(d['paramters']['B'] for d in loaded_data))

In [10]:
psi_dict = dict()

In [11]:
for b in b_parameters:
    psi = next(
        d['wavefunction']
        for d in loaded_data
        if d['paramters']['B'] == b
    )

    rounded_b = round(b, 1)
    psi_dict[rounded_b] = psi

In [16]:
psi = psi_dict[0.9]

# Definitions

In [12]:
np_I = np.array([[1,0],[0,1]])
np_X = np.array([[0,1],[1,0]])

In [13]:
symmetry_actions = [
    [np_I, np_I],
    [np_I, np_X],
    [np_X, np_I],
    [np_X, np_X]
]

In [14]:
symmetry_labels = [
    'II',
    'IX',
    'XI',
    'XX'
]

In [15]:
def generate_problem_rdm(quimb_psi, symmetry_site_pairs, leftmost_symmetry_site,
                         num_symmetry_sites, num_boundary_sites):
    q_top = quimb_psi.copy(deep=True)
    for i, s in symmetry_site_pairs:
        q_top.gate(
            s,
            where=i,
            contract=False,
            inplace=True
        )

    
    indices_to_map = list(chain(
        range(leftmost_symmetry_site-num_boundary_sites, leftmost_symmetry_site),
        range(leftmost_symmetry_site+num_symmetry_sites, leftmost_symmetry_site+num_symmetry_sites+num_boundary_sites)
    ))

    index_mapping = {f'k{i}': f'b{i}' for i in indices_to_map}

    q_bottom = (
        quimb_psi
        .copy()
        .reindex(index_mapping, inplace=True)
        .conj()
    )

    sites_to_contract = {
        'left': list(range(leftmost_symmetry_site-num_boundary_sites)),
        'middle': list(range(leftmost_symmetry_site, leftmost_symmetry_site+num_symmetry_sites)),
        'right': list(range(leftmost_symmetry_site+num_symmetry_sites+num_boundary_sites, quimb_psi.L))
    }

    tags_to_contract = {
        k: [f'I{i}' for i in v]
        for k, v in sites_to_contract.items()
    }

    tn = (q_top & q_bottom)

    tnc = (
        tn
        .contract(tags_to_contract['left'])
        .contract(tags_to_contract['middle'])
        .contract(tags_to_contract['right'])
    )

    return tnc

## Optimisation functions

In [33]:
def loss(circ_pair, problem_rdm):
    c = (problem_rdm & circ_pair)^...

    c_abs_squared = (
        c
        *jnp.conjugate(c)
    )
    c_abs_squared = jnp.real(c_abs_squared)

    out = 1-c_abs_squared

    return out

## Gate functions

In [18]:
def single_qubit_layer(circ, gate_round=None):
    """Apply a parametrizable layer of single qubit ``U3`` gates.
    """
    for i in range(circ.N):
        # initialize with random parameters
        params = qu.randn(3, dist='uniform')
        circ.apply_gate(
            'U3',
            *params,
            i,
            gate_round=gate_round,
            parametrize=True
        )

In [19]:
def two_qubit_layer(circ, gate2='CZ', start=0, gate_round=None):
    """Apply a layer of constant entangling gates.
    """
    num_sites = circ.N # Assuming this is even
    sites = [
        i % num_sites
        for i in range(start, start + num_sites)
    ]
    
    site_pairs = list(zip(sites[::2], sites[1::2]))

    for i, j in site_pairs:
        circ.apply_gate(
            gate2,
            i,
            j,
            gate_round=gate_round
        )

In [20]:
def ansatz_circuit(n, depth, first_site=0, gate2='CZ', **kwargs):
    """Construct a circuit of single qubit and entangling layers.
    """
    circ = qtn.Circuit(n, **kwargs)

    for r in range(depth):
        # single qubit gate layer
        single_qubit_layer(circ, gate_round=r)

        # alternate between forward and backward CZ layers
        two_qubit_layer(
            circ, gate2=gate2, gate_round=r, start=r % 2)

    # add a final single qubit layer
    single_qubit_layer(circ, gate_round=r + 1)

    circ = circ.uni
    
    if first_site != 0:
        index_labels = ['k', 'b']

        index_map = {
            f'{l}{i}': f'{l}{i+first_site}'
            for i in range(n)
            for l in index_labels
        }

        circ.reindex(index_map, inplace=True)

    return circ

In [21]:
def ansatz_circuit_pair(leftmost_symmetry_site,
    num_symmetry_sites, num_boundary_sites, depth,
    gate2='CZ', **kwargs):
    """Construct a circuit of single qubit and entangling layers.
    """
    left_start_site = leftmost_symmetry_site-num_boundary_sites
    right_start_site = leftmost_symmetry_site+num_symmetry_sites

    left_circuit = ansatz_circuit(
        num_boundary_sites,
        depth,
        first_site=left_start_site,
        gate2=gate2,
        **kwargs
    )

    right_circuit = ansatz_circuit(
        num_boundary_sites,
        depth,
        first_site=right_start_site,
        gate2=gate2,
        **kwargs
    )

    circ_pair = (left_circuit & right_circuit)

    return circ_pair

# Initialize problem rdm

In [22]:
num_boundary_sites=2
left_most_symmetry_site=60
num_symmetry_sites=80

total_physical_dim = 2**num_boundary_sites

In [24]:
mps_psi = psi

In [25]:
psi_arrays = list()
psi_arrays.append(mps_psi.get_B(0, 'Th')[0, ...].to_ndarray())
for i in range(1, mps_psi.L-1):
    psi_arrays.append(mps_psi.get_B(i).to_ndarray())
psi_arrays.append(mps_psi.get_B(mps_psi.L-1)[..., 0].to_ndarray())

q1 = (
    qtn
    .tensor_1d
    .MatrixProductState(psi_arrays, shape='lpr')
)

Pick this symmetry action for example.

In [26]:
k2 = 1
bs = symmetry_actions[1]

In [27]:
symmetry_site_pairs = (
    [(i, bs[0]) for i in range(left_most_symmetry_site, left_most_symmetry_site+num_symmetry_sites, 2)]
    + [(i, bs[1]) for i in range(left_most_symmetry_site+1, left_most_symmetry_site+num_symmetry_sites+1, 2)]
)

problem_rdm = generate_problem_rdm(
    q1,
    symmetry_site_pairs,
    left_most_symmetry_site,
    num_symmetry_sites,
    num_boundary_sites
)

# Find gate depth
Iterate over gate depth using a simple L-BFGS-B (gradient) solver.

In [28]:
gate_depths = list(range(1, 6))

In [29]:
n = 2
gate2 = 'CZ'

In [37]:
num_iterations_per_depth = 10

In [38]:
scores = list()

for depth in gate_depths:
    print(depth)
    scores.append(list())

    for _ in range(num_iterations_per_depth):
        circ_pair = ansatz_circuit_pair(
            left_most_symmetry_site,
            num_symmetry_sites,
            num_boundary_sites,
            depth
        )
        
        tnopt = qtn.TNOptimizer(
            circ_pair,                        # the tensor network we want to optimize
            loss,                     # the function we want to minimize
            loss_constants={'problem_rdm': problem_rdm},  # supply U to the loss function as a constant TN
            tags=['U3'],              # only optimize U3 tensors
            #autodiff_backend='jax',   # use 'autograd' for non-compiled optimization
            optimizer='L-BFGS-B',     # the optimization algorithm
        )
        
        circ_opt = tnopt.optimize(
            n=5000,
            jac=True,
            hessp=False
        )

        scores[-1].append(tnopt.losses)



1


+0.815849065781 [best: +0.815849065781] :   2%|█▏                                                           | 97/5000 [00:00<00:39, 122.64it/s]
+0.818654298782 [best: +0.818654298782] :   1%|▍                                                            | 36/5000 [00:00<00:44, 112.33it/s]
+0.855100989342 [best: +0.855100989342] :   1%|▊                                                            | 66/5000 [00:00<00:26, 185.96it/s]
+0.857028961182 [best: +0.857028961182] :   1%|▋                                                            | 60/5000 [00:00<00:27, 180.78it/s]
+0.815865874290 [best: +0.815865874290] :   2%|█                                                            | 88/5000 [00:00<00:24, 199.89it/s]
+0.815807282925 [best: +0.815807282925] :   2%|█▎                                                          | 114/5000 [00:00<00:17, 283.04it/s]
+0.815865933895 [best: +0.815865933895] :   2%|▉                                                            | 79/5000 [00:00<00:23, 211.

2


+0.551293015480 [best: +0.551293015480] :   1%|▌                                                            | 49/5000 [00:00<00:43, 113.71it/s]
+0.551290988922 [best: +0.551290988922] :   1%|▉                                                            | 72/5000 [00:00<00:30, 162.74it/s]
+0.551293849945 [best: +0.551293849945] :   1%|▌                                                            | 49/5000 [00:00<00:42, 116.81it/s]
+0.551290571690 [best: +0.551290571690] :   2%|▉                                                            | 77/5000 [00:00<00:28, 171.25it/s]
+0.551290988922 [best: +0.551290988922] :   1%|▊                                                            | 69/5000 [00:00<00:32, 151.27it/s]
+0.551291108131 [best: +0.551290988922] :   1%|▊                                                            | 69/5000 [00:00<00:41, 117.62it/s]
+0.551286101341 [best: +0.551286101341] :   1%|▋                                                            | 60/5000 [00:00<00:36, 134.

3


+0.551266074181 [best: +0.551266074181] :   1%|▌                                                             | 44/5000 [00:00<01:03, 77.87it/s]
+0.551264584064 [best: +0.551264584064] :   1%|▉                                                             | 72/5000 [00:00<00:53, 92.09it/s]
+0.551292657852 [best: +0.551292657852] :   1%|▋                                                            | 61/5000 [00:00<00:44, 111.29it/s]
+0.551293134689 [best: +0.551293134689] :   1%|▌                                                             | 43/5000 [00:00<01:00, 81.94it/s]
+0.551290869713 [best: +0.551290869713] :   2%|█▏                                                          | 100/5000 [00:00<00:29, 168.28it/s]
+0.551237821579 [best: +0.551237821579] :   1%|▋                                                            | 55/5000 [00:00<00:49, 100.06it/s]
+0.551247954369 [best: +0.551247954369] :   1%|▌                                                             | 50/5000 [00:00<01:09, 70.

4


+0.551287770271 [best: +0.551287770271] :   1%|▊                                                             | 62/5000 [00:00<01:10, 70.33it/s]
+0.551243603230 [best: +0.551243603230] :   1%|▋                                                             | 59/5000 [00:00<00:56, 86.84it/s]
+0.551272034645 [best: +0.551272034645] :   1%|▋                                                             | 51/5000 [00:00<01:02, 78.66it/s]
+0.551294565201 [best: +0.551294565201] :   1%|▋                                                             | 56/5000 [00:00<01:00, 81.92it/s]
+0.551276326180 [best: +0.551276326180] :   2%|█▏                                                           | 93/5000 [00:00<00:42, 115.52it/s]
+0.551289081573 [best: +0.551289081573] :   1%|▌                                                             | 41/5000 [00:00<01:44, 47.57it/s]
+0.551236629486 [best: +0.551236629486] :   2%|▉                                                             | 79/5000 [00:01<01:03, 78.

5


+0.551239728928 [best: +0.551239728928] :   2%|█▎                                                          | 114/5000 [00:00<00:37, 131.84it/s]
+0.551243305206 [best: +0.551243305206] :   2%|█                                                             | 87/5000 [00:00<00:56, 87.73it/s]
+0.551279306412 [best: +0.551279306412] :   2%|▉                                                             | 76/5000 [00:00<00:59, 83.15it/s]
+0.551269173622 [best: +0.551269173622] :   2%|▉                                                             | 80/5000 [00:00<01:01, 80.45it/s]
+0.551252722740 [best: +0.551252722740] :   1%|▋                                                             | 56/5000 [00:01<01:36, 51.33it/s]
+0.551234602928 [best: +0.551234602928] :   2%|▉                                                             | 80/5000 [00:00<00:58, 83.63it/s]
+0.551242709160 [best: +0.551242709160] :   1%|▋                                                             | 57/5000 [00:00<01:07, 72.

In [41]:
scores_array = np.array([[np.min(X) for X in l]for l in scores])

In [43]:
np.round(scores_array, 2)

array([[0.82, 0.82, 0.86, 0.86, 0.82, 0.82, 0.82, 0.82, 0.86, 0.82],
       [0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55],
       [0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55],
       [0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55],
       [0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55]])

In [45]:
np.round(np.sqrt(1-scores_array), 2)

array([[0.43, 0.43, 0.38, 0.38, 0.43, 0.43, 0.43, 0.43, 0.38, 0.43],
       [0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67],
       [0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67],
       [0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67],
       [0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67]])

Did we hit a bottleneck? Scores look worse than for MPO/SVD solutions. Try basinhopping.

In [46]:
scores = list()

for depth in gate_depths:
    print(depth)
    scores.append(list())

    for _ in range(num_iterations_per_depth):
        circ_pair = ansatz_circuit_pair(
            left_most_symmetry_site,
            num_symmetry_sites,
            num_boundary_sites,
            depth
        )
        
        tnopt = qtn.TNOptimizer(
            circ_pair,                        # the tensor network we want to optimize
            loss,                     # the function we want to minimize
            loss_constants={'problem_rdm': problem_rdm},  # supply U to the loss function as a constant TN
            tags=['U3'],              # only optimize U3 tensors
            #autodiff_backend='jax',   # use 'autograd' for non-compiled optimization
            optimizer='L-BFGS-B',     # the optimization algorithm
        )
        
        circ_opt = tnopt.optimize_basinhopping(
            n=500,
            nhop=10,
            jac=True,
            hessp=False
        )

        scores[-1].append(tnopt.losses)



1


+0.815850436687 [best: +0.815805017948] :  10%|██████▏                                                     | 513/5000 [00:00<00:07, 583.23it/s]
+0.815867364407 [best: +0.815854907036] :  12%|██████▉                                                     | 577/5000 [00:00<00:06, 662.96it/s]
+0.815808713436 [best: +0.815803170204] :  10%|█████▋                                                      | 478/5000 [00:00<00:06, 696.20it/s]
+0.815865874290 [best: +0.815865814686] :   8%|█████                                                       | 417/5000 [00:00<00:06, 673.55it/s]
+0.815846741199 [best: +0.815846741199] :  10%|██████▏                                                     | 517/5000 [00:00<00:06, 713.78it/s]
+0.815866708755 [best: +0.815837144852] :  10%|█████▊                                                      | 487/5000 [00:00<00:06, 704.12it/s]
+0.815860688686 [best: +0.815810203552] :  11%|██████▍                                                     | 534/5000 [00:00<00:06, 713.

2


+0.551292300224 [best: +0.551259160042] :  12%|██████▉                                                     | 582/5000 [00:01<00:07, 567.53it/s]
+0.551259875298 [best: +0.551234960556] :   9%|█████▎                                                      | 447/5000 [00:01<00:11, 398.54it/s]
+0.551293253899 [best: +0.551293134689] :  13%|███████▉                                                    | 665/5000 [00:01<00:06, 639.63it/s]
+0.551250755787 [best: +0.551250755787] :  10%|██████                                                      | 502/5000 [00:00<00:07, 577.35it/s]
+0.551284432411 [best: +0.551262140274] :   9%|█████▋                                                      | 471/5000 [00:00<00:08, 564.19it/s]
+0.551293730736 [best: +0.551291465759] :   9%|█████▌                                                      | 463/5000 [00:00<00:07, 570.38it/s]
+0.551284432411 [best: +0.551284432411] :   9%|█████▌                                                      | 467/5000 [00:00<00:07, 570.

3


+0.551264047623 [best: +0.551234245300] :  11%|██████▊                                                     | 563/5000 [00:01<00:08, 526.83it/s]
+0.551259398460 [best: +0.551244020462] :  10%|█████▉                                                      | 498/5000 [00:01<00:09, 485.53it/s]
+0.551278114319 [best: +0.551238000393] :  10%|█████▊                                                      | 481/5000 [00:01<00:09, 461.75it/s]
+0.551281929016 [best: +0.551265239716] :  12%|██████▉                                                     | 575/5000 [00:01<00:09, 488.50it/s]
+0.551286578178 [best: +0.551266908646] :  11%|██████▊                                                     | 563/5000 [00:01<00:10, 408.93it/s]
+0.551243484020 [best: +0.551243484020] :  11%|██████▋                                                     | 554/5000 [00:01<00:12, 369.91it/s]
+0.551255941391 [best: +0.551236271858] :  10%|█████▊                                                      | 484/5000 [00:01<00:10, 451.

4


+0.551274359226 [best: +0.551242947578] :  13%|███████▋                                                    | 644/5000 [00:01<00:09, 437.27it/s]
+0.551272928715 [best: +0.551272809505] :  19%|███████████▎                                                | 946/5000 [00:02<00:10, 390.62it/s]
+0.551277399063 [best: +0.551262378693] :  13%|███████▊                                                    | 646/5000 [00:01<00:12, 348.29it/s]
+0.551268696785 [best: +0.551236748695] :  17%|██████████                                                  | 836/5000 [00:01<00:08, 467.04it/s]
+0.551293134689 [best: +0.551235258579] :  15%|█████████                                                   | 755/5000 [00:01<00:09, 456.14it/s]
+0.551259517670 [best: +0.551259517670] :  10%|██████                                                      | 510/5000 [00:01<00:11, 385.20it/s]
+0.551283717155 [best: +0.551254451275] :  12%|███████▍                                                    | 617/5000 [00:01<00:12, 338.

5


+0.551236748695 [best: +0.551234006882] :  19%|███████████▎                                                | 944/5000 [00:02<00:08, 465.37it/s]
+0.551257610321 [best: +0.551234364510] :  16%|█████████▎                                                  | 775/5000 [00:02<00:12, 327.20it/s]
+0.551243901253 [best: +0.551234245300] :  15%|█████████                                                   | 759/5000 [00:01<00:09, 451.61it/s]
+0.551267981529 [best: +0.551237702370] :  16%|█████████▋                                                  | 805/5000 [00:01<00:08, 468.61it/s]
+0.551239490509 [best: +0.551234126091] :  12%|███████▏                                                    | 604/5000 [00:01<00:10, 402.67it/s]
+0.551275849342 [best: +0.551235139370] :  15%|████████▉                                                   | 741/5000 [00:01<00:11, 374.95it/s]
+0.551252007484 [best: +0.551234364510] :  16%|█████████▍                                                  | 785/5000 [00:01<00:08, 472.

So a circuit depth of 2 looks optimal.

In [47]:
depth = 2

# Gradient-free methods

In [52]:
all_methods = [
    'Nelder-Mead',
    'L-BFGS-B',
    'Powell',
    'CG',
    'BFGS',
    'Newton-CG',
    'TNC',
    'COBYLA',
    'COBYQA',
    'SLSQP',
    'dogleg',
    'trust-ncg',
    'trust-exact',
    'trust-krylov'
]

In [66]:
grad_free_completed_methods = [
    'Nelder-Mead',
    'L-BFGS-B',
    'Powell',
    'CG',
    'BFGS',
    'TNC',
    'COBYLA',
    'SLSQP',
]

In [49]:
num_iterations=10

In [75]:
for method in grad_free_uncompleted_methods:
    print(method)
    scores.append(list())

    for _ in range(num_iterations):
        circ_pair = ansatz_circuit_pair(
            left_most_symmetry_site,
            num_symmetry_sites,
            num_boundary_sites,
            depth
        )
        
        tnopt = qtn.TNOptimizer(
            circ_pair,                        # the tensor network we want to optimize
            loss,                     # the function we want to minimize
            loss_constants={'problem_rdm': problem_rdm},  # supply U to the loss function as a constant TN
            tags=['U3'],              # only optimize U3 tensors
            #autodiff_backend='jax',   # use 'autograd' for non-compiled optimization
            optimizer=method,     # the optimization algorithm
        )
        
        circ_opt = tnopt.optimize(
            n=5000,
            jac=False,
            hessp=False
        )

        scores[-1].append(tnopt.losses)

trust-krylov


  0%|                                                                                                                 | 0/5000 [00:00<?, ?it/s]


ValueError: ('Jacobian is required for trust region ', 'exact minimization.')

In [79]:
len(scores), len(grad_free_completed_methods)

(8, 8)

In [86]:
best_final_scores = [
    min(np.min(X) for X in l)
    for l in scores
]

In [87]:
best_final_scores

[0.5512935519218445,
 0.9971219301223755,
 0.5512736439704895,
 0.9772905111312866,
 0.9941859245300293,
 0.9963281154632568,
 0.5512793660163879,
 0.9914867281913757]

In [88]:
[
    m for (m, s) in zip(grad_free_completed_methods, best_final_scores)
    if s > 0.6
]

['L-BFGS-B', 'CG', 'BFGS', 'TNC', 'SLSQP']

In [90]:
[
    m for (m, s) in zip(grad_free_completed_methods, best_final_scores)
    if s < 0.6
]

['Nelder-Mead', 'Powell', 'COBYLA']

In [119]:
sub_scores = [
    l for (l, s) in zip(scores, best_final_scores)
    if s < 0.6
]

In [120]:
sub_best_iters = [
    np.array([np.argmax(np.array(l1)<0.6) for l1 in l])
    for l in sub_scores
]

In [121]:
sub_best_iters

[array([1339, 1458, 1563, 1473, 1115, 1591, 1669, 1376, 1345, 2194]),
 array([379, 950, 409, 412, 816, 347, 688, 438, 782, 747]),
 array([142, 144, 166, 159, 107, 170, 134, 153, 212, 121])]

In [89]:
len(grad_free_completed_methods)

8

In [93]:
l_bfgs_b_scores = list()

In [94]:
for _ in range(num_iterations):
    circ_pair = ansatz_circuit_pair(
        left_most_symmetry_site,
        num_symmetry_sites,
        num_boundary_sites,
        depth
    )
    
    tnopt = qtn.TNOptimizer(
        circ_pair,                        # the tensor network we want to optimize
        loss,                     # the function we want to minimize
        loss_constants={'problem_rdm': problem_rdm},  # supply U to the loss function as a constant TN
        tags=['U3'],              # only optimize U3 tensors
        #autodiff_backend='jax',   # use 'autograd' for non-compiled optimization
        optimizer='L-BFGS-B',     # the optimization algorithm
    )
    
    circ_opt = tnopt.optimize(
        n=5000,
        eps=1e-4,
        jac=False,
        hessp=False
    )

    l_bfgs_b_scores.append(tnopt.losses)

+0.551310777664 [best: +0.551310241222] :  47%|███████████████████████████▉                               | 2368/5000 [00:04<00:04, 554.26it/s]
+0.551309525967 [best: +0.551309347153] :  46%|███████████████████████████                                | 2294/5000 [00:03<00:03, 683.73it/s]
+0.551351785660 [best: +0.551351428032] :  40%|███████████████████████▌                                   | 1998/5000 [00:02<00:04, 722.30it/s]
+0.551298975945 [best: +0.551298737526] :  58%|██████████████████████████████████▍                        | 2923/5000 [00:03<00:02, 758.54it/s]
+0.551317095757 [best: +0.551317095757] : : 5069it [00:06, 767.49it/s]                                                                         
+0.551379203796 [best: +0.551378905773] :  55%|████████████████████████████████▎                          | 2738/5000 [00:03<00:02, 769.79it/s]
+0.999990224838 [best: +0.999990224838] :   1%|▉                                                            | 74/5000 [00:00<00:18, 260.

In [116]:
l_bfgs_b_num_iters = [
    np.argmax(np.array(l)<0.6)
    for l in l_bfgs_b_scores
]

In [117]:
l_bfgs_b_num_iters

[629, 666, 555, 518, 740, 592, 0, 407, 777, 814]

In [99]:
cg_scores = list()

In [100]:
for _ in range(num_iterations):
    circ_pair = ansatz_circuit_pair(
        left_most_symmetry_site,
        num_symmetry_sites,
        num_boundary_sites,
        depth
    )
    
    tnopt = qtn.TNOptimizer(
        circ_pair,                        # the tensor network we want to optimize
        loss,                     # the function we want to minimize
        loss_constants={'problem_rdm': problem_rdm},  # supply U to the loss function as a constant TN
        tags=['U3'],              # only optimize U3 tensors
        #autodiff_backend='jax',   # use 'autograd' for non-compiled optimization
        optimizer='CG',     # the optimization algorithm
    )
    
    circ_opt = tnopt.optimize(
        n=5000,
        eps=1e-3,
        jac=False,
        hessp=False
    )

    cg_scores.append(tnopt.losses)

+0.551296532154 [best: +0.551295995712] : : 5227it [00:06, 767.88it/s]                                                                         
+0.551293969154 [best: +0.551293611526] :  86%|██████████████████████████████████████████████████▊        | 4304/5000 [00:05<00:00, 797.02it/s]
+0.551294922829 [best: +0.551294684410] : : 5376it [00:06, 804.09it/s]                                                                         
+0.551311135292 [best: +0.551311075687] : : 6005it [00:07, 805.16it/s]                                                                         
+0.551289439201 [best: +0.551289081573] :  87%|███████████████████████████████████████████████████▏       | 4340/5000 [00:05<00:00, 804.01it/s]
+0.551294088364 [best: +0.551293790340] :  85%|█████████████████████████████████████████████████▉         | 4227/5000 [00:05<00:00, 796.84it/s]
+0.551296412945 [best: +0.551293611526] :  93%|███████████████████████████████████████████████████████▏   | 4673/5000 [00:06<00:00, 777.

In [114]:
cg_num_iters = [
    np.argmax(np.array(l)<0.6)
    for l in cg_scores
]

In [115]:
cg_num_iters

[1147, 814, 777, 888, 1036, 888, 814, 3700, 1221, 777]

In [101]:
bfgs_scores = list()

In [102]:
for _ in range(num_iterations):
    circ_pair = ansatz_circuit_pair(
        left_most_symmetry_site,
        num_symmetry_sites,
        num_boundary_sites,
        depth
    )
    
    tnopt = qtn.TNOptimizer(
        circ_pair,                        # the tensor network we want to optimize
        loss,                     # the function we want to minimize
        loss_constants={'problem_rdm': problem_rdm},  # supply U to the loss function as a constant TN
        tags=['U3'],              # only optimize U3 tensors
        #autodiff_backend='jax',   # use 'autograd' for non-compiled optimization
        optimizer='BFGS',     # the optimization algorithm
    )
    
    circ_opt = tnopt.optimize(
        n=5000,
        eps=1e-3,
        jac=False,
        hessp=False
    )

    bfgs_scores.append(tnopt.losses)

+0.551294088364 [best: +0.551293790340] :  76%|████████████████████████████████████████████▋              | 3785/5000 [00:05<00:01, 692.25it/s]
+0.551293611526 [best: +0.551293373108] : : 6784it [00:09, 752.80it/s]                                                                         
+0.551293134689 [best: +0.551292777061] :  76%|████████████████████████████████████████████▋              | 3786/5000 [00:05<00:01, 752.46it/s]
+0.551291584969 [best: +0.551290869713] :  93%|███████████████████████████████████████████████████████▏   | 4674/5000 [00:06<00:00, 773.20it/s]
+0.551293373108 [best: +0.551293134689] : : 5825it [00:07, 745.23it/s]                                                                         
+0.551256895065 [best: +0.551256477833] :  70%|█████████████████████████████████████████▌                 | 3525/5000 [00:04<00:02, 709.99it/s]
+0.551299452782 [best: +0.551299095154] :  75%|████████████████████████████████████████████▏              | 3749/5000 [00:05<00:01, 698.

In [112]:
bfgs_num_iters = [
    np.argmax(np.array(l)<0.6)
    for l in bfgs_scores
]

In [113]:
bfgs_num_iters

[851, 999, 925, 1221, 629, 703, 777, 555, 666, 851]

In [103]:
tnc_scores = list()

In [104]:
for _ in range(num_iterations):
    circ_pair = ansatz_circuit_pair(
        left_most_symmetry_site,
        num_symmetry_sites,
        num_boundary_sites,
        depth
    )
    
    tnopt = qtn.TNOptimizer(
        circ_pair,                        # the tensor network we want to optimize
        loss,                     # the function we want to minimize
        loss_constants={'problem_rdm': problem_rdm},  # supply U to the loss function as a constant TN
        tags=['U3'],              # only optimize U3 tensors
        #autodiff_backend='jax',   # use 'autograd' for non-compiled optimization
        optimizer='TNC',     # the optimization algorithm
    )
    
    circ_opt = tnopt.optimize(
        n=5000,
        eps=1e-3,
        jac=False,
        hessp=False
    )

    tnc_scores.append(tnopt.losses)

  self.res = minimize(
+0.553827941418 [best: +0.553820729256] : : 11951it [00:16, 706.03it/s]                                                                        
+0.887832164764 [best: +0.887813508511] : : 7548it [00:10, 731.57it/s]                                                                         
+0.552288174629 [best: +0.552272439003] : : 13357it [00:18, 729.15it/s]                                                                        
+0.551466464996 [best: +0.551464796066] : : 9472it [00:11, 799.20it/s]                                                                         
+0.553988099098 [best: +0.553982257843] : : 10915it [00:15, 718.06it/s]                                                                        
+0.999999761581 [best: +0.999999761581] :   1%|▍                                                            | 37/5000 [00:00<00:39, 126.56it/s]
+0.553511261940 [best: +0.553508520126] : : 13357it [00:17, 769.46it/s]                                          

In [105]:
slsqp_scores = list()

In [106]:
for _ in range(num_iterations):
    circ_pair = ansatz_circuit_pair(
        left_most_symmetry_site,
        num_symmetry_sites,
        num_boundary_sites,
        depth
    )
    
    tnopt = qtn.TNOptimizer(
        circ_pair,                        # the tensor network we want to optimize
        loss,                     # the function we want to minimize
        loss_constants={'problem_rdm': problem_rdm},  # supply U to the loss function as a constant TN
        tags=['U3'],              # only optimize U3 tensors
        #autodiff_backend='jax',   # use 'autograd' for non-compiled optimization
        optimizer='SLSQP',     # the optimization algorithm
    )
    
    circ_opt = tnopt.optimize(
        n=5000,
        eps=1e-3,
        jac=False,
        hessp=False
    )

    slsqp_scores.append(tnopt.losses)

+0.999999821186 [best: +0.999999761581] :   1%|▍                                                            | 37/5000 [00:00<00:43, 114.21it/s]
+0.551293909550 [best: +0.551293909550] :  18%|██████████▋                                                 | 894/5000 [00:01<00:07, 553.59it/s]
+0.551297724247 [best: +0.551297068596] :  22%|████████████▊                                              | 1090/5000 [00:02<00:07, 516.11it/s]
+0.551296651363 [best: +0.551296532154] :  18%|██████████▋                                                 | 895/5000 [00:01<00:08, 485.75it/s]
+0.551300168037 [best: +0.551300168037] :  19%|███████████▏                                                | 929/5000 [00:02<00:11, 356.13it/s]
+0.551294922829 [best: +0.551294922829] :  25%|██████████████▉                                            | 1266/5000 [00:02<00:08, 433.28it/s]
+0.551295638084 [best: +0.551295638084] :  19%|███████████▋                                                | 969/5000 [00:02<00:09, 425.

In [122]:
slsqp_num_iters = [
    np.argmax(np.array(l)<0.6)
    for l in slsqp_scores
]

In [124]:
best_iters = [
    *sub_best_iters,
    np.array(l_bfgs_b_num_iters),
    np.array(cg_num_iters),
    np.array(bfgs_num_iters),
    np.array(slsqp_num_iters)
]

In [125]:
best_iters

[array([1339, 1458, 1563, 1473, 1115, 1591, 1669, 1376, 1345, 2194]),
 array([379, 950, 409, 412, 816, 347, 688, 438, 782, 747]),
 array([142, 144, 166, 159, 107, 170, 134, 153, 212, 121]),
 array([629, 666, 555, 518, 740, 592,   0, 407, 777, 814]),
 array([1147,  814,  777,  888, 1036,  888,  814, 3700, 1221,  777]),
 array([ 851,  999,  925, 1221,  629,  703,  777,  555,  666,  851]),
 array([  0, 337, 523, 373, 410, 483, 337, 335, 413, 704])]

In [126]:
mean_steps = [
    np.sum(X)/np.count_nonzero(X)
    for X in best_iters
]

In [127]:
mean_steps

[1512.3, 596.8, 150.8, 633.1111111111111, 1206.2, 817.7, 435.0]

Double check that Cobyla is doing that well.

In [129]:
method = 'COBYLA'

cobyla_test_scores = list()

for _ in range(30):
    circ_pair = ansatz_circuit_pair(
        left_most_symmetry_site,
        num_symmetry_sites,
        num_boundary_sites,
        depth
    )
    
    tnopt = qtn.TNOptimizer(
        circ_pair,                        # the tensor network we want to optimize
        loss,                     # the function we want to minimize
        loss_constants={'problem_rdm': problem_rdm},  # supply U to the loss function as a constant TN
        tags=['U3'],              # only optimize U3 tensors
        #autodiff_backend='jax',   # use 'autograd' for non-compiled optimization
        optimizer=method,     # the optimization algorithm
    )
    
    circ_opt = tnopt.optimize(
        n=5000,
        jac=False,
        hessp=False
    )

    cobyla_test_scores.append(tnopt.losses)

+0.551334977150 [best: +0.551334857941] :  25%|███████████████                                            | 1274/5000 [00:02<00:06, 595.84it/s]
+0.551319718361 [best: +0.551319301128] :  24%|█████████████▉                                             | 1179/5000 [00:01<00:06, 623.71it/s]
+0.551325380802 [best: +0.551325082779] :  48%|████████████████████████████▎                              | 2395/5000 [00:04<00:04, 539.81it/s]
+0.551293134689 [best: +0.551292896271] :  21%|████████████▎                                              | 1043/5000 [00:07<00:30, 131.79it/s]
+0.551310002804 [best: +0.551309704781] :  21%|████████████▌                                              | 1069/5000 [00:01<00:06, 588.70it/s]
+0.551302373409 [best: +0.551301836967] :  16%|█████████▎                                                  | 780/5000 [00:01<00:08, 519.58it/s]
+0.551336646080 [best: +0.551336050034] :  29%|█████████████████▍                                         | 1473/5000 [00:02<00:06, 556.

In [130]:
cobyla_test_num_iters = [
    np.argmax(np.array(l)<0.6)
    for l in cobyla_test_scores
]

In [132]:
np.mean(cobyla_test_num_iters)

137.8

Looks good!

# Gradient based methods

In [168]:
grad_based_methods = [
    #'Nelder-Mead',
    'L-BFGS-B',
    #'Powell',
    'CG',
    'BFGS',
    'Newton-CG',
    'TNC',
    #'COBYLA',
    'SLSQP',
    'trust-constr',
    'adam',
    'nadam',
    'rmsprop',
    'sgd',
    'cadam'
]

In [135]:
num_iterations=10

In [159]:
from collections import defaultdict

In [171]:
grad_scores = defaultdict(list)

In [172]:
for method in grad_based_methods:
    print(method)

    for _ in range(num_iterations):
        circ_pair = ansatz_circuit_pair(
            left_most_symmetry_site,
            num_symmetry_sites,
            num_boundary_sites,
            depth
        )
        
        tnopt = qtn.TNOptimizer(
            circ_pair,                        # the tensor network we want to optimize
            loss,                     # the function we want to minimize
            loss_constants={'problem_rdm': problem_rdm},  # supply U to the loss function as a constant TN
            tags=['U3'],              # only optimize U3 tensors
            #autodiff_backend='jax',   # use 'autograd' for non-compiled optimization
            optimizer=method,     # the optimization algorithm
            progbar=False
        )
        
        circ_opt = tnopt.optimize(
            n=2000,
            jac=True,
            hessp=False
        )

        grad_scores[method].append(tnopt.losses)

L-BFGS-B
CG




BFGS




Newton-CG


  self.res = minimize(


TNC




SLSQP
trust-constr


  self.H.update(self.x - self.x_prev, self.g - self.g_prev)


adam
nadam
rmsprop
sgd
cadam


In [179]:
grad_num_iters = {
    k: [np.argmax(np.array(l1)<0.6) for l1 in l]
    for k, l in grad_scores.items()
}

In [181]:
grad_num_iters

{'L-BFGS-B': [22, 34, 13, 13, 14, 28, 19, 14, 11, 16],
 'CG': [30, 20, 21, 24, 21, 18, 27, 28, 13, 24],
 'BFGS': [734, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Newton-CG': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'TNC': [49, 25, 50, 32, 104, 75, 58, 26, 116, 81],
 'SLSQP': [17, 20, 20, 15, 18, 0, 22, 12, 9, 26],
 'trust-constr': [44, 30, 43, 43, 40, 81, 88, 47, 51, 95],
 'adam': [782, 826, 779, 760, 781, 629, 822, 882, 781, 1613],
 'nadam': [847, 544, 1380, 1437, 690, 628, 1047, 690, 669, 604],
 'rmsprop': [62, 64, 54, 66, 85, 77, 78, 49, 52, 84],
 'sgd': [279, 1054, 0, 639, 272, 329, 467, 207, 1344, 1694],
 'cadam': [1127, 1312, 978, 1251, 621, 787, 1133, 1371, 638, 748]}

In [183]:
grad_mean_num_iters = {
    k: np.sum(X)/np.count_nonzero(X) for k, X in grad_mean_num_iters.items()
}

  k: np.sum(X)/np.count_nonzero(X) for k, X in grad_mean_num_iters.items()


In [184]:
grad_mean_num_iters

{'L-BFGS-B': 18.4,
 'CG': 22.6,
 'BFGS': 73.4,
 'Newton-CG': nan,
 'TNC': 61.6,
 'SLSQP': 15.9,
 'trust-constr': 56.2,
 'adam': 865.5,
 'nadam': 853.6,
 'rmsprop': 67.1,
 'sgd': 628.5,
 'cadam': 996.6}

# Conclusion
Looks like Cobyla for gradient free and L-BFGS-B with gradients win.