In [1]:
import numpy as np
import sympy as sy
import scipy as sc

import matplotlib.pyplot as plt

import cirq


##### **!! NOTE: This python + cirq notebook is legacy code for reference. It should work fine but the Julia + Yao Edition is much more up-to-date and tested. All relevant experiments have been performed using the Julia Edition of this notebook, *not* this one. !!**

In [2]:
def prepare_loss_gradient_approx(loss_func, stochastic_approx=False, stoch_shift_size_c=0.05):
    
    def loss_grad_func(eval_point, *loss_func_args):
        """
        Returns the gradient of the loss function at the given point.
        If stochastic_approx is True, then the gradient is estimated using the SPSA rule.
        """
        
        loss_grad = []
        #eval_point = np.array(eval_point)
        
        if not stochastic_approx: #Non-stochastic parameter shift rule
            for input_dim in range(len(eval_point)):
                dim_basis_vec = np.zeros(len(eval_point))
                dim_basis_vec[input_dim] = 1.0
                loss_grad.append((loss_func(eval_point + np.pi*dim_basis_vec/2, *loss_func_args) - loss_func(eval_point - np.pi*dim_basis_vec/2, *loss_func_args))/2)
            loss_grad = np.array(loss_grad)
        else: #SPSA
            random_shift_vec = np.random.choice([-1,1],len(eval_point))
            loss_grad = (loss_func(eval_point + stoch_shift_size_c*random_shift_vec, *loss_func_args) - loss_func(eval_point - stoch_shift_size_c*random_shift_vec, *loss_func_args))/(2*stoch_shift_size_c*random_shift_vec)
        
        return loss_grad
    return loss_grad_func

### The Meta-VQE

In [3]:
def processing_step_circuit(qubits, thetas):
    """
    Returns a circuit that applies the processing step of the VQE algorithm.
    
    Args:
    ----------
        qubits [list]: Qubits to apply the circuit to.
        thetas [list or ndarray]: A list of the variational parameters.
    
    Yields:
    ----------
        [list]: Gates in the circuit.
    """
    assert len(thetas) == 2 * 2 * len(qubits) # 2 paramterized layers per qubit with 2 rotation gates each
    
    rz_thetas = thetas[0:2*len(qubits)]
    ry_thetas = thetas[2*len(qubits):4*len(qubits)]
    
    #Variational Layer 1:
    for q_i, _ in enumerate(qubits):
        yield [cirq.rz(rz_thetas[2*q_i])(qubits[q_i]), cirq.ry(ry_thetas[2*q_i])(qubits[q_i])] # Even indices in the theta vectors are for the first variational layer
    
    # In-between Entangling Layer (even control indices):
    yield [cirq.CNOT(qubits[q_i], qubits[q_i+1]) for q_i in range(0, len(qubits)-1, 2)]
    
    #Variational Layer 2:
    for q_i, _ in enumerate(qubits):
        yield [cirq.rz(rz_thetas[2*q_i + 1])(qubits[q_i]), cirq.ry(ry_thetas[2*q_i + 1])(qubits[q_i])] # Odd indices in the theta vectors are for the second variational layer
    
    # Final Entangling Layer (odd control indices):
    yield [cirq.CNOT(qubits[q_i], qubits[q_i+1]) for q_i in range(1, len(qubits)-1, 2)]


In [4]:
import sympy

num_qubits = 4
qubits = cirq.LineQubit.range(num_qubits)

theta_0 = [sympy.Symbol("theta_{}^({})".format(i, j)) for j in range(2) for i in range(2 * num_qubits)]

circuit = cirq.Circuit(processing_step_circuit(qubits, theta_0))
print(circuit.to_text_diagram(transpose=True))

0               1               2               3
│               │               │               │
Rz(theta_0^(0)) Rz(theta_2^(0)) Rz(theta_4^(0)) Rz(theta_6^(0))
│               │               │               │
Ry(theta_0^(1)) Ry(theta_2^(1)) Ry(theta_4^(1)) Ry(theta_6^(1))
│               │               │               │
@───────────────X               @───────────────X
│               │               │               │
Rz(theta_1^(0)) Rz(theta_3^(0)) Rz(theta_5^(0)) Rz(theta_7^(0))
│               │               │               │
Ry(theta_1^(1)) Ry(theta_3^(1)) Ry(theta_5^(1)) Ry(theta_7^(1))
│               │               │               │
│               @───────────────X               │
│               │               │               │


In [5]:
def encoding_function(Delta, phi):
    """
    Returns the value of the encoding function for a given Delta and input vector phi
    
    Args:
    ----------
        Delta [float]: The value of the parameter Delta.
        phi [list or ndarray]: A list of the variational parameters.
    
    Returns:
    ----------
        [ndarray]: The encoding function.
    """
    weights = phi[:int(len(phi)/2)]
    biases = phi[int(len(phi)/2):]
    weights_z, weights_y = weights[:int(len(weights)/2)], weights[int(len(weights)/2):]
    biases_z, biases_y = biases[:int(len(biases)/2)], biases[int(len(biases)/2):]
    
    return np.concatenate((weights_z * Delta + biases_z, weights_y * Delta + biases_y))

def encoding_step_circuit(qubits, phi, Delta, encoding_function):
    """
    Returns a circuit that applies the encoding step of the VQE algorithm.
    
    Args:
    ----------
        qubits [list]: Qubits to apply the circuit to.
        phi [list or ndarray]: A list of the variational parameters.
        Delta [float]: The value of the parameter Delta.
        encoding_function [function]: The encoding function.
    
    Yields:
    ----------
        [list]: Gates in the circuit.
    """
    
    assert len(phi) == 2 * 2 * 2 * len(qubits) # 2 paramterized layers per qubit with 2 fun
    
    yield processing_step_circuit(qubits, encoding_function(Delta, phi))

In [6]:
import sympy

num_qubits = 4
qubits = cirq.LineQubit.range(num_qubits)

phi_0 = np.array([sympy.Symbol("{}_{}^({})".format(phi_type, i, j)) for phi_type in ["w", "p"] for j in range(2) for i in range(2 * num_qubits)])

circuit = cirq.Circuit(encoding_step_circuit(qubits, phi_0, Delta=sympy.Symbol("Delta") ,encoding_function=encoding_function))
print(circuit.to_text_diagram(transpose=True))

0                           1                           2                           3
│                           │                           │                           │
Rz(Delta*w_0^(0) + p_0^(0)) Rz(Delta*w_2^(0) + p_2^(0)) Rz(Delta*w_4^(0) + p_4^(0)) Rz(Delta*w_6^(0) + p_6^(0))
│                           │                           │                           │
Ry(Delta*w_0^(1) + p_0^(1)) Ry(Delta*w_2^(1) + p_2^(1)) Ry(Delta*w_4^(1) + p_4^(1)) Ry(Delta*w_6^(1) + p_6^(1))
│                           │                           │                           │
@───────────────────────────X                           @───────────────────────────X
│                           │                           │                           │
Rz(Delta*w_1^(0) + p_1^(0)) Rz(Delta*w_3^(0) + p_3^(0)) Rz(Delta*w_5^(0) + p_5^(0)) Rz(Delta*w_7^(0) + p_7^(0))
│                           │                           │                           │
Ry(Delta*w_1^(1) + p_1^(1)) Ry(Delta*w_3^(1) + p_3^(1)) Ry(Del

In [7]:

def xxz_chain_hamiltonian_1D(qubits, hamilt_params):
    """
    Returns tuple of generators for every hamiltonian term in the XXZ chain hamiltonian
    such that each inidivdual term is given as (coefficient, gate set)
    
    Args:
    ----------
        qubits [list]: Qubits to apply the circuit to.
        hamilt_params [list]: A list of the hamiltonian parameters.
        
    Returns:
    ----------
        [tuple]: Tuple of tuples of hamiltonian term coefficient and gate generators.
    """
    
    def x_interact_term():
        for q_i, _ in enumerate(qubits):
            yield cirq.X(qubits[q_i]) * cirq.X(qubits[(q_i+1) % len(qubits)])
    
    def y_interact_term():
        for q_i, _ in enumerate(qubits):
            yield cirq.Y(qubits[q_i]) * cirq.Y(qubits[(q_i+1) % len(qubits)])
    
    def z_interact_term():
        for q_i, _ in enumerate(qubits):
            yield cirq.Z(qubits[q_i]) * cirq.Z(qubits[(q_i+1) % len(qubits)])
    
    def z_align_term():
        for q_i, _ in enumerate(qubits):
            yield cirq.Z(qubits[q_i])

    return (1.0, x_interact_term()), (1.0, y_interact_term()),  (hamilt_params["Delta"],z_interact_term()), (hamilt_params["lambda"], z_align_term())

def get_XXZ_hamiltonian_measurement_basis_change(qubits, hamilt_params):
    """
    Returns the gates for the measurement basis change needed to measure each commuting set of terms in the XXZ chain hamiltonian.
    Equivalent to applying to every pauli-string the .to_z_basis_op() method.
    Note that the following code is generalizable to other hamiltonians
    However, the iteration over terms (i.e. term_tuples) assumes that all pauli strings within each term commute with each other
    
    Args:
    ----------
        qubits [list]: Qubits to apply the circuit to.
        hamilt_params [list]: A list of the hamiltonian parameters.
    
    Yields:
    ----------
        [list]: Gates in the circuit.
    """	
    
    term_tuples = xxz_chain_hamiltonian_1D(qubits, hamilt_params)
    
    for term_coeff, term_generator in term_tuples:
        
        measured_strings = [] # list of pauli operator objects
        term_measurement_gateset = []
        measured_qubits = []
        
        for pauli_str in term_generator:
            measured_strings.append(pauli_str)
            for qubit, operator in pauli_str.items():
                if operator == cirq.X and qubit not in measured_qubits:
                    term_measurement_gateset.append(cirq.H(qubit))
                    measured_qubits.append(qubit)
                elif operator == cirq.Y and qubit not in measured_qubits:
                    term_measurement_gateset.append(cirq.inverse(cirq.S(qubit)))
                    term_measurement_gateset.append(cirq.H(qubit))
                    measured_qubits.append(qubit)

        yield (term_coeff, measured_strings), term_measurement_gateset

#def get_hamilt_term_fold_func(term_pauli_strings):
#    def term_fold_func(meas_results):
#        #Change from binary to -1 and 1
#        meas_results = 1 - 2 * np.array([meas_results[q_i] for q_i in range(len(meas_results))]).astype(int)
#        
#        energy_tot = np.sum([np.prod([meas_results[q_i] for q_i, _ in enumerate(pauli_str.qubits)]) for pauli_str in term_pauli_strings])
#        
#        return energy_tot
#    return term_fold_func

def meas_hamilt_expectation(qubits, circuit_params, hamilt_params, n_meas_reps = 1000):
    """
    Returns the expectation value of the hamiltonian on the state defined by the given circuit parameters.
    Non-cheating version, i.e. the state vector is not computed but instead the expectation value is estimated by sampling measurement outcomes
    
    Args:
    ----------
        qubits [list]: Qubits to apply the circuit to.
        circuit_params [list]: A list of the circuit parameters.
        hamilt_params [list]: A list of the hamiltonian parameters.
        n_meas_reps [int]: Number of measurement repetitions.
    
    Returns:
    ----------
        [float]: Expectation value of the hamiltonian.
    
    """
    energy_expect = 0.0
    
    #Iterate over terms with commuting pauli strings in the hamiltonian
    for measured_term, term_basis_change_gates in get_XXZ_hamiltonian_measurement_basis_change(qubits, hamilt_params):
        
        term_coeff, measured_strings = measured_term

        #Note that we have to re-initialize the circuit for each term, since those are generators
        encoding_step = encoding_step_circuit(qubits, circuit_params[:int(8*len(qubits))], Delta=hamilt_params['Delta'] ,encoding_function=encoding_function)
        processing_step = processing_step_circuit(qubits, circuit_params[int(8*len(qubits)):])

        #Prepare, run and measure the circuit 
        term_basis_change_gates.extend([cirq.measure(q, key=f"%d" % q.x) for q in qubits])
        
        #OR: term_basis_change_gates = cirq.measure_paulistring_terms(cirq.PauliString({q_i: measured_strings[0]._qubit_pauli_map[qubits[0]] for q_i in qubits}), key_func = lambda q: f"%d" % q.x)

        #print("%%%%%%%%%%%%%%%%%% Next %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
        term_meas_circuit = cirq.Circuit([encoding_step,
                                    processing_step,
                                    term_basis_change_gates,
                                    ])
        
        #print(term_meas_circuit.to_text_diagram(transpose=True))
        
        #print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
        
        
        simulator = cirq.Simulator()
        results = simulator.run(term_meas_circuit, repetitions=int(n_meas_reps))
        
        meas_outcomes_hist = results.multi_measurement_histogram(keys=[f"%d" % q.x for q in qubits])
        assert n_meas_reps == int(np.sum(list(meas_outcomes_hist.values())))
        
        #print("Finished term measurements: ", meas_outcomes_hist)
        
        term_expect = 0.0
        for pauli_str in measured_strings:
            pauli_str_expect = 0.0
            for meas_outcomes, count in meas_outcomes_hist.items():
                assert len(meas_outcomes) == len(qubits)
                pauli_str_eigval = np.prod([1 - 2 * meas_outcomes[q_i.x] for q_i in pauli_str.qubits])
                #print(pauli_str.qubits,[q_i.x for q_i in pauli_str.qubits],[meas_outcomes[q_i.x] for q_i in pauli_str.qubits], [1 - 2 * meas_outcomes[q_i.x] for q_i in pauli_str.qubits])
                pauli_str_expect += pauli_str_eigval * count/n_meas_reps
            
            term_expect += pauli_str_expect
            
        energy_expect += term_coeff * term_expect
    
    return energy_expect

def direct_hamilt_expectation(qubits, circuit_params, hamilt_params):
    """
    Returns the expectation value of the hamiltonian on the state defined by the given circuit parameters.
    Cheating version, i.e. the state vector is computed exactly and the hamiltonian matrix representation is used to compute the expectation value directly; no sampling noise.
    
    Args:
    ----------
        qubits [list]: Qubits to apply the circuit to.
        circuit_params [list]: A list of the circuit parameters.
        hamilt_params [list]: A list of the hamiltonian parameters.
    
    Returns:
    ----------
        [float]: Expectation value of the hamiltonian.
    """
    
    energy_expect = 0.0

    #Iterate over terms with commuting pauli strings in the hamiltonian
    for measured_term, term_basis_change_gates in get_XXZ_hamiltonian_measurement_basis_change(qubits, hamilt_params):
        
        term_coeff, measured_strings = measured_term

        #Note that we have to re-initialize the circuit for each term, since those are generators
        encoding_step = encoding_step_circuit(qubits, circuit_params[:int(8*len(qubits))], Delta=hamilt_params['Delta'] ,encoding_function=encoding_function)
        processing_step = processing_step_circuit(qubits, circuit_params[int(8*len(qubits)):])

        pqc_circuit = cirq.Circuit([encoding_step, processing_step])
        expval = lambda state: np.real(sum([state.conj() @ pauli_str.matrix(qubits) @ state for pauli_str in measured_strings]))
        
        simulator = cirq.Simulator()
        fin_state_vec = simulator.simulate(pqc_circuit, qubit_order=[*qubits]).final_state_vector
        #print("Term energy expect: ",expval(fin_state_vec), " for state vector: ", fin_state_vec)
        energy_expect += term_coeff*expval(fin_state_vec)
    
    return energy_expect


def exact_hamilt_GS_energy(qubits, hamilt_params):
    """
    Returns the exact ground state energy of the hamiltonian.
    
    Args:
    ----------
        qubits [list]: Qubits to apply the circuit to.
        hamilt_params [list]: A list of the hamiltonian parameters.
    
    Returns:
    ----------
        [float]: Exact ground state energy of the hamiltonian.
    """
    
    hamilt_matrix = np.zeros((2**len(qubits), 2**len(qubits)), dtype=np.complex128)
    
    term_tuples = xxz_chain_hamiltonian_1D(qubits, hamilt_params)
    for term_coeff, term_generator in term_tuples:
        for pauli_str in term_generator:
            hamilt_matrix += term_coeff*pauli_str.matrix(qubits)
    
    eigvals = np.linalg.eigvalsh(hamilt_matrix)
    gs_energy = np.min(eigvals)
    
    return gs_energy




In [8]:

import sympy

num_qubits = 3
qubits = cirq.LineQubit.range(num_qubits)

hamilt_params = {}
hamilt_params["lambda"] =  2.0  # Transverse field strength
hamilt_params["Delta"] = 2.0 # Anisotropy parameter

print("############ Direct Hamiltonian Circuits ############")

for term_coeff, term_generator in xxz_chain_hamiltonian_1D(qubits, hamilt_params):
    print("\nThe following term has coefficient", term_coeff, "\n")
    circuit = cirq.Circuit(term_generator)
    print(circuit.to_text_diagram(transpose=True))

print("\n############ Measurement Basis Change Circuits ############")

for measured_term, term_basis_change_gates in get_XXZ_hamiltonian_measurement_basis_change(qubits, hamilt_params):
    term_coeff, measured_strings = measured_term
    print("\nThe following term has coefficient", term_coeff, "\n")
    term_basis_change_gates.append(cirq.measure(*qubits, key='Zmeasurements'))
    circuit = cirq.Circuit(term_basis_change_gates)
    print(circuit.to_text_diagram(transpose=True))

############ Direct Hamiltonian Circuits ############

The following term has coefficient 1.0 

0               1               2
│               │               │
PauliString(+X)─X               │
│               │               │
│               PauliString(+X)─X
│               │               │
X───────────────┼───────────────PauliString(+X)
│               │               │

The following term has coefficient 1.0 

0               1               2
│               │               │
PauliString(+Y)─Y               │
│               │               │
│               PauliString(+Y)─Y
│               │               │
Y───────────────┼───────────────PauliString(+Y)
│               │               │

The following term has coefficient 2.0 

0               1               2
│               │               │
PauliString(+Z)─Z               │
│               │               │
│               PauliString(+Z)─Z
│               │               │
Z───────────────┼───────────────PauliString(

In [9]:
num_qubits = 8
qubits = cirq.LineQubit.range(num_qubits)

hamilt_params = {}
hamilt_params["lambda"] =  0.75  # Transverse field strength
hamilt_params["Delta"] = 0.5 # Anisotropy parameter

exact_hamilt_GS_energy(qubits, hamilt_params)

-12.487918414869853

In [10]:
#np.random.seed(0)
#np.random.seed(7) #43
init_phi_params = np.random.normal(0, np.pi/2, 8*len(qubits))	
init_theta_params = np.random.normal(0, np.pi/2, 4*len(qubits))
init_circuit_params = np.concatenate([init_phi_params, init_theta_params])

print(meas_hamilt_expectation(qubits, init_circuit_params, hamilt_params,100000))
print(direct_hamilt_expectation(qubits, init_circuit_params, hamilt_params))

0.19441500000000017
0.19132160504954204


### Construct training set of hamiltonian expectation values


In [57]:

def construct_training_set(train_set_size, min_hamilt_params = {"lambda": 0.75, "Delta": -1.1}, max_hamilt_params = {"lambda": 0.75, "Delta": 1.1}, mode="equidistant"):
    """
    Constructs a training set of hamiltonian parameters.
    
    Args:
    ----------
        train_set_size [int]: Size of the training set.
        min_hamilt_params [dict]: Dictionary of the minimum hamiltonian parameters.
        max_hamilt_params [dict]: Dictionary of the maximum hamiltonian parameters.
        mode [str]: Mode of the training set construction. Either "equidistant" or "uniform_random".
    
    Returns:
    ----------
        [list]: List of hamiltonian parameters.
    """
    
    assert min_hamilt_params.keys() == max_hamilt_params.keys()

    training_set = []
    for i in range(train_set_size):
        sample_i = {}
        for param in min_hamilt_params.keys():
            if mode == "equidistant":
                sample_i[param] = min_hamilt_params[param] + i*(max_hamilt_params[param] - min_hamilt_params[param])/(train_set_size-1)
            elif mode == "uniform_random": 
                sample_i[param] = np.random.uniform(min_hamilt_params[param], max_hamilt_params[param])
        
        training_set.append(sample_i)
    
    return training_set   
 
def construct_test_set(test_set_size, min_hamilt_params = {"lambda": 0.75, "Delta": -1.1}, max_hamilt_params = {"lambda": 0.75, "Delta": 1.1}, mode="uniform_random"):
    """
    Constructs a test set of hamiltonian parameters.
    
    Args:
    ----------
        test_set_size [int]: Size of the test set.
        min_hamilt_params [dict]: Dictionary of the minimum hamiltonian parameters.
        max_hamilt_params [dict]: Dictionary of the maximum hamiltonian parameters.
        mode [str]: Mode of the test set construction. Either "equidistant" or "uniform_random".
    
    Returns:
    ----------
        [list]: List of hamiltonian parameters.
    """
    
    return construct_training_set(test_set_size, min_hamilt_params, max_hamilt_params, mode)




In [58]:

train_set_size = 10
min_hamilt_params = {"lambda": 0.75, "Delta": -1.1}
max_hamilt_params = {"lambda": 0.75, "Delta": 1.1}

print(construct_training_set(train_set_size=train_set_size))

[{'lambda': 0.75, 'Delta': -1.1}, {'lambda': 0.75, 'Delta': -0.8555555555555556}, {'lambda': 0.75, 'Delta': -0.6111111111111112}, {'lambda': 0.75, 'Delta': -0.3666666666666667}, {'lambda': 0.75, 'Delta': -0.12222222222222223}, {'lambda': 0.75, 'Delta': 0.12222222222222223}, {'lambda': 0.75, 'Delta': 0.3666666666666667}, {'lambda': 0.75, 'Delta': 0.6111111111111112}, {'lambda': 0.75, 'Delta': 0.8555555555555556}, {'lambda': 0.75, 'Delta': 1.1}]


In [59]:
from scipy.optimize import OptimizeResult
def adam(
    fun,
    x0,
    jac,
    args=(),
    learning_rate=0.04, #0.001,
    beta1=0.9,
    beta2=0.999,
    eps=1e-8,
    startiter=0,
    maxiter=10000,
    callback=None,
    **kwargs):
    """scipy.optimize.minimize compatible implementation of ADAM - [http://arxiv.org/pdf/1412.6980.pdf].
    Adapted from autograd/misc/optimizers.py.
    """
    
    x = x0
    m = np.zeros_like(x)
    v = np.zeros_like(x)

    for i in range(startiter, startiter + maxiter):
        g = jac(x, *args)

        if callback and callback(x):
            break

        m = (1 - beta1) * g + beta1 * m  # first  moment estimate.
        v = (1 - beta2) * (g**2) + beta2 * v  # second moment estimate.
        mhat = m / (1 - beta1**(i + 1))  # bias correction.
        vhat = v / (1 - beta2**(i + 1))
        x = x - learning_rate * mhat / (np.sqrt(vhat) + eps)

    i += 1
    return OptimizeResult(x=x, fun=fun(x, *args), jac=g, nit=i, nfev=i, success=True)

#Write a function that implements adam with a decaying adaptive learning rate
def adam_with_lr_decay(
    fun,
    x0,
    jac,
    args=(),
    init_learning_rate=0.2, #0.001,
    lr_decay_rate=0.6,
    lr_decay_steps=10000,
    lr_decay_type="exponential",
    beta1=0.9,
    beta2=0.999,
    eps=1e-8,
    startiter=0,
    maxiter=10000,
    callback=None,
    **kwargs):
    """scipy.optimize.minimize compatible implementation of ADAM with learning rate decay - [http://arxiv.org/pdf/1412.6980.pdf].
    Adapted from autograd/misc/optimizers.py.
    """
    x = x0
    m = np.zeros_like(x)
    v = np.zeros_like(x)
    lr = init_learning_rate

    for i in range(startiter, startiter + maxiter):
        g = jac(x, *args)

        if callback and callback(x):
            break

        m = (1 - beta1) * g + beta1 * m
        v = (1 - beta2) * (g**2) + beta2 * v
        mhat = m / (1 - beta1**(i + 1))
        vhat = v / (1 - beta2**(i + 1))
        if lr_decay_type == "exponential":
            lr = init_learning_rate * (lr_decay_rate ** (i/lr_decay_steps))
        elif lr_decay_type == "linear":
            lr = init_learning_rate/(i+1)
        elif lr_decay_type == "factor":
            lr *= lr_decay_rate
        
        x = x - lr * mhat / (np.sqrt(vhat) + eps)
        
    i += 1
    return OptimizeResult(x=x, fun=fun(x, *args), jac=g, nit=i, nfev=i, success=True)


def adamw_with_lr_decay(
    fun,
    x0,
    jac,
    args=(),
    init_learning_rate=0.04,
    lr_decay_rate=0.5,
    lr_decay_steps=200,
    lr_decay_type="exponential",
    weight_decay=0.01, # weight decay parameter for AdamW
    beta1=0.9,
    beta2=0.999,
    eps=1e-8,
    startiter=0,
    maxiter=10000,
    callback=None,
    **kwargs):
    """scipy.optimize.minimize compatible implementation of ADAMW without learning rate decay - [http://arxiv.org/pdf/1412.6980.pdf].
    Adapted from autograd/misc/optimizers.py.
    """
    
    x = x0
    m = np.zeros_like(x)
    v = np.zeros_like(x)
    lr = init_learning_rate

    for i in range(startiter, startiter + maxiter):
        g = jac(x, *args)

        if callback and callback(x):
            break

        m = (1 - beta1) * g + beta1 * m  # momentum
        v = (1 - beta2) * (g**2) + beta2 * v  # RMSProp

        mhat = m / (1 - beta1**(i + 1))  # bias correction
        vhat = v / (1 - beta2**(i + 1))  # bias correction

        if lr_decay_type == "exponential":
            lr = init_learning_rate * (lr_decay_rate ** (i/lr_decay_steps))
        elif lr_decay_type == "linear":
            lr = init_learning_rate/(i+1)
        elif lr_decay_type == "factor":
            lr *= lr_decay_rate

        step_size = lr * mhat / (np.sqrt(vhat) + eps)

        x = (1 - weight_decay) * x - step_size  # AdamW update rule
        
    i += 1
    return OptimizeResult(x=x, fun=fun(x, *args), jac=g, nit=i, nfev=i, success=True)

                    

In [60]:
def initialize_circuit_params(qubits, params_init_mode):
    """
    Initialize the parameters of the circuit.
    
    Args:
    ----------------
        qubits [list]: list of qubits
        params_init_mode [str]: mode of initialization of the parameters.
            "zero": all parameters are initialized to zero
            "normal_random": all parameters are initialized to a random value drawn from a normal distribution
    
    Returns:
    ----------------
        init_circuit_params [ndarray]: array of the initial parameters of the circuit
    """
    
    init_circuit_params = None
    
    if params_init_mode == "zero":
        init_phi_params = np.zeros(8*len(qubits))
        init_theta_params = np.zeros(4*len(qubits))
        init_circuit_params = np.concatenate([init_phi_params, init_theta_params])
    elif params_init_mode == "normal_random":
        #init_phi_params = np.random.normal(-np.pi/2, np.pi/2, 8*len(qubits))	
        #init_theta_params = np.random.normal(-np.pi/2, np.pi/2, 4*len(qubits))
        init_phi_params = np.random.normal(-0.02, 0.02, 8*len(qubits))	
        init_theta_params = np.random.normal(-0.02, 0.02, 4*len(qubits))
        init_circuit_params = np.concatenate([init_phi_params, init_theta_params])
    return init_circuit_params


In [61]:
from scipy.optimize import minimize
import time


def meta_vqe_energy_loss(circuit_params, qubits, hamilt_params_training_set, n_meas_reps=1000):
    """
    Compute the energy loss of the meta_vqe circuit.
    
    Args:
    ----------------
        circuit_params [ndarray]: array of the parameters of the circuit
        qubits [list]: list of qubits
        hamilt_params_training_set [list]: list of the parameters of the hamiltonians
        n_meas_reps [int]: number of measurement repetitions

    Returns:
    ----------------
        energy_loss [float]: energy loss of the meta_vqe circuit
    """
    
    energy_loss = 0
    
    #start_time = time.time()
    for hamilt_params_sample in hamilt_params_training_set:
        #energy_loss += meas_hamilt_expectation(qubits, circuit_params, hamilt_params_sample, n_meas_reps)
        energy_loss += direct_hamilt_expectation(qubits, circuit_params, hamilt_params_sample)
    
    #print("Time taken for the meta_vqe loss evaluation: ", time.time() - start_time)
    
    return energy_loss


def train_meta_vqe(qubits, hamilt_params_training_set, init_circuit_params=None, params_init_mode="normal_random", n_meas_reps=1000):
    """
    Train the meta_vqe circuit and return the trained PQC parameters.
    
    Args:
    ----------------
        qubits [list]: list of qubits
        hamilt_params_training_set [list]: list of the parameters of the hamiltonians
        init_circuit_params [ndarray]: array of the initial parameters of the circuit
        params_init_mode [str]: mode of initialization of the parameters.

    Returns:
    ----------------
        trained_circuit_params [ndarray]: array of the trained parameters of the circuit
    """
    
    if init_circuit_params is None:
        init_circuit_params = initialize_circuit_params(qubits, params_init_mode)
    
    #train_loss_func = lambda circuit_params: meta_vqe_energy_loss(circuit_params, hamilt_params_training_set = hamilt_params_training_set)

    def callbackF(Xi):
        global Nfeval
        print("Iter: ", Nfeval, " Loss on train set: ", meta_vqe_energy_loss(Xi, qubits, hamilt_params_training_set, n_meas_reps=1000))
        Nfeval += 1

    print("Train loss should converge to ", sum(exact_hamilt_GS_energy(qubits, hamilt_params_training_set[i]) for i in range(len(hamilt_params_training_set))))
    print("Starting training...")
    
    loss_args = (qubits, hamilt_params_training_set, n_meas_reps)
    meta_vqe_loss_grad_fun = prepare_loss_gradient_approx(meta_vqe_energy_loss, stochastic_approx=True, stoch_shift_size_c=0.01)
    #meta_vqe_loss_grad_fun = prepare_loss_gradient_approx(lambda x: meta_vqe_energy_loss(x, *loss_args), stochastic_approx=True, stoch_shift_size_c=0.001)
    
    optim_result = minimize(meta_vqe_energy_loss, #train_loss_func, 
                            args=loss_args,
                            x0=init_circuit_params, 
                            method='L-BFGS-B', #adamw_with_lr_decay, #'L-BFGS-B', #'COBYLA', #adam_with_lr_decay,,#'COBYLA', 
                            callback=callbackF,
                            #jac=meta_vqe_loss_grad_fun, #None,
                            options={'maxiter': 12000},
                            )
    print(optim_result)
    #print(optim_result.fun)
    return optim_result.x

def evaluate_meta_vqe(qubits, circuit_params, hamilt_params_test_set, n_meas_reps=1000):
    """
    Evaluate the meta_vqe circuit on a test set of hamiltonian parameters.

    Args:
    ----------------
        qubits [list]: list of qubits
        circuit_params [ndarray]: array of the parameters of the circuit
        hamilt_params_test_set [list]: list of the parameters of the hamiltonians
        n_meas_reps [int]: number of measurement repetitions
    
    Returns:
    ----------------
        energy_expectations [list]: list of the energy expectations
        exact_energy_expectations [list]: list of the exact energy expectations
        abs_energy_errors [list]: list of the absolute energy errors
    """
    
    #Evaluate meta-VQE on test set:
    energy_expectations = []
    exact_energy_expectations = []
    abs_energy_errors = []
    
    #Infere on test set:
    for hamilt_params_sample in hamilt_params_test_set:
        #sample_energy_expect = meas_hamilt_expectation(qubits, circuit_params, hamilt_params_sample, n_meas_reps)
        sample_energy_expect =  direct_hamilt_expectation(qubits, circuit_params, hamilt_params_sample)
        energy_expectations.append(sample_energy_expect)
        sample_exact_energy_expect = exact_hamilt_GS_energy(qubits, hamilt_params_sample)
        exact_energy_expectations.append(sample_exact_energy_expect)
        abs_energy_errors.append(np.abs(sample_energy_expect - sample_exact_energy_expect))
    
    print("Meta-VQE absolute energy errors: ", abs_energy_errors)
    return np.array(energy_expectations), np.array(abs_energy_errors)

def meta_vqe(qubits, hamilt_params_training_set, hamilt_params_test_set):
    """
    Convenience wrapper to train and evaluate the meta_vqe circuit.

    Args:
    ----------------
        qubits [list]: list of qubits
        hamilt_params_training_set [list]: list of the parameters of the hamiltonians
        hamilt_params_test_set [list]: list of the parameters of the hamiltonians
    
    Returns:
    ----------------
        energy_expectations [list]: list of the energy expectations
        abs_energy_errors [list]: list of the absolute energy errors
    """
    
    #Train meta-VQE:
    opt_circuit_params = train_meta_vqe(qubits, hamilt_params_training_set, params_init_mode="normal_random", n_meas_reps=1000)
    
    #Evaluate meta-VQE on test set:
    energy_expectations, abs_energy_errors = evaluate_meta_vqe(qubits, opt_circuit_params, hamilt_params_test_set, n_meas_reps=1000)
    
    return energy_expectations, abs_energy_errors

In [62]:
qubits = cirq.LineQubit.range(8)

Nfeval = 1  

train_set_size = 10
min_hamilt_params = {"lambda": 0.75, "Delta": -1.1}
max_hamilt_params = {"lambda": 0.75, "Delta": 1.1}

train_set = construct_training_set(train_set_size=train_set_size, mode="equidistant")
test_set = construct_test_set(test_set_size=100, mode="uniform_random", min_hamilt_params=min_hamilt_params, max_hamilt_params=max_hamilt_params)

meta_vqe(qubits, train_set, test_set)

Train loss should converge to  -125.38941301973591
Starting training...
Iter:  1  Loss on train set:  59.494582553396995
Iter:  2  Loss on train set:  59.49457959643691
Iter:  3  Loss on train set:  59.494579587651344
      fun: 59.494579587651344
 hess_inv: <96x96 LbfgsInvHessProduct with dtype=float64>
      jac: array([-4.58587849e+01, -3.51306149e+01, -1.96685995e+01, -2.95134065e+01,
        1.47565515e-02, -3.03259640e-03,  8.22808488e-03, -4.21280788e-03,
        1.48186776e+02, -3.32231309e-01,  1.48234312e+02,  1.48186560e+02,
        1.50261770e+02,  1.50262547e+02,  1.75834742e+00,  1.48169026e+02,
        6.07300876e-03, -1.23804895e+00, -1.57233870e-01, -1.42883749e-01,
       -1.93480787e-02, -6.00171290e-01, -1.66971859e-01, -1.24271082e-01,
       -1.25258026e-01, -9.13205156e-01, -4.53667326e-01, -4.85259477e-01,
       -2.03699102e-01, -4.02663147e-01, -2.86456512e+00, -2.86354691e+00,
       -4.47456280e+01, -4.47206581e+01, -4.47439092e+01, -4.42845383e+01,
       -

(array([ 5.95975633e+00,  8.48638438e+00,  1.09222120e+01, -2.76768919e+00,
         2.71404077e+00,  8.40460942e+00, -2.19575832e+00,  8.05126325e+00,
         3.83541801e+00,  1.21306542e+01,  1.19844853e+01,  1.10973213e+01,
         1.33270742e+01,  1.19817039e+01,  8.93225996e+00,  1.44826683e+01,
        -4.45125480e-01,  1.21602716e+01,  6.82379002e+00,  1.18058331e+01,
        -3.07230453e-01,  1.07385088e+01,  2.32441097e-01,  1.18421607e+01,
         1.43093290e+01,  5.56413760e+00,  4.73026747e+00,  4.54056022e-01,
         6.80044966e+00,  1.40038845e+01, -6.69848808e-01,  1.40412372e+01,
         3.74036362e-01,  7.64544743e+00,  9.42941935e+00, -6.85270910e-01,
        -1.88554007e-01,  2.20831752e-01,  1.79093385e+00,  6.63896040e-01,
         1.79448640e+00,  4.13082200e+00,  2.79027279e+00,  9.93236153e+00,
         8.38177644e+00,  6.31200027e+00, -6.85952960e-01,  1.25004604e+01,
         2.84715528e+00,  6.81647347e+00,  1.33365009e+01,  6.91732633e+00,
        -1.8

In [255]:
def staged_meta_vqe_energy_loss(circuit_params, qubits, frozen_circuit_params,insertion_pointer, hamilt_params_training_set, n_meas_reps=1000):
    """
    Compute the energy loss of the meta_vqe circuit for a staged meta-vqe, i.e. a meta-vqe trained layer-by-layer.
    
    Args:
    ----------------
        circuit_params [ndarray]: array of the parameters of the circuit
        qubits [list]: list of qubits
        frozen_circuit_params [ndarray]: array of the parameters of the frozen part of the circuit
        insertion_pointer [int]: index of the insertion point of the circuit_params
        hamilt_params_training_set [list]: list of the parameters of the hamiltonians
        n_meas_reps [int]: number of measurement repetitions
    
    Returns:
    ----------------
        energy_loss [float]: energy loss of the meta_vqe circuit
    """
    
    energy_loss = 0
    
    #start_time = time.time()
    for hamilt_params_sample in hamilt_params_training_set:
        full_circuit_params = np.insert(frozen_circuit_params, insertion_pointer, circuit_params)
        #energy_loss += meas_hamilt_expectation(qubits, circuit_params, hamilt_params_sample, n_meas_reps)
        energy_loss += direct_hamilt_expectation(qubits, full_circuit_params, hamilt_params_sample)
    
    #print("Time taken for the meta_vqe loss evaluation: ", time.time() - start_time)
    
    return energy_loss

def train_staged_meta_vqe(qubits, hamilt_params_training_set, init_circuit_params=None, params_init_mode="normal_random", n_meas_reps=1000):
    """
    Train the meta_vqe circuit for a staged meta-vqe, i.e. a meta-vqe trained layer-by-layer.
    
    Args:
    ----------------
        qubits [list]: list of qubits
        hamilt_params_training_set [list]: list of the parameters of the hamiltonians
        init_circuit_params [ndarray]: array of the parameters of the circuit
        params_init_mode [str]: mode of initialization of the circuit parameters
        n_meas_reps [int]: number of measurement repetitions
        
    Returns:
    ----------------
        opt_circuit_params [ndarray]: array of the parameters of the optimized circuit
    """
    
    if init_circuit_params is None:
        init_circuit_params = initialize_circuit_params(qubits, params_init_mode)


    num_var_encoding_layers, num_var_processing_layers = 2, 2 #Default for the paper PQC
    params_per_processing_layer = 2 * len(qubits)
    params_per_encoding_layer = 2 * params_per_processing_layer
    layer_sizes =  [params_per_encoding_layer for i in range(num_var_encoding_layers)] + [params_per_processing_layer for i in range(num_var_processing_layers)]
    print("Layer sizes: ", layer_sizes)

    print("Train loss should converge to ", sum(exact_hamilt_GS_energy(qubits, hamilt_params_training_set[i]) for i in range(len(hamilt_params_training_set))))
    print("Starting training...")


    meta_vqe_loss_grad_fun = prepare_loss_gradient_approx(staged_meta_vqe_energy_loss, stochastic_approx=True, stoch_shift_size_c=0.01)


    optim_circuit_params = init_circuit_params
    for repetit in range(4):
        print("Running repetition: ", repetit)
        for lay_i in reversed(range(len(layer_sizes))):
            
            unfreeze_steps = 200
            
            insertion_pointer = sum(layer_sizes[:lay_i]) #Index of the first trainable parameter in the complete circuit parameter vector
            frozen_circuit_params = np.concatenate([optim_circuit_params[:insertion_pointer], optim_circuit_params[insertion_pointer + layer_sizes[lay_i]:]])
            trainable_circuit_params = optim_circuit_params[insertion_pointer:insertion_pointer + layer_sizes[lay_i]]
            print("Unfrozen params: ", len(trainable_circuit_params), " Frozen params: ", len(frozen_circuit_params))
            
            def callbackF(Xi):
                global Nfeval
                print("Iter: ", Nfeval, " Loss on train set: ", staged_meta_vqe_energy_loss(Xi, qubits, frozen_circuit_params, insertion_pointer, hamilt_params_training_set, n_meas_reps=1000))
                Nfeval += 1
            
            loss_args = (qubits, frozen_circuit_params, insertion_pointer, hamilt_params_training_set, n_meas_reps)
            
            
            optim_result = minimize(staged_meta_vqe_energy_loss,
                                    args=loss_args, 
                                    x0=trainable_circuit_params, 
                                    method=adam, #'L-BFGS-B', #'COBYLA', #adam_with_lr_decay,,#'COBYLA', 
                                    callback=callbackF,
                                    jac=meta_vqe_loss_grad_fun, #None,
                                    options={'maxiter': unfreeze_steps},
                                    )
            print(optim_result)
            optim_circuit_params = np.insert(frozen_circuit_params,insertion_pointer, optim_result.x)        
        
    #print(optim_result.fun)
    return optim_circuit_params

def staged_meta_vqe(qubits, hamilt_params_training_set, hamilt_params_test_set):
    """
    Convenience function for training and evaluating a staged meta-vqe, i.e. a meta-vqe trained layer-by-layer.
    
    Args:
    ----------------
        qubits [list]: list of qubits
        hamilt_params_training_set [list]: list of the parameters of the hamiltonians
        hamilt_params_test_set [list]: list of the parameters of the hamiltonians
        
    Returns:
    ----------------
        energy_expectations [list]: list of the energy expectations
        abs_energy_errors [list]: list of the absolute energy errors
    """
    
    #Train meta-VQE:
    opt_circuit_params = train_staged_meta_vqe(qubits, hamilt_params_training_set, params_init_mode="normal_random", n_meas_reps=1000)
    
    #Evaluate meta-VQE on test set:
    #energy_expectations, abs_energy_errors = evaluate_meta_vqe(qubits, opt_circuit_params, hamilt_params_test_set, n_meas_reps=1000)
    
    ##return energy_expectations, abs_energy_errors


qubits = cirq.LineQubit.range(8)

Nfeval = 1  

train_set_size = 10
min_hamilt_params = {"lambda": 0.75, "Delta": -1.1}
max_hamilt_params = {"lambda": 0.75, "Delta": 1.1}

train_set = construct_training_set(train_set_size=train_set_size, mode="equidistant")
test_set = construct_test_set(test_set_size=100, mode="uniform_random", min_hamilt_params=min_hamilt_params, max_hamilt_params=max_hamilt_params)

staged_meta_vqe(qubits, train_set, test_set)

Layer sizes:  [32, 32, 16, 16]
Train loss should converge to  -125.38941301973591
Starting training...
Running repetition:  0
Unfrozen params:  16  Frozen params:  80
Iter:  1  Loss on train set:  59.59967672618967
Iter:  2  Loss on train set:  59.394431836798006
Iter:  3  Loss on train set:  59.11632385704006
Iter:  4  Loss on train set:  58.83675465883579
Iter:  5  Loss on train set:  58.40185787056015
Iter:  6  Loss on train set:  57.81619904253222
Iter:  7  Loss on train set:  57.14359060399025
Iter:  8  Loss on train set:  56.255244123437244
Iter:  9  Loss on train set:  55.30922542503967
Iter:  10  Loss on train set:  54.05975351500556
Iter:  11  Loss on train set:  52.777767512180716
Iter:  12  Loss on train set:  51.44109069706958
Iter:  13  Loss on train set:  50.11264276976726
Iter:  14  Loss on train set:  48.835835568424656
Iter:  15  Loss on train set:  47.60791189628557
Iter:  16  Loss on train set:  46.3996651584349
Iter:  17  Loss on train set:  45.22996555627721
Iter: 

In [210]:
def dropout_meta_vqe_energy_loss(circuit_params, qubits, frozen_circuit_params,insertion_pointers, inverted_insertion_pointers, hamilt_params_training_set, n_meas_reps=1000):
    """
    Computes the energy loss of a meta-vqe with parameter dropout, i.e. a meta-vqe where the parameters of the circuit are randomly dropped out in each training step.
    
    Args:
    ----------------
        circuit_params [np.array]: array of the circuit parameters
        qubits [list]: list of qubits
        frozen_circuit_params [np.array]: array of the frozen circuit parameters
        insertion_pointers [list]: list of the insertion pointers
        inverted_insertion_pointers [list]: list of the inverted insertion pointers
        hamilt_params_training_set [list]: list of the parameters of the hamiltonians
        n_meas_reps [int]: number of measurement repetitions
        
    Returns:
    ----------------
        energy_loss [float]: energy loss of the meta-vqe
    """
    
    energy_loss = 0
    full_circuit_params = np.zeros(len(circuit_params) + len(frozen_circuit_params))
    full_circuit_params[insertion_pointers] = circuit_params
    full_circuit_params[inverted_insertion_pointers] = frozen_circuit_params
    
    #start_time = time.time()
    for hamilt_params_sample in hamilt_params_training_set:
        #energy_loss += meas_hamilt_expectation(qubits, circuit_params, hamilt_params_sample, n_meas_reps)
        energy_loss += direct_hamilt_expectation(qubits, full_circuit_params, hamilt_params_sample)
    
    #print("Time taken for the meta_vqe loss evaluation: ", time.time() - start_time)
    
    return energy_loss

def train_dropout_meta_vqe(qubits, hamilt_params_training_set, init_circuit_params=None, params_init_mode="normal_random", n_meas_reps=1000):
    """
    Trains a meta-vqe with parameter dropout, i.e. a meta-vqe where the parameters of the circuit are randomly dropped out in each training step.
    
    Args:
    ----------------
        qubits [list]: list of qubits
        hamilt_params_training_set [list]: list of the parameters of the hamiltonians
        init_circuit_params [np.array]: array of the initial circuit parameters
        params_init_mode [str]: mode for initializing the circuit parameters
        n_meas_reps [int]: number of measurement repetitions
    
    Returns:
    ----------------
        optim_circuit_params [np.array]: array of the optimized circuit parameters
    """
    
    if init_circuit_params is None:
        init_circuit_params = initialize_circuit_params(qubits, params_init_mode)


    print("Train loss should converge to ", sum(exact_hamilt_GS_energy(qubits, hamilt_params_training_set[i]) for i in range(len(hamilt_params_training_set))))
    print("Starting training...")


    meta_vqe_loss_grad_fun = prepare_loss_gradient_approx(dropout_meta_vqe_energy_loss, stochastic_approx=True, stoch_shift_size_c=0.01)

    optim_circuit_params = init_circuit_params
    dropout_rate = 0.8
    
    for repetit in range(80):
        shuffled_inds = np.random.permutation(len(optim_circuit_params)) #np.random.randint(0, len(optim_circuit_params), size=int((1-dropout_rate)*len(optim_circuit_params)))
        insertion_pointers, inverted_insertion_pointers = shuffled_inds[:int((1-dropout_rate)*len(optim_circuit_params))], shuffled_inds[int((1-dropout_rate)*len(optim_circuit_params)):]
        trainable_circuit_params = optim_circuit_params[insertion_pointers]
        frozen_circuit_params = optim_circuit_params[inverted_insertion_pointers]
        
        print("Unfrozen params: ", len(trainable_circuit_params), " Frozen params: ", len(frozen_circuit_params))
        
        def callbackF(Xi):
            global Nfeval
            print("Iter: ", Nfeval, " Loss on train set: ", dropout_meta_vqe_energy_loss(Xi, qubits, frozen_circuit_params, insertion_pointers, inverted_insertion_pointers, hamilt_params_training_set, n_meas_reps=1000))
            Nfeval += 1
        
        loss_args = (qubits, frozen_circuit_params, insertion_pointers, inverted_insertion_pointers, hamilt_params_training_set, n_meas_reps)
        
        #meta_vqe_loss_grad_fun = prepare_loss_gradient_approx(lambda x: dropout_meta_vqe_energy_loss(x, *loss_args), stochastic_approx=True, stoch_shift_size_c=0.001)
    
        optim_result = minimize(dropout_meta_vqe_energy_loss,
                                args=loss_args, 
                                x0=trainable_circuit_params, 
                                method="COBYLA", #adam, #'L-BFGS-B', #'COBYLA', #adam_with_lr_decay,,#'COBYLA', 
                                callback=callbackF,
                                jac=meta_vqe_loss_grad_fun, #None,
                                options={'maxiter': 50},
                                )
        print(optim_result)
        optim_circuit_params[insertion_pointers] = optim_result.x     
        
    #print(optim_result.fun)
    return optim_circuit_params

def dropout_meta_vqe(qubits, hamilt_params_training_set, hamilt_params_test_set):
    """
    Convenience function for training and evaluating a dropout meta-vqe, i.e. a meta-vqe where the parameters of the circuit are randomly dropped out in each training step.
    
    Args:
    ----------------
        qubits [list]: list of qubits
        hamilt_params_training_set [list]: list of the parameters of the hamiltonians in the training set
        hamilt_params_test_set [list]: list of the parameters of the hamiltonians in the test set
    
    Returns:
    ----------------
        energy_expectations [list]: list of the energy expectations of the hamiltonians in the test set
        abs_energy_errors [list]: list of the absolute energy errors of the hamiltonians in the test set
    """
    
    #Train meta-VQE:
    opt_circuit_params = train_dropout_meta_vqe(qubits, hamilt_params_training_set, params_init_mode="normal_random", n_meas_reps=1000)
    
    #Evaluate meta-VQE on test set:
    #energy_expectations, abs_energy_errors = evaluate_meta_vqe(qubits, opt_circuit_params, hamilt_params_test_set, n_meas_reps=1000)
    
    ##return energy_expectations, abs_energy_errors


qubits = cirq.LineQubit.range(8)

Nfeval = 1  

train_set_size = 10
min_hamilt_params = {"lambda": 0.75, "Delta": -1.1}
max_hamilt_params = {"lambda": 0.75, "Delta": 1.1}

train_set = construct_training_set(train_set_size=train_set_size, mode="equidistant")
test_set = construct_test_set(test_set_size=100, mode="uniform_random", min_hamilt_params=min_hamilt_params, max_hamilt_params=max_hamilt_params)

dropout_meta_vqe(qubits, train_set, test_set)

Train loss should converge to  -65.56662081055886
Starting training...
Unfrozen params:  9  Frozen params:  39


  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1  Loss on train set:  29.848294994033672
Iter:  2  Loss on train set:  20.67942723111402
Iter:  3  Loss on train set:  19.03461001974393
Iter:  4  Loss on train set:  19.034599333402895
Iter:  5  Loss on train set:  21.241273359923063
Iter:  6  Loss on train set:  19.034597154249738
Iter:  7  Loss on train set:  19.03459618306855
Iter:  8  Loss on train set:  19.03643008354366
Iter:  9  Loss on train set:  19.034594614883904
Iter:  10  Loss on train set:  8.356113272396618
Iter:  11  Loss on train set:  1.666650185429389
Iter:  12  Loss on train set:  -1.0119488578395919
Iter:  13  Loss on train set:  -2.3579173912451346
Iter:  14  Loss on train set:  -4.329720804531213
Iter:  15  Loss on train set:  10.724485917052926
Iter:  16  Loss on train set:  -4.325915153598629
Iter:  17  Loss on train set:  -16.115202673701855
Iter:  18  Loss on train set:  -28.4491817847101
Iter:  19  Loss on train set:  -33.21124851403562
Iter:  20  Loss on train set:  -28.83240464624062
Iter:  21  Lo

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  52  Loss on train set:  -34.48793064031567
Iter:  53  Loss on train set:  -39.15146736256149
Iter:  54  Loss on train set:  -39.22290789064992
Iter:  55  Loss on train set:  -37.143018372144155
Iter:  56  Loss on train set:  -35.27015426760535
Iter:  57  Loss on train set:  -24.745621442239607
Iter:  58  Loss on train set:  -24.663650507559947
Iter:  59  Loss on train set:  -35.273326335987626
Iter:  60  Loss on train set:  -37.08684393479913
Iter:  61  Loss on train set:  -27.39172405601239
Iter:  62  Loss on train set:  -37.805223467728084
Iter:  63  Loss on train set:  -38.48325974365437
Iter:  64  Loss on train set:  -38.37852596392495
Iter:  65  Loss on train set:  -39.26807529423103
Iter:  66  Loss on train set:  -38.48882465533058
Iter:  67  Loss on train set:  -39.31020925024282
Iter:  68  Loss on train set:  -38.73600861179296
Iter:  69  Loss on train set:  -39.24038404047503
Iter:  70  Loss on train set:  -36.80836947991742
Iter:  71  Loss on train set:  -39.0623527354

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  103  Loss on train set:  -40.445907974587016
Iter:  104  Loss on train set:  -36.46970534202188
Iter:  105  Loss on train set:  -40.48560115462903
Iter:  106  Loss on train set:  -35.533288544919834
Iter:  107  Loss on train set:  -32.73730664798739
Iter:  108  Loss on train set:  -35.2254135515628
Iter:  109  Loss on train set:  -29.236949255907778
Iter:  110  Loss on train set:  -35.27431692340643
Iter:  111  Loss on train set:  -35.26246471679172
Iter:  112  Loss on train set:  -39.83393587612456
Iter:  113  Loss on train set:  -40.60463218683392
Iter:  114  Loss on train set:  -41.64997233512596
Iter:  115  Loss on train set:  -38.68855753267314
Iter:  116  Loss on train set:  -41.642143520561916
Iter:  117  Loss on train set:  -41.45221777386834
Iter:  118  Loss on train set:  -41.262303613874536
Iter:  119  Loss on train set:  -39.407896206850744
Iter:  120  Loss on train set:  -40.45969107120544
Iter:  121  Loss on train set:  -41.24415542753581
Iter:  122  Loss on train 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  154  Loss on train set:  -41.9617972974298
Iter:  155  Loss on train set:  -47.465537448118305
Iter:  156  Loss on train set:  -35.51300593170161
Iter:  157  Loss on train set:  -42.013447630353795
Iter:  158  Loss on train set:  -47.46554587724884
Iter:  159  Loss on train set:  -47.242942568554085
Iter:  160  Loss on train set:  -39.389948357660096
Iter:  161  Loss on train set:  -40.04384225430879
Iter:  162  Loss on train set:  -36.961599298578975
Iter:  163  Loss on train set:  -42.24685782288154
Iter:  164  Loss on train set:  -46.389346996940915
Iter:  165  Loss on train set:  -47.46553881728004
Iter:  166  Loss on train set:  -43.75338906625003
Iter:  167  Loss on train set:  -46.25109691823076
Iter:  168  Loss on train set:  -47.74648685111495
Iter:  169  Loss on train set:  -47.82735864300331
Iter:  170  Loss on train set:  -46.50849529220914
Iter:  171  Loss on train set:  -47.83031824771194
Iter:  172  Loss on train set:  -47.62208363565743
Iter:  173  Loss on train 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  205  Loss on train set:  -46.32974727451016
Iter:  206  Loss on train set:  -44.286911889656224
Iter:  207  Loss on train set:  -35.211402327504366
Iter:  208  Loss on train set:  -44.00042955397075
Iter:  209  Loss on train set:  -31.986230136823508
Iter:  210  Loss on train set:  -47.35232314090204
Iter:  211  Loss on train set:  -46.7854908903064
Iter:  212  Loss on train set:  -48.060394059259075
Iter:  213  Loss on train set:  -47.928429913658135
Iter:  214  Loss on train set:  -37.560132974961064
Iter:  215  Loss on train set:  -45.48490245550635
Iter:  216  Loss on train set:  -48.05810208743844
Iter:  217  Loss on train set:  -46.36624444432859
Iter:  218  Loss on train set:  -48.02872139852957
Iter:  219  Loss on train set:  -43.290288036949896
Iter:  220  Loss on train set:  -48.186421784874724
Iter:  221  Loss on train set:  -46.60813458428271
Iter:  222  Loss on train set:  -47.94051115116787
Iter:  223  Loss on train set:  -46.26645024840024
Iter:  224  Loss on trai

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  256  Loss on train set:  -49.09127298314358
Iter:  257  Loss on train set:  -49.09127410584462
Iter:  258  Loss on train set:  -40.20303924964162
Iter:  259  Loss on train set:  -39.88748235646712
Iter:  260  Loss on train set:  -34.73642457443277
Iter:  261  Loss on train set:  -48.873973752999426
Iter:  262  Loss on train set:  -46.17641586292429
Iter:  263  Loss on train set:  -47.98562415131114
Iter:  264  Loss on train set:  -49.09126863554775
Iter:  265  Loss on train set:  -40.55801120404875
Iter:  266  Loss on train set:  -47.607154576640504
Iter:  267  Loss on train set:  -49.08071773311329
Iter:  268  Loss on train set:  -47.05273598531787
Iter:  269  Loss on train set:  -48.192740326781646
Iter:  270  Loss on train set:  -49.09126881089692
Iter:  271  Loss on train set:  -48.29605621183415
Iter:  272  Loss on train set:  -49.11054377382115
Iter:  273  Loss on train set:  -48.869380732369265
Iter:  274  Loss on train set:  -49.110544374210484
Iter:  275  Loss on train 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  307  Loss on train set:  -39.76047809046095
Iter:  308  Loss on train set:  -47.845489137880456
Iter:  309  Loss on train set:  -44.43419797656048
Iter:  310  Loss on train set:  -37.70441091278454
Iter:  311  Loss on train set:  -41.549234160645256
Iter:  312  Loss on train set:  -44.17217442968813
Iter:  313  Loss on train set:  -36.766682724920365
Iter:  314  Loss on train set:  -46.50808117642721
Iter:  315  Loss on train set:  -41.354699179877166
Iter:  316  Loss on train set:  -39.32371549874297
Iter:  317  Loss on train set:  -46.919023409125685
Iter:  318  Loss on train set:  -48.75136511152872
Iter:  319  Loss on train set:  -49.58231691341972
Iter:  320  Loss on train set:  -49.20751736599085
Iter:  321  Loss on train set:  -49.37280360349807
Iter:  322  Loss on train set:  -48.84442633184465
Iter:  323  Loss on train set:  -49.506973924626465
Iter:  324  Loss on train set:  -48.89468449618387
Iter:  325  Loss on train set:  -49.459190177714504
Iter:  326  Loss on trai

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  358  Loss on train set:  -49.65076801600606
Iter:  359  Loss on train set:  -48.13413031494291
Iter:  360  Loss on train set:  -45.63607493915104
Iter:  361  Loss on train set:  -49.65077425503486
Iter:  362  Loss on train set:  -38.740824734211245
Iter:  363  Loss on train set:  -40.47509746574164
Iter:  364  Loss on train set:  -39.884313801997834
Iter:  365  Loss on train set:  -49.65077317306891
Iter:  366  Loss on train set:  -49.58544494600144
Iter:  367  Loss on train set:  -43.47064971578575
Iter:  368  Loss on train set:  -48.25348276318877
Iter:  369  Loss on train set:  -49.650772878798485
Iter:  370  Loss on train set:  -48.00572972134024
Iter:  371  Loss on train set:  -49.57094622614116
Iter:  372  Loss on train set:  -48.92975726318398
Iter:  373  Loss on train set:  -48.86030073660561
Iter:  374  Loss on train set:  -49.65077494123508
Iter:  375  Loss on train set:  -48.661235728982376
Iter:  376  Loss on train set:  -49.650771197740326
Iter:  377  Loss on train 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  409  Loss on train set:  -49.20424665277926
Iter:  410  Loss on train set:  -40.91687579580087
Iter:  411  Loss on train set:  -41.86847042152412
Iter:  412  Loss on train set:  -40.0435238901687
Iter:  413  Loss on train set:  -40.874350660992505
Iter:  414  Loss on train set:  -47.53837557508055
Iter:  415  Loss on train set:  -49.791329384787076
Iter:  416  Loss on train set:  -49.7913278928172
Iter:  417  Loss on train set:  -49.79132744175453
Iter:  418  Loss on train set:  -43.07737014815153
Iter:  419  Loss on train set:  -47.92494069003905
Iter:  420  Loss on train set:  -48.97619673328077
Iter:  421  Loss on train set:  -49.810209420373376
Iter:  422  Loss on train set:  -49.52646281040077
Iter:  423  Loss on train set:  -49.59045473170701
Iter:  424  Loss on train set:  -49.13867658295448
Iter:  425  Loss on train set:  -49.81021945746649
Iter:  426  Loss on train set:  -49.638712998496835
Iter:  427  Loss on train set:  -49.81021416058508
Iter:  428  Loss on train set

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  460  Loss on train set:  -40.77345556509904
Iter:  461  Loss on train set:  -36.91487904454062
Iter:  462  Loss on train set:  -39.326896752604576
Iter:  463  Loss on train set:  -42.87234998039827
Iter:  464  Loss on train set:  -49.85936423637699
Iter:  465  Loss on train set:  -44.84061853374913
Iter:  466  Loss on train set:  -49.67107739104288
Iter:  467  Loss on train set:  -47.62227741328759
Iter:  468  Loss on train set:  -49.35233165681072
Iter:  469  Loss on train set:  -38.01221817147994
Iter:  470  Loss on train set:  -46.68834706615236
Iter:  471  Loss on train set:  -49.19656323639833
Iter:  472  Loss on train set:  -49.64674173764538
Iter:  473  Loss on train set:  -49.184787155641786
Iter:  474  Loss on train set:  -49.85936877183927
Iter:  475  Loss on train set:  -49.29134546012337
Iter:  476  Loss on train set:  -49.8760405020772
Iter:  477  Loss on train set:  -49.56348559130015
Iter:  478  Loss on train set:  -49.85780254752562
Iter:  479  Loss on train set:

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  511  Loss on train set:  -48.863863152105246
Iter:  512  Loss on train set:  -37.703480342677324
Iter:  513  Loss on train set:  -39.92858753985913
Iter:  514  Loss on train set:  -49.90188044427571
Iter:  515  Loss on train set:  -49.046442054809745
Iter:  516  Loss on train set:  -45.34242560380183
Iter:  517  Loss on train set:  -47.22307288947551
Iter:  518  Loss on train set:  -39.154762363173774
Iter:  519  Loss on train set:  -37.20022074456937
Iter:  520  Loss on train set:  -36.64473411900323
Iter:  521  Loss on train set:  -46.266169669975014
Iter:  522  Loss on train set:  -48.79781113054062
Iter:  523  Loss on train set:  -49.89980043899031
Iter:  524  Loss on train set:  -49.38096588495595
Iter:  525  Loss on train set:  -49.901885428088875
Iter:  526  Loss on train set:  -49.52309112181652
Iter:  527  Loss on train set:  -49.9776077470655
Iter:  528  Loss on train set:  -50.30854375611447
Iter:  529  Loss on train set:  -50.02395989981467
Iter:  530  Loss on train 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  562  Loss on train set:  -50.373770270165004
Iter:  563  Loss on train set:  -45.76094270343685
Iter:  564  Loss on train set:  -49.48084893840345
Iter:  565  Loss on train set:  -37.67453044441069
Iter:  566  Loss on train set:  -39.92072316667614
Iter:  567  Loss on train set:  -39.13256834263282
Iter:  568  Loss on train set:  -50.373770010606
Iter:  569  Loss on train set:  -50.373771107085275
Iter:  570  Loss on train set:  -41.22640609195243
Iter:  571  Loss on train set:  -38.62332319941961
Iter:  572  Loss on train set:  -46.836341058078794
Iter:  573  Loss on train set:  -50.37376934892847
Iter:  574  Loss on train set:  -49.486086724733354
Iter:  575  Loss on train set:  -50.31063701830922
Iter:  576  Loss on train set:  -48.38147623647687
Iter:  577  Loss on train set:  -50.373763943734346
Iter:  578  Loss on train set:  -46.9890940697125
Iter:  579  Loss on train set:  -49.82828822897601
Iter:  580  Loss on train set:  -50.37377248893513
Iter:  581  Loss on train set

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  613  Loss on train set:  -41.878198487266495
Iter:  614  Loss on train set:  -50.60736252651302
Iter:  615  Loss on train set:  -38.13923995499873
Iter:  616  Loss on train set:  -39.86337692032466
Iter:  617  Loss on train set:  -46.68786604571767
Iter:  618  Loss on train set:  -37.537958167166394
Iter:  619  Loss on train set:  -46.30648740061714
Iter:  620  Loss on train set:  -42.62583247795992
Iter:  621  Loss on train set:  -41.532087315917046
Iter:  622  Loss on train set:  -36.85882303190396
Iter:  623  Loss on train set:  -47.2402113406343
Iter:  624  Loss on train set:  -49.98580794527393
Iter:  625  Loss on train set:  -50.60736387952719
Iter:  626  Loss on train set:  -49.95922838960553
Iter:  627  Loss on train set:  -50.622747100424185
Iter:  628  Loss on train set:  -49.72582480247401
Iter:  629  Loss on train set:  -50.44108496219607
Iter:  630  Loss on train set:  -50.07726003192643
Iter:  631  Loss on train set:  -50.38125760918853
Iter:  632  Loss on train se

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  664  Loss on train set:  -45.52651028511089
Iter:  665  Loss on train set:  -41.938450388762334
Iter:  666  Loss on train set:  -38.018652494980216
Iter:  667  Loss on train set:  -38.855542054529174
Iter:  668  Loss on train set:  -50.81192653049466
Iter:  669  Loss on train set:  -46.122783840207816
Iter:  670  Loss on train set:  -45.439641837533124
Iter:  671  Loss on train set:  -44.60039620930028
Iter:  672  Loss on train set:  -38.0312955955702
Iter:  673  Loss on train set:  -35.98679909504439
Iter:  674  Loss on train set:  -47.197328226588134
Iter:  675  Loss on train set:  -50.434652298568786
Iter:  676  Loss on train set:  -50.80693407993161
Iter:  677  Loss on train set:  -50.4409322187213
Iter:  678  Loss on train set:  -50.81193120575721
Iter:  679  Loss on train set:  -49.990953520986885
Iter:  680  Loss on train set:  -50.689785830339176
Iter:  681  Loss on train set:  -50.564748260642936
Iter:  682  Loss on train set:  -50.69999061694167
Iter:  683  Loss on tra

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  715  Loss on train set:  -41.476614581108635
Iter:  716  Loss on train set:  -39.25160976689681
Iter:  717  Loss on train set:  -45.07190197695621
Iter:  718  Loss on train set:  -50.89719322623798
Iter:  719  Loss on train set:  -50.500422356032686
Iter:  720  Loss on train set:  -47.12206295552929
Iter:  721  Loss on train set:  -41.76899722583242
Iter:  722  Loss on train set:  -45.12779841444927
Iter:  723  Loss on train set:  -45.19958751469347
Iter:  724  Loss on train set:  -43.054324909302686
Iter:  725  Loss on train set:  -48.053465613152966
Iter:  726  Loss on train set:  -50.44093036646132
Iter:  727  Loss on train set:  -48.25779104728574
Iter:  728  Loss on train set:  -50.59524192847295
Iter:  729  Loss on train set:  -50.89719059882115
Iter:  730  Loss on train set:  -50.19673365823339
Iter:  731  Loss on train set:  -50.90734757666658
Iter:  732  Loss on train set:  -50.62160894992762
Iter:  733  Loss on train set:  -50.79345102403462
Iter:  734  Loss on train s

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  766  Loss on train set:  -50.5945785279016
Iter:  767  Loss on train set:  -38.19602468250994
Iter:  768  Loss on train set:  -40.5865887431239
Iter:  769  Loss on train set:  -36.92002721952844
Iter:  770  Loss on train set:  -39.062295012253585
Iter:  771  Loss on train set:  -39.76183737429161
Iter:  772  Loss on train set:  -45.09763104434857
Iter:  773  Loss on train set:  -45.628820948541275
Iter:  774  Loss on train set:  -48.3455322373166
Iter:  775  Loss on train set:  -34.57553558773026
Iter:  776  Loss on train set:  -46.96150893401542
Iter:  777  Loss on train set:  -50.418432720499254
Iter:  778  Loss on train set:  -50.925752778090214
Iter:  779  Loss on train set:  -50.4028129048771
Iter:  780  Loss on train set:  -50.628860303724394
Iter:  781  Loss on train set:  -49.57190697185746
Iter:  782  Loss on train set:  -50.81158220496532
Iter:  783  Loss on train set:  -50.31706798112388
Iter:  784  Loss on train set:  -50.92051099795411
Iter:  785  Loss on train set:

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  817  Loss on train set:  -41.69098189318679
Iter:  818  Loss on train set:  -38.087279717519735
Iter:  819  Loss on train set:  -42.09217038175836
Iter:  820  Loss on train set:  -45.140525043902
Iter:  821  Loss on train set:  -41.69249233762126
Iter:  822  Loss on train set:  -42.05177240090766
Iter:  823  Loss on train set:  -39.004072525671745
Iter:  824  Loss on train set:  -45.05897655196302
Iter:  825  Loss on train set:  -51.017934879423855
Iter:  826  Loss on train set:  -37.57391673462405
Iter:  827  Loss on train set:  -47.056219792436856
Iter:  828  Loss on train set:  -50.623085831515446
Iter:  829  Loss on train set:  -50.756407715469685
Iter:  830  Loss on train set:  -50.579090013078044
Iter:  831  Loss on train set:  -50.89007393129035
Iter:  832  Loss on train set:  -50.270517754141
Iter:  833  Loss on train set:  -50.87116820841774
Iter:  834  Loss on train set:  -50.30563380169836
Iter:  835  Loss on train set:  -51.01793225081909
Iter:  836  Loss on train se

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  868  Loss on train set:  -37.844693905652164
Iter:  869  Loss on train set:  -40.7560990758591
Iter:  870  Loss on train set:  -39.70080300603303
Iter:  871  Loss on train set:  -51.02862810548776
Iter:  872  Loss on train set:  -49.14619669693599
Iter:  873  Loss on train set:  -46.40202968050621
Iter:  874  Loss on train set:  -42.30908950513543
Iter:  875  Loss on train set:  -46.895447027559044
Iter:  876  Loss on train set:  -45.30213210334231
Iter:  877  Loss on train set:  -44.1045324946699
Iter:  878  Loss on train set:  -46.76260022804126
Iter:  879  Loss on train set:  -50.12532868653484
Iter:  880  Loss on train set:  -49.13868148096692
Iter:  881  Loss on train set:  -50.977179835677155
Iter:  882  Loss on train set:  -51.02862307833181
Iter:  883  Loss on train set:  -50.79255670815692
Iter:  884  Loss on train set:  -50.95673147571856
Iter:  885  Loss on train set:  -50.50352215137048
Iter:  886  Loss on train set:  -50.92874006322534
Iter:  887  Loss on train set:

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  919  Loss on train set:  -45.50740717701877
Iter:  920  Loss on train set:  -46.77488135315807
Iter:  921  Loss on train set:  -51.16741993747951
Iter:  922  Loss on train set:  -51.16741599449741
Iter:  923  Loss on train set:  -45.9270485691726
Iter:  924  Loss on train set:  -46.091789756786255
Iter:  925  Loss on train set:  -39.07446394253542
Iter:  926  Loss on train set:  -43.67601561533138
Iter:  927  Loss on train set:  -43.02292116680103
Iter:  928  Loss on train set:  -41.78349445959958
Iter:  929  Loss on train set:  -49.53118278978554
Iter:  930  Loss on train set:  -49.89319643403951
Iter:  931  Loss on train set:  -50.608398216595745
Iter:  932  Loss on train set:  -51.16741861971872
Iter:  933  Loss on train set:  -50.7847810676313
Iter:  934  Loss on train set:  -51.16741738229835
Iter:  935  Loss on train set:  -50.863081947547805
Iter:  936  Loss on train set:  -51.104884657602376
Iter:  937  Loss on train set:  -50.32217806055388
Iter:  938  Loss on train set

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  970  Loss on train set:  -46.453578706251825
Iter:  971  Loss on train set:  -39.70379762253696
Iter:  972  Loss on train set:  -39.84565457376185
Iter:  973  Loss on train set:  -41.966434359874214
Iter:  974  Loss on train set:  -39.297790466347266
Iter:  975  Loss on train set:  -51.19445829376891
Iter:  976  Loss on train set:  -51.19445650460827
Iter:  977  Loss on train set:  -51.19445753870625
Iter:  978  Loss on train set:  -51.194453537635354
Iter:  979  Loss on train set:  -34.36992320217634
Iter:  980  Loss on train set:  -46.870777465190784
Iter:  981  Loss on train set:  -50.63580235218761
Iter:  982  Loss on train set:  -51.132402089931986
Iter:  983  Loss on train set:  -50.44206903371972
Iter:  984  Loss on train set:  -51.01798528965026
Iter:  985  Loss on train set:  -50.31489680415027
Iter:  986  Loss on train set:  -51.19446071604836
Iter:  987  Loss on train set:  -50.04700295536866
Iter:  988  Loss on train set:  -51.19445256401079
Iter:  989  Loss on train

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1021  Loss on train set:  -38.1853185006477
Iter:  1022  Loss on train set:  -51.203472187074915
Iter:  1023  Loss on train set:  -50.29466595717983
Iter:  1024  Loss on train set:  -43.42675322492704
Iter:  1025  Loss on train set:  -45.505284663200825
Iter:  1026  Loss on train set:  -42.57884041849768
Iter:  1027  Loss on train set:  -39.414910813697524
Iter:  1028  Loss on train set:  -49.541774015761895
Iter:  1029  Loss on train set:  -38.39732178577019
Iter:  1030  Loss on train set:  -45.28807889634611
Iter:  1031  Loss on train set:  -46.606521422404484
Iter:  1032  Loss on train set:  -50.900731166313825
Iter:  1033  Loss on train set:  -51.20347517802817
Iter:  1034  Loss on train set:  -50.81709500447134
Iter:  1035  Loss on train set:  -51.18420802529968
Iter:  1036  Loss on train set:  -50.82861518894101
Iter:  1037  Loss on train set:  -50.90524242478374
Iter:  1038  Loss on train set:  -50.875668909263545
Iter:  1039  Loss on train set:  -51.19758811921941
Iter: 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1072  Loss on train set:  -45.5868959022955
Iter:  1073  Loss on train set:  -51.24559827678266
Iter:  1074  Loss on train set:  -39.50920855350025
Iter:  1075  Loss on train set:  -51.24559667189692
Iter:  1076  Loss on train set:  -42.347815376795495
Iter:  1077  Loss on train set:  -49.94206719615374
Iter:  1078  Loss on train set:  -45.45780769972784
Iter:  1079  Loss on train set:  -50.37013091517892
Iter:  1080  Loss on train set:  -51.24559850832499
Iter:  1081  Loss on train set:  -42.68576920048165
Iter:  1082  Loss on train set:  -49.55366423551654
Iter:  1083  Loss on train set:  -51.24559605429371
Iter:  1084  Loss on train set:  -48.2314159755541
Iter:  1085  Loss on train set:  -51.24559953665037
Iter:  1086  Loss on train set:  -48.93150477737185
Iter:  1087  Loss on train set:  -51.14822787933386
Iter:  1088  Loss on train set:  -49.77417114378983
Iter:  1089  Loss on train set:  -51.11657107302777
Iter:  1090  Loss on train set:  -48.914346596014795
Iter:  1091 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1123  Loss on train set:  -51.25335132996324
Iter:  1124  Loss on train set:  -51.08076922918891
Iter:  1125  Loss on train set:  -51.25334637593072
Iter:  1126  Loss on train set:  -49.952478348154365
Iter:  1127  Loss on train set:  -39.462727672178374
Iter:  1128  Loss on train set:  -41.788221210042224
Iter:  1129  Loss on train set:  -41.3607054224016
Iter:  1130  Loss on train set:  -39.450084458790954
Iter:  1131  Loss on train set:  -37.7920280421786
Iter:  1132  Loss on train set:  -40.22562401461337
Iter:  1133  Loss on train set:  -48.12399645495046
Iter:  1134  Loss on train set:  -50.58931239274846
Iter:  1135  Loss on train set:  -51.253349387133106
Iter:  1136  Loss on train set:  -50.22157604607294
Iter:  1137  Loss on train set:  -51.240652924618686
Iter:  1138  Loss on train set:  -50.386360191987634
Iter:  1139  Loss on train set:  -51.25334805826544
Iter:  1140  Loss on train set:  -50.651345449455796
Iter:  1141  Loss on train set:  -51.23587284534018
Iter: 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1174  Loss on train set:  -45.33693265614092
Iter:  1175  Loss on train set:  -38.47662755780863
Iter:  1176  Loss on train set:  -39.54021140325966
Iter:  1177  Loss on train set:  -39.79045671056869
Iter:  1178  Loss on train set:  -45.75523372437007
Iter:  1179  Loss on train set:  -51.3266312094732
Iter:  1180  Loss on train set:  -51.11902151176142
Iter:  1181  Loss on train set:  -40.229852449635395
Iter:  1182  Loss on train set:  -43.892812353674174
Iter:  1183  Loss on train set:  -39.91123371314131
Iter:  1184  Loss on train set:  -47.69731658335636
Iter:  1185  Loss on train set:  -50.741828143023
Iter:  1186  Loss on train set:  -48.44676310229449
Iter:  1187  Loss on train set:  -50.87747162947061
Iter:  1188  Loss on train set:  -48.94927684644525
Iter:  1189  Loss on train set:  -50.6257987089122
Iter:  1190  Loss on train set:  -51.32662943687659
Iter:  1191  Loss on train set:  -50.43530744001321
Iter:  1192  Loss on train set:  -51.316173740270685
Iter:  1193  

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1225  Loss on train set:  -39.574172197877246
Iter:  1226  Loss on train set:  -45.916127973110974
Iter:  1227  Loss on train set:  -42.59936655485847
Iter:  1228  Loss on train set:  -47.01155535053866
Iter:  1229  Loss on train set:  -46.550625944127006
Iter:  1230  Loss on train set:  -45.50991989310532
Iter:  1231  Loss on train set:  -40.055380551242955
Iter:  1232  Loss on train set:  -39.88585158452793
Iter:  1233  Loss on train set:  -41.54175923624125
Iter:  1234  Loss on train set:  -42.48767315796148
Iter:  1235  Loss on train set:  -49.11957642311883
Iter:  1236  Loss on train set:  -50.382091825942545
Iter:  1237  Loss on train set:  -51.24846955100731
Iter:  1238  Loss on train set:  -50.748446953921004
Iter:  1239  Loss on train set:  -51.18608393005978
Iter:  1240  Loss on train set:  -51.08688721360609
Iter:  1241  Loss on train set:  -51.22560902714379
Iter:  1242  Loss on train set:  -50.764266641376814
Iter:  1243  Loss on train set:  -51.207108916716955
Iter

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1276  Loss on train set:  -51.35866970867533
Iter:  1277  Loss on train set:  -46.354619849003804
Iter:  1278  Loss on train set:  -37.88116372549443
Iter:  1279  Loss on train set:  -42.8988969976723
Iter:  1280  Loss on train set:  -42.46837100739282
Iter:  1281  Loss on train set:  -51.358671845013916
Iter:  1282  Loss on train set:  -50.6322949327638
Iter:  1283  Loss on train set:  -49.43611132561677
Iter:  1284  Loss on train set:  -46.060110106116944
Iter:  1285  Loss on train set:  -40.71985810866898
Iter:  1286  Loss on train set:  -50.03777454791183
Iter:  1287  Loss on train set:  -51.35866654994951
Iter:  1288  Loss on train set:  -47.95280158722865
Iter:  1289  Loss on train set:  -50.87999452875721
Iter:  1290  Loss on train set:  -49.04701406765866
Iter:  1291  Loss on train set:  -50.973628108238536
Iter:  1292  Loss on train set:  -51.358664517035365
Iter:  1293  Loss on train set:  -51.20928907916668
Iter:  1294  Loss on train set:  -51.372242997770925
Iter:  1

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1327  Loss on train set:  -50.475693879639884
Iter:  1328  Loss on train set:  -45.520208644381455
Iter:  1329  Loss on train set:  -40.9389158749059
Iter:  1330  Loss on train set:  -46.895416753888874
Iter:  1331  Loss on train set:  -44.245811983625444
Iter:  1332  Loss on train set:  -46.0565310607273
Iter:  1333  Loss on train set:  -48.88278804257034
Iter:  1334  Loss on train set:  -39.77562882790077
Iter:  1335  Loss on train set:  -51.38459622878015
Iter:  1336  Loss on train set:  -41.292684996536714
Iter:  1337  Loss on train set:  -49.82727719300105
Iter:  1338  Loss on train set:  -51.28125589504265
Iter:  1339  Loss on train set:  -48.473740084469476
Iter:  1340  Loss on train set:  -51.0104361871565
Iter:  1341  Loss on train set:  -49.56023257368908
Iter:  1342  Loss on train set:  -51.192289858146864
Iter:  1343  Loss on train set:  -49.990573968446576
Iter:  1344  Loss on train set:  -48.420934523663
Iter:  1345  Loss on train set:  -50.86370035976282
Iter:  13

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1378  Loss on train set:  -41.86746946958781
Iter:  1379  Loss on train set:  -38.829904997737366
Iter:  1380  Loss on train set:  -50.85491019348884
Iter:  1381  Loss on train set:  -45.73151407595982
Iter:  1382  Loss on train set:  -51.41484231183939
Iter:  1383  Loss on train set:  -44.02715391991382
Iter:  1384  Loss on train set:  -51.41484424126356
Iter:  1385  Loss on train set:  -46.64686553020463
Iter:  1386  Loss on train set:  -49.974366857758696
Iter:  1387  Loss on train set:  -42.83333376254093
Iter:  1388  Loss on train set:  -49.0311847715547
Iter:  1389  Loss on train set:  -51.468744506770314
Iter:  1390  Loss on train set:  -47.98084721604067
Iter:  1391  Loss on train set:  -51.46874353498832
Iter:  1392  Loss on train set:  -49.99525535176173
Iter:  1393  Loss on train set:  -49.63110252314297
Iter:  1394  Loss on train set:  -50.957248983610256
Iter:  1395  Loss on train set:  -51.468741027667775
Iter:  1396  Loss on train set:  -51.209731160522516
Iter:  

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1429  Loss on train set:  -51.49740759153154
Iter:  1430  Loss on train set:  -40.28161097684222
Iter:  1431  Loss on train set:  -39.34282504585754
Iter:  1432  Loss on train set:  -51.49741043395544
Iter:  1433  Loss on train set:  -51.49741063718204
Iter:  1434  Loss on train set:  -44.18949233724519
Iter:  1435  Loss on train set:  -51.49740509347198
Iter:  1436  Loss on train set:  -38.667042677874285
Iter:  1437  Loss on train set:  -42.31424624799293
Iter:  1438  Loss on train set:  -43.488517938706046
Iter:  1439  Loss on train set:  -47.833761880120925
Iter:  1440  Loss on train set:  -51.49740000342483
Iter:  1441  Loss on train set:  -48.3986118762039
Iter:  1442  Loss on train set:  -51.49740569454408
Iter:  1443  Loss on train set:  -49.48621280026382
Iter:  1444  Loss on train set:  -50.891481052746485
Iter:  1445  Loss on train set:  -51.49740459881945
Iter:  1446  Loss on train set:  -50.52264727840219
Iter:  1447  Loss on train set:  -51.49740338464714
Iter:  14

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1480  Loss on train set:  -45.50390909570688
Iter:  1481  Loss on train set:  -37.601934158142804
Iter:  1482  Loss on train set:  -42.343744249258165
Iter:  1483  Loss on train set:  -51.50840110844037
Iter:  1484  Loss on train set:  -38.23209563350643
Iter:  1485  Loss on train set:  -49.28597391435598
Iter:  1486  Loss on train set:  -45.985172286992935
Iter:  1487  Loss on train set:  -40.011717399204045
Iter:  1488  Loss on train set:  -45.44796682107527
Iter:  1489  Loss on train set:  -43.198326459611266
Iter:  1490  Loss on train set:  -49.27066865070162
Iter:  1491  Loss on train set:  -50.374984613653545
Iter:  1492  Loss on train set:  -51.36778228409108
Iter:  1493  Loss on train set:  -51.002443181511524
Iter:  1494  Loss on train set:  -51.50840509096731
Iter:  1495  Loss on train set:  -50.667319257083335
Iter:  1496  Loss on train set:  -51.46392976984934
Iter:  1497  Loss on train set:  -51.13771010881322
Iter:  1498  Loss on train set:  -51.424951887426715
Ite

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1531  Loss on train set:  -39.4450066408596
Iter:  1532  Loss on train set:  -51.51243636737096
Iter:  1533  Loss on train set:  -51.51244125020895
Iter:  1534  Loss on train set:  -46.64463122838578
Iter:  1535  Loss on train set:  -40.75815516320201
Iter:  1536  Loss on train set:  -44.613972991232394
Iter:  1537  Loss on train set:  -40.5608418265831
Iter:  1538  Loss on train set:  -41.34166509929414
Iter:  1539  Loss on train set:  -46.06138502951142
Iter:  1540  Loss on train set:  -34.233063984262635
Iter:  1541  Loss on train set:  -46.47145251456626
Iter:  1542  Loss on train set:  -50.82562503813042
Iter:  1543  Loss on train set:  -51.51243795997919
Iter:  1544  Loss on train set:  -51.175901065531775
Iter:  1545  Loss on train set:  -51.512439272083945
Iter:  1546  Loss on train set:  -51.16449474044318
Iter:  1547  Loss on train set:  -51.35805862664363
Iter:  1548  Loss on train set:  -50.884791720267266
Iter:  1549  Loss on train set:  -51.25997343462312
Iter:  15

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1582  Loss on train set:  -40.00608624419017
Iter:  1583  Loss on train set:  -39.45443228918194
Iter:  1584  Loss on train set:  -40.271504590928735
Iter:  1585  Loss on train set:  -50.694259622734315
Iter:  1586  Loss on train set:  -42.65117314041494
Iter:  1587  Loss on train set:  -45.48066462438391
Iter:  1588  Loss on train set:  -39.17393722897979
Iter:  1589  Loss on train set:  -45.859715677921585
Iter:  1590  Loss on train set:  -49.984486543210494
Iter:  1591  Loss on train set:  -36.34749251722688
Iter:  1592  Loss on train set:  -47.29879232794412
Iter:  1593  Loss on train set:  -51.154784615867925
Iter:  1594  Loss on train set:  -51.31888860284562
Iter:  1595  Loss on train set:  -50.80626377486826
Iter:  1596  Loss on train set:  -51.51657991581504
Iter:  1597  Loss on train set:  -50.94081378048942
Iter:  1598  Loss on train set:  -51.454556964500554
Iter:  1599  Loss on train set:  -50.63246377114694
Iter:  1600  Loss on train set:  -51.50546174104309
Iter: 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1633  Loss on train set:  -41.78090062359175
Iter:  1634  Loss on train set:  -46.05144576506023
Iter:  1635  Loss on train set:  -50.70545238499985
Iter:  1636  Loss on train set:  -39.05249912292007
Iter:  1637  Loss on train set:  -41.43278754076482
Iter:  1638  Loss on train set:  -42.04952039739389
Iter:  1639  Loss on train set:  -51.55792472026628
Iter:  1640  Loss on train set:  -45.67339396416516
Iter:  1641  Loss on train set:  -46.334453922907905
Iter:  1642  Loss on train set:  -39.75854938368208
Iter:  1643  Loss on train set:  -49.580862640365105
Iter:  1644  Loss on train set:  -51.475335366103806
Iter:  1645  Loss on train set:  -50.116326170782145
Iter:  1646  Loss on train set:  -50.9416559417116
Iter:  1647  Loss on train set:  -48.30461994805056
Iter:  1648  Loss on train set:  -49.102835635154996
Iter:  1649  Loss on train set:  -51.171910776838864
Iter:  1650  Loss on train set:  -51.55792210801531
Iter:  1651  Loss on train set:  -51.175920470989496
Iter: 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1684  Loss on train set:  -42.92330092187708
Iter:  1685  Loss on train set:  -50.40569481904632
Iter:  1686  Loss on train set:  -47.505070608710476
Iter:  1687  Loss on train set:  -46.109417137037966
Iter:  1688  Loss on train set:  -51.56255795063379
Iter:  1689  Loss on train set:  -51.56255872954331
Iter:  1690  Loss on train set:  -47.23110958671492
Iter:  1691  Loss on train set:  -45.57103599336126
Iter:  1692  Loss on train set:  -51.56255745896311
Iter:  1693  Loss on train set:  -44.808979101185216
Iter:  1694  Loss on train set:  -49.60987117304299
Iter:  1695  Loss on train set:  -51.44755694586037
Iter:  1696  Loss on train set:  -50.47112895343901
Iter:  1697  Loss on train set:  -51.56255287680494
Iter:  1698  Loss on train set:  -49.98659559928389
Iter:  1699  Loss on train set:  -51.15193014832853
Iter:  1700  Loss on train set:  -51.562557432136586
Iter:  1701  Loss on train set:  -51.24411470644043
Iter:  1702  Loss on train set:  -51.562558197237806
Iter:  

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1735  Loss on train set:  -47.62794623857682
Iter:  1736  Loss on train set:  -37.62606782291206
Iter:  1737  Loss on train set:  -44.51343477239987
Iter:  1738  Loss on train set:  -39.24370982642802
Iter:  1739  Loss on train set:  -46.472167351906705
Iter:  1740  Loss on train set:  -51.56836976879785
Iter:  1741  Loss on train set:  -50.65380680246765
Iter:  1742  Loss on train set:  -39.53901448476359
Iter:  1743  Loss on train set:  -40.53453684234321
Iter:  1744  Loss on train set:  -42.140981149117565
Iter:  1745  Loss on train set:  -49.14121083104782
Iter:  1746  Loss on train set:  -50.54788602666701
Iter:  1747  Loss on train set:  -51.46574304192304
Iter:  1748  Loss on train set:  -51.14089038057487
Iter:  1749  Loss on train set:  -51.4537421943414
Iter:  1750  Loss on train set:  -50.857917347106245
Iter:  1751  Loss on train set:  -51.56837253315286
Iter:  1752  Loss on train set:  -50.7106553717391
Iter:  1753  Loss on train set:  -51.55880527545304
Iter:  1754

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1786  Loss on train set:  -46.1175167807373
Iter:  1787  Loss on train set:  -38.90045364945532
Iter:  1788  Loss on train set:  -45.671505998607074
Iter:  1789  Loss on train set:  -51.338057671213214
Iter:  1790  Loss on train set:  -42.988836101095124
Iter:  1791  Loss on train set:  -51.57310191005705
Iter:  1792  Loss on train set:  -51.57311115963974
Iter:  1793  Loss on train set:  -51.57311173741285
Iter:  1794  Loss on train set:  -51.57311137742778
Iter:  1795  Loss on train set:  -39.94804922981413
Iter:  1796  Loss on train set:  -50.004033900653056
Iter:  1797  Loss on train set:  -51.2659411409281
Iter:  1798  Loss on train set:  -48.632512039013754
Iter:  1799  Loss on train set:  -51.52439919870771
Iter:  1800  Loss on train set:  -49.33352938876689
Iter:  1801  Loss on train set:  -51.57311048004887
Iter:  1802  Loss on train set:  -48.32126422980186
Iter:  1803  Loss on train set:  -51.573108404591466
Iter:  1804  Loss on train set:  -48.32126452316769
Iter:  1

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1837  Loss on train set:  -39.51908998706899
Iter:  1838  Loss on train set:  -51.589891106785174
Iter:  1839  Loss on train set:  -38.203023476049644
Iter:  1840  Loss on train set:  -50.459422590535546
Iter:  1841  Loss on train set:  -50.689004654603416
Iter:  1842  Loss on train set:  -49.251899839501334
Iter:  1843  Loss on train set:  -45.80494253393453
Iter:  1844  Loss on train set:  -50.65581086286575
Iter:  1845  Loss on train set:  -40.646285116512516
Iter:  1846  Loss on train set:  -44.36761841976604
Iter:  1847  Loss on train set:  -49.63735599884953
Iter:  1848  Loss on train set:  -50.88307547665472
Iter:  1849  Loss on train set:  -51.58988992768148
Iter:  1850  Loss on train set:  -50.07728327566336
Iter:  1851  Loss on train set:  -51.567616519308004
Iter:  1852  Loss on train set:  -51.10789871526385
Iter:  1853  Loss on train set:  -51.57282613935199
Iter:  1854  Loss on train set:  -51.375425370573
Iter:  1855  Loss on train set:  -51.56959058803256
Iter:  

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1888  Loss on train set:  -51.731089318449776
Iter:  1889  Loss on train set:  -39.662759154006885
Iter:  1890  Loss on train set:  -41.689620353118116
Iter:  1891  Loss on train set:  -40.3367577735237
Iter:  1892  Loss on train set:  -50.51387894555849
Iter:  1893  Loss on train set:  -49.48579199934917
Iter:  1894  Loss on train set:  -45.85116987012042
Iter:  1895  Loss on train set:  -51.42893563478284
Iter:  1896  Loss on train set:  -42.31968654370935
Iter:  1897  Loss on train set:  -41.460500086175585
Iter:  1898  Loss on train set:  -49.03696934912102
Iter:  1899  Loss on train set:  -50.88452170867601
Iter:  1900  Loss on train set:  -51.73108177799048
Iter:  1901  Loss on train set:  -50.92248623402165
Iter:  1902  Loss on train set:  -51.72156920345326
Iter:  1903  Loss on train set:  -51.09137726166048
Iter:  1904  Loss on train set:  -51.696310810749246
Iter:  1905  Loss on train set:  -51.31003119413705
Iter:  1906  Loss on train set:  -51.71835068362628
Iter:  1

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1939  Loss on train set:  -51.738816706179115
Iter:  1940  Loss on train set:  -51.738817721694254
Iter:  1941  Loss on train set:  -46.834103976488976
Iter:  1942  Loss on train set:  -51.7388143085853
Iter:  1943  Loss on train set:  -51.170898683230334
Iter:  1944  Loss on train set:  -44.516442474132546
Iter:  1945  Loss on train set:  -43.09643461771744
Iter:  1946  Loss on train set:  -49.655957765407635
Iter:  1947  Loss on train set:  -50.072135234033226
Iter:  1948  Loss on train set:  -45.5512874126979
Iter:  1949  Loss on train set:  -50.30986090807118
Iter:  1950  Loss on train set:  -51.43287858115261
Iter:  1951  Loss on train set:  -51.73882732986086
Iter:  1952  Loss on train set:  -51.04853708764478
Iter:  1953  Loss on train set:  -51.73882320343034
Iter:  1954  Loss on train set:  -51.27161339952822
Iter:  1955  Loss on train set:  -51.738826302616246
Iter:  1956  Loss on train set:  -51.62387065246477
Iter:  1957  Loss on train set:  -51.7098953581646
Iter:  

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  1990  Loss on train set:  -44.97658841733323
Iter:  1991  Loss on train set:  -44.62235380744577
Iter:  1992  Loss on train set:  -40.710035056916695
Iter:  1993  Loss on train set:  -40.771382692395804
Iter:  1994  Loss on train set:  -51.28030541336493
Iter:  1995  Loss on train set:  -51.825944562339664
Iter:  1996  Loss on train set:  -51.825941940603826
Iter:  1997  Loss on train set:  -50.61140839285713
Iter:  1998  Loss on train set:  -44.38458675297442
Iter:  1999  Loss on train set:  -42.93651854547854
Iter:  2000  Loss on train set:  -49.824589649761364
Iter:  2001  Loss on train set:  -51.43592534555933
Iter:  2002  Loss on train set:  -48.55124128799919
Iter:  2003  Loss on train set:  -51.83001849053694
Iter:  2004  Loss on train set:  -48.73600546688039
Iter:  2005  Loss on train set:  -51.320551585047795
Iter:  2006  Loss on train set:  -51.830015582317884
Iter:  2007  Loss on train set:  -51.40628287795291
Iter:  2008  Loss on train set:  -51.83001479602871
Iter:

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2041  Loss on train set:  -37.75809262525418
Iter:  2042  Loss on train set:  -40.726559325241155
Iter:  2043  Loss on train set:  -40.143231647396455
Iter:  2044  Loss on train set:  -47.52460703937128
Iter:  2045  Loss on train set:  -51.85706567161686
Iter:  2046  Loss on train set:  -51.53408071179029
Iter:  2047  Loss on train set:  -51.85706738329604
Iter:  2048  Loss on train set:  -39.65221892132775
Iter:  2049  Loss on train set:  -40.46285700458524
Iter:  2050  Loss on train set:  -33.34445595190973
Iter:  2051  Loss on train set:  -46.893886346391916
Iter:  2052  Loss on train set:  -50.86415628706372
Iter:  2053  Loss on train set:  -51.647415984499276
Iter:  2054  Loss on train set:  -51.48112245215418
Iter:  2055  Loss on train set:  -51.85706764086569
Iter:  2056  Loss on train set:  -51.52391758592884
Iter:  2057  Loss on train set:  -51.840631666804086
Iter:  2058  Loss on train set:  -50.96569719843546
Iter:  2059  Loss on train set:  -51.85707074096554
Iter:  

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2092  Loss on train set:  -51.86451519304649
Iter:  2093  Loss on train set:  -51.86450667342039
Iter:  2094  Loss on train set:  -44.43221906458356
Iter:  2095  Loss on train set:  -44.39955907289898
Iter:  2096  Loss on train set:  -39.35966783877196
Iter:  2097  Loss on train set:  -47.641518031656034
Iter:  2098  Loss on train set:  -46.34026086312246
Iter:  2099  Loss on train set:  -51.458261165067775
Iter:  2100  Loss on train set:  -49.58082962315336
Iter:  2101  Loss on train set:  -42.21980626032259
Iter:  2102  Loss on train set:  -49.35884187369701
Iter:  2103  Loss on train set:  -51.319231998422225
Iter:  2104  Loss on train set:  -51.8645098504217
Iter:  2105  Loss on train set:  -51.36431754628559
Iter:  2106  Loss on train set:  -51.86451668016606
Iter:  2107  Loss on train set:  -51.128953088479946
Iter:  2108  Loss on train set:  -51.76153818406262
Iter:  2109  Loss on train set:  -51.53359884214433
Iter:  2110  Loss on train set:  -51.847662793656646
Iter:  2

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2143  Loss on train set:  -46.91035339665349
Iter:  2144  Loss on train set:  -46.07752478464983
Iter:  2145  Loss on train set:  -47.995355907413845
Iter:  2146  Loss on train set:  -37.81584916167764
Iter:  2147  Loss on train set:  -47.64025392652551
Iter:  2148  Loss on train set:  -39.867632683696705
Iter:  2149  Loss on train set:  -40.96664387869237
Iter:  2150  Loss on train set:  -39.70111292113617
Iter:  2151  Loss on train set:  -46.198500499768905
Iter:  2152  Loss on train set:  -44.54303815387581
Iter:  2153  Loss on train set:  -49.86187182043587
Iter:  2154  Loss on train set:  -51.01062420346389
Iter:  2155  Loss on train set:  -51.773754579844876
Iter:  2156  Loss on train set:  -51.52744260320203
Iter:  2157  Loss on train set:  -51.821091114053154
Iter:  2158  Loss on train set:  -51.613475514799646
Iter:  2159  Loss on train set:  -51.527534281385144
Iter:  2160  Loss on train set:  -51.518020057959696
Iter:  2161  Loss on train set:  -51.71432522313144
Iter

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2194  Loss on train set:  -45.410082848910974
Iter:  2195  Loss on train set:  -50.53476038775387
Iter:  2196  Loss on train set:  -45.85919529203628
Iter:  2197  Loss on train set:  -51.870781680012435
Iter:  2198  Loss on train set:  -51.51464795864296
Iter:  2199  Loss on train set:  -42.87260867337097
Iter:  2200  Loss on train set:  -46.90208193925694
Iter:  2201  Loss on train set:  -50.89026414323778
Iter:  2202  Loss on train set:  -42.41650259734498
Iter:  2203  Loss on train set:  -47.62844138958033
Iter:  2204  Loss on train set:  -50.2749887472207
Iter:  2205  Loss on train set:  -51.759004322290544
Iter:  2206  Loss on train set:  -50.41654658450756
Iter:  2207  Loss on train set:  -51.55334178797649
Iter:  2208  Loss on train set:  -51.87078078445266
Iter:  2209  Loss on train set:  -50.994439330192364
Iter:  2210  Loss on train set:  -51.85547266135558
Iter:  2211  Loss on train set:  -51.51961826328613
Iter:  2212  Loss on train set:  -51.85689247899713
Iter:  22

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2245  Loss on train set:  -37.73961139509636
Iter:  2246  Loss on train set:  -47.81869009887084
Iter:  2247  Loss on train set:  -43.38454299992279
Iter:  2248  Loss on train set:  -50.15985726031991
Iter:  2249  Loss on train set:  -47.20064577877646
Iter:  2250  Loss on train set:  -46.90639400809287
Iter:  2251  Loss on train set:  -39.16343250310022
Iter:  2252  Loss on train set:  -39.79275020775259
Iter:  2253  Loss on train set:  -40.45263751039863
Iter:  2254  Loss on train set:  -42.746958123596585
Iter:  2255  Loss on train set:  -49.22779579610448
Iter:  2256  Loss on train set:  -51.02947929426916
Iter:  2257  Loss on train set:  -51.798324552350365
Iter:  2258  Loss on train set:  -51.265666062054905
Iter:  2259  Loss on train set:  -51.83460163439881
Iter:  2260  Loss on train set:  -51.560212253598586
Iter:  2261  Loss on train set:  -51.716025825169176
Iter:  2262  Loss on train set:  -50.808772035045706
Iter:  2263  Loss on train set:  -51.6366148250986
Iter:  

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2296  Loss on train set:  -39.069046952483845
Iter:  2297  Loss on train set:  -43.53606953143264
Iter:  2298  Loss on train set:  -41.797320032603594
Iter:  2299  Loss on train set:  -45.4261696670607
Iter:  2300  Loss on train set:  -47.20769982140639
Iter:  2301  Loss on train set:  -42.31756175620679
Iter:  2302  Loss on train set:  -43.16622302017497
Iter:  2303  Loss on train set:  -47.61177615291446
Iter:  2304  Loss on train set:  -42.11286189245711
Iter:  2305  Loss on train set:  -46.10877169032458
Iter:  2306  Loss on train set:  -50.42647663134937
Iter:  2307  Loss on train set:  -51.41523627984504
Iter:  2308  Loss on train set:  -51.778473103949466
Iter:  2309  Loss on train set:  -51.80886865067712
Iter:  2310  Loss on train set:  -51.88020010026628
Iter:  2311  Loss on train set:  -51.59704953991117
Iter:  2312  Loss on train set:  -51.7074005197021
Iter:  2313  Loss on train set:  -51.76338625503981
Iter:  2314  Loss on train set:  -51.91798791860819
Iter:  2315

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2347  Loss on train set:  -52.06666491228914
Iter:  2348  Loss on train set:  -42.96209927964053
Iter:  2349  Loss on train set:  -52.06665965471069
Iter:  2350  Loss on train set:  -41.06775973328024
Iter:  2351  Loss on train set:  -46.42663080197363
Iter:  2352  Loss on train set:  -52.06666719313194
Iter:  2353  Loss on train set:  -48.05952125669992
Iter:  2354  Loss on train set:  -41.7251682950223
Iter:  2355  Loss on train set:  -42.60425566015801
Iter:  2356  Loss on train set:  -37.47949721537474
Iter:  2357  Loss on train set:  -47.6825791557642
Iter:  2358  Loss on train set:  -51.313755075740694
Iter:  2359  Loss on train set:  -52.06665639188478
Iter:  2360  Loss on train set:  -51.64401953090652
Iter:  2361  Loss on train set:  -52.066668064754445
Iter:  2362  Loss on train set:  -51.73568683278775
Iter:  2363  Loss on train set:  -52.066665744718826
Iter:  2364  Loss on train set:  -51.786582237533665
Iter:  2365  Loss on train set:  -51.9806445388137
Iter:  2366

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2398  Loss on train set:  -41.013251463496765
Iter:  2399  Loss on train set:  -52.08522897452667
Iter:  2400  Loss on train set:  -47.63352579191501
Iter:  2401  Loss on train set:  -48.0396994328776
Iter:  2402  Loss on train set:  -47.081207519977625
Iter:  2403  Loss on train set:  -40.63399302330053
Iter:  2404  Loss on train set:  -40.732219864271784
Iter:  2405  Loss on train set:  -39.87159115521878
Iter:  2406  Loss on train set:  -45.94416174998385
Iter:  2407  Loss on train set:  -34.643335455497144
Iter:  2408  Loss on train set:  -47.302971432568775
Iter:  2409  Loss on train set:  -51.817633848903306
Iter:  2410  Loss on train set:  -52.08523456674691
Iter:  2411  Loss on train set:  -51.76873362372039
Iter:  2412  Loss on train set:  -52.01050755302973
Iter:  2413  Loss on train set:  -51.750588853780435
Iter:  2414  Loss on train set:  -51.88441573824219
Iter:  2415  Loss on train set:  -51.2453947567129
Iter:  2416  Loss on train set:  -51.973687420509144
Iter: 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2449  Loss on train set:  -51.63199673992818
Iter:  2450  Loss on train set:  -40.91900044272296
Iter:  2451  Loss on train set:  -42.50031432630081
Iter:  2452  Loss on train set:  -41.07835534921348
Iter:  2453  Loss on train set:  -37.056148564028035
Iter:  2454  Loss on train set:  -40.41620499515889
Iter:  2455  Loss on train set:  -50.45315688822079
Iter:  2456  Loss on train set:  -47.694453992104854
Iter:  2457  Loss on train set:  -43.21644418011635
Iter:  2458  Loss on train set:  -44.419475945444205
Iter:  2459  Loss on train set:  -50.16361060093368
Iter:  2460  Loss on train set:  -51.41355210536972
Iter:  2461  Loss on train set:  -52.12315606070527
Iter:  2462  Loss on train set:  -51.38654442959458
Iter:  2463  Loss on train set:  -51.94696780982525
Iter:  2464  Loss on train set:  -50.951349609114416
Iter:  2465  Loss on train set:  -52.106433114155934
Iter:  2466  Loss on train set:  -51.552767379944775
Iter:  2467  Loss on train set:  -52.01103417196825
Iter: 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2500  Loss on train set:  -50.79131151743144
Iter:  2501  Loss on train set:  -47.52317471083197
Iter:  2502  Loss on train set:  -50.441453061237674
Iter:  2503  Loss on train set:  -52.135592591705105
Iter:  2504  Loss on train set:  -42.593753860645606
Iter:  2505  Loss on train set:  -46.42544441742064
Iter:  2506  Loss on train set:  -39.49579882392836
Iter:  2507  Loss on train set:  -52.135587160329095
Iter:  2508  Loss on train set:  -52.13559449163372
Iter:  2509  Loss on train set:  -42.89760713276826
Iter:  2510  Loss on train set:  -50.847718002020514
Iter:  2511  Loss on train set:  -52.01367744325161
Iter:  2512  Loss on train set:  -51.74786518274705
Iter:  2513  Loss on train set:  -52.135596724697095
Iter:  2514  Loss on train set:  -49.61961621586862
Iter:  2515  Loss on train set:  -51.76684278202214
Iter:  2516  Loss on train set:  -48.763119005285425
Iter:  2517  Loss on train set:  -52.13559324264231
Iter:  2518  Loss on train set:  -49.98273873113526
Iter:

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2551  Loss on train set:  -49.896664970190045
Iter:  2552  Loss on train set:  -39.630652610733705
Iter:  2553  Loss on train set:  -42.25126185543797
Iter:  2554  Loss on train set:  -39.08751203604878
Iter:  2555  Loss on train set:  -52.1392439045892
Iter:  2556  Loss on train set:  -42.61313443682637
Iter:  2557  Loss on train set:  -47.080959209866506
Iter:  2558  Loss on train set:  -52.13924690018311
Iter:  2559  Loss on train set:  -50.59924898866102
Iter:  2560  Loss on train set:  -42.893400408475884
Iter:  2561  Loss on train set:  -48.632180339166524
Iter:  2562  Loss on train set:  -51.92000640100934
Iter:  2563  Loss on train set:  -49.68487830587727
Iter:  2564  Loss on train set:  -52.13924171254464
Iter:  2565  Loss on train set:  -48.601645706557676
Iter:  2566  Loss on train set:  -51.76658310448158
Iter:  2567  Loss on train set:  -49.87849453639757
Iter:  2568  Loss on train set:  -51.597757034989606
Iter:  2569  Loss on train set:  -52.13924789683155
Iter: 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2602  Loss on train set:  -52.15157979629726
Iter:  2603  Loss on train set:  -43.07516566751186
Iter:  2604  Loss on train set:  -52.15157835458953
Iter:  2605  Loss on train set:  -41.351648948126495
Iter:  2606  Loss on train set:  -50.748256973656254
Iter:  2607  Loss on train set:  -40.015205000689164
Iter:  2608  Loss on train set:  -43.14832340980933
Iter:  2609  Loss on train set:  -49.396391368845414
Iter:  2610  Loss on train set:  -46.6827426697227
Iter:  2611  Loss on train set:  -41.55337376252575
Iter:  2612  Loss on train set:  -49.14555828430166
Iter:  2613  Loss on train set:  -51.35911822277311
Iter:  2614  Loss on train set:  -52.151579443275594
Iter:  2615  Loss on train set:  -51.611541770743855
Iter:  2616  Loss on train set:  -52.15157937417346
Iter:  2617  Loss on train set:  -51.328461583717996
Iter:  2618  Loss on train set:  -52.10634184952755
Iter:  2619  Loss on train set:  -51.71198469699701
Iter:  2620  Loss on train set:  -52.087222752663195
Iter:

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2653  Loss on train set:  -40.583736274266066
Iter:  2654  Loss on train set:  -40.19887472939921
Iter:  2655  Loss on train set:  -47.385116886191874
Iter:  2656  Loss on train set:  -47.478155216855804
Iter:  2657  Loss on train set:  -47.78045157478181
Iter:  2658  Loss on train set:  -42.70656415580345
Iter:  2659  Loss on train set:  -52.177035911101214
Iter:  2660  Loss on train set:  -46.265351953545874
Iter:  2661  Loss on train set:  -42.450106225113764
Iter:  2662  Loss on train set:  -41.58110667958385
Iter:  2663  Loss on train set:  -49.482457005756665
Iter:  2664  Loss on train set:  -51.01699578646465
Iter:  2665  Loss on train set:  -52.092930518912425
Iter:  2666  Loss on train set:  -51.44552983633239
Iter:  2667  Loss on train set:  -52.11055574453564
Iter:  2668  Loss on train set:  -51.813937860627384
Iter:  2669  Loss on train set:  -52.177038162651705
Iter:  2670  Loss on train set:  -51.96340020359162
Iter:  2671  Loss on train set:  -52.04524207917169
It

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2704  Loss on train set:  -52.18238736294915
Iter:  2705  Loss on train set:  -37.17113064851103
Iter:  2706  Loss on train set:  -52.18238144559604
Iter:  2707  Loss on train set:  -52.18238263061934
Iter:  2708  Loss on train set:  -52.18238169266218
Iter:  2709  Loss on train set:  -44.051961185646164
Iter:  2710  Loss on train set:  -45.86879102174274
Iter:  2711  Loss on train set:  -52.18238224014496
Iter:  2712  Loss on train set:  -49.27425091179485
Iter:  2713  Loss on train set:  -41.736752980648056
Iter:  2714  Loss on train set:  -49.46423395782527
Iter:  2715  Loss on train set:  -51.410777673696096
Iter:  2716  Loss on train set:  -52.182395915698706
Iter:  2717  Loss on train set:  -51.374855109848376
Iter:  2718  Loss on train set:  -52.182391347547764
Iter:  2719  Loss on train set:  -51.8304599891905
Iter:  2720  Loss on train set:  -52.182392848946954
Iter:  2721  Loss on train set:  -51.99408698220982
Iter:  2722  Loss on train set:  -52.18238797009784
Iter: 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2755  Loss on train set:  -42.60716086488102
Iter:  2756  Loss on train set:  -47.22766133997591
Iter:  2757  Loss on train set:  -49.208781159490734
Iter:  2758  Loss on train set:  -43.64802986853428
Iter:  2759  Loss on train set:  -52.18605739757738
Iter:  2760  Loss on train set:  -45.99563142395378
Iter:  2761  Loss on train set:  -41.40217719698617
Iter:  2762  Loss on train set:  -46.27949000376116
Iter:  2763  Loss on train set:  -37.29415463024043
Iter:  2764  Loss on train set:  -40.32005772347782
Iter:  2765  Loss on train set:  -49.581823520671534
Iter:  2766  Loss on train set:  -51.962151709354906
Iter:  2767  Loss on train set:  -50.91801228838378
Iter:  2768  Loss on train set:  -50.07245402592855
Iter:  2769  Loss on train set:  -51.41006637305367
Iter:  2770  Loss on train set:  -52.186060054992375
Iter:  2771  Loss on train set:  -51.6777969693857
Iter:  2772  Loss on train set:  -52.0566631524477
Iter:  2773  Loss on train set:  -51.595178513782955
Iter:  27

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2806  Loss on train set:  -45.3364465731751
Iter:  2807  Loss on train set:  -39.77161699897165
Iter:  2808  Loss on train set:  -46.4810892161357
Iter:  2809  Loss on train set:  -41.124354995667645
Iter:  2810  Loss on train set:  -52.19627043702954
Iter:  2811  Loss on train set:  -50.20967320278911
Iter:  2812  Loss on train set:  -45.47435319178316
Iter:  2813  Loss on train set:  -39.05968908299754
Iter:  2814  Loss on train set:  -47.436726753252906
Iter:  2815  Loss on train set:  -44.001281339537535
Iter:  2816  Loss on train set:  -50.050609077802704
Iter:  2817  Loss on train set:  -51.6013610798411
Iter:  2818  Loss on train set:  -52.03105038144305
Iter:  2819  Loss on train set:  -51.517014358293295
Iter:  2820  Loss on train set:  -52.19627686403078
Iter:  2821  Loss on train set:  -51.1452099104022
Iter:  2822  Loss on train set:  -52.14535128732166
Iter:  2823  Loss on train set:  -51.688296999500686
Iter:  2824  Loss on train set:  -52.128805731601936
Iter:  28

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2857  Loss on train set:  -48.14359553838554
Iter:  2858  Loss on train set:  -45.950639807100025
Iter:  2859  Loss on train set:  -51.886831665968295
Iter:  2860  Loss on train set:  -52.26940004038072
Iter:  2861  Loss on train set:  -52.26940426048049
Iter:  2862  Loss on train set:  -50.892298652657864
Iter:  2863  Loss on train set:  -47.38429409892912
Iter:  2864  Loss on train set:  -51.72994189957322
Iter:  2865  Loss on train set:  -43.227390851925136
Iter:  2866  Loss on train set:  -45.16255955651573
Iter:  2867  Loss on train set:  -50.927579833835644
Iter:  2868  Loss on train set:  -52.21766237773334
Iter:  2869  Loss on train set:  -50.49865624809452
Iter:  2870  Loss on train set:  -52.2694001856569
Iter:  2871  Loss on train set:  -50.56702447736448
Iter:  2872  Loss on train set:  -51.9890510512968
Iter:  2873  Loss on train set:  -52.26939335389956
Iter:  2874  Loss on train set:  -52.1861499405269
Iter:  2875  Loss on train set:  -52.26009588568721
Iter:  287

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2908  Loss on train set:  -42.90972157187042
Iter:  2909  Loss on train set:  -41.61417214096756
Iter:  2910  Loss on train set:  -45.884721512129225
Iter:  2911  Loss on train set:  -41.426989446431215
Iter:  2912  Loss on train set:  -51.857616508400696
Iter:  2913  Loss on train set:  -40.65458628571699
Iter:  2914  Loss on train set:  -46.73948633847266
Iter:  2915  Loss on train set:  -47.55341618515156
Iter:  2916  Loss on train set:  -47.51500703579805
Iter:  2917  Loss on train set:  -38.75525967105268
Iter:  2918  Loss on train set:  -48.944377086182214
Iter:  2919  Loss on train set:  -51.56895914769308
Iter:  2920  Loss on train set:  -52.14219555810357
Iter:  2921  Loss on train set:  -51.795417864068575
Iter:  2922  Loss on train set:  -52.261601913178026
Iter:  2923  Loss on train set:  -51.5626413904374
Iter:  2924  Loss on train set:  -52.09831160717663
Iter:  2925  Loss on train set:  -51.57824617850312
Iter:  2926  Loss on train set:  -52.195407031099194
Iter: 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  2959  Loss on train set:  -45.9332668080285
Iter:  2960  Loss on train set:  -47.53113066821723
Iter:  2961  Loss on train set:  -48.88153640402279
Iter:  2962  Loss on train set:  -38.05045459304297
Iter:  2963  Loss on train set:  -41.659055462202915
Iter:  2964  Loss on train set:  -52.315043897990314
Iter:  2965  Loss on train set:  -41.102940243551245
Iter:  2966  Loss on train set:  -52.315041246119826
Iter:  2967  Loss on train set:  -52.31504000314927
Iter:  2968  Loss on train set:  -35.86724676052489
Iter:  2969  Loss on train set:  -50.55199553490357
Iter:  2970  Loss on train set:  -52.0738741554411
Iter:  2971  Loss on train set:  -51.007642765992124
Iter:  2972  Loss on train set:  -51.67678634659449
Iter:  2973  Loss on train set:  -49.486614177133255
Iter:  2974  Loss on train set:  -51.13291074662276
Iter:  2975  Loss on train set:  -52.31503487398553
Iter:  2976  Loss on train set:  -51.69996392834672
Iter:  2977  Loss on train set:  -52.3150418401299
Iter:  29

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3010  Loss on train set:  -46.85297365576895
Iter:  3011  Loss on train set:  -52.31727233698507
Iter:  3012  Loss on train set:  -51.7878983697465
Iter:  3013  Loss on train set:  -50.931814431674816
Iter:  3014  Loss on train set:  -47.719465946484455
Iter:  3015  Loss on train set:  -45.85332038913634
Iter:  3016  Loss on train set:  -52.31726960545891
Iter:  3017  Loss on train set:  -41.50067924381031
Iter:  3018  Loss on train set:  -51.85880726832876
Iter:  3019  Loss on train set:  -43.244148704366374
Iter:  3020  Loss on train set:  -50.73777911918824
Iter:  3021  Loss on train set:  -52.225070332270754
Iter:  3022  Loss on train set:  -52.3172770899159
Iter:  3023  Loss on train set:  -52.28455703040166
Iter:  3024  Loss on train set:  -52.251745979499375
Iter:  3025  Loss on train set:  -51.735164271186726
Iter:  3026  Loss on train set:  -52.31727291789592
Iter:  3027  Loss on train set:  -51.82887127381153
Iter:  3028  Loss on train set:  -52.311162136229854
Iter:  

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3061  Loss on train set:  -51.86145265366273
Iter:  3062  Loss on train set:  -39.285801064092354
Iter:  3063  Loss on train set:  -52.31788971197845
Iter:  3064  Loss on train set:  -52.317887789316785
Iter:  3065  Loss on train set:  -43.76997668323827
Iter:  3066  Loss on train set:  -50.787294600799044
Iter:  3067  Loss on train set:  -47.27673482905241
Iter:  3068  Loss on train set:  -52.31788954702928
Iter:  3069  Loss on train set:  -52.317891666556754
Iter:  3070  Loss on train set:  -47.42019428491283
Iter:  3071  Loss on train set:  -47.591323054476646
Iter:  3072  Loss on train set:  -52.29279749453469
Iter:  3073  Loss on train set:  -49.98986879939496
Iter:  3074  Loss on train set:  -52.31789269405207
Iter:  3075  Loss on train set:  -50.88192507682395
Iter:  3076  Loss on train set:  -52.317893632079816
Iter:  3077  Loss on train set:  -51.94684108729724
Iter:  3078  Loss on train set:  -52.3178939621653
Iter:  3079  Loss on train set:  -50.92897058437239
Iter:  

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3112  Loss on train set:  -48.19397255296418
Iter:  3113  Loss on train set:  -40.94228625255934
Iter:  3114  Loss on train set:  -43.721225778634945
Iter:  3115  Loss on train set:  -46.41879638301234
Iter:  3116  Loss on train set:  -52.33647071672778
Iter:  3117  Loss on train set:  -42.1845962642988
Iter:  3118  Loss on train set:  -41.00192884510467
Iter:  3119  Loss on train set:  -40.19771735115601
Iter:  3120  Loss on train set:  -50.94887050038743
Iter:  3121  Loss on train set:  -36.708652632839645
Iter:  3122  Loss on train set:  -47.89837203695682
Iter:  3123  Loss on train set:  -51.740961970514704
Iter:  3124  Loss on train set:  -52.24912077164022
Iter:  3125  Loss on train set:  -51.65103250053887
Iter:  3126  Loss on train set:  -52.33647565816679
Iter:  3127  Loss on train set:  -51.90950891922941
Iter:  3128  Loss on train set:  -52.16394907347437
Iter:  3129  Loss on train set:  -51.80733080795708
Iter:  3130  Loss on train set:  -52.312077370472686
Iter:  31

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3163  Loss on train set:  -43.322936383094785
Iter:  3164  Loss on train set:  -50.08649487185938
Iter:  3165  Loss on train set:  -40.204350110832834
Iter:  3166  Loss on train set:  -42.18400201903135
Iter:  3167  Loss on train set:  -40.56690280798239
Iter:  3168  Loss on train set:  -42.2749262459014
Iter:  3169  Loss on train set:  -46.890942237783875
Iter:  3170  Loss on train set:  -47.65231093517237
Iter:  3171  Loss on train set:  -52.35433116458924
Iter:  3172  Loss on train set:  -40.5881565895729
Iter:  3173  Loss on train set:  -50.025354853532804
Iter:  3174  Loss on train set:  -52.19508051638471
Iter:  3175  Loss on train set:  -49.02470570456936
Iter:  3176  Loss on train set:  -51.59136705619245
Iter:  3177  Loss on train set:  -50.2052170202808
Iter:  3178  Loss on train set:  -51.829047097313214
Iter:  3179  Loss on train set:  -49.70552415503064
Iter:  3180  Loss on train set:  -51.063068325971656
Iter:  3181  Loss on train set:  -51.61945615319907
Iter:  31

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3214  Loss on train set:  -52.36093348420006
Iter:  3215  Loss on train set:  -46.3848033504802
Iter:  3216  Loss on train set:  -50.75472625087411
Iter:  3217  Loss on train set:  -40.08608480150408
Iter:  3218  Loss on train set:  -49.98804505689895
Iter:  3219  Loss on train set:  -43.68937655999968
Iter:  3220  Loss on train set:  -51.88815410107288
Iter:  3221  Loss on train set:  -37.22495866896752
Iter:  3222  Loss on train set:  -49.85738843423796
Iter:  3223  Loss on train set:  -41.77217566912795
Iter:  3224  Loss on train set:  -49.5685349138229
Iter:  3225  Loss on train set:  -51.85697477251155
Iter:  3226  Loss on train set:  -52.360940570571074
Iter:  3227  Loss on train set:  -52.279464388453626
Iter:  3228  Loss on train set:  -52.31850357717138
Iter:  3229  Loss on train set:  -51.541098594873034
Iter:  3230  Loss on train set:  -52.361823932339355
Iter:  3231  Loss on train set:  -51.781974111062965
Iter:  3232  Loss on train set:  -52.30084853694107
Iter:  32

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3265  Loss on train set:  -40.597250903919914
Iter:  3266  Loss on train set:  -43.41866240712242
Iter:  3267  Loss on train set:  -52.36912061693304
Iter:  3268  Loss on train set:  -39.699888082440914
Iter:  3269  Loss on train set:  -52.36911875093012
Iter:  3270  Loss on train set:  -40.08337219599147
Iter:  3271  Loss on train set:  -37.617352191312285
Iter:  3272  Loss on train set:  -47.87025681875037
Iter:  3273  Loss on train set:  -50.67881485683239
Iter:  3274  Loss on train set:  -44.62032540672833
Iter:  3275  Loss on train set:  -49.70390041405519
Iter:  3276  Loss on train set:  -50.01840142241754
Iter:  3277  Loss on train set:  -51.7936239388746
Iter:  3278  Loss on train set:  -52.36912055021387
Iter:  3279  Loss on train set:  -51.13084950700319
Iter:  3280  Loss on train set:  -52.369118558393986
Iter:  3281  Loss on train set:  -51.542055093235774
Iter:  3282  Loss on train set:  -52.28273901477908
Iter:  3283  Loss on train set:  -51.34096323476474
Iter:  3

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3316  Loss on train set:  -39.86105006223363
Iter:  3317  Loss on train set:  -43.673308699358955
Iter:  3318  Loss on train set:  -40.57200238714511
Iter:  3319  Loss on train set:  -46.6279885742531
Iter:  3320  Loss on train set:  -41.71824077577677
Iter:  3321  Loss on train set:  -45.809912046249906
Iter:  3322  Loss on train set:  -49.834458189549345
Iter:  3323  Loss on train set:  -47.03356325203689
Iter:  3324  Loss on train set:  -50.041576748578606
Iter:  3325  Loss on train set:  -41.72027842096594
Iter:  3326  Loss on train set:  -49.58489437220742
Iter:  3327  Loss on train set:  -51.62312398431105
Iter:  3328  Loss on train set:  -52.16548908040602
Iter:  3329  Loss on train set:  -51.758493316663056
Iter:  3330  Loss on train set:  -52.26808951536623
Iter:  3331  Loss on train set:  -51.844070439176356
Iter:  3332  Loss on train set:  -52.30788360975409
Iter:  3333  Loss on train set:  -51.88773643517333
Iter:  3334  Loss on train set:  -52.311532291511796
Iter: 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3367  Loss on train set:  -47.86829071631725
Iter:  3368  Loss on train set:  -46.99277867680425
Iter:  3369  Loss on train set:  -52.37954679954044
Iter:  3370  Loss on train set:  -44.516131751029675
Iter:  3371  Loss on train set:  -50.64950261181858
Iter:  3372  Loss on train set:  -42.51495352055616
Iter:  3373  Loss on train set:  -48.51891346501852
Iter:  3374  Loss on train set:  -39.71268927281182
Iter:  3375  Loss on train set:  -50.900092942152796
Iter:  3376  Loss on train set:  -43.87802520743676
Iter:  3377  Loss on train set:  -50.102718608110266
Iter:  3378  Loss on train set:  -52.01257835243631
Iter:  3379  Loss on train set:  -52.37955453646255
Iter:  3380  Loss on train set:  -52.074670029578456
Iter:  3381  Loss on train set:  -52.34308435809055
Iter:  3382  Loss on train set:  -51.80665497000522
Iter:  3383  Loss on train set:  -52.279722007495565
Iter:  3384  Loss on train set:  -51.74068789192888
Iter:  3385  Loss on train set:  -52.35534772894661
Iter:  

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3418  Loss on train set:  -52.38349881882599
Iter:  3419  Loss on train set:  -52.383505237827805
Iter:  3420  Loss on train set:  -37.750618323898884
Iter:  3421  Loss on train set:  -40.107570534206516
Iter:  3422  Loss on train set:  -40.91343660223308
Iter:  3423  Loss on train set:  -42.817716445506285
Iter:  3424  Loss on train set:  -39.960233138663455
Iter:  3425  Loss on train set:  -39.11307298909649
Iter:  3426  Loss on train set:  -52.383499134778944
Iter:  3427  Loss on train set:  -45.712305195691286
Iter:  3428  Loss on train set:  -50.63987928258915
Iter:  3429  Loss on train set:  -52.383497751597446
Iter:  3430  Loss on train set:  -47.95962301694544
Iter:  3431  Loss on train set:  -51.451566980340345
Iter:  3432  Loss on train set:  -52.38350598978796
Iter:  3433  Loss on train set:  -51.439870098013635
Iter:  3434  Loss on train set:  -52.19650722678827
Iter:  3435  Loss on train set:  -51.237235232235484
Iter:  3436  Loss on train set:  -52.3834974230885
It

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3469  Loss on train set:  -42.643535422941206
Iter:  3470  Loss on train set:  -52.01475193511551
Iter:  3471  Loss on train set:  -50.166732160712314
Iter:  3472  Loss on train set:  -45.98556104630393
Iter:  3473  Loss on train set:  -42.911543261693694
Iter:  3474  Loss on train set:  -49.76173181360416
Iter:  3475  Loss on train set:  -42.851569871285655
Iter:  3476  Loss on train set:  -52.397472724775184
Iter:  3477  Loss on train set:  -48.376739431448925
Iter:  3478  Loss on train set:  -39.0312823277533
Iter:  3479  Loss on train set:  -48.740349119978646
Iter:  3480  Loss on train set:  -51.65421257836665
Iter:  3481  Loss on train set:  -52.38195357087255
Iter:  3482  Loss on train set:  -52.258971823331294
Iter:  3483  Loss on train set:  -52.261044338602254
Iter:  3484  Loss on train set:  -52.186216219096956
Iter:  3485  Loss on train set:  -52.34145911046811
Iter:  3486  Loss on train set:  -51.54415643280496
Iter:  3487  Loss on train set:  -52.39747009185056
Ite

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3520  Loss on train set:  -50.88410684132998
Iter:  3521  Loss on train set:  -40.09883696856634
Iter:  3522  Loss on train set:  -42.69510796917735
Iter:  3523  Loss on train set:  -43.33790903116205
Iter:  3524  Loss on train set:  -47.90328425762148
Iter:  3525  Loss on train set:  -44.777090405139155
Iter:  3526  Loss on train set:  -52.00680527281643
Iter:  3527  Loss on train set:  -42.69090457804056
Iter:  3528  Loss on train set:  -46.47819149235644
Iter:  3529  Loss on train set:  -39.31576781461435
Iter:  3530  Loss on train set:  -48.698602231644564
Iter:  3531  Loss on train set:  -51.61246103681278
Iter:  3532  Loss on train set:  -52.38079467483209
Iter:  3533  Loss on train set:  -51.99438016867847
Iter:  3534  Loss on train set:  -52.333663310204216
Iter:  3535  Loss on train set:  -51.742245418331336
Iter:  3536  Loss on train set:  -52.38513990931513
Iter:  3537  Loss on train set:  -51.90894106669574
Iter:  3538  Loss on train set:  -52.30464627879658
Iter:  3

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3571  Loss on train set:  -42.6764990979396
Iter:  3572  Loss on train set:  -43.823317768647655
Iter:  3573  Loss on train set:  -41.12881992129562
Iter:  3574  Loss on train set:  -40.11419965918236
Iter:  3575  Loss on train set:  -49.700095639909414
Iter:  3576  Loss on train set:  -43.45076391005316
Iter:  3577  Loss on train set:  -52.40344933665163
Iter:  3578  Loss on train set:  -52.40344992495726
Iter:  3579  Loss on train set:  -52.40345356737961
Iter:  3580  Loss on train set:  -46.85781184256529
Iter:  3581  Loss on train set:  -49.368160593831135
Iter:  3582  Loss on train set:  -51.596080141465734
Iter:  3583  Loss on train set:  -50.06382390183815
Iter:  3584  Loss on train set:  -52.19176180569904
Iter:  3585  Loss on train set:  -48.521599220467664
Iter:  3586  Loss on train set:  -52.403451720020314
Iter:  3587  Loss on train set:  -49.96981020390989
Iter:  3588  Loss on train set:  -52.40344299786783
Iter:  3589  Loss on train set:  -50.90897977728063
Iter:  

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3622  Loss on train set:  -47.66752366256842
Iter:  3623  Loss on train set:  -46.50200608199668
Iter:  3624  Loss on train set:  -37.8018990895612
Iter:  3625  Loss on train set:  -47.51929471230967
Iter:  3626  Loss on train set:  -47.68483587430065
Iter:  3627  Loss on train set:  -50.09530013532589
Iter:  3628  Loss on train set:  -45.8358246462121
Iter:  3629  Loss on train set:  -52.4119985048801
Iter:  3630  Loss on train set:  -52.4119930073285
Iter:  3631  Loss on train set:  -41.49522185380484
Iter:  3632  Loss on train set:  -49.505891949098654
Iter:  3633  Loss on train set:  -52.020670289021886
Iter:  3634  Loss on train set:  -52.311526439060344
Iter:  3635  Loss on train set:  -51.65441154827773
Iter:  3636  Loss on train set:  -52.40635358668356
Iter:  3637  Loss on train set:  -51.836598783289276
Iter:  3638  Loss on train set:  -52.38072251479657
Iter:  3639  Loss on train set:  -51.922579263729865
Iter:  3640  Loss on train set:  -52.411998238353675
Iter:  364

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3673  Loss on train set:  -47.561211825056276
Iter:  3674  Loss on train set:  -42.714185173101065
Iter:  3675  Loss on train set:  -51.87833553436475
Iter:  3676  Loss on train set:  -48.50790130098552
Iter:  3677  Loss on train set:  -41.086160254812185
Iter:  3678  Loss on train set:  -52.41483111800974
Iter:  3679  Loss on train set:  -51.988943653750574
Iter:  3680  Loss on train set:  -41.085239651633486
Iter:  3681  Loss on train set:  -52.4148256095542
Iter:  3682  Loss on train set:  -42.49887165765399
Iter:  3683  Loss on train set:  -49.68256207688988
Iter:  3684  Loss on train set:  -52.37103144456415
Iter:  3685  Loss on train set:  -51.16564369217973
Iter:  3686  Loss on train set:  -52.09483617071703
Iter:  3687  Loss on train set:  -49.257290198960504
Iter:  3688  Loss on train set:  -51.625917551901935
Iter:  3689  Loss on train set:  -52.414818853984094
Iter:  3690  Loss on train set:  -51.8290090468131
Iter:  3691  Loss on train set:  -52.402486102380664
Iter:

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3724  Loss on train set:  -42.750319503147544
Iter:  3725  Loss on train set:  -42.0834495542827
Iter:  3726  Loss on train set:  -46.5287664399943
Iter:  3727  Loss on train set:  -40.05573044930426
Iter:  3728  Loss on train set:  -52.41645742305918
Iter:  3729  Loss on train set:  -52.416460442307
Iter:  3730  Loss on train set:  -49.78316132022286
Iter:  3731  Loss on train set:  -46.211689611642775
Iter:  3732  Loss on train set:  -38.776504849546484
Iter:  3733  Loss on train set:  -41.84186362622036
Iter:  3734  Loss on train set:  -49.47976578761035
Iter:  3735  Loss on train set:  -51.94662642846729
Iter:  3736  Loss on train set:  -50.2650050437981
Iter:  3737  Loss on train set:  -52.41646156435988
Iter:  3738  Loss on train set:  -48.72052380713843
Iter:  3739  Loss on train set:  -52.2241766730886
Iter:  3740  Loss on train set:  -51.4752829450008
Iter:  3741  Loss on train set:  -52.03011817835824
Iter:  3742  Loss on train set:  -52.41093683539928
Iter:  3743  Los

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3775  Loss on train set:  -52.419167901105425
Iter:  3776  Loss on train set:  -40.683638926637094
Iter:  3777  Loss on train set:  -42.72779219505874
Iter:  3778  Loss on train set:  -50.95538725564607
Iter:  3779  Loss on train set:  -47.69602621186541
Iter:  3780  Loss on train set:  -40.14708538845993
Iter:  3781  Loss on train set:  -52.419173484817
Iter:  3782  Loss on train set:  -50.07240658528702
Iter:  3783  Loss on train set:  -47.06442724724524
Iter:  3784  Loss on train set:  -41.39841315773107
Iter:  3785  Loss on train set:  -49.277859244633966
Iter:  3786  Loss on train set:  -52.41916778996546
Iter:  3787  Loss on train set:  -49.71971953393061
Iter:  3788  Loss on train set:  -52.30195720960644
Iter:  3789  Loss on train set:  -51.190190483143766
Iter:  3790  Loss on train set:  -49.27826332668992
Iter:  3791  Loss on train set:  -51.78118691688655
Iter:  3792  Loss on train set:  -52.419164071925934
Iter:  3793  Loss on train set:  -52.23330979998576
Iter:  37

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3826  Loss on train set:  -52.42235743499791
Iter:  3827  Loss on train set:  -46.21284618992737
Iter:  3828  Loss on train set:  -41.262168719442556
Iter:  3829  Loss on train set:  -49.68708312715727
Iter:  3830  Loss on train set:  -46.293562654181144
Iter:  3831  Loss on train set:  -41.21633773740912
Iter:  3832  Loss on train set:  -52.42235866129427
Iter:  3833  Loss on train set:  -52.42235772780461
Iter:  3834  Loss on train set:  -46.16917181001937
Iter:  3835  Loss on train set:  -35.97492865376639
Iter:  3836  Loss on train set:  -47.962819031405466
Iter:  3837  Loss on train set:  -51.95070007605144
Iter:  3838  Loss on train set:  -52.422355122183994
Iter:  3839  Loss on train set:  -51.95240370449006
Iter:  3840  Loss on train set:  -52.3832473922284
Iter:  3841  Loss on train set:  -51.965952088650624
Iter:  3842  Loss on train set:  -52.422356237280724
Iter:  3843  Loss on train set:  -51.769222636394296
Iter:  3844  Loss on train set:  -52.42235760579481
Iter: 

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3877  Loss on train set:  -48.45609785955844
Iter:  3878  Loss on train set:  -40.12518786379275
Iter:  3879  Loss on train set:  -46.50637881962689
Iter:  3880  Loss on train set:  -52.434902692397145
Iter:  3881  Loss on train set:  -45.38811414520407
Iter:  3882  Loss on train set:  -50.692242129959965
Iter:  3883  Loss on train set:  -52.434910572343
Iter:  3884  Loss on train set:  -43.93898230831335
Iter:  3885  Loss on train set:  -47.74302137449711
Iter:  3886  Loss on train set:  -44.881080573484844
Iter:  3887  Loss on train set:  -49.51098843908119
Iter:  3888  Loss on train set:  -52.146765035419655
Iter:  3889  Loss on train set:  -50.8234652484629
Iter:  3890  Loss on train set:  -52.43491173454207
Iter:  3891  Loss on train set:  -50.573992399928265
Iter:  3892  Loss on train set:  -52.32576318566198
Iter:  3893  Loss on train set:  -51.83567495625061
Iter:  3894  Loss on train set:  -51.93156882527911
Iter:  3895  Loss on train set:  -52.42844747305481
Iter:  389

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3928  Loss on train set:  -52.439913947358534
Iter:  3929  Loss on train set:  -49.683337601446
Iter:  3930  Loss on train set:  -41.31988420806464
Iter:  3931  Loss on train set:  -50.6296111198937
Iter:  3932  Loss on train set:  -47.66018041838137
Iter:  3933  Loss on train set:  -52.439915401497906
Iter:  3934  Loss on train set:  -52.43991676202156
Iter:  3935  Loss on train set:  -40.04006709878055
Iter:  3936  Loss on train set:  -52.43991588551357
Iter:  3937  Loss on train set:  -40.388891033911484
Iter:  3938  Loss on train set:  -49.20382087702217
Iter:  3939  Loss on train set:  -51.83475133780131
Iter:  3940  Loss on train set:  -52.439915893616565
Iter:  3941  Loss on train set:  -52.24472373576927
Iter:  3942  Loss on train set:  -52.405470308531655
Iter:  3943  Loss on train set:  -52.074913552694824
Iter:  3944  Loss on train set:  -52.43991279358243
Iter:  3945  Loss on train set:  -51.55190860309216
Iter:  3946  Loss on train set:  -52.439918514710456
Iter:  3

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  3979  Loss on train set:  -52.440580785304206
Iter:  3980  Loss on train set:  -42.560649017401055
Iter:  3981  Loss on train set:  -47.67027013384785
Iter:  3982  Loss on train set:  -39.66088203020692
Iter:  3983  Loss on train set:  -43.55753740013025
Iter:  3984  Loss on train set:  -48.43232535938608
Iter:  3985  Loss on train set:  -40.166991932884635
Iter:  3986  Loss on train set:  -40.578096656537014
Iter:  3987  Loss on train set:  -41.271009467310435
Iter:  3988  Loss on train set:  -40.62577678806234
Iter:  3989  Loss on train set:  -49.00068514023417
Iter:  3990  Loss on train set:  -51.75086638737793
Iter:  3991  Loss on train set:  -52.440572920032295
Iter:  3992  Loss on train set:  -52.08519292369413
Iter:  3993  Loss on train set:  -52.235589874899965
Iter:  3994  Loss on train set:  -51.60602360531109
Iter:  3995  Loss on train set:  -52.36378058772272
Iter:  3996  Loss on train set:  -51.688995902163235
Iter:  3997  Loss on train set:  -52.33503536479266
Iter

  warn('Method %s does not use gradient information (jac).' % method,


Iter:  4030  Loss on train set:  -44.319672902971924
Iter:  4031  Loss on train set:  -46.22638852273992
Iter:  4032  Loss on train set:  -46.26598037350667
Iter:  4033  Loss on train set:  -41.043115878636044
Iter:  4034  Loss on train set:  -48.21817494861368
Iter:  4035  Loss on train set:  -51.926474918605315
Iter:  4036  Loss on train set:  -39.90411612464985
Iter:  4037  Loss on train set:  -52.44419093646042
Iter:  4038  Loss on train set:  -46.804216344967024
Iter:  4039  Loss on train set:  -49.783989149482046
Iter:  4040  Loss on train set:  -50.34402079375843
Iter:  4041  Loss on train set:  -52.03335882340769
Iter:  4042  Loss on train set:  -51.03822817885213
Iter:  4043  Loss on train set:  -52.1493940261313
Iter:  4044  Loss on train set:  -47.729837436007095
Iter:  4045  Loss on train set:  -52.40249859744721
Iter:  4046  Loss on train set:  -47.51092936514785
Iter:  4047  Loss on train set:  -52.3058764948053
Iter:  4048  Loss on train set:  -52.44418488221676
Iter:  4

### The "classical" VQE

In [None]:
def vqe_energy_loss(circuit_params,qubits, c_hamilt_params, n_meas_reps=1000):
    """
    Compute the loss function of a VQE
    
    Args:
    ----------------
        circuit_params [np.array]: parameters of the circuit
        qubits [list]: list of qubits
        c_hamilt_params [dict]: parameters of the hamiltonian
        n_meas_reps [int]: number of measurement repetitions
    
    Returns:
    ----------------
        loss [float]: loss function value
    """
    
    return direct_hamilt_expectation(qubits, circuit_params, c_hamilt_params)
    #return meas_hamilt_expectation(qubits, circuit_params, c_hamilt_params, n_meas_reps)

def optimize_vqe(qubits,hamilt_params, init_circuit_params, n_meas_reps=1000):
    """
    Optimize a VQE
    
    Args:
    ----------------
        qubits [list]: list of qubits
        hamilt_params [dict]: parameters of the hamiltonian
        init_circuit_params [np.array]: initial parameters of the circuit
        n_meas_reps [int]: number of measurement repetitions
    
    Returns:
    ----------------
        opt_result [OptimizeResult]: result of the optimization
    """
    
    def callbackF(Xi):
        global Nfeval
        print("Iter: ", Nfeval, " Loss on train set: ", vqe_energy_loss(Xi, qubits, hamilt_params, n_meas_reps))
        Nfeval += 1
    
    print("Energy should converge to ", exact_hamilt_GS_energy(qubits, hamilt_params))
    print("Starting VQE minimization...")
    
    opt_result = minimize(vqe_energy_loss,
                            args=(qubits, hamilt_params, n_meas_reps),
                            x0=init_circuit_params,
                            method='COBYLA',
                            callback=callbackF,
                            )
    
    print(opt_result)
    return opt_result.x

def evaluate_vqe(qubits, hamilt_params_set, init_circuit_params=None, params_init_mode="normal_random", n_meas_reps=1000):
    """
    Evaluate a VQE on a set of hamiltonians
    
    Args:
    ----------------
        qubits [list]: list of qubits
        hamilt_params_set [list]: list of the parameters of the hamiltonians
        init_circuit_params [np.array]: initial parameters of the circuit
        params_init_mode [str]: mode for initializing the circuit parameters
        n_meas_reps [int]: number of measurement repetitions
    
    Returns:
    ----------------
        energy_expectations [np.array]: energy expectations of the hamiltonians
        exact_energy_expectations [np.array]: exact energy expectations of the hamiltonians
        abs_energy_errors [np.array]: absolute energy errors of the hamiltonians
    """
    
    energy_expectations = []
    exact_energy_expectations = []
    abs_energy_errors = []
    
    for hamilt_params_sample in hamilt_params_set:
        if init_circuit_params is None:
            init_circuit_params = initialize_circuit_params(qubits, params_init_mode)
    
        opt_circuit_params = optimize_vqe(qubits, hamilt_params_sample, params_init_mode, n_meas_reps)
        
        energy_expectation = direct_hamilt_expectation(qubits, opt_circuit_params, hamilt_params_sample, n_meas_reps)
        exact_energy_expectation = exact_hamilt_GS_energy(qubits, hamilt_params_sample)
        abs_energy_error = np.abs(energy_expectation - exact_energy_expectation)
        energy_expectations.append(energy_expectation)
        exact_energy_expectations.append(exact_energy_expectation)
        abs_energy_errors.append(abs_energy_error)
    
    return np.array(energy_expectations), np.array(abs_energy_errors)
    
def vqe(qubits, hamilt_params_set):
    """
    Convenience function for wrapping VQE routines
    
    Args:
    ----------------
        qubits [list]: list of qubits
        hamilt_params_set [list]: list of the parameters of the hamiltonians
        
    Returns:
    ----------------
        energy_expectations [list]: list of the energy expectations of the hamiltonians in the test set
        abs_energy_errors [list]: list of the absolute energy errors of the hamiltonians in the test set
    """
    
    energy_expectations, abs_energy_errors = evaluate_vqe(qubits, hamilt_params_set, params_init_mode="normal_random", n_meas_reps=1000)
    return energy_expectations, abs_energy_errors

In [145]:
qubits = cirq.LineQubit.range(8)

Nfeval = 1  

vqe_hamilt_params = {"lambda": 0.75, "Delta": 0.1}

vqe(qubits,hamilt_params=vqe_hamilt_params, params_init_mode="normal_random", n_meas_reps=100000)

Energy should converge to  -11.403722989559629
Starting VQE minimization...
Iter:  1  Loss on train set:  3.3393581853085332
Iter:  2  Loss on train set:  3.363206600542764
Iter:  3  Loss on train set:  3.3393580730270656
Iter:  4  Loss on train set:  3.3129355127064017
Iter:  5  Loss on train set:  3.312935295917041
Iter:  6  Loss on train set:  3.3336963990519557
Iter:  7  Loss on train set:  3.3129350972611995
Iter:  8  Loss on train set:  3.355789620554316
Iter:  9  Loss on train set:  3.3129353011139546
Iter:  10  Loss on train set:  3.2672526606345387
Iter:  11  Loss on train set:  3.267253519759116
Iter:  12  Loss on train set:  3.31256952664296
Iter:  13  Loss on train set:  3.2672526724716793
Iter:  14  Loss on train set:  3.2702085574577957
Iter:  15  Loss on train set:  3.267252822905118
Iter:  16  Loss on train set:  3.2707130465546452
Iter:  17  Loss on train set:  3.3448961532893433
Iter:  18  Loss on train set:  3.3482541574712372
Iter:  19  Loss on train set:  3.2403399

### The Opt-Meta-VQE

In [None]:
def opt_meta_vqe(qubits, hamilt_params_training_set, hamilt_params_test_set, n_meas_reps=1000):
    """
    Full routine to train and evaluate a opt-meta-VQE
    
    Args:
    ----------------
        qubits [list]: list of qubits
        hamilt_params_training_set [list]: list of the parameters of the hamiltonians in the training set
        hamilt_params_test_set [list]: list of the parameters of the hamiltonians in the test set
        n_meas_reps [int]: number of measurement repetitions
    
    Returns:
    ----------------
        energy_expectations [list]: list of the energy expectations of the hamiltonians in the test set
        abs_energy_errors [list]: list of the absolute energy errors of the hamiltonians in the test set
    """
    
    #Train meta-VQE:
    meta_vqe_opt_circuit_params = train_meta_vqe(qubits, hamilt_params_training_set, params_init_mode="normal_random")
    
    #Optimize a VQE on each test set sample:
    energy_expectations, abs_energy_errors = evaluate_vqe(qubits, hamilt_params_test_set, init_circuit_params=meta_vqe_opt_circuit_params)
    
    return energy_expectations, abs_energy_errors

In [None]:
qubits = cirq.LineQubit.range(5)

Nfeval = 1

train_set_size = 10
min_hamilt_params = {"lambda": 0.75, "Delta": -1.1}
max_hamilt_params = {"lambda": 0.75, "Delta": 1.1}

train_set = construct_training_set(train_set_size=train_set_size, mode="equidistant")
test_set = construct_test_set(test_set_size=10, mode="uniform_random", min_hamilt_params=min_hamilt_params, max_hamilt_params=max_hamilt_params)

opt_meta_vqe(qubits, train_set, test_set, n_meas_reps=1000)


### Concluding experiments

In [None]:
#Reproducing figure 2 of the meta-vqe paper: Absolute energy and ground state energy as a function of Delta
import matplotlib.pyplot as plt

fig, axes = plt.subfigure(2)
#axes[0].plot() #Exact GS energy
axes[0].plot() #VQE GS energy
axes[0].plot() #Meta-VQE GS energy
axes[0].plot() #Opt-Meta-VQE GS energy
axes[0].set_title("Ground State Energy")
axes[0].set_xlabel("Delta")
axes[0].set_ylabel("GS Energy")

axes[1].plot() #VQE Absolute Energy Error
axes[1].plot() #Meta-VQE Absolute Energy Error
axes[1].plot() #Opt-Meta-VQE Absolute Energy Error
axes[1].set_title("Absolute Energy Error")
axes[1].set_xlabel("Delta")
axes[1].set_ylabel("Absolute Energy Error")

fig.tight_layout()
fig.legend()

