In [None]:
# do all necessary imports for this chapter
using LinearAlgebra
using TensorOperations
using TensorKit
using KrylovKit
using OptimKit
using Printf
using Plots
include("TutorialFunctions.jl")
using .TutorialFunctions: createMPS, normalizeMPS, fixedPoints, rightOrthonormalize, mixedCanonical, expVal2Uniform, expVal2Mixed

# [Tangent-space methods for uniform matrix product states](https://doi.org/10.21468/SciPostPhysLectNotes.7)

## 2. Finding ground states of local Hamiltonians

In the previous chapter, we stated that uniform MPS can be used to efficiently approximate low-energy states of one-dimensional systems with gapped local Hamiltonians. Having defined ways of representing and manipulating MPS, the logical next step is therefore to have a look at how exactly they can be used to find ground states. To this end, we consider a nearest-neighbour Hamiltonian $H$  of the form

$$H = \sum_n h_{n, n+1}$$

acting on an infinite one-dimensional system. Here, $h_{n,n+1}$ is a hermitian operator acting non-trivially on sites $n$ and $n+1$. As in any variational approach, the variational principle serves as a guide for finding ground-state approximations, dictating that the optimal MPS approximation of the ground state corresponds to the minimum of the expectation value of the energy,

$$ \min_A \frac{\left \langle \Psi(\bar{A}) \middle | H  \middle | \Psi(A) \right \rangle}{\left \langle \Psi(\bar{A}) \middle | \Psi(A)  \right \rangle}. $$

In the thermodynamic limit the energy diverges with system size, but, since we are working with translation-invariant states only, we should rather minimize the energy density. In the following we will always restrict our discussion to properly normalized states. Diagrammatically, the minimization problem can then be recast as

<center><img src="img/2minham.svg" alt="minimization of hamiltonian"></center>

In this notebook we illustrate numerical optimization strategies for minimizing this energy density directly.

### 2.1 The gradient

Any optimization problem relies on an efficient evaluation of the gradient, so the first thing to do is to compute this quantity. The objective function $f$ that we want to minimize is a real function of the complex-valued $A$, or equivalently, of the independent variables $A$ and $\bar{A}$. The gradient $g$ is then obtained by differentiating $f(\bar{A},A)$ with respect to $\bar{A}$,

$$
\begin{align}
g &= 2 \times \frac{\partial f(\bar{A},A) }{ \partial \bar{A} } \\
&= 2\times \frac{\partial_{\bar{A}} \left \langle \Psi(\bar{A}) \middle | h  \middle | \Psi(A) \right \rangle } {\left \langle \Psi(\bar{A}) \middle | \Psi(A)  \right \rangle} - 2\times \frac{\left \langle \Psi(\bar{A}) \middle | h  \middle | \Psi(A) \right \rangle} {\left \langle \Psi(\bar{A}) \middle | \Psi(A)  \right \rangle^2} \partial_{\bar{A}} \left \langle \Psi(\bar{A}) \middle | \Psi(A) \right \rangle ,\\
&= 2\times \frac{\partial_{\bar{A}}  \left \langle \Psi(\bar{A}) \middle | h  \middle | \Psi(A) \right \rangle - e \partial_{\bar{A}} \left \langle \Psi(\bar{A}) \middle | \Psi(A)  \right \rangle  } {\left \langle \Psi(\bar{A}) \middle | \Psi(A)  \right \rangle},\\
\end{align}
$$

where we have clearly indicated $A$ and $\bar{A}$ as independent variables and $e$ is the current energy density given by

$$
e = \frac{\left \langle \Psi(\bar{A}) \middle | h  \middle | \Psi(A) \right \rangle} {\left \langle \Psi(\bar{A}) \middle | \Psi(A)  \right \rangle}.
$$

If we make sure that the MPS is properly normalized and subtract the current energy density from every term in the hamiltonian, $h \leftarrow h - e$, the gradient takes on the simple form

$$ g = 2 \times \partial_{\bar{A}} \left \langle \Psi(\bar{A}) \middle | h  \middle | \Psi(A) \right \rangle.$$

Thus, the gradient is obtained by differentiating the expression

<center><img src="img/grad.svg" alt="gradient"></center>

with respect to $\bar{A}$. This gives rise to a sum over all sites, where in every term we differentiate with respect to one tensor $\bar{A}$ in the bra layer. Differentiating with respect to one $\bar{A}$ tensor amounts to leaving out that tensor, and interpreting the open legs as outgoing ones, i.e. each term looks like

<center><img src="img/gradTerm.svg" alt="gradient term"></center>

The full gradient is then obtained as an infinite sum over these terms. By dividing the terms into three different classes and doing some bookkeeping as illustrated below, we can eventually write this sum in a relatively simple closed form.

In [None]:
"""
Shift a two-site Hamiltonian by its current expectation value, such that the shifted
operator has expectation value 0 with respect to the uniform MPS defined by `A`.

### Arguments

- `h::TensorMap{CartesianSpace, 2, 2}`: Hamiltonian as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `A::TensorMap{CartesianSpace}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `fpts::Tuple=fixedPoints(A)`: left and right fixed points of transfermatrix, normalized.

### Returns

- `h̃::TensorMap{CartesianSpace, 2, 2}`: reduced Hamiltonian `h - e * 1` as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
"""
function reducedHamUniform(h, A, fpts=fixedPoints(A))
    # current energy density; only the real part is kept
    energy = real(expVal2Uniform(h, A, fpts))

    # identity operator on the space `h` acts on
    unit = id(domain(h))

    # subtract the energy density from the Hamiltonian
    return h - energy * unit
end;

#### Terms of the 'center' kind
The first kind of terms that arise in the above expression for the gradient are obtained by differentiation with respect to an $\bar{A}$ tensor on the legs of the Hamiltonian operator. This results in two 'center' terms

<center><img src="img/centerTerms.svg" alt="center terms"></center>

In [None]:
"""
Evaluate the two 'center' contributions to the gradient, i.e. the terms in which the
removed `conj(A)` tensor sits on one of the two sites the Hamiltonian acts on.

### Arguments

- `h̃::TensorMap{CartesianSpace, 2, 2}`: reduced Hamiltonian as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `A::TensorMap{CartesianSpace}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `fpts::Tuple=fixedPoints(A)`: left and right fixed points of transfermatrix, normalized.

### Returns

- `term1::TensorMap{CartesianSpace}`: first gradient term (open slot on the left site) as a tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `term2::TensorMap{CartesianSpace}`: second gradient term (open slot on the right site) as a tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
"""
function gradCenterTerms(h̃, A, fpts=fixedPoints(A))
    lfp, rfp = fpts

    # open slot on the left site: the only conj(A) left in the network is the right one
    @tensor openLeft[-1 -2 -3] := lfp[-1; 1] * rfp[5; 7] * A[1 3 2] * A[2 4 5] *
        conj(A[-3 6 7]) * h̃[3 4; -2 6]

    # open slot on the right site: the only conj(A) left in the network is the left one
    @tensor openRight[-1 -2 -3] := lfp[6; 1] * rfp[5; -3] * A[1 3 2] * A[2 4 5] *
        conj(A[6 7 -1]) * h̃[3 4; 7 -2]

    return openLeft, openRight
end;

#### Terms of the 'left' kind
For the terms where we leave out an $\bar{A}$ tensor to the left of $h$, which we will call 'left' terms, we can contract everything to the left of this missing $\bar{A}$ with the left fixed point $l$, while everything to the right of $h$ can be contracted with right fixed point $r$.

In between these two outer parts of the network there remains a region where the regular MPS transfer matrix $E$ is applied a number of times. The action of this region is therefore captured by the operator $E^n$, where the power $n$ is determined by the separation between the outer left and right parts for the specific term under consideration. When summing all left terms, the outer parts of the contraction always remain the same, while only the power $n$ differs for every term. Thus, summing all left terms corresponds to contracting the operator

$$E_\text{sum} = 1 + E + E^2 + \dots = \frac{1}{1-E}$$

between the left and right outer parts. Here, we have naively used the geometric series to write the sum in a closed form. However, since by our normalization the transfer matrix has leading eigenvalue $1$, this resulting expression will diverge and is therefore ill-defined. We can get around this by introducing a regularized transfer matrix $\tilde{E}$ which is defined by subtracting the divergent part,

<center><img src="img/regTransfer.svg" alt="regularized transfer matrix"></center>

Since we have already shifted the energy density to have a zero expectation value, $h \leftarrow h - e$, it can easily be verified that the contribution of the leading divergent part vanishes in every left term, meaning that we can simply replace the original transfer matrix by its regularized version without changing any of the terms, and only then take the infinite sum which now has a well defined expression in terms of an inverse,

$$ E_\text{sum} \rightarrow \frac{1}{1-\tilde{E}} \equiv (1 - E)^P ,$$

where we have introduced the pseudo-inverse defined as $(1 - E)^P = (1-\tilde{E})^{-1}$.

Using this notation we can define the partial contraction

<center><img src="img/Rh.svg" alt="right effective environment"></center>

such that the sum of all left terms equals

<center><img src="img/leftTerms.svg" alt="left terms"></center>

If we would compute the partial contraction $R_h$ directly by explicitly computing the pseudo-inverse, this would entail a computational complexity $O(D^6)$. Instead, we can define $R_h$ as the solution of a linear system by multiplying both sides of the corresponding definition by $(1-\tilde{E})$. This results in an equation of the form $Ax = b$, which may be solved for $x$ by using Krylov-based iterative methods such as a Generalized Minimal RESidual (GMRES) algorithm as implemented in [`KrylovKit.linsolve`](https://jutho.github.io/KrylovKit.jl/stable/man/linear/#KrylovKit.linsolve). Note that these methods only require the action of $A = (1-\tilde{E})$ on a vector and not the full matrix $A$. This action can again be supplied to the linear solver using a function handle.

In [None]:
"""
Apply (1 - Ẽ) to a right vector, where Ẽ is the transfer matrix of `A` with its
fixed-point projector subtracted.

### Arguments

- `v::TensorMap{CartesianSpace, 1, 1}`: right vector on which (1 - Ẽ) acts, given as a tensor with 2 legs of dimension (D, D), ordered top-bottom.
- `A::TensorMap{CartesianSpace}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `fpts::Tuple=fixedPoints(A)`: left and right fixed points of transfermatrix, normalized.

### Returns

- `vNew::TensorMap{CartesianSpace, 1, 1}`: result of action of (1 - Ẽ) on `v`, given as a tensor with 2 legs of dimension (D, D), ordered top-bottom.
"""
function Ẽright(v, A, fpts=fixedPoints(A))
    lfp, rfp = fpts

    # plain transfer matrix applied to v
    @tensor Ev[-1; -2] := A[-1 2 1] * conj(A[-2 2 3]) * v[1; 3]

    # weight of v along the fixed-point projector
    overlap = tr(lfp * v)

    # identity - transfer matrix + projector
    return v - Ev + overlap * rfp
end;

In [None]:
"""
Compute the right partial contraction Rh by solving the linear system (1 - Ẽ) Rh = b.

### Arguments

- `h̃::TensorMap{CartesianSpace, 2, 2}`: reduced Hamiltonian as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `A::TensorMap{CartesianSpace}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `fpts::Tuple=fixedPoints(A)`: left and right fixed points of transfermatrix, normalized.

### Returns

- `Rh::TensorMap{CartesianSpace, 1, 1}`: right partial contraction as a tensor with 2 legs of dimension (D, D), ordered top-bottom.
"""
function RhUniform(h̃, A, fpts=fixedPoints(A))
    _, rfp = fpts

    # right-hand side: the two-site Hamiltonian block closed off with the right fixed point,
    # i.e. the part of the network to the right of (1 - E)^P
    @tensor rhs[-1; -2] := rfp[4; 5] * A[-1 2 1] * A[1 3 4] * conj(A[-2 8 7]) *
        conj(A[7 6 5]) * h̃[2 3; 8 6]

    # the solver only needs the action of (1 - Ẽ) on a vector
    applyOperator(x) = Ẽright(x, A, fpts)

    # second return value (solver info) is discarded
    solution, _ = linsolve(applyOperator, rhs)

    return solution
end;

In [None]:
"""
Evaluate the summed contribution of all 'left' terms to the gradient.

### Arguments

- `h̃::TensorMap{CartesianSpace, 2, 2}`: reduced Hamiltonian as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `A::TensorMap{CartesianSpace}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `fpts::Tuple=fixedPoints(A)`: left and right fixed points of transfermatrix, normalized.

### Returns

- `leftTerms::TensorMap{CartesianSpace}`: left terms of gradient as a tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
"""
function gradLeftTerms(h̃, A, fpts=fixedPoints(A))
    lfp, _ = fpts

    # environment collecting everything to the right of the open slot
    rightEnv = RhUniform(h̃, A, fpts)

    # close the network: left fixed point, ket tensor and right environment
    @tensor summed[-1 -2 -3] := rightEnv[1; -3] * A[2 -2 1] * lfp[-1; 2]

    return summed
end;

#### Terms of the 'right' kind

In a similar way, the terms where we leave out an $\bar{A}$ to the right of  $h$ can be evaluated by defining the partial contraction

<center><img src="img/Lh.svg" alt="Lh"></center>

which can again be found by solving a linear system, such that the sum of all right terms can be written as

<center><img src="img/rightTerms.svg" alt="rightTerms"></center>

In [None]:
"""
Apply (1 - Ẽ) to a left vector, where Ẽ is the transfer matrix of `A` with its
fixed-point projector subtracted.

### Arguments

- `v::TensorMap{CartesianSpace, 1, 1}`: left vector on which (1 - Ẽ) acts, given as a tensor with 2 legs of dimension (D, D), ordered bottom-top.
- `A::TensorMap{CartesianSpace}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `fpts::Tuple=fixedPoints(A)`: left and right fixed points of transfermatrix, normalized.

### Returns

- `vNew::TensorMap{CartesianSpace, 1, 1}`: result of action of (1 - Ẽ) on `v`, given as a tensor with 2 legs of dimension (D, D), ordered bottom-top.
"""
function Ẽleft(v, A, fpts=fixedPoints(A))
    lfp, rfp = fpts

    # v pushed through the plain transfer matrix from the left
    @tensor vE[-1; -2] := v[3; 1] * A[1 2 -2] * conj(A[3 2 -1])

    # weight of v along the fixed-point projector
    overlap = tr(v * rfp)

    # identity - transfer matrix + projector
    return v - vE + overlap * lfp
end;

In [None]:
"""
Compute the left partial contraction Lh by solving the linear system Lh (1 - Ẽ) = b.

### Arguments

- `h̃::TensorMap{CartesianSpace, 2, 2}`: reduced Hamiltonian as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `A::TensorMap{CartesianSpace}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `fpts::Tuple=fixedPoints(A)`: left and right fixed points of transfermatrix, normalized.

### Returns

- `Lh::TensorMap{CartesianSpace, 1, 1}`: left partial contraction as a tensor with 2 legs of dimension (D, D), ordered bottom-top.
"""
function LhUniform(h̃, A, fpts=fixedPoints(A))
    lfp, _ = fpts

    # right-hand side: the two-site Hamiltonian block closed off with the left fixed point,
    # i.e. the part of the network to the left of (1 - E)^P
    @tensor rhs[-1; -2] := lfp[5; 1] * A[1 3 2] * A[2 4 -2] * conj(A[5 6 7]) *
        conj(A[7 8 -1]) * h̃[3 4; 6 8]

    # the solver only needs the action of (1 - Ẽ) on a vector
    applyOperator(x) = Ẽleft(x, A, fpts)

    # second return value (solver info) is discarded
    solution, _ = linsolve(applyOperator, rhs)

    return solution
end;

In [None]:
"""
Evaluate the summed contribution of all 'right' terms to the gradient.

### Arguments

- `h̃::TensorMap{CartesianSpace, 2, 2}`: reduced Hamiltonian as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `A::TensorMap{CartesianSpace}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `fpts::Tuple=fixedPoints(A)`: left and right fixed points of transfermatrix, normalized.

### Returns

- `rightTerms::TensorMap{CartesianSpace}`: right terms of gradient as a tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
"""
function gradRightTerms(h̃, A, fpts=fixedPoints(A))
    _, rfp = fpts

    # environment collecting everything to the left of the open slot
    leftEnv = LhUniform(h̃, A, fpts)

    # close the network: left environment, ket tensor and right fixed point
    @tensor summed[-1 -2 -3] := leftEnv[-1; 1] * A[1 -2 2] * rfp[2; -3]

    return summed
end;

#### The gradient

The full gradient is then found by summing the contributions of all three types of terms,

<center><img src="img/gradFull.svg" alt="gradient"></center>

In [None]:
"""
Calculate the gradient of the expectation value of a given Hamiltonian.

The Hamiltonian is regularized internally (its current expectation value is subtracted),
so the plain two-site Hamiltonian should be passed in.

### Arguments

- `h::TensorMap{CartesianSpace, 2, 2}`: Hamiltonian as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `A::TensorMap{CartesianSpace}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `fpts::Tuple=fixedPoints(A)`: left and right fixed points of transfermatrix, normalized.

### Returns

- `grad::TensorMap{CartesianSpace}`: gradient as a tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
"""
function gradient(h, A, fpts=fixedPoints(A))
    # shift the Hamiltonian so that its expectation value vanishes
    hShifted = reducedHamUniform(h, A, fpts)

    # the three classes of terms
    center1, center2 = gradCenterTerms(hShifted, A, fpts)
    fromLeft = gradLeftTerms(hShifted, A, fpts)
    fromRight = gradRightTerms(hShifted, A, fpts)

    # factor 2 from the definition g = 2 ∂f/∂Ā
    return 2 * (center1 + center2 + fromLeft + fromRight)
end;

### 2.2 Gradient descent algorithms

The most straightforward way to use this expression for the gradient to find the ground state of a Hamiltonian is to implement a gradient-search method for minimizing the energy expectation value. The simplest such method is a steepest-descent search, where in every iteration the tensor $A$ is updated in the direction opposite to the gradient along a small step $\varepsilon$,

$$ A_{i+1} = A_i - \varepsilon g .$$

This procedure is repeated until we find the optimal MPS tensor $A^*$ for which the gradient vanishes. This approach can be improved upon by resorting to other optimization schemes such as conjugate-gradient or quasi-Newton methods. Below we demonstrate both a simple steepest-descent with a fixed step size, as well as an approach using optimization routines supplied by the Julia package [OptimKit.jl](https://github.com/Jutho/OptimKit.jl) through the `OptimKit.optimize` routine.

In [None]:
"""
Find the ground state of a given Hamiltonian using gradient descent.

Starting from `A0`, the tensor is repeatedly updated as `A ← normalizeMPS(A - eps * g)`
until the norm of the gradient `g` drops to `tol` or below, or until `maxIter` steps
have been taken.

### Arguments

- `h::TensorMap{CartesianSpace, 2, 2}`: Hamiltonian as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `D::Int`: bond dimension.
- `A0::TensorMap{CartesianSpace}`: normalized MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `eps::Float64`: stepsize.
- `tol::Float64`: tolerance for convergence criterium (on the norm of the gradient).
- `maxIter::Int`: maximum number of descent steps; a warning is printed if this limit is reached before convergence.
- `verbose::Bool`: print progress every 100 steps.

### Returns

- `E::Float64`: expectation value @ minimum
- `A::TensorMap{CartesianSpace}`: ground state MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
"""
function groundStateGradDescent(
    h,
    D,
    A0=createMPS(D, dim(space(h, 1)));
    eps=1e-1,
    tol=1e-4,
    maxIter=1e4,
    verbose=true
)

    # initialize
    A = A0
    g = gradient(h, A)
    i = 0

    while norm(g) > tol
        # check the budget before stepping, so that at most `maxIter` steps are taken
        # and no gradient is computed that would never be used
        if i >= maxIter
            println("Warning: gradient descent did not converge after $maxIter iterations!")
            break
        end

        # do a step against the gradient and restore the normalization
        A = normalizeMPS(A - eps * g)
        i += 1

        # calculate new gradient
        g = gradient(h, A)

        if verbose && (i % 100 == 0)
            # energy and gradient norm both refer to the current iterate
            Ei = real(expVal2Uniform(h, A))
            @printf "iteration:\t%d,\tenergy:\t%.12f\tgradient norm\t%.4e\n"  i Ei norm(g)
        end
    end

    # calculate ground state energy
    E = real(expVal2Uniform(h, A))

    return E, A
end;

In order to use `OptimKit.optimize` in this context, where we are dealing with an objective function that takes a complex-valued tensor as input and returns a real scalar, we must be careful to correctly treat the real and imaginary parts of the tensor entries as independent variational parameters. Specifically, we need to supply `OptimKit.optimize` with an appropriate inner product `inner(x, ξ1, ξ2)` which computes the inner product between two gradients or similar objects at position `x`. The `x` dependence is useful for optimization on manifolds, but can be ignored for our current purpose. Keeping in mind that we want to treat the real and imaginary parts of the `ξ` independently, we will define `inner` to compute the sum of the conventional inner products of the real and imaginary parts respectively.

In [None]:
"""
Find the ground state using `OptimKit.optimize`.

### Arguments

- `h::TensorMap{CartesianSpace, 2, 2}`: Hamiltonian as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `D::Int`: bond dimension.
- `A0::TensorMap{CartesianSpace}`: normalized MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `tol::Float64`: tolerance for convergence criterium (passed as `gradtol` to the optimizer).

### Returns

- `E::Float64`: expectation value @ minimum
- `A::TensorMap{CartesianSpace}`: normalized ground state MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
"""
function groundStateMinimize(h, D, A0=createMPS(D, dim(space(h, 1))); tol=1e-4)

    # Cost function handed to `OptimKit.optimize`.
    #
    # Argument:
    # - `A`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
    #
    # Returns:
    # - `e`: function value @ `A` (energy density of the normalized tensor)
    # - `g`: gradient as a tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
    function f(A)
        # the optimizer's iterates are not kept normalized, so normalize before evaluating
        A = normalizeMPS(A)

        # calculate fixed points once and reuse them for energy and gradient
        fpts = fixedPoints(A)

        # calculate function value and gradient
        e = real(expVal2Uniform(h, A, fpts))
        g = gradient(h, A, fpts)

        return e, g
    end

    # specify inner product that treats real and imaginary parts of variational parameters
    # as completely independent variables
    myinner(x, ξ1, ξ2) = dot(real(ξ1), real(ξ2)) + dot(imag(ξ1), imag(ξ2))

    # calculate minimum using conjugate gradient optimization
    A, E, _ = optimize(f, A0, ConjugateGradient(; gradtol=tol, verbosity=1), inner=myinner)

    # `E` was evaluated on the normalized tensor inside `f`, so return the tensor in that
    # same normalization rather than the raw optimizer iterate
    return E, normalizeMPS(A)
end;

To demonstrate these methods, we now have a look at the specific case of the antiferromagnetic spin-1 Heisenberg model in one dimension. To this end we first define the spin-1 Heisenberg Hamiltonian:

In [None]:
"""
Build the two-site spin-1 Heisenberg Hamiltonian

    h = -Jx Sx⊗Sx - Jy Sy⊗Sy - Jz Sz⊗Sz - hz/2 (Sz⊗1 + 1⊗Sz)

for given couplings.

### Arguments

- `Jx::Float64`: coupling strength in x direction.
- `Jy::Float64`: coupling strength in y direction.
- `Jz::Float64`: coupling strength in z direction.
- `hz::Float64`: coupling for Sz terms.

### Returns

- `h::TensorMap{CartesianSpace, 2, 2}`: Spin-1 Heisenberg Hamiltonian.
"""
function Heisenberg(Jx, Jy, Jz, hz)
    # local three-dimensional physical space
    site = ℝ^3

    # spin-1 operators and the single-site identity
    Sx = TensorMap(ComplexF64[0 1 0; 1 0 1; 0 1 0] ./ sqrt(2), site ← site)
    Sy = TensorMap(ComplexF64[0 -im 0; im 0 -im; 0 im 0] ./ sqrt(2), site ← site)
    Sz = TensorMap(ComplexF64[1 0 0; 0 0 0; 0 0 -1], site ← site)
    unit = id(Matrix{ComplexF64}, site)

    # two-site exchange part
    exchange = -Jx * Sx ⊗ Sx - Jy * Sy ⊗ Sy - Jz * Sz ⊗ Sz

    # field part, split evenly over the two sites of the bond
    field = hz/2 * (Sz ⊗ unit + unit ⊗ Sz)

    return exchange - field
end;

In [None]:
# Demo cell: optimize the same random initial MPS with both methods and report
# wall-clock time and final energy for each.

# physical dimension d and bond dimension D
d, D = 3, 12
A = createMPS(D, d)

# all exchange couplings -1, no field; with the sign convention used in `Heisenberg`
# this gives +(Sx⊗Sx + Sy⊗Sy + Sz⊗Sz)
h = Heisenberg(-1, -1, -1, 0)

# energy optimization using naive gradient descent
# for D=12 or higher: tolerance lower than 5e-3 gives very long runtimes
println("Gradient descent optimization:\n")
t = @elapsed E1, A1 = groundStateGradDescent(h, D, A; eps=1e-1, tol=5e-3, maxIter=1e4)
println("Time until convergence: $(t)s")
println("Computed energy: $(E1)\n")

# energy optimization using OptimKit.optimize
println("Optimization using OptimKit.optimize:\n")
# NOTE(review): t0 is not read anywhere in this cell; the timing below comes from @elapsed
t0 = time()
t = @elapsed E2, A2 = groundStateMinimize(h, D, A; tol=1e-5)
println("Time until convergence: $(t)s")
print("Computed energy: $(E2)\n")

### 2.3 The VUMPS algorithm

In the previous section we have derived an expression for the gradient starting from an MPS in the uniform gauge, which corresponds to an object that lives in the space of MPS tensors. We now discuss how to improve upon direct optimization schemes based on this form of the gradient by exploiting the structure of the MPS manifold as well as the mixed gauge for MPS.

Indeed, while the gradient in the above form indicates a direction in the space of complex tensors in which the energy decreases, intuitively it would make more sense if we could find a way to interpret the gradient as a direction *along the MPS manifold* along which we can decrease the energy. This can be achieved by interpreting the gradient as a *tangent vector in the tangent space to the MPS manifold*. By formulating the energy optimization in terms of this tangent space gradient written in mixed gauge, one arrives at the [VUMPS](https://doi.org/10.1103/PhysRevB.97.045145) algorithm (which stands for 'variational uniform matrix product states'). The precise derivation of the tangent space gradient in mixed gauge falls beyond the scope of this tutorial, and can be found in the [lecture notes](https://doi.org/10.21468/SciPostPhysLectNotes.7). Instead we will simply illustrate the implementation of the VUMPS algorithm given the mixed gauge tangent space gradient.

Most of the following required steps will be reminiscent of those outlined above, where we now consistently work in the mixed gauge. We start off by implementing the regularization of a two-site Hamiltonian in the mixed gauge.

In [None]:
"""
Shift a two-site Hamiltonian by its current expectation value in the mixed gauge, such
that the shifted operator has expectation value 0.

### Arguments

- `h::TensorMap{CartesianSpace, 2, 2}`: Hamiltonian as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `Ac::TensorMap{CartesianSpace, 2, 1}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right, center gauge.
- `Ar::TensorMap{CartesianSpace, 1, 2}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right, right orthonormal.

### Returns

- `h̃::TensorMap{CartesianSpace, 2, 2}`: reduced Hamiltonian `h - e * 1` as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
"""
function reducedHamMixed(h, Ac, Ar)
    # current energy density; only the real part is kept
    energy = real(expVal2Mixed(h, Ac, Ar))

    # identity operator on the space `h` acts on
    unit = id(domain(h))

    # subtract the energy density from the Hamiltonian
    return h - energy * unit
end;

The variational optimum of the energy is characterized by the condition that the gradient is zero at this point. Writing the tangent space gradient as $G$, we now wish to formulate an algorithm which minimizes the error measure

$$ \varepsilon = \left( \boldsymbol{G}^\dagger \boldsymbol{G} \right)^{1/2} $$

in an efficient way. The explicit form of the tangent space gradient in mixed gauge is given by

$$ G = A^\prime_{C} - A_L C^\prime = A^\prime_{C} - C^\prime A_R, $$

where $A^\prime_{C}$ and $C^\prime$ are defined as

<center><img src="img/Acprime.svg" alt="Ac prime"></center>

and

<center><img src="img/Cprime.svg" alt="C prime"></center>

Here, we again use $L_h$ and $R_h$ to indicate the partial contractions

<center><img src="img/LhMixed.svg" alt="Lh mixed gauge"></center>

and

<center><img src="img/RhMixed.svg" alt="Rh mixed gauge"></center>

where the transfer matrices $E^L_L$ and $E^R_R$ appearing in these expressions now contain only left-gauged and right-gauged MPS tensors $A_L$ and $A_R$ respectively.

If we interpret the two terms appearing in the tangent space gradient as defining the effective Hamiltonians $H_{A_C}(\cdot)$ and $H_C(\cdot)$ such that

$$
\begin{align}
H_{A_C}(A_C) = A_C^\prime \\
H_C(C) = C^\prime ,
\end{align}
$$

we can characterize the variational optimum in terms of the fixed points of these operators. Indeed, since the tangent space gradient should be zero at the variational optimum, this point satisfies $A_C' = A_L C' = C' A_R$. This implies that the optimal MPS should obey the following set of equations,

$$
\begin{align}
H_{A_C}(A_C) \propto A_C \\
H_C(C) \propto C \\
A_C = A_L C = C A_R ,
\end{align}
$$

meaning that the optimal MPS should correspond to a fixed point of the effective Hamiltonians $H_{A_C}$ and $H_C$ and satisfy the mixed gauge condition. The VUMPS algorithm then consists of an iterative method for finding a set $\{A_L, A_C, A_R, C\}$ that satisfies these equations simultaneously.

#### Defining the required operators

Similar to before, we again have to compute the contributions of the left and right environment terms $L_h$ and $R_h$ given above. We therefore require function handles defining the action of the left (resp. right) transfer matrix $E^L_L$ (resp. $E^R_R$) on a left (resp. right) matrix. To this end, we can simply reuse the implementations `Ẽleft` and `Ẽright` defined above, if we take into account that the left (resp. right) fixed point of $E^L_L$ (resp. $E^R_R$) is the identity while its right (resp. left) fixed point is precisely $C C^\dagger$ (resp. $C^\dagger C$). This last fact follows immediately from the mixed gauge condition.

In [None]:
"""
Compute the left partial contraction Lh for an MPS in mixed gauge by solving a linear
system with the regularized transfer matrix of `Al`.

### Arguments

- `h̃::TensorMap{CartesianSpace, 2, 2}`: reduced Hamiltonian as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `Al::TensorMap{CartesianSpace, 2, 1}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right, left orthonormal.
- `C::TensorMap{CartesianSpace, 1, 1}`: center gauge tensor with 2 legs of dimension (D, D), ordered left-right.
- `tol::Float64=1e-5`: tolerance for linear solver (values below 1e-14 are raised to 1e-14).

### Returns

- `Lh::TensorMap{CartesianSpace, 1, 1}`: left partial contraction as a tensor with 2 legs of dimension (D, D), ordered bottom-top.
"""
function LhMixed(h̃, Al, C; tol=1e-5)
    # never ask the solver for more than 1e-14
    solverTol = max(tol, 1e-14)

    # fixed points of the transfer matrix built from Al:
    # identity on the left (left orthonormality), C C† on the right
    leftFp = id(space(Al, 1))
    rightFp = C * C'

    # right-hand side of the linear system
    @tensor rhs[-1; -2] := Al[4 2; 1] * Al[1 3; -2] * conj(Al[4 5; 6]) *
        conj(Al[6 7; -1]) * h̃[2 3; 5 7]

    # action of (1 - Ẽ) on a left vector
    applyOperator(x) = Ẽleft(x, Al, (leftFp, rightFp))

    # second return value (solver info) is discarded
    solution, _ = linsolve(applyOperator, rhs; tol=solverTol)

    return solution
end;

In [None]:
"""
Compute the right partial contraction Rh for an MPS in mixed gauge by solving a linear
system with the regularized transfer matrix of `Ar`.

### Arguments

- `h̃::TensorMap{CartesianSpace, 2, 2}`: reduced Hamiltonian as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `Ar::TensorMap{CartesianSpace, 1, 2}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right, right orthonormal.
- `C::TensorMap{CartesianSpace, 1, 1}`: center gauge tensor with 2 legs of dimension (D, D), ordered left-right.
- `tol::Float64=1e-5`: tolerance for linear solver (values below 1e-14 are raised to 1e-14).

### Returns

- `Rh::TensorMap{CartesianSpace, 1, 1}`: right partial contraction as a tensor with 2 legs of dimension (D, D), ordered top-bottom.
"""
function RhMixed(h̃, Ar, C; tol=1e-5)
    # never ask the solver for more than 1e-14
    solverTol = max(tol, 1e-14)

    # fixed points of the transfer matrix built from Ar:
    # C† C on the left, identity on the right (right orthonormality)
    leftFp = C' * C
    rightFp = id(space(Ar, 3))

    # right-hand side of the linear system
    @tensor rhs[-1; -2] := Ar[-1; 2 1] * Ar[1; 3 4] * conj(Ar[-2; 7 6]) *
        conj(Ar[6; 5 4]) * h̃[2 3; 7 5]

    # action of (1 - Ẽ) on a right vector
    applyOperator(x) = Ẽright(x, Ar, (leftFp, rightFp))

    # second return value (solver info) is discarded
    solution, _ = linsolve(applyOperator, rhs; tol=solverTol)

    return solution
end;

Next we implement the actions of the effective Hamiltonians $H_{A_C}$ and $H_{C}$ defined above,

<center><img src="img/H_Ac.svg" alt="H_Ac"></center>

<center><img src="img/H_C.svg" alt="H_C"></center>

In [None]:
"""
Action of the effective Hamiltonian for Ac (131) on a vector.

The result is the sum of four contributions: the two-site Hamiltonian acting on the
site to the left of and including `v`, the same to the right, and the left and right
environments `Lh` and `Rh`.

### Arguments

- `v::TensorMap{CartesianSpace, 2, 1}`: vector on which the effective Hamiltonian acts, given as a tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `h̃::TensorMap{CartesianSpace, 2, 2}`: reduced Hamiltonian as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `Al::TensorMap{CartesianSpace, 2, 1}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right, left orthonormal.
- `Ar::TensorMap{CartesianSpace, 1, 2}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right, right orthonormal.
- `Lh::TensorMap{CartesianSpace, 1, 1}`: left environment tensor with 2 legs of dimension (D, D), ordered bottom-top.
- `Rh::TensorMap{CartesianSpace, 1, 1}`: right environment tensor with 2 legs of dimension (D, D), ordered top-bottom.

### Returns

- `H_AcV::TensorMap{CartesianSpace, 2, 1}`: result of the action of `H_Ac` on `v`, given as a tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
"""
function H_Ac(v, h̃, Al, Ar, Lh, Rh)
    # Hamiltonian on (left neighbour, v)
    @tensor hLeft[-1 -2; -3] := Al[4 2; 1] * v[1 3; -3] * conj(Al[4 5; -1]) * h̃[2 3; 5 -2]

    # Hamiltonian on (v, right neighbour)
    @tensor hRight[-1 -2; -3] := v[-1 2; 1] * Ar[1; 3 4] * conj(Ar[-3; 5 4]) * h̃[2 3; -2 5]

    # left environment attached to the left leg of v
    @tensor envLeft[-1 -2; -3] := Lh[-1; 1] * v[1 -2; -3]

    # right environment attached to the right leg of v
    @tensor envRight[-1 -2; -3] := v[-1 -2; 1] * Rh[1; -3]

    return hLeft + hRight + envLeft + envRight
end;

In [None]:
"""
    H_C(v, h̃, Al, Ar, Lh, Rh)

Apply the effective Hamiltonian for C (132) to the matrix `v`.

The result is the sum of three contributions: `h̃` acting on the two sites surrounding `v`
(carried by `Al` on the left and `Ar` on the right), the left environment `Lh` multiplied
from the left, and the right environment `Rh` multiplied from the right.

### Arguments

- `v::TensorMap{CartesianSpace, 1, 1}`: tensor the effective Hamiltonian is applied to, with 2 legs of dimension (D, D), ordered left-right.
- `h̃::TensorMap{CartesianSpace, 2, 2}`: reduced Hamiltonian with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `Al::TensorMap{CartesianSpace, 2, 1}`: left-orthonormal MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `Ar::TensorMap{CartesianSpace, 1, 2}`: right-orthonormal MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `Lh::TensorMap{CartesianSpace, 1, 1}`: left environment with 2 legs of dimension (D, D), ordered bottom-top.
- `Rh::TensorMap{CartesianSpace, 1, 1}`: right environment with 2 legs of dimension (D, D), ordered top-bottom.

### Returns

- `H_CV::TensorMap{CartesianSpace, 1, 1}`: `H_C` applied to `v`, a tensor with 2 legs of dimension (D, D), ordered left-right.
"""
function H_C(v, h̃, Al, Ar, Lh, Rh)
    # h̃ on the two sites around v: Al sits to its left, Ar to its right
    @tensor hTerm[-1; -2] := Al[5 3; 1] * v[1; 2] * Ar[2; 4 7] * conj(Al[5 6; -1]) * conj(Ar[-2; 8 7]) * h̃[3 4; 6 8]

    # environments act by plain composition on either side of v
    leftEnvTerm = Lh * v
    rightEnvTerm = v * Rh

    # index-free sum of tensor maps, so no @tensor macro is needed here
    return hTerm + leftEnvTerm + rightEnvTerm
end;

#### Implementing the VUMPS algorithm

In order to find a set $\{A_L^*, A_C^*, A_R^*, C^*\}$ that satisfies the VUMPS fixed point equations given above, we use an iterative method in which each iteration consists of the following steps, each time starting from a given set $\{A_L, A_C, A_R, C\}$:

1. Solve the eigenvalue equations for $H_{A_C}$ and $H_C$, giving new center tensors $\tilde{A}_C$ and $\tilde{C}$.

2. From these new center tensors, construct a set $\{\tilde{A}_L, \tilde{A}_R, \tilde{A}_C, \tilde{C}\}$.

3. Update the set of tensors $\{A_L, A_C, A_R, C\} \leftarrow \{\tilde{A}_L, \tilde{A}_C, \tilde{A}_R, \tilde{C}\}$ and evaluate the norm of the gradient $\varepsilon = \left | \left | H_{A_C} (A_C) - A_L H_C(C) \right | \right |$.

4. If the norm of the gradient lies above the given tolerance, repeat.

##### Updating the center tensors

We start by defining a routine `calcNewCenter` which finds the new center tensors $\tilde{A}_C$ and $\tilde{C}$ by solving the eigenvalue problem defined by the effective Hamiltonians implemented above.

In [None]:
"""
    calcNewCenter(h̃, Al, Ac, Ar, C, Lh=nothing, Rh=nothing; tol=1e-5)

Compute new guesses for Ac and C as the `:SR` eigenvectors of the maps `H_Ac` and `H_C`.

The current `Ac` and `C` serve as starting vectors for the two eigenvalue problems.

### Arguments

- `h̃::TensorMap{CartesianSpace, 2, 2}`: reduced Hamiltonian with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `Al::TensorMap{CartesianSpace, 2, 1}`: left-orthonormal MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `Ac::TensorMap{CartesianSpace, 2, 1}`: center-gauge MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `Ar::TensorMap{CartesianSpace, 1, 2}`: right-orthonormal MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `C::TensorMap{CartesianSpace, 1, 1}`: center gauge tensor with 2 legs of dimension (D, D), ordered left-right.
- `Lh::TensorMap{CartesianSpace, 1, 1}`: left environment with 2 legs of dimension (D, D), ordered bottom-top; computed with `LhMixed` when `nothing`.
- `Rh::TensorMap{CartesianSpace, 1, 1}`: right environment with 2 legs of dimension (D, D), ordered top-bottom; computed with `RhMixed` when `nothing`.
- `tol::Float64=1e-5`: tolerance passed to the environment and eigenvalue solvers (clamped from below at 1e-14).

### Returns

- `Ãc::TensorMap{CartesianSpace, 2, 1}`: new center-gauge MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `C̃::TensorMap{CartesianSpace, 1, 1}`: new center gauge tensor with 2 legs of dimension (D, D), ordered left-right.
"""
function calcNewCenter(h̃, Al, Ac, Ar, C, Lh=nothing, Rh=nothing; tol=1e-5)
    solverTol = max(tol, 1e-14)

    # use the supplied environments, or build the missing ones
    leftEnv = isnothing(Lh) ? LhMixed(h̃, Al, C; tol=solverTol) : Lh
    rightEnv = isnothing(Rh) ? RhMixed(h̃, Ar, C; tol=solverTol) : Rh

    # the two effective Hamiltonians as linear maps
    applyHAc = v -> H_Ac(v, h̃, Al, Ar, leftEnv, rightEnv)
    applyHC = v -> H_C(v, h̃, Al, Ar, leftEnv, rightEnv)

    # one eigenvector each, selected with :SR, starting from the current tensors
    _, AcVecs, _ = eigsolve(applyHAc, Ac, 1, :SR; tol=solverTol)
    _, CVecs, _ = eigsolve(applyHC, C, 1, :SR; tol=solverTol)

    return first(AcVecs), first(CVecs)
end;

##### Extract a new set of mixed-gauge MPS tensors

Once we have new center tensors, we can use these to construct a new set of mixed-gauge MPS tensors. To do this in a stable way, we will determine the global updates $\tilde{A}_L$ and $\tilde{A}_R$ as the left and right isometric tensors that minimize

$$
\begin{align}
\varepsilon_L = \min ||\tilde{A}_C - \tilde{A}_L \tilde{C}||_2 \\
\varepsilon_R = \min ||\tilde{A}_C - \tilde{C} \tilde{A}_R||_2 .
\end{align}
$$

This can be achieved in a robust and close to optimal way by making use of the left and right polar decompositions

$$
\begin{align}
\tilde{A}_C = U^l_{A_C} P^l_{A_C}, \qquad \tilde{C} = U^l_{C} P^l_{C}, \\
\tilde{A}_C = P^r_{A_C}  U^r_{A_C} , \qquad \tilde{C} = P^r_{C} U^r_{C},
\end{align}
$$

to obtain

$$ \tilde{A}_L = U^l_{A_C} (U^l_C)^\dagger, \qquad \tilde{A}_R = (U^r_C)^\dagger U^r_{A_C}. $$

In order to give the procedure some additional stability, we may also choose to use the $\tilde{A}_L$ obtained with these polar decompositions to compute the tensors $\tilde{A}_R$ and $\tilde{A}_C$ by right orthonormalization of this $\tilde{A}_L$. This approach ensures that the MPS satisfies the mixed gauge condition at all times, improving the overall stability of the VUMPS algorithm. This procedure is implemented in the `minAcC` routine.

In [None]:
"""
    minAcC(Ãc, C̃; tol=1e-5)

Build a consistent set of mixed-gauge tensors from the new center tensors `Ãc` and `C̃`,
according to algorithm 5 in the lecture notes.

`Al` is formed from the isometric factors of the left polar decompositions of `Ãc` and `C̃`.
`Ar` and `C` then follow from right orthonormalizing this `Al` (with `C̃` handed to
`rightOrthonormalize`), `C` is rescaled so that `tr(C * C')` equals one, and `Ac` is
rebuilt as `Al` contracted with `C`.

### Arguments

- `Ãc::TensorMap{CartesianSpace, 2, 1}`: new guess for the center-gauge MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `C̃::TensorMap{CartesianSpace, 1, 1}`: new guess for the center gauge tensor with 2 legs of dimension (D, D), ordered left-right.
- `tol::Float64=1e-5`: canonicalization tolerance (clamped from below at 1e-14).

### Returns

- `Al::TensorMap{CartesianSpace, 2, 1}`: left-orthonormal MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `Ac::TensorMap{CartesianSpace, 2, 1}`: center-gauge MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `Ar::TensorMap{CartesianSpace, 1, 2}`: right-orthonormal MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `C::TensorMap{CartesianSpace, 1, 1}`: center gauge tensor with 2 legs of dimension (D, D), ordered left-right.
"""
function minAcC(Ãc, C̃; tol=1e-5)
    canonTol = max(tol, 1e-14)

    # isometric factors of the left polar decompositions of Ãc and C̃
    isoAc = first(leftorth(Ãc, (1, 2), (3,); alg=Polar()))
    isoC = first(leftorth(C̃, (1,), (2,); alg=Polar()))

    # new left-orthonormal tensor
    Al = isoAc * isoC'

    # right orthonormalize Al to obtain a matching C and Ar
    C, Ar = rightOrthonormalize(Al, C̃; tol=canonTol)

    # rescale C such that tr(C * C') == 1
    C = C / sqrt(tr(C * C'))

    # center-gauge tensor belonging to Al and the normalized C
    @tensor Ac[-1 -2; -3] := Al[-1 -2; 1] * C[1; -3]

    return Al, Ac, Ar, C
end;

##### Evaluating the norm of the gradient

As a last step, we use the routine `gradientNorm` to compute the norm of the tangent space gradient in order to check if the procedure has converged.

In [None]:
"""
    gradientNorm(h̃, Al, Ac, Ar, C, Lh, Rh)

Compute the norm of the gradient, evaluated as the norm of `H_Ac(Ac) - Al * H_C(C)`.

### Arguments

- `h̃::TensorMap{CartesianSpace, 2, 2}`: reduced Hamiltonian with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `Al::TensorMap{CartesianSpace, 2, 1}`: left-orthonormal MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `Ac::TensorMap{CartesianSpace, 2, 1}`: center-gauge MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `Ar::TensorMap{CartesianSpace, 1, 2}`: right-orthonormal MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `C::TensorMap{CartesianSpace, 1, 1}`: center gauge tensor with 2 legs of dimension (D, D), ordered left-right.
- `Lh::TensorMap{CartesianSpace, 1, 1}`: left environment with 2 legs of dimension (D, D), ordered bottom-top.
- `Rh::TensorMap{CartesianSpace, 1, 1}`: right environment with 2 legs of dimension (D, D), ordered top-bottom.

### Returns

- `norm::Float64`: norm of the gradient at (Al, Ac, Ar, C).
"""
function gradientNorm(h̃, Al, Ac, Ar, C, Lh, Rh)
    # images of the current center tensors under the effective Hamiltonians
    HAc = H_Ac(Ac, h̃, Al, Ar, Lh, Rh)
    HC = H_C(C, h̃, Al, Ar, Lh, Rh)

    # bring H_C(C) to the shape of Ac by attaching Al on its left
    @tensor AlHC[-1 -2; -3] := Al[-1 -2; 1] * HC[1; -3]

    residual = HAc - AlHC
    return norm(residual)
end;

Finally, this allows us to implement the VUMPS algorithm.

In [None]:
"""
    vumps(h, D, A0=createMPS(D, dim(space(h, 1))); tol=1e-4, tolFactor=1e-1, verbose=true, maxiter=typemax(Int))

Find the ground state of a given Hamiltonian using VUMPS.

Each iteration computes the reduced Hamiltonian and its environments, evaluates the
gradient norm, solves for new center tensors and rebuilds the mixed-gauge tensors. The
loop ends after the iteration in which the gradient norm (measured before that iteration's
update) drops below `tol`, or once `maxiter` iterations have been performed.

### Arguments

- `h::TensorMap{CartesianSpace, 2, 2}`: Hamiltonian to optimize as a tensor with 4 legs of dimension (d, d, d, d), ordered topLeft-topRight-bottomLeft-bottomRight.
- `D::Int`: bond dimension; only used to create the default `A0`.
- `A0::TensorMap{CartesianSpace, 2, 1}`: initial MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right.
- `tol::Float64=1e-4`: convergence threshold on the gradient norm.
- `tolFactor::Float64=1e-1`: the environment and eigenvalue solvers run with tolerance `delta * tolFactor`, the canonicalization with `delta * tolFactor^2`, where `delta` is the latest gradient norm (1e-5 before the first iteration).
- `verbose::Bool=true`: print iteration number, energy and gradient norm after every iteration.
- `maxiter::Int=typemax(Int)`: maximum number of iterations; the default leaves the loop unbounded. A warning is emitted when the loop stops without convergence.

### Returns

- `E::Float64`: expectation value @ minimum
- `Al::TensorMap{CartesianSpace, 2, 1}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right, left orthonormal.
- `Ac::TensorMap{CartesianSpace, 2, 1}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right, center gauge.
- `Ar::TensorMap{CartesianSpace, 1, 2}`: MPS tensor with 3 legs of dimension (D, d, D), ordered left-bottom-right, right orthonormal.
- `C::TensorMap{CartesianSpace, 1, 1}`: center gauge tensor with 2 legs of dimension (D, D), ordered left-right.
"""
function vumps(h, D, A0=createMPS(D, dim(space(h, 1))); tol=1e-4, tolFactor=1e-1, verbose=true, maxiter=typemax(Int))
    maxiter >= 0 || throw(ArgumentError("maxiter must be non-negative, got $maxiter"))

    # go to mixed gauge
    Al, Ac, Ar, C = mixedCanonical(A0)

    converged = false
    delta = 1e-5 # initial guess for the gradient norm, sets the first solver tolerances
    i = 0

    while !converged && i < maxiter
        i += 1

        # regularize H
        h̃ = reducedHamMixed(h, Ac, Ar)

        # calculate environments
        Lh = LhMixed(h̃, Al, C; tol=delta*tolFactor)
        Rh = RhMixed(h̃, Ar, C; tol=delta*tolFactor)

        # calculate norm of the gradient at the current tensors
        delta = gradientNorm(h̃, Al, Ac, Ar, C, Lh, Rh)

        # a NaN/Inf gradient norm can never satisfy the convergence test
        isfinite(delta) || error("vumps: gradient norm became non-finite ($delta) at iteration $i")

        # check convergence; the update below is still applied in this iteration
        converged = delta < tol

        # calculate new center
        Ãc, C̃ = calcNewCenter(h̃, Al, Ac, Ar, C, Lh, Rh; tol=delta*tolFactor)

        # find Al, Ar from Ãc, C̃ and update tensors
        Al, Ac, Ar, C = minAcC(Ãc, C̃; tol=delta*tolFactor^2)

        # print current energy
        if verbose
            E = real(expVal2Mixed(h, Ac, Ar))
            @printf "iteration:\t%d\tenergy:\t%.12f\tgradient norm:\t%.4e\n" i E delta
        end
    end

    converged || @warn "vumps stopped without reaching the requested tolerance" iterations=i gradientNorm=delta tol

    E = real(expVal2Mixed(h, Ac, Ar))

    return E, Al, Ac, Ar, C

end;

We can again test this implementation on the spin-1 Heisenberg antiferromagnet.

In [None]:
# physical dimension d and bond dimension D of the trial state
d, D = 3, 12

# random, normalized initial MPS tensor
A = normalizeMPS(createMPS(D, d))

# two-site Hamiltonian of the Heisenberg model
h = Heisenberg(-1, -1, -1, 0)

# energy optimization using VUMPS
println("Energy optimization using VUMPS:\n")
t0 = time() # wall-clock start; the elapsed time reported below comes from @elapsed
t = @elapsed begin
    E, Al, Ac, Ar, C = vumps(h, D, A; tol=1e-4, tolFactor=1e-2, verbose=true)
end
println("\nTime until convergence: $(t)s")
print("Computed energy: $(E)")

Having obtained this ground state MPS, it is worthwhile to have a look at the corresponding entanglement spectrum.

In [None]:
# singular values of the center matrix give the entanglement spectrum
S = tsvd(C, (1,), (2,))[2]
# pull the singular values out of the tensor map as a plain vector
S = diag(S[])
scatter(
    1:length(S), S;
    title="Entanglement spectrum of ground state",
    marker=:x,
    yaxis=:log,
    legend=false,
)

We can clearly see that the entanglement spectrum consists of degenerate groups, which reflects an underlying symmetry in the ground state of the spin-1 Heisenberg antiferromagnet.

### 2.4 Elementary excitations

#### Quasiparticle ansatz

The methods described above can be extended beyond computing the ground state. We briefly discuss how one can also study excitations on top of a given ground state. For this, we introduce the MPS quasiparticle ansatz, given by

<center><img src="img/excitation.svg" alt="quasiparticle ansatz"></center>

This ansatz consists of defining a new state by changing one $A$ tensor of the ground state at site $n$ and taking a momentum superposition.

Before describing how to optimize the tensor $B$, it is worthwhile to investigate the corresponding variational space in a bit more detail. First, we note that this excitation ansatz can be interpreted as nothing more than a boosted version of a tangent vector to the MPS manifold. In particular, this means that we will be able to apply all kinds of useful tricks and manipulations to the tensor $B$ (cfr. the [lecture notes](https://doi.org/10.21468/SciPostPhysLectNotes.7) for an introduction to tangent vectors and their properties). For example, we can see that $B$ has gauge degrees of freedom, as the corresponding excited state is invariant under an additive gauge transformation of the form

<center><img src="img/gaugeExcitation.svg" alt="gauge transform excitation"></center>

where $Y$ is an arbitrary $D \times D$ matrix. This gauge freedom can be eliminated, thereby removing the zero modes in the variational subspace, by imposing a *left gauge-fixing condition*

<center><img src="img/gaugeFix.svg" alt="gauge fix"></center>

If we parametrize the tensor $B$ as

<center><img src="img/VlX.svg" alt="VlX"></center>

where $V_L$ is the $ D \times d \times D(d-1)$ tensor corresponding to the $D(d-1)$-dimensional null space of $A_L$ satisfying

<center><img src="img/Vl.svg" alt="Vl"></center>

then the gauge condition is automatically satisfied. In particular, this fixing of the gauge freedom ensures that the excitation is orthogonal to the ground state,

<center><img src="img/excitationOrth.svg" alt="excitationOrth"></center>

In this form, we have put forward an ansatz for an excited state characterized by a single $D(d-1) \times D$ matrix $X$ such that

1. All gauge degrees of freedom are fixed.
2. All zero modes in the variational subspace are removed.
3. Calculating the norm becomes straightforward.
4. The excitation is orthogonal to the ground state.

#### Solving the eigenvalue problem

Having introduced an excitation  ansatz which has all the right properties and is defined in terms of a single matrix $X$, all that is left to do is to minimize the energy function,

$$  \min_{X} \frac{\left \langle \Phi_p(X) \middle | H  \middle | \Phi_p(X) \right \rangle}{\left \langle \Phi_p(X) \middle | \Phi_p(X)  \right \rangle}. $$

As both the numerator and the denominator are quadratic functions of the variational parameters $X$, this optimization problem reduces to solving a generalized eigenvalue problem

$$ H_{\text{eff}}(p) X = \omega N_{\text{eff}}(p) X, $$

where the effective energy and normalization matrices are defined as

$$
\begin{align}
& 2\pi\delta(p-p') (\boldsymbol{X'})^\dagger H_{\text{eff}}(p) \boldsymbol{X} = \left \langle \Phi_{p'}(X') \middle | H  \middle | \Phi_p(X) \right \rangle \\
& 2\pi\delta(p-p') (\boldsymbol{X'})^\dagger N_{\text{eff}}(p) \boldsymbol{X} = \left \langle \Phi_{p'}(X') \middle | \Phi_p(X) \right \rangle,
\end{align}
$$

and $\boldsymbol{X}$ denotes a vectorized version of the matrix $X$. Since the overlap between two excited states is of the simple Euclidean form (cfr. the [lecture notes](https://doi.org/10.21468/SciPostPhysLectNotes.7)), the effective normalization matrix reduces to the unit matrix, and we are left with an ordinary eigenvalue problem.

To solve this eigenvalue problem, we need to find an expression for $H_{\text{eff}}$, or rather for the action thereof on a trial vector $\boldsymbol{Y}$. In order to find this action we first transform the vector $\boldsymbol{Y}$ into a tensor $B$ by contracting its corresponding matrix with the right leg of $V_L$, and then compute all different contributions that pop up in a matrix element of the form $\left \langle \Phi_p(B') \middle | H  \middle | \Phi_p(B) \right \rangle$. This procedure is similar to what we have done when computing the gradient above, where we now need to take into account all different positions of the nearest-neighbor operator $h$ of the Hamiltonian, the input tensor $B$ and the output. Though slightly more involved than before, we can again define the following partial contractions

<center><img src="img/LhMixed.svg" alt="LhMixed"></center>

<br>

<center><img src="img/RhMixed.svg" alt="RhMixed"></center>

<br>

<center><img src="img/LB.svg" alt="LB"></center>

<br>

<center><img src="img/RB.svg" alt="RB"></center>

<br>

<center><img src="img/L1.svg" alt="L1"></center>

<br>

<center><img src="img/R1.svg" alt="R1"></center>

Using these partial contractions, we find the action of the effective energy matrix on a given input tensor $B(Y)$ as

<center><img src="img/HeffExcitation.svg" alt="HeffExcitation"></center>

In the last step, we need the action of $H_{\text{eff}}(p)$ on the vector $\boldsymbol{Y}$, so we need to perform a last contraction

<center><img src="img/quasi_inveff.svg" alt="quasi_inveff"></center>

The total procedure is implemented in the routine `quasiParticle`.

In [None]:
"""
    quasiParticle(h, Al, Ar, Ac, C, p, num; tol=1e-12)

Compute `num` eigenpairs (selected with `:SR`) of the effective Hamiltonian of the
quasiparticle excitation ansatz at momentum `p`, on top of the MPS given by `Al`, `Ar`,
`Ac` and `C`.

The eigenvalue problem is formulated for the matrix `x` that parametrizes the excitation
tensor as `B = Vl * x`, with `Vl = leftnull(Al, (1, 2), (3,))`.

### Arguments

- `h`: two-site Hamiltonian; it is first passed through `reducedHamMixed(h, Ac, Ar)`.
- `Al`, `Ar`, `Ac`, `C`: mixed-gauge MPS tensors (presumably with the same leg conventions as in `H_Ac`; confirm against caller).
- `p`: momentum; it enters through phase factors `exp(±1im * p)` and `exp(2im * p)`.
- `num`: number of eigenvalues requested from `eigsolve`.
- `tol=1e-12`: tolerance passed to `LhMixed`, `RhMixed`, `linsolve` and `eigsolve`.

### Returns

- `x`: eigenvectors returned by `eigsolve`, each a tensor map `space(Vl, 3) ← space(Al, 1)`.
- `e`: corresponding eigenvalues returned by `eigsolve`.

Note the return order: eigenvectors first, eigenvalues second.
"""
function quasiParticle(h, Al, Ar, Ac, C, p, num; tol=1e-12)
    # renormalize hamiltonian and find left and right environments
    h̃ = reducedHamMixed(h, Ac, Ar)
    Lh = LhMixed(h̃, Al, C; tol)
    Rh = RhMixed(h̃, Ar, C; tol)
    
    # action of the effective Hamiltonian on the parametrization matrix x
    # (Vl is captured from the enclosing scope; it is assigned below, before the first call)
    function ApplyHeff(x)
        # remember x is (D*(d-1), D)
        
        # build the excitation tensor B from its reduced parametrization
        @tensor B[-1 -2; -3] := Vl[-1 -2; 1] * x[1; -3]
        
        # map x -> x - exp(i p) * (E(x) - tr(C' x) * C), where E contracts x with Al on
        # top and conj(Ar) below; the argument p shadows the outer momentum
        function ApplyELR(x, p)
            overlap = tr(C' * x)
            @tensor y[-1; -2] := Al[-1 3; 1] * conj(Ar[-2; 3 2]) * x[1; 2]
            y = x - exp(1im * p) * (y - overlap * C)
            return y
        end

        # same structure as ApplyELR, but with x contracted from the left with Ar on top
        # and conj(Al) below
        function ApplyERL(x, p)
            overlap = tr(C' * x)
            @tensor y[-1; -2] := x[1; 2] * Ar[2; 3 -2] * conj(Al[1 3; -1])
            y = x - exp(1im * p) * (y - overlap * C)
            return y
        end
        
        # right disconnected
        # (B closed with conj(Ar), then the linear system defined by ApplyELR at momentum p is solved)
        @tensor right[-1; -2] := B[-1 2; 1] * conj(Ar[-2; 2 1])
        right, _ = linsolve(v -> ApplyELR(v, p), right; tol)
        
        # left disconnected
        # (three contributions involving B to the left, then the linear system defined by
        # ApplyERL is solved at momentum -p)
        # NOTE(review): throughout ApplyHeff the first index pair of h̃ is contracted with
        # the conjugated tensors / open output legs and the second pair with the
        # unconjugated ones, which is the reverse of the index placement used in H_Ac and
        # H_C. The two coincide when h̃ is unchanged under swapping its index pairs —
        # confirm this is intended for general Hamiltonians.
        @tensor left[-1; -2] := Lh[1; 2] * B[2 3; -2] * conj(Al[1 3; -1]) +
                                Al[1 2; 4] * B[4 5; -2] * conj(Al[1 3; 6]) * conj(Al[6 7; -1]) * h̃[3 7; 2 5] +
                                exp(-1im * p) * B[1 2; 4] * Ar[4 5; -2] * conj(Al[1 3; 6]) * conj(Al[6 7; -1]) * h̃[3 7; 2 5]
        left, _ = linsolve(v -> ApplyERL(v, -p), left; tol)
        
        # sum of all contributions to the action on B, with open legs ordered like B;
        # the terms combine h̃, the environments Lh and Rh, and the partial contractions
        # `left` and `right`, weighted by momentum-dependent phases
        @tensor y[-1 -2; -3] := B[-1 2; 1] * Ar[1; 3 4] * conj(Ar[-3; 5 4]) * h̃[-2 5; 2 3] +
                                exp(1im * p) * Al[-1 2; 1] * B[1 3; 4] * conj(Ar[-3; 5 4]) * h̃[-2 5; 2 3] +
                                exp(-1im * p) * B[4 3; 1] * Ar[1; 2 -3] * conj(Al[4 5; -1]) * h̃[5 -2; 3 2] +
                                Al[4 3; 1] * B[1 2; -3] * conj(Al[4 5; -1]) * h̃[5 -2; 3 2] +
                                exp(1im * p) * Al[1 2; 4] * Al[4 5; 6] * conj(Al[1 3; -1]) * right[6; -3] * h̃[3 -2;2 5] +
                                exp(2im * p) * Al[-1 6; 5] * Al[5 3; 2] * conj(Ar[-3; 4 1]) * right[2; 1] * h̃[-2 4; 6 3] +
                                Lh[-1; 1] * B[1 -2; -3] +
                                B[-1 -2; 1] * Rh[1; -3] +
                                exp(-1im * p) * left[-1; 1] * Ar[1; -2 -3] +
                                exp(+1im * p) * Lh[-1; 1] * Al[1 -2; 2] * right[2; -3]

        # contract with conj(Vl) to return to the reduced parametrization
        @tensor yp[-1; -2] := y[1 2; -2] * conj(Vl[1 2; -1])
        return yp
    end

    # find reduced parametrization
    Vl = leftnull(Al, (1, 2), (3,));
    
    # solve eigenvalue problem
    # (random complex starting vector in the space of the parametrization matrix)
    x0 = TensorMap(randn, ComplexF64, space(Vl, 3) ← space(Al, 1))
    e, x = eigsolve(ApplyHeff, x0, num, :SR; tol)
    
    return x, e
end;


We can use this to compute the Haldane gap on top of the ground state of the spin-1 Heisenberg antiferromagnet we have just obtained using VUMPS.

In [None]:
# momentum of the excitation and number of eigenvalues to compute
p, num = pi, 3

# excitations on top of the VUMPS ground state found above
x, e = quasiParticle(h, Al, Ar, Ac, C, p, num)
@printf "First triplet: %s" join(real.(e), ", ")