In [1]:
import numpy as np
from scipy.sparse import linalg as la_sparse
from scipy.sparse.linalg import LinearOperator as LinearOperator
from scipy.sparse.linalg import eigs
import scipy.linalg
import time
#import primme

## Davidson Algorithm

Consider a (Hermitian) matrix $\textbf{H}$ that we wish to diagonalize to obtain the first $K$ eigenpairs. Thus, the set $\{(\lambda_i,\textbf{v}_i)\}_{i=1}^K$ must satisfy

$$\textbf{H}\textbf{v}_i = \lambda_i\textbf{v}_i\,\,\,\,\,\,\,\,\,\,\, i = 1\ldots K$$

We consider some search subspace $\textbf{B} = [\textbf{b}_1, \textbf{b}_2,\ldots,\textbf{b}_L]$ where $L \geq K$. The search vectors must be orthonormal, meaning that $\textbf{b}_i\cdot\textbf{b}_j = \delta_{ij}$, or in matrix form $\textbf{B}^T\textbf{B} = \textbf{1}$. The ansatz here is that we can form the true eigenvectors as a linear combination of the search vectors, i.e.

$$\textbf{v}_i\approx \textbf{x}_i = \sum_{j=1}^L c_{ji}\textbf{b}_j \Rightarrow \textbf{X} = \textbf{B}\textbf{C}$$

These approximations to the true eigenvectors found within the search space are known as Ritz vectors. Since the Ritz vectors are approximate eigenvectors, we evaluate their quality by forming the Rayleigh quotient for the matrix $\textbf{H}$. The Rayleigh quotient is strictly positive quantity (for a positive-definite matrix) that we can motivate with the following. Consider

$$\textbf{H}\textbf{V} - \mathrm{diag}(\lambda_i)\textbf{V} = 0$$

where $\textbf{V} = [\textbf{v}_1, \textbf{v}_2, \ldots, \textbf{v}_K]$. Then, the quantity

$$\textbf{V}^T\textbf{H}\textbf{V} - \mathrm{diag}(\lambda_i)\textbf{V}^T\textbf{V} = 0$$

If we substitute the any approximation for the eigenvectors $\textbf{V}$, in our case $\textbf{X} = \textbf{B}\textbf{C}$, we should expect to not satisfy equality, but lie slightly above it

$$\textbf{X}^T\textbf{H}\textbf{X} - \mathrm{diag}(\lambda_i)\textbf{X}^T\textbf{X} \geq 0$$


$$R(\textbf{X}) = \dfrac{\textbf{X}^T\textbf{H}\textbf{X}}{\textbf{X}^T\textbf{X}} \geq \mathrm{diag}(\lambda_i)$$

Note that writing the Rayleigh quotient is precisely what we would consider application of the variational theorem in quantum mechanics. Using our expansion in the search space for $\textbf{X}$,

$$R(\textbf{X}) = \dfrac{\textbf{C}^T\textbf{B}^T\textbf{H}\textbf{B}\textbf{C}}{\textbf{C}^T\textbf{B}^T\textbf{B}\textbf{C}} = \dfrac{\textbf{C}^T(\textbf{B}^T\textbf{H}\textbf{B})\textbf{C}}{\textbf{C}^T\textbf{C}} \geq \mathrm{diag}(\lambda_i)$$

The quantity in the parentheses is the projection of the original matrix onto the search subspace. It is called the interaction matrix

$$\textbf{G} = \textbf{B}^T\textbf{H}\textbf{B}$$

The Rayleigh quotient can be written as a strictly positive quantity in the form $R(\textbf{C}) = \textbf{C}^T\textbf{G}\textbf{C} - \mathrm{diag}(\lambda_i)\textbf{C}^T\textbf{C} \geq 0$. If we consider a variational optimization of $R(\textbf{C})$, we find

$$\dfrac{dR}{d\textbf{C}^T} = \textbf{G}\textbf{C} - \mathrm{diag}(\lambda_i) \textbf{C} = 0 \Rightarrow \textbf{G}\textbf{C} = \mathrm{diag}(\lambda_i) \textbf{C}$$

Thus, in order to find the best approximation to the true eigenvectors in the search subspace, we must pick a linear combination of search subspace vectors given by the eigenvectors of the interaction matrix!

Because $\textbf{H}$ is too large to store and access in memory, we formulate the algorithm to only access the matrix-vector product ${\sigma}_i = \textbf{H}\textbf{b}_i \Rightarrow {\Sigma} = [\sigma_1, \sigma_2, \ldots, \sigma_L] = \textbf{H}\textbf{B}$, thus

$$\textbf{G} = \textbf{B}^T\Sigma$$

And diagonalizing it to obtain

$$\mathrm{diag}(\omega_i) = \textbf{C}^{-1}\textbf{G}\textbf{C}$$

where $\mathrm{diag}(\omega_i)$ is our variational approximation (upper bound) to the true eigenvalues $\mathrm{diag}(\lambda_i)$. The error of our eigenpairs within the search subspace is evaluated by considering how well they satisfy the original eigenvalue equation

$$\textbf{H}\textbf{V} - \mathrm{diag}(\lambda_i)\textbf{V} = 0 \Rightarrow \textbf{H}\textbf{X} - \mathrm{diag}(\omega_i)\textbf{X} \geq 0$$

$$\textbf{r} = (\textbf{H}-\mathrm{diag}(\omega_i))\textbf{X} = \textbf{H}\textbf{B}\textbf{C} - \mathrm{diag}(\omega_i)\textbf{B}\textbf{C} = \Sigma\textbf{C} - \mathrm{diag}(\omega_i)\textbf{B}\textbf{C}$$

Given the residual $\textbf{r}(\textbf{X},\omega_i) = (\textbf{H}-\mathrm{diag}(\omega_i))\textbf{X}$, our goal should be to minimize it the best we can. We do so by considering a first order Taylor expansion 

$$\textbf{r}(\textbf{X}+\delta\textbf{X}, \omega_i) \approx \textbf{r}(\textbf{X},\omega_i)  + \dfrac{\partial \textbf{r}}{\partial \textbf{X}}\delta\textbf{X} = \textbf{r}(\textbf{X},\omega_i) + (\textbf{H}-\mathrm{diag}(\omega_i))\delta\textbf{X} = 0$$

Thus, we have that the necessary addition to the approximate eigenvector is 

$$\delta\textbf{X} = -(\textbf{H}-\mathrm{diag}(\omega_i))^{-1}\textbf{r}(\textbf{X},\omega_i)$$

Most everything shown is not actually unique to the Davidson method; it is primarily based on the variational minimization of the Rayleigh quotient which is a starting point for many Krylov-based numerical eigensolvers. The distinction between method to method typically lies in how the search space is expanded. The Davidson method is defined by using a diagonal preconditioning of the residue vector to produce

$$\textbf{q}_i = (\textbf{H}_{ii} - \omega_i)^{-1}\textbf{r}\,\,\,\,\,\,\,\,\,\,\, i = 1\ldots K$$

The preconditioned residual vectors are concatenated into a matrix $\textbf{Q} = [\textbf{q}_1, \textbf{q}_2,\ldots,\textbf{q}_K]$ and the search subspace is expanded by appending $\textbf{Q}$ to $\textbf{B}$ so that $\textbf{B} \leftarrow [\textbf{B},\textbf{Q}]$. 

The newly augmented matrix $\textbf{B}$ is then orthonormalized using the Gram-Schmidt procedure, and importantly, vectors that are linearly dependent (defined by having a post-orthonormalization $L_2$-norm less than a pre-defined threshold, say $10^{-5}$) are discarded from the search vector set. This elimination is best performed as part of the Gram-Schmidt procedure.

An important point to address is the initial selection of search vectors. In the vanilla Davidson algorithm, one simply uses unit vectors in the direction of the first $L$ largest diagonal entries of the matrix $\textbf{H}$. This will work very well if the matrix is strongly diagonally dominant. If not, then a more creative initial guess must be used. In quantum chemistry, one might start with a small diagonalization problem in the subspace of relevant 1-, 2-, or generally, n-tuply excited determinants. 

As a final remark, although the Davidson method was formulated for sparse, diagonally-dominant, Hermitian matrices, typically best suited for large-scale configuration interaction (CI) calculations, the algorithm is quite robust and performs well even in the case of non-Hermitian matrices, such as those present in EOMCC calculations (the imaginary parts of the eigenvalues in such cases are numerically negligible). The most important properties of a matrix that is diagonalizable by Davidson is sparsity and strong diagonal dominance. Matrices that are weakly diagonally dominant can be iteratively diagonalized using the Jacobi-Davidson algorithm which uses a different search vector update. It is significantly more work and literature reports tend to show that Davidson works just fine in the majority of quantum chemistry calculations, provided that care is taken in choosing the initial search vector subspace.  

In [45]:
n = 5000

sparsity = 1

A = np.zeros((n,n))

for i in range(n):
    for j in range(n):
        if i == j:
            A[i,i] = i+1 + np.random.rand(1)*sparsity
        else:
            A[i,j] = np.random.rand(1)*sparsity
# k = n/2
# for i in range(1,n+1):
#     for j in range(1,n+1):
#         if j <= k:
#             A[i-1,j-1] = (i)*(i==j) - (i-j-k**2)
#         else:
#             A[i-1,j-1] = (i)*(i==j) + (i-j-k**2);


def matvec(x):
    return np.dot(A,x)
Aop = LinearOperator((n,n),matvec=matvec)
D = np.diag(A)

In [46]:
def davidson(A, D, nroot, opts):
    
    import scipy.linalg
    
    def mgs(A,tol=10**-15):
        k = A.shape[1]
        A[:,0] = A[:,0]/np.linalg.norm(A[:,0])
        B = A[:,0][:,np.newaxis]
        ct = 0
        for i in range(1,k):
            ct += 1
            for j in range(ct):
                A[:,ct] -= np.dot(np.conj(A[:,j]).T,A[:,ct])*A[:,j]
            normA = np.linalg.norm(A[:,ct])
            if normA > tol:
                A[:,ct] = A[:,ct]/normA
                B = np.concatenate((B,A[:,ct][:,np.newaxis]),axis=1)
            else:
                np.delete(A,ct,axis=1)
                ct -= 1
        return B
    
    def update_R(res,e,D):
        for i in range(len(res)):
            res[i] = res[i]/(e - D[i])
        return res
    
    def ortho_root_vec(q,B):
        for i in range(B.shape[1]):
            b = B[:,i]/np.linalg.norm(B[:,i])
            q -= np.dot(b.T,q)*b
        return q
    
    print('Beginning Davidson diagonalization algorithm...\n')

    nvec_per_root = opts.get('nvec_per_root')
    max_nvec_per_root = opts.get('max_nvec_per_root')
    maxit = opts.get('maxit')
    tol = opts.get('tol')
    mgs_tol = opts.get('mgs_tol')
    thresh_vec = opts.get('thresh_vec')
    
    mat_dim = A.shape[0]
    curr_size = nvec_per_root*nroot
    max_size = max_nvec_per_root*nroot
    
    if opts.get('init_guess') == 'diagonal':
        idx = np.argsort(D)
        B = np.eye(mat_dim)[:,idx[:curr_size]]
    else:
        B = opts.get('init_guess')
        
    
    eigval = np.zeros(nroot)
    
    it = 0; flag_conv = 0;
    while it < maxit and flag_conv == 0:

        if B.shape[0] < B.shape[1]:
            print('WARNING: Number of search vectors greater than dimension of search vectors... MGS will behave erratically!\n')

        B = mgs(B,mgs_tol)

        curr_size = B.shape[1]

        eigval_old = eigval.copy()

        print('Iter-{}    Subspace size-{}'.format(it,curr_size))
        print('----------------------------------')

        SIGMA = A.matmat(B)
        G = np.dot(np.conj(B).T,SIGMA)
        G_eigval, G_eigvecR = scipy.linalg.eig(G,right=True,left=False)
        idx = np.argsort(G_eigval)
        
        # Right ritz vector
        V = np.dot(B,G_eigvecR)
        # Left ritz vector
        W = np.dot(B, np.conj(np.linalg.inv(G_eigvecR)).T)
        
        eigval = G_eigval[idx[:nroot]]
        alpha = G_eigvecR[:,idx[:nroot]]

        # Right ritz vector
        V = V[:,idx[:nroot]]
        W = W[:,idx[:nroot]]

        Q = np.zeros((mat_dim,nroot))
        resid_norm = np.zeros(nroot)
        ct_add = 0
        for j in range(nroot):
            
            res = np.dot(SIGMA,alpha[:,j]) - eigval[j]*V[:,j]
            resid_norm[j] = np.linalg.norm(res)
            res = res/resid_norm[j]
            
            q = update_R(res,eigval[j],D)
            if ct_add > 0:
                q_orth = ortho_root_vec(q,np.concatenate((B,Q[:,:ct_add]),axis=1))
            else:
                q_orth = ortho_root_vec(1,B)
            if np.linalg.norm(q) > thresh_vec:
                Q[:,ct_add] = q/np.linalg.norm(q)
                ct_add += 1
            

            print('    Root = {}     e = {:>10.12f}    |r| = {:>10.12f}'.\
                      format(j+1,np.real(eigval[j]),resid_norm[j]))

        if sum(resid_norm)/nroot < tol:
            print('Davidson successfully converged in {} iterations'.format(it))
            flag_conv = True
            break
        else:
            if curr_size >= max_size:
                print('Restarting and collapsing...')
                B = np.dot(B,alpha)
                eigval = eigval_old.copy()
            else:
                B = np.concatenate((B,Q[:,:ct_add]),axis=1)
            
        it += 1
    
    return eigval, V, W, flag_conv


In [31]:
def mgs(A,tol=10**-15):
    k = A.shape[1]
    A[:,0] = A[:,0]/np.linalg.norm(A[:,0])
    B = A[:,0][:,np.newaxis]
    ct = 0
    for i in range(1,k):
        ct += 1
        for j in range(ct):
            A[:,ct] -= np.dot(np.conj(A[:,j]).T,A[:,ct])*A[:,j]
        normA = np.linalg.norm(A[:,ct])
        if normA > tol:
            A[:,ct] = A[:,ct]/normA
            B = np.concatenate((B,A[:,ct][:,np.newaxis]),axis=1)
        else:
            np.delete(A,ct,axis=1)
            ct -= 1
    return B

def update_R(res,e,D):
    for i in range(len(res)):
        res[i] = res[i]/(e - D[i])
    return res

def ortho_root_vec(q,B):
    for i in range(B.shape[1]):
        b = B[:,i]/np.linalg.norm(B[:,i])
        q -= np.dot(b.T,q)*b
    return q

In [62]:
def davidson_v2(A, D, nroot, nvec, maxit, tol, thresh_vec):

    print('Beginning Davidson diagonalization algorithm...\n')
    
    mat_dim = A.shape[0]
    curr_size = nroot
    max_size = nvec*nroot
    
    if opts.get('init_guess') == 'diagonal':
        idx = np.argsort(D)
        B = np.eye(mat_dim)[:,idx[:curr_size]]
    else:
        B = opts.get('init_guess')
            
    eigval = np.zeros(nroot)
    
    for it in range(maxit):
        
        B = mgs(B,1e-10)

        curr_size = B.shape[1]

        print('Iter-{}    Subspace size-{}'.format(it,curr_size))
        print('----------------------------------')

        SIGMA = A.matmat(B)
        G = np.dot(B.T,SIGMA)
        eval_E, alpha = scipy.linalg.eig(G)
        #eval_E, alpha = scipy.linalg.eig(G,right=True,left=False)
        idx = np.argsort(eval_E)
        
        # Right ritz vector
        V = np.dot(B,alpha)
        
        eigval = eval_E[idx[:nroot]]
        alpha = alpha[:,idx[:nroot]]

        # Right ritz vector
        V = V[:,idx[:nroot]]

        Q = np.zeros((mat_dim,nroot))
        resid_norm = np.zeros(nroot)
        ct_add = 0
        for j in range(nroot):
            
            res = np.dot(SIGMA,alpha[:,j]) - eigval[j]*V[:,j]
            resid_norm[j] = np.linalg.norm(res)
            res = res/resid_norm[j]
            
            q = update_R(res,eigval[j],D)
            if ct_add > 0:
                q_orth = ortho_root_vec(q,np.concatenate((B,Q[:,:ct_add]),axis=1))
            else:
                q_orth = ortho_root_vec(1,B)
            if np.linalg.norm(q) > thresh_vec:
                Q[:,ct_add] = q/np.linalg.norm(q)
                ct_add += 1
            

            print('    Root = {}     e = {:>10.12f}    |r| = {:>10.12f}'.\
                      format(j+1,np.real(eigval[j]),resid_norm[j]))

        if all(resid_norm < tol):
            print('Davidson successfully converged in {} iterations'.format(it))
            break
        else:
            if curr_size >= max_size:
                print('Restarting and collapsing...')
                B = np.dot(B,alpha)
            else:
                B = np.concatenate((B,Q[:,:ct_add]),axis=1)
            
    
    return eigval, V


In [63]:
opts = {
  "nvec_per_root": 1,
  "max_nvec_per_root": 5,
  "maxit": 80,
  "tol" : 10**-7,
  "mgs_tol" : 10**-10,
  "init_guess" : 'diagonal', 
  "thresh_vec" : 10**-7
}
nroot = 5
nvec = 5
maxit = 80
tol = 1e-07
thresh_vec = 1e-03

e, V  = davidson_v2(Aop, D, nroot, nvec, maxit, tol, thresh_vec)
#e, V, W, _  = davidson(Aop,D,nroot,opts)

Beginning Davidson diagonalization algorithm...

Iter-0    Subspace size-5
----------------------------------
    Root = 1     e = 1.291604661144    |r| = 30.914241992626
    Root = 2     e = 2.089260062559    |r| = 20.564866710503
    Root = 3     e = 3.205672604720    |r| = 28.784884143631
    Root = 4     e = 4.665421559640    |r| = 41.583500411022
    Root = 5     e = 5.555245091375    |r| = 74.269504740679
Iter-1    Subspace size-10
----------------------------------
    Root = 1     e = 0.982027616795    |r| = 11.629162581980




    Root = 2     e = 2.124026245881    |r| = 2.850289568824
    Root = 3     e = 2.697981651519    |r| = 7.423938080489
    Root = 4     e = 4.064257142415    |r| = 15.128381037950
    Root = 5     e = 4.781092193218    |r| = 14.519321718119
Iter-2    Subspace size-15
----------------------------------
    Root = 1     e = 1.008006960808    |r| = 1.325834288977
    Root = 2     e = 2.130168512833    |r| = 0.270278973301
    Root = 3     e = 2.760832681932    |r| = 0.963352333011
    Root = 4     e = 3.979435132645    |r| = 2.711252722093
    Root = 5     e = 4.827155884285    |r| = 6.297825837454
Iter-3    Subspace size-20
----------------------------------
    Root = 1     e = 1.006649170562    |r| = 0.124661288942
    Root = 2     e = 2.130052475705    |r| = 0.026664758652
    Root = 3     e = 2.765651035164    |r| = 0.084694995711
    Root = 4     e = 3.970467395767    |r| = 0.306488006484
    Root = 5     e = 4.816273624894    |r| = 0.895370343968
Iter-4    Subspace size-25
-------

In [20]:
Etrue,Wtrue,Vtrue = scipy.linalg.eig(A,right=True,left=True)
idx = np.argsort(Etrue)
Etrue = Etrue[idx[:nroot]]
Vtrue = Vtrue[:,idx[:nroot]]

In [21]:
print_errors( Vtrue, Wtrue, Etrue, V, W, e )


Printing errors in eigenpairs...
Root-0
Error in eigenvalue =   0.000000  Right Vec rotation =   0.000000 deg   Left Vec rotation =  90.058656 deg

Root-1
Error in eigenvalue =   0.000002  Right Vec rotation = 179.999976 deg   Left Vec rotation =  89.306558 deg

Root-2
Error in eigenvalue =   0.000026  Right Vec rotation = 179.999663 deg   Left Vec rotation =  92.672337 deg

Root-3
Error in eigenvalue =   0.000231  Right Vec rotation = 179.996335 deg   Left Vec rotation = 101.500956 deg

Root-4
Error in eigenvalue =   0.001431  Right Vec rotation = 179.965958 deg   Left Vec rotation =  78.587039 deg



In [18]:
def print_errors( V_true, W_true, e_true, V, W, e ):

    print('\nPrinting errors in eigenpairs...')
    for j in range(len(e_true)):

        ov_right = np.dot(np.conj(V[:,j]).T,V_true[:,j]);
        theta_right = np.degrees(np.arccos(ov_right));

        ov_left = np.dot(np.conj(W[:,j]).T,W_true[:,j]);
        theta_left = np.degrees(np.arccos(ov_left));

        err_e = np.abs(e_true[j] - e[j]);

        print('Root-{}'.format(j))
        print('Error in eigenvalue = {:>10.6f}  Right Vec rotation = {:>10.6f} deg   Left Vec rotation = {:>10.6f} deg\n'\
              .format(err_e, theta_right, theta_left))




In [None]:
function [ V, W, e, res, it, flag_conv] = davidson(A, nroot, B0, flag_eig, opts)
% Block-Davidson algorithm for diagonalizing large sparse (non-Hermitian)
% matrices
% K Hirao, H Nakatusji. J Comp Phys. 45, 246-254 (1982)

% Input:
% A - matrix to be diagonalized (or function for matrix-vector product)
% nroot - number of desired eigenpairs
% B0 - initial search space matrix, used in conjunction with
%      opts.init_guess = 'custom'
% flag_eig = 'right' or 'left' eigenvectors
% opts - struct with fields
%        opts.maxit = 2000;
%        opts.tol = 1e-4;
%        opts.nvec_per_root = 1;
%        opts.max_nvec_per_root = 10;
%        opts.flag_verbose = 1;
%        opts.init_guess = 'diagonal';
%        opts.init_space = 5;

% Output
% V - right eigenvectors
% W - left eigenvectors
% e - eigenvalues
% res - residual
% it - iterations
% flag_conv - 1 if converged, 0 if not converged


    nvec_per_root = opts.nvec_per_root;
    max_nvec_per_root = opts.max_nvec_per_root;
    maxit = opts.maxit;
    tol = opts.tol;
    flag_verbose = opts.flag_verbose;
    
    if flag_verbose == 1
        fprintf('Beginning Davidson diagonlization algorithm...\n')
    end
    
    tic

    mat_dim = size(A,1);
    curr_size = nvec_per_root*nroot;
    max_size = max_nvec_per_root*nroot;
    
    e = zeros(nroot,1);
    D = diag(A);
    
    switch opts.init_guess
        
        case 'diagonal'
            
            [~, idx] = sort(D, 'ascend');
            B = eye(mat_dim); B = B(:,idx(1:curr_size));
            
        case 'random'
            
            B = rand(mat_dim,curr_size);
            
        case 'eigen'
            
            n_init = opts.init_space;
            if n_init < nroot
                fprintf('Warning: initial space must be at least as big as number of roots!\n')
                n_init = 2*nroot;
                fprintf('Changing n_init to %d...\n',n_init);
            end
            Asmall = A(1:n_init, 1:n_init);
            [Vs, Ds] = eig(Asmall);
            [ds_sort, idx] = sort(diag(Ds),'ascend');
            B = zeros(mat_dim, n_init);
            for i = 1:n_init
                B(:,i) = cat(1,Vs(:,idx(i)),zeros(mat_dim-n_init,1));
            end
            
        case 'custom'
            
            B = B0;
            
            
        otherwise
            disp('Please enter a valid initial guess type!')
    end
    
    %%%%%%% SOLVE RIGHT EIGENPROBLEM %%%%%%%%%
    if strcmp(flag_eig, 'right')
        
        it = 0; flag_conv = 0;
        while it < maxit && flag_conv == 0

            if size(B,1) < size(B,2)
                fprintf('WARNING: Number of search vectors greater than dimension of search vectors... MGS will behave erratically!\n')
            end

            [B,~] = mgson(B);

            curr_size = size(B,2);

            e_old = e;

            if flag_verbose == 1
                fprintf('\nIter-%d    Subspace size-%d\n',it,curr_size)
                fprintf('----------------------------------\n')
            end

            SIGMA = A*B;

            G = B'*SIGMA;

            [alpha,GD] = eig(G);
            [e,idx] = sort(diag(GD),'ascend');

            % Right ritz vectors
            V = B*alpha;
            % Left ritz vectors
            W = B*ctranspose(inv(alpha));
            %W = B/alpha';

            % sorting eigenpairs
            e = e(1:nroot);
            alpha = alpha(:,idx(1:nroot));  % expansion coefficients
            V = V(:,idx(1:nroot));
            W = W(:,idx(1:nroot));

            Q = zeros(mat_dim,nroot);
            resid_norm = zeros(nroot,1);

            for j = 1:nroot

                r = SIGMA*alpha(:,j) - e(j)*V(:,j);
                resid_norm(j) = norm(r);

                Q(:,j) = r/(e(j)-D(j));

                if flag_verbose == 1
                    fprintf('   Root = %d     e = %4.10f     |r| = %4.10f\n',j,real(e(j)),resid_norm(j));
                end
            end

                res = sum(resid_norm);
                eps = sqrt(sum((e(1:nroot)-e_old(1:nroot)).^2));
                if res < tol && eps < tol
                    flag_conv = 1;
                    if flag_verbose
                        fprintf('\nDavidson successfully converged in %d iterations (%4.2f seconds)\n',it, toc);
                    end
                else
                    if curr_size >= max_size
                        if flag_verbose == 1
                            fprintf('\nRestarting and collapsing...\n')
                        end
                        B = B*alpha;
                        e = e_old;
                    else
                        B = cat(2,B,Q);
                    end
                end

                it = it + 1;

        end
        
    end
    
    %%%%%%% SOLVE LEFT EIGENPROBLEM %%%%%%%%%
    if strcmp(flag_eig, 'left')
        
        AH = ctranspose(A);
        DH = diag(AH);
    
        it = 0; flag_conv = 0;
        while it < maxit && flag_conv == 0

            if size(B,1) < size(B,2)
                fprintf('WARNING: Number of search vectors greater than dimension of search vectors... MGS will behave erratically!\n')
            end

            [B,~] = mgson(B);

            curr_size = size(B,2);

            e_old = e;

            if flag_verbose == 1
                fprintf('\nIter-%d    Subspace size-%d\n',it,curr_size)
                fprintf('----------------------------------\n')
            end

            LAMBDA = AH*B;

            G = ctranspose(LAMBDA)*B;

            [~,GD,alpha] = eig(G);
            [e,idx] = sort(diag(GD),'ascend');

            % Left Ritz vector
            W = B*alpha;

            % Right Ritz vector
            V = B*ctranspose(inv(alpha));

            % sorting eigenpairs
            e = e(1:nroot);
            alpha = alpha(:,idx(1:nroot));  % expansion coefficients

            % Left Ritz vector
            W = B*alpha;

            Q = zeros(mat_dim,nroot);
            resid_norm = zeros(nroot,1);

            for j = 1:nroot

                r = LAMBDA*alpha(:,j) - e(j)*W(:,j);
                resid_norm(j) = norm(r);

                Q(:,j) = r/(e(j)-DH(j));

                if flag_verbose == 1
                    fprintf('   Root = %d     e = %4.10f     |r| = %4.10f\n',j,real(e(j)),resid_norm(j));
                end
            end

                res = sum(resid_norm);
                eps = sqrt(sum((e(1:nroot)-e_old(1:nroot)).^2));
                if res < tol && eps < tol
                    flag_conv = 1;
                    if flag_verbose
                        fprintf('\nDavidson successfully converged in %d iterations (%4.2f seconds)\n',it, toc);
                    end
                else
                    if curr_size >= max_size
                        if flag_verbose == 1
                            fprintf('\nRestarting and collapsing...\n')
                        end
                        B = B*alpha;
                        e = e_old;
                    else
                        B = cat(2,B,Q);
                    end
                end

                it = it + 1;

        end
    
    end
    
    
end

In [None]:
def davidson_solver(A, V0, D, theta0, max_nvec_per_state, maxit, tol):
    
    B = V0.copy()
    theta = theta0.copy()
    
    it = 1; flag_conv = False;
    while it < maxit:
        
        # orthonormalize search space
        B,_ = np.linalg.qr(B)
        
        # update search space dimension
        L = B.shape[1]
        
        # compute matrix-vector product (could have generic function here)
        sigma = np.dot(A,B)
        
        # calculate and diagonalize small interaction matrix
        G = np.dot(B.T,sigma)
        theta, alpha = np.linalg.eig(G)
        
        # find correct eigenpair using distance from initial guess
        idx = np.argmin(np.abs(theta - theta0*np.ones(len(theta))))
        theta = theta[idx]
        alpha = alpha[:,idx]
        X = np.dot(B,alpha)
        
        # Calculate residual
        r = np.dot(sigma, alpha) - theta*X
        resid_norm = np.linalg.norm(r)

        # Precondition residual
        Q = r/(theta-D[idx])
        
        # Print status
        print('Iter - {}    L = {}     e = {:>10.12f}    |r| = {:>10.12f}'.\
                format(it, L, np.real(theta), resid_norm))


        # Check convergence
        if resid_norm < tol:
            flag_conv = True
            break
        else:
            if L >= max_nvec_per_state:
                print('Restarting and Collapsing...')
                B = np.dot(B,alpha)[:,np.newaxis]
            else:
                if len(Q.shape) > 1:
                    B = np.concatenate((B,Q),axis=1)
                else:
                    B = np.concatenate((B,Q[:,np.newaxis]),axis=1)

        it +=1 
          
    return theta, X, B, flag_conv, it

def davidson_diagonalize(A, nroot, max_nvec_per_state, maxit, tol):
    
    idx = np.zeros(nroot)
    theta = np.zeros(nroot)
    D = np.diag(A)
    
    X = np.zeros((len(D),nroot))
   
    idx = np.argsort(D)[:nroot]
    theta0 = D[idx]
    V0 = np.eye(len(D))[:,idx]
    
    
    for i in range(nroot):
        
        print('Root-{} :'.format(i+1))
        print('---------------------------------------------------------------')
        print('Iter - {}  Initial Guess = {:>10.12f}'.format(0,theta0[i]))
    
        th, x, V0, flag_conv, it = davidson_solver(A, V0, D, theta0[i], max_nvec_per_state, maxit, tol)
        theta[i] = th
        X[:,i] = x
        
        if flag_conv:
            print('Root {} converged in {} iterations'.format(i+1, it))
            print('\n')
        else:
            print('Root {} not converged in {} iterations'.format(i+1,it))
            print('\n')

        
    return theta, X

In [None]:
N = 200
A = np.zeros((N,N))
k = N/2
for i in range(N):
    for j in range(N):
        if j <= k:
            A[i,j] = i*(i==j) - (i-j-k**2)
        else:
            A[i,j] = i*(i==j) + (i-j-k**2)
            
E, V = np.linalg.eig(A)
idx = np.argsort(E)
E = E[idx[:5]]
V = V[:,idx[:5]]

In [None]:
E

In [None]:
maxit = 80
max_nvec_per_state = 50
tol = 10**-6
nroot = 5

theta, X = davidson_diagonalize(A, nroot, max_nvec_per_state, maxit, tol) 

In [None]:
X[:,0]

In [None]:
n = 1332
sparsity = 10**-6

diagonal = []

ct = 0
while len(diagonal) < n:
    
    ct+=0.5
    
    diagonal.append(ct)
    diagonal.append(ct)
    diagonal.append(ct)
    
    ct+= 0.5
    
    diagonal.append(ct)
    
diagonal = np.asarray(diagonal,dtype=float)

#diagonal = np.diag(np.arange(1,n+1))

A = sparsity*np.random.rand(n,n) + np.diag(diagonal)

check_diagonal_dom(A)

maxit = 100
thresh_vec = 10**-5
tol = 10**-5
Lmax = n//2
nroot = 6
nvec_per_state = 2

d = np.diagonal(A)
idx = np.argsort(d)[:nvec_per_state*nroot]
B = np.eye((n))[:,idx]
omega = d[idx]
omega = omega[:nroot]

omega, X, flag_conv = davidson(A,d,B,omega,maxit,thresh_vec,tol,Lmax)

In [None]:
E,V = np.linalg.eig(A)
idx = E.argsort()[:nroot]
E = E[idx]
V = V[:,idx]
print(E)

In [None]:
for j in range(nroot):
    nm = np.sum( abs(X[:,j] - V[:,j]) )
    print(nm)

In [None]:
diagonal = []

ct = 1
while len(diagonal) < n:
    
    diagonal.append(ct)
    diagonal.append(ct)
    diagonal.append(ct)
    
    ct+= 1
    
    diagonal.append(ct)
    
diagonal = np.asarray(diagonal,dtype=float)



In [None]:
# def Amatvec(x):
#     return A@x

# def Amatmat(x):
#     return A@x

# Aop = LinearOperator((n,n),matvec = Amatvec, matmat=Amatmat)

# omega, evecs = eigs(Aop, k=10,M=None, tol=1e-6, which='SM')

# idx = omega.argsort()
# omega = omega[idx]
# evecs = evecs[:,idx]

In [None]:
#         Q = np.zeros((n,nroot))

#         resid_norm = 0
#         for j in range(nroot):

# #             r = AX[:,j] - omega[j]*Rz[:,j]
#             r = np.dot(SIGMA,alpha[:,j]) - omega[j]*Rz[:,j]
#             normr = np.linalg.norm(r)
#             resid_norm += normr
            
#             if normr > thresh_vec:
#                 Q[:,j] = r/(omega[j]-d[j])
#             else:
#                 np.delete(Q,j,axis=1)

#             print("    Root {}: e = {:>10.12f} de = {:>10.12f} "
#               "|r| = {:>10.12f}".format(j+1, np.real(omega[j]), abs(omega[j] - omega_old[j]), normr))

In [None]:
for j in range(nroot):
    nm = np.sum( abs(evecs[:,j] - V[:,j]) )
    print(nm)

In [None]:
# Begin block Davidson routine

def davidson_v2(A,d,maxit,thresh_vec,tol,L,Lmax):

    t0 = time.time()
    
    idx = np.argsort(d)[:L]
    B = np.eye((n))[:,idx]
    omega = d[idx]
    omega = omega[:nroot]

    #L = nroot*2

    it = 1
    while it < maxit:

        omega_old = omega

        B,_ = np.linalg.qr(B)
        L = B.shape[1]

        # Matrix vector (matrix) product
        SIGMA = np.dot(A,B)

        # Create and diagonalize interaction subspace matrix; sort eigenvector/eigenvalue pairs
        G = np.dot(B.T,SIGMA)
        omega,alpha = np.linalg.eig(G)
        idx = omega.argsort()
        omega = omega[idx[:L]][:nroot]
        alpha = alpha[:,idx[:L]][:nroot]
        
        # Ritz vectors (eigenvector approximation)
        Rz = np.dot(B,alpha)
    
        # Residual vector (A*Rz - omega*Rz)
        r = [np.dot(SIGMA,alpha[:,j])-omega[j]*Rz[:,j] for j in range(nroot)]
        
        # Residual norm
        normr = ([np.linalg.norm(r0) for r0 in r])
        resid_norm = sum(normr)
        
        # Subspace expansion array
        Q = np.asarray([r[j]/(omega[j]-d[j]) for j in range(nroot) if normr[j] > thresh_vec],dtype=float)
        Q = Q.T
        
        
        delta_e = np.linalg.norm(omega - omega_old)
        resid_per_root = resid_norm/nroot
        
        print('Iter = {:>2.0f}     L = {:>10.4f}     |r| = {:>10.12f}     de = {:>10.12f}'\
                                                      .format(it,L,resid_per_root,delta_e))

        # Check convergence
        if resid_per_root < tol and delta_e < tol:
            flag_conv = True
            break
        else:
            if L >= Lmax:
                print('Restarting and collapsing...')
                B = np.dot(B, alpha)
                omega = omega_old.copy()
            else:
                B = np.concatenate((B,Q),axis=1)
        it += 1
        
    if flag_conv:
        print('Davidson Converged in {} seconds...'.format(time.time()-t0))
        print('Eigenvalues:')
        print(np.real(omega))
    else:
        print('Davidson failed to converge in {} iterations'.format(maxit))
        

    return omega, Rz, flag_conv, it



In [None]:

nroot = 6
maxit = 100
thresh_vec = 10**-5
tol = 10**-5
L = 2*nroot
Lmax = 200

d = np.diagonal(A)
idx = np.argsort(d)[:L]
B = np.eye((n))[:,idx]
omega = d[idx]
omega = omega[:nroot]
    
omega, Rz, flag_conv, it = davidson_v2(A,d,maxit,thresh_vec,tol,L,Lmax)

In [None]:
E,V = np.linalg.eig(A)
idx = E.argsort()[:nroot]
E = E[idx]
V = V[:,idx]
print(np.real(E))

for j in range(nroot):
    nm = np.sum( abs(Rz[:,j] - V[:,j]) )
    print(nm)