In [None]:
from __future__ import division
import os
import numpy as np
import time
import matplotlib.pyplot as plt
import scipy
import warnings
warnings.filterwarnings('ignore')
np.random.seed(1234)
%matplotlib inline
%load_ext autoreload
%autoreload 

In [None]:
def distmat( x, y ):
    """Return the matrix of pairwise squared Euclidean distances.

    Parameters
    ----------
    x : ndarray of shape (d, n)
        First point cloud, one point per column.
    y : ndarray of shape (d, m)
        Second point cloud, one point per column.

    Returns
    -------
    ndarray of shape (n, m)
        Entry (i, j) is ||x[:, i]||^2 + ||y[:, j]||^2 - 2 <x[:, i], y[:, j]>.
        Because the expanded form is used, round-off can produce tiny
        negative values for (nearly) coincident points.
    """
    sq_norms_x = np.sum( x**2, 0 )
    sq_norms_y = np.sum( y**2, 0 )
    return sq_norms_x[:,None] + sq_norms_y[None,:] - 2 * x.transpose().dot( y )

def normalize( a ):
    """Rescale ``a`` so that its entries sum to one.

    Parameters
    ----------
    a : ndarray
        Array to rescale.  No guard is applied for a zero sum.

    Returns
    -------
    ndarray
        ``a`` divided by the sum of all its entries.
    """
    return a/np.sum( a )

def GetP( u, K, v ):
    """Compute the matrix with entries ``u[i] * K[i, j] * v[j]``.

    Parameters
    ----------
    u : ndarray of shape (n,)
        Row scaling.
    K : ndarray of shape (n, m)
        Matrix to rescale.
    v : ndarray of shape (m,)
        Column scaling.

    Returns
    -------
    ndarray of shape (n, m)
        Same result as ``diag(u) @ K @ diag(v)``, obtained by broadcasting
        instead of building the diagonal matrices.
    """
    return u[:,None] * K * v[None,:]

def plotp( x, col, plt, scale = 200, edgecolors = "k"):
    """Scatter-plot the columns of ``x`` using the plotting object ``plt``.

    The first row of ``x`` gives the horizontal coordinates and the second
    row the vertical ones.  ``col`` is forwarded as the colour argument,
    ``scale`` as the marker size and ``edgecolors`` as the marker outline.
    Returns whatever ``plt.scatter`` returns.
    """
    horizontal = x[0,:]
    vertical = x[1,:]
    marker_options = dict(
        s = scale,
        edgecolors = edgecolors,
        c = col,
        cmap = 'plasma',
        linewidths = 2,
    )
    return plt.scatter( horizontal, vertical, **marker_options )

In [None]:
# Root folder for the figures written by this notebook.
relative_path_to_new_folder = "../Images"
# os.makedirs creates missing parent folders, and exist_ok = True makes the
# call a no-op when the folder is already there, so a single call replaces
# the previous "check with isdir, then create" sequence.
os.makedirs( os.path.join( relative_path_to_new_folder, 'Benchmarking_inversion_methods_images' ), exist_ok = True )

In [None]:
import computational_OT

## Damped Newton without preconditioning

In [None]:
def dampednewton( epsilons, N, x, y, rho = 0.95, c = 0.1, iterations = 50 ):
    """Run ``computational_OT.damped_Newton`` once per regularisation value.

    Parameters
    ----------
    epsilons : list of float
        Regularisation values to try.  The list is modified in place: a value
        whose run is rejected by the check on the optimizer output is removed,
        so that afterwards ``epsilons[i]`` matches entry ``i`` of every
        returned list.
    N : sequence of two ints
        ``N[0]`` and ``N[1]`` are the lengths of the uniform weight vectors
        ``a`` and ``b``.
    x, y : ndarray
        Point clouds handed to ``distmat`` to build the cost matrix.
    rho, c : float
        Forwarded unchanged to ``computational_OT.damped_Newton``.
    iterations : int
        Passed as ``max_iterations`` to the optimizer's ``_update``.

    Returns
    -------
    dict
        'results_list' : optimizer outputs, one per accepted epsilon,
        'time_stamps'  : wall-clock duration of each run, in seconds,
        'Ps'           : matrices u[:,None] * K * v[None,:] built from the
                         returned potentials,
        'Hessians'     : -eps * Optimizer.Hessian, rescaled on both sides by
                         1/sqrt of the concatenated weights (a, b).
    """
    print("Damped Newton... ")
    print( "Doing for (",N[0], N[1],")." )
    dampedNewtonP=[]
    results_dampedNewton  = []
    times_dampedNewton    = []
    Hessians_dampedNewton = []
    # Cost matrix
    Cost = distmat( x, y )
    # a and b
    a = normalize( np.ones( N[0] ) )
    b = normalize( np.ones( N[1] ) )
    # Iterate over a snapshot: rejected values are removed from `epsilons`
    # below, and removing from the list being iterated would make the loop
    # silently skip the value that follows a rejected one.
    for eps in list( epsilons ):
        print("For epsilon = "+str(eps)+":")
        # Kernel
        K = np.exp( - Cost/eps )
        # Starting potentials are set to the weight vectors themselves.
        f, g = a, b
        print( " |- Iterating")
        # perf_counter is a monotonic, high-resolution clock meant for timing.
        start = time.perf_counter()
        Optimizer = computational_OT.damped_Newton( K,
                                                    a,
                                                    b,
                                                    f,
                                                    g,
                                                    eps,
                                                    rho,
                                                    c )
        out = Optimizer._update(    max_iterations = iterations,
                                    debug = False )
        end = time.perf_counter()
        # NOTE(review): presumably the optimizer signals failure by returning
        # six zeros -- confirm against computational_OT.
        if ( out != np.zeros(6) ).all():
            results_dampedNewton.append( out )
            times_dampedNewton.append( end - start )
            print( " |- Computing P" )
            print( "" )
            u_opt = np.exp( out['potential_f']/eps )
            # Kernel rebuilt from the cost matrix rather than reusing the
            # array that was handed to the optimizer.
            K = np.exp( - Cost/eps )
            v_opt =  np.exp( out['potential_g']/eps )
            P_opt = GetP( u_opt, K, v_opt )
            dampedNewtonP.append( P_opt )
            print( " |- Recording (unstabilized) Hessian \n" )
            mat  = - eps * Optimizer.Hessian
            diag = 1/np.sqrt( np.concatenate( ( a, b ), axis = None ) )
            mat = diag[:,None] * mat * diag[None,:]
            Hessians_dampedNewton.append( mat )
        else:
            # Keep the caller's list aligned with the returned results.
            epsilons.remove( eps )
    # end for
    return {
        'results_list': results_dampedNewton,
        'time_stamps' : times_dampedNewton,
        'Ps'          : dampedNewtonP,
        'Hessians'    : Hessians_dampedNewton
    }

## Iterative inversion with conjugate gradient (CG) or GMRES, recording the mean time per inversion over a number of repeated runs

In [None]:
def iterative_inversions( epsilons, N, x, y, null_vector, precond_vectors, Num_inv = 100, inv_method = 'cg', maxiter = 10, rtol = 1e-05, atol = 1e-10 ):
    """Time repeated iterative solves of the preconditioned Hessian system.

    For every epsilon the block matrix [[diag(r1), P], [P.T, diag(r2)]] is
    built at the fixed potentials (f, g) = (a, b), rescaled symmetrically by
    the inverse square root of its diagonal, and wrapped in a linear operator
    v -> P_c (A + e e^T) P_c v, where P_c = I + z y^T is assembled from
    ``precond_vectors`` and e is the rescaled, normalised ``null_vector``.
    The system is then solved ``Num_inv`` times and the mean time is stored.

    Parameters
    ----------
    epsilons : iterable of float
        Regularisation values to benchmark.
    N : sequence of two ints
        Lengths of the uniform weight vectors a and b.
    x, y : ndarray
        Point clouds handed to ``distmat`` to build the cost matrix.
    null_vector : ndarray of shape (N[0] + N[1],)
        Direction added back as a rank-one term e e^T.
    precond_vectors : sequence of ndarray
        Preconditioning vectors, each of length N[0] + N[1].
    Num_inv : int
        Number of timed solves per epsilon; must be at least 1.
    inv_method : str
        'cg' selects conjugate gradient; any other value selects GMRES.
    maxiter, rtol, atol
        Forwarded to the SciPy solver.

    Returns
    -------
    list of float
        Mean time per solve in milliseconds, one entry per epsilon.
    """
    print( "Doing for (",N[0], N[1],")." )
    # Cost matrix
    Cost = distmat( x, y )
    # a and b
    a = normalize( np.ones(N[0]) )
    b = normalize( np.ones(N[1]) )
    # The potentials are fixed to the weight vectors; nothing is optimised here.
    f, g = a, b
    # Both solvers accept the same keyword arguments, so pick one up front
    # instead of duplicating the call inside the timing loop.
    if inv_method == 'cg':
        solver = scipy.sparse.linalg.cg
    else:
        solver = scipy.sparse.linalg.gmres
    # Preconditioning vectors as an n-by-k matrix (independent of epsilon).
    y_ = np.array( precond_vectors ).T
    timings = []
    for eps in epsilons:
        print( "For epsilon = "+str(eps)+" ." )
        # Kernel
        K = np.exp( - Cost/eps )
        u = np.exp( f/eps )
        v = np.exp( g/eps )
        # Row and column terms, shared by the gradient and the Hessian diagonal.
        r1 = a * u * np.dot( K, v * b )
        r2 = ( b * v ) * np.dot( K.T, u * a )
        gradient = np.concatenate( ( a - r1, b - r2 ), axis = None )
        # Off-diagonal block
        P = a[:,None] * u[:,None] * K * v[None,:] * b[None,:]
        result = np.vstack( ( np.hstack( ( np.diag( r1 ), P ) ),
                              np.hstack( ( P.T, np.diag( r2 ) ) ) ) )
        # Symmetric rescaling by the inverse square root of the diagonal.
        diag = 1/np.sqrt( np.diag( result ) )
        matrix = diag[:,None] * result * diag[None,:]
        # Rank-one direction e, expressed in the rescaled coordinates.
        vector_E = null_vector/diag
        vector_E = vector_E/np.linalg.norm( vector_E )
        # Right-hand side in the rescaled coordinates, as an n-by-1 column.
        gradient = diag[:,None] * gradient[:,None]
        # Data for P_c = I + z y^T with z = y * (1/sqrt(lambda) - 1), where
        # lambda_k = y_k^T A y_k.
        eigenvalues = np.sum( y_ * np.dot( matrix, y_ ), axis = 0 )
        values = 1/np.sqrt( eigenvalues ) - 1
        z = y_ * values[None,:]

        def _apply_P( vector ):
            # v -> (I + z y^T) v
            return vector + z @ ( y_.T @ vector )

        def _preconditioned_map( vector ):
            # v -> P_c (A + e e^T) P_c v
            vector = _apply_P( vector )
            vector = np.dot( matrix, vector ) + vector_E * np.dot( vector_E, vector )
            return _apply_P( vector )

        gradient = _apply_P( gradient )
        A = scipy.sparse.linalg.LinearOperator( ( matrix.shape[0], matrix.shape[1] ), matvec = _preconditioned_map )
        # Repeat the solve to average out timer noise.
        t = 0
        for _ in range(Num_inv):
            # perf_counter is the clock intended for measuring short durations.
            start = time.perf_counter()
            solver( A, gradient, x0 = gradient, maxiter = maxiter, rtol = rtol, atol = atol )
            end = time.perf_counter()
            t += 1e3 * ( end - start )
        # end for
        timings.append( t/Num_inv )
    # end for
    return timings

In [None]:
# Number of points in the first (x) and second (y) point cloud.
N = [1000, 1100]

In [None]:
# First cloud: N[0] points with both coordinates uniform in [-0.5, 0.5).
x = np.random.rand( 2, N[0] ) - 0.5
# Second cloud: N[1] points given in polar form, with a uniform angle in
# [0, 2*pi) and a uniform radius in [0.8, 1.0).
theta = 2 * np.pi * np.random.rand( 1, N[1] )
r = 0.8 + .2 * np.random.rand( 1, N[1] )
y = np.vstack( [ r * np.cos( theta ), r * np.sin( theta ) ] )

In [None]:
# Regularisation values for the damped Newton runs.  dampednewton removes
# from this list any value whose run it rejects.
epsilons = [1.0, 0.5, 0.1, 0.05, 0.03]
results_dampedNewton = dampednewton( epsilons = epsilons, N = N, x = x, y = y )

In [None]:
plt.figure( figsize = ( 20, 7 ) )
plt.title( "$||P1 -a||_1+||P^T 1 -b||_1$" )
# One curve per stored run: the sum of the two recorded error sequences.
for i, newton_run in enumerate( results_dampedNewton['results_list'] ):
    error = np.asarray( newton_run['error_a'] ) + np.asarray( newton_run['error_b'] )
    # Raw string: "\e" is not a valid escape sequence in a normal literal.
    plt.plot( error, label = r'Damped Newton for $\epsilon = $'+ str(epsilons[i]), linewidth = 2 )
# end for
plt.xlabel( "Number of iterations" )
plt.ylabel( "Error in log-scale" )
plt.legend()
plt.yscale( 'log' )
plt.show()
print( "\n Error plots can increase! The error is not the objective function!" )

In [None]:
def spectral_decomposition( mat ):
    """Diagonalise ``mat`` with ``np.linalg.eigh`` and sort the spectrum.

    Prints the ten smallest and the ten largest eigenvalues, then returns
    the eigenvalues in increasing order together with the eigenvectors,
    stored column-wise in the matching order.
    """
    eigenvalues, eigenvectors = np.linalg.eigh( mat )
    order = np.argsort( eigenvalues )
    eigenvalues, eigenvectors = eigenvalues[ order ], eigenvectors[ : , order ]
    smallest = eigenvalues[ : 10 ]
    largest = eigenvalues[ - 10 : ]
    print( "List of smallest eigenvalues: ", smallest )
    print( "List of largest  eigenvalues: ", largest )
    return eigenvalues, eigenvectors

In [None]:
# Spectrum of every recorded Hessian: eigs[i] holds the sorted eigenvalues
# and eigvecs[i] the matching eigenvectors for epsilons[i].
eigs, eigvecs = [], []
for i, eps in enumerate( epsilons ):
    print("Spectral statistics of Hessian for epsilon = "+str(eps))
    ev = spectral_decomposition( results_dampedNewton['Hessians'][i] )
    eigs.append( ev[0] )
    eigvecs.append( ev[1] )
    print("")
# end for

In [None]:
def build_preconditioners( num_eigs, modified_Hessian, ansatz = True ):
    """Pick a null vector and ``num_eigs`` eigenvectors to precondition with.

    The matrix is diagonalised and its eigenvectors are sorted by increasing
    eigenvalue.  The two extreme eigenvectors (indices 0 and m-1) are never
    selected; vectors are taken alternately from the top (m-2, m-3, ...) and
    from the bottom (1, 2, ...) of the remaining spectrum.

    Parameters
    ----------
    num_eigs : int
        Number of preconditioning vectors to return.
    modified_Hessian : ndarray of shape (m, m)
        Symmetric matrix handed to ``np.linalg.eigh``.
    ansatz : bool
        If False, the null vector is the eigenvector of the smallest
        eigenvalue.  If True, it is the normalised vector
        (1, ..., 1, -1, ..., -1) whose block sizes are read from the
        module-level ``N``.

    Returns
    -------
    null_vector : ndarray of shape (m,)
    precond_vectors : list of ndarray
        ``num_eigs`` distinct eigenvectors, each of shape (m,).
    """
    # Diagonalize
    eigenvalues, eigenvectors = np.linalg.eigh( modified_Hessian )
    sorting_indices = np.argsort( eigenvalues )
    eigenvectors = eigenvectors[ : , sorting_indices ]
    # Form null vector
    if not ansatz:
        null_vector = eigenvectors[ : , 0 ]
    else:
        # Relies on the notebook-level N for the two block sizes.
        null_vector = np.hstack( ( np.ones(N[0]), - np.ones(N[1]) ) )
        null_vector = null_vector/np.sqrt( N[0] + N[1] )
    # Form other vectors
    _, m = eigenvectors.shape
    half = num_eigs//2
    indices = []
    for i in range(half):
        indices.append( m - i - 2 )   # from the top, skipping index m-1
        indices.append( i + 1 )       # from the bottom, skipping index 0
    # end for
    if num_eigs%2 != 0:
        # Next unused index from the top.  The top indices taken so far are
        # m-2 ... m-1-half, so m-1-half would select the same vector twice.
        indices.append( m - 2 - half )
    precond_vectors = [ eigenvectors[:,index] for index in indices ]
    return null_vector, precond_vectors

## Number of times to invert the Hessian

In [None]:
# Number of timed solves per epsilon; each benchmark below reports their mean.
Num_inv = 100

In [None]:
# Regularisation values used by the inversion benchmarks, in decreasing order.
epsilons = [
    1.0, 0.7, 0.5, 0.3,
    0.1, 0.07, 0.05, 0.03,
]

## Direct inversion without preconditioning

In [None]:
print( "Doing for (",N[0], N[1],")." )
# Cost matrix
Cost = distmat( x, y )
# a and b
a = normalize( np.ones(N[0]) )
b = normalize( np.ones(N[1]) )
# The Hessian is evaluated at the fixed potentials (f, g) = (a, b).
f, g = a, b
f_g = np.concatenate( ( f, g ), axis = None )
# Normalised vector (1, ..., 1, -1, ..., -1), stored as a column so that the
# product below is the rank-one matrix null_vector * null_vector^T.
null_vector = np.hstack( ( np.ones( a.shape[0] ), - np.ones( b.shape[0] ) ) )/np.sqrt( a.shape[0] + b.shape[0] )
null_vector = np.reshape( null_vector, ( a.shape[0] + b.shape[0], 1 ) )
reg_matrix = np.dot( null_vector, null_vector.T )
timings_direct_inv = []
for eps in epsilons:
    print( "For epsilon = "+str(eps)+" ." )
    # Kernel
    K = np.exp( - Cost/eps )
    u = np.exp( f_g[:a.shape[0]]/eps )
    v = np.exp( f_g[a.shape[0]:]/eps )
    # Row and column terms, shared by the gradient and the Hessian diagonal.
    r1 = a * u * np.dot( K, v * b )
    r2 = ( b * v ) * np.dot( K.T, u * a )
    # Compute gradient
    grad_f = a - r1
    grad_g = b - r2
    gradient = np.concatenate( ( grad_f, grad_g ), axis = None )
    # Compute Hessian
    P = a[:,None] * u[:,None] * K * v[None,:] * b[None,:]
    A = np.diag( r1 )
    B = P
    C = P.T
    D = np.diag( r2 )
    result = np.vstack( ( np.hstack( ( A, B ) ), np.hstack( ( C, D ) ) ) )
    Hessian = - result/eps
    # Inflating the corresponding direction
    mean_eig = -( 0.5 * np.mean( r1 ) + 0.5 * np.mean( r2 ) )/eps
    Hessian_stabilized = Hessian + mean_eig * reg_matrix
    # Repeat the solve to average out timer noise.
    t = 0
    for i in range(Num_inv):
        # perf_counter is the clock intended for measuring short durations.
        start = time.perf_counter()
        p_k = - np.linalg.solve( Hessian_stabilized, gradient )
        end = time.perf_counter()
        t += 1e3 * ( end - start )
    # end for
    # Mean time per solve, in milliseconds.
    timings_direct_inv.append( t/Num_inv )

## Conjugate Gradient

#### I. 

In [None]:
# Get preconditioning vectors
num_eigs_CG_1 = 7
null_vector, precond_vectors = build_preconditioners( num_eigs_CG_1, results_dampedNewton['Hessians'][-1], ansatz = False )

In [None]:
# Mean CG solve time per epsilon for run I.
timings_CG_1 = iterative_inversions(
    epsilons, N, x, y,
    null_vector, precond_vectors,
    Num_inv = Num_inv,
    inv_method = 'cg',
)

#### II.

In [None]:
# Get preconditioning vectors
num_eigs_CG_2 = 50
null_vector, precond_vectors = build_preconditioners( num_eigs_CG_2, results_dampedNewton['Hessians'][-1], ansatz = False )

In [None]:
# Mean CG solve time per epsilon for run II.
timings_CG_2 = iterative_inversions(
    epsilons, N, x, y,
    null_vector, precond_vectors,
    Num_inv = Num_inv,
    inv_method = 'cg',
)

#### III.

In [None]:
# Get preconditioning vectors
num_eigs_CG_3 = 100
null_vector, precond_vectors = build_preconditioners( num_eigs_CG_3, results_dampedNewton['Hessians'][-1], ansatz = False )

In [None]:
# Mean CG solve time per epsilon for run III.
timings_CG_3 = iterative_inversions(
    epsilons, N, x, y,
    null_vector, precond_vectors,
    Num_inv = Num_inv,
    inv_method = 'cg',
)

## GMRES    

#### I.

In [None]:
# Get preconditioning vectors
num_eigs_GMRES_1 = 7
null_vector, precond_vectors = build_preconditioners( num_eigs_GMRES_1, results_dampedNewton['Hessians'][-1], ansatz = False )

In [None]:
# Mean GMRES solve time per epsilon for run I.
timings_GMRES_1 = iterative_inversions(
    epsilons, N, x, y,
    null_vector, precond_vectors,
    Num_inv = Num_inv,
    inv_method = 'gmres',
)

#### II.

In [None]:
# Get preconditioning vectors
num_eigs_GMRES_2 = 50
null_vector, precond_vectors = build_preconditioners( num_eigs_GMRES_2, results_dampedNewton['Hessians'][-1], ansatz = False )

In [None]:
# Mean GMRES solve time per epsilon for run II.
timings_GMRES_2 = iterative_inversions(
    epsilons, N, x, y,
    null_vector, precond_vectors,
    Num_inv = Num_inv,
    inv_method = 'gmres',
)

#### III.

In [None]:
# Get preconditioning vectors
num_eigs_GMRES_3 = 100
null_vector, precond_vectors = build_preconditioners( num_eigs_GMRES_3, results_dampedNewton['Hessians'][-1], ansatz = False )

In [None]:
# Mean GMRES solve time per epsilon for run III.
timings_GMRES_3 = iterative_inversions(
    epsilons, N, x, y,
    null_vector, precond_vectors,
    Num_inv = Num_inv,
    inv_method = 'gmres',
)

## Benchmarking the time taken to invert the Hessian using different methods

In [None]:
plt.figure( figsize = ( 20, 7 ) )
plt.title( "Benchmarking inversion methods" )
# (timings, legend label) for every benchmarked method, in legend order.
benchmark_curves = [
    ( timings_direct_inv, 'Direct inversion' ),
    ( timings_CG_1, 'CG with '+str(num_eigs_CG_1)+ ' preconditioning vectors' ),
    ( timings_CG_2, 'CG with '+str(num_eigs_CG_2)+ ' preconditioning vectors' ),
    ( timings_CG_3, 'CG with '+str(num_eigs_CG_3)+ ' preconditioning vectors' ),
    ( timings_GMRES_1, 'GMRES with '+str(num_eigs_GMRES_1)+ ' preconditioning vectors' ),
    ( timings_GMRES_2, 'GMRES with '+str(num_eigs_GMRES_2)+ ' preconditioning vectors' ),
    ( timings_GMRES_3, 'GMRES with '+str(num_eigs_GMRES_3)+ ' preconditioning vectors' ),
]
for curve_timings, curve_label in benchmark_curves:
    # Timings are plotted in reverse order, matching the reversed tick labels.
    plt.plot( curve_timings[::-1], label = curve_label, linewidth = 2 )
# end for
# Raw string: "\e" is not a valid escape sequence in a normal literal.
plt.xlabel( r"$\epsilon$" )
plt.ylabel( "Time in ms" )
# plt.yscale( 'log' )
plt.xticks( list(range(len(epsilons))), epsilons[::-1] )
plt.legend( loc = 'upper right' )
plt.savefig( "../Images/Benchmarking_inversion_methods_images/Inversion_benchmark.pdf", format = 'pdf' )
plt.show()