# In this notebook we plot how the spectrum (the eigenvalues) of the Hessian changes as the number of preconditioning eigenvectors varies.

In [None]:
from __future__ import division
import os
import numpy as np
import time
import matplotlib.pyplot as plt
import warnings
# Silence every warning so that the notebook output stays readable.
# NOTE(review): this also hides numerical warnings (e.g. overflow in np.exp) — confirm this is intended.
warnings.filterwarnings('ignore')
# Seed NumPy's global random generator so that the random point clouds are reproducible.
np.random.seed(1234)
# Default font size of the figures; several plotting cells override it.
plt.rcParams.update( { 'font.size' :  22 } )
%matplotlib inline 
%load_ext autoreload                                                                                                                                                                            
%autoreload 

In [None]:
# Folder that receives the figures saved by this notebook.
relative_path_to_new_folder = "../Images"
# os.makedirs creates missing parent folders and, with exist_ok = True, is a
# no-op when the folder already exists; a single call therefore replaces the
# former "makedirs + isdir check + makedirs" sequence (the check-then-create
# pattern could also fail if the folder appeared in between).
os.makedirs( os.path.join( relative_path_to_new_folder, "Spectralplot_images" ), exist_ok = True )

In [None]:
"""To compute distance matrix"""
def distmat( x, y ):
    """Return the matrix of squared Euclidean distances between the columns of x and of y.

    Entry (i, j) is |x[:, i]|^2 + |y[:, j]|^2 - 2 <x[:, i], y[:, j]>.
    """
    sq_norms_x = np.sum( x**2, 0 )
    sq_norms_y = np.sum( y**2, 0 )
    cross_terms = x.transpose().dot( y )
    return sq_norms_x[:,None] + sq_norms_y[None,:] - 2 * cross_terms

"""To Normalise a vector"""
def normalize( a ):
    """Divide a by the sum of its entries, so that the result sums to one."""
    total = np.sum( a )
    return a/total

"""To Compute P"""
def GetP( u, K, v):
    """Return the scaled kernel whose entry (i, j) is u[i] * K[i, j] * v[j]."""
    row_scaled = u[:,None] * K
    return row_scaled * v[None,:]

def plotp( x, col, plt, scale = 200, edgecolors = "k" ):
    """Scatter-plot the columns of x (row 0: abscissae, row 1: ordinates).

    The drawing is delegated to `plt.scatter` with marker size `scale`,
    colours `col` (mapped through the 'plasma' colormap), edge colour
    `edgecolors` and an edge width of 2; the value returned by
    `plt.scatter` is returned unchanged.
    """
    abscissae = x[0,:]
    ordinates = x[1,:]
    return plt.scatter(
        abscissae,
        ordinates,
        s = scale,
        edgecolors = edgecolors,
        c = col,
        cmap = 'plasma',
        linewidths = 2,
    )

In [None]:
def generate_data(N):
    """
    Draw two random planar point clouds from NumPy's global random generator.

    N is a list of the size of the data on x and y: N[0] points for x, N[1] for y.

    Returns
    -------
    x : array of shape (2, N[0]), uniform in [-0.5, 0.5) x [-0.5, 0.5).
    y : array of shape (2, N[1]), points with a uniform angle and a radius
        drawn uniformly in [0.8, 1.0).
    """
    n_x, n_y = N[0], N[1]
    # The three draws are made in this order so that a given seed always yields the same data.
    x = np.random.rand( 2, n_x ) - 0.5
    theta = 2 * np.pi * np.random.rand( 1, n_y )
    r = 0.8 + .2 * np.random.rand( 1, n_y )
    y = np.concatenate( ( r * np.cos( theta ), r * np.sin( theta ) ), axis = 0 )
    return x, y

In [None]:
def spectral_decomposition( mat ):
    """
    Diagonalise `mat` with np.linalg.eigh and return its eigenpairs in ascending order.

    The ten smallest and the ten largest eigenvalues are printed as a side effect.

    Returns
    -------
    eig : 1-D array of eigenvalues, sorted increasingly.
    v   : 2-D array whose column j is the eigenvector associated with eig[j].
    """
    raw_values, raw_vectors = np.linalg.eigh( mat )
    order = np.argsort( raw_values )
    eig = np.take( raw_values, order )
    v = np.take( raw_vectors, order, axis = 1 )
    smallest, largest = eig[ : 10 ], eig[ - 10 : ]
    print( "List of smallest eigenvalues: ", smallest )
    print( "List of largest  eigenvalues: ", largest )
    return eig, v



In [None]:
def precondition_matrix( matrix, precond_vectors, null_vector ):
    """
    Rescale, regularise and spectrally precondition a square matrix.

    Steps (the input array is never modified, new arrays are created):
      1. Diagonal scaling: M = D * matrix * D with D = diag( 1/sqrt(matrix_ii) ).
      2. Rank-one regularisation: M <- M + w w^T, where w is null_vector
         divided entrywise by the diagonal of D and then normalised.
      3. Preconditioning with the vectors y_1..y_k (columns of Y):
         with lambda_i = y_i^T M y_i and Z = Y diag( 1/sqrt(lambda_i) - 1 ),
         return M + M Y Z^T + Z Y^T M + Z (Y^T M Y) Z^T, which for a
         symmetric M is P M P with P = I + Y diag( 1/sqrt(lambda_i) - 1 ) Y^T.

    Parameters
    ----------
    matrix : square 2-D array whose diagonal entries are positive (their
        inverse square roots are taken).
    precond_vectors : sequence of k vectors of length n; when it is empty
        the matrix obtained after step 2 is returned.
    null_vector : vector of length n used for the regularisation of step 2.

    Returns
    -------
    2-D array of the same shape as `matrix`.
    """
    # Step 1: diagonal (Jacobi) scaling.
    diag   = 1/np.sqrt( np.diag( matrix ).flatten() )
    matrix = diag[:,None] * matrix * diag[None,:]
    # Step 2: add the rank-one term built from the rescaled null vector.
    vector = null_vector/diag
    vector = vector/np.linalg.norm( vector )
    vector = vector.reshape( ( len( vector ), 1 ) )
    matrix = matrix + np.dot( vector, vector.T )
    if len( precond_vectors ) == 0:
        # Nothing to precondition with.
        return matrix
    # Step 3: the explicit matrix P is never formed (the former, unused
    # n x n P_matrix has been removed); only the products below are needed.
    y = np.array( precond_vectors ).T # Matrix of size n by k
    Ay = np.dot( matrix, y )
    eigenvalues = np.sum( y * Ay, axis = 0 )   # lambda_i = y_i^T M y_i
    values = ( 1/np.sqrt( eigenvalues ) - 1 )  # Vector of size k
    z = y * values[None,:]
    B = np.dot( Ay, z.T )
    C = z @ np.dot( y.T, Ay ) @ z.T
    return matrix + B + B.T + C



In [None]:
import computational_OT

In [None]:
# Sizes of the two point clouds: N[0] points for x and N[1] points for y.
N = [ 900, 800 ]
x, y = generate_data( N )

## Entropy regularized formulation

The primal entropy regularized formulation of OT is given by:
$$
OT_{\varepsilon}(\alpha,\beta) = \min_{\pi \in \mathcal{U}(\alpha,\beta)} \langle C,\pi \rangle +\varepsilon KL(\pi\|\alpha \otimes \beta)\ ,
$$
where
$\ 
KL(\pi\|\alpha \otimes \beta) 
\ $ is the KL-divergence and $\ \mathcal{U}(\alpha,\beta)=\{\pi: \pi\mathbb{1}=\alpha,\ \pi^{T}\mathbb{1}=\beta\}$.

## Sinkhorn  
The optimal coupling $\pi^{*}$ has the following form :
$$
\pi^{*} = \alpha \odot diag(u)K diag(v)\odot \beta
$$
and we know that $\pi^{*}\mathbb{1}=\alpha$ and $(\pi^{*})^{T}\mathbb{1}=\beta$.

Therefore, the Sinkhorn updates are given by the following alternating projections
$$
u^{t+1}  \leftarrow \frac{1}{K(v^{t}\odot \beta)}\ , 
v^{t+1}  \leftarrow \frac{1}{K^{T}(u^{t+1}\odot \alpha)}\ , 
$$
where 
$K = e^{-\frac{C}{\varepsilon}}\in M_{n\times m}(\mathbb{R}),\ \alpha \in \mathbb{R}^{n},\ \beta \in \mathbb{R}^{m}\ ,\ u\in \mathbb{R}^{n},\ v\in \mathbb{R}^{m}\ and \ (u^{0},v^{0})=(u,v)\ .$




## Entropy regularized dual-formulation
The dual formulation of the entropy regularized OT is given by:
$$
OT_{\varepsilon}(\alpha,\beta) = \max_{f\in \mathbb{R}^{n}, g\in\mathbb{R}^{m}} \langle f, \alpha \rangle + \langle g, \beta \rangle - \varepsilon\left(\langle\alpha \otimes \beta, e^{\frac{f}{\varepsilon}}\odot K \odot e^{\frac{g}{\varepsilon}}  \rangle-1\right)\ ,
$$
where
$$
\alpha \in \mathcal{M}_{1}(\mathcal{X}),\ \beta \in \mathcal{M}_{1}(\mathcal{Y}),\ \varepsilon>0,\ f\in\mathbb{R}^{n},\ g\in \mathbb{R}^{m}\ .
$$

## Log-domain Sinkhorn
Now, the log-domain (log-sum-exp stabilized) updates of the Sinkhorn algorithm are as follows:
$$
m_{i}(g)\leftarrow \min_{j}(C_{ij}-g_{j}^{(t)}),\ \forall\  i = 1,\dots,n\ ,
$$
$$
f^{(t+1)}_{i}\leftarrow -\varepsilon \log\left(\sum_{j=1}^{m}\exp\left(\frac{-\left(C_{ij}-g_{j}^{(t)}-m_{i}(g)\right)}{\varepsilon}\right)\beta_{j}\right)+m_{i}(g),\ \forall\  i=1,\dots,n\ ,
$$
$$
m_{j}(f)\leftarrow \min_{i}(C_{ij}-f_{i}^{(t+1)}),\ \forall\   j=1,\dots,m\ ,
$$
$$
g^{(t+1)}_{j}\leftarrow -\varepsilon \log\left(\sum_{i=1}^{n}\exp\left(\frac{-\left(C_{ij}-f_{i}^{(t+1)}-m_{j}(f)\right)}{\varepsilon}\right)\alpha_{i}\right)+m_{j}(f),\ \forall\  j=1,\dots,m\ ,
$$
where 
$K=e^{-C/\varepsilon} \in M_{n \times m}(\mathbb{R}),\ $ $\varepsilon >0,\ $ $\alpha \in \mathbb{R}^{n},\ $ $\beta \in \mathbb{R}^{m},\ $
   $f \in \mathbb{R}^{n},\ $ $g \in \mathbb{R}^{m}\ and \ (f^{(0)},g^{(0)})=(f,g)$.


## Log-domain Sinkhorn 

In [None]:
# Log domain Sinkhorn
print( "Log domain Sinkhorn... " )
print( "Doing for (",N[0], N[1],")." )
# Regularisation strengths to sweep.
epsilons = [ 0.5, 0.1, 0.05, 0.01, 0.005, 0.001 ]
# Uniform marginals on the two point clouds.
a = normalize( np.ones( N[0] ) )
b = normalize( np.ones( N[1] ) )
#Cost matrix
C = distmat( x, y )
# One entry per epsilon: solver output, wall-clock time and coupling.
results_logSinkhorn, times_logSinkhorn, logsinkhornP = [], [], []
for eps in epsilons:
    print( "For epsilon = "+str(eps)+":" )
    print( " |- Iterating" )
    start = time.time()
    logsinkhorn = computational_OT.log_domainSinkhorn_np( a, b, C, eps )
    out = logsinkhorn.update( max_iterations = 20000 )
    results_logSinkhorn.append( out )
    end = time.time()
    times_logSinkhorn.append( end - start )
    print( " |- Computing P" )
    print( "" )
    # Coupling rebuilt from the potentials: P = diag(exp(f/eps)) K diag(exp(g/eps)).
    # NOTE(review): for the smallest eps the separate exponentials may overflow/underflow;
    # exp( (f_i + g_j - C_ij)/eps ) would be the stable form — confirm whether P is used downstream.
    K = np.exp( - C/eps )
    u_opt = np.exp( out['potential_f']/eps )
    v_opt =  np.exp( out['potential_g']/eps )
    P_opt = GetP( u_opt, K, v_opt )
    logsinkhornP.append( P_opt )
# end for

#### Error plot

In [None]:
# Error history of every log-domain Sinkhorn run, one curve per epsilon, on a log scale.
plt.figure( figsize = ( 20, 7 ) )
plt.subplot( 2, 1, 1 )
# The second marginal is P^T 1 (P 1 and b do not even have the same length).
plt.title( "$||P1 -a||_1+||P^T 1 -b||_1$" )
for i, res in enumerate( results_logSinkhorn ):
    error = np.asarray( res['error'] )
    # Raw string: "\e" is not a valid Python escape sequence.
    plt.plot( error, label = r'Log-sinkhorn for $\epsilon = $'+ str(epsilons[i]), linewidth = 2 )
# end for
plt.yscale( 'log' )
plt.legend( loc = 'upper right' )
plt.xlabel( "Iterations" )
plt.ylabel( "Error in log-scale" )
plt.show()

In [None]:
# Final dual potentials (f, g) of every log-domain Sinkhorn run, in the order of the runs.
flogsinkhorn = [ res['potential_f'] for res in results_logSinkhorn ]
glogsinkhorn = [ res['potential_g'] for res in results_logSinkhorn ]

#### Spectral plot

##### Forming the unnormalised Hessian

In [None]:
# For every epsilon, assemble the unnormalised block matrix
#     [ diag(P 1)   P           ]
#     [ P^T         diag(P^T 1) ]
# from the final log-domain Sinkhorn potentials, where
#     P_ij = exp(f_i/eps) * exp(-C_ij/eps) * exp(g_j/eps).
Hessians_logsinkhorn = []
for i in range(len(epsilons)):
    f_opt = results_logSinkhorn[i]['potential_f']
    g_opt = results_logSinkhorn[i]['potential_g']
    # Evaluate P with one exponential of the combined exponent: forming
    # exp(f/eps), exp(-C/eps) and exp(g/eps) separately overflows/underflows
    # for small epsilon even when their product is a moderate number.
    P  = np.exp( ( f_opt[:,None] + g_opt[None,:] - C )/epsilons[i] )
    r1 = np.sum( P, axis = 1 )   # row sums    P 1   (first marginal)
    r2 = np.sum( P, axis = 0 )   # column sums P^T 1 (second marginal)
    result = np.vstack( ( np.hstack( ( np.diag( r1 ), P ) ), np.hstack( ( P.T, np.diag( r2 ) ) ) ) )
    Hessians_logsinkhorn.append( result )
# end for

In [None]:
# Spectrum of the diagonally normalised matrix D H D, D = diag(H)^(-1/2), for every epsilon.
eigs, eigvecs = [], []
for i, eps in enumerate( epsilons ):
    print( "Spectral statistics of Hessian for epsilon = "+str(eps) )
    hessian = Hessians_logsinkhorn[i]
    diag    = 1/np.sqrt( np.diag( hessian ).flatten() )
    result  = diag[:,None] * hessian * diag[None,:]
    ev = spectral_decomposition( result )
    eigs.append( ev[0] )      # eigenvalues, ascending
    eigvecs.append( ev[1] )   # matching eigenvectors (columns)
    print( "" )
# end for

In [None]:
# One eigenvalue histogram (50 bins, logarithmic counts) per epsilon, stacked vertically.
plt.rcParams.update( { 'font.size' : 8 } )
fig,ax = plt.subplots( figsize = ( 5, 12 ), nrows = len(epsilons), ncols = 1, sharey = True )
# Figure-level title: plt.title() targets the last axes only and was then
# overwritten by that axes' own set_title() in the loop below.
fig.suptitle( "Histogram of eigenvalues." )
p = np.log10( 0.5 )
for i in range(len(epsilons)):
    ax[i].hist( eigs[i], 50 )
    # Raw string: "\e" is not a valid Python escape sequence.
    ax[i].set_title( r" $\epsilon$: "+str(epsilons[i]) )
    ax[i].set_ylim( ymin = 10**p )
    ax[i].set_yscale( "log" )
# end for
# A single layout adjustment (an earlier call with hspace = 0.5 was overridden by this one).
plt.subplots_adjust( wspace = 0, hspace = 0.4 )
fig.savefig("../Images/Spectralplot_images/WithoutPreconditionedHistograms_LogSinkhorn.pdf", format = 'pdf',  bbox_inches = 'tight' )
plt.show()

##### Spectral plots with preconditioning

In [None]:
def build_preconditioners( num_eigs, modified_Hessian, ansatz = True ):
    """
    Select a null vector and `num_eigs` eigenvectors of `modified_Hessian` for preconditioning.

    The eigenpairs are sorted by increasing eigenvalue (m of them). The
    preconditioning vectors are taken alternately from the top and from the
    bottom of the spectrum, in the order m-2, 1, m-3, 2, ...; the smallest
    (index 0) and the largest (index m-1) eigenvectors are skipped. No check
    is made that `num_eigs` does not exceed m - 2; larger values select
    overlapping or wrapped-around indices.

    Parameters
    ----------
    num_eigs : number of preconditioning eigenvectors to return.
    modified_Hessian : symmetric 2-D array passed to np.linalg.eigh.
    ansatz : if True, the null vector is (1, ..., 1, -1, ..., -1)/sqrt(N[0] + N[1])
        built from the global list N; otherwise it is the eigenvector of the
        smallest eigenvalue.

    Returns
    -------
    null_vector : 1-D array.
    precond_vectors : list of `num_eigs` 1-D arrays (eigenvectors).
    """
    # Diagonalize
    eigenvalues, eigenvectors = np.linalg.eigh( modified_Hessian )
    sorting_indices = np.argsort( eigenvalues )
    eigenvectors = eigenvectors[ : , sorting_indices ]
    # Form null vector
    if not ansatz:
        null_vector = eigenvectors[ : , 0 ]
    else:
        null_vector = np.hstack( ( np.ones(N[0]), - np.ones(N[1]) ) )
        norm = np.sqrt( N[0] + N[1] )
        null_vector = null_vector/norm
    # Form other vectors
    _, m = eigenvectors.shape
    half = num_eigs//2
    indices = []
    for i in range( half ):
        indices.append( m - i - 2 )   # i-th vector from the top (largest one skipped)
        indices.append( i + 1 )       # i-th vector from the bottom (smallest one skipped)
    # end for
    if num_eigs%2 != 0:
        # Next unused index from the top. The former value m - 1 - half
        # repeated the last top index already selected above (and picked the
        # otherwise skipped index m - 1 when num_eigs == 1).
        indices.append( m - 2 - half )
    precond_vectors = [ eigenvectors[ : , index ] for index in indices ]
    return null_vector, precond_vectors

In [None]:
# For every number k of preconditioning eigenvectors and every epsilon, store
#     M + M Y Z^T + Z Y^T M + Z (Y^T M Y) Z^T
# where M = D H D is the diagonally normalised Hessian (D = diag(H)^(-1/2)),
# the columns of Y are the vectors returned by build_preconditioners and
# Z = Y diag( 1/sqrt(lambda_i) - 1 ) with lambda_i = y_i^T M y_i.
# For k = 0 the normalised matrix M itself is stored.
num_eigs = [ 0, 50, 100, 200, 400, 800, 1098 ]
preconditioned_Hessians = {}
for k in num_eigs:
    preconditioned_Hessians[ k ] = []
    for i in range(len(epsilons)):
        hessian = Hessians_logsinkhorn[i]
        diag = 1/np.sqrt(np.diag( hessian ).flatten())
        result = diag[:,None] * hessian * diag[None,:]
        if k != 0:
            # The returned null vector is not used: the rank-one regularisation is disabled here.
            null_vector, precond_vectors = build_preconditioners( k, result, ansatz = False )
            y_ = np.array( precond_vectors ).T # Matrix of size n by k
            Ay = np.dot( result, y_ )
            # Compute eigenvalues
            eigenvalues = np.sum( y_ * Ay, axis = 0 )
            values = ( 1/np.sqrt(eigenvalues) - 1 )    # Vector of size k
            z = y_ * values[None,:]
            B = np.dot( Ay, z.T )
            C_ = z @ np.dot( y_.T, Ay ) @ z.T
            result = result + B + B.T + C_
        preconditioned_Hessians[ k ].append( result )
    # end for
# end for

In [None]:
# Eigenvalues of every preconditioned matrix: eigs[k][i] belongs to k vectors and epsilons[i].
eigs = {}
for k in num_eigs:
    print( "For number of preconditioning eigenvectors: ", k )
    eigs[ k ] = []
    for i, eps in enumerate( epsilons ):
        print( "Spectral statistics of Hessian for epsilon = "+str(eps) )
        spectrum = spectral_decomposition( preconditioned_Hessians[ k ][i] )[0]
        eigs[ k ].append( spectrum )
        print( "" )
    # end for
    print( "" )
# end for

In [None]:
# Grid of eigenvalue histograms: one row per number of preconditioning
# eigenvectors, one column per epsilon; saved as a PDF.
plt.rcParams.update( { 'font.size' : 100 } )
fig,ax = plt.subplots( figsize = ( 140, 140 ), nrows = len(num_eigs), ncols = len(epsilons), sharey = True, sharex = False )
plt.subplots_adjust( wspace = 0, hspace = 0.4 )
p = np.log10( 0.5 )
for numeigs in range(len(num_eigs)):
    for i in range(len(epsilons)):
        ax[ numeigs ][i].hist( eigs[ num_eigs[numeigs] ][i], 50, rwidth = 0.9 )
        # Raw string: "\e" is not a valid Python escape sequence.
        ax[ numeigs ][i].set_title( " k = "+str(num_eigs[numeigs])+r", $\epsilon$ = " +str(epsilons[i])+ "" )
        ax[ numeigs ][i].set_ylim( ymin = 10**p )
        ax[ numeigs ][i].set_yscale( "log" )
    # end for
# end for
ax[ len(num_eigs) - 1 ][ len(epsilons) - 1 ].set_xticks( [ 0, 1, 2 ])
fig.savefig("../Images/Spectralplot_images/PreconditionedHistograms_LogSinkhorn.pdf", format = 'pdf', bbox_inches = "tight" )
plt.show()


## Sinkhorn

In [None]:
# Sinkhorn
print("Sinkhorn... ")
print("Doing for (",N[0], N[1],").")
# Regularisation strengths to sweep.
epsilons = [ 0.1 , 0.05 , 0.01, 0.005,  0.001 ]
# a and b
a = normalize( np.ones( N[0] ) )
b = normalize( np.ones( N[1] ) )
#Cost matrix
C = distmat( x, y )
# One entry per epsilon: coupling, solver output and wall-clock time.
SinkhornP, results_Sinkhorn, times_Sinkhorn = [], [], []
for eps in epsilons:
    print("For epsilon: ", eps)
    #Kernel
    K = np.exp( - C/eps )
    print( " |- Iterating")
    # Initial scalings.
    # NOTE(review): u and v are the same objects as a and b (no copy) — confirm the solver does not update them in place.
    u = a
    v = b
    start     = time.time()
    Optimizer = computational_OT.sinkhorn( K, a, b, u, v, eps )
    out = Optimizer._update()
    results_Sinkhorn.append(out)
    end       = time.time()
    times_Sinkhorn.append( end - start  )
    print( " |- Computing P" )
    print( "" )
    # Coupling rebuilt from the potentials: P = diag(exp(f/eps)) K diag(exp(g/eps)).
    K = np.exp( - C/eps )
    u_opt = np.exp( out['potential_f']/eps )
    v_opt =  np.exp( out['potential_g']/eps )
    P_opt = GetP( u_opt, K, v_opt )
    SinkhornP.append( P_opt )
# end for

In [None]:
# Final dual potentials (f, g) of every Sinkhorn run, in the order of the runs.
fsinkhorn = [ res['potential_f'] for res in results_Sinkhorn ]
gsinkhorn = [ res['potential_g'] for res in results_Sinkhorn ]

#### Error plot

In [None]:
# Error history of every Sinkhorn run (sum of the two recorded marginal errors), on a log scale.
plt.rcParams.update( { 'font.size' :  22 } )
plt.figure( figsize = ( 20, 7 ) )
plt.subplot( 2, 1, 1 )
# The second marginal is P^T 1 (P 1 and b do not even have the same length).
plt.title( "$||P1 -a||_1+||P^T 1 -b||_1$" )
for i, res in enumerate( results_Sinkhorn ):
    error = np.asarray( res['error_a'] ) + np.asarray( res['error_b'] )
    # Raw string: "\e" is not a valid Python escape sequence.
    plt.plot( error, label = r'Sinkhorn for $\epsilon = $'+ str(epsilons[i]), linewidth = 2 )
# end for
plt.yscale( 'log' )
plt.legend( loc = 'upper right' )
plt.xlabel( "Iterations" )
plt.ylabel( "Error in log-scale" )
plt.show()

#### Spectral plot

In [None]:
# For every epsilon, assemble the unnormalised block matrix
#     [ diag(P 1)   P           ]
#     [ P^T         diag(P^T 1) ]
# from the final Sinkhorn potentials, where
#     P_ij = exp(f_i/eps) * exp(-C_ij/eps) * exp(g_j/eps).
Hessians_sinkhorn = []
for i in range(len(epsilons)):
    f_opt = results_Sinkhorn[i]['potential_f']
    g_opt = results_Sinkhorn[i]['potential_g']
    # Evaluate P with one exponential of the combined exponent: forming
    # exp(f/eps), exp(-C/eps) and exp(g/eps) separately overflows/underflows
    # for small epsilon even when their product is a moderate number.
    P  = np.exp( ( f_opt[:,None] + g_opt[None,:] - C )/epsilons[i] )
    r1 = np.sum( P, axis = 1 )   # row sums    P 1   (first marginal)
    r2 = np.sum( P, axis = 0 )   # column sums P^T 1 (second marginal)
    result = np.vstack( ( np.hstack( ( np.diag( r1 ), P ) ), np.hstack( ( P.T, np.diag( r2 ) ) ) ) )
    Hessians_sinkhorn.append( result )
# end for

In [None]:
# Spectrum of the diagonally normalised matrix D H D, D = diag(H)^(-1/2), for every epsilon.
eigs, eigvecs = [], []
for i, eps in enumerate( epsilons ):
    print("Spectral statistics of Hessian for epsilon = "+str(eps))
    hessian = Hessians_sinkhorn[i]
    diag    = 1/np.sqrt(np.diag( hessian ).flatten())
    result  = diag[:,None] * hessian * diag[None,:]
    ev = spectral_decomposition( result )
    eigs.append( ev[0] )      # eigenvalues, ascending
    eigvecs.append( ev[1] )   # matching eigenvectors (columns)
    print("")
# end for

In [None]:
# One eigenvalue histogram (50 bins, logarithmic counts) per epsilon, side by side.
fig, ax = plt.subplots( figsize = ( 36, 6 ), nrows = 1, ncols = len(epsilons), sharey = False )
# Figure-level title: plt.title() targets the last axes only and was then
# overwritten by that axes' own set_title() in the loop below.
fig.suptitle("Histogram of eigenvalues.")
for i in range(len(epsilons)):
    ax[i].hist( eigs[i], 50 )
    # Raw string: "\e" is not a valid Python escape sequence.
    ax[i].set_title( r" $\epsilon$: "+str(epsilons[i]) )
    ax[i].set_xlabel( "Eigenvalues" )
    ax[i].set_yscale( "log" )
# end for
plt.subplots_adjust( wspace = 0, hspace = 0 )
plt.show()

##### Spectral plots with preconditioning

In [None]:
# For every number k of preconditioning eigenvectors and every epsilon, store
#     M + M Y Z^T + Z Y^T M + Z (Y^T M Y) Z^T
# where M = D H D is the diagonally normalised Sinkhorn Hessian
# (D = diag(H)^(-1/2)), the columns of Y are the vectors returned by
# build_preconditioners and Z = Y diag( 1/sqrt(lambda_i) - 1 ) with
# lambda_i = y_i^T M y_i. For k = 0 the normalised matrix M itself is stored.
num_eigs = [ 0, 50, 100, 200, 400, 800, 1098 ]
preconditioned_Hessians = {}
for numeigs  in  range(len(num_eigs)):
    preconditioned_Hessians[ num_eigs[ numeigs ] ] = []
    for i in  range(len(epsilons)):
        # Use the Hessians of THIS section: the loop used to read
        # Hessians_logsinkhorn, which belongs to the log-domain runs and was
        # built for a different list of epsilons.
        diag   = 1/np.sqrt(np.diag( Hessians_sinkhorn[i] ).flatten())
        result = diag[:,None] * Hessians_sinkhorn[i] * diag[None,:]
        if num_eigs[numeigs] != 0:
            # The returned null vector is not used: the rank-one regularisation is disabled here.
            null_vector, precond_vectors = build_preconditioners( num_eigs[ numeigs ], result, ansatz = False )
            y_ = np.array( precond_vectors ).T # Matrix of size n by k
            # Compute eigenvalues
            Ay = np.dot( result, y_ )
            eigenvalues = np.sum( y_ * Ay, axis = 0 )
            values = ( 1/np.sqrt(eigenvalues) - 1 )    # Vector of size k
            z = y_ * values[None,:]
            B = np.dot( Ay, z.T )
            C_ = z @ np.dot( y_.T, Ay ) @ z.T
            result = result + B + B.T + C_
        preconditioned_Hessians[ num_eigs[ numeigs ] ].append( result )
    # end for
# end for

In [None]:
# Eigenvalues of every preconditioned matrix: eigs[k][i] belongs to k vectors and epsilons[i].
eigs = {}
for k in num_eigs:
    eigs[ k ] = []
    for i, eps in enumerate( epsilons ):
        print("Spectral statistics of Hessian for epsilon = "+str(eps))
        spectrum = spectral_decomposition( preconditioned_Hessians[ k ][i] )[0]
        eigs[ k ].append( spectrum )
        print("")
    # end for
# end for

In [None]:
# Grid of eigenvalue histograms: one row per number of preconditioning
# eigenvectors, one column per epsilon; saved as a PDF.
plt.rcParams.update( { 'font.size': 100 } )
fig,ax = plt.subplots( figsize = ( 120, 130 ), nrows = len(num_eigs), ncols = len(epsilons), sharey = True, sharex = False )
plt.subplots_adjust( wspace = 0, hspace = 0.3 )
p = np.log10( 0.5 )
for numeigs in range(len(num_eigs)):
    for i in range(len(epsilons)):
        ax[ numeigs ][i].hist( eigs[ num_eigs[ numeigs ] ][i], 50, rwidth = 0.9 )
        # Raw string: "\e" is not a valid Python escape sequence.
        ax[ numeigs ][i].set_title( " k = "+str(num_eigs[ numeigs ])+r", $\epsilon$ = " +str(epsilons[i])+ "" )
        ax[ numeigs ][i].set_ylim( ymin = 10**p )
        ax[ numeigs ][i].set_yscale( "log" )
    # end for
# end for
ax[ len(num_eigs) - 1 ][ len(epsilons) - 1 ].set_xticks([ 0, 1, 2 ])
plt.savefig("../Images/Spectralplot_images/PreconditionedHistograms_Sinkhorn.pdf", format = 'pdf', bbox_inches = "tight" )
plt.show()

## Damped Newton without preconditioning

The Hessian of the dual formulation of the entropy regularized OT is given by 
$$
\nabla^{2}Q_{\varepsilon}(f,g)=\frac{-1}{\varepsilon}
\begin{pmatrix}
\Delta(\alpha) && \pi_{\varepsilon}\\
\pi^{T}_{\varepsilon} && \Delta(\beta) 
\end{pmatrix}
\ , 
$$ 
where $\pi\mathbb{1}_{m} = \alpha,\ \pi^{T}\mathbb{1}_{n}=\beta,\ $ and $\Delta = diag: \mathbb{R}^{n} \rightarrow M_{n}(\mathbb{R})$ is the linear operator mapping a vector  to a diagonal matrix  containing  this vector.


This implies 
$$
\begin{pmatrix}
\Delta(\alpha) && \pi_{\varepsilon}\\
\pi^{T}_{\varepsilon} && \Delta(\beta) 
\end{pmatrix}
\begin{pmatrix}
\mathbb{1}_{n}\\
-\mathbb{1}_{m}
\end{pmatrix} = 
\begin{pmatrix}
\alpha - \pi_{\varepsilon}\mathbb{1}_{m}\\
\pi^{T}_{\varepsilon}\mathbb{1}_{n} - \beta
\end{pmatrix} = 0\ ,
$$
that is,
$$
\begin{pmatrix}
\mathbb{1}_{n}\\
-\mathbb{1}_{m}
\end{pmatrix}\in \ker(\nabla^{2}Q_{\alpha, \beta,\varepsilon}(f,g))\ .
$$
Hence, $\nabla^{2}Q_{\alpha, \beta,\varepsilon}(f,g)$ is singular. Therefore, on regularization we have the following Hessian
$
H_{reg} := \nabla^{2}Q_{\alpha, \beta,\varepsilon}(f,g)+\lambda cc^{T}\ ,
$ 
where $c= \begin{pmatrix}\frac{\mathbb{1}}{\sqrt{n+m}}\\-\frac{\mathbb{1}}{\sqrt{n+m}}\end{pmatrix}\in M_{(n+m),1}(\mathbb{R})$.

Now, at the $k^{th}$ iteration, solve
$\nabla^{2}Q_{\alpha, \beta,\varepsilon}(f,g)p_{k} = \nabla Q_{\alpha, \beta,\varepsilon}(f,g)$ to obtain the search direction $p_{k}$, and then backtrack until the Armijo condition holds to obtain the step size $\alpha_{k}$ (not to be confused with the measure $\alpha$), which gives the update
$$
(f,g) \leftarrow (f,g) + \alpha_{k} p_{k}\ .
$$



In [None]:
# Damped Newton
print("Damped Newton... ")
print( "Doing for (",N[0], N[1],")." )
# Line-search parameters handed to the solver.
# NOTE(review): presumably rho is the backtracking factor and c the Armijo constant — verify against computational_OT.
rho = 0.95
c = 0.05
# One entry per successful epsilon: coupling, solver output, wall-clock time, rescaled Hessian.
dampedNewtonP = []
results_dampedNewton  = []
times_dampedNewton    = []
Hessians_dampedNewton = []
# a and b
a = normalize( np.ones( N[0] ) )
b = normalize( np.ones( N[1] ) )
epsilons = [ 0.1 , 0.05 , 0.03 ]
#Cost matrix
C = distmat( x, y )
# Iterate over a snapshot of the list: failed values are removed from
# `epsilons` inside the loop, and removing from the list being iterated
# would silently skip the value that follows the removed one.
for eps in list( epsilons ):
    print( "For epsilon = "+str(eps)+":" )
    #Kernel
    K = np.exp( - C/eps )
    # Start from zero potentials (fresh arrays, not views of a and b).
    f, g = 0 * a, 0 * b
    print( " |- Iterating")
    start = time.time()
    Optimizer = computational_OT.damped_Newton( K,
                                                a,
                                                b,
                                                f,
                                                g,
                                                eps,
                                                rho,
                                                c )
    out = Optimizer._update(    maxiter = 50,
                                debug = False )
    end = time.time()
    if out != -1:
        results_dampedNewton.append( out )
        times_dampedNewton.append( end - start )
        print( " |- Computing P" )
        print( "" )
        u_opt = np.exp( out['potential_f']/eps )
        K = np.exp( - C/eps )
        v_opt =  np.exp( out['potential_g']/eps )
        P_opt = GetP( u_opt, K, v_opt )
        dampedNewtonP.append( P_opt )
        print( " |- Recording (unstabilized) Hessian \n" )
        # Undo the -1/eps factor of the Hessian, then rescale symmetrically
        # by diag( (a, b) )^(-1/2).
        mat  = - eps * Optimizer.Hessian
        diag = 1/np.sqrt( np.concatenate( ( a, b ), axis = None ) )
        mat = diag[:,None] * mat * diag[None,:]
        Hessians_dampedNewton.append( mat )
    else:
        # The solver returned -1: drop this epsilon so that `epsilons`
        # stays aligned with the result lists.
        epsilons.remove( eps )
# end for

In [None]:
# Error history of every damped Newton run (sum of the two recorded marginal errors), on a log scale.
plt.rcParams.update( { 'font.size': 22 } )
plt.figure( figsize = ( 12, 5 ) )
# (A placeholder plt.title("$$") that was immediately overwritten has been removed.)
plt.title("$||P1 -a||_1+||P^T 1 -b||_1$")
for i, res in enumerate( results_dampedNewton ):
    error = np.asarray( res['error_a'] ) + np.asarray( res['error_b'] )
    # Raw string: "\e" is not a valid Python escape sequence.
    plt.plot( error, label = r'Damped Newton for $\epsilon = $'+ str(epsilons[i]), linewidth = 2)
#  end for
plt.xlabel("Number of iterations")
plt.ylabel("Error in log-scale")
plt.legend( loc = 'upper right' )
plt.yscale( 'log')
plt.tight_layout()
plt.show()

#### Spectral plots 

In [None]:
# Spectrum of the diagonally normalised matrix D H D, D = diag(H)^(-1/2), for every epsilon.
eigs, eigvecs = [], []
for i, eps in enumerate( epsilons ):
    print( "Spectral statistics of Hessian for epsilon =" +str(eps) )
    hessian = Hessians_dampedNewton[i]
    diag    = 1/np.sqrt(np.diag( hessian ).flatten())
    result  = diag[:,None] * hessian * diag[None,:]
    ev = spectral_decomposition( result )
    eigs.append( ev[0] )      # eigenvalues, ascending
    eigvecs.append( ev[1] )   # matching eigenvectors (columns)
    print("")
# end for

In [None]:
# One eigenvalue histogram (50 bins, logarithmic counts) per epsilon, side by side.
# squeeze = False keeps a 2-D array of axes even when a single epsilon is
# left (epsilons for which the solver failed are removed from the list).
fig, ax = plt.subplots( figsize = ( 36, 6 ), nrows = 1, ncols = len(epsilons), sharey = False, squeeze = False )
ax = ax[0]
# Figure-level title: plt.title() targets the last axes only and was then
# overwritten by that axes' own set_title() in the loop below.
fig.suptitle("Histogram of eigenvalues.")
for i in range(len(epsilons)):
    ax[i].hist( eigs[i], 50)
    # Raw string: "\e" is not a valid Python escape sequence.
    ax[i].set_title( r" $\epsilon$: "+str(epsilons[i]))
    ax[i].set_xlabel("Eigenvalues")
    ax[i].set_yscale( "log" )
# end for
plt.subplots_adjust( wspace = 0, hspace = 0.4 )
plt.show()

#### Spectral plots with preconditioning

In [None]:
# For every number k of preconditioning eigenvectors and every epsilon, store
#     M + M Y Z^T + Z Y^T M + Z (Y^T M Y) Z^T
# where M = D H D is the diagonally normalised damped-Newton Hessian
# (D = diag(H)^(-1/2)), the columns of Y are the vectors returned by
# build_preconditioners and Z = Y diag( 1/sqrt(lambda_i) - 1 ) with
# lambda_i = y_i^T M y_i. For k = 0 the normalised matrix M itself is stored.
num_eigs = [ 0, 50, 100, 200, 400, 800, 1098 ]
preconditioned_Hessians = {}
for numeigs  in  range(len(num_eigs)):
    preconditioned_Hessians[ num_eigs[ numeigs ] ] = []
    for i in  range(len(epsilons)):
        # Use the Hessians of THIS section: the loop used to read
        # Hessians_logsinkhorn, which belongs to the log-domain Sinkhorn runs
        # and was built for a different list of epsilons.
        diag   = 1/np.sqrt(np.diag( Hessians_dampedNewton[i] ).flatten())
        result = diag[:,None] * Hessians_dampedNewton[i] * diag[None,:]
        if num_eigs[numeigs] != 0:
            # The returned null vector is not used: the rank-one regularisation is disabled here.
            null_vector, precond_vectors = build_preconditioners( num_eigs[ numeigs ], result, ansatz = False )
            y_ = np.array( precond_vectors ).T # Matrix of size n by k
            # Compute eigenvalues
            Ay = np.dot( result, y_ )
            eigenvalues = np.sum( y_ * Ay, axis = 0 )
            values = ( 1/np.sqrt(eigenvalues) - 1 )    # Vector of size k
            z = y_ * values[None,:]
            B = np.dot( Ay, z.T )
            C_ = z @ np.dot( y_.T, Ay ) @ z.T
            result = result + B + B.T + C_
        preconditioned_Hessians[ num_eigs[ numeigs ] ].append( result )
    # end for
# end for

In [None]:
# Eigenvalues of every preconditioned matrix: eigs[k][i] belongs to k vectors and epsilons[i].
eigs = {}
for k in num_eigs:
    eigs[ k ] = []
    for i, eps in enumerate( epsilons ):
        print( "Spectral statistics of Hessian for epsilon = "+str(eps) )
        spectrum = spectral_decomposition( preconditioned_Hessians[ k ][i] )[0]
        eigs[ k ].append( spectrum )
        print("")
    # end for
# end for

In [None]:
# Grid of eigenvalue histograms: one row per number of preconditioning
# eigenvectors, one column per epsilon; saved as a PDF.
plt.rcParams.update( { 'font.size' : 90 } )
# squeeze = False keeps `ax` two-dimensional even when a single epsilon is
# left (epsilons for which the solver failed are removed from the list).
fig, ax = plt.subplots( figsize = ( 120, 130 ), nrows = len(num_eigs), ncols = len(epsilons), sharey = True, sharex = False, squeeze = False )
p = np.log10( 0.5 )
for numeigs in range(len(num_eigs)):
    for i in range(len(epsilons)):
        ax[ numeigs ][i].hist( eigs[ num_eigs[ numeigs ] ][i], 50, rwidth = 0.9 )
        # Raw string: "\e" is not a valid Python escape sequence.
        ax[ numeigs ][i].set_title( " k = "+str(num_eigs[ numeigs ])+r", $\epsilon$ = " +str(epsilons[i])+ "" )
        ax[ numeigs ][i].set_ylim( ymin = 10**p )
        ax[ numeigs ][i].set_yscale( "log" )
    # end for
# end for
ax[ len(num_eigs) - 1 ][ len(epsilons) - 1 ].set_xticks([ 0, 1, 2 ])
# A single layout adjustment (an earlier call with hspace = 0.3 was overridden by this one).
plt.subplots_adjust( wspace = 0, hspace = 0.4 )
plt.savefig( "../Images/Spectralplot_images/PreconditionedHistograms_DampedNewtonwithoutprecond.pdf", format = 'pdf', bbox_inches = "tight" )
plt.show()

## Semi-dual damped Newton without preconditioning

In [None]:
# Semi dual damped Newton
epsilons = [ 1.0, 0.5, 0.1, 0.05, 0.03, 0.02 ]
print(  " Semi dual damped Newton... "   )
print( " Doing for (",N[0], N[1],"). ")
# Line-search parameters handed to the solver.
# NOTE(review): presumably rho is the backtracking factor and c the Armijo constant — verify against computational_OT.
rho = 0.8
c = 0.1
# One entry per successful epsilon: coupling, solver output, wall-clock time, rescaled Hessian.
Semi_dual_dampedNewtonP = []
results_semi_dual_dampedNewton = []
times_semi_dual_dampedNewton = []
Hessians_semi_dual_dampedNewton = []
#Cost matrix
C = distmat( x, y )
# a and b
a = normalize( np.ones( N[0] ) )
b = normalize( np.ones( N[1] ) )
# Iterate over a snapshot of the list: failed values are removed from
# `epsilons` inside the loop, and removing from the list being iterated
# would silently skip the value that follows the removed one.
for eps in list( epsilons ):
    K = np.exp( - C/eps )
    print( " For epsilon = " + str(eps) + ":" )
    # Initial potential: same values as a, but a separate array so that an
    # in-place update by the solver (if it performs any) cannot alter the
    # marginal a, which is reused below and for the following epsilons.
    f = a.copy()
    print( " |-  Iterating" )
    start = time.time()
    # NOTE(review): exp_log is passed as the string "True", not the boolean — confirm what the solver expects.
    Optimizer = computational_OT.semi_dual_dampedNewton_np( C,
                                                            a,
                                                            b,
                                                            f,
                                                            eps,
                                                            rho,
                                                            c,
                                                            exp_log = "True" )
    out = Optimizer._update( max_iterations = 50 )
    end = time.time()
    if out != -1:
        results_semi_dual_dampedNewton.append( out )
        times_semi_dual_dampedNewton.append( end - start )
        print( " |- Computing P " )
        print( "" )
        u_opt = np.exp( out['potential_f']/eps )
        K = np.exp( - C/eps )
        v_opt =  np.exp( out['potential_g']/eps )
        P_opt = GetP( u_opt, K, v_opt )
        Semi_dual_dampedNewtonP.append( P_opt )
        print( " |- Recording (unstabilized) Hessian \n " )
        # Multiply the solver's Hessian by -eps, then rescale symmetrically by diag(a)^(-1/2).
        mat  = - eps * Optimizer.Hessian
        diag = 1/np.sqrt( a )
        mat = diag[:, None] * mat * diag[None,:]
        Hessians_semi_dual_dampedNewton.append( mat )
    else:
        # The solver returned -1: drop this epsilon so that `epsilons`
        # stays aligned with the result lists.
        epsilons.remove(eps)
# end for

In [None]:
# Error history of every semi-dual damped Newton run, one curve per epsilon, on a log scale.
plt.rcParams.update( { 'font.size': 10 } )
plt.figure( figsize = ( 12, 5 ) )
# (A placeholder plt.title("$$") that was immediately overwritten has been removed.)
# The second marginal is P^T 1 (P 1 and b do not even have the same length).
plt.title("$||P1 -a||_1+||P^T 1 -b||_1$")
for i, res in enumerate( results_semi_dual_dampedNewton ):
    error = np.asarray( res['error'] )
    # Raw string: "\e" is not a valid Python escape sequence.
    plt.plot( error, label = r'Semi-dual damped Newton for $\epsilon = $'+ str(epsilons[i]), linewidth = 2)
# end for
plt.xlabel("Number of iterations")
plt.ylabel("Error in log-scale")
plt.legend( loc = 'upper right' )
plt.yscale( 'log')
plt.tight_layout()
plt.show()

#### Spectral plots 

In [None]:
# Spectrum of the diagonally normalised matrix D H D, D = diag(H)^(-1/2), for every epsilon.
eigs, eigvecs = [], []
for i, eps in enumerate( epsilons ):
    print( "Spectral statistics of Hessian for epsilon = " +str(eps) )
    hessian = Hessians_semi_dual_dampedNewton[i]
    diag    = 1/np.sqrt(np.diag( hessian ).flatten())
    result  = diag[:,None] * hessian * diag[None,:]
    ev = spectral_decomposition( result )
    eigs.append( ev[0] )      # eigenvalues, ascending
    eigvecs.append( ev[1] )   # matching eigenvectors (columns)
    print("")
# end for

In [None]:
# One eigenvalue histogram (50 bins, logarithmic counts) per epsilon, stacked vertically.
# squeeze = False keeps a 2-D array of axes even when a single epsilon is
# left (epsilons for which the solver failed are removed from the list).
fig, ax = plt.subplots( figsize = ( 5, 10 ),  nrows = len(epsilons), ncols = 1, sharey = False, squeeze = False )
ax = ax[:,0]
# Figure-level title: plt.title() targets the last axes only and was then
# overwritten by that axes' own set_title() in the loop below.
fig.suptitle("Histogram of eigenvalues.")
for i in range(len(epsilons)):
    ax[i].hist( eigs[i], 50)
    # Raw string: "\e" is not a valid Python escape sequence.
    ax[i].set_title( r" $\epsilon$: "+str(epsilons[i]))
    ax[i].set_xlabel("Eigenvalues")
    ax[i].set_yscale( "log" )
# end for
plt.subplots_adjust( wspace = 0, hspace = 1 )
plt.show()

#### Constructing the Hessian in the semi-dual setup using the optimal potentials obtained using log-domain Sinkhorn algorithm

In [None]:
# Semi-dual matrices built from the final log-domain Sinkhorn potentials:
#     diag( M sqrt(b) ) - M M^T,
#     M_ij = a_i * exp( (f_i + g_j - C_ij)/eps ) * sqrt(b_j).
# NOTE: this overwrites the list `Hessians_logsinkhorn` built earlier in the
# notebook and resets `epsilons` to the values of the log-domain Sinkhorn runs.
Hessians_logsinkhorn = []
epsilons = [ 0.5, 0.1, 0.05, 0.01, 0.005, 0.001 ]
for i in range(len(epsilons)):
    f_opt = results_logSinkhorn[i]['potential_f']
    g_opt = results_logSinkhorn[i]['potential_g']
    # (A potential `g` recomputed from f was evaluated here but never used;
    # that O(n m) computation has been removed.)
    M = a[:,None] * np.exp( ( f_opt[:,None] + g_opt[None,:] - C )/epsilons[i] ) * np.sqrt( b )[None,:]
    Rowsum_M = np.sum( M * np.sqrt( b )[None,:], axis = 1 )
    result = np.diag( Rowsum_M ) - np.dot( M, M.T )
    Hessians_logsinkhorn.append( result )
# end for

In [None]:
def build_preconditioners( num_eigs, modified_Hessian, ansatz = True ):
    """
    Select a null vector and `num_eigs` eigenvectors of `modified_Hessian` (semi-dual variant).

    The eigenpairs are sorted by increasing eigenvalue; the preconditioning
    vectors are the eigenvectors of indices 1, ..., num_eigs, i.e. the ones
    that follow the smallest eigenvalue.

    Parameters
    ----------
    num_eigs : number of preconditioning eigenvectors to return.
    modified_Hessian : symmetric 2-D array passed to np.linalg.eigh.
    ansatz : if True, the null vector is the constant vector
        (1, ..., 1)/sqrt(N[0]) built from the global list N; otherwise it is
        the eigenvector of the smallest eigenvalue.

    Returns
    -------
    null_vector : 1-D array.
    precond_vectors : list of `num_eigs` 1-D arrays (eigenvectors).
    """
    # Diagonalize and order the eigenvectors by increasing eigenvalue.
    spectrum, basis = np.linalg.eigh( modified_Hessian )
    order = np.argsort( spectrum  )
    basis = basis[ : , order ]
    # Form null vector
    if ansatz:
        null_vector = np.ones( N[0] )/np.sqrt( N[0] )
    else:
        null_vector = basis[:, 0]
    # Form other vectors
    precond_vectors = [ basis[ :, index ] for index in range( 1, num_eigs + 1 ) ]
    return null_vector, precond_vectors

#### Spectral plots with preconditioning

In [None]:
# For every number k of preconditioning eigenvectors and every epsilon, store
#     M + M Y Z^T + Z Y^T M + Z (Y^T M Y) Z^T
# where M = D H D is the diagonally normalised semi-dual Hessian
# (D = diag(H)^(-1/2)), the columns of Y are the vectors returned by
# build_preconditioners and Z = Y diag( 1/sqrt(lambda_i) - 1 ) with
# lambda_i = y_i^T M y_i. For k = 0 the normalised matrix M itself is stored.
# NOTE(review): `epsilons` was reset by the cell that rebuilds
# `Hessians_logsinkhorn` in semi-dual form, yet this loop reads
# `Hessians_semi_dual_dampedNewton`, which was computed for a different list
# of epsilons — confirm which list of Hessians is intended here.
num_eigs = [ 0, 10, 20, 30, 40, 50 ]
preconditioned_Hessians = {}
for k in num_eigs:
    preconditioned_Hessians[ k ] = []
    for i in range(len(epsilons)):
        hessian = Hessians_semi_dual_dampedNewton[i]
        diag   = 1/np.sqrt(np.diag( hessian ).flatten())
        result = diag[:,None] * hessian * diag[None,:]
        if k != 0:
            # The returned null vector is not used: the rank-one regularisation is disabled here.
            null_vector, precond_vectors = build_preconditioners( k, result, ansatz = False )
            y_ = np.array( precond_vectors ).T # Matrix of size n by k
            Ay = np.dot( result, y_ )
            # Compute eigenvalues
            eigenvalues = np.sum( y_ * Ay, axis = 0 )
            values = ( 1/np.sqrt(eigenvalues) - 1 )    # Vector of size k
            z = y_ * values[None,:]
            B = np.dot( Ay, z.T )
            C_ = z @ np.dot( y_.T, Ay ) @ z.T
            result = result + B + B.T + C_
        preconditioned_Hessians[ k ].append( result )
    # end for
# end for

In [None]:
# Eigenvalues of every preconditioned matrix: eigs[k][i] belongs to k vectors and epsilons[i].
eigs = {}
for k in num_eigs:
    eigs[ k ] = []
    for i, eps in enumerate( epsilons ):
        print( "Spectral statistics of Hessian for epsilon = "+str(eps) )
        spectrum = spectral_decomposition( preconditioned_Hessians[ k ][i] )[0]
        eigs[ k ].append( spectrum )
        print("")
    # end for
# end for

In [None]:
# Grid of eigenvalue histograms: one row per number of preconditioning
# eigenvectors, one column per epsilon; saved as a PDF.
plt.rcParams.update( { 'font.size' : 90 } )
fig, ax = plt.subplots( figsize = ( 120, 130 ), nrows = len(num_eigs), ncols = len(epsilons), sharey = True, sharex = False )
p = np.log10( 0.5 )
for numeigs in range(len(num_eigs)):
    for i in range(len(epsilons)):
        ax[ numeigs ][i].hist( eigs[ num_eigs[ numeigs ] ][i], 50, rwidth = 0.9 )
        # Raw string: "\e" is not a valid Python escape sequence.
        ax[ numeigs ][i].set_title( " k = "+str(num_eigs[ numeigs ])+r", $\epsilon$ = " +str(epsilons[i])+ "" )
        ax[ numeigs ][i].set_ylim( ymin = 10**p )
        ax[ numeigs ][i].set_yscale( "log" )
    # end for
# end for
ax[ len(num_eigs) - 1 ][ len(epsilons) - 1 ].set_xticks([ 0, 1 ])
# A single layout adjustment (an earlier call with wspace = 0, hspace = 0.3 was overridden by this one).
plt.subplots_adjust( wspace = 0.1, hspace = 0.2 )
plt.savefig( "../Images/Spectralplot_images/PreconditionedHistograms_SemiDual_DampedNewtonwithoutprecond.pdf", format = 'pdf', bbox_inches = "tight" )
plt.show()