# Here we compare the optimal potentials obtained from different algorithms, namely Sinkhorn, damped Newton and semi-dual damped Newton, with the optimal potentials obtained from log-domain Sinkhorn.

In [None]:
from __future__ import division
import os
import numpy as np
import time
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
np.random.seed(1234)
%matplotlib inline 
%load_ext autoreload                                                                                                                                                                                                
%autoreload 

In [None]:
# Create the folders that receive the figures saved by this notebook.
# `exist_ok = True` makes both calls idempotent, so no separate (race-prone)
# `os.path.isdir` check is needed before creating the sub-folder.
relative_path_to_new_folder = "../Images"
os.makedirs( relative_path_to_new_folder, exist_ok = True )
os.makedirs( '../Images/Correctness_images', exist_ok = True )

In [None]:
def distmat( x, y ):
    """
    Compute the matrix of pairwise squared Euclidean distances.

    x and y store one point per column. Entry (i, j) of the result is
    |x[:, i]|^2 + |y[:, j]|^2 - 2 <x[:, i], y[:, j]>.
    """
    sq_norms_x  = np.sum( x**2, 0 )
    sq_norms_y  = np.sum( y**2, 0 )
    cross_terms = x.transpose().dot( y )
    return sq_norms_x[:,None] + sq_norms_y[None,:] - 2 * cross_terms

def normalize( a ):
    """Rescale `a` by the sum of its entries, so that the result sums to one."""
    return a/np.sum( a )

def GetP( u, K, v ):
    """
    Build the coupling diag(u) K diag(v).

    u scales the rows of K and v scales its columns, both by broadcasting.
    """
    row_scaling = u[:,None]
    col_scaling = v[None,:]
    return row_scaling * K * col_scaling

def plotp( x, col, plt, scale = 200, edgecolors = "k" ):
    """
    Scatter-plot a 2-D point cloud stored column-wise in x.

    The first row of x gives the abscissae and the second row the ordinates.
    `plt` is the object whose `scatter` method is called (it shadows any
    module-level name `plt` inside this function); the value returned by
    `scatter` is passed back to the caller.
    """
    abscissae, ordinates = x[0,:], x[1,:]
    return plt.scatter( abscissae,
                        ordinates,
                        s = scale,
                        edgecolors = edgecolors,
                        c = col,
                        cmap = 'plasma',
                        linewidths = 2 )

In [None]:
def generate_data( N ):
    """
    Draw two random 2-D point clouds, one point per column.

    N is a list with the number of points of x (N[0]) and of y (N[1]).
    x is uniform on the square [-0.5, 0.5)^2; y lies in the annulus with
    radius in [0.8, 1.0) and uniformly distributed angle.
    """
    n_x, n_y = N[0], N[1]
    x = np.random.rand( 2, n_x ) - 0.5
    # Polar coordinates of the second cloud
    angles = 2 * np.pi*np.random.rand( 1, n_y )
    radii  = 0.8 + .2 * np.random.rand( 1, n_y )
    first_coordinate  = radii * np.cos( angles )
    second_coordinate = radii * np.sin( angles )
    y = np.vstack( ( first_coordinate, second_coordinate ) )
    return x, y

In [None]:
import computational_OT

### Make potentials independent of any shift by constants

In [None]:
def make_unique_potentials( f, g ):
    """
    Pick the representative of (f + s, g - s) whose two parts have equal sums.

    f and g are flattened first, so extra singleton dimensions are harmless.
    The constant removed from f and added to g is
    ( sum(f) - sum(g) ) / ( len(f) + len(g) ).
    Returns the tuple ( f_new, g_new ) of 1-D arrays.
    """
    f_flat, g_flat = f.flatten(), g.flatten()
    total_size = len(f_flat) + len(g_flat)
    shift = ( np.sum(f_flat) - np.sum(g_flat) )/total_size
    return ( f_flat - shift, g_flat + shift )

In [None]:
# Sizes of the two point clouds: N[0] points for x and N[1] points for y.
N = [ 500, 600 ]
x, y = generate_data( N )
# Regularization strengths, listed from the largest to the smallest.
epsilons = [ 1.0, 0.5, 0.1, 0.05, 0.03 ]

## Entropy regularized formulation

The primal entropy regularized formulation of OT is given by:
$$
OT_{\varepsilon}(\alpha,\beta) = \min_{\pi \in \mathcal{U}(\alpha,\beta)} \langle C,\pi \rangle +\varepsilon KL(\pi\|\alpha \otimes \beta)\ ,
$$
where
$\ 
KL(\pi\|\alpha \otimes \beta) 
\ $ is the KL-divergence and $\ \mathcal{U}(\alpha,\beta)=\{\pi: \pi\mathbb{1}=\alpha, \pi^{T}\mathbb{1}=\beta\}$.

## Sinkhorn  
The optimal coupling $\pi^{*}$ has the following form :
$$
\pi^{*} = \alpha \odot diag(u)K diag(v)\odot \beta
$$
and we know that $\pi^{*}\mathbb{1}=\alpha$ and $(\pi^{*})^{T}\mathbb{1}=\beta$.
###
Therefore, the Sinkhorn updates are given by the following iterative projections
$$
u^{t+1}  \leftarrow \frac{1}{K(v^{t}\odot \beta)}\ , \
v^{t+1}  \leftarrow \frac{1}{K^{T}(u^{t+1}\odot \alpha)}\ , 
$$
where 
$K = e^{-\frac{C}{\varepsilon}}\in M_{n\times m}(\mathbb{R}),\ \alpha \in \mathbb{R}^{n},\ \beta \in \mathbb{R}^{m}\ ,\ u\in \mathbb{R}^{n},\ v\in \mathbb{R}^{m}\ and \ (u^{0},v^{0})=(u,v)\ .$



## Entropy regularized dual-formulation
The dual formulation of the entropy regularized OT is given by:
$$
OT_{\varepsilon}(\alpha,\beta) = \max_{f\in \mathbb{R}^{n}, g\in\mathbb{R}^{m}} \langle f, \alpha \rangle + \langle g, \beta \rangle - \varepsilon\left(\langle\alpha \otimes \beta, e^{\frac{f}{\varepsilon}}\odot K \odot e^{\frac{g}{\varepsilon}}  \rangle-1\right)\ ,
$$
where
$$
\alpha \in \mathcal{M}_{1}(\mathcal{X}),\ \beta \in \mathcal{M}_{1}(\mathcal{Y}),\ \varepsilon>0,\ f\in\mathbb{R}^{n},\ g\in \mathbb{R}^{m}\ .
$$

## Log-domain Sinkhorn
Now, the exp-log regularized update of the Sinkhorn algorithm is as follows:
$$
m_{i}(g)\leftarrow \min_{j}(C_{ij}-g_{j}^{(t)}),\ \forall\  i = 1,\dots,n\ ,
$$
$$
f^{(t+1)}_{i}\leftarrow -\varepsilon \log\left(\sum_{j=1}^{m}\exp\left(\frac{-\left(C_{ij}-g_{j}^{(t)}-m_{i}(g)\right)}{\varepsilon}\right)\beta_{j}\right)+m_{i}(g),\ \forall\  i=1,\dots,n\ ,
$$
$$
m_{j}(f)\leftarrow \min_{i}(C_{ij}-f_{i}^{(t+1)}),\ \forall\   j=1,\dots,m\
 ,
$$
$$
g^{(t+1)}_{j}\leftarrow -\varepsilon \log\left(\sum_{i=1}^{n}\exp\left(\frac{-\left(C_{ij}-f_{i}^{(t+1)}-m_{j}(f)\right)}{\varepsilon}\right)\alpha_{i}\right)+m_{j}(f),\ \forall\  j=1,\dots,m\ ,
$$
where 
$K=e^{-C/\varepsilon} \in M_{n \times m}(\mathbb{R}),\ $ $\varepsilon >0,\ $ $\alpha \in \mathbb{R}^{n},\ $ $\beta \in \mathbb{R}^{m},\ $
   $f \in \mathbb{R}^{n},\ $ $g \in \mathbb{R}^{m}\ and \ (f^{(0)},g^{(0)})=(f,g)$.


## I. Log-domain Sinkhorn


In [None]:
# Log domain Sinkhorn: one run per epsilon, keeping the solver output,
# the elapsed time and the coupling rebuilt from the final potentials.
print( "Log domain Sinkhorn... " )
print( "Doing for (",N[0],N[1],")." )
results_logSinkhorn, times_logSinkhorn, logsinkhornP = [], [], []
# Uniform marginals
a = normalize( np.ones( N[0] ) )
b = normalize( np.ones( N[1] ) )
# Cost matrix
C = distmat( x, y )
for eps in epsilons:
    print( "For epsilon = "+str(eps)+":" )
    print( " |- Iterating" )
    start = time.time()
    logsinkhorn = computational_OT.log_domainSinkhorn_np( a, b, C, eps )
    out = logsinkhorn.update( max_iterations = 500 )
    results_logSinkhorn.append( out )
    end = time.time()
    times_logSinkhorn.append( end - start )
    print( " |- Computing P" )
    print( "" )
    # Coupling diag(u) K diag(v) with u = exp(f/eps), v = exp(g/eps)
    K = np.exp( - C/eps )
    u_opt = np.exp( out['potential_f']/eps )
    v_opt = np.exp( out['potential_g']/eps )
    P_opt = GetP( u_opt, K, v_opt )
    logsinkhornP.append( P_opt )
# end for

In [None]:
# Plot the 'error' history returned by log-domain Sinkhorn, one curve per epsilon.
plt.figure( figsize = ( 20, 7 ) )
plt.subplot( 2, 1, 1 )
# The second term compares the column marginal P^T 1 (not P 1) with b.
plt.title( r"$||P1 -a||_1+||P^T 1 -b||_1$" )
for i, result in enumerate( results_logSinkhorn ):
    error = np.asarray( result['error'] )
    # Raw string: "\e" is not a valid Python escape sequence.
    plt.plot( error, label = r'Log-sinkhorn for $\epsilon = $' + str(epsilons[i]), linewidth = 2 )
# end for
plt.yscale( 'log' )
plt.legend( loc = "upper right" )
plt.xlabel( "Iterations" )
plt.ylabel( "Error in log-scale" )
plt.savefig( "../Images/Correctness_images/Error_plot_log_domain_Sinkhorn.pdf", format = 'pdf' )
plt.show()

In [None]:
# Final potentials of log-domain Sinkhorn, one entry per epsilon.
flogsinkhorn = [ result['potential_f'] for result in results_logSinkhorn ]
glogsinkhorn = [ result['potential_g'] for result in results_logSinkhorn ]

## II. Sinkhorn




In [None]:
# Sinkhorn: one run per epsilon, keeping the solver output, the elapsed
# time and the coupling rebuilt from the final potentials.
print( "Sinkhorn... " )
print( "Doing for (",N[0], N[1],")." )
SinkhornP, results_Sinkhorn, times_Sinkhorn = [], [], []
# Cost matrix
C = distmat( x, y )
# Uniform marginals a and b
a = normalize( np.ones( N[0] ) )
b = normalize( np.ones( N[1] ) )
for eps in epsilons:
    # Kernel
    K = np.exp( - C/eps )
    print( " |- Iterating" )
    # Starting scalings: the marginals themselves (same objects, no copy)
    u, v = a, b
    start = time.time()
    Optimizer = computational_OT.sinkhorn( K, a, b, u, v, eps )
    out = Optimizer._update()
    results_Sinkhorn.append( out )
    end = time.time()
    times_Sinkhorn.append( end - start )
    print( " |- Computing P" )
    print( "" )
    # Coupling diag(u) K diag(v) with u = exp(f/eps), v = exp(g/eps);
    # the kernel is recomputed from C before use.
    K = np.exp( - C/eps )
    u_opt = np.exp( out['potential_f']/eps )
    v_opt = np.exp( out['potential_g']/eps )
    P_opt = GetP( u_opt, K, v_opt )
    SinkhornP.append( P_opt )
# end for

In [None]:
# Plot error_a + error_b of Sinkhorn against the iterations, one curve per epsilon.
plt.figure( figsize = ( 20, 7 ) )
plt.subplot( 2, 1, 1 )
# The second term compares the column marginal P^T 1 (not P 1) with b.
plt.title( r"$||P1 -a||_1+||P^T 1 -b||_1$" )
for i, result in enumerate( results_Sinkhorn ):
    error = np.asarray( result['error_a'] ) + np.asarray( result['error_b'] )
    # Raw string: "\e" is not a valid Python escape sequence.
    plt.plot( error, label = r'Sinkhorn for $\epsilon = $' + str(epsilons[i]), linewidth = 2 )
# end for
plt.yscale( 'log' )
plt.legend( loc = 'upper right' )
plt.xlabel( "Iterations" )
plt.ylabel( "Error in log-scale" )
plt.savefig( "../Images/Correctness_images/Error_plot_Sinkhorn.pdf", format = 'pdf' )
plt.show()

In [None]:
# Final potentials of Sinkhorn, one entry per epsilon.
fsinkhorn = [ result['potential_f'] for result in results_Sinkhorn ]
gsinkhorn = [ result['potential_g'] for result in results_Sinkhorn ]

In [None]:
# Compare the output formats of the two solvers.
# Each result list is read explicitly: the variable `out` only holds the
# result of whichever solver ran last, so printing it twice would show the
# same keys under both headings.
print( "Sinkhorn keys: " )
print( results_Sinkhorn[-1].keys() )
print( "Sinkhorn log domain keys" )
print( results_logSinkhorn[-1].keys() )
# TODO: Make same keys
print( "")
print( fsinkhorn[0].shape )
print( flogsinkhorn[0].shape )
# Make outputs have same formats

##### Reality checks

In [None]:
# Reality checks: for every epsilon, compare the couplings and the sums
# f_i + g_j produced by Sinkhorn and by log-domain Sinkhorn.
for i in range( len(results_Sinkhorn) ):
    print( f'''i : {i}''')
    # Couplings
    P_logSK, P_SK = logsinkhornP[i], SinkhornP[i]
    error = np.linalg.norm( P_SK - P_logSK, ord = 'fro' )
    print( "Error of couplings : ", error )
    # Sums of potentials f_i + g_j
    sum_SK    = fsinkhorn[i][:,None] + gsinkhorn[i][None,:]
    sum_logSK = flogsinkhorn[i][:,None] + glogsinkhorn[i][None,:]
    print( sum_SK.shape, sum_logSK.shape )
    print( np.mean(sum_SK), np.mean(sum_logSK) )
    sum_SK, sum_logSK = sum_SK.squeeze(), sum_logSK.squeeze()
    print( sum_SK.shape, sum_logSK.shape )
    # NOTE(review): for a 2-D array, ord = np.inf is the maximum absolute
    # row sum, not the largest absolute entry - confirm this is intended.
    error = np.linalg.norm( sum_SK - sum_logSK, ord = np.inf )
    print( "Error of sums of potentials : ", error )
    print( "")
# end for

##### Comparing the final potentials of Sinkhorn and log-domain Sinkhorn

In [None]:
# Normalize the potentials of both solvers with make_unique_potentials and
# measure, for every epsilon, the Euclidean distance between them.
unique_logSK, unique_SK = [], []
errors_f, errors_g = [], []
for i in range( len(results_Sinkhorn) ):
    print( f'''i : {i}''' )
    unique_logSK.append( make_unique_potentials( flogsinkhorn[i], glogsinkhorn[i] ) )
    unique_SK.append( make_unique_potentials( fsinkhorn[i], gsinkhorn[i] ) )
    # The entries just appended are the ones belonging to index i
    current_logSK, current_SK = unique_logSK[-1], unique_SK[-1]
    print(current_logSK[0].shape,current_SK[0].shape)
    err_f = np.linalg.norm( current_logSK[0] - current_SK[0] )
    err_g = np.linalg.norm( current_logSK[1] - current_SK[1] )
    errors_f.append( err_f )
    errors_g.append( err_g )
    print( "norm of err_f: ", err_f )
    print( "norm of err_g: ", err_g )
    print( "" )
# end for

In [None]:
# Plot errors_f + errors_g per epsilon; the values and the tick labels are
# both reversed, so epsilon increases from left to right.
plt.figure( figsize = ( 20, 7 ) )
plt.title( "Difference between potentials." )
positions = list( range( len(epsilons) ) )
total_errors = np.array( errors_f[::-1] ) + np.array( errors_g[::-1] )
plt.plot( positions, total_errors, label = 'difference for potentials ( f, g ) between log-domain Sinkhorn and Sinkhorn ', linewidth = 2, marker= 'o' )
# Raw string: "\e" is not a valid Python escape sequence.
plt.xlabel( r"$\epsilon$" )
plt.ylabel( "difference in log-scale" )
plt.legend( loc = 'upper right' )
plt.yscale( 'log' )
plt.xticks( positions, epsilons[::-1] )
plt.savefig( "../Images/Correctness_images/Correctness_comparison_plot_Sinkhorn.pdf", format = 'pdf' )
plt.show()

## III. Damped Newton using direct inversion method without preconditioning 
Let us denote the objective function of the dual formulation by $Q_{\alpha, \beta,\varepsilon}$.


The Hessian of the dual formulation of the entropy regularized OT is given by 
$$
\nabla^{2}Q_{\alpha, \beta,\varepsilon}(f,g)=\frac{-1}{\varepsilon}
\begin{pmatrix}
\Delta(\alpha) && \pi_{\varepsilon}\\
\pi^{T}_{\varepsilon} && \Delta(\beta) 
\end{pmatrix}
\ , 
$$ 
where $\pi\mathbb{1}_{m} = \alpha,\ \pi^{T}\mathbb{1}_{n}=\beta,\ $ and $\Delta = diag: \mathbb{R}^{n} \rightarrow M_{n}(\mathbb{R})$ is the linear operator mapping a vector  to a diagonal matrix  containing  this vector.


This implies 
$$
\begin{pmatrix}
\Delta(\alpha) && \pi_{\varepsilon}\\
\pi^{T}_{\varepsilon} && \Delta(\beta) 
\end{pmatrix}
\begin{pmatrix}
\mathbb{1}_{n}\\
-\mathbb{1}_{m}
\end{pmatrix} = 0\ ,
$$
that is,
$$
\begin{pmatrix}
\mathbb{1}_{n}\\
-\mathbb{1}_{m}
\end{pmatrix}\in \ker(\nabla^{2}Q_{\alpha, \beta,\varepsilon}(f,g))\ .
$$
Hence, $\nabla^{2}Q_{\alpha, \beta,\varepsilon}(f,g)$ is singular. Therefore, on regularization we have the following Hessian
$
H_{reg} := \nabla^{2}Q_{\alpha, \beta,\varepsilon}(f,g)+\lambda cc^{T}\ ,
$ 
where $c= \begin{pmatrix}\frac{\mathbb{1}}{\sqrt{n+m}}\\-\frac{\mathbb{1}}{\sqrt{n+m}}\end{pmatrix}\in M_{(n+m),1}(\mathbb{R})$.

Now, at the $k^{th}$ iteration solve
$\nabla^{2}Q_{\alpha, \beta,\varepsilon}(f,g)p_{k} = \nabla Q_{\alpha, \beta,\varepsilon}(f,g)$ to obtain the optimizing direction vector $p_{k}$ and then perform the Armijo condition to obtain the update step $\alpha_{k}$ such that we have the update
$$
(f,g) \leftarrow (f,g) + \alpha_{k} p_{k}\ .
$$

In [None]:
# Damped Newton: one run per epsilon, keeping the solver output, the elapsed
# time, the coupling and a rescaled copy of the final Hessian.
print("Damped Newton... ")
print( "Doing for (",N[0], N[1],")." )
# rho and c are forwarded unchanged to the optimizer
rho = 0.95
c = 0.05
dampedNewtonP = []
results_dampedNewton  = []
times_dampedNewton    = []
Hessians_dampedNewton = []
#Cost matrix
C = distmat( x, y )
# a and b
a = normalize( np.ones( N[0] ) )
b = normalize( np.ones( N[1] ) )
# Iterate over a snapshot of `epsilons`: failed values are removed from the
# list inside the loop, and removing from a list while iterating over it
# makes the loop silently skip the element that follows the removed one.
# NOTE(review): results of the earlier solvers are not filtered when an
# epsilon is removed here, so index-based comparisons may become misaligned.
for eps in list( epsilons ):
    print( "For epsilon = "+str(eps)+":" )
    #Kernel
    K = np.exp( - C/eps )
    # Starting point: the marginals themselves (same objects, no copy).
    # NOTE(review): if the optimizer updates f/g in place this also changes
    # a and b - confirm against computational_OT.
    f, g = a, b
    print( " |- Iterating" )
    start = time.time()
    Optimizer = computational_OT.damped_Newton( K,
                                                a,
                                                b,
                                                f,
                                                g,
                                                eps,
                                                rho,
                                                c )
    out = Optimizer._update(    max_iterations = 50,
                                debug = False )
    end = time.time()
    # The value -1 is treated as the failure marker of the optimizer
    if out != -1:
        results_dampedNewton.append( out )
        times_dampedNewton.append( end - start )
        print( " |- Computing P" )
        print( "" )
        u_opt = np.exp( out['potential_f']/eps )
        K = np.exp( - C/eps )
        v_opt =  np.exp( out['potential_g']/eps )
        P_opt = GetP( u_opt, K, v_opt )
        dampedNewtonP.append( P_opt )
        print( " |- Recording (unstabilized) Hessian \n" )
        # Store -eps * Hessian, scaled on both sides by diag( 1/sqrt( (a, b) ) )
        mat  = - eps * Optimizer.Hessian
        diag = 1/np.sqrt( np.concatenate( ( a, b ), axis = None ) )
        mat = diag[:,None] * mat * diag[None,:]
        Hessians_dampedNewton.append( mat )
    else:
        epsilons.remove( eps )
# end for

In [None]:
# Plot error_a + error_b of damped Newton against the iterations, one curve
# per epsilon. (A placeholder title that was immediately overwritten has
# been dropped.)
plt.figure( figsize = ( 20, 7 ) )
plt.title( "$||P1 -a||_1+||P^T 1 -b||_1$" )
for i, result in enumerate( results_dampedNewton ):
    error = np.asarray( result['error_a'] ) + np.asarray( result['error_b'] )
    # Raw string: "\e" is not a valid Python escape sequence.
    plt.plot( error, label = r'Damped Newton for $\epsilon = $'+str(epsilons[i]), linewidth = 2 )
# end for
plt.xlabel( "Number of iterations" )
plt.ylabel( "Error in log-scale" )
plt.legend( loc = "upper right" )
plt.yscale( 'log' )
plt.savefig( "../Images/Correctness_images/Error_plot_dampedNewton.pdf", format = 'pdf' )
plt.show()
print( "\n Error plots can increase! The error is not the objective function!" )


##### Comparing the final potentials of log-domain Sinkhorn and damped Newton 

In [None]:
# Final potentials of damped Newton, their normalized versions, and the
# Euclidean distance to the normalized log-domain Sinkhorn potentials.
fdampednewton = [ result['potential_f'] for result in results_dampedNewton ]
gdampednewton = [ result['potential_g'] for result in results_dampedNewton ]
unique_dampednewton = [ make_unique_potentials( f_i, g_i )
                        for f_i, g_i in zip( fdampednewton, gdampednewton ) ]
errors_f, errors_g = [], []
for i, unique_current in enumerate( unique_dampednewton ):
    print( f'''i : {i}''' )
    err_f = np.linalg.norm( unique_logSK[i][0] - unique_current[0] )
    err_g = np.linalg.norm( unique_logSK[i][1] - unique_current[1] )
    errors_f.append( err_f )
    errors_g.append( err_g )
    print( "norm of err_f: ", err_f )
    print( "norm of err_g: ", err_g )
    print( "")
# end for

In [None]:
# Plot errors_f + errors_g per epsilon; the values and the tick labels are
# both reversed, so epsilon increases from left to right.
plt.figure( figsize = ( 20, 7 ) )
plt.title( "Difference between potentials." )
positions = list( range( len(epsilons) ) )
total_errors = np.array( errors_f[::-1] ) + np.array( errors_g[::-1] )
plt.plot( positions, total_errors, label = 'difference for potentials ( f, g ) between log-domain Sinkhorn and damped Newton using direct solver', linewidth = 2, marker= 'o' )
# Raw string: "\e" is not a valid Python escape sequence.
plt.xlabel( r"$\epsilon$" )
plt.ylabel( "difference in log-scale" )
plt.legend( loc = "upper right" )
plt.yscale( 'log' )
plt.xticks( positions, epsilons[::-1] )
plt.savefig( "../Images/Correctness_images/Correctness_comparison_plot_dampedNewton.pdf", format = 'pdf' )
plt.show()


## IV. Damped Newton with Preconditioning
Here we perform dual damped Newton with preconditioning. Here we consider $t$ eigenvalues of the Hessian that we want to move to one and form the following preconditioning matrix using the corresponding eigenvectors,
$$
P = \left(I_{n+m}-\sum_{i=1}^{t}\left(1 - \frac{1}{\sqrt{\lambda_{i}}}\right)y_{i}y_{i}^{T}\right)\ ,
$$
where
$$
y_{i} \in \ker\left(\nabla^{2}Q_{\alpha, \beta, \varepsilon}(f,g)-\lambda_{i}I_{n+m}\right),\ \forall i= 1,\dots,t\ ,
$$
 are orthonormal.

Now, at the $k^{th}$ iteration we solve the following equation:
$$
(P\nabla^{2}Q_{\alpha, \beta, \varepsilon}(f)P)(Pp_{k})=P\nabla Q_{\alpha, \beta, \varepsilon}(f)\ ,
$$
using iterative inversion methods such as "Conjugate gradient" and "GMRES" to get the ascent direction $p_{k}$, following which we use the Armijo condition to obtain the ascent step size $\alpha_{k}$.


In [None]:
def build_preconditioners( num_eigs, modified_Hessian, ansatz = True ):
    """
    Select a null vector and `num_eigs` eigenvectors of a symmetric matrix.

    Parameters
    ----------
    num_eigs : int
        Number of eigenvectors to return.
    modified_Hessian : 2-D array
        Symmetric matrix, diagonalized with np.linalg.eigh.
    ansatz : bool
        If True, the null vector is the normalized vector
        ( 1_{N[0]}, -1_{N[1]} )/sqrt( N[0] + N[1] ), built from the
        notebook-level list N. If False, it is the eigenvector of the
        smallest eigenvalue.

    Returns
    -------
    null_vector : 1-D array
    precond_vectors : list of 1-D arrays
        Eigenvectors taken alternately from the top of the spectrum
        (largest eigenvalue first) and from the bottom (skipping the
        smallest eigenvalue); for odd `num_eigs` the extra vector comes
        from the top.
    """
    # Diagonalize and order the eigenvectors by increasing eigenvalue
    eigenvalues, eigenvectors = np.linalg.eigh( modified_Hessian )
    sorting_indices = np.argsort( eigenvalues )
    eigenvectors = eigenvectors[ : , sorting_indices ]
    # Form null vector
    if not ansatz:
        null_vector = eigenvectors[:, 0]
    else:
        null_vector = np.hstack( ( np.ones(N[0]), -np.ones(N[1]) ) )
        null_vector = null_vector/np.sqrt( N[0] + N[1] )
    # Form other vectors: alternate between both ends of the spectrum
    _, m = eigenvectors.shape
    half = num_eigs//2
    indices = []
    for i in range( half ):
        indices.append( m - i - 1 )
        indices.append( i + 1 )
    # end for
    # Parity test (not `num_eigs//2 != 0`): only an odd count needs one more
    # vector, so that exactly `num_eigs` vectors are returned.
    if num_eigs % 2 != 0:
        indices.append( m - 1 - half )
    precond_vectors = [ eigenvectors[:, index] for index in indices ]
    return null_vector, precond_vectors

In [None]:
# Number of eigenvectors handed to the preconditioned solver.
num_eigs = 13
# Use the Hessian recorded for the last epsilon handled by damped Newton;
# with ansatz = False the null vector is taken from its spectrum.
null_vector, precond_vectors = build_preconditioners(
    num_eigs,
    Hessians_dampedNewton[-1],
    ansatz = False )

In [None]:
# Damped Newton with preconditioning: one run per epsilon, keeping the
# solver output, the elapsed time, the coupling and the final modified Hessian.
print("Damped Newton with preconditioning... ")
print( "Doing for (",N[0], N[1],")." )
# rho and c are forwarded unchanged to the optimizer
rho = 0.95
c = 0.05
# True: every epsilon starts from zero potentials.
# False: every epsilon starts from the solution of the previous one.
reset_starting_point = True
final_modified_Hessians = []
dampedNewtonwithprecondP = []
results_dampedNewtonwithprecond  = []
times_dampedNewtonwithprecond    = []
f, g = None, None
# Cost matrix
C = distmat( x, y )
# a and b
a = normalize( np.ones( N[0] ) )
b = normalize( np.ones( N[1] ) )
for eps in epsilons:
    print( "For epsilon = "+str(eps)+":" )
    #Kernel
    K = np.exp( - C/eps )
    # Build fresh zero potentials whenever a reset is requested (and on the
    # first pass). Re-creating them, instead of reusing the arrays of the
    # first pass, keeps the reset effective even if the optimizer were to
    # update its starting point in place.
    if reset_starting_point or (f is None) or (g is None):
        f, g = 0 * a, 0 * b
    print( "Doing for (",N[0],N[1],")." )
    print( " |- Iterating" )
    start = time.time()
    Optimizer = computational_OT.damped_Newton_with_preconditioning(    K,
                                                                        a,
                                                                        b,
                                                                        f,
                                                                        g,
                                                                        eps,
                                                                        rho,
                                                                        c,
                                                                        null_vector,
                                                                        precond_vectors[:] )
    out = Optimizer._update(    max_iterations = 50,
                                iterative_inversion = 30,
                                version = None,
                                debug = False,
                                optType = 'cg' )
    results_dampedNewtonwithprecond.append( out )
    end = time.time()
    times_dampedNewtonwithprecond.append( end - start  )
    print( " |- Computing P" )
    print( "" )
    u_opt = np.exp( out['potential_f']/eps )
    K = np.exp( - C/eps )
    v_opt =  np.exp( out['potential_g']/eps )
    P_opt = GetP( u_opt, K, v_opt )
    dampedNewtonwithprecondP.append( P_opt )
    if not reset_starting_point:
        # Warm start: split the optimizer state into its f and g parts
        f = Optimizer.x[:a.shape[0]]
        g = Optimizer.x[a.shape[0]:]
    final_modified_Hessians.append( Optimizer.modified_Hessian )
# end for


In [None]:
# Plot error_a + error_b of preconditioned damped Newton against the
# iterations, one curve per epsilon. (A placeholder title that was
# immediately overwritten has been dropped.)
plt.figure( figsize = ( 20, 7 ) )
plt.title( "$||P1 -a||_1+||P^T 1 -b||_1$" )
for i, result in enumerate( results_dampedNewtonwithprecond ):
    error = np.asarray( result['error_a'] ) + np.asarray( result['error_b'] )
    # Raw string: "\e" is not a valid Python escape sequence. The label names
    # the preconditioned variant so this figure is not confused with the
    # plain damped Newton one.
    plt.plot( error, label = r'Damped Newton with preconditioning for $\epsilon = $'+str(epsilons[i]), linewidth = 2 )
# end for
plt.xlabel( "Number of iterations" )
plt.ylabel( "Error in log-scale" )
plt.legend( loc = "upper right" )
plt.yscale( 'log' )
plt.savefig( "../Images/Correctness_images/Error_plot_dampedNewton_with_preconditioning.pdf", format = 'pdf' )
plt.show()
print( "\n Error plots can increase! The error is not the objective function!" )


##### Comparing the final potentials of log-domain Sinkhorn and damped Newton with preconditioning

In [None]:
# Final potentials of preconditioned damped Newton, their normalized
# versions, and the Euclidean distance to the normalized log-domain
# Sinkhorn potentials.
fdampednewtonwithprecond = [ result['potential_f'] for result in results_dampedNewtonwithprecond ]
gdampednewtonwithprecond = [ result['potential_g'] for result in results_dampedNewtonwithprecond ]
unique_dampednewtonwithprecond = [ make_unique_potentials( f_i, g_i )
                                   for f_i, g_i in zip( fdampednewtonwithprecond, gdampednewtonwithprecond ) ]
errors_f, errors_g = [], []
for i, unique_current in enumerate( unique_dampednewtonwithprecond ):
    print( f'''i : {i}''')
    err_f = np.linalg.norm( unique_logSK[i][0] - unique_current[0] )
    err_g = np.linalg.norm( unique_logSK[i][1] - unique_current[1] )
    errors_f.append( err_f )
    errors_g.append( err_g )
    print( "norm of err_f: ", err_f )
    print( "norm of err_g: ", err_g )
    print( "" )
# end for

##### Comparing the final potentials of damped Newton and damped Newton with preconditioning

In [None]:
# Compare the sums f_i + g_j obtained by damped Newton with and without
# preconditioning, for every epsilon.
print( "For damped Newton with and without precodnitioning" )
for i in range( len(results_dampedNewtonwithprecond) ):
    print( f'''i : {i}''' )
    # Sums of potentials f_i + g_j, with singleton dimensions removed
    sum_dampedNewton = ( fdampednewton[i][:,None] + gdampednewton[i][None,:] ).squeeze()
    sum_dampedNewtonprecond = ( fdampednewtonwithprecond[i][:,None] + gdampednewtonwithprecond[i][None,:] ).squeeze()
    # NOTE(review): for a 2-D array, ord = np.inf is the maximum absolute
    # row sum, not the largest absolute entry - confirm this is intended.
    error = np.linalg.norm( sum_dampedNewton - sum_dampedNewtonprecond, ord = np.inf )
    print( "Error of sums of potentials : ", error )
    print( "" )
# end for

In [None]:
# Plot errors_f + errors_g per epsilon; the values and the tick labels are
# both reversed, so epsilon increases from left to right.
plt.figure( figsize = ( 20, 7 ) )
plt.title( "Difference between potentials." )
positions = list( range( len(epsilons) ) )
total_errors = np.array( errors_f[::-1] ) + np.array( errors_g[::-1] )
plt.plot( positions, total_errors, label = 'difference for potentials ( f, g ) between log-domain Sinkhorn  and damped Newton with preconditioning', linewidth = 2, marker= 'o' )
# Raw string: "\e" is not a valid Python escape sequence.
plt.xlabel( r"$\epsilon$" )
plt.ylabel( "difference in log-scale" )
plt.legend( loc = "upper right" )
plt.yscale( 'log' )
plt.xticks( positions, epsilons[::-1] )
plt.savefig( "../Images/Correctness_images/Correctness_comparison_plot_dampedNewton_with_preconditioning.pdf", format = 'pdf')
plt.show()

## The semi-dual formulation of OT:
Using the Schrödinger-bridge equations between the potentials, that is, $g_{j} = -\varepsilon\log\left(\sum_{i}\exp\left(\frac{f_{i}-C_{ij}}{\varepsilon}\right)\alpha_{i}\right)\ , \ \forall j = 1,\dots,m$, the dual formulation of the objective function $Q_{\alpha, \beta,\varepsilon}$ reduces to the semi-dual formulation of the objective function given by,
$$
Q^{semi}_{\alpha, \beta, \varepsilon}(f) = \langle f, \alpha \rangle + \langle g(f,C,\varepsilon), \beta \rangle\ , 
$$
where
$g(f,C,\varepsilon)_{j} = -\varepsilon\log\left(\sum_{i}\exp\left(\frac{f_{i}-C_{ij}}{\varepsilon}\right)\alpha_{i}\right)$.

In this setup, the gradients and the Hessian are as follows,

$a)$ Gradients:
$$
\nabla_{f}Q^{semi}_{\alpha, \beta, \varepsilon}(f)_{i} = \frac{1}{\varepsilon}\alpha_{i}\left(1-\sum_{s=1}^{m}\frac{e^{\frac{f_{i}-C_{is}}{\varepsilon}}\beta_{s}}{\left(\sum_{t=1}^{n}\alpha_{t}e^{\frac{f_{t}-C_{ts}}{\varepsilon}}\right)}\right)\ ,\ \forall i = 1,\dots,n\ .
$$

b) Hessian:
$$
\nabla^{2}_{f}Q^{semi}_{\alpha, \beta, \varepsilon}(f)_{ii} = \frac{-1}{\varepsilon}\sum_{s=1}^{m}\left(\alpha_{i}\exp\left(\frac{f_{i}+g(f,C,\varepsilon)_{s}-C_{is}}{\varepsilon}\right)\right)\left(1 - \alpha_{i}\left(\exp\left(\frac{f_{i}+g(f,C,\varepsilon)_{s}-C_{is}}{\varepsilon}\right)\right)\right)\beta_{s}\ ,\ \forall i =1,\dots,n
$$
and
$$
\nabla^{2}_{f}Q^{semi}_{\alpha, \beta, \varepsilon}(f)_{ij} = \frac{1}{\varepsilon}\sum_{s=1}^{m}\alpha_{i}\alpha_{j}\left(\exp\left(\frac{f_{i}+g(f,C,\varepsilon)_{s}-C_{is}}{\varepsilon}\right)\right)\left(\exp\left(\frac{f_{j}+g(f,C,\varepsilon)_{s}-C_{js}}{\varepsilon}\right)\right)\beta_{s}\ ,\ \forall i \neq j = 1,\dots,n\ .
$$
Now we plug-in these gradients and Hessian in damped Newton algorithm as we did before.

Here we also incorporate the exp-log stabilization to stabilize $g$, the gradients and the Hessian in the following way,
$$
f^{C}_{j} \leftarrow \min_{i}(C_{ij}-f_{i})\ ,  \ \forall j = 1,\dots,m,\ \text{the C-transform of f}\ . \\
g_{j} = f^{C}_{j} -\varepsilon\log\left(\sum_{i}\exp\left(\frac{f_{i}+f^{C}_{j}-C_{ij}}{\varepsilon}\right)\alpha_{i}\right)\ ,  \ \forall j = 1,\dots,m\ ,
$$
$$
\nabla_{f}Q^{semi}_{\alpha, \beta, \varepsilon}(f)_{i} = \frac{1}{\varepsilon}\alpha_{i}\left(1-\sum_{s=1}^{m}\frac{e^{\frac{f_{i}+f^{C}_{s}-C_{is}}{\varepsilon}}\beta_{s}}{\left(\sum_{t=1}^{n}\alpha_{t}e^{\frac{f_{t}+f^{C}_{s}-C_{ts}}{\varepsilon}}\right)}\right)\ ,\ \forall i = 1,\dots,n\ , 
$$
$$
\nabla^{2}_{f}Q^{semi}_{\alpha, \beta, \varepsilon}(f)_{ii} = \frac{-1}{\varepsilon}\sum_{s=1}^{m}\left(\alpha_{i}\exp\left(\frac{f_{i}+g(f,C,\varepsilon)_{s}-C_{is}}{\varepsilon}\right)\right)\left(1 - \alpha_{i}\left(\exp\left(\frac{f_{i}+g(f,C,\varepsilon)_{s}-C_{is}}{\varepsilon}\right)\right)\right)\beta_{s}\ ,\ \forall i =1,\dots,n\ , 
$$
$$
\nabla^{2}_{f}Q^{semi}_{\alpha, \beta, \varepsilon}(f)_{ij} = \frac{1}{\varepsilon}\sum_{s=1}^{m}\alpha_{i}\alpha_{j}\left(\exp\left(\frac{f_{i}+g(f,C,\varepsilon)_{s}-C_{is}}{\varepsilon}\right)\right)\left(\exp\left(\frac{f_{j}+g(f,C,\varepsilon)_{s}-C_{js}}{\varepsilon}\right)\right)\beta_{s}\ ,\ \forall i \neq j = 1,\dots,n\ .
$$

## V. Semi-dual damped Newton using direct solver without any preconditioning

In [None]:
# Semi dual damped Newton: one run per epsilon, keeping the solver output,
# the elapsed time, the coupling and a rescaled copy of the final Hessian.
print(  " Semi dual damped Newton... "   )
print( " Doing for (",N[0], N[1],"). ")
# rho and c are forwarded unchanged to the optimizer
rho = 0.95
c = 0.5
Semi_dual_dampedNewtonP = []
results_semi_dual_dampedNewton = []
times_semi_dual_dampedNewton = []
Hessians_semi_dual_dampedNewton = []
#Cost matrix
C = distmat( x, y )
# a and b
a = normalize( np.ones( N[0] ) )
b = normalize( np.ones( N[1] ) )
# Iterate over a snapshot of `epsilons`: failed values are removed from the
# list inside the loop, and removing from a list while iterating over it
# makes the loop silently skip the element that follows the removed one.
# NOTE(review): results of the earlier solvers are not filtered when an
# epsilon is removed here, so index-based comparisons may become misaligned.
for eps in list( epsilons ):
    K = np.exp( - C/eps )
    print( "For epsilon = "+str(eps)+":" )
    # Starting point: the marginal a itself (same object, no copy)
    f = a
    print( " |- Iterating" )
    start = time.time()
    # NOTE(review): log_exp is passed as the string "True", not the boolean;
    # any non-empty string is truthy - confirm what the optimizer expects.
    Optimizer = computational_OT.semi_dual_dampedNewton_np( C,
                                                            a,
                                                            b,
                                                            f,
                                                            eps,
                                                            rho,
                                                            c,
                                                            log_exp = "True" )
    out = Optimizer._update( max_iterations = 50 )
    end = time.time()
    # The value -1 is treated as the failure marker of the optimizer
    if out != -1:
        results_semi_dual_dampedNewton.append( out )
        times_semi_dual_dampedNewton.append( end - start )
        print( " |- Computing P " )
        print( "" )
        u_opt = np.exp( out['potential_f']/eps )
        K = np.exp( - C/eps )
        v_opt =  np.exp( out['potential_g']/eps )
        P_opt = GetP( u_opt, K, v_opt )
        Semi_dual_dampedNewtonP.append( P_opt )
        print( " |- Recording (unstabilized) Hessian \n " )
        # Store -eps * Hessian, scaled on both sides by diag( 1/sqrt(a) )
        mat  = - eps * Optimizer.Hessian
        diag = 1/np.sqrt( a )
        mat = diag[:, None] * mat * diag[None,:]
        Hessians_semi_dual_dampedNewton.append( mat )
    else:
        epsilons.remove( eps )
# end for

In [None]:
# Plot the 'error' history of semi-dual damped Newton, one curve per
# epsilon. (A placeholder title that was immediately overwritten has been
# dropped.)
plt.figure( figsize = ( 12, 5 ) )
# The second term compares the column marginal P^T 1 (not P 1) with b.
plt.title( r"$||P1 -a||_1+||P^T 1 -b||_1$" )
for i, result in enumerate( results_semi_dual_dampedNewton ):
    error = np.asarray( result['error'] )
    # Raw string: "\e" is not a valid Python escape sequence.
    plt.plot( error, label = r'Semi dual damped Newton for $\epsilon = $'+ str(epsilons[i]), linewidth = 2)
# end for
plt.xlabel( "Number of iterations" )
plt.ylabel( "Error in log-scale" )
plt.legend( loc = "upper right" )
plt.yscale( 'log' )
plt.tight_layout()
plt.savefig( "../Images/Correctness_images/Error_plot_semi_dual_dampedNewton.pdf", format = 'pdf' )
plt.show()
print( "\n Error plots can increase! The error is not the objective function!" )

##### Comparing the final potentials of log-domain Sinkhorn and damped Newton in the semi-dual setup

In [None]:
# Final potentials of semi-dual damped Newton, their normalized versions,
# and the Euclidean distance to the normalized log-domain Sinkhorn potentials.
fdampednewtonSemiDual = [ result['potential_f'] for result in results_semi_dual_dampedNewton ]
gdampednewtonSemiDual = [ result['potential_g'] for result in results_semi_dual_dampedNewton ]
unique_dampednewtonSemiDual = [ make_unique_potentials( f_i, g_i )
                                for f_i, g_i in zip( fdampednewtonSemiDual, gdampednewtonSemiDual ) ]
errors_f, errors_g = [], []
for i, unique_current in enumerate( unique_dampednewtonSemiDual ):
    print( f'''i : {i}''' )
    err_f = np.linalg.norm( unique_logSK[i][0] - unique_current[0] )
    err_g = np.linalg.norm( unique_logSK[i][1] - unique_current[1] )
    errors_f.append( err_f )
    errors_g.append( err_g )
    print( "norm of err_f: ", err_f )
    print( "norm of err_g: ", err_g )
    print( "" )
# end for

In [None]:
# Plot errors_f + errors_g per epsilon; the values and the tick labels are
# both reversed, so epsilon increases from left to right.
plt.figure( figsize = ( 20, 7 ) )
plt.title( "Difference between potentials." )
positions = list( range( len(epsilons) ) )
total_errors = np.array( errors_f[::-1] ) + np.array( errors_g[::-1] )
plt.plot( positions, total_errors, label = 'difference for potentials ( f, g ) between log-domain Sinkhorn and damped Newton  in the semi dual setup', linewidth = 2, marker= 'o' )
# Raw string: "\e" is not a valid Python escape sequence.
plt.xlabel( r"$\epsilon$" )
plt.ylabel( "difference in log-scale" )
plt.legend( loc = "upper right" )
plt.yscale( 'log' )
plt.xticks( positions, epsilons[::-1] )
plt.savefig( "../Images/Correctness_images/Correctness_comparison_plot_semi_dual_dampedNewton.pdf", format = 'pdf' )
plt.show()

## VI. Semi-dual damped Newton with preconditioning
Here we perform semi-dual damped Newton with preconditioning. Here we consider $t$ eigenvalues of the Hessian that we want to move to one and form the following preconditioning matrix using the corresponding eigenvectors,
$$
P = \left(I_{n}-\sum_{i=1}^{t}\left(1 - \frac{1}{\sqrt{\lambda_{i}}}\right)y_{i}y_{i}^{T}\right)\ ,
$$
where
$$
y_{i} \in \ker\left(\nabla^{2}_{f}Q^{semi}_{\alpha, \beta, \varepsilon}(f)-\lambda_{i}I_{n}\right),\ \forall i= 1,\dots,t\ ,
$$
 are orthonormal.

Now, at the $k^{th}$ iteration we solve the following equation:
$$
(P\nabla^{2}Q^{semi}_{\alpha, \beta, \varepsilon}(f)P)(Pp_{k})=P\nabla Q^{semi}_{\alpha, \beta, \varepsilon}(f)\ ,
$$
using iterative inversion methods such as "Conjugate gradient" and "GMRES" to get the ascent direction $p_{k}$, following which we use the Armijo condition to obtain the ascent step size $\alpha_{k}$.

In [None]:
def build_preconditioners( num_eigs, modified_Hessian, N, ansatz = True ):
    """
    Build the null vector and the eigenvectors used for preconditioning.

    Parameters
    ----------
    num_eigs : int
        Number of eigenvectors to return. They are the ones that follow the
        smallest eigenvalue, i.e. ranks 2 to num_eigs + 1 in ascending order.
    modified_Hessian : 2-D array
        Symmetric matrix that is diagonalized with np.linalg.eigh.
    N : sequence
        Problem sizes. Only N[0] is read, and only when ansatz is True.
    ansatz : bool
        If True, the null vector is the constant vector of length N[0] scaled
        to unit Euclidean norm. If False, it is the eigenvector associated
        with the smallest eigenvalue of modified_Hessian.

    Returns
    -------
    null_vector : 1-D array
    precond_vectors : list of 1-D arrays
        The num_eigs eigenvectors described above, one array per eigenvector.

    Raises
    ------
    IndexError
        If num_eigs + 1 exceeds the number of eigenvectors.
    """
    # Diagonalize. np.linalg.eigh already documents ascending eigenvalues; the
    # explicit argsort is kept as a cheap safeguard on the ordering.
    eigenvalues, eigenvectors = np.linalg.eigh( modified_Hessian )
    eigenvectors = eigenvectors[ :, np.argsort( eigenvalues ) ]
    # Form null vector
    if ansatz:
        null_vector = np.ones( N[0] ) / np.sqrt( N[0] )
    else:
        null_vector = eigenvectors[:, 0]
    # Form other vectors: skip column 0, which belongs to the smallest eigenvalue
    precond_vectors = [ eigenvectors[:, index] for index in range( 1, num_eigs + 1 ) ]
    return null_vector, precond_vectors

In [None]:
# Number of eigenvectors requested from build_preconditioners.
num_eigs = 30
# Diagonalize the last entry of Hessians_semi_dual_dampedNewton (presumably the Hessian
# stored for the last epsilon of the plain semi-dual run -- TODO confirm).
# With ansatz = False the null vector is the eigenvector of the smallest eigenvalue
# instead of the normalized constant vector.
null_vector, precond_vectors = build_preconditioners( num_eigs, Hessians_semi_dual_dampedNewton[-1], N, ansatz = False )

In [None]:
# Semi dual damped Newton with preconditioning
# Runs computational_OT.semi_dual_dampedNewton_with_preconditioning_np once per value in
# epsilons and stores, for each run, the optimizer output, the elapsed wall-clock time,
# the coupling P and the optimizer's modified Hessian.
print(  " Semi dual damped Newton with preconditioning... "   )
print( " Doing for (",N[0], N[1],"). ")
# rho and c are forwarded to the optimizer; presumably the backtracking factor and the
# Armijo constant of the line search -- TODO confirm against computational_OT.
rho = 0.95
c = 0.5
# When True, the potentials of the previous epsilon are not copied back into f / g below.
reset_starting_point    = True  
final_modified_Hessians = []
Semi_dual_dampedNewton_with_preconditionerP          = []
results_semi_dual_dampedNewton_with_preconditioner   = []
times_semi_dual_dampedNewton_with_preconditioner     = []
# g is only assigned in the warm-start branch at the end of the loop and is never read in this cell.
f, g = None, None
# Cost matrix
C = distmat( x, y )
# a and b
# Uniform weights of size N[0] and N[1].
a = normalize( np.ones( N[0] ) )
b = normalize( np.ones( N[1] ) )
for eps in epsilons:
    print( "For epsilon = "+str(eps)+":" )    
    # Zero starting potential with the shape of a. f is never set back to None, so this
    # only triggers on the first pass.
    # NOTE(review): with reset_starting_point = True the same array object f is handed to
    # every run; that is a fresh start only if the optimizer does not modify f in place -- confirm.
    if (f is None):
        f = a * 0
    print( " |- Iterating" )  
    # The timed section covers building the optimizer, _update and the append of its output.
    start = time.time()
    # NOTE(review): exp_log is passed as the string "True", not the boolean True -- confirm
    # which one the optimizer expects.
    # precond_vectors[:] passes a shallow copy of the list.
    Optimizer = computational_OT.semi_dual_dampedNewton_with_preconditioning_np(    C,
                                                                                    a,
                                                                                    b,
                                                                                    f,
                                                                                    eps,
                                                                                    rho,
                                                                                    c,
                                                                                    null_vector,
                                                                                    precond_vectors[:],
                                                                                    exp_log = "True" )
    out = Optimizer._update(    max_iterations = 50,
                                iterative_inversion = 30,
                                version = None,
                                debug = False, 
                                optType = 'cg' )
    results_semi_dual_dampedNewton_with_preconditioner.append( out )
    end = time.time()
    times_semi_dual_dampedNewton_with_preconditioner.append( end - start )
    print( " |- Computing P" )
    print( "" )
    # P = diag(u) K diag(v) with u = exp(f/eps), v = exp(g/eps) and K = exp(-C/eps).
    u_opt = np.exp( out['potential_f']/eps )
    K = np.exp( - C/eps )
    v_opt =  np.exp( out['potential_g']/eps )
    P_opt = GetP( u_opt, K, v_opt )
    Semi_dual_dampedNewton_with_preconditionerP.append( P_opt )
    # Warm start: reuse the optimizer's current iterate for the next epsilon.
    # NOTE(review): the split of Optimizer.x at a.shape[0] assumes x stacks f and g; in the
    # semi-dual setting x may hold only f, which would leave g empty -- confirm.
    if not reset_starting_point:
        f = Optimizer.x[:a.shape[0]]
        g = Optimizer.x[a.shape[0]:]
    final_modified_Hessians.append( Optimizer.modified_Hessian )
# end for

In [None]:
# Plot the 'error' history stored in each preconditioned semi-dual damped Newton
# result, one curve per epsilon, on a logarithmic y axis, and save it as a PDF.
plt.figure( figsize = ( 20, 7 ) )
plt.title( "$||P1 -a||_1+||P1 -b||_1$" )
for i in range(len(results_semi_dual_dampedNewton_with_preconditioner)):
    error = np.asarray( results_semi_dual_dampedNewton_with_preconditioner[i]['error'] )
    # Raw string: "\e" is not a valid escape sequence in a normal string literal.
    plt.plot( error, label = r'Semi dual damped Newton for $\epsilon = $'+ str(epsilons[i]), linewidth = 2 )
# end for
plt.xlabel( "Number of iterations" )
plt.ylabel( "Error in log-scale" )
plt.legend( loc = "upper right" )
plt.yscale( 'log' )
plt.savefig( "../Images/Correctness_images/Error_plot_semi_dual_dampedNewton_with_preconditioning.pdf", format = 'pdf' )
plt.show()
print( "\n Error plots can increase! The error is not the objective function!" )

#### Comparing the final potentials of log-domain Sinkhorn and damped Newton with preconditioning in the semi-dual setup

In [None]:
# Final potentials returned by every preconditioned semi-dual damped Newton run.
fdampednewtonSemiDualwithprecond = [
    run['potential_f'] for run in results_semi_dual_dampedNewton_with_preconditioner
]
gdampednewtonSemiDuawithprecond = [
    run['potential_g'] for run in results_semi_dual_dampedNewton_with_preconditioner
]
# Same pairs after make_unique_potentials, ready to be compared with the log-domain Sinkhorn pairs.
unique_dampednewtonSemiDualwithprecond = [
    make_unique_potentials( f_run, g_run )
    for f_run, g_run in zip( fdampednewtonSemiDualwithprecond, gdampednewtonSemiDuawithprecond )
]
# Euclidean distance to the log-domain Sinkhorn potentials, one entry per run.
errors_f = []
errors_g = []
for i in range(len(results_semi_dual_dampedNewton_with_preconditioner)):
    print( f"i : {i}" )
    reference_pair = unique_logSK[i]
    candidate_pair = unique_dampednewtonSemiDualwithprecond[i]
    err_f = np.linalg.norm( reference_pair[0] - candidate_pair[0] )
    err_g = np.linalg.norm( reference_pair[1] - candidate_pair[1] )
    errors_f.append( err_f )
    errors_g.append( err_g )
    print( "norm of err_f: ", err_f )
    print( "norm of err_g: ", err_g )
    print( "" )
# end for

#### Comparing the final potentials of semi-dual damped Newton and semi-dual damped Newton with preconditioning

In [None]:
# Reality checks
# The pairwise sums f_i + g_j do not change when a constant is moved from f to g,
# so they are compared directly between the two semi-dual runs.
print( "For semi-dual damped Newton with and without precodnitioning" )
for i in range(len(results_semi_dual_dampedNewton_with_preconditioner)):
    print( f"i : {i}" )
    # Sums of potentials f_i + g_j, with singleton axes dropped
    sum_dampedNewtonSemiDual = (
        fdampednewtonSemiDual[i][:,None] + gdampednewtonSemiDual[i][None,:]
    ).squeeze()
    sum_dampedNewtonSemiDualprecond = (
        fdampednewtonSemiDualwithprecond[i][:,None] + gdampednewtonSemiDuawithprecond[i][None,:]
    ).squeeze()
    # For a 2-D array, ord = np.inf is numpy's maximum absolute row sum.
    error = np.linalg.norm(
        sum_dampedNewtonSemiDual - sum_dampedNewtonSemiDualprecond,
        ord = np.inf,
    )
    print( "Error of sums of potentials : ", error )
    print( "" )
# end for

In [None]:
# Plot errors_f + errors_g (distance to the log-domain Sinkhorn potentials) on a
# logarithmic y axis and save the figure as a PDF.
# Both the series and the tick labels are reversed, so the x axis follows
# epsilons in reverse order.
plt.figure( figsize = ( 20, 7 ) )
plt.title( "Difference between potentials." )
plt.plot( list(range(len(epsilons))), np.array(errors_f[::-1]) + np.array(errors_g[::-1]), label = 'difference for potentials ( f, g ) between log-domain Sinkhorn  and damped Newton with preconditioning in the semi dual setup', linewidth = 2, marker= 'o' )
# Raw string: "\e" is not a valid escape sequence in a normal string literal.
plt.xlabel( r"$\epsilon$" )
plt.ylabel( "difference in log-scale" )
plt.legend( loc = "upper right" )
plt.yscale( 'log' )
plt.xticks( list(range(len(epsilons))), epsilons[::-1] )
plt.savefig( "../Images/Correctness_images/Correctness_comparison_plot_semi_dual_dampedNewton_with_preconditioning.pdf", format = 'pdf' ) 
plt.show()

## Comparison plot of the Kantorovich potentials of the various algorithms used above against the ground truth: log-domain Sinkhorn


In [None]:
# Summary figure: for every algorithm, plot the distance between its potentials and the
# log-domain Sinkhorn ones, ||f - f_logSK|| + ||g - g_logSK||, on a logarithmic y axis.
# Both the series and the tick labels are reversed, so the x axis follows epsilons in
# reverse order.
plt.figure( figsize = ( 20, 7 ) )
plt.title( "Difference between potentials with and without regularization." )
# One entry per curve: ( list of runs, matching list of ( f, g ) pairs, legend label ).
# The number of runs fixes how many entries of the potential lists are compared.
potential_comparisons = [
    # log-domain Sinkhorn vs Sinkhorn
    ( results_Sinkhorn, unique_SK, 'difference for potential ( f, g ) between log-domain Sinkhorn and Sinkhorn' ),
    # log-domain Sinkhorn vs damped Newton
    ( results_dampedNewton, unique_dampednewton, 'difference for potential ( f, g ) between log-domain Sinkhorn and  damped Newton' ),
    # log-domain Sinkhorn vs damped Newton with preconditioning
    ( results_dampedNewtonwithprecond, unique_dampednewtonwithprecond, 'difference for potential ( f, g ) between log-domain Sinkhorn and damped Newton with preconditioning' ),
    # log-domain Sinkhorn vs semi-dual damped Newton
    ( results_semi_dual_dampedNewton, unique_dampednewtonSemiDual, 'difference for potential ( f, g ) between log-domain Sinkhorn and  semi-dual damped Newton' ),
    # log-domain Sinkhorn vs semi-dual damped Newton with preconditioning
    ( results_semi_dual_dampedNewton_with_preconditioner, unique_dampednewtonSemiDualwithprecond, 'difference for potential ( f, g ) between log-domain Sinkhorn and  semi-dual damped Newton with preconditioning' ),
]
for compared_runs, compared_potentials, curve_label in potential_comparisons:
    difference_f = [ np.linalg.norm( unique_logSK[i][0] - compared_potentials[i][0] ) for i in range(len(compared_runs)) ]
    difference_g = [ np.linalg.norm( unique_logSK[i][1] - compared_potentials[i][1] ) for i in range(len(compared_runs)) ]
    plt.plot( list(range(len(epsilons))), np.array(difference_f[::-1]) + np.array(difference_g[::-1]), label = curve_label, linewidth = 2, marker = 'o' )
# end for
# Raw string: "\e" is not a valid escape sequence in a normal string literal.
plt.xlabel( r"$\epsilon$" )
plt.ylabel( "difference in log-scale" )
plt.legend( loc = 'upper right' )
plt.yscale( 'log' )
plt.xticks( list(range(len(epsilons))), epsilons[::-1] )
plt.savefig( "../Images/Correctness_images/Correctness_comparison_plot_all_algorithms.pdf", format = 'pdf' ) 
plt.show()