In [None]:
from __future__ import division
import os
import numpy as np
import time
import matplotlib.pyplot as plt
import scipy as scp
import pylab as pyl


import warnings
warnings.filterwarnings('ignore')
np.random.seed(1234)

%matplotlib inline 
%load_ext autoreload                                                                                                                                                                                                
%autoreload 

In [None]:
%load_ext autoreload
%autoreload 2
    

In [None]:
# Folder receiving the figures of this notebook. exist_ok=True makes the cell
# idempotent (safe under Restart & Run All) without a separate isdir() check.
os.makedirs('DampedNewtonlogexpstabilizationtests_images', exist_ok=True)


In [None]:
"""To compute distance matrix"""
def distmat(x,y):
    """Pairwise squared Euclidean distances between the columns of x and y.

    For 2-D inputs storing one point per column, entry (i, j) of the result is
    |x[:, i]|^2 + |y[:, j]|^2 - 2 <x[:, i], y[:, j]>.
    """
    sq_norms_x = np.sum( x**2, 0 )
    sq_norms_y = np.sum( y**2, 0 )
    cross_terms = x.transpose().dot(y)
    return sq_norms_x[:,None] + sq_norms_y[None,:] - 2*cross_terms

"""To Normalise a vector"""
def normalize(a):
    """Return a divided by the sum of its entries."""
    total = np.sum( a )
    return a/total

"""To Compute P"""
def GetP(u,K,v):
    """Scale row i of K by u[i] and column j by v[j], i.e. diag(u) K diag(v) for 1-D u, v."""
    row_scaled = u[:,None]*K
    return row_scaled*v[None,:]

def plotp(x, col,plt, scale=200, edgecolors="k"):
    """Scatter-plot the points stored column-wise in x.

    Row 0 of x holds the abscissas and row 1 the ordinates. `plt` is the
    object whose `scatter` method is called (e.g. matplotlib.pyplot); the
    value returned by that call is passed back to the caller.
    """
    abscissas, ordinates = x[0,:], x[1,:]
    marker_options = dict( s=scale, edgecolors=edgecolors, c=col, cmap='plasma', linewidths=2 )
    return plt.scatter( abscissas, ordinates, **marker_options )

In [None]:
def generate_data(N):
    """
    Draw two random 2-D point clouds, one point per column.

    N is a list of the sizes of the data on x and y: x has shape (2, N[0])
    and is uniform on the square [-0.5, 0.5)^2; y has shape (2, N[1]) and
    lies in the annulus with radii in [0.8, 1.0).
    """
    n_x, n_y = N[0], N[1]
    # The draws use the global numpy random state in a fixed order
    # (x, then angles, then radii) so that a given seed reproduces the data.
    x = np.random.rand( 2,n_x )-0.5
    angles = 2*np.pi*np.random.rand( 1,n_y )
    radii = 0.8+.2*np.random.rand( 1,n_y )
    first_coord = radii*np.cos( angles )
    second_coord = radii*np.sin( angles )
    y = np.vstack( ( first_coord,second_coord ) )
    return x,y

In [None]:
import computational_OT

# Comparing Sinkhorn with and without log-domain stabilization


In [None]:
# Sizes of the two point clouds: N[0] points for x, N[1] points for y.
N = [ 500,600 ]
# x and y are drawn by generate_data from the global numpy random state.
x,y = generate_data(N)

## Log-domain sinkhorn

In [None]:
# Uniform marginals: vectors of ones rescaled to sum to one
# (a has N[0] entries, b has N[1] entries).
a = normalize(np.ones(N[0]))
b = normalize(np.ones(N[1]))

In [None]:
# Log domain Sinkhorn
print("Log domain Sinkhorn.... ")
results_logSinkhorn = []
times_logSinkhorn   = []
logsinkhornP        = []
# Full regularization grid. The later cells compare this run entry by entry
# with the plain Sinkhorn run (same 10 values), so the grid must not be
# shortened here, otherwise those comparisons index past the end of the lists.
epsilons            = [1.0, 0.5, 0.3, 0.1, 0.09, 0.05, 0.03, 0.02, 0.01, 0.005]
#Cost matrix (independent of eps, computed once)
C = distmat(x,y)
for eps in epsilons:

  print( "Sinkhorn for epsilon = "+str(eps)+":" )
  print( "Doing for (",N[0],N[1],")." )
  print( " |- Iterating" )

  start = time.time()
  logsinkhorn = computational_OT.Log_domainSinkhorn(a,b,C,eps)
  output = logsinkhorn.update( niter = 500 )
  results_logSinkhorn.append( output )
  end = time.time()
  # time.time() differences are already in seconds; store them unscaled so the
  # timings are in the same unit as those of the other solvers.
  times_logSinkhorn.append( end-start )
  # GetP expects the scalings u = exp(f/eps) and v = exp(g/eps), not the
  # rescaled potentials themselves (same convention as the Damped Newton cells).
  # NOTE(review): assumes 'potential_f'/'potential_g' are Kantorovich potentials,
  # as for the other solvers -- confirm against computational_OT.
  logsinkhornP.append(GetP(np.exp(output['potential_f']/eps), np.exp(-C/eps), np.exp(output['potential_g']/eps)))

In [None]:
plt.figure( figsize = (20,7) )

plt.subplot(2,1,1)
# b is the column marginal, so it is compared with P^T 1 (P1 has the size of a).
plt.title( "$||P1 -a||_1+||P^T 1 -b||_1$" )
for i in range( len( results_logSinkhorn) ):
  error = np.asarray( results_logSinkhorn[i]['error'])
  # Raw string: '\e' is not a valid Python escape sequence.
  plt.plot( error, label = r'log-sinkhorn for $\epsilon=$'+ str(epsilons[i]) , linewidth = 2 )
plt.yscale( 'log' )
plt.legend()
plt.xlabel("Iterations")
plt.ylabel("Error in log-scale")
plt.show()


In [None]:
# Potentials returned by the log-domain Sinkhorn runs, in the order of the runs.
flogsinkhorn = [ result['potential_f'] for result in results_logSinkhorn ]
glogsinkhorn = [ result['potential_g'] for result in results_logSinkhorn ]

## Sinkhorn

In [None]:
# Sinkhorn
print("Sinkhorn.... ")
SinkhornP=[]
results_Sinkhorn=[]
times_Sinkhorn=[]
epsilons = [1.0, 0.5, 0.3, 0.1, 0.09, 0.05, 0.03, 0.02, 0.01, 0.005]
Pmatrix_dist_linVSsinkhorn=[]
# a and b: uniform marginals, reshaped into column vectors of shape (n, 1)
a = normalize(np.ones(N[0]))
a=a.reshape(a.shape[0],-1)
b = normalize(np.ones(N[1]))
b=b.reshape(b.shape[0],-1)

#Cost matrix: does not depend on eps, so it is computed once before the loop
C = distmat(x,y)

for eps in epsilons:

  #Kernel
  K=np.exp(-C/eps)

  print("Doing for (",N[0],N[1],").")
  print( " |- Iterating")

  #Inflating
  # NOTE(review): u and v are the same array objects as a and b; if the solver
  # updates u/v in place this would also modify the marginals -- confirm.
  u=a
  v=b

  start=time.time()
  Optimizer=computational_OT.Sinkhorn(K,a,b,u,v,eps)
  out=Optimizer._update()
  results_Sinkhorn.append(out)
  end=time.time()
  times_Sinkhorn.append(end-start)
  print( " |- Computing P")
  print( "" )
  # GetP expects the scalings u = exp(f/eps) and v = exp(g/eps), not the
  # rescaled potentials themselves (same convention as the Damped Newton cells).
  # NOTE(review): assumes 'potential_f'/'potential_g' are Kantorovich potentials,
  # as for the other solvers -- confirm against computational_OT.
  SinkhornP.append(GetP(np.exp(out['potential_f']/eps),K,np.exp(out['potential_g']/eps)))
  

In [None]:
# Potentials returned by the plain Sinkhorn runs, in the order of the runs.
fsinkhorn = [ result['potential_f'] for result in results_Sinkhorn ]
gsinkhorn = [ result['potential_g'] for result in results_Sinkhorn ]

In [None]:
# Side-by-side look at what the two solvers return: `out` is the result of the
# last plain Sinkhorn run, `output` the result of the last log-domain run.
print( "Sinkhorn keys:")
print( out.keys() )
print( "Sinkhorn log domain keys")
print( output.keys() )
# TODO: Make same keys
print( "")
# Shapes of the first stored potential f of each solver.
print( fsinkhorn[0].shape )

print( flogsinkhorn[0].shape )
# Make outputs have same formats

#### Reality checks

In [None]:
# Reality checks
# Compares plain Sinkhorn with log-domain Sinkhorn run by run.
# NOTE(review): index i pairs the i-th entry of both result lists, so both
# solvers must have been run on the same epsilon grid, in the same order,
# and the log-domain lists must be at least as long as results_Sinkhorn.
for i in range(len(results_Sinkhorn)):
    print( f'''i : {i}''')
    # Couplings
    P_logSK =   logsinkhornP[i]
    P_SK    = SinkhornP[i]
    error   = np.linalg.norm(P_SK-P_logSK, ord='fro')
    print( "Error of couplings : ", error )
    # Sums of potentials f_i + g_j
    # (these sums are unchanged when a constant is added to f and removed from g)
    sum_SK    = fsinkhorn[i][:,None] + gsinkhorn[i][None,:]
    sum_logSK = flogsinkhorn[i][:,None] + glogsinkhorn[i][None,:]
    print(sum_SK.shape,sum_logSK.shape)
    print(np.mean(sum_SK),np.mean(sum_logSK))
    # squeeze() drops size-1 axes so both sums can be subtracted entrywise
    sum_SK    = sum_SK.squeeze()
    sum_logSK = sum_logSK.squeeze()
    print(sum_SK.shape,sum_logSK.shape)
    # NOTE(review): for a 2-D array, ord=np.inf is the maximum absolute row sum,
    # not the largest absolute entry -- confirm this is the intended measure.
    error     = np.linalg.norm(sum_SK-sum_logSK, ord=np.inf)
    print( "Error of sums of potentials : ", error )
    print( "")

### Make the potentials invariant under shifts by a constant

In [None]:
def make_unique_potentials(f, g):
    """Pick a canonical representative of the pair (f, g) modulo constant shifts.

    The pairs (f + t, g - t) are all mapped to the same output: the returned
    1-D arrays are shifted so that sum(f_new) == sum(g_new).
    """
    # Drop any extra size-1 dimensions by working on flat copies
    f_flat, g_flat = f.flatten(), g.flatten()
    total_size = len(f_flat) + len(g_flat)
    shift = (np.sum(f_flat) - np.sum(g_flat))/total_size
    return (f_flat - shift, g_flat + shift)
# Shift-normalized potentials of both Sinkhorn variants and the Euclidean norm
# of their differences, one entry per run of results_Sinkhorn.
# NOTE(review): entry i of the log-domain lists is paired with entry i of the
# plain Sinkhorn lists, so both runs must share the same epsilon grid.
unique_logSK = []
unique_SK = []
errors_f = []
errors_g = []
for i in range(len(results_Sinkhorn)):
    print( f'''i : {i}''')
    unique_logSK.append(make_unique_potentials( flogsinkhorn[i], glogsinkhorn[i]))
    unique_SK.append(make_unique_potentials( fsinkhorn[i], gsinkhorn[i]))
    print(unique_logSK[i][0].shape,unique_SK[i][0].shape)
    # [-1] is the pair appended just above; [0] is f and [1] is g
    err_f = np.linalg.norm( unique_logSK[-1][0] - unique_SK[-1][0] )
    err_g = np.linalg.norm( unique_logSK[-1][1] - unique_SK[-1][1] )
    errors_f.append( err_f )
    errors_g.append( err_g )
    print( "norm of err_f: ", err_f )
    print( "norm of err_g: ", err_g )
    print( "")

In [None]:
plt.figure( figsize = (20,7) )

plt.subplot(2,1,1)
# b is the column marginal, so it is compared with P^T 1 (P1 has the size of a).
plt.title( "$||P1 -a||_1+||P^T 1 -b||_1$" )
for i in range( len(results_Sinkhorn) ):
  error=np.asarray( results_Sinkhorn[i]['error_a'] )+np.asarray( results_Sinkhorn[i]['error_b'] )
  # Raw string: '\e' is not a valid Python escape sequence.
  plt.plot( error,label = r'Sinkhorn for $\epsilon=$'+ str(epsilons[i]), linewidth = 2 )
plt.yscale( 'log' )
plt.legend()
plt.xlabel("Iterations")
plt.ylabel("Error in log-scale")

plt.show()


In [None]:
# Summed norms of the differences between the shift-normalized potentials of
# log-domain Sinkhorn and plain Sinkhorn, as a function of epsilon (log-log).
plt.figure( figsize = (20,7) )
plt.title( "Difference between potentials with and without regularization." )
plt.plot(epsilons, np.array(errors_f) + np.array(errors_g), label = 'difference for potentials (f,g) between log-domain sinkhorn and sinkhorn', linewidth = 2, marker= 'o' )
# Raw string: '\e' is not a valid Python escape sequence.
plt.xlabel(r"$\epsilon$")
plt.ylabel( "difference in log-scale" )
plt.legend()
plt.yscale( 'log' )
plt.xscale( 'log' )
plt.show()


# Damped Newton 

In [None]:
# Runs computational_OT.DampedNewton once per value of dampednewtonepsilons and
# stores, per run: the solver output, the wall-clock time, the coupling P and a
# rescaled copy of the solver's Hessian.
# NOTE(review): rho and c are presumably the backtracking line-search
# parameters of the solver -- confirm against computational_OT.DampedNewton.
rho=0.95
c=0.05
DampedNewtonP=[]
results_DampedNewton  = []
times_DampedNewton    = []
Hessians_DampedNewton = []

#epsilons=[0.05,0.08,0.1]
# epsilons=[0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0 ]
# Own epsilon grid of this solver; it coincides with the first 7 values of the
# 10-value `epsilons` grid used for the Sinkhorn runs.
dampednewtonepsilons = [1.0, 0.5, 0.3, 0.1, 0.09, 0.05, 0.03]
#epsilons=[0.3]
for eps in dampednewtonepsilons:
    # Line Search
    print("Damped Newton for epsilon="+str(eps)+":")    
    #Cost matrix
    C = distmat(x,y)

    # a and b: uniform marginals, rebuilt at every iteration and reshaped into
    # column vectors of shape (n, 1)
    a = normalize(np.ones(N[0]))
    a=a.reshape(a.shape[0],-1)
    b = normalize(np.ones(N[1]))
    b=b.reshape(b.shape[0],-1)

    #Kernel
    K=np.exp(-C/eps)
    # Starting point: f and g are the same array objects as a and b
    f,g=a,b

    print("Doing for (",N[0],N[1],").")
    print( " |- Iterating")  
    start=time.time()
    Optimizer=computational_OT.DampedNewton(K,a,b,f,g,eps,rho,c)
    out=Optimizer._update(maxiter=50)
    results_DampedNewton.append(out)
    end=time.time()
    # elapsed wall-clock time in seconds
    times_DampedNewton.append(end-start)
    print( " |- Computing P")
    
    # Coupling built from the scalings exp(f/eps) and exp(g/eps)
    DampedNewtonP.append(GetP(np.exp(out['potential_f']/eps),K,np.exp(out['potential_g']/eps)))
    print( " |- Recording (unstabilized) Hessian \n")

    # Symmetric rescaling D (-eps * Hessian) D, where D is diagonal with
    # entries 1/sqrt of the stacked marginals (a on top of b)
    mat  = -eps*Optimizer.Hessian
    diag = 1/np.sqrt( np.vstack( (a,b) ) ).flatten()
    mat = diag[:,None]*mat*diag[None,:]
    Hessians_DampedNewton.append( mat )


In [None]:
# Marginal errors along the iterations of each Damped Newton run.
plt.figure( figsize = (20,7) )
plt.title( "$||P1 -a||_1+||P^T 1 -b||_1$" )

for i in range(len(results_DampedNewton)):
  error = np.asarray( results_DampedNewton[i]['error_a'] )+np.asarray( results_DampedNewton[i]['error_b'] )
  # Label with the grid the solver was actually run on (dampednewtonepsilons),
  # not the Sinkhorn grid `epsilons`; raw string because '\e' is not a valid escape.
  plt.plot( error,label=r'Damped Newton for $\epsilon=$'+str(dampednewtonepsilons[i]), linewidth = 2 )

plt.xlabel( "Number of iterations" )
plt.ylabel( "Error in log-scale" )
plt.legend()
plt.yscale( 'log' )
plt.show()
print( "\n Error plots can increase! The error is not the objective function!" )


In [None]:
# Potentials of the Damped Newton runs, their shift-normalized versions, and
# the distance of the latter to the log-domain Sinkhorn potentials.
fdampednewton, gdampednewton = [], []
for i in range(len(results_DampedNewton)):
    fdampednewton.append(results_DampedNewton[i]['potential_f'])
    gdampednewton.append(results_DampedNewton[i]['potential_g'])
    
unique_dampednewton = []
for i in range(len(results_DampedNewton)):
    unique_dampednewton.append(make_unique_potentials( fdampednewton[i], gdampednewton[i]))

# NOTE(review): run i of Damped Newton is compared with run i of log-domain
# Sinkhorn; this is meaningful only if both epsilon grids agree on their
# first len(results_DampedNewton) values -- confirm.
for i in range(len(results_DampedNewton)):
    print( f'''i : {i}''')
    err_f = np.linalg.norm( unique_logSK[i][0] - unique_dampednewton[i][0] )
    err_g = np.linalg.norm( unique_logSK[i][1] - unique_dampednewton[i][1] )

    print( "norm of err_f: ", err_f )
    print( "norm of err_g: ", err_g )
    print( "")

# Damped Newton with Preconditioning


In [None]:
def build_preconditioners( num_eigs,modified_Hessian, ansatz=True ):
    """Select a null vector and `num_eigs` eigenvectors of a symmetric matrix.

    Parameters
    ----------
    num_eigs : int
        Number of preconditioning eigenvectors to return.
    modified_Hessian : 2-D symmetric array
        Matrix that is diagonalized with np.linalg.eigh.
    ansatz : bool
        If True, the null vector is the normalized vector made of N[0] ones
        followed by N[1] minus ones (N is the notebook-level list of sizes).
        If False, it is the eigenvector of the smallest eigenvalue.

    Returns
    -------
    null_vector : 1-D array
    precond_vectors : list of 1-D arrays
        Eigenvectors taken alternately from the top of the spectrum and from
        the bottom of the spectrum (skipping index 0, used as null vector).
        When num_eigs is odd, the extra vector comes from the top.
    """
    # Diagonalize. eigh returns the eigenvalues in ascending order with the
    # matching eigenvectors as columns, so no further sorting is needed.
    _, eigenvectors = np.linalg.eigh( modified_Hessian )
    # Form null vector
    if not ansatz:
        null_vector = eigenvectors[:, 0]
    else:
        null_vector = np.hstack( (np.ones(N[0]), -np.ones(N[1])) )
        norm = np.sqrt( N[0] + N[1] )
        null_vector = null_vector/norm
    # Form other vectors: pairs (largest remaining, smallest remaining non-null)
    m = eigenvectors.shape[1]
    half = num_eigs//2
    indices=[]
    for i in range(half):
        indices.append(m-i-1)
        indices.append(i+1)
    # Odd count: one more vector from the top of the spectrum. The parity test
    # must be a remainder, not a quotient, otherwise even counts get an extra
    # vector and num_eigs=1 gets none.
    if num_eigs % 2 != 0:
        indices.append(m-1-half)

    precond_vectors = [ eigenvectors[:,index] for index in indices ]
    #
    return null_vector, precond_vectors

# Preconditioning data built from the last recorded Hessian, i.e. the one of
# the last (smallest) epsilon of the Damped Newton run; with ansatz=False the
# null vector is taken from the eigen-decomposition itself.
num_eigs = 13
null_vector, precond_vectors = build_preconditioners( num_eigs, Hessians_DampedNewton[-1], ansatz=False )

In [None]:
# Runs computational_OT.DampedNewton_With_Preconditioner once per value of
# precond_epsilons, with the null vector and eigenvectors built beforehand.
# NOTE: DampedNewtonP, results_DampedNewton and times_DampedNewton are reset
# here, so from this cell on they hold the preconditioned runs and no longer
# the plain Damped Newton ones.
# NOTE(review): rho and c are presumably the backtracking line-search
# parameters of the solver -- confirm against computational_OT.
rho = 0.95
c = 0.05
# True: every epsilon starts from the same initial (f, g).
# False: each run starts from the final iterate of the previous run.
reset_starting_point = True
final_modified_Hessians = []
DampedNewtonP = []
results_DampedNewton  = []
times_DampedNewton    = []

#epsilons = [ 0.05,0.08,0.1 ]
#precond_epsilons = [ 0.2, 0.3, 0.4, 0.5, 0.75, 1.0 ]
precond_epsilons = [1.0, 0.5, 0.3, 0.1, 0.09, 0.05, 0.03]

#epsilons = [ 0.3 ]
f, g = None, None
for eps in precond_epsilons:
    # Line Search
    print( "Damped Newton for epsilon="+str(eps)+":" )    
    # Cost matrix
    C = distmat(x,y)

    # a and b: uniform marginals reshaped into column vectors of shape (n, 1)
    a = normalize( np.ones(N[0]) )
    a = a.reshape( a.shape[0],-1 )
    b = normalize( np.ones(N[1]) )
    b = b.reshape( b.shape[0],-1 )

    #Kernel
    K = np.exp(-C/eps)

    # Only true on the first pass: f and g then keep referring to the arrays
    # a and b created during that first pass.
    if (f is None) or (g is None): 
        f,g = a,b

    print( "Doing for (",N[0],N[1],")." )
    print( " |- Iterating" )  
    start = time.time()
    # precond_vectors[:] passes a shallow copy of the list of eigenvectors
    Optimizer = computational_OT.DampedNewton_With_Preconditioner( K,a,b,f,g,eps,rho,c,null_vector,precond_vectors[:] )
    out = Optimizer._update( maxiter=50, iterative_inversion=30, version=None,debug=False,optType='cg' )
    results_DampedNewton.append( out )
    end = time.time()
    times_DampedNewton.append(end-start)
    print( " |- Computing P" )

    # Warm start: split the solver state x into its first a.shape[0] entries (f)
    # and the remaining ones (g)
    if not reset_starting_point:
        f = Optimizer.x[:a.shape[0]]
        g = Optimizer.x[a.shape[0]:]
        # f = f.reshape( f.shape[0], -1)
        # g = g.reshape( g.shape[0], -1)
    
    DampedNewtonP.append( GetP(np.exp(out['potential_f']/eps),K,np.exp(out['potential_g']/eps)) )
    final_modified_Hessians.append( Optimizer.modified_Hessian )


In [None]:
# Marginal errors along the iterations of each preconditioned Damped Newton run.
plt.figure( figsize = (20,7) )
plt.title( "$||P1 -a||_1+||P^T 1 -b||_1$" )

for i in range(len(results_DampedNewton)):
  error = np.asarray( results_DampedNewton[i]['error_a'] )+np.asarray( results_DampedNewton[i]['error_b'] )
  # Label with the grid the solver was actually run on (precond_epsilons),
  # not the Sinkhorn grid `epsilons`; raw string because '\e' is not a valid escape.
  plt.plot( error,label=r'Damped Newton for $\epsilon=$'+str(precond_epsilons[i]), linewidth = 2 )

plt.xlabel( "Number of iterations" )
plt.ylabel( "Error in log-scale" )
plt.legend()
plt.yscale( 'log' )
plt.show()
print( "\n Error plots can increase! The error is not the objective function!" )


In [None]:
# Potentials of the preconditioned Damped Newton runs (results_DampedNewton
# holds those runs at this point), their shift-normalized versions, and the
# distance of the latter to the log-domain Sinkhorn potentials.
fdampednewtonwithprecond, gdampednewtonwithprecond = [], []
for i in range(len(results_DampedNewton)):
    fdampednewtonwithprecond.append(results_DampedNewton[i]['potential_f'])
    gdampednewtonwithprecond.append(results_DampedNewton[i]['potential_g'])
    
unique_dampednewtonwithprecond = []
for i in range(len(results_DampedNewton)):
    unique_dampednewtonwithprecond.append(make_unique_potentials( fdampednewtonwithprecond[i], gdampednewtonwithprecond[i]))


# NOTE(review): run i is compared with run i of log-domain Sinkhorn; this is
# meaningful only if both epsilon grids agree on their first
# len(results_DampedNewton) values -- confirm.
for i in range(len(results_DampedNewton)):
    print( f'''i : {i}''')
    err_f = np.linalg.norm( unique_logSK[i][0] - unique_dampednewtonwithprecond[i][0] )
    err_g = np.linalg.norm( unique_logSK[i][1] - unique_dampednewtonwithprecond[i][1] )
    print( "norm of err_f: ", err_f )
    print( "norm of err_g: ", err_g )
    print( "")

In [None]:

# Reality checks
# Compares, run by run, the sums f_i + g_j obtained by Damped Newton with and
# without preconditioning (these sums do not depend on the constant shift).
print("For Damped Newton with and without precodnitioning")
for i in range(len(results_DampedNewton)):
    print( f'''i : {i}''')
    # Sums of potentials f_i + g_j
    sum_dampedNewton    = fdampednewton[i][:,None] + gdampednewton[i][None,:]
    sum_dampedNewtonprecond = fdampednewtonwithprecond[i][:,None] + gdampednewtonwithprecond[i][None,:]
    # squeeze() drops size-1 axes so both sums can be subtracted entrywise
    sum_dampedNewton    = sum_dampedNewton.squeeze()
    sum_dampedNewtonprecond = sum_dampedNewtonprecond.squeeze()
    # NOTE(review): for a 2-D array, ord=np.inf is the maximum absolute row sum,
    # not the largest absolute entry -- confirm this is the intended measure.
    error     = np.linalg.norm(sum_dampedNewton-sum_dampedNewtonprecond, ord=np.inf)
    print( "Error of sums of potentials : ", error )
    print( "")

# Comparison of the Kantorovich potentials against the ground truth (log-domain Sinkhorn)


In [None]:
# Distance (sum of the norms of the f and g differences) between the
# shift-normalized potentials of each solver and those of log-domain Sinkhorn,
# plotted against the epsilon grid each solver was actually run on.
plt.figure( figsize = (20,7) )
plt.title( "Difference between potentials with and without regularization." )


# Plain Sinkhorn: full `epsilons` grid.
difference_f = [ np.linalg.norm(unique_logSK[i][0]-unique_SK[i][0]) for i in range(len(results_Sinkhorn)) ]
difference_g = [ np.linalg.norm(unique_logSK[i][1]-unique_SK[i][1]) for i in range(len(results_Sinkhorn)) ]
plt.plot(epsilons, np.array(difference_f)+np.array(difference_g),  label = 'difference for potential (f,g) between log-domain sinkhorn and sinkhorn', linewidth = 2, marker= 'o' )


# Damped Newton: its own grid, instead of a hard-coded slice of `epsilons`
# that only matches while both lists keep the same leading values.
difference_f = [ np.linalg.norm(unique_logSK[i][0]-unique_dampednewton[i][0]) for i in range(len(unique_dampednewton)) ]
difference_g = [ np.linalg.norm(unique_logSK[i][1]-unique_dampednewton[i][1]) for i in range(len(unique_dampednewton)) ]
plt.plot(dampednewtonepsilons, np.array(difference_f)+np.array(difference_g), label = 'difference for potential (f,g) between log-domain sinkhorn and  Damped Newton', linewidth = 2, marker= 'o' )


# Preconditioned Damped Newton: its own grid as well.
difference_f = [ np.linalg.norm(unique_logSK[i][0]-unique_dampednewtonwithprecond[i][0]) for i in range(len(unique_dampednewtonwithprecond)) ]
difference_g = [ np.linalg.norm(unique_logSK[i][1]-unique_dampednewtonwithprecond[i][1]) for i in range(len(unique_dampednewtonwithprecond)) ]
plt.plot(precond_epsilons, np.array(difference_f)+ np.array(difference_g), label = 'difference for potential (f,g) between log-domain sinkhorn and Damped Newton with preconditioning', linewidth = 2, marker= 'o' )

# Raw string: '\e' is not a valid Python escape sequence.
plt.xlabel(r"$\epsilon$")
plt.ylabel( "difference in log-scale" )
plt.legend()
plt.yscale( 'log' )
plt.xscale('log')
plt.show()
