In [None]:
#Include
using Plots, LightGraphs, SparseArrays
using Statistics, BenchmarkTools, LinearAlgebra, ProgressMeter
using Distributions, Base.Threads
using Base.GC
using Mosek, MosekTools, OSQP, ECOS, SCS, ProxSDP, CPLEX
using Clustering, JuMP, Roots, PyCall, Base.Threads
plotly()

In [None]:
ot = pyimport("ot")
np = pyimport("numpy")

# Metric Nearness (Dense)

In [None]:
"""
    flipcoin(p)

Draw one uniform sample with `rand()` and return `1` if it is at most `p`,
otherwise `0`.
"""
function flipcoin(p)
    return rand() <= p ? 1 : 0
end

"""
    calculateSTDEV(x, y)

Return the root-mean-square deviation of the entries of `y` from the reference
value `x`, i.e. `sqrt(sum((x - y[i])^2) / length(y))`.
"""
function calculateSTDEV(x,y)
    # Accumulate left-to-right starting from 0.0.
    sqdev = foldl((acc, yi) -> acc + (x - yi)^2, y; init=0.0)
    return sqrt(sqdev / length(y))
end

"""
    genGraph(n, p)

Return an `n × n` symmetric matrix with a zero diagonal. The matrix starts as
`randn(n, n)`; for every pair `j < i` one `flipcoin(p)` draw decides whether the
pair keeps a weight (the upper-triangle value is mirrored into the lower
triangle) or both entries are set to zero.
"""
function genGraph(n,p)
    W = randn(n, n)
    for row in 1:n
        W[row, row] = 0
        for col in 1:(row - 1)
            if flipcoin(p) != 0
                # Keep the edge: mirror the upper-triangle sample.
                W[row, col] = W[col, row]
            else
                W[row, col] = 0
                W[col, row] = 0
            end
        end
    end

    return W
end

"""
    Labels(A, k)

Return the set of indices of the `k` largest entries of `A`.
"""
function Labels(A,k)
    topk = partialsortperm(A, 1:k; rev=true)
    return Set(topk)
end

"""
    genGraphDir(n, p, alpha, k)

Build a labelled random graph on `n` vertices.

Each vertex first gets a label set: the indices of the `k` largest coordinates
of one `Dirichlet(alpha)` sample (via `Labels`). Then, for every pair `j < i`,
one `flipcoin(p)` draw decides whether the pair is connected. Connected pairs
get weight `1` when their label sets are disjoint and `0.00000001` when they
share a label; unconnected pairs and the diagonal stay `0`.

Returns `(G, L)`: the symmetric weight matrix and the vector of label sets.
"""
function genGraphDir(n,p,alpha,k)
    dist = Dirichlet(alpha)
    L = Array{Set{Int},1}(undef, n)
    for v in 1:n
        L[v] = Labels(rand(dist), k)
    end

    G = zeros(n, n)
    for i in 1:n, j in 1:(i - 1)
        # One coin flip per pair; a 0 means "no edge", so the zeros stay.
        flipcoin(p) == 0 && continue
        w = isempty(intersect(L[i], L[j])) ? 1 : 0.00000001
        G[i, j] = w
        G[j, i] = w
    end

    return G,L
end

"""
    enumerate_paths2(s)

Call `LightGraphs.enumerate_paths(s, v)` for every source vertex `v` in
`1:size(s.parents, 1)` and collect the results in a `Vector{Any}`, so that
entry `v` holds whatever `enumerate_paths` returns for source `v`.
"""
function enumerate_paths2(s)
    nsources = size(s.parents, 1)
    return Any[LightGraphs.enumerate_paths(s, v) for v in 1:nsources]
end

# BregmanOrig(D): iteratively adjusts the edge weights of the weighted graph built
# from the square matrix D and returns the adjusted weight matrix `g.weights`.
# Each outer iteration (1) revisits the remembered paths in Z, (2) runs a pass over
# Z′, (3) runs Floyd–Warshall and corrects every pair whose direct edge is longer
# than the shortest path. The loop stops once norm(FS.dists - g.weights) <= 1.
# NOTE(review): `SimpleWeightedGraph` is not provided by any `using` line visible in
# this file (presumably the SimpleWeightedGraphs package) — confirm it is loaded.
function BregmanOrig(D)
    (n,n) = size(D)
    #G = sparse(copy(D))
    g = SimpleWeightedGraph(D)
    # Z maps a path (as returned by enumerate_paths) to its accumulated correction.
    Z = Dict()
    # Z′ starts as all zeros and is only ever decreased by `c` in the pass below.
    Z′ = spzeros(n,n)
    maxD = 2
    # `count` is incremented every outer iteration but never read or returned.
    count = 0
    while(maxD > 1)
        
        # (1) Correction pass over remembered paths.
        # NOTE(review): entries are deleted from Z while iterating keys(Z).
        for p in keys(Z)
            z = Z[p]
            l = length(p)
            u = p[1]
            v = p[l]
            # d = (sum of edge weights along p) - (weight of the direct edge u–v)
            d = -1*g.weights[u,v]
            for i = 1:l-1
                d = d + g.weights[p[i], p[i+1]]
            end
            # Step size is capped by the remembered value z.
            c = min(d/l,z)
            for i = 1:l-1
                g.weights[p[i],p[i+1]] -= c
                g.weights[p[i+1],p[i]] -= c
            end
            g.weights[u,v] += c
            g.weights[v,u] += c
            # Forget the path once its remembered value is used up.
            if z == c
                delete!(Z,p)
            else
                Z[p] -= c
            end
        end
    
        # (2) Per-edge pass over the strict lower triangle (j < i), applied symmetrically.
        for i = 1:n
            for j = 1:i-1
                c = min(g.weights[j,i] - 1e-14,Z′[j,i])
                g.weights[j,i] -= c
                g.weights[i,j] -= c
                Z′[j,i] -= c
                Z′[i,j] -= c             
            end
        end
        
        # (3) All-pairs shortest paths on the current weights.
        FS = LightGraphs.floyd_warshall_shortest_paths(g)
        U = FS.dists
        P = enumerate_paths2(FS)
        maxD = 0
        
        for i = 1:n
            for j = 1:i-1
                # Only pairs whose direct edge is longer than the shortest path.
                if g.weights[j,i] - U[j,i] > 0  
                    p = P[j][i] #enumerate_paths2(FS,i,j)
                    l = length(p)
                    u = p[1]
                    v = p[l]
                    d = -1*g.weights[u,v]
                    for k = 1:l-1
                        d = d + g.weights[p[k], p[k+1]]
                    end
                    if d < 0
                        # c is negative here: `-= c` raises the path edges and
                        # `+= c` lowers the direct edge.
                        c=d/l
                        for k = 1:l-1
                            g.weights[p[k],p[k+1]] -= c
                            g.weights[p[k+1],p[k]] -= c
                        end
                        g.weights[p[1],p[l]] += c
                        g.weights[p[l],p[1]] += c
                        # Remember the (positive) amount -c applied to this path.
                        if haskey(Z,p)
                            Z[p] = Z[p] - c
                        else
                            Z[p] = -1*c
                        end
                        if abs(d) > maxD
                            maxD = abs(d)
                        end
                    end
                end   
            end
        end
        # The maxD accumulated above is overwritten here: the stopping criterion is
        # the norm of the gap between shortest-path distances and current weights.
        FS =  LightGraphs.floyd_warshall_shortest_paths(g)
        maxD = norm(FS.dists-g.weights)
        @show(maxD)
        count+=1
    end
    return g.weights
end

"""
    make_dist_vector(D)

Flatten the lower triangle (diagonal included) of the square matrix `D` into a
vector, walking row by row.

Returns `(d, coord_to_idx)`, where `d[idx]` is `D[i, j]` for `j <= i` and
`coord_to_idx` maps both `(i, j)` and `(j, i)` to that position `idx`.
"""
function make_dist_vector(D)
    n = size(D)[1]
    d = zeros(Int((n*(n+1))/2))
    coord_to_idx = Dict()
    pos = 0
    for i in 1:n, j in 1:i
        pos += 1
        d[pos] = D[i, j]
        # Both orientations of the pair share one slot.
        coord_to_idx[(i, j)] = pos
        coord_to_idx[(j, i)] = pos
    end

    return d, coord_to_idx
end

"""
    make_metric_matrix(n, coord_to_idx)

Build the sparse `n^3 × n(n+1)/2` matrix `A` whose rows encode the triangle
expressions `d[i,j] - d[i,k] - d[k,j]` for every ordered triple `(i, j, k)`,
where `coord_to_idx` maps a coordinate pair to its column (as produced by
`make_dist_vector`). Rows are ordered with `k` fastest, then `j`, then `i`.

The matrix is assembled from (row, column, value) triplets, so coefficients that
land on the same column are *summed*. Repeated `A[row, col] = ±1` assignments
would instead let the later write overwrite the earlier one whenever two of the
three pairs coincide (`k == j`, `k == i` or `i == j`), and the row would then no
longer represent `d[i,j] - d[i,k] - d[k,j]`. Coefficients that cancel to zero are
dropped from the sparsity pattern.
"""
function make_metric_matrix(n,coord_to_idx)
    nrows = n^3
    ncols = Int((n*(n+1))/2)
    rows = Vector{Int}(undef, 3*nrows)
    cols = Vector{Int}(undef, 3*nrows)
    vals = Vector{Float64}(undef, 3*nrows)
    idx = 1
    @showprogress for i = 1:n
        for j = 1:n
            for k = 1:n
                t = 3*(idx-1)
                rows[t+1] = idx
                cols[t+1] = coord_to_idx[(i,j)]
                vals[t+1] = 1.0
                rows[t+2] = idx
                cols[t+2] = coord_to_idx[(i,k)]
                vals[t+2] = -1.0
                rows[t+3] = idx
                cols[t+3] = coord_to_idx[(k,j)]
                vals[t+3] = -1.0
                idx += 1
            end
        end
    end

    # `sparse` adds duplicate (row, col) entries; building in one shot also avoids
    # the repeated re-allocation that element-wise insertion into a CSC matrix causes.
    A = sparse(rows, cols, vals, nrows, ncols)
    dropzeros!(A)
    return A
end

# Sample Distribution

In [None]:
# Draw two random probability vectors of length N and a random N×N cost matrix.
N = 100
d1 = Dirichlet(N,1)
d2 = Dirichlet(N,1)

a = rand(d1)
b = rand(d2);

# Renormalise so each vector sums to one.
a = a./(sum(a))
b = b./(sum(b))

D = rand(N,N)


# Sanity check: the two totals and the L1 distance between a and b.
@show(sum(a),sum(b),sum(abs.(a-b)));

In [None]:
# Alternative test problem: two bump-shaped marginals on a 5001-point grid over [-20, 20].
# This cell overwrites the globals x, a and b.
x = collect(range(-20,20; length = 5001));
F(x) = exp(-1*((x-15)^2/20))
G(x) = exp(-1*((x+15)^2/20))
H(x) = exp(-1*((x+5)^2/10))
#I(x) = exp(-1*((x-5)^2/10))
Exp(x) = exp(-1*x/100)
# Only F (centred at 15) and G (centred at -15) are used below; H and Exp are defined but unused here.
a = F.(x)
b = G.(x)

# Normalise to unit total mass.
a = a./sum(a);
b = b./sum(b);

In [None]:
# Cost option 1: constant cost. Replaced by the next cell if that cell is also run.
D = ones(length(a),length(b));

In [None]:
# Cost option 2: squared distance between grid points.
D = zeros(length(a),length(b))
for i = 1:length(a)
    for j = 1:length(b)
        D[i,j] = abs(x[i]-x[j])^2
    end
end

In [None]:
# Plot both marginals over the grid.
plotly()
scatter(x,a, label="a")
scatter!(x,b, label = "b")

# POT Stochastic

In [None]:
# Parameters for POT's stochastic dual entropic solver.
reg = 1
numItermax = 20000
lr = 0.1
batch_size = 3
# Deliberately not named `log`: a global called `log` in this module hides `Base.log`,
# which calc_opt_entropy, cont_opt_entropy and calc_opt_exp call further down.
return_log = true

In [None]:
# With logging requested the result is unpacked into two values, as in the original cell;
# the second value is kept under a name that does not clash with `Base.log`.
sgd_dual_pi, sgd_log = ot.stochastic.solve_dual_entropic(a, b, D, reg, batch_size, numItermax, lr, return_log)

In [None]:
# Show the plan returned by the stochastic solver.
heatmap(sgd_dual_pi, yflip=true)

# Calculate Wasserstein

In [None]:
# Reference solution from POT's exact solver: W from ot.emd2, T from ot.emd.
# Note the argument order (b, a) together with PyReverseDims(D).
W = ot.emd2(b,a, PyReverseDims(D))
T= ot.emd(b,a, PyReverseDims(D));

In [None]:
heatmap(T')

In [None]:
# Number of non-zero entries in T.
sum(T.!=0)

# Quadratic regularization

In [None]:
"""
    calc_opt(a, b, C, γ=1, τ=1e-4; maxIters=1000, verbose=false)

Project-and-forget iteration for the quadratically regularised dual problem with
constraints `f[i] + g[j] <= C[i, j]`.

Starting from `f = γ .* a` and `g = γ .* b`, every outer iteration

1. scans all pairs and adds each violated constraint to the active set `L`;
2. sweeps 100 times over the active set, moving `f[i]` and `g[j]` by `γ*c` with
   `c = min(θ, Z[i, j])` and `θ = (C[i, j] - f[i] - g[j]) / (2γ)`, and dropping a
   constraint from `L` once its multiplier `Z[i, j]` is (numerically) used up.

# Arguments
- `a`, `b`: the two marginal vectors.
- `C`: cost matrix, indexed `C[i, j]` for `i in 1:length(a)`, `j in 1:length(b)`.
- `γ`: regularisation weight.
- `τ`: tolerance; iteration stops when the largest `-θ` seen in an outer iteration
  is at most `τ`.

# Keywords
- `maxIters`: the loop runs while `count < maxIters`, with `count` starting at 1.
- `verbose`: print `(error, length(L), count)` after each outer iteration.

# Returns
`(f, g, Z, L)`: the dual vectors, the sparse multiplier matrix, and the dictionary of
still-active constraints keyed by `(i, j)`.
"""
function calc_opt(a,b,C,γ = 1,τ = 1e-4; maxIters = 1000, verbose = false)
    f = γ.*a
    g = γ.*b
    n = length(f)
    m = length(g)

    # The name `error` is kept because `@show` prints the expression text verbatim.
    error = τ+1

    L = Dict{Tuple{Int,Int},Int}()
    Z = spzeros(n,m)

    # Multipliers smaller than this are treated as zero and their constraint forgotten.
    forget_tol = τ/(n*m)

    count = 1

    while error > τ && count < maxIters
        error = 0

        # Find violated constraints. A full scan is done on every iteration: the former
        # `count < maxIters+1` test could never fail inside this loop, so the random
        # sampling fallback behind it was unreachable and has been removed.
        for i = 1:n, j = 1:m
            if f[i]+g[j] > C[i,j]
                L[(i,j)] = 1
            end
        end

        # Project-and-forget sweeps over a snapshot of the active set, so entries can
        # be deleted from L while sweeping.
        for _ = 1:100
            for key in collect(keys(L))
                i, j = key

                θ = (C[i,j] - f[i] - g[j])/(2*γ)
                c = min(θ,Z[i,j])

                # Track the worst violation seen in this outer iteration.
                if error < -1*θ
                    error = abs(θ)
                end

                f[i] += γ*c
                g[j] += γ*c

                if c == Z[i,j] || Z[i,j] - c < forget_tol
                    Z[i,j] = 0
                    delete!(L,key)
                else
                    Z[i,j] -= c
                end
            end
        end

        if verbose
            @show((error,length(L),count))
        end

        count += 1
    end

    return f,g,Z,L
end

In [None]:
"""
    calc_opt_entropy(a, b, C, γ=1, τ=1e-4; maxIters=1000, verbose=false)

Project-and-forget iteration in multiplicative form, for the constraints
`f[i] + g[j] <= C[i, j]`.

`f` and `g` start at `exp.(γ .* a)` and `exp.(γ .* b)`. For the first 199 outer
iterations every violated pair is added to the active set `L`; afterwards `100n`
random pairs are added instead. Each outer iteration then sweeps 20 times over the
active set, scaling `f[i]` and `g[j]` by `exp(γ*c)` with `c = min(θ, Z[i, j])` and
`θ = log(C[i, j] / (f[i] + g[j])) / γ`, and forgetting a constraint once its
multiplier drops below `τ/n`.

Iteration stops when the largest violation `f[i] + g[j] - C[i, j]` seen in an outer
iteration is at most `τ`, or when `count` reaches `maxIters`. With `verbose=true`
the active-set size and `(error, length(L), count)` are printed each iteration.

Returns `(f, g, Z, L)`.
"""
function calc_opt_entropy(a,b,C,γ = 1,τ = 1e-4; maxIters = 1000, verbose = false)
    f = exp.(γ .* a)
    g = exp.(γ .* b)
    n, m = length(f), length(g)

    L = Dict()
    Z = spzeros(n, m)

    error = τ + 1
    count = 1

    while error > τ && count < maxIters
        error = 0

        # Grow the active set: full scan early on, random sampling later.
        if count >= 200
            for _ in 1:(100 * n)
                i = rand(1:n)
                j = rand(1:m)
                L[(i, j)] = 1
            end
        else
            for i in 1:n, j in 1:m
                if f[i] + g[j] > C[i, j]
                    L[(i, j)] = 1
                end
            end
        end
        if verbose
            @show(length(L))
            flush(stdout)
        end

        # Project-and-forget sweeps over a snapshot of the active keys.
        for _ in 1:20
            for key in collect(keys(L))
                i, j = key
                θ = log(C[i, j] / (f[i] + g[j])) / γ
                c = min(θ, Z[i, j])

                err = C[i, j] - f[i] - g[j]
                if -1 * err > error
                    error = abs(err)
                end

                scale = exp(γ * c)
                f[i] *= scale
                g[j] *= scale

                if c == Z[i, j] || Z[i, j] - c < τ / n
                    Z[i, j] = 0
                    delete!(L, key)
                else
                    Z[i, j] -= c
                end
            end
        end

        if verbose
            @show((error,length(L),count))
            flush(stdout)
        end

        count += 1
    end

    return f,g,Z,L
end

"""
    cont_opt_entropy(a, b, C, f, g, Z, L, γ=1, τ=1e-4; maxIters=1000, verbose=false)

Continue the multiplicative project-and-forget iteration from an existing state.

`f`, `g`, `Z` and `L` are updated **in place** and also returned. Unlike
`calc_opt_entropy`, the active set `L` is indexed as `L[i, j]` (positive means
active) rather than being a dictionary. `a` and `b` are accepted for signature
parity with the other solvers but are not read.

For the first 199 outer iterations every violated pair `f[i] + g[j] > C[i, j]` is
marked active (in a threaded loop over `i`); afterwards `100n` random pairs are
marked. Each outer iteration then makes 20 sweeps over all active pairs, scaling
`f[i]` and `g[j]` by `exp(γ*c)` with `c = min(θ, Z[i, j])` and
`θ = log(C[i, j] / (f[i] + g[j])) / γ`, and deactivating a pair once its multiplier
drops below `τ/n`.

Diagnostics (the active-set size before the sweeps, and `(error, sum(L.>0), count)`
after them) are printed only when `verbose=true`, matching `calc_opt_entropy`.

NOTE(review): the threaded marking loop writes `L[i, j]` from several threads. That
is only safe if writes to distinct elements of `L` are independent (e.g. a dense
`Matrix`); confirm the type callers pass.

Returns `(f, g, Z, L)`.
"""
function cont_opt_entropy(a,b,C,f,g,Z,L,γ = 1,τ = 1e-4 ; maxIters = 1000, verbose = false)
    n = length(f)
    m = length(g)

    error = τ+1
    count = 1

    while error > τ && count < maxIters

        error = 0

        # Find violated constraints.
        if count < 200
            Threads.@threads for i = 1:n
                for j = 1:m
                    if f[i]+g[j] > C[i,j]
                        L[i,j] = 1
                    end
                end
            end
        else
            for _ = 1:100*n
                i = rand(1:n)
                j = rand(1:m)
                L[i,j] = 1
            end
        end

        # Previously printed unconditionally; now honours `verbose`.
        if verbose
            @show(sum(L.>0))
            flush(stdout)
        end

        # Project-and-forget sweeps.
        for _ = 1:20
            for i = 1:n
                for j = 1:m
                    if L[i,j] > 0
                        θ = (log(C[i,j]/(f[i]+g[j]))/γ)
                        c = min(θ,Z[i,j])

                        err = C[i,j] - f[i] - g[j]

                        # Track the worst violation seen in this outer iteration.
                        if error < -1*err
                            error = abs(err)
                        end

                        f[i] *= exp(γ*c)
                        g[j] *= exp(γ*c)

                        if c == Z[i,j] || Z[i,j] - c < τ/n
                            Z[i,j] = 0
                            L[i,j] = 0
                        else
                            Z[i,j] -= c
                        end
                    end
                end
            end
        end

        if verbose
            @show((error,sum(L.>0),count))
            flush(stdout)
        end

        count += 1
    end

    return f,g,Z,L
end

In [None]:
"""
    myfunc(x::Vector, grad::Vector, y)

Evaluate `exp(x[1]) + exp(x[2]) - exp(y[1])*(1 + x[1] - y[1]) - exp(y[2])*(1 + x[2] - y[2])`.

When `grad` is non-empty its first two entries are overwritten with the partial
derivatives with respect to `x`, namely `exp(x[k]) - exp(y[k])`.
"""
function myfunc(x::Vector, grad::Vector,y)
    ex1, ex2 = exp(x[1]), exp(x[2])
    ey1, ey2 = exp(y[1]), exp(y[2])
    if !isempty(grad)
        grad[1] = ex1 - ey1
        grad[2] = ex2 - ey2
    end
    return ex1 + ex2 - ey1*(1 + x[1] - y[1]) - ey2*(1 + x[2] - y[2])
end

"""
    find_lambda(a, y)

Return both roots of `λ^2 + b*λ + c = 0` as a two-element `BigFloat` vector, where
`b = exp(y[1]) + exp(y[2])` and `c = exp(y[1] + y[2]) - exp(a)`. The root using
`+sqrt(...)` comes first.
"""
function find_lambda(a,y)
    b = BigFloat(exp(y[1])+exp(y[2]))
    c = BigFloat(exp(y[1]+y[2])-exp(a))

    centre = -b/2
    d = sqrt(b^2-4*c)/2

    return [centre + d, centre - d]
end

"""
    check_lambda(y, a, lambda)

Evaluate the candidate multiplier `lambda`: form `x[k] = log(lambda + exp(y[k]))`
for `k = 1, 2` and return `myfunc(x, zeros(2), y)`.

Returns `false` when either logarithm argument is negative, i.e. when `lambda` does
not give a real-valued `x`. The check is explicit rather than a blanket `try/catch`,
so unrelated failures (wrong argument shapes, errors inside `myfunc`) surface instead
of being silently reported as "infeasible".

`a` is accepted for signature compatibility and is not used.

Note for callers: an objective value of exactly zero compares equal to `false` with
`==`; use `=== false` to detect the infeasible case unambiguously.
"""
function check_lambda(y,a,lambda)
    s1 = lambda + exp(y[1])
    s2 = lambda + exp(y[2])
    # `log` of a negative real throws a DomainError; report that case as `false`.
    if s1 < 0 || s2 < 0
        return false
    end
    return myfunc([log(s1), log(s2)], zeros(2), y)
end

"""
    myconstraint_1(x::Vector, grad::Vector, a)

Return `x[1] + x[2] - a`. When `grad` is non-empty, its first two entries are set
to the gradient with respect to `x`, which is `1` in both components.
"""
function myconstraint_1(x::Vector, grad::Vector, a)
    if !isempty(grad)
        grad[1] = 1
        grad[2] = 1
    end
    return x[1]+x[2]-a
end
    
"""
    myconstraint_2(x::Vector, grad::Vector, a)

Return `-x[1] - x[2] + a`. When `grad` is non-empty, its first two entries are set
to the gradient with respect to `x`, which is `-1` in both components.
"""
function myconstraint_2(x::Vector, grad::Vector, a)
    if !isempty(grad)
        grad[1] = -1
        grad[2] = -1
    end
    return -x[1]-x[2]+a
end

# Module-level scratch vector. Within the visible code calc_opt_exp assigns its own
# local `y`, so nothing shown here reads this global.
global y = zeros(2)

# calc_opt_exp(a, b, C, γ, τ, maxIters): project-and-forget iteration on log-domain
# variables F = log.(a) .+ γ and G = log.(b) .+ γ for the constraints
# F[i] + G[j] <= C[i,j]. Candidate steps come from find_lambda and are scored with
# check_lambda. Returns (F, G, Z, L). Note maxIters is positional here, not a keyword.
# The function prints debug markers (1, 2, 3) for every processed constraint.
function calc_opt_exp(a,b,C,γ = 1,τ = 1e-4, maxIters = 1000)
    F = (log.(a) .+ γ)
    G = (log.(b) .+ γ)
    n = length(F)
    m = length(G)
    
    error = τ+1
    
    # Active constraints keyed by (i,j), and their multipliers.
    L = Dict()
    Z = spzeros(n,m)
    
    count = 1
    
    while(error > τ && count < maxIters)
        
        error = 0
        
        #Fing violated constraints
        # The loop condition already guarantees count < maxIters, so the `else`
        # branch (random sampling) cannot run.
        if count < maxIters
            for i = 1:n
                for j = 1:m
                    if F[i]+G[j] > C[i,j]
                        L[(i,j)] = 1
                    end
                end
            end
        else
            for _ = 1:n
                i = rand(1:n)
                j = rand(1:m)
                L[(i,j)] = 1
            end
        end
        
        #Do the project and forget steps
        for k = 1:5
            # Snapshot of the keys so entries can be deleted from L during the sweep.
            K = collect(keys(L))
            # Debug check: reports if any multiplier has become negative.
            if sum(Z .< 0) > 0
                println("Why the fuck")
            end
            for l = 1:length(K)
                i = K[l][1]
                j = K[l][2]
                
                # Local to this function; does not touch the module-level `y`.
                y =[F[i],G[j]]
                
                # Two candidate multipliers (roots of the quadratic in find_lambda).
                lambda = find_lambda(C[i,j],y)
                
                # Each r is either an objective value or `false` (candidate rejected).
                # NOTE(review): `r == false` is also true for a value of exactly 0.
                r1 = check_lambda(y,C[i,j],lambda[1])
                r2 = check_lambda(y,C[i,j],lambda[2])
                
                # Pick θ from the only accepted candidate, or the one with the smaller
                # value when both are accepted; θ = 0 when neither is.
                if r1 == false && r2 != false
                    θ = lambda[2]/(exp(γ))
                    println(2)
                elseif r2 == false && r1 != false
                    θ = lambda[1]/(exp(γ))
                elseif r1 == false && r2 == false
                    θ = 0
                    println(3)
                else
                    if r1 < r2
                        θ = lambda[1]/(exp(γ))
                        println(1)
                    else
                        θ = lambda[2]/(exp(γ))
                        println(2)
                    end
                        
                end

                #opt.min_objective = (x,g) -> myfunc(x,g,y) 
                #inequality_constraint!(opt, (x,grad) -> myconstraint_1(x,grad,C[i,j]), 1e-8)
                #inequality_constraint!(opt, (x,grad) -> myconstraint_2(x,grad,C[i,j]), 1e-8)

                #(minf,minx,ret) = NLopt.optimize(opt, zeros(2))
                
                #θ = lambda[1]/(exp(γ))
                #@show(exp.(minx)-exp.(y))
                #flush(stdout)
                err = C[i,j] - F[i] - G[j]
                # `c` is computed but not used below.
                c = min(θ,Z[i,j])
                
                # Track the worst violation seen in this outer iteration.
                if error < -1*err
                    error = abs(err)
                end
                if θ < Z[i,j]
                    # NOTE(review): the update always uses lambda[1], even when θ was
                    # taken from lambda[2] (or set to 0) above — confirm this is intended.
                    F[i] = log(lambda[1] + exp(y[1]))
                    G[j] = log(lambda[1] + exp(y[2]))
                    Z[i,j] -= θ
                else
                    # Step capped by the stored multiplier: spend all of Z[i,j] and
                    # forget the constraint.
                    delete!(L,K[l])
                    F[i] = log(exp(F[i]) + exp(γ)*Z[i,j])
                    G[j] = log(exp(G[j]) + exp(γ)*Z[i,j])
                    Z[i,j] = 0
                end
            end
        end
    
        # Progress report every 500 outer iterations.
        if count%500 == 0
            @show((error,length(L),count))
            flush(stdout)
        end
        
        count += 1
    end
    
    return F,G,Z,L
end

# Verifying Theoretical Properties

In [None]:
# Result buffers for the quadratic solver (10 slots; the loop below fills the first 5).
wasserstein = zeros(10)
active  = zeros(10)
sparsity  = zeros(10)
Zs  = []
fs = []
gs  = []

In [None]:
# Run calc_opt for γ = 10^1 … 10^5 and record, per run, the transport cost sum(Z.*D),
# the number of constraints still active, the number of positive entries of Z, and
# the solution itself.
for i = 1:5
    γ = 10^(i)
    t = @elapsed f,g,Z,L = calc_opt(a,b,D,γ,1e-15,maxIters=15000);
    wasserstein[i] = sum(Z.*D)
    active[i] = length(L)
    sparsity[i] = sum(Z.>0)
    push!(Zs,Z)
    push!(fs,f)
    push!(gs,g)
    @show(t)
    flush(stdout)
end

In [None]:
# Result buffers for the entropy solver (10 slots; the loop below fills the first 4).
wasserstein_entropy = zeros(10)
active_entropy  = zeros(10)
sparsity_entropy  = zeros(10)
Zs_entropy  = []
fs_entropy  = []
gs_entropy  = []

# Run calc_opt_entropy for γ = 10^1 … 10^4. The cost passed to the solver is clamped
# below at 1e-10 (the solver takes log of C[i,j]); the recorded cost uses the raw D.
for e = 1:4
    γ = 10^e
    t = @elapsed f,g,Z,L = calc_opt_entropy(a,b,max.(1e-10,D),γ,1e-10,maxIters=10000);
    wasserstein_entropy[e] = sum(Z.*D)
    active_entropy[e] = length(L)
    sparsity_entropy[e] = sum(Z.>0)
    push!(Zs_entropy ,Z)
    push!(fs_entropy ,f)
    push!(gs_entropy ,g)
    @show(t)
    flush(stdout)
end

In [None]:
# Result buffers for the exponential solver (10 slots; the loop below fills the first 4).
wasserstein_exp = zeros(10)
active_exp  = zeros(10)
sparsity_exp  = zeros(10)
Zs_exp  = []
fs_exp  = []
gs_exp  = []

In [None]:
# Run calc_opt_exp for γ = log(10^1) … log(10^4); maxIters (20000) is positional here.
for e = 1:4
    γ = log(10^e)
    t = @elapsed f,g,Z,L = calc_opt_exp(a,b,D,γ,1e-15,20000);
    wasserstein_exp[e] = sum(Z.*D)
    active_exp[e] = length(L)
    sparsity_exp[e] = sum(Z.>0)
    push!(Zs_exp,Z)
    push!(fs_exp,f)
    push!(gs_exp,g)
    @show(t)
    flush(stdout)
end

In [None]:
# Dual objective of the quadratic runs:
# <f,a> + <g,b> - (‖f‖² + ‖g‖²) / (2γ), with γ = 10^i as in the run loop.
drot = zeros(10)
for i = 1:5
    drot[i] = sum(fs[i].*a)+sum(gs[i].*b)
    drot[i] -= 0.5*(norm(fs[i])^2)/(10^i)
    drot[i] -= 0.5*(norm(gs[i])^2)/(10^i)
end
drot

In [None]:
# Objective value for the entropy runs; also recomputes the transport cost per run.
drot_entropy = zeros(10)
for i = 1:4
    drot_entropy[i] = sum(fs_entropy[i].*a)+sum(gs_entropy[i].*b)
    drot_entropy[i] += sum(fs_entropy[i].*(log.(fs_entropy[i]).-1))/(10^i)
    drot_entropy[i] += sum(gs_entropy[i].*(log.(gs_entropy[i]).-1))/(10^i)
    wasserstein_entropy[i] = sum(Zs_entropy[i].*D)
end
@show(wasserstein_entropy)
drot_entropy

In [None]:
# Objective value for the exponential runs.
# NOTE(review): only runs 1:3 are evaluated although four were stored, and the
# divisor is 10^i while those runs used γ = log(10^e) — confirm both are intended.
drot_exp = zeros(10)
for i = 1:3
    drot_exp[i] = sum(fs_exp[i].*a)+sum(gs_exp[i].*b)
    drot_exp[i] -= sum(exp.(fs_exp[i]))/(10^i)
    drot_exp[i] -= sum(exp.(gs_exp[i]))/(10^i)
end
drot_exp

In [None]:
# Set-up for the marginal plots. This overwrites the global x with 1:100.
ra_exp = []
rb_exp = []
ra_error_entropy = zeros(10)
rb_error_entropy = zeros(10)
n = length(a)
m = length(b)
x = collect(1:100)
bar(a, label="a", color = :yellow)
#scatter!(x,b, label = "b")

In [None]:
# Row sums (Z*1) and column sums (Z'*1) of a stored plan, i.e. its two marginals.
# NOTE(review): the variables are named *_entropy but are computed from Zs (the
# quadratic runs); the i = 2 values are overwritten by the i = 1 values before plotting.
i = 2
Ra_entropy = Zs[i]*ones(m)
Rb_entropy = Zs[i]'*ones(n)
#bar(Ra_entropy, label = "Entropy, γ = 1e2", color = :green)
i = 1
Ra_entropy = Zs[i]*ones(m)
Rb_entropy = Zs[i]'*ones(n)
bar!(Ra_entropy, label = "Quadratic, γ = 1e1", color = :red)

In [None]:
# NOTE(review): labelled "Exponential" but also read from Zs — confirm the intended list.
i = 2;
Ra_entropy = Zs[i]*ones(m);
Rb_entropy = Zs[i]'*ones(n);
bar!(Ra_entropy, label = "Exponential, γ = 1e2", color = :orange)
    #plot!(Rb_entropy, mc =:red, shape=:square, legend=:false)
#savefig("mass-change-entropy-$i.png")

In [None]:
savefig("entropy-mass-creation-differences.png")

In [None]:
# Overlay the input marginals on the current plot.
scatter!(x,a, label="a")
scatter!(x,b, label = "b")

In [None]:
# Fresh plot of the input marginals.
scatter(x,a, label="a")
scatter!(x,b, label = "b")

In [None]:
# Input marginals together with the marginals of one stored plan.
# NOTE(review): i = 6, but the quadratic run loop visible in this file stores only
# five plans in Zs.
n = length(a)
m = length(b)
scatter(x,a, label="a")
scatter!(x,b, label = "b")
i = 6
    Ra = Zs[i]*ones(m)
    Rb = Zs[i]'*ones(n)
    scatter!(x,Ra, mc =:blue, shape =:cross)
    scatter!(x,Rb, mc =:red, shape=:cross, legend=:false)
    savefig("mass-change-quadraic-$i-new.png")

In [None]:
# Number of positive plan entries per run for the three solvers, with a constant
# reference line at 199.
plot(sparsity[1:5], label="Quadratic", color =:green)
plot!(sparsity_exp[1:4], label ="Exponential", color =:blue)
plot!(sparsity_entropy[1:4], label = "Entropy", color =:red)
scatter!(sparsity[1:5], label="Quadratic", color =:green)
scatter!(sparsity_exp[1:4], label ="Exponential", color =:blue)
scatter!(sparsity_entropy[1:4], label = "Entropy", color =:red)
plot!(199*ones(5), label = "True Optimal Transport", color=:black)
plot!(xlabel = "log(γ)", ylabel = "Number of active constraints", legend=:bottomright)


In [None]:
# Active-constraint counts of the quadratic runs (slot 6 is still its initial 0).
scatter(active[1:6], xlabel = "log(γ)", ylabel = "Number of active constraints", legend=false)
plot!(active[1:6], xlabel = "log(γ)", ylabel = "Number of active constraints", legend=false)

In [None]:
# NOTE(review): ra_error and rb_error are not defined in the visible cells.
scatter(ra_error[1:6], yaxis=:log, label="Error in distribution a", color=:red)
plot!(ra_error[1:6], color=:red)
plot!(rb_error[1:6], color=:blue)
scatter!(rb_error[1:6], yaxis=:log, color=:blue,label="Error in distribution b", xlabel = "log(γ)", ylabel = "Error", legend=false)

In [None]:
# Relative error of the quadratic dual objective against the exact cost W (red).
scatter(abs.(W.-drot[1:4])/W, yaxis=:log, xlabel = "log(γ)", ylabel = "Error", color =:red)
plot!(abs.(W.-drot[1:4])/W, yaxis=:log, xlabel = "log(γ)", ylabel = "Error", color=:red)

In [None]:
# Relative error of the quadratic transport cost against W (blue).
scatter!(abs.(W.-wasserstein[1:4])/W, yaxis=:log, legend=false, color = :blue)
plot!(abs.(W.-wasserstein[1:4])/W, yaxis=:log, legend=false, color = :blue)

In [None]:
savefig("wasserstein-quadratic-error.png")

In [None]:
# Same pair of error curves for the entropy runs.
scatter(abs.(W.-drot_entropy[1:4])/W, yaxis=:log, xlabel = "log(γ)", ylabel = "Error", color =:red)
plot!(abs.(W.-drot_entropy[1:4])/W, yaxis=:log, xlabel = "log(γ)", ylabel = "Error", color=:red)

In [None]:
scatter!(abs.(W.-wasserstein_entropy[1:4])/W, yaxis=:log, legend=false, color = :blue)
plot!(abs.(W.-wasserstein_entropy[1:4])/W, yaxis=:log, legend=false, color = :blue)

In [None]:
savefig("wasserstein-entropy-error.png")

In [None]:
# Same pair of error curves for the exponential runs (first three runs only).
scatter(abs.(W.-wasserstein_exp[1:3])/W, yaxis=:log, legend=false, color = :blue)
plot!(abs.(W.-wasserstein_exp[1:3])/W, yaxis=:log, legend=false, color = :blue)

In [None]:
# Unlike the other error plots, no abs() is applied before the log-scale plot.
scatter!((W.-drot_exp[1:3])/W, yaxis=:log, xlabel = "log(γ)", ylabel = "Error", color =:red)
plot!((W.-drot_exp[1:3])/W, yaxis=:log, xlabel = "log(γ)", ylabel = "Error", color=:red)

In [None]:
savefig("wasserstein-exponential-error.png")

# Method experiment

## Solve Primal using CPLEX and Mosek

In [None]:
# NOTE(review): TZ is not defined in the visible cells.
heatmap(TZ', color=:viridis)

In [None]:
# JuMP model with variables f, g and one constraint f[i] + g[j] <= D[i,j] per pair.
# `t` accumulates the time spent building the model. The size n = 501 is hard-coded,
# so a, b and D must have at least that many entries.
model = Model()
n = 501
t = 0
t += @elapsed @variable(model, f[1:n])
t += @elapsed @variable(model, g[1:n]);
cons = Array{Any,2}(undef, n,n)
for i = 1:n
    for j = 1:n
        t += @elapsed cons[i,j] = @constraint(model, f[i]+g[j] <= D[i,j])
    end
end

γ = 1e3

In [None]:
# Objective: <f,a> + <g,b> - (‖f‖² + ‖g‖²) / (2γ), maximised.
@objective(model, Max, sum(f.*a)+sum(g.*b) - (sum(f.^2)+sum(g.^2))/(2*γ));

In [None]:
set_optimizer(model, Mosek.Optimizer)

In [None]:
JuMP.optimize!(model)

In [None]:
# Read the constraint duals into an n×n matrix; the time is added to `t`.
P = zeros(n,n)
t += @elapsed for i = 1:n
    for j = 1:n
        P[i,j] = dual(cons[i,j])
    end
end

In [None]:
t

In [None]:
# Re-solve the same model with each of the other installed solvers.
set_optimizer(model, CPLEX.Optimizer)
JuMP.optimize!(model)

In [None]:
set_optimizer(model, OSQP.Optimizer)
JuMP.optimize!(model)

In [None]:
set_optimizer(model, SCS.Optimizer)
JuMP.optimize!(model)

In [None]:
set_optimizer(model, ECOS.Optimizer)
JuMP.optimize!(model)

In [None]:
set_optimizer(model, ProxSDP.Optimizer)
JuMP.optimize!(model)

## Solving the dual with CPLEX and Mosek

In [None]:
using Convex

In [None]:
# Convex.jl formulation over a plan variable P; this rebinds the global P.
@time P = Variable(length(a),length(b));

In [None]:
@time vone = ones(length(a));

In [None]:
# Variant 1: marginal mismatch penalised quadratically with weight γ/2.
@time problem = minimize(sum(D.*P)+sumsquares(a-P*vone)*γ/2+sumsquares(b-P'*vone)*γ/2, [P >= 0]);

In [None]:
# Variant 2: marginals imposed as equality constraints. Replaces `problem` from the
# previous cell if both cells are run.
@time problem = minimize(sum(D.*P), [P >= 0, a==P*vone,b==P'*vone]);

In [None]:
t = @elapsed solve!(problem, CPLEX.Optimizer);

# Color Transfer

In [None]:
# Load the source image (img1) whose colours will be replaced.
using FileIO
img1 = load("./beach.jpg");

In [None]:
# Load the image (img2) that supplies the target colours.
img2 = load("./barcelona-morning-sky.jpg");

In [None]:
# Flatten each colour channel of both images into a Float64 vector.
r1 = convert(Array{Float64,1},vec(red.(img1)));
g1 = convert(Array{Float64,1},vec(green.(img1)));
b1 = convert(Array{Float64,1},vec(blue.(img1)));

r2 = convert(Array{Float64,1},vec(red.(img2)));
g2 = convert(Array{Float64,1},vec(green.(img2)));
b2 = convert(Array{Float64,1},vec(blue.(img2)));

In [None]:
# Pixel counts of the two images.
n = size(img1)[1]*size(img1)[2]
m = size(img2)[1]*size(img2)[2]

In [None]:
# One row per pixel, one column per channel. This rebinds the globals d1 and d2.
d1 = zeros(n,3)
d2 = zeros(m,3);

In [None]:
for i = 1:n
    d1[i,1] = r1[i]
    d1[i,2] = g1[i]
    d1[i,3] = b1[i]
end
    
for i = 1:m
    d2[i,1] = r2[i]
    d2[i,2] = g2[i]
    d2[i,3] = b2[i]
end

In [None]:
@show((n,m))

In [None]:
# Cluster each image's pixels into k colour centres (kmeans is given the 3×n transpose).
k = 4096
c1 = kmeans(d1',k)

In [None]:
c2 = kmeans(d2',k)

In [None]:
# Rebind d1 and d2 to 0 (presumably to let the pixel matrices be garbage-collected).
d1 = 0

In [None]:
d2 = 0

In [None]:
# Histogram of cluster assignments for image 1 …
a = zeros(k)
for i = 1:n
    a[c1.assignments[i]]+=1
end

In [None]:
# … and for image 2.
b = zeros(k)
for i = 1:m
    b[c2.assignments[i]]+=1
end

In [None]:
# Normalise both histograms to sum to one.
a = a./sum(a);
b = b./sum(b);

In [None]:
# Cost: squared Euclidean distance between cluster centres.
D = zeros(k,k)

for i = 1:k
    for j = 1:k
        D[i,j] = sum((c1.centers[:,i]-c2.centers[:,j]).^2)
    end
end

In [None]:
# The next four cells are alternative ways of producing the plan Z; whichever cell
# ran last determines the Z used below.
Z = ot.sinkhorn(a,b, D, 1e-2)

In [None]:
Z = ot.unbalanced.sinkhorn_stabilized_unbalanced(a, b, D, 1e-3, 1e1)

In [None]:
Z = ot.emd(a,b, PyReverseDims(D))'

In [None]:
# NOTE(review): D2 is not defined in the visible cells.
γ = 1e5
t = @elapsed f,g,Z,L = calc_opt_entropy(a,b,D2,γ,1e-5,maxIters = 1000, verbose = true);

In [None]:
new_c = zeros(k,3);

In [None]:
# New colour of cluster i: Z-weighted average of the image-2 centres, normalised by
# the row sum of Z when that sum is positive.
for i = 1:k
    for j = 1:k
        new_c[i,:] += Z[i,j]*c2.centers[:,j]
    end
    if sum(Z[i,:]) > 0
        new_c[i,:] = new_c[i,:]/sum(Z[i,:])
    end
end

In [None]:
new_r = zeros(n)
new_g = zeros(n)
new_b = zeros(n);

In [None]:
# Give every pixel of image 1 the new colour of its cluster.
for i = 1:n
    new_r[i] = new_c[c1.assignments[i],1]
    new_g[i] = new_c[c1.assignments[i],2]
    new_b[i] = new_c[c1.assignments[i],3]
end

In [None]:
# Back to image shape.
new_r_img = reshape(new_r,size(img1))
new_g_img = reshape(new_g,size(img1))
new_b_img = reshape(new_b,size(img1));

In [None]:
new_img = RGB.(new_r_img,new_g_img,new_b_img);

In [None]:
# The file name is fixed; it does not change with the solver cell that produced Z.
save("beach-barcelona-k-4096-gamma-1e-3-1e1-UOT.png", new_img)

# MNIST - USPS

In [None]:
using HDF5, MLDatasets, JLD2, FileIO, Plots, MAT
plotly()

In [None]:
SVHN = matread("test_32x32.mat")

In [None]:
svhn_x = reshape(convert(Array{Float64,4},SVHN["X"]/255),:,26032);

In [None]:
file = h5open("usps.h5")

In [None]:
d = read(file)

In [None]:
usps_train = reshape(d["train"]["data"],(16,16,:));

In [None]:
i = 2

In [None]:
heatmap(usps_train[:,:,i]',legend=false)

In [None]:
heatmap(new_usps_balanced[:,:,i]',legend=false)

In [None]:
heatmap(new_usps_emd[:,:,i]',legend=false)

In [None]:
heatmap(new_usps_sinkhorn[:,:,i]',legend=false)

In [None]:
heatmap(new_usps_stochastic[:,:,i]',legend=false)

In [None]:
heatmap(new_usps_entropy[:,:,i]',legend=false)

In [None]:
heatmap(new_usps_quadratic[:,:,i]',legend=false)

In [None]:
usps_x = zeros(28,28,size(usps_train)[3]);
for i = 1:size(usps_x)[3]
    usps_x[7:22,7:22,i] = usps_train[:,:,i]
end

In [None]:
usps_x = reshape(usps_x,:,size(usps_x)[3])

In [None]:
train_x, train_y = MNIST.traindata();

In [None]:
mnist_x = zeros(32,32,60000);

In [None]:
train_x = convert(Array{Float64,3},train_x);

In [None]:
for i = 1:60000
    mnist_x[3:30,3:30,i] = train_x[:,:,i]
end

In [None]:
mnist_x = reshape(train_x,:,60000)

In [None]:
mnist_x = vcat(mnist_x,mnist_x,mnist_x);

In [None]:
using Distances
D = pairwise(Euclidean(), usps_x, mnist_x, dims=2)

In [None]:
reg = 1
batch_size = 1000
n,m = size(D)

In [None]:
Z4 = ot.stochastic.solve_dual_entropic(ones(n)/n,ones(m)/m, D, reg, batch_size)

In [None]:
Z3 = ot.unbalanced.sinkhorn_stabilized_unbalanced(ones(n)/n,ones(m)/m, D,1, 1)

In [None]:
Z1 = ot.emd(ones(m)/m,ones(n)/n, PyReverseDims(D))

In [None]:
Z2 = ot.sinkhorn(ones(n)/n,ones(m)/m, D, 1)

In [None]:
γ = 1e5
n,m = size(D)
t = @elapsed f,g,Z,L = calc_opt_exp(ones(n)/n,ones(m)/m,D,γ,1e-5,10000);

In [None]:
@save "MNIST-SVHN_test-OPT-entropy.jld2" Dict("Z" => Z, "f" => f, "g" => g, "L" => L, "gamma" => γ)

In [None]:
MT = MT[collect(keys(MT))[1]]
Z = MT["Z"]

In [None]:
new_usps_ent = zeros(28,28,7291);
#new_usps2 = zeros(28,28,7291);
#new_usps3 = zeros(28,28,7291);
#new_usps4 = zeros(28,28,7291);
for i = 1:7291
    for j = 1:60000
        new_usps_ent[:,:,i] += Z[i,j]*train_x[:,:,j]
        #new_usps2[:,:,i] += Z2[i,j]*train_x[:,:,j]
        #new_usps3[:,:,i] += Z3[i,j]*train_x[:,:,j]
        #new_usps4[:,:,i] += Z4[i,j]*train_x[:,:,j]
    end
    #if sum(Z1[:,i]) > 0
    #    new_usps1[:,:,i] = new_usps1[:,:,i]/sum(Z1[:,i])
    #end
    #if sum(Z2[i,:]) > 0
    #    new_usps2[:,:,i] = new_usps2[:,:,i]/sum(Z2[i,:])
    #end
    #if sum(Z3[i,:]) > 0
    #    new_usps3[:,:,i] = new_usps3[:,:,i]/sum(Z3[i,:])
    #end
    if sum(Z[i,:]) > 0
        new_usps_ent[:,:,i] = new_usps_ent[:,:,i]/sum(Z[i,:])
    end
    if i %200 == 0
        println(i)
        flush(stdout)
    end
end

In [None]:
@save "USPS-sinkhorn" new_usps2
@save "USPS-unbalanced" new_usps3
@save "USPS-stochastic" new_usps4
@save "USPS-emd" new_usps1

In [None]:
heatmap(new_usps1[:,:,12]')

In [None]:
heatmap(usps_train[:,:,12]')

In [None]:
# Load the saved result; the payload sits under the file's first key, and the
# image stack under its "data" entry.
R = load("new-USPS-quadratic.jld2")
k = collect(keys(R))[1]
new_usps_quadratic = R[k]["data"]

In [None]:
# Flatten every stack to a matrix with 7291 columns (one column per sample).
# NOTE(review): only new_usps_quadratic is assigned in the nearby cells; the
# other stacks are presumably loaded elsewhere — confirm.
new_usps_balanced = reshape(new_usps_balanced,:,7291)
new_usps_emd = reshape(new_usps_emd,:,7291)
new_usps_sinkhorn = reshape(new_usps_sinkhorn,:,7291)
new_usps_stochastic = reshape(new_usps_stochastic,:,7291)
new_usps_entropy = reshape(new_usps_entropy,:,7291)
new_usps_quadratic = reshape(new_usps_quadratic,:,7291)

In [None]:
using Distances
# Euclidean distance between every projected-sample column and every column of
# mnist_x (dims=2 compares columns).
# NOTE(review): D_unbalanced is built from `new_usps_balanced` — check that the
# naming mismatch is intentional.
D_unbalanced = pairwise(Euclidean(), new_usps_balanced, mnist_x, dims=2)
D_emd = pairwise(Euclidean(), new_usps_emd, mnist_x, dims=2)
D_stochastic = pairwise(Euclidean(), new_usps_stochastic, mnist_x, dims=2)
D_sinkhorn = pairwise(Euclidean(), new_usps_sinkhorn, mnist_x, dims=2)
D_entropy = pairwise(Euclidean(), new_usps_entropy, mnist_x, dims=2)
D_quadratic = pairwise(Euclidean(), new_usps_quadratic, mnist_x, dims=2)

In [None]:
# Nearest-neighbour labelling along the rows of D_quadratic: row i receives the
# train_y label of the column with the smallest distance.
pred = zeros(7291)
for i in eachindex(pred)
    nearest = argmin(@view D_quadratic[i, :])
    pred[i] = train_y[nearest]
end

In [None]:
# Inspect the "train" entry of d; its "target" field is used as the label vector below.
d["train"]

In [None]:
# Fraction of the 7291 predictions in `pred` that match d["train"]["target"].
sum(pred .== d["train"]["target"])/7291

In [None]:
# Reverse direction: label every column of D_quadratic with the usps_y label of
# its closest row, then report the fraction that agrees with train_y.
usps_y = d["train"]["target"]

pred2 = zeros(60000)
for i in eachindex(pred2)
    nearest = argmin(@view D_quadratic[:, i])
    pred2[i] = usps_y[nearest]
end

count(pred2 .== train_y)/60000

In [None]:
# Scratch cell: evaluates the literal 1 and does not touch notebook state.
1

# Text Alignment

In [None]:
# Python dependencies: nltk for sentence splitting, fasttext for embeddings.
nltk = pyimport("nltk")
ft = pyimport("fasttext");

In [None]:
# Load the fastText model from a path relative to the working directory.
model = ft.load_model("./fastText/cc.en.300.bin")

In [None]:
# The 16 text files compared pairwise below; the index in this vector is the
# row/column index of every 16×16 result matrix.
texts = [
"the sign of four.txt",
"the man in the brown suit.txt",
"the inimitable jeeves.txt",
"the idiot.txt",
"the hound of baskerville.txt",
"the gambler.txt",
"murder on the links.txt",
"my man jeeves.txt",
"poirot investigates.txt",
"right ho jeeves.txt",
"the adventures of sherlock holmes.txt",
"merchant of venice.txt",
"macbeth.txt",
"love among chickens.txt",
"julius caesar.txt",
"crime and punishment.txt"]

In [None]:
# NOTE(review): text_t is not constructed in the nearby cells; it is used further
# down as the label matrix for roc_auc_score. Entry (13,12) corresponds to
# "macbeth.txt" vs "merchant of venice.txt" in `texts`.
text_t[13,12] = 0

In [None]:
# Pairwise optimal-transport comparison of the 16 files in `texts`.
# For each pair (t1, t2) the cost matrix D holds the cosine distance between
# sentence embeddings (floored at 1e-10); five solvers are run on it with uniform
# marginals. `<method>[t1,t2]` stores the transport cost sum(Z .* D) and
# `<method>_s[t1,t2]` the fraction of non-zero entries in the plan.
emd = zeros(16,16)
entropy = zeros(16,16)
quadratic = zeros(16,16)
cuturi = zeros(16,16)
unbalanced = zeros(16,16)

emd_s = zeros(16,16)
entropy_s = zeros(16,16)
quadratic_s = zeros(16,16)
cuturi_s = zeros(16,16)
unbalanced_s = zeros(16,16)

# Embed every text once. The previous version re-read, re-tokenised and
# re-embedded the second text for each of the 16×16 pairs.
embeddings = Vector{Matrix{Float64}}(undef, 16)
for t = 1:16
    # read(path, String) opens and closes the file itself; read(open(...), String)
    # left the handle open until garbage collection.
    T = read(texts[t], String);
    T = replace(T, "\n" => " ");
    R = nltk.sent_tokenize(T)
    E = zeros(300,length(R))
    ncols = 0
    for i = 1:length(R)
        # Skip empty sentences; the filled columns stay contiguous.
        if length(R[i]) > 0
            ncols += 1
            E[:,ncols] = model.get_sentence_vector(R[i])
        end
    end
    embeddings[t] = E[:,1:ncols]
end

for t1 = 1:16
    X = embeddings[t1]
    N = size(X, 2)
    # Column norms are loop-invariant, so compute them once per text.
    xnorm = [norm(X[:,i]) for i = 1:N]
    for t2 = 1:16
        Y = embeddings[t2]
        M = size(Y, 2)
        ynorm = [norm(Y[:,j]) for j = 1:M]

        D = zeros(N,M)
        for j = 1:M, i = 1:N
            D[i,j] = max(1e-10,1-sum(X[:,i].*Y[:,j])/(xnorm[i]*ynorm[j]))
        end

        @time f,g,Z_entropy,l_entropy = calc_opt_entropy(ones(N)/N,ones(M)/M,D,1, verbose = false)
        flush(stdout)
        @time f,g,Z_quadratic,l_quadratic = calc_opt(ones(N)/N,ones(M)/M,D,1e2, verbose = false)
        flush(stdout)
        # ot.emd gets the reversed-dims view of D with swapped marginals, so its
        # plan is M×N and is transposed before being multiplied with D below.
        @time Z_emd = ot.emd(ones(M)/M,ones(N)/N,PyReverseDims(D))
        flush(stdout)
        @time Z_cuturi = ot.sinkhorn(ones(N)/N,ones(M)/M,D,1)
        flush(stdout)
        @time Z_unbalanced = ot.unbalanced.sinkhorn_stabilized_unbalanced(ones(N)/N,ones(M)/M, D, 1, 1)
        flush(stdout)

        emd[t1,t2] = sum(Z_emd'.*D)
        entropy[t1,t2] = sum(Z_entropy.*D)
        quadratic[t1,t2] = sum(Z_quadratic.*D)
        cuturi[t1,t2] = sum(Z_cuturi.*D)
        unbalanced[t1,t2] = sum(Z_unbalanced.*D)

        @show((emd[t1,t2],entropy[t1,t2],quadratic[t1,t2],cuturi[t1,t2],unbalanced[t1,t2]))

        emd_s[t1,t2] = sum(Z_emd.!=0)/(N*M)
        entropy_s[t1,t2] = sum(Z_entropy.!=0)/(N*M)
        quadratic_s[t1,t2] = sum(Z_quadratic.!=0)/(N*M)
        cuturi_s[t1,t2] = sum(Z_cuturi.!=0)/(N*M)
        unbalanced_s[t1,t2] = sum(Z_unbalanced.!=0)/(N*M)
    end
end

In [None]:
# ROC-AUC of each 16×16 cost matrix against the labels in text_t (both flattened).
# NOTE: `sk` is bound by `sk = pyimport("sklearn")` in a cell further down the
# notebook, so that cell has to be run first.
sk.metrics.roc_auc_score(vec(text_t),vec(emd))

In [None]:
sk.metrics.roc_auc_score(vec(text_t),vec(entropy))

In [None]:
sk.metrics.roc_auc_score(vec(text_t),vec(quadratic))

In [None]:
sk.metrics.roc_auc_score(vec(text_t),vec(cuturi))

In [None]:
sk.metrics.roc_auc_score(vec(text_t),vec(unbalanced))

In [None]:
# Inspect one element of R (whatever R was last bound to in the notebook).
R[1086]

In [None]:
# Rebuild the cosine-distance matrix between the columns of X and Y, floored at
# 1e-3, and print every entry that is not finite (e.g. NaN from a zero-norm column).
# Both matrices are indexed column-wise below, so the counts must both come from
# dimension 2. The previous `size(X)[1]` used the row count of X instead.
N = size(X, 2)
M = size(Y, 2)

D = zeros(N,M)
for i = 1:N
    for j = 1:M
        D[i,j] = max(1e-3,1-sum(X[:,i].*Y[:,j])/(norm(X[:,i])*norm(Y[:,j])))
        if !isfinite(D[i,j])
            @show((D[i,j],i,j))
        end
    end
end

In [None]:
# Interactive inspection of one (X, Y) pair using the current global D, N, M.
f,g,Z,l = calc_opt_entropy(ones(N)/N,ones(M)/M,D,1, verbose = true)

In [None]:
# Transport cost divided by the total mass of the plan.
sum(Z.*D)/(sum(Z))

In [None]:
# Exact EMD on the reversed-dims view of D with swapped marginals; the result is
# transposed below before being multiplied with D.
Z_emd = ot.emd(ones(M)/M,ones(N)/N,PyReverseDims(D))

In [None]:
sum(Z_emd'.*D)

In [None]:
# Number of strictly positive entries in the EMD plan.
sum(Z_emd .>0)

In [None]:
Z_cuturi = ot.sinkhorn(ones(N)/N,ones(M)/M,D,1)

In [None]:
sum(Z_cuturi.*D)

In [None]:
# Number of entries above 1e-5 in the Sinkhorn plan.
sum(Z_cuturi.>1e-5)

In [None]:
# Parse up to the first 100 lines of "test.src". Each line is split on "|||||"
# (the last piece is dropped) into documents; every document is sentence-split
# with nltk and embedded with the fastText model, one sentence per row.
# Result: Xs[i] holds the embedding matrices of line i, and num_text counts
# documents over all processed lines.
T = readlines("test.src");   # path form closes the file; readlines(open(...)) leaked the handle
Xs = Vector{Vector{Matrix{Float64}}}()
num_text = 0
for i = 1:min(100, length(T))   # shorter files no longer raise a BoundsError
    T[i] = replace(T[i], "NEWLINE_CHAR  NEWLINE_CHAR" => "")
    A = split(T[i],"|||||")[1:end-1]
    Y = Matrix{Float64}[]
    for j = 1:length(A)
        r = nltk.sent_tokenize(A[j])
        N = length(r)
        X = zeros(N,300)
        count = 1
        for k = 1:N
            # Skip empty sentences; the filled rows stay contiguous.
            if length(r[k]) > 0
                X[count,:] = model.get_sentence_vector(r[k])
                count += 1
            end
        end
        X = X[1:count-1,:]
        push!(Y,X)
    end
    push!(Xs,Y)

    num_text += length(A)
end

In [None]:
# Number of documents in each entry of Xs; the scoring loops below use L[i] as
# the inner loop bound.
# NOTE: `L` is also assigned by the calc_opt_exp cell earlier in this notebook
# (`f,g,Z,L = ...`); re-run this cell if that one was executed in between.
L = length.(Xs);

In [None]:
# All-pairs scores under calc_opt (last argument 1e1). Documents are numbered in
# flattened (entry of Xs, position within entry) order: count1 is the row
# document and count2 the column document. D is the cosine distance between
# sentence rows, clamped at 0.
S_quadratic = zeros(num_text,num_text)
average_sparsity_quadratic  = 0
count1 = 1
count2 = 1
for i1 in 1:length(Xs)
    println("$i1 \r")
    flush(stdout)
    for i2 in 1:L[i1]
        count2 = 1
        X = Xs[i1][i2]
        N = size(X, 1)
        xnorms = [norm(X[i,:]) for i in 1:N]
        for j1 in 1:length(Xs), j2 in 1:L[j1]
            Y = Xs[j1][j2]
            M = size(Y, 1)
            ynorms = [norm(Y[j,:]) for j in 1:M]
            D = zeros(N,M)
            for i in 1:N, j in 1:M
                D[i,j] = max(0,1-sum(X[i,:].*Y[j,:])/(xnorms[i]*ynorms[j]))
            end

            f,g,Z,l = calc_opt(ones(N)/N,ones(M)/M,D,1e1)

            # Running mean of length(l) over all document pairs.
            average_sparsity_quadratic += length(l)/(num_text*num_text)
            S_quadratic[count1,count2] = sum(D.*Z)
            count2 += 1
        end
        count1 += 1
    end
end

In [None]:
# All-pairs scores under calc_opt_exp (last argument 1). Documents are numbered
# in flattened (entry of Xs, position within entry) order: count1 is the row
# document and count2 the column document. D is the cosine distance between
# sentence rows, clamped at 0.
S_exp = zeros(num_text,num_text)
average_sparsity_exp  = 0
count1 = 1
count2 = 1
for i1 in 1:length(Xs)
    println("$i1 \r")
    flush(stdout)
    for i2 in 1:L[i1]
        count2 = 1
        X = Xs[i1][i2]
        N = size(X, 1)
        xnorms = [norm(X[i,:]) for i in 1:N]
        for j1 in 1:length(Xs), j2 in 1:L[j1]
            Y = Xs[j1][j2]
            M = size(Y, 1)
            ynorms = [norm(Y[j,:]) for j in 1:M]
            D = zeros(N,M)
            for i in 1:N, j in 1:M
                D[i,j] = max(0,1-sum(X[i,:].*Y[j,:])/(xnorms[i]*ynorms[j]))
            end

            f,g,Z,l = calc_opt_exp(ones(N)/N,ones(M)/M,D,1)

            # Running mean of length(l) over all document pairs.
            average_sparsity_exp += length(l)/(num_text*num_text)
            S_exp[count1,count2] = sum(Z.*D)
            count2 += 1
        end
        count1 += 1
    end
end

In [None]:
# All-pairs scores under calc_opt_entropy (last argument 1e2). Documents are
# numbered in flattened (entry of Xs, position within entry) order. D is the
# cosine distance between sentence rows, floored at 1e-10.
S_entropy = zeros(num_text,num_text)
average_sparsity_entropy  = 0
count1 = 1
count2 = 1
for i1 in 1:length(Xs)
    println("$i1 \r")
    flush(stdout)
    for i2 in 1:L[i1]
        count2 = 1
        X = Xs[i1][i2]
        N = size(X, 1)
        xnorms = [norm(X[i,:]) for i in 1:N]
        for j1 in 1:length(Xs), j2 in 1:L[j1]
            Y = Xs[j1][j2]
            M = size(Y, 1)
            ynorms = [norm(Y[j,:]) for j in 1:M]
            D = zeros(N,M)
            for i in 1:N, j in 1:M
                D[i,j] = max(1e-10,1-sum(X[i,:].*Y[j,:])/(xnorms[i]*ynorms[j]))
            end

            f,g,Z,l = calc_opt_entropy(ones(N)/N,ones(M)/M,D,1e2)

            # Running mean of length(l) over all document pairs.
            average_sparsity_entropy += length(l)/(num_text*num_text)
            # Unlike the sibling cells, the score is built from the returned f and
            # g rather than from sum(Z .* D) — presumably an objective value;
            # confirm against calc_opt_entropy.
            S_entropy[count1,count2] = sum(f)/N+sum(g)/M + sum(f.*(log.(f).-1))+sum(g.*(log.(g).-1))
            count2 += 1
        end
        count1 += 1
    end
end

In [None]:
# All-pairs exact EMD costs from POT. Documents are numbered in flattened
# (entry of Xs, position within entry) order. D is the unclamped cosine distance
# between sentence rows.
S_emd = zeros(num_text,num_text)
average_sparsity_emd  = 0
count1 = 1
count2 = 1
for i1 in 1:length(Xs)
    println("$i1 \r")
    flush(stdout)
    for i2 in 1:L[i1]
        count2 = 1
        X = Xs[i1][i2]
        N = size(X, 1)
        xnorms = [norm(X[i,:]) for i in 1:N]
        for j1 in 1:length(Xs), j2 in 1:L[j1]
            Y = Xs[j1][j2]
            M = size(Y, 1)
            ynorms = [norm(Y[j,:]) for j in 1:M]
            D = zeros(N,M)
            for i in 1:N, j in 1:M
                D[i,j] = 1-sum(X[i,:].*Y[j,:])/(xnorms[i]*ynorms[j])
            end

            # ot.emd receives the reversed-dims view of D with swapped marginals,
            # so Z is transposed before it is multiplied with D.
            Z = ot.emd(ones(M)/M,ones(N)/N,PyReverseDims(D))

            # Running mean of the number of positive plan entries per pair.
            average_sparsity_emd += count(Z .> 0)/(num_text*num_text)
            S_emd[count1,count2] = sum(D.*Z')
            count2 += 1
        end
        count1 += 1
    end
end

In [None]:
# All-pairs costs from POT's stabilized unbalanced Sinkhorn (trailing arguments
# 1 and 1e2). Documents are numbered in flattened (entry of Xs, position within
# entry) order. D is the unclamped cosine distance between sentence rows.
S_kl = zeros(num_text,num_text)
average_sparsity_kl  = 0
count1 = 1
count2 = 1
for i1 in 1:length(Xs)
    println("$i1 \r")
    flush(stdout)
    for i2 in 1:L[i1]
        count2 = 1
        X = Xs[i1][i2]
        N = size(X, 1)
        xnorms = [norm(X[i,:]) for i in 1:N]
        for j1 in 1:length(Xs), j2 in 1:L[j1]
            Y = Xs[j1][j2]
            M = size(Y, 1)
            ynorms = [norm(Y[j,:]) for j in 1:M]
            D = zeros(N,M)
            for i in 1:N, j in 1:M
                D[i,j] = 1-sum(X[i,:].*Y[j,:])/(xnorms[i]*ynorms[j])
            end

            Z = ot.unbalanced.sinkhorn_stabilized_unbalanced(ones(N)/N,ones(M)/M,D,1,1e2)

            # Running mean of the number of plan entries above 1e-10 per pair.
            average_sparsity_kl += count(Z .> 1e-10)/(num_text*num_text)
            S_kl[count1,count2] = sum(D.*Z)
            count2 += 1
        end
        count1 += 1
    end
end

In [None]:
# All-pairs costs from POT's Sinkhorn solver (last argument 1). Documents are
# numbered in flattened (entry of Xs, position within entry) order. D is the
# unclamped cosine distance between sentence rows.
S_cuturi = zeros(num_text,num_text)
average_sparsity_cuturi  = 0
count1 = 1
count2 = 1
for i1 in 1:length(Xs)
    println("$i1 \r")
    flush(stdout)
    for i2 in 1:L[i1]
        count2 = 1
        X = Xs[i1][i2]
        N = size(X, 1)
        xnorms = [norm(X[i,:]) for i in 1:N]
        for j1 in 1:length(Xs), j2 in 1:L[j1]
            Y = Xs[j1][j2]
            M = size(Y, 1)
            ynorms = [norm(Y[j,:]) for j in 1:M]
            D = zeros(N,M)
            for i in 1:N, j in 1:M
                D[i,j] = 1-sum(X[i,:].*Y[j,:])/(xnorms[i]*ynorms[j])
            end

            Z = ot.sinkhorn(ones(N)/N,ones(M)/M,D,1)

            # Running mean of the number of plan entries above 1e-10 per pair.
            average_sparsity_cuturi += count(Z .> 1e-10)/(num_text*num_text)
            S_cuturi[count1,count2] = sum(D.*Z)
            count2 += 1
        end
        count1 += 1
    end
end


In [None]:
# Label matrix over the flattened document order: S[a,b] is 0 when documents a
# and b belong to the same entry of Xs, and 1 otherwise. Documents of one entry
# are contiguous, so each entry contributes one zero block on the diagonal.
S = ones(num_text,num_text)
offset = 0
for i1 in 1:length(Xs)
    members = (offset + 1):(offset + L[i1])
    S[members, members] .= 0
    offset += L[i1]
end

In [None]:
using FileIO,JLD2
# Persist all six score matrices in one file.
# NOTE(review): @save is given a Dict expression rather than variable names —
# confirm the resulting file layout on the installed JLD2 version.
# NOTE(review): the key "cutuir" looks like a typo for "cuturi"; it is left as-is
# because any existing reader of this file would depend on the stored key.
@save "text-alignment.jld2" Dict("entropy" => S_entropy,
"exp" => S_exp,
"emd" => S_emd,
"cutuir" => S_cuturi,
"kl" => S_kl,
"quadratic" => S_quadratic)

In [None]:
# Mean per-pair sparsity statistics accumulated by the scoring loops.
@show(average_sparsity_cuturi,average_sparsity_kl)

In [None]:
@show(average_sparsity_emd,average_sparsity_exp,average_sparsity_quadratic, average_sparsity_entropy)

In [None]:
# scikit-learn handle; `sk.metrics.roc_auc_score` is used by the ROC cells.
sk = pyimport("sklearn")

In [None]:
S_entropy

In [None]:
# Precision@1 for S_kl: for every document, take the other document with the
# smallest score and count a hit when S marks the pair as same-group (S == 0).
pa1 = 0
for i = 1:num_text
    scores = S_kl[i,:]   # slicing copies, so S_kl itself is not modified
    # Exclude the document itself outright. This replaces adding 1000 times a row
    # of a freshly built num_text×num_text identity matrix on every iteration.
    scores[i] = Inf
    idx = argmin(scores)
    if S[i,idx] == 0
        pa1 += 1
    end
end
pa1/num_text

In [None]:
# Precision@1 for S_cuturi: for every document, take the other document with the
# smallest score and count a hit when S marks the pair as same-group (S == 0).
pa1 = 0
for i = 1:num_text
    scores = S_cuturi[i,:]   # slicing copies, so S_cuturi itself is not modified
    # Exclude the document itself outright. This replaces adding 1000 times a row
    # of a freshly built num_text×num_text identity matrix on every iteration.
    scores[i] = Inf
    idx = argmin(scores)
    if S[i,idx] == 0
        pa1 += 1
    end
end
pa1/num_text

In [None]:
# Precision@1 for S_exp: for every document, take the other document with the
# smallest score and count a hit when S marks the pair as same-group (S == 0).
pa1 = 0
for i = 1:num_text
    scores = S_exp[i,:]   # slicing copies, so S_exp itself is not modified
    # Exclude the document itself outright. The previous penalty of +1 on the
    # diagonal (the sibling cells used 1000) could still leave the self-match as
    # the minimum, which counts as a hit because S[i,i] == 0. It also rebuilt a
    # num_text×num_text identity matrix on every iteration.
    scores[i] = Inf
    idx = argmin(scores)
    if S[i,idx] == 0
        pa1 += 1
    end
end
pa1/num_text

In [None]:
# Precision@1 for S_entropy. This score is ranked with argmax (larger means
# closer), unlike the other matrices: for every document, take the other document
# with the largest score and count a hit when S marks the pair as same-group.
pa1 = 0
for i = 1:num_text
    scores = S_entropy[i,:]   # slicing copies, so S_entropy itself is not modified
    # Exclude the document itself outright. This replaces subtracting 1000 times a
    # row of a freshly built num_text×num_text identity matrix on every iteration.
    scores[i] = -Inf
    idx = argmax(scores)
    if S[i,idx] == 0
        pa1 += 1
    end
end
pa1/num_text

In [None]:
# Precision@1 for S_quadratic: for every document, take the other document with
# the smallest score and count a hit when S marks the pair as same-group (S == 0).
pa1 = 0
for i = 1:num_text
    scores = S_quadratic[i,:]   # slicing copies, so S_quadratic itself is not modified
    # Exclude the document itself outright. The previous penalty of +1 on the
    # diagonal (the sibling cells used 1000) could still leave the self-match as
    # the minimum, which counts as a hit because S[i,i] == 0. It also rebuilt a
    # num_text×num_text identity matrix on every iteration.
    scores[i] = Inf
    idx = argmin(scores)
    if S[i,idx] == 0
        pa1 += 1
    end
end
pa1/num_text

In [None]:
# Precision@1 for S_emd: for every document, take the other document with the
# smallest score and count a hit when S marks the pair as same-group (S == 0).
pa1 = 0
for i = 1:num_text
    scores = S_emd[i,:]   # slicing copies, so S_emd itself is not modified
    # Exclude the document itself outright. The previous penalty of +1 on the
    # diagonal (the sibling cells used 1000) could still leave the self-match as
    # the minimum, which counts as a hit because S[i,i] == 0. It also rebuilt a
    # num_text×num_text identity matrix on every iteration.
    scores[i] = Inf
    idx = argmin(scores)
    if S[i,idx] == 0
        pa1 += 1
    end
end
pa1/num_text

In [None]:
# Debug print: column 19 of S_emd, i.e. every document's EMD score against document 19.
println(S_emd[:,19])

In [None]:
# Clean the score matrices in place before evaluation: the diagonal is set to 0,
# and any off-diagonal entry that is not strictly below Inf (NaN or +Inf) is
# replaced by 1.
for mat in (S_entropy, S_emd, S_exp, S_quadratic, S_cuturi)
    for j in 1:num_text, i in 1:num_text
        if i == j
            mat[i,j] = 0
        elseif !(mat[i,j] < Inf)
            mat[i,j] = 1
        end
    end
end


In [None]:
# Same clean-up for S_kl: the diagonal is set to 0, and any off-diagonal entry
# that is not strictly below Inf (NaN or +Inf) is replaced by 1.
for j in 1:num_text, i in 1:num_text
    if i == j
        S_kl[i,j] = 0
    elseif !(S_kl[i,j] < Inf)
        S_kl[i,j] = 1
    end
end

In [None]:
# Flatten the score matrices for ROC evaluation. S_entropy is negated, which
# matches the precision@1 cell that ranks it with argmax instead of argmin.
t_quadratic = vec(S_quadratic)
t_emd = vec(S_emd)
t_entropy = -1*vec(S_entropy)
t_exp = vec(S_exp)

In [None]:
t_cuturi = vec(S_cuturi)
t_kl = vec(S_kl)

In [None]:
# ROC-AUC of each score vector against the flattened label matrix S
# (S is 0 for same-group pairs and 1 otherwise).
sk.metrics.roc_auc_score(vec(S),t_kl)

In [None]:
sk.metrics.roc_auc_score(vec(S),t_cuturi)

In [None]:
sk.metrics.roc_auc_score(vec(S),t_emd)

In [None]:
sk.metrics.roc_auc_score(vec(S),t_quadratic)

In [None]:
sk.metrics.roc_auc_score(vec(S),t_entropy)

In [None]:
sk.metrics.roc_auc_score(vec(S),t_exp)

In [None]:
S_entropy