In [None]:
#Include
using LightGraphs, SparseArrays, SimpleWeightedGraphs
using Statistics, BenchmarkTools, LinearAlgebra, ProgressMeter
using Base.Threads, PhyloNetworks, StatsBase, Distributions
using Base.GC, JLD2, FileIO, CSV, DataFrames
using Random, NPZ, GraphRecipes, Plots, Laplacians

In [None]:
include("TreeRep.jl")
include("ConstructTree.jl")
include("LevelTree.jl")
include("NJ.jl")
include("Utilities.jl")
include("Visualize.jl")
include("TreeOpt.jl")
include("SparseRep.jl")
include("Bartal.jl")

In [None]:
# Select the Plotly backend of Plots.jl for every figure in this notebook.
plotly()

# Font presets; `myfonts` is splatted into plot calls further down as keyword arguments.
font = Plots.font("Helvetica", 15)
font2 = Plots.font("Helvetica", 9)
myfonts = Dict(:guidefont=>font, :xtickfont=>font2, :ytickfont=>font2, :legendfont=>font2)

# Visualize the Immunological Trees

First argument should be the tree, the second argument should be the weights. 

In [None]:
# Draw the tree `R` with the weight matrix `wbfs` (both are assigned in other cells).
# The label list holds eight annotated species names followed by seven blank entries
# (presumably for the unlabeled tree vertices -- TODO confirm against Visualize.jl).
Visualize.visualize(R,W = wbfs, labels = [text("Dog", :top, 20),text("Bear", :left, 20),
        text("Racoon", :top, 20), text("Weasel", :left, 20),text("Seal", :bottom, 20),
        text("Sea Lion", :top, 20),
        text("Cat", :left, 20),
        text("Monkey", :right, 20),
        "","","","","","",""])
# Hide legend and axes on the figure just drawn.
plot!(legend=:false,axis=:false)

In [None]:
# Draw the tree `g2` with the weight matrix `D5` (both are assigned in other cells).
# The label list holds eight plain-string species names followed by seven blank entries.
Visualize.visualize(g2,W=D5, labels = ["dog","bear","racoon", "weasel","seal","sea lion","cat","monkey     ",
        "","","","","","",""])
# Hide legend and axes on the figure just drawn.
plot!(legend=:false,axis=:false)

# Plotting Optimization embeddings for immunological data

In [None]:
# 8-by-2 buffer: one (x, y) row per species, filled in by the next cell.
P = zeros(8,2)

In [None]:
# PM Coordinates
# Hard-coded 2-D coordinates; row k belongs to the k-th species label of the scatter plot
# that follows (Dog, Bear, Racoon, Weasel, Seal, Sea Lion, Cat, Monkey).

P[1,:] = [-0.6015,  -0.7989]
P[2,:] = [0.8426,  0.5386]
P[3,:] = [-0.8188,  0.5740]
P[4,:] = [0.7942, -0.6047]
P[5,:] = [-0.9950, -0.0819]
P[6,:] = [-0.1758,  0.9827]
P[7,:] = [0.0065, -0.9208]
P[8,:] = [-0.0761, -0.1009]

In [None]:
# Scatter the rows of P and annotate each point with its species name.
# Marker size is 1, so the annotations dominate the figure; legend and axes are hidden.
scatter(P[:,1],P[:,2], series_annotations = [text("Dog", :right, 20),text("Bear", :right, 20),
        text("Racoon", :right, 20), text("Weasel", :right, 20),text("Seal", :right, 20),
        text("Sea Lion", :right, 20),
        text("Cat", :right, 20),
        text("Monkey", :right, 20)],
legend=:false, ms = 1; myfonts..., axis=:false)

In [None]:
# 8-by-2 buffer for the PT embedding, one (x, y) row per species.
P2 = zeros(8,2)

In [None]:
# Reading in the PT embeddings
T = npzread("./sarich.final_coordinates0.npy")
# Divide every entry after the first by (1 + first entry) and store the result in row 1.
# NOTE(review): this looks like a hyperboloid-to-Poincare-disk projection -- confirm.
# NOTE(review): only row 1 of P2 is assigned here; rows 2-8 stay zero unless they are
# filled elsewhere (e.g. by re-running with other coordinate files) -- confirm.
P2[1,:] = T[2:end]/(1+T[1])

In [None]:
# Scatter the rows of P2 and annotate each point with its species name;
# legend and axes are hidden.
scatter(P2[:,1],P2[:,2], series_annotations = [text("Dog", :right, 20),text("Bear", :right, 20),
        text("Racoon", :top, 20), text("Weasel", :bottom, 20),text("Seal", :right, 20),
        text("Sea Lion", :right, 20),
        text("Cat", :right, 20),
        text("Monkey", :right, 20)],
legend=:false, axis=:false)

# Random tree reconstruction experiment

In [None]:
# Number of tree sizes tried in the random tree reconstruction experiment.
N = 7

# Result buffers filled by the loop in the next cell (suffix 1 = TreeRep, suffix 2 = NJ).
T1 = zeros(N)      # TreeRep run time in seconds
T2 = zeros(N)      # NJ run time in seconds
map1 = zeros(N)    # TreeRep MAP divided by the MAP of the input metric
map2 = zeros(N)    # NJ MAP divided by the MAP of the input metric
dist1 = zeros(N)   # TreeRep average distortion
dist2 = zeros(N)   # NJ average distortion
nvs1 = zeros(N)    # vertex count of the TreeRep output
nvs2 = zeros(N)    # vertex count of the NJ output
nvs = zeros(N)     # vertex count of the input tree

In [None]:
# Random tree reconstruction benchmark. For every slot of the preallocated result
# vectors: build a random weighted tree, compute its path metric D, rebuild a tree from D
# with TreeRep and with neighbor joining (NJ), then record run time, MAP ratio, average
# distortion and the vertex count of each output.
# The loop follows eachindex(T1) so it cannot drift from the buffer length (the original
# hard-coded 1:7), and the inner loops use their own index names instead of shadowing `i`.
for i in eachindex(T1)
    g = utilities.block(LightGraphs.SimpleGraphs.DoubleBinaryTree(i),10)
    n = nv(g)
    # Take a BFS tree from a random root and copy its edges into an undirected graph.
    g = LightGraphs.bfs_tree(g,rand(1:n))
    G = SimpleGraph(n)
    for e in edges(g)
        add_edge!(G,e)
    end
    @show(G)
    # Symmetric random weights, kept only on the edges of G.
    W = rand(n,n)
    W = W+W'
    W = adjacency_matrix(G) .* W
    D = utilities.parallel_dp_shortest_paths(G,W);

    # --- TreeRep ---
    T1[i] = @elapsed G2,W2 = TreeRep.metric_to_structure(D,undef,undef);

    nvs[i] = nv(G)

    # Restrict W2 to the vertices of G2 and zero out non-positive weights.
    B = W2[1:nv(G2),1:nv(G2)];
    B = sparse(B);
    B = (B .> 0) .* B;

    D2 = utilities.parallel_dp_shortest_paths(G2, B, false);
    for v = 1:n
        D2[v,v] = 0
    end

    nvs1[i] = nv(G2)

    # Only the leading n-by-n block of D2 is compared with the input metric.
    map1[i] = utilities.MAP(D2[1:n,1:n],G)/utilities.MAP(D,G)
    dist1[i] = utilities.avg_distortion(D2[1:n,1:n],D)

    # --- Neighbor joining ---
    T2[i] = @elapsed R =  NJ.nj!(copy(convert(Matrix{Float64},D)));

    # Convert the NJ result into a graph plus a symmetric sparse weight matrix.
    g2 = SimpleGraph(R.numNodes)
    w = spzeros(R.numNodes,R.numNodes)
    for k = 1:R.numEdges
        src = R.edge[k].node[1].number
        dst = R.edge[k].node[2].number
        add_edge!(g2,src,dst)
        w[src,dst] = R.edge[k].length
        w[dst,src] = w[src,dst]
    end

    nvs2[i] = nv(g2)

    D5 = utilities.parallel_dp_shortest_paths(g2, w)

    dist2[i] = utilities.avg_distortion(D5[1:n,1:n],D);
    map2[i] = utilities.MAP(D5[1:n,1:n],G)/utilities.MAP(D,G)

end

@show(T1,T2,map1,map2,dist1,dist2,nvs,nvs1,nvs2)

In [None]:
# Run time against input size: a line plus markers per method (TreeRep blue, NJ red).
plot(nvs,T1,label="TreeRep",lc = :blue, linewidth = 2, xlabel = "Nodes", ylabel = "Time taken (Seconds)"; myfonts...)
scatter!(nvs,T1,label="TreeRep",xlabel = "Nodes", ylabel = "Time taken (Seconds)", mc = :blue, shape=:circle; myfonts...)
plot!(nvs,T2,label="NJ", lc = :red, linewidth = 2; myfonts...)
scatter!(nvs,T2,label="NJ", mc = :red, shape =:xcross; myfonts...)

In [None]:
# Size of each reconstructed tree relative to the input tree, per method.
plot(nvs,nvs1./nvs,label = "TreeRep", xlabel = "Nodes", ylabel = "Ratio of number of nodes in returned tree to original tree")
plot!(nvs, nvs2./nvs,label = "NJ")

# Random Points

In [None]:
# Random points, scale sweep. For i = 1..N the pairwise distances stored in
# rand-dim10-scale(2^i).csv are loaded and seven tree-fitting methods are scored by
# average distortion. Before scoring, most fitted metrics are rescaled by the
# least-squares factor α = tr(D̂'D)/tr(D̂'D̂); NJ is scored unscaled.
N = 10
dist1 = zeros(N)   # TreeRep
dist2 = zeros(N)   # NJ
dist3 = zeros(N)   # ConstructTree
dist4 = zeros(N)   # MST on the 10-nearest-neighbour graph
dist5 = zeros(N)   # MST on the complete graph
dist6 = zeros(N)   # AKPW on the 10-nearest-neighbour graph
dist7 = zeros(N)   # AKPW on the complete graph

n = 100            # number of points in every data file

for i = 1:N
    sc = 2^i
    Z = CSV.read("rand-dim10-scale$sc.csv")
    # Rebuild the symmetric n-by-n distance matrix from the edge list (one row per pair).
    D = zeros(n,n)
    for k = 1:div(n*(n-1),2)
        ii = Z[k,1]
        j = Z[k,2]
        D[ii,j] = Z[k,3]
        D[j,ii] = Z[k,3]
    end
    #D = utilities.rand_hyperbolic(n,10,2^i)

    # --- TreeRep ---
    @time G2, W2 = TreeRep.metric_to_structure(D,undef,undef)
    B = W2[1:nv(G2),1:nv(G2)];
    B = sparse(B);
    B = (B .> 0) .* B;
    D2 = utilities.parallel_dp_shortest_paths(G2, B, false);
    α = tr(D2[1:n,1:n]'*D)/tr(D2[1:n,1:n]'*D2[1:n,1:n])
    dist1[i] = utilities.avg_distortion(D2[1:n,1:n]*α,D)

    # --- Neighbor joining (scored without rescaling) ---
    @time R = NJ.nj!(copy(convert(Matrix{Float64},D)))
    g2 = SimpleGraph(R.numNodes)
    w = spzeros(R.numNodes,R.numNodes)
    for k = 1:R.numEdges
        src = R.edge[k].node[1].number
        dst = R.edge[k].node[2].number
        add_edge!(g2,src,dst)
        w[src,dst] = R.edge[k].length
        w[dst,src] = w[src,dst]
    end
    D5 = utilities.parallel_dp_shortest_paths(g2, w, false);
    dist2[i] = utilities.avg_distortion(D5[1:n,1:n],D)

    # --- ConstructTree ---
    @time T,W4 = ConstructTree.basicConstructTree(collect(2:n),1,D)
    D4 = LightGraphs.floyd_warshall_shortest_paths(T,W4[1:nv(T),1:nv(T)]).dists;
    α = tr(D4[1:n,1:n]'*D)/tr(D4[1:n,1:n]'*D4[1:n,1:n])
    dist3[i] = utilities.avg_distortion(D4[1:n,1:n]*α,D)

    # --- MST of the 10-nearest-neighbour graph ---
    #g = CompleteGraph(n)
    g = utilities.kNN(D,10)
    @time r = LightGraphs.prim_mst(g,D)
    #r = LightGraphs.bfs_tree(g,1)
    R = SimpleGraph(n)
    for e in r
        add_edge!(R,e)
    end
    wbfs =  adjacency_matrix(R) .* D
    D3 = utilities.parallel_dp_shortest_paths(R,wbfs);
    α = tr(D3[1:n,1:n]'*D)/tr(D3[1:n,1:n]'*D3[1:n,1:n])
    dist4[i] = utilities.avg_distortion(D3[1:n,1:n]*α,D)

    # --- MST of the complete graph ---
    g = CompleteGraph(n)
    #g = utilities.kNN(D,10)
    @time r = LightGraphs.prim_mst(g,D)
    #r = LightGraphs.bfs_tree(g,1)
    R = SimpleGraph(n)
    for e in r
        add_edge!(R,e)
    end
    wbfs =  adjacency_matrix(R) .* D
    D3 = utilities.parallel_dp_shortest_paths(R,wbfs);
    α = tr(D3[1:n,1:n]'*D)/tr(D3[1:n,1:n]'*D3[1:n,1:n])
    dist5[i] = utilities.avg_distortion(D3[1:n,1:n]*α,D)

    # --- AKPW on the 10-nearest-neighbour graph ---
    g = utilities.kNN(D,10)
    A = adjacency_matrix(g) .* D
    @time R = Laplacians.akpw(A);

    g2 = build_graph(R)
    @time D6 = LightGraphs.floyd_warshall_shortest_paths(g2, R).dists;
    # Fix: rescale with the factor fitted to D6 itself. The original reused the α that
    # was left over from the complete-graph MST above.
    α = tr(D6[1:n,1:n]'*D)/tr(D6[1:n,1:n]'*D6[1:n,1:n])
    dist6[i] = utilities.avg_distortion(D6[1:n,1:n]*α,D)

    # --- AKPW on the complete graph ---
    g = CompleteGraph(n)
    A = adjacency_matrix(g) .* D
    @time R = Laplacians.akpw(A);

    g2 = build_graph(R)
    @time D6 = LightGraphs.floyd_warshall_shortest_paths(g2, R).dists;
    # Same fix as above: α is refitted to this D6.
    α = tr(D6[1:n,1:n]'*D)/tr(D6[1:n,1:n]'*D6[1:n,1:n])
    dist7[i] = utilities.avg_distortion(D6[1:n,1:n]*α,D)

    # --- Write D back out as an edge list over the complete graph ---
    # NOTE(review): with dim = 10 and scale = 2^i this is the same file that was read at
    # the top of the iteration -- confirm the rewrite is still wanted.
    g = CompleteGraph(n)
    E = ne(g)

    id1 = convert(Array{Int64,1},zeros(E))
    id2 = convert(Array{Int64,1},zeros(E))
    weight = zeros(E)

    Ed = collect(edges(g))

    for k = 1:E
        e = Ed[k]
        id1[k] = e.src
        id2[k] = e.dst
        weight[k] = D[e.src,e.dst]
    end

    df = DataFrame(id1 = id1, id2 = id2, weight = weight)
    scale = 2^i
    dim = 10
    CSV.write("rand-dim$dim-scale$scale.csv",  df, writeheader=true)

    @show((dist1[i],dist2[i]))
end

In [None]:
# Average distortion of every method over the ten sweep steps: one line plus one marker
# series per method. dist6LM and dist8PT are loaded from JLD2 files in cells further down
# and must exist before this cell runs.
# NOTE(review): the loop cell above varies the scale (files rand-dim10-scale$sc.csv)
# while the x label below says "Dimension" -- confirm which experiment this figure shows.
plot(collect(1:10),dist1,label="TreeRep", lc = :blue, linewidth = 2)
plot!(collect(1:10),dist2,label="NJ", lc = :red, linewidth = 2)
plot!(collect(1:10),dist3,label="ConstructTree", lc = :green, linewidth = 2)
plot!(collect(1:10),dist5,label="MST : Complete", lc = :indigo, linewidth = 2)
plot!(collect(1:10),dist4,label="MST : KNN 10", lc = :darkorange, linewidth = 2)
plot!(collect(1:10),dist6,label="AKPW : KNN 10", lc = :pink, linewidth = 2)
plot!(collect(1:10),dist7,label="AKPW : Complete", lc = :black, linewidth = 2)
plot!(collect(1:10),dist6LM,label="Lorentz Maps", lc = :yellow, linewidth = 2)
plot!(collect(1:10),dist8PT,label="PT", lc = :gray, linewidth = 2)
scatter!(collect(1:10),dist1,label="TreeRep", shape=:circle, mc = :blue)
scatter!(collect(1:10),dist2,label="NJ",shape=:cross, mc = :red)
scatter!(collect(1:10),dist3,label="ConstructTree", shape =:xcross, mc = :green)
scatter!(collect(1:10),dist4,label="MST : KNN 10", shape =:vline, mc = :darkorange)
scatter!(collect(1:10),dist5,label="MST : Complete", shape =:diamond, mc = :indigo)
scatter!(collect(1:10),dist6,label="AKPW : KNN 10", shape = :square, mc = :pink)
scatter!(collect(1:10),dist7,label="AKPW : Complete", shape = :dtriangle, mc = :black)
scatter!(collect(1:10),dist6LM,label="Lorentz Maps", shape = :hexagon, mc = :yellow)
scatter!(collect(1:10),dist8PT,label="PT", shape = :triangle, mc = :gray)
# Axis labels and a logarithmic y axis for the whole figure.
plot!(xlabel = "Dimension (Log Scale)",ylabel = "Average Distortion (Log scale)",yscale=:log; myfonts...)

In [None]:
# Random points, dimension sweep. For i = 1..N the pairwise distances stored in
# rand-dim(2^i)-scale1.csv are loaded and seven tree-fitting methods are scored by
# average distortion. Before scoring, most fitted metrics are rescaled by the
# least-squares factor α = tr(D̂'D)/tr(D̂'D̂); NJ is scored unscaled.
N = 10
dist1 = zeros(N)   # TreeRep
dist2 = zeros(N)   # NJ
dist3 = zeros(N)   # ConstructTree
dist4 = zeros(N)   # MST on the 10-nearest-neighbour graph
dist5 = zeros(N)   # MST on the complete graph
dist6 = zeros(N)   # AKPW on the 10-nearest-neighbour graph
dist7 = zeros(N)   # AKPW on the complete graph

n = 100            # number of points in every data file

for i = 1:N
    sc = 2^i
    Z = CSV.read("rand-dim$sc-scale1.csv")
    # Rebuild the symmetric n-by-n distance matrix from the edge list (one row per pair).
    D = zeros(n,n)
    for k = 1:div(n*(n-1),2)
        ii = Z[k,1]
        j = Z[k,2]
        D[ii,j] = Z[k,3]
        D[j,ii] = Z[k,3]
    end
    #D = utilities.rand_hyperbolic(n,2^i,1)

    # --- TreeRep ---
    G2, W2 = TreeRep.metric_to_structure(D,undef,undef)
    B = W2[1:nv(G2),1:nv(G2)];
    B = sparse(B);
    B = (B .> 0) .* B;
    D2 = utilities.parallel_dp_shortest_paths(G2, B, false);
    α = tr(D2[1:n,1:n]'*D)/tr(D2[1:n,1:n]'*D2[1:n,1:n])
    dist1[i] = utilities.avg_distortion(D2[1:n,1:n]*α,D)

    # --- Neighbor joining (scored without rescaling) ---
    R = NJ.nj!(copy(convert(Matrix{Float64},D)))
    g2 = SimpleGraph(R.numNodes)
    w = spzeros(R.numNodes,R.numNodes)
    for k = 1:R.numEdges
        src = R.edge[k].node[1].number
        dst = R.edge[k].node[2].number
        add_edge!(g2,src,dst)
        w[src,dst] = R.edge[k].length
        w[dst,src] = w[src,dst]
    end
    D5 = utilities.parallel_dp_shortest_paths(g2, w, false);
    dist2[i] = utilities.avg_distortion(D5[1:n,1:n],D)

    # --- ConstructTree ---
    T,W4 = ConstructTree.basicConstructTree(collect(2:n),1,D)
    D4 = LightGraphs.floyd_warshall_shortest_paths(T,W4[1:nv(T),1:nv(T)]).dists;
    α = tr(D4[1:n,1:n]'*D)/tr(D4[1:n,1:n]'*D4[1:n,1:n])
    dist3[i] = utilities.avg_distortion(D4[1:n,1:n]*α,D)

    # --- MST of the 10-nearest-neighbour graph ---
    #g = CompleteGraph(n)
    g = utilities.kNN(D,10)
    r = LightGraphs.prim_mst(g,D)
    #r = LightGraphs.bfs_tree(g,1)
    R = SimpleGraph(n)
    for e in r
        add_edge!(R,e)
    end
    wbfs =  adjacency_matrix(R) .* D
    D3 = utilities.parallel_dp_shortest_paths(R,wbfs);
    α = tr(D3[1:n,1:n]'*D)/tr(D3[1:n,1:n]'*D3[1:n,1:n])
    dist4[i] = utilities.avg_distortion(D3[1:n,1:n]*α,D)

    # --- MST of the complete graph ---
    g = CompleteGraph(n)
    #g = utilities.kNN(D,10)
    r = LightGraphs.prim_mst(g,D)
    #r = LightGraphs.bfs_tree(g,1)
    R = SimpleGraph(n)
    for e in r
        add_edge!(R,e)
    end
    wbfs =  adjacency_matrix(R) .* D
    D3 = utilities.parallel_dp_shortest_paths(R,wbfs);
    α = tr(D3[1:n,1:n]'*D)/tr(D3[1:n,1:n]'*D3[1:n,1:n])
    dist5[i] = utilities.avg_distortion(D3[1:n,1:n]*α,D)

    # --- AKPW on the 10-nearest-neighbour graph ---
    g = utilities.kNN(D,10)
    A = adjacency_matrix(g) .* D
    @time R = Laplacians.akpw(A);

    g2 = build_graph(R)
    @time D6 = LightGraphs.floyd_warshall_shortest_paths(g2, R).dists;
    # Fix: rescale with the factor fitted to D6 itself. The original reused the α that
    # was left over from the complete-graph MST above.
    α = tr(D6[1:n,1:n]'*D)/tr(D6[1:n,1:n]'*D6[1:n,1:n])
    dist6[i] = utilities.avg_distortion(D6[1:n,1:n]*α,D)

    # --- AKPW on the complete graph ---
    g = CompleteGraph(n)
    A = adjacency_matrix(g) .* D
    @time R = Laplacians.akpw(A);

    g2 = build_graph(R)
    @time D6 = LightGraphs.floyd_warshall_shortest_paths(g2, R).dists;
    # Same fix as above: α is refitted to this D6.
    α = tr(D6[1:n,1:n]'*D)/tr(D6[1:n,1:n]'*D6[1:n,1:n])
    dist7[i] = utilities.avg_distortion(D6[1:n,1:n]*α,D)

    # --- Write D back out as an edge list over the complete graph ---
    # NOTE(review): with dim = 2^i and scale = 1 this is the same file that was read at
    # the top of the iteration -- confirm the rewrite is still wanted.
    g = CompleteGraph(n)
    E = ne(g)

    id1 = convert(Array{Int64,1},zeros(E))
    id2 = convert(Array{Int64,1},zeros(E))
    weight = zeros(E)

    Ed = collect(edges(g))

    for k = 1:E
        e = Ed[k]
        id1[k] = e.src
        id2[k] = e.dst
        weight[k] = D[e.src,e.dst]
    end

    df = DataFrame(id1 = id1, id2 = id2, weight = weight)
    scale = 1
    dim = 2^i
    CSV.write("rand-dim$dim-scale$scale.csv",  df, writeheader=true)

    @show((dist1[i],dist2[i]))
end

In [None]:
# Average distortion of every method over the ten sweep steps: one line plus one marker
# series per method. dist7LM and dist9PT are loaded from JLD2 files in cells further down
# and must exist before this cell runs.
# NOTE(review): the loop cell above varies the dimension (files rand-dim$sc-scale1.csv)
# while the x label below says "Scale" -- confirm which experiment this figure shows.
plot(collect(1:10),dist1,label="TreeRep", lc = :blue, linewidth = 2)
plot!(collect(1:10),dist2,label="NJ", lc = :red, linewidth = 2)
plot!(collect(1:10),dist3,label="ConstructTree", lc = :green, linewidth = 2)
plot!(collect(1:10),dist5,label="MST : Complete", lc = :indigo, linewidth = 2)
plot!(collect(1:10),dist4,label="MST : KNN 10", lc = :darkorange, linewidth = 2)
plot!(collect(1:10),dist6,label="AKPW : KNN 10", lc = :pink, linewidth = 2)
plot!(collect(1:10),dist7,label="AKPW : Complete", lc = :black, linewidth = 2)
plot!(collect(1:10),dist7LM,label="Lorentz Maps", lc = :yellow, linewidth = 2)
plot!(collect(1:10),dist9PT,label="PT", lc = :gray, linewidth = 2)
scatter!(collect(1:10),dist1,label="TreeRep", shape=:circle, mc = :blue)
scatter!(collect(1:10),dist2,label="NJ",shape=:cross, mc = :red)
scatter!(collect(1:10),dist3,label="ConstructTree", shape =:xcross, mc = :green)
scatter!(collect(1:10),dist4,label="MST : KNN 10", shape =:vline, mc = :darkorange)
scatter!(collect(1:10),dist5,label="MST : Complete", shape =:diamond, mc = :indigo)
scatter!(collect(1:10),dist6,label="AKPW : KNN 10", shape = :square, mc = :pink)
scatter!(collect(1:10),dist7,label="AKPW : Complete", shape = :dtriangle, mc = :black)
scatter!(collect(1:10),dist7LM,label="Lorentz Maps", shape = :hexagon, mc = :yellow)
scatter!(collect(1:10),dist9PT,label="PT", shape = :triangle, mc = :gray)
# Axis labels and a logarithmic y axis for the whole figure.
plot!(xlabel = "Scale (Log scale)",ylabel = "Average Distortion (Log scale)",yscale=:log; myfonts...)

In [None]:
# Saved distortion vectors used by the two sweep figures above. Each file stores its data
# under the key shown; the LM / PT suffix matches the "Lorentz Maps" and "PT" plot labels.
dist6LM =  load("dist6.jld2")["dist6"]

In [None]:
dist7LM = load("dist7.jld2")["dist7"]

In [None]:
dist8PT = load("dist8.jld2")["dist8"]

In [None]:
dist9PT = load("dist9.jld2")["dist9"]

# Making CSV files for inputs to optimization based methods

In [None]:
# Generate a random metric to export.
# NOTE(review): the first argument of rand_hyperbolic repeats the literal 1000 rather
# than using `n`; the meaning of the arguments is defined in Utilities.jl (not visible
# here).
n = 1000
D = utilities.rand_hyperbolic(1000,10,1000);

In [None]:
# Complete graph on n vertices and its edge count; both are used by the next cell.
g = CompleteGraph(n)
E = ne(g)

In [None]:
# Flatten every edge of `g` into three parallel columns (endpoint, endpoint, distance
# taken from D), ready to be written out as an edge list.
Ed = collect(edges(g))

id1 = zeros(Int64, E)
id2 = zeros(Int64, E)
weight = zeros(E)

for k = 1:E
    e = Ed[k]
    id1[k] = e.src
    id2[k] = e.dst
    weight[k] = D[e.src,e.dst]
end

In [None]:
# Write the three columns as a CSV edge list with a header row.
df = DataFrame(id1 = id1, id2 = id2, weight = weight)
CSV.write("rand1000.csv",  df, writeheader=true)

# Zeisel and CBMC

cite-dists is the CBMC data file

In [None]:
# Zeisel distances. NOTE: this cell and the next both bind the name A2, so whichever
# was run last determines the data set used below.
@load "zeisel-dists.jld2" A2

In [None]:
# CBMC distances (see the note above the cells: cite-dists is the CBMC data file).
@load "cite-dists.jld2" A2

In [None]:
# Use the loaded matrix as the working metric D, with a complete graph on its points.
D = A2
n = size(D)[1]
G = CompleteGraph(n)

Create the data files that will be used by PM, LM, and PT. CBMC is too big to run the optimization methods. 

In [None]:
# Export the current metric D as a CSV edge list over the complete graph:
# one row (id1, id2, weight) per unordered pair of points.
g = CompleteGraph(size(D)[1])
E = ne(g)
Ed = collect(edges(g))

id1 = zeros(Int64, E)
id2 = zeros(Int64, E)
weight = zeros(E)

for k = 1:E
    e = Ed[k]
    id1[k] = e.src
    id2[k] = e.dst
    weight[k] = D[e.src,e.dst]
end

df = DataFrame(id1 = id1, id2 = id2, weight = weight)
CSV.write("sarich.csv",  df, writeheader=true)

In [None]:
# Re-save "<filename>csv" as "<filename>edges": same rows, space-delimited, no header row.
filename = "sarich."
CSV.write(filename*"edges",CSV.read(filename*"csv"); delim=' ',writeheader=false)

In [None]:
# Dump the tree G2 as a space-separated edge list with 0-based vertex ids.
# Each weight is read from W2 and divided by 100; negative values are replaced by 0.
open("zeisel-tree.edges", "w") do io
    for edge in edges(G2)
        u = edge.src - 1
        v = edge.dst - 1
        wt = W2[edge.src,edge.dst]/100
        wt = wt < 0 ? 0 : wt
        write(io, "$u $v $wt\n")
    end
end

# Load Data Sets

This is the Sarich et al. immunological data set.

In [None]:
# Symmetric 8-by-8 distance matrix with a zero diagonal, stored as a sparse matrix.
# Rows/columns presumably follow the species order used in the plots above
# (Dog, Bear, Racoon, Weasel, Seal, Sea Lion, Cat, Monkey) -- TODO confirm.
D = sparse([ 0  32  48  51  50  48  98 148;
32   0  26  34  29  33  84 136;
48  26   0  42  44  44  92 152;
51  34  42   0  44  38  86 142;
50  29  44  44   0  24  89 142;
48  33  44  38  24   0  90 142;
98  84  92  86  89  90   0 148;
148 136 152 142 142 142 148 0])

In [None]:
"""
    build_graph(A)

Return a `SimpleGraph` on `size(A, 1)` vertices that has an edge `(i, j)` for every
nonzero entry `A[i, j]` strictly below the diagonal. Entries on or above the diagonal
are never inspected.
"""
function build_graph(A)
    nverts = size(A, 1)
    graph = SimpleGraph(nverts)
    for row in 2:nverts
        for col in 1:(row - 1)
            if A[row, col] != 0
                add_edge!(graph, row, col)
            end
        end
    end
    return graph
end

Load the data sets

In [None]:
# Alternative loaders: each of the two cells below binds G (and n, E) from an edge file,
# so the last one run wins. Both report the vertex/edge counts and connectivity.
G,w = utilities.read_tree_withweights("./../hyperbolics-master/data/edges/bio-CE-GT.edges"," ")
n = nv(G)
E = ne(G)
@show((n,E));
@show(is_connected(G));

In [None]:
# Same as above for the wordnet file, passed through utilities.remove_loops.
G = utilities.remove_loops(utilities.read_tree("./../hyperbolics-master/data/edges/wordnet.edges"," "))
n = nv(G)
E = ne(G)
@show((n,E));
@show(is_connected(G));

Extract largest connected component.

In [None]:
# Index of the largest connected component of G.
C = connected_components(G)
idxmax = argmax(length.(C))

In [None]:
# g is the subgraph induced by that component; V is the vertex map returned alongside it.
# n and E are re-bound to the size of g.
g,V = induced_subgraph(G, C[idxmax])
@show(is_connected(g));
n = nv(g)
E = ne(g)

print(n," ",E)

In [None]:
# All-pairs distances of g, using its adjacency matrix as the weights.
@time D = utilities.parallel_dp_shortest_paths(g,adjacency_matrix(g));

# Calculating δ

In [None]:
# δ of the metric after dividing by its largest entry. The meaning of the second
# argument (1) is defined in Utilities.jl and is not visible here.
d = utilities.calc_delta_for_w(D./maximum(D),1)

# Run Alon et al algorithm

In [None]:
# Edge weights for AKPW: the entries of D on the edges of G.
# NOTE(review): D may have been computed on `g` (the largest component of G) in the cells
# above; if G is disconnected its adjacency matrix is larger than D and this broadcast
# fails. Confirm whether `g` was intended here.
A = adjacency_matrix(G).*D;

In [None]:
# Low-stretch spanning tree (AKPW) from Laplacians.jl, returned as a weight matrix.
@time R = Laplacians.akpw(A);

In [None]:
# Graph with one edge per nonzero of R, then all-pairs tree distances.
g2 = build_graph(R)
@time D5 = utilities.parallel_dp_shortest_paths(g2,R);

In [None]:
utilities.MAP(D5[1:n,1:n],g)

In [None]:
# α = tr(D5'D)/tr(D5'D5) is the least-squares scale aligning D5 with D; the distortion
# is measured after rescaling.
α = tr(D5[1:n,1:n]'*D)/tr(D5[1:n,1:n]'*D5[1:n,1:n])
utilities.avg_distortion(D5[1:n,1:n]*α,D)

# Bartal

In [None]:
# Bartal embedding: average the tree metrics of `bartal_runs` independent runs into D7,
# then report the average distortion (after least-squares rescaling) and the MAP of the
# averaged metric.
# Fix: the progress meter is sized to the number of runs. The original created
# Progress(2) while updating it up to 200.
bartal_runs = 200
D7 = zeros(n,n)
p2 = Progress(bartal_runs)
for i = 1:bartal_runs
    R = Bartal.bartal(g,collect(1:n),D)
    D6 = utilities.parallel_dp_shortest_paths(R[1],R[4])
    p = R[2]
    # Inverse of the ordering p: IndexToIdx[p[k]] == k.
    IndexToIdx = copy(R[2])
    for k = 1:length(p)
        IndexToIdx[p[k]] = k
    end
    # Re-index the run's distances through IndexToIdx (symmetric fill, zero diagonal).
    D2p = zeros(n,n)
    for a = 1:length(p)
        for b = 1:a-1
            D2p[a,b] = D6[IndexToIdx[a],IndexToIdx[b]]
            D2p[b,a] = D2p[a,b]
        end
    end
    # Running mean over the runs completed so far.
    D7 = (D7*(i-1) + D2p)/i
    update!(p2,i)
    flush(stdout)
end
α = tr(D7[1:n,1:n]'*D)/tr(D7[1:n,1:n]'*D7[1:n,1:n])
@show(utilities.avg_distortion(D7*α,D))
@show(utilities.MAP(D7,g))

# Construct Tree

In [None]:
# Reference graph passed to utilities.MAP at the end of this section.
g = CompleteGraph(n)

In [None]:
# λ is not read anywhere in this notebook; presumably ConstructTree reads it as a
# global -- TODO confirm.
global λ = 2000
n = size(D)[1]
# Build the tree from D with point 1 passed separately from the remaining points 2..n.
@time T,W4 = ConstructTree.basicConstructTree(collect(2:n),1,D)

In [None]:
# All-pairs distances in the constructed tree.
@time D4 = LightGraphs.floyd_warshall_shortest_paths(T,W4[1:nv(T),1:nv(T)]).dists;

In [None]:
# Least-squares scale aligning the leading n-by-n block of D4 with D.
α = tr(D4[1:n,1:n]'*D)/tr(D4[1:n,1:n]'*D4[1:n,1:n])

In [None]:
utilities.avg_distortion(D4[1:n,1:n]*α,D)

In [None]:
utilities.MAP(D4[1:n,1:n],g)

# LevelTree algorithm

In [None]:
# Build the level graph of g (the meaning of the third argument, 1, is defined in
# LevelTree.jl), pass it through utilities.remove_loops, and compute its unweighted
# all-pairs distances.
@time gT = utilities.remove_loops(LevelTree.build_level_graph(g,D,1))
DT = utilities.parallel_dp_shortest_paths(gT, adjacency_matrix(gT));

In [None]:
utilities.MAP(DT,g)

In [None]:
# Average distortion after least-squares rescaling of the leading n-by-n block of DT.
α = tr(DT[1:n,1:n]'*D)/tr(DT[1:n,1:n]'*DT[1:n,1:n])
utilities.avg_distortion(DT[1:n,1:n]*α,D)

# Tree Rep

In [None]:
# Globals that are not read in the visible cells; presumably used by TreeRep for
# progress reporting -- TODO confirm.
global p2 = Progress(nv(g))
global jj = 0

In [None]:
# Number of repetitions and per-repetition result buffers for the TreeRep experiments.
NN = 20

times = zeros(NN)        # run time in seconds
map2 = zeros(NN)         # MAP before optimization
distort = zeros(NN)      # average distortion before optimization
map2opt = zeros(NN)      # MAP after optimization
distortopt = zeros(NN)   # average distortion after optimization
j = 1

## This has the heuristic optimization as full

In [None]:
# TreeRep followed by the heuristic weight optimization (TreeOpt.lsngd_mengdi).
# For each repetition: fit a tree to D, score it, re-optimize the edge weights, and
# score again. Large temporaries are released and GC.gc() is called between stages.
# The loop follows eachindex(times) so it stays in step with the buffers sized by NN
# (the original hard-coded 1:20), and the inner loops use their own index names instead
# of shadowing `j` and `i`.
for j in eachindex(times)
    times[j] = @elapsed G2,W2 = TreeRep.metric_to_structure(D,undef,undef);

    flush(stdout)
    # Restrict W2 to the vertices of G2 and zero out non-positive weights.
    B = W2[1:nv(G2),1:nv(G2)];
    B = sparse(B);
    B = (B .> 0) .* B;

    D2 = utilities.parallel_dp_shortest_paths(G2, B,false);
    # Least-squares scale aligning the leading n-by-n block of D2 with D.
    α = tr(D2[1:n,1:n]'*D)/tr(D2[1:n,1:n]'*D2[1:n,1:n])

    map2[j] = utilities.MAP(D2[1:n,1:n],g)
    distort[j] = utilities.avg_distortion(D2[1:n,1:n]*α,D)
    # Drop the large temporaries before the optimization stage.
    D2 = 0
    B = 0
    GC.gc()

    N = size(D)[1]

    @show(Sys.free_memory()/2^(30))

    # All index pairs (a, b) with b < a, handed to the optimizer.
    L = Int((N*(N-1))/2)
    IDXs = Array{Tuple{Int,Int},1}(undef,L)
    c = 1
    for a = 1:N
        for b = 1:a-1
            IDXs[c] = (a,b)
            c += 1
        end
    end

    @show(Sys.free_memory()/2^(30))

    times[j] += @elapsed x,EdgetoIdx = TreeOpt.lsngd_mengdi(G2,D,W2,IDXs,0.0001,200);

    IDXs = 0
    W2 = 0
    GC.gc()

    # Rebuild a symmetric weight matrix from the optimized edge weights, clamped at 0.
    N = nv(G2)
    W3 = zeros(N,N)
    E = collect(edges(G2))
    for e in E
        i2 = e.src
        j2 = e.dst
        idx = EdgetoIdx[(i2,j2)]
        w = max(0,x[idx])
        W3[i2,j2] = w
        W3[j2,i2] = w
    end

    B = W3[1:nv(G2),1:nv(G2)];
    B = sparse(B);
    B = (B .> 0) .* B;
    # Add a tiny offset to every distance, then restore the zero diagonal for the
    # first n points.
    D3 = utilities.parallel_dp_shortest_paths(G2, B,false) .+ 1e-14;
    for v = 1:n
        D3[v,v] = 0
    end

    α = tr(D3[1:n,1:n]'*D)/tr(D3[1:n,1:n]'*D3[1:n,1:n])

    distortopt[j] = utilities.avg_distortion(D3[1:n,1:n]*α,D)
    map2opt[j] = utilities.MAP(D3[1:n,1:n],g)
    @show((times[j],distort[j],map2[j]))
end

# Best and mean results over the repetitions, before and after optimization.
t = mean(times)
dis = minimum(distort)
m = maximum(map2)
disopt = minimum(distortopt)
mopt = maximum(map2opt)

mdis = mean(distort)
mm = mean(map2)
mdisopt = mean(distortopt)
mmopt = mean(map2opt)

@show((t,dis,m))
@show((t,disopt,mopt))
@show((t,mdis,mm))
@show((t,mdisopt,mmopt))

## This has the full optimization

In [None]:
# TreeRep followed by the full weight optimization (TreeOpt.makeAbMatrix + TreeOpt.lsngd).
# For each repetition: fit a tree to D, score it, re-optimize the edge weights, and
# overwrite the scores with the post-optimization values. Scores are computed without
# the α rescaling used in other cells.
NN = 20

times = zeros(NN)
map2 = zeros(NN)
distort = zeros(NN)

for j = 1:NN
    times[j] = @elapsed G2,W2 = TreeRep.metric_to_structure(D,undef,undef);
    
    flush(stdout)
    # Restrict W2 to the vertices of G2 and zero out non-positive weights.
    B = W2[1:nv(G2),1:nv(G2)];
    B = sparse(B);
    B = (B .> 0) .* B;
    
    D2 = utilities.parallel_dp_shortest_paths(G2, B,false);
    
    # Pre-optimization scores (shown below, then overwritten further down).
    distort[j] = utilities.avg_distortion(D2[1:n,1:n],D)
    map2[j] = utilities.MAP(D2[1:n,1:n],g)
    
    # Drop the large temporaries before the optimization stage.
    B = 0
    D2 = 0
    
    GC.gc()
    
    @show((times[j],distort[j],map2[j]))
    flush(stdout)
    
    # Optimization time is added to the TreeRep time.
    times[j] += @elapsed A,b,EdgetoIdx,x0 = TreeOpt.makeAbMatrix(G2,D,W2)
    times[j] += @elapsed x,loss = TreeOpt.lsngd(A,b,0.00000001,x0,5000)
    flush(stdout)
    # Rebuild a symmetric weight matrix from the optimized edge weights, clamped at 0.
    N = nv(G2)
    W2  = zeros(N,N)
    E = collect(edges(G2))
    for e in E
        i2 = e.src
        j2 = e.dst
        idx = EdgetoIdx[(i2,j2)]
        w = max(0,x[idx])
        W2[i2,j2] = w
        W2[j2,i2] = w
    end
    flush(stdout)
    B = W2[1:nv(G2),1:nv(G2)];
    B = sparse(B);
    B = (B .> 0) .* B;
    # Add a tiny offset to every distance, then restore the zero diagonal.
    D2 = utilities.parallel_dp_shortest_paths(G2, B) .+1e-13;
    for i = 1:size(D2)[1]
        D2[i,i] = 0
    end
    flush(stdout)

    # Post-optimization scores replace the earlier ones.
    distort[j] = utilities.avg_distortion(D2[1:n,1:n],D)
    map2[j] = utilities.MAP(D2[1:n,1:n],g)
    flush(stdout)
    @show((times[j],distort[j],map2[j]))
    flush(stdout)
end

# Mean time and best scores over the repetitions.
t = mean(times)
dis = minimum(distort)
m = maximum(map2)

@show((t,dis,m))

## This has no optimization

In [None]:
# TreeRep without any weight optimization: fit a tree to D N times and record run time,
# average distortion (after least-squares rescaling) and MAP.
N = 20

times = zeros(N)
map2 = zeros(N)
distort = zeros(N)
D2 = 0
j=1
for j = 1:N
    times[j] = @elapsed G2,W2 = TreeRep.metric_to_structure(D,undef,undef);
    G2 = utilities.remove_loops(G2)
    @show(times[j])
    flush(stdout)
    # Restrict W2 to the vertices of G2 and zero out non-positive weights.
    B = W2[1:nv(G2),1:nv(G2)];
    B = sparse(B);
    B = (B .> 0) .* B;

    # Release W2 before the all-pairs computation.
    W2 = 0
    GC.gc()
    D2 = utilities.parallel_dp_shortest_paths(G2, B);
    # Fix: the denominator now uses the same leading n-by-n block as the numerator.
    # The original summed over all of D2, which also covers any tree vertices beyond
    # the first n and so biased α.
    α = trm(D2[1:n,1:n]',D)/(sum(D2[1:n,1:n].^2))
    distort[j] = utilities.avg_distortion(D2[1:n,1:n]*α,D)
    map2[j] = utilities.MAP(D2[1:n,1:n],g)

    @show((distort[j],map2[j]))
end

In [None]:
# Mean time, best and mean scores over the repetitions.
# Fix: the best distortion is the minimum, as in the other two TreeRep summaries;
# the original took the maximum here.
t = mean(times)
dis = minimum(distort)
m = maximum(map2)
mdis = mean(distort)
mm = mean(map2)

@show((t,dis,m))
@show((t,mdis,mm))

# Neighbor Join

In [None]:
# Neighbor joining on a Float64 copy of D.
@time R = NJ.nj!(copy(convert(Matrix{Float64},D)));

In [None]:
# Convert the NJ result into a graph plus a symmetric sparse weight matrix.
g2 = SimpleGraph(R.numNodes)
w = spzeros(R.numNodes,R.numNodes)
for i = 1:R.numEdges
    src = R.edge[i].node[1].number
    dst = R.edge[i].node[2].number
    add_edge!(g2,src,dst)
    w[src,dst] = R.edge[i].length
    w[dst,src] = w[src,dst]
end

In [None]:
@time D5 = utilities.parallel_dp_shortest_paths(g2, w)

In [None]:
# NOTE(review): α is computed here but the distortion below is taken on the unscaled
# D5 -- confirm that NJ is meant to be scored without rescaling.
α = tr(D5[1:n,1:n]'*D)/tr(D5[1:n,1:n]'*D5[1:n,1:n])
@show(utilities.avg_distortion(D5[1:n,1:n],D));

In [None]:
@show(utilities.MAP(D5[1:n,1:n],g));

# BFS Tree

In [None]:
# Neighbour graph built from D by utilities.kNN with parameter 10.
g = utilities.kNN(D,10)

In [None]:
G = CompleteGraph(size(D)[1])

In [None]:
n = nv(G)

In [None]:
# Option 1: minimum spanning tree of the complete graph weighted by D.
@elapsed r = LightGraphs.prim_mst(G,D)

In [None]:
# Copy the MST edges into an undirected graph R.
R = SimpleGraph(n)
for e in r
    add_edge!(R,e)
end

In [None]:
# Option 2: BFS tree of the kNN graph from a random root. This re-binds r, and the next
# cell re-binds R, so the cells that follow use whichever option was run last.
@elapsed r = LightGraphs.bfs_tree(g,rand(1:n))

In [None]:
R = SimpleGraph(n)
for e in edges(r)
    add_edge!(R,e)
end

In [None]:
# Weight the tree edges by D and compute the tree's all-pairs distances.
wbfs =  adjacency_matrix(R) .* D
D3 = utilities.parallel_dp_shortest_paths(R,wbfs);

In [None]:
"""
    trm(A, B)

Return the sum over the rows `i` of `A` of `sum(A[i, :] .* B[:, i])`, which is the trace
of `A * B` when the product is square. The full product is never formed. With zero rows
the result is the integer `0`.
"""
function trm(A,B)
    nrows = size(A, 1)
    # Left-to-right accumulation starting from the integer 0.
    return mapfoldl(i -> sum(A[i, :] .* B[:, i]), +, 1:nrows; init=0)
end

In [None]:
# Least-squares scale for the tree metric D3: trm gives tr(D3*D), divided by the sum of
# the squared entries of D3.
α = trm(D3[1:n,1:n],D)/(sum(D3.^2))

In [None]:
@show(utilities.avg_distortion(D3[1:n,1:n]*α,D));

In [None]:
@show(utilities.MAP(D3[1:n,1:n],g));

## Doing the heuristic optimization for MST

In [None]:
# Heuristic weight optimization applied to the spanning tree R.
N = size(D)[1]
    
@show(Sys.free_memory()/2^(30))
    
# All index pairs (i, j) with j < i, handed to the optimizer.
L = Int((N*(N-1))/2)
IDXs = Array{Tuple{Int,Int},1}(undef,L)
c = 1
for i = 1:N 
    for j = 1:i-1
        IDXs[c] = (i,j)
        c += 1
    end
end

In [None]:
# The adjacency matrix of R is passed in the weight-matrix position.
@time x,EdgetoIdx = TreeOpt.lsngd_mengdi(R,D,adjacency_matrix(R),IDXs,0.0001,200);

In [None]:
# Rebuild a symmetric weight matrix from the optimized edge weights, clamped at 0.
N = nv(R)
W3 = zeros(N,N)
E = collect(edges(R))
for e in E
    i2 = e.src
    j2 = e.dst
    idx = EdgetoIdx[(i2,j2)]
    w = max(0,x[idx])
    W3[i2,j2] = w
    W3[j2,i2] = w
end

B = W3[1:nv(R),1:nv(R)];
B = sparse(B);
B = (B .> 0) .* B;
# Add a tiny offset to every distance, then restore the zero diagonal.
D3 = utilities.parallel_dp_shortest_paths(R, B) .+ 1e-14;
for i = 1:n
    D3[i,i] = 0
end


# Scores are taken on the unscaled distances; this re-binds `distort` and `map2` from
# vectors to scalars.
distort = utilities.avg_distortion(D3[1:n,1:n],D)
map2 = utilities.MAP(D3[1:n,1:n],g)

@show((distort,map2))

# Calculating statistics for the outputs from PM and LM

In [None]:
# Rebuild a 100-by-100 symmetric distance matrix from an edge-list file; `sc` must
# already be bound by an earlier cell. 50*99 is the number of unordered pairs of 100
# points.
Z = CSV.read("rand-dim$sc-scale1.csv")
D = zeros(100,100)
for k = 1:50*99
    i = Z[k,1]
    j = Z[k,2]
    D[i,j] = Z[k,3]
    D[j,i] = Z[k,3]
end

In [None]:
dist7 = zeros(10)

In [None]:
# Point ordering used by the external embedding; the identity here (a file-based
# ordering is left commented out).
p = collect(1:n) #npzread("./../../../grid-worm-order.npy");

In [None]:
# Inverse of the ordering p: IndexToIdx[p[i]] == i.
IndexToIdx = copy(p)
for i = 1:length(p)
    IndexToIdx[p[i]] = i
end

In [None]:
IndexToIdx;

In [None]:
# Load a distance matrix from a .npy file and re-index it through IndexToIdx
# (symmetric fill below the diagonal; the diagonal keeps the loaded values).
D2 = npzread("./zeisel-knn10.npy")
D2p = copy(D2)
for i = 1:length(p)
    for j = 1:i-1
        D2p[i,j] = D2[IndexToIdx[i],IndexToIdx[j]]
        D2p[j,i] = D2p[i,j]
    end
end

In [None]:
# Least-squares scale aligning the leading n-by-n block of D2p with D.
α = tr(D2p[1:n,1:n]'*D)/tr(D2p[1:n,1:n]'*D2p[1:n,1:n])

In [None]:
utilities.avg_distortion(D2p*α,D)

In [None]:
utilities.MAP(D2p[1:n,1:n],g)

# Calculate statistics for the outputs from PT 

In [None]:
# Distortion buffers for the PT outputs. Only dist8 is written in the cells below.
dist8 = zeros(10)
dist9 = zeros(10)

In [None]:
n=100

In [None]:
# Sweep step to evaluate; change i and re-run the cells below to fill other entries.
i = 10
sc = 2^i

In [None]:
# Rebuild the 100-by-100 symmetric distance matrix for this sweep step.
Z = CSV.read("rand-dim$sc-scale1.csv")
D = zeros(100,100)
for k = 1:50*99
    ii = Z[k,1]
    j = Z[k,2]
    D[ii,j] = Z[k,3]
    D[j,ii] = Z[k,3]
end

In [None]:
# Load the matching PT distance matrix and zero its diagonal. The loop variable is local
# to the loop, so the global `i` set above is unchanged.
D2 = npzread("./rand-dim$sc-scale1-l-1024npy.npy")
for i = 1:n
    D2[i,i] = 0
end

In [None]:
# Least-squares scale aligning the leading n-by-n block of D2 with D.
α = tr(D2[1:n,1:n]'*D)/tr(D2[1:n,1:n]'*D2[1:n,1:n])

In [None]:
dist8[i] = utilities.avg_distortion(D2*α,D)

In [None]:
utilities.MAP(D2[1:n,1:n],g)

# Ultra method

Not in Tree! I am no Tree! I am Low Dimensional Hyperbolic Embedding. (I think the original algorithm has a bug.)

In [None]:
"""
    compute_Ca(D, a=1)

Return the matrix `Ca` with `Ca[i, j] = 2m - (D[a, j] + D[a, i])`, where `m` is the
largest entry of row `a` of `D`. `D` is expected to be square; the result has `Float64`
entries for real-valued `D`.
"""
function compute_Ca(D,a=1)
    base = D[a, :]
    u = ones(size(D, 1))
    # Entry (i, j) of the sum is base[j] + base[i], formed from two outer products
    # with the all-ones vector.
    pairsum = u .* base' .+ base .* u'
    return 2 * maximum(base) .- pairsum
end

"""
    compute_U(M)

Build a weighted tree from the minimum spanning tree of the complete graph weighted by
`M`. Returns `(T, W)`: a `SimpleGraph` on `3n` vertices (`n = size(M, 1)`) and the
`3n`-by-`3n` sparse weight matrix filled in by `recurse_U`, starting from root vertex
`n + 1`. Entries that `recurse_U` stored as the placeholder `-1` become edges of weight 0.
"""
function compute_U(M)
    n = size(M, 1)

    # Minimum spanning tree of the complete graph under M, as an undirected graph.
    mst = SimpleGraph(n)
    for e in LightGraphs.prim_mst(CompleteGraph(n), M)
        add_edge!(mst, e)
    end

    # M restricted to the spanning-tree edges.
    Mp = LightGraphs.adjacency_matrix(mst) .* M

    # Vertices n+2 .. 3n are available as internal nodes; n+1 is the root.
    W = spzeros(3*n, 3*n)
    W, _ = recurse_U(mst, collect(1:n), Mp, collect(n+2:3*n), W, n+1)

    T = SimpleGraph(3*n)
    rows, cols, _ = findnz(W)
    for (i, j) in zip(rows, cols)
        add_edge!(T, i, j)
        if W[i, j] == -1
            W[i, j] = 0
        end
    end

    return T, W
end

"""
    recurse_U(T, V, M, nextroots, W, r)

Recursive step of `compute_U`. `T` is the current subtree, `V[k]` is the caller-level id
of its local vertex `k`, `M` holds its edge weights in local indices, `nextroots` lists
the internal-node ids still unused, `W` is the weight matrix being filled, and `r` is the
node the subtree hangs from.

With a single vertex left, that vertex is attached to `r` with the placeholder weight
`-1`. Otherwise the heaviest stored entry of `M` is located, two new nodes are attached
to `r` with half that weight each, the corresponding edge is removed from `T` (mutating
it), and the first two resulting components are processed recursively.

Returns `(W, nextroots)`, where `nextroots` holds the ids that are still unused.
"""
function recurse_U(T,V,M,nextroots,W,r)
    if length(V) == 1
        W[r,V[1]] = -1
        W[V[1],r] = -1

        return W,nextroots
    end

    r1 = nextroots[1]
    r2 = nextroots[2]

    # Heaviest stored entry of M and its (local) endpoints.
    I,J,U = findnz(M)
    m = argmax(U)
    i = I[m]
    j = J[m]

    half = U[m]/2
    W[r,r1] = half
    W[r,r2] = half
    W[r2,r] = half
    W[r1,r] = half

    # Cut that edge; the subtree falls apart into components.
    rem_edge!(T,i,j)
    C = connected_components(T)

    T1,V1 = induced_subgraph(T,C[1])
    T2,V2 = induced_subgraph(T,C[2])

    # Weights of each part, taken while V1/V2 still hold local indices.
    M1 = M[V1,V1]
    M2 = M[V2,V2]

    # Translate the local vertex maps into the caller's ids.
    V1 .= V[V1]
    V2 .= V[V2]

    # r1 and r2 are consumed here; the first call returns what it left unused.
    W,nextroots = recurse_U(T1,V1,M1,nextroots[3:end],W,r1)
    W,nextroots = recurse_U(T2,V2,M2,nextroots,W,r2)

    return W,nextroots
end

"""
    eps(D)

Fit a tree to the metric `D`: add `compute_Ca(D)` to `D`, build the tree of `compute_U`
on the result, take the all-pairs distances of its largest connected component, subtract
`compute_Ca(D)` again on the first `n` points, and run neighbor joining on that matrix.
Returns `(g2, w)`, the neighbor-joining tree as a `SimpleGraph` and its symmetric sparse
weight matrix. The neighbor-joining call is timed with `@time`, which prints.

NOTE(review): the name `eps` is also exported by Base; defining it here presumably
shadows `Base.eps` in this module -- confirm that nothing relies on the Base function.
"""
function eps(D)
    n = size(D, 1)
    Ca = compute_Ca(D)
    T, W = compute_U(D + Ca)

    # Keep only the largest connected component of T.
    comps = connected_components(T)
    biggest, _ = induced_subgraph(T, comps[argmax(length.(comps))])
    U = LightGraphs.Parallel.floyd_warshall_shortest_paths(biggest, W).dists
    AD = U[1:n, 1:n] - Ca

    @time R = NJ.nj!(copy(convert(Matrix{Float64}, AD)))

    # Convert the neighbor-joining result into a graph plus symmetric sparse weights.
    g2 = SimpleGraph(R.numNodes)
    w = spzeros(R.numNodes, R.numNodes)
    for k in 1:R.numEdges
        edge = R.edge[k]
        src = edge.node[1].number
        dst = edge.node[2].number
        add_edge!(g2, src, dst)
        w[src, dst] = edge.length
        w[dst, src] = w[src, dst]
    end

    return g2, w
end