In [1]:
using LightGraphs, SparseArrays, SimpleWeightedGraphs
using Statistics, BenchmarkTools, LinearAlgebra, ProgressMeter
using Base.Threads, PhyloNetworks, StatsBase, Distributions
using Base.GC, JLD2, FileIO, CSV, DataFrames
using Random, NPZ, GraphRecipes, Plots, Laplacians

In [2]:
# Load the project-local source files, in this order. Later cells call into the
# `utilities` and `TreeRep` modules (presumably defined by Utilities.jl and
# TreeRep.jl -- TODO confirm). The value displayed for this cell is the result of
# the last `include`.
include("TreeRep.jl")
include("ConstructTree.jl")
include("LevelTree.jl")
include("NJ.jl")
include("Utilities.jl")
include("Visualize.jl")
include("TreeOpt.jl")
include("SparseRep.jl")
include("Bartal.jl")

Main.Bartal

In [3]:
# Run the project's `tm` helper; the recorded output shows a thread count followed
# by four timings. NOTE(review): looks like a multithreading sanity check --
# confirm against Utilities.jl.
utilities.tm()

Number of threads = 8
  0.290953 seconds
  0.041666 seconds (539 allocations: 35.440 KiB)
  0.289400 seconds
  0.040000 seconds (1 allocation: 48 bytes)


In [4]:
# Load the taxonomy edge list as a DataFrame. A later cell reads the first two
# columns of every row as the two endpoints of an edge.
# `DataFrame(CSV.File(path))` is used instead of the single-argument
# `CSV.read(path)`, which newer CSV.jl releases no longer accept (they require an
# explicit sink); this form works on both old and new versions.
X = DataFrame(CSV.File("full_taxonomy.csv"));

In [5]:
# State used while building the graph:
#   name_to_node_idx -- maps each name seen so far to its vertex index
#   next_node        -- index that the next unseen name will receive
#   G                -- the (initially empty) undirected graph being built
name_to_node_idx = Dict{Any,Any}()
next_node = 1
G = SimpleGraph{Int}()

{0, 0} undirected simple Int64 graph

In [6]:
# Build the undirected graph `G` from the edge list held in the first two columns
# of `X`: every distinct name receives the next free vertex index (recorded in
# `name_to_node_idx`) and every row contributes one edge between its two names.
m = size(X, 1)
for i in 1:m
    # Loop locals are deliberately not called `src`/`dst`: LightGraphs exports
    # functions with those names.
    src_name = X[i, 1]
    dst_name = X[i, 2]

    for name in (src_name, dst_name)
        # `next_node` is a global counter. Declaring it `global` makes the update
        # below valid when this code runs as a script as well, instead of relying
        # on the notebook/REPL soft-scope rule.
        global next_node
        if !haskey(name_to_node_idx, name)
            name_to_node_idx[name] = next_node
            next_node += 1
            add_vertex!(G)
        end
    end

    add_edge!(G, name_to_node_idx[src_name], name_to_node_idx[dst_name])
end

n = nv(G)
W = adjacency_matrix(G);

In [7]:
# Report the size of the graph (vertex and edge counts) and whether it is
# connected.
n, E = nv(G), ne(G)
@show (n, E);
@show is_connected(G);

(n, E) = (19155, 78357)
is_connected(G) = true


In [8]:
# Time the project's parallel shortest-path routine on `G`, passing the graph's
# adjacency matrix as the second argument, and keep the result in `D`.
D = @time utilities.parallel_dp_shortest_paths(G, adjacency_matrix(G));

Progress:  98%|████████████████████████████████████████▎|  ETA: 0:00:01

 38.821643 seconds (3.96 M allocations: 61.501 GiB, 27.03% gc time)


In [9]:
# Display `D`, the matrix returned by `utilities.parallel_dp_shortest_paths`
# (presumably all-pairs shortest-path distances in `G` -- TODO confirm).
D

19155×19155 Array{Int64,2}:
 0  1  1  1  1  2  2  2  2  2  2  2  2  …  2  2  2  2  2  2  2  2  2  2  2  2
 1  0  1  1  1  1  1  1  1  1  1  1  1     2  2  2  2  2  2  2  2  2  2  2  2
 1  1  0  1  1  1  1  2  2  2  2  2  2     2  2  2  2  2  2  2  2  2  2  2  2
 1  1  1  0  1  1  1  1  1  1  1  1  1     2  2  2  2  2  2  2  2  2  2  2  2
 1  1  1  1  0  1  1  1  1  1  1  1  1     1  1  1  1  1  1  1  1  1  1  1  1
 2  1  1  1  1  0  2  2  2  2  2  2  2  …  2  2  2  2  2  2  2  2  2  2  2  2
 2  1  1  1  1  2  0  2  2  2  2  2  2     2  2  2  2  2  2  2  2  2  2  2  2
 2  1  2  1  1  2  2  0  1  2  2  2  2     2  2  2  2  2  2  2  2  2  2  2  2
 2  1  2  1  1  2  2  1  0  1  1  1  1     2  2  2  2  2  2  2  2  2  2  2  2
 2  1  2  1  1  2  2  2  1  0  2  2  2     2  2  2  2  2  2  2  2  2  2  2  2
 2  1  2  1  1  2  2  2  1  2  0  2  2  …  2  2  2  2  2  2  2  2  2  2  2  2
 2  1  2  1  1  2  2  2  1  2  2  0  2     2  2  2  2  2  2  2  2  2  2  2  2
 2  1  2  1  1  2  2  2  1  2  2  2 

In [10]:
# Run the TreeRep construction on the matrix `D` and record the wall-clock time in
# seconds. The call returns a graph `G2` together with `W2`, which a later cell
# slices and converts into a sparse weight matrix.
t = @elapsed begin
    G2, W2 = TreeRep.metric_to_structure_no_recursion(D, undef, undef)
end
print(t)

(nv(G), ne(G)) = (22282, 22281)
77.826300527

In [11]:
# Post-process the constructed graph and score it against the original matrix `D`.
G2 = utilities.remove_loops(G2)
flush(stdout)

# Restrict `W2` to the vertices that remain in `G2`, store it sparsely, and zero
# out every entry that is not strictly positive.
B = sparse(W2[1:nv(G2), 1:nv(G2)])
B = @. (B > 0) * B

# Force a garbage collection before computing the next large matrix.
GC.gc()
D2 = utilities.parallel_dp_shortest_paths(G2, B)

# Only the leading n-by-n block of `D2` is compared, where `n` is the vertex count
# of the original graph `G` (presumably the original vertices come first in `G2`
# -- TODO confirm against TreeRep.jl).
distort = utilities.avg_distortion(D2[1:n, 1:n], D)
map2 = utilities.MAP(D2[1:n, 1:n], G)

Progress:  99%|████████████████████████████████████████▌|  ETA: 0:00:01

0.33952971851501806

In [12]:
# Display the value returned by `utilities.avg_distortion` in the previous cell.
distort

0.08983959851741505

In [13]:
# Display the value returned by `utilities.MAP` in the evaluation cell.
map2

0.33952971851501806