In [12]:
using LinearAlgebra
using Polyhedra, CDDLib
using JuMP, Ipopt, Clarabel
using TropicalFrechetMeans

In [13]:
import TropicalFrechetMeans: tropical_frechet_mean, tropical_frechet_set

# Convenience methods that fix the solver back-ends for this notebook, so later
# cells only have to pass the sample. They forward to the package methods that take
# the optimizer (and, for the set, the polyhedra library) as leading arguments.
function tropical_frechet_mean(sample; power=2)
    return tropical_frechet_mean(Clarabel.Optimizer, sample; power=power)
end

function tropical_frechet_set(sample; power=2, tol=1e-3)
    # Exact-arithmetic CDD back-end for the polyhedral computation.
    exact_library = CDDLib.Library(:exact)
    return tropical_frechet_set(
        Clarabel.Optimizer, exact_library, sample; power=power, tol=tol
    )
end

tropical_frechet_set (generic function with 3 methods)

In [14]:
"""
    trop_normalize(x)

Return `x` shifted so that its first entry becomes zero, i.e. subtract `first(x)`
from every entry of `x`.
"""
function trop_normalize(x)
    anchor = first(x)
    return broadcast(-, x, anchor)
end

trop_normalize (generic function with 1 method)

In [15]:
using JSON3
using DataFrames

"""
    read_and_convert_json(file_path::String; block_size::Integer=64)

Read the JSON document referred to by `file_path` and return a vector of square
matrices with rational entries.

The first entry of every top-level JSON item is taken, multiplied by 10000 and
converted with `rationalize` (tolerance `1e-2`). The resulting flat list is cut into
consecutive chunks of `block_size` values (64 by default, i.e. 8x8 matrices) and each
chunk is reshaped, column-major, into a square matrix. A shorter trailing chunk is
accepted as long as its length is a perfect square.

# Keywords
- `block_size`: number of values per matrix; must be positive.

# Throws
- `ArgumentError` if `block_size` is not positive, or if a chunk's length is not a
  perfect square and therefore cannot be reshaped into a square matrix.
"""
function read_and_convert_json(file_path::String; block_size::Integer=64)
    block_size > 0 ||
        throw(ArgumentError("block_size must be positive, got $block_size"))

    # `file_path` is handed to JSON3.read unchanged.
    json_data = JSON3.read(file_path)

    # Keep only the first entry of each nested array, then scale and rationalize.
    raw_values = [x[1] for x in json_data]
    elements = rationalize.(10000 * raw_values, tol=1e-2)

    num_elements = length(elements)
    # Typed accumulator: avoids building a Vector{Any}.
    matrices = Matrix{eltype(elements)}[]

    for start in 1:block_size:num_elements
        chunk = elements[start:min(start + block_size - 1, num_elements)]

        # Integer square root plus an explicit check gives a clear error for a
        # malformed trailing chunk instead of an opaque InexactError.
        side = isqrt(length(chunk))
        side * side == length(chunk) || throw(
            ArgumentError(
                "chunk starting at element $start has $(length(chunk)) values, " *
                "which cannot be reshaped into a square matrix",
            ),
        )

        push!(matrices, reshape(chunk, side, side))
    end

    return matrices
end

# Load the matrices from the JSON file; the path is relative to the current
# working directory of the notebook.
file_path = "all_matrices.json"
matrices = read_and_convert_json(file_path)
# Taxon labels (eight entries) — presumably in the row/column order of each
# matrix; TODO confirm against the data source.
taxa = ["Tg", "Et", "Cp", "Ta", "Bb", "Tt", "Pv", "Pf"]

8-element Vector{String}:
 "Tg"
 "Et"
 "Cp"
 "Ta"
 "Bb"
 "Tt"
 "Pv"
 "Pf"

In [16]:
"""
    cophenetic_from_distance(pairwise)

Take a matrix of pairwise distances between taxa and return the cophenetic vector:
the entries strictly above the diagonal, listed row by row
(`(1,2), (1,3), …, (1,n), (2,3), …, (n-1,n)`), where `n = size(pairwise, 1)`.
"""
function cophenetic_from_distance(pairwise)
    ntaxa = size(pairwise, 1)
    # One lazy run of strictly-upper-triangular entries per row `a`.
    upper_rows = ((pairwise[a, b] for b in (a + 1):ntaxa) for a in 1:(ntaxa - 1))
    return collect(Iterators.flatten(upper_rows))
end

cophenetic_from_distance

In [17]:
"""
    is_phylogenetic_tree(D; atol=0)

Check whether the distance matrix `D` defines a phylogenetic tree.

`D` must be symmetric with non-negative entries and satisfy the four-point
condition: for every four distinct indices `i < j < k < l`, the maximum of the three
sums

    D[i,j] + D[k,l],   D[i,k] + D[j,l],   D[i,l] + D[j,k]

is attained at least twice.

# Keywords
- `atol`: absolute tolerance used when deciding whether a sum ties with the maximum.
  The default `0` demands exact equality, which is appropriate for integer or
  rational input; pass a small positive value for rounded or floating-point data.

# Returns
`true` if all checks pass, `false` otherwise. Matrices with fewer than four taxa
only undergo the symmetry / non-negativity check.
"""
function is_phylogenetic_tree(D; atol=0)
    n = size(D, 1)

    # Check that the matrix is symmetric and non-negative
    for i in 1:n
        for j in i:n
            if D[i, j] != D[j, i] || D[i, j] < 0
                return false
            end
        end
    end

    # Check the four-point condition on every quadruple i < j < k < l
    for i in 1:n-3, j in i+1:n-2, k in j+1:n-1, l in k+1:n
        sum_ij_kl = D[i, j] + D[k, l]
        sum_ik_jl = D[i, k] + D[j, l]
        sum_il_jk = D[i, l] + D[j, k]

        largest = max(sum_ij_kl, sum_ik_jl, sum_il_jk)

        # Count how many sums reach the maximum (the maximum itself always counts
        # once); a tree metric needs at least two.
        ties = (largest - sum_ij_kl <= atol) +
               (largest - sum_ik_jl <= atol) +
               (largest - sum_il_jk <= atol)
        if ties < 2
            return false
        end
    end

    return true
end

is_phylogenetic_tree

In [18]:
"""
    is_ultrametric_tree(D)

Check whether the distance matrix `D` defines an ultrametric tree.

`D` must be symmetric with non-negative entries, and for every triple of indices
`a < b < c` each of the three pairwise distances must be at most the larger of the
other two (equivalently, the largest of the three is attained at least twice).
Returns `true` if every check passes and `false` otherwise.
"""
function is_ultrametric_tree(D)
    ntaxa = size(D, 1)

    # Symmetry and non-negativity, scanning the upper triangle and the diagonal.
    for row in 1:ntaxa, col in row:ntaxa
        entry = D[row, col]
        if entry != D[col, row] || entry < 0
            return false
        end
    end

    # Three-point (ultrametric) condition on every triple a < b < c.
    for a in 1:ntaxa-2, b in a+1:ntaxa-1, c in b+1:ntaxa
        d_ab = D[a, b]
        d_ac = D[a, c]
        d_bc = D[b, c]

        max_attained_twice =
            d_ab <= max(d_ac, d_bc) &&
            d_ac <= max(d_ab, d_bc) &&
            d_bc <= max(d_ab, d_ac)
        max_attained_twice || return false
    end

    return true
end

is_ultrametric_tree

In [19]:
# Run both checks over every loaded matrix. Iterating `matrices` directly (rather
# than a fixed index range) keeps this correct whatever the number of matrices in
# the input file.
println("All phylogenetic: ", all(is_phylogenetic_tree, matrices))
println("All ultrametric: ", all(is_ultrametric_tree, matrices))

All phylogenetic: true
All ultrametric: false


In [20]:
# One cophenetic vector per distance matrix, in the same order as `matrices`.
coph_vecs = map(cophenetic_from_distance, matrices)

268-element Vector{Vector{Rational{Int64}}}:
 [3784, 6626, 9906, 6521, 11778, 8750, 7661, 7601, 10881, 7496  …  4901, 15579, 12551, 11462, 12194, 9167, 8078, 14217, 13128, 1089]
 [3485, 8484, 9427, 8865, 25257, 9257, 10300, 8814, 9756, 9195  …  3645, 25010, 9010, 10054, 24449, 8449, 9492, 20033, 21076, 2163]
 [3570, 5592, 6890, 4591, 4849, 5136, 4872, 3905, 6062, 3763  …  6609, 6867, 7154, 6890, 2383, 4367, 4103, 4625, 4361, 274]
 [1696, 4297, 5665, 6408, 4431, 4218, 4248, 4473, 5841, 6584  …  9039, 7062, 6849, 6880, 5542, 6984, 7014, 5006, 5037, 557]
 [1864, 8173, 6779, 10056, 10843, 9010, 9319, 8433, 7039, 10315  …  11759, 12546, 10713, 11022, 15116, 13283, 13592, 12332, 12642, 2722]
 [3801, 11567, 5222, 5725, 5642, 8167, 7548, 12836, 6491, 6994  …  6129, 6046, 8571, 7952, 601, 7410, 6791, 7327, 6708, 4443]
 [751, 29706, 6877, 4376, 5857, 9999, 8966, 29899, 7070, 4569  …  6079, 7560, 11702, 10669, 3359, 8305, 7272, 9787, 8754, 5104]
 [4080, 8200, 9699, 9924, 11942, 9302, 9463, 7689, 

In [24]:
# Time the Fréchet mean computation together with the shift that sets the first
# coordinate to zero.
@time trop_normalize(tropical_frechet_mean(coph_vecs))

  6.768138 seconds (79.33 M allocations: 5.835 GiB, 8.33% gc time, 0.22% compilation time)


28-element Vector{Float64}:
  0.0
  9.796453618021034e-8
 -1.1183419040827758e-7
  1.0547853252981798e-7
 -4.96050883537838e-8
 -2.1744667498026227e-8
  1.398223372812979e-7
  1.1246609400109714e-7
 -1.2560321007402953e-7
  7.509146351773743e-8
  ⋮
 -1.5001126870586683e-5
 -1.1906876172616127e-6
  3.9068436725325784e-8
 -3.324646384202101e-7
 -1.4674355597755345e-7
  1.2851988472582787e-7
 -1.2704829004464568e-6
 -2.856226545486229e-7
 -3.6294635013221777e-7

In [23]:
# Time the computation of the Fréchet set of the cophenetic vectors (default
# `power` and `tol` of the one-argument wrapper) and keep the result in `phylo_frech`.
@time phylo_frech = tropical_frechet_set(coph_vecs)

 16.595804 seconds (275.28 M allocations: 12.098 GiB, 15.34% gc time)


Polyhedron CDDLib.Polyhedron{Rational{BigInt}}:
202608-element iterator of HalfSpace{Rational{BigInt}, Vector{Rational{BigInt}}}:
 HalfSpace(Rational{BigInt}[1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 155384321//13340)
 HalfSpace(Rational{BigInt}[1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 111629121//13340)
 HalfSpace(Rational{BigInt}[1, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 156785021//13340)
 HalfSpace(Rational{BigInt}[1, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 86656641//13340)
 HalfSpace(Rational{BigInt}[1, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 127050161//13340)
 HalfSpace(Rational{BigInt}[1, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 141577421//13340)
 HalfSpace(Rational{BigInt}[1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0,