In [1]:
#ENV["JULIA_DEBUG"] = Main

In [None]:
#ENV["JULIA_DEBUG"] = "none"

In [2]:
using LinearAlgebra, Polyhedra
using JuMP, Ipopt, CDDLib, Clarabel
using Random
using PhyloNetworks

In [3]:
function trop_normalize(x)
    return x .- first(x)
end

trop_normalize (generic function with 1 method)

# Polyhedral point location

In [4]:
"""
Calculate the polyhedral distance between two vectors.
The rows of `alphas` are the facet normals scaled to α⋅x = 1.
"""
function poly_dist(vec1, vec2, alphas)
    differences = alphas * (vec1 - vec2)
    return maximum(differences)
end

poly_dist

In [5]:
"""
Calculate the sum of polyhedral distances between `ref` and the points in `sample`.
The rows of `alphas` are the facet normals scaled to α⋅x = 1.
`power` gives the exponent of the distance before taking the sum.
"""
function sum_of_poly_dist(ref, sample, alphas; power=1)
    return sum([poly_dist(pt, ref, alphas)^power for pt in sample])
end

sum_of_poly_dist

In [6]:
"""
Find one polyhedral Fréchet mean of a given sample.
The rows of `alphas` are the facet normals scaled to α⋅x = 1.
`power` gives the exponent of the distance before taking the sum.
"""
function poly_frechet(sample, alphas; power=2)
    dim = length(sample[1])
    
    # Choose model depending on power
    if power == 1
        error("FW computation not yet implemeneted")
    elseif power == 2
        model = Model(Clarabel.Optimizer)
    else
        model = Model(Ipopt.Optimizer)
    end
    
    set_silent(model)   # suppress printing

    @variable(model, x[1:dim])
    @variable(model, t[1:length(sample)])

    @objective(model, Min, sum(t))
    
    progress = 0
    total = length(sample) * size(alphas, 1)

    for (p_idx, p) in enumerate(sample)
        expressions = alphas * (x - p)
        
        for expr in expressions
            progress += 1
            total = length(sample) * size(expressions, 1)
            @constraint(model, t[p_idx] >= (expr)^power)
        end
        
        @debug "Setting up constraints: $(round(progress/total * 100, digits=3))%   \r"
    end
    
    @debug "\nOptimising..."
    
    optimize!(model)
    minimiser = value.(x)
    
    return minimiser
end

poly_frechet

In [7]:
"""
Find the set of polyhedral Fréchet means of a given sample.
The rows of `alphas` are the facet normals scaled to α⋅x = 1.
`power` gives the exponent of the distance before taking the sum.
`rep` is either "vrep" or "hrep" -- returning either vertices or halfspaces.
"""
function poly_frechet_set(sample, alphas; power=2, rep::String="vrep", tol=1e-3)
    
    dim = length(sample[1])
    num_facets = size(alphas)[1]
    
    # Compute one Fréchet mean
    one_mean = poly_frechet(sample, alphas, power=power)
    
    @debug "Frechet mean found: $(one_mean)"
    
    # Rationalise all coordinates
    rat_alphas = rationalize.(alphas, tol=tol)
    rat_sample = [rationalize.(pt, tol=tol) for pt in sample]
    rat_mean = rationalize.(one_mean, tol=tol)
    
    distances = [poly_dist(rat_mean, pt, alphas) for pt in rat_sample]
    
    Amat = vcat(rat_alphas, -rat_alphas)
    bval1 = Rational{Int64}[]
    bval2 = Rational{Int64}[]
    
    # Discard redundant halfspacesprintln("Setting up constraints: $(round(progress/total * 100, digits=3))%   \r")
    evals = rat_alphas * hcat([rat_mean - pt for pt in rat_sample]...)
    for k in 1:length(rat_sample)
        evals[:,k] .-= distances[k]
    end
    
    progress = 0
    total = num_facets
    
    for k = 1:num_facets
        
        greatest_nonpos = argmax([x > 0 ? -Inf : x for x in evals[k,:]])
        push!(bval1, dot(rat_alphas[k,:], rat_sample[greatest_nonpos]) + 
            distances[greatest_nonpos])
        
        progress += 1
        @debug "Removing redundant half-spaces: $(round(progress/total * 100, digits=3))%   \r"    
    end
    
    @debug "\nFinding defining facets..."
    
    poly = polyhedron(hrep(rat_alphas, bval1), CDDLib.Library(:exact))
    removehredundancy!(poly)
    
    if rep == "hrep"
        return poly
    elseif rep == "vrep"
        @debug "Finding vertices..."
        return vrep(poly)
    else
        @debug "Defaulting to hrep."
        return poly
    end
end

poly_frechet_set

## Examples

### Toy example: Fréchet mean for $L^\infty$-norm in $\mathbb{R}^2$

In [8]:
### Facets for the L-infinity norm
### Technically twice as long as strictly necessary
Linf_facets = [ 1  0 ;
               -1  0 ;
                0  1 ;
                0 -1 ]

sample = [ [0, 0], 
           [2, 2] ]

my_point = [7, 10]

println("Polyhedral distances from sample to $(my_point): ", 
    [poly_dist(pt, my_point, Linf_facets) for pt in sample])

println("Sum of polyhedral distances: ", 
    sum_of_poly_dist(my_point, sample, Linf_facets))

println("Sum of squared polyhedral distances: ", 
    sum_of_poly_dist(my_point, sample, Linf_facets, power=2))


Polyhedral distances from sample to [7, 10]: [10, 8]
Sum of polyhedral distances: 18
Sum of squared polyhedral distances: 164


In [9]:
@show poly_frechet(sample, Linf_facets)
poly_frechet_set(sample, Linf_facets, rep="vrep", tol=1e-3)

poly_frechet(sample, Linf_facets) = [1.0000522121167505, 1.000052212116274]


V-representation CDDGeneratorMatrix{Rational{BigInt}, GMPRational}:
1-element iterator of Vector{Rational{BigInt}}:
 Rational{BigInt}[1, 1]

### Manual tropical Fréchet mean in $\mathbb{R}^3/\mathbb{R}\mathbf{1}$

In [10]:
Linf3_facets = [ -1   1  0;
                  1  -1  0;
                 -1   0  1;
                  1   0 -1;
                  0  -1  1;
                  0   1 -1 ]

sample2 = [ [0, 0, 0], 
            [0, 2, 4],
            [0, 5, 1] ]

@show poly_frechet(sample2, Linf3_facets, power=2) |> trop_normalize
poly_frechet_set(sample2, Linf3_facets, rep="vrep", tol=1e-3)

poly_frechet(sample2, Linf3_facets, power = 2) |> trop_normalize = [0.0, 1.9999999991416932, 1.0001319110045057]


V-representation CDDGeneratorMatrix{Rational{BigInt}, GMPRational}:
1-element iterator of Vector{Rational{BigInt}}:
 Rational{BigInt}[-1, 1, 0],
1-element iterator of Line{Rational{BigInt}, Vector{Rational{BigInt}}}:
 Line(Rational{BigInt}[1, 1, 1])

# Tropical Frèchet means

In [11]:
"""
Find the relevant facet normals in n dimensions for a tropical ball
"""
function trop_facets(n::Int64)
    result = zeros(Rational{Int64}, n * (n - 1), n)
    k = 1
    for i = 1:n
        for j = 1:n
            if i != j
                result[k, i] = 1//1
                result[k, j] = -1//1
                k += 1
            end
        end
    end
    return result
end

trop_facets

In [12]:
"""
Calculate the tropical distance between two vectors.
"""
function trop_dist(vec1, vec2)
    return maximum(vec1 - vec2) - minimum(vec1 - vec2)
end

trop_dist

In [13]:
"""
Calculate the sum of tropical distances between `ref` and the points in `sample`.
"""
function sum_of_trop_dist(ref, sample; power=1)
    return sum([trop_dist(pt, ref)^power for pt in sample])
end

sum_of_trop_dist

In [14]:
"""
Find one polyhedral Fréchet mean of a given sample.
The rows of `alphas` are the facet normals scaled to α⋅x = 1.
`power` gives the exponent of the distance before taking the sum.
"""
function trop_frechet(sample; power=2)
    dim = length(sample[1])
    alphas = trop_facets(dim)
    return poly_frechet(sample, alphas, power=power)
end

trop_frechet

In [15]:
"""
Find one polyhedral Fréchet mean of a given sample.
The rows of `alphas` are the facet normals scaled to α⋅x = 1.
`power` gives the exponent of the distance before taking the sum.
"""
function trop_frechet_set(sample; power=2, rep::String="vrep", tol=1e-3)
    dim = length(sample[1])
    alphas = trop_facets(dim)
    return poly_frechet_set(sample, alphas, power=power, rep=rep, tol=tol)
end

trop_frechet_set

## Examples
### Toy example

In [16]:
sample = [[0,0,0], [0,4,1]]

my_point = [7, 10, 2]

println("Tropical distances: ", 
    [trop_dist(pt, my_point) for pt in sample])

println("Sum of tropical distances: ", 
    sum_of_trop_dist(my_point, sample))

println("Sum of squared tropical distances: ", 
    sum_of_trop_dist(my_point, sample, power=2))

Tropical distances: [8, 6]
Sum of tropical distances: 14
Sum of squared tropical distances: 100


In [17]:
@show trop_frechet(sample)
trop_frechet_set(sample, rep="vrep", tol=1e-3)

trop_frechet(sample) = [-0.8435711338283638, 1.1565453903627, -0.12391541027598676]


V-representation CDDGeneratorMatrix{Rational{BigInt}, GMPRational}:
2-element iterator of Vector{Rational{BigInt}}:
 Rational{BigInt}[-1, 1, 0]
 Rational{BigInt}[0, 2, 0],
1-element iterator of Line{Rational{BigInt}, Vector{Rational{BigInt}}}:
 Line(Rational{BigInt}[1, 1, 1])

### Example of Section 4.2. "Exact Quadratic Optimization"

In [18]:
sample = [[-3,0,0], [0,-6,0], [0,0,-12]]

@show trop_frechet(sample)
trop_frechet_set(sample, rep="vrep", tol=1e-3)

trop_frechet(sample) = [-0.10075916894249483, -0.10075917024553435, -1.1007832316089412]


V-representation CDDGeneratorMatrix{Rational{BigInt}, GMPRational}:
1-element iterator of Vector{Rational{BigInt}}:
 Rational{BigInt}[1, 1, 0],
1-element iterator of Line{Rational{BigInt}, Vector{Rational{BigInt}}}:
 Line(Rational{BigInt}[1, 1, 1])

### Random points in high dimensional space

In [19]:
n = 20
m = n

rand_sample = [rand(1:20, n) for _ in 1:m]

20-element Vector{Vector{Int64}}:
 [18, 20, 19, 18, 13, 6, 6, 15, 12, 15, 11, 7, 11, 17, 14, 11, 9, 6, 19, 6]
 [2, 1, 11, 12, 16, 20, 14, 8, 14, 16, 3, 16, 4, 13, 5, 18, 13, 10, 2, 1]
 [15, 5, 19, 12, 3, 19, 14, 17, 11, 3, 13, 20, 3, 15, 9, 9, 10, 8, 8, 20]
 [2, 5, 3, 15, 17, 8, 9, 8, 10, 10, 3, 3, 4, 14, 4, 6, 8, 17, 20, 10]
 [12, 8, 7, 4, 20, 13, 12, 19, 15, 6, 9, 3, 6, 19, 2, 7, 2, 14, 6, 3]
 [18, 2, 19, 13, 13, 17, 19, 20, 11, 7, 7, 18, 1, 12, 14, 14, 13, 9, 20, 7]
 [9, 11, 17, 3, 3, 13, 4, 3, 17, 14, 9, 19, 12, 19, 3, 9, 17, 3, 16, 6]
 [17, 8, 7, 8, 15, 3, 16, 17, 12, 12, 2, 17, 9, 7, 10, 14, 10, 14, 14, 18]
 [14, 12, 18, 14, 14, 3, 16, 16, 19, 6, 9, 3, 2, 18, 13, 6, 14, 1, 5, 4]
 [16, 20, 18, 18, 18, 20, 5, 15, 13, 15, 17, 1, 6, 13, 11, 7, 9, 7, 12, 7]
 [13, 12, 9, 14, 1, 10, 5, 6, 16, 18, 13, 2, 1, 2, 4, 9, 13, 18, 16, 13]
 [16, 2, 7, 9, 5, 6, 1, 8, 15, 15, 16, 15, 8, 2, 3, 15, 15, 13, 1, 9]
 [9, 16, 9, 19, 14, 12, 13, 14, 6, 10, 7, 7, 19, 10, 6, 6, 13, 15, 15, 16]
 [6, 10, 8, 1

In [20]:
@time trop_frechet(rand_sample) |> trop_normalize

  0.355334 seconds (5.50 M allocations: 277.393 MiB, 14.36% gc time)


20-element Vector{Float64}:
  0.0
 -0.3333264353868799
 -0.5551690614674953
 -1.666659646840344
 -2.3333264294496985
 -0.3333264357615917
 -1.1614122121327455
 -0.3333264435077581
 -0.33332643837820797
 -2.3333264319644513
 -1.333326437339565
 -1.333326441533245
 -2.333326431925343
 -1.3333264379843968
 -2.5868189151460683
 -0.3333264377533851
 -2.3333264371603946
 -1.3333170733167208
  0.6666735430817425
 -1.3333264438542756

In [21]:
@time trop_frechet_set(rand_sample, rep="vrep", tol=1e-3)

  2.703169 seconds (15.31 M allocations: 375.971 MiB, 6.98% gc time, 1.18% compilation time)


V-representation CDDGeneratorMatrix{Rational{BigInt}, GMPRational}:
8-element iterator of Vector{Rational{BigInt}}:
 Rational{BigInt}[1613//1209, 1, 0, -134//403, -1, 1, 0, 1, 1, -1, 0, 0, -1, 0, -1, 1, -1, 0, 2, 0]
 Rational{BigInt}[1613//1209, 1, 2822//1209, -134//403, -1, 1, 0, 1, 1, -1, 0, 0, -1, 0, -1, 1, -1, 0, 2, 0]
 Rational{BigInt}[1613//1209, 1, 2822//1209, -134//403, -1, 1, 1, 1, 1, -1, 0, 0, -1, 0, -1, 1, -1, 0, 2, 0]
 Rational{BigInt}[1613//1209, 1, 0, -134//403, -1, 1, 1, 1, 1, -1, 0, 0, -1, 0, -1, 1, -1, 0, 2, 0]
 Rational{BigInt}[1613//1209, 1, 2822//1209, -134//403, -1, 1, 0, 1, 1, -1, 0, 0, -1, 0, -5, 1, -1, 0, 2, 0]
 Rational{BigInt}[1613//1209, 1, 0, -134//403, -1, 1, 0, 1, 1, -1, 0, 0, -1, 0, -5, 1, -1, 0, 2, 0]
 Rational{BigInt}[1613//1209, 1, 2822//1209, -134//403, -1, 1, 1, 1, 1, -1, 0, 0, -1, 0, -5, 1, -1, 0, 2, 0]
 Rational{BigInt}[1613//1209, 1, 0, -134//403, -1, 1, 1, 1, 1, -1, 0, 0, -1, 0, -5, 1, -1, 0, 2, 0],
1-element iterator of Line{Rational{BigInt}, Ve

### Big random sample in 3-space

In [22]:
n = 3

Random.seed!(50)

for m = 2:3n
    dim_vec = zeros(Int64, n+1)
    for _ = 1:100
        rand_sample = [rand(1:10, n) for _ in 1:m]
        try
            my_frech = trop_frechet_set(rand_sample, rep="hrep", tol=1e-3)
            dim_vec[dim(my_frech) + 1] += 1
        catch e
            println(rand_sample)
        end
    end
    println(m, " sample points ", dim_vec / sum(dim_vec))
end

2 sample points [0.0, 0.26, 0.74, 0.0]
3 sample points [0.0, 0.62, 0.38, 0.0]
4 sample points [0.0, 0.9, 0.1, 0.0]
5 sample points [0.0, 0.93, 0.07, 0.0]
[[8, 9, 10], [6, 7, 2], [4, 6, 4], [9, 10, 10], [8, 5, 6], [5, 7, 3]]
6 sample points [0.0, 0.9696969696969697, 0.030303030303030304, 0.0]
7 sample points [0.0, 1.0, 0.0, 0.0]
8 sample points [0.0, 1.0, 0.0, 0.0]
[[2, 8, 10], [6, 1, 5], [5, 8, 6], [3, 1, 10], [7, 8, 10], [5, 4, 8], [2, 1, 10], [6, 2, 8], [9, 1, 6]]
9 sample points [0.0, 1.0, 0.0, 0.0]


## Application: Phylogenetic trees

In [23]:
using JSON3
using DataFrames

# Function to read the JSON file and convert to a list of matrices
function read_and_convert_json(file_path::String)
    # Read the JSON file
    json_data = JSON3.read(file_path)
    
    # Extract elements from the nested arrays
    elements = [x[1] for x in json_data]
    elements = rationalize.(10000 * elements, tol=1e-2)
    
    # Convert elements into matrices
    num_elements = length(elements)
    matrices = []
    
    for i in 1:64:num_elements
        # Get the next 64 elements
        matrix_elements = elements[i:min(i+63, num_elements)]
        
        # Convert to an 8x8 matrix if there are 64 elements, otherwise create a smaller matrix
        matrix_size = length(matrix_elements)
        sqrt_size = Int(sqrt(matrix_size))
        push!(matrices, reshape(matrix_elements, sqrt_size, sqrt_size))
    end
    
    return matrices
end

# Read and convert the JSON file
file_path = "all_matrices.json"
matrices = read_and_convert_json(file_path)
taxa = ["Tg", "Et", "Cp", "Ta", "Bb", "Tt", "Pv", "Pf"]

8-element Vector{String}:
 "Tg"
 "Et"
 "Cp"
 "Ta"
 "Bb"
 "Tt"
 "Pv"
 "Pf"

In [24]:
"""
Take a matrix of pairwise distances between taxa and returns the cophenetic vector.
"""
function cophenetic_from_distance(pairwise)
    n = size(pairwise, 1)
    coph = [pairwise[i, j] for i in 1:n-1 for j in i+1:n]
    return coph
end

"""
Check if a distance matrix defines a phylogenetic tree
"""
function is_phylogenetic_tree(D)
    n = size(D, 1)
    
    # Check if the matrix is symmetric and non-negative
    for i in 1:n
        for j in i:n
            if D[i, j] != D[j, i] || D[i, j] < 0
                return false
            end
        end
    end

    # Check the four-point condition
    for i in 1:n-3
        for j in i+1:n-2
            for k in j+1:n-1
                for l in k+1:n
                    # Calculate distances
                    D_ij_kl = D[i, j] + D[k, l]
                    D_ik_jl = D[i, k] + D[j, l]
                    D_il_jk = D[i, l] + D[j, k]
                    
                    # Check the four-point condition
                    if !(D_ij_kl >= D_ik_jl && D_ij_kl >= D_il_jk) &&
                       !(D_ik_jl >= D_ij_kl && D_ik_jl >= D_il_jk) &&
                       !(D_il_jk >= D_ij_kl && D_il_jk >= D_ik_jl)
                        return false
                    end
                end
            end
        end
    end
    
    return true
end

"""
Check if a distance matrix defines an ultrametric tree
"""
function is_ultrametric_tree(D)
    n = size(D, 1)
    
    # Check if the matrix is symmetric and non-negative
    for i in 1:n
        for j in i:n
            if D[i, j] != D[j, i] || D[i, j] < 0
                return false
            end
        end
    end

    # Check the ultrametric condition
    for i in 1:n-2
        for j in i+1:n-1
            for k in j+1:n
                # Calculate distances
                Dij = D[i, j]
                Dik = D[i, k]
                Djk = D[j, k]
                
                # Check if the largest distance is attained at least twice
                if !(Dij <= max(Dik, Djk) && Dik <= max(Dij, Djk) && Djk <= max(Dij, Dik))
                    return false
                end
            end
        end
    end
    
    return true

end

is_ultrametric_tree

In [25]:
println("All phylogenetic: ", all([is_phylogenetic_tree(matrices[i]) for i = 1:268]))
println("All ultrametric: ", all([is_ultrametric_tree(matrices[i]) for i = 1:268]))

All phylogenetic: true
All ultrametric: false


In [None]:
coph_vecs = [cophenetic_from_distance(mat) for mat in matrices]
phylo_frech = trop_frechet_set(coph_vecs, rep="hrep")

In [None]:
dim(phylo_frech)

In [None]:
N = 10
M = 100
Random.seed!(123)

dimensions = [[] for _ = 3:N]
for n = 3:N
    for m = 2:M
        sample = [Rational.(rand(0:10000, n)) for _ in 1:m]
        push!(dimensions[n-2], dim(trop_frechet_set(sample, rep="hrep")))
        println("m = ", m, "; n = ", n, " done!")
    end
end

dimensions

In [None]:
using Plots

# Example data: categories and their respective proportions in different groups
categories = ["Category 1", "Category 2", "Category 3"]

proportional_dimensions = [dim_list / M for dim_list in dimensions]
data = hcat(proportional_dimensions...)

# Create the proportional bar chart
bar(data, label=1:M, legend=:topright, title="Proportional Bar Chart", xlabel="Groups", ylabel="Proportion", bar_width=0.7, lw=0, series_annotations=transpose([group1 group2 group3]))

# Save the plot to a file
savefig("proportional_bar_chart.png")

# Display the plot
display(plot)

In [None]:
for thing in dimensions
    println(thing)
    println()
end