# Contact-High-School

In [2]:
using Pkg
# Activate the project first so that `instantiate` resolves and installs the
# dependencies of the orchid environment rather than of whatever environment
# happened to be active when the notebook started.
Pkg.activate("../../orchid")
Pkg.instantiate()
using Orchid, SparseArrays, DelimitedFiles, Base.Threads, JSON, Printf, CSV, DataFrames
using Statistics

[32m[1m  Activating[22m[39m project at `~/Desktop/Research/Lower Ricci Curvature for Hypergraph/orchid`


In [3]:
"""
    parse_edgelist(fp)

Read the file at `fp` and return one `Vector{Int}` per non-blank line, obtained by
splitting the line on tab characters and parsing every field as an `Int`.

Leading/trailing whitespace on a line (including a trailing tab) is ignored, and lines
that are empty or contain only whitespace are skipped. A field that is not a valid
integer still raises the `ArgumentError` thrown by `parse`.
"""
function parse_edgelist(fp)
    # Concrete element type so that an empty file still yields Vector{Vector{Int}}.
    edges = Vector{Int}[]
    for line in eachline(fp)
        row = strip(line)
        isempty(row) && continue
        push!(edges, parse.(Int, split(row, '\t')))
    end
    return edges
end

"""
    generate_incidence_matrix(H_list)

Build the sparse hyperedge-by-node incidence matrix of the hypergraph `H_list`.

Row `i` corresponds to `H_list[i]`; columns correspond to the distinct node ids that
occur anywhere in `H_list`, in ascending order. Entry `(i, j)` is `1` when the `j`-th
node belongs to the `i`-th hyperedge and is not stored otherwise. A node listed more
than once in the same hyperedge still yields a single `1`.

Returns a `SparseMatrixCSC{Int,Int}` of size `(length(H_list), number_of_distinct_nodes)`.
"""
function generate_incidence_matrix(H_list::AbstractVector{<:AbstractVector{<:Integer}})
    nodes = sort!(collect(Set{Int}(Iterators.flatten(H_list))))
    # Constant-time node -> column lookup instead of a linear scan per incidence.
    column_of = Dict{Int,Int}(node => col for (col, node) in enumerate(nodes))

    rows = Int[]
    cols = Int[]
    for (i, edge) in enumerate(H_list), node in edge
        push!(rows, i)
        push!(cols, column_of[node])
    end

    # Assemble directly from triplets (no dense intermediate). `max` as the combine
    # function keeps duplicated (row, col) pairs at 1 rather than summing them.
    return sparse(
        rows, cols, ones(Int, length(rows)), length(H_list), length(nodes), max
    )
end

"""
    build_horc_specs()

Return the list of HORC configurations to evaluate: every combination of the three
dispersion options, the two aggregation options and the six `alpha` values listed
below (36 entries in total), with dispersion varying slowest and `alpha` fastest.

Each entry is a named tuple with fields `name`, `dispersion`, `aggregation` and
`alpha`. `name` is a `Symbol` of the form `horc_<dispersion>_<aggregation>_a<alpha>`,
where the decimal point of `alpha` is written as `p` (e.g. `horc_uw_star_max_a0p3`).
"""
function build_horc_specs()
    dispersions = (
        "uw_clique" => Orchid.DisperseUnweightedClique,
        "w_clique" => Orchid.DisperseWeightedClique,
        "uw_star" => Orchid.DisperseUnweightedStar,
    )
    aggregations = (
        "mean" => Orchid.AggregateMean,
        "max" => Orchid.AggregateMax,
    )
    alphas = (0.0, 0.1, 0.2, 0.3, 0.4, 0.5)

    specs = NamedTuple[]
    for (dlabel, dispersion) in dispersions
        for (alabel, aggregation) in aggregations
            for alpha in alphas
                # "0.1" -> "0p1" so the value can be embedded in a column name.
                tag = replace(@sprintf("%.1f", alpha), "." => "p")
                name = Symbol("horc_", dlabel, "_", alabel, "_a", tag)
                push!(specs, (; name, dispersion, aggregation, alpha))
            end
        end
    end
    return specs
end

"""
    extract_edge_curvature(result)

Look up `:aggregations` in `result` and return the `:edge_curvature` value of the
first entry that has one. Returns `nothing` when `result` has no `:aggregations`
key or when no entry carries an `:edge_curvature`.
"""
function extract_edge_curvature(result)
    entries = get(result, :aggregations, nothing)
    if entries !== nothing
        for item in entries
            curvature = get(item, :edge_curvature, nothing)
            if curvature !== nothing
                return curvature
            end
        end
    end
    return nothing
end

"""
    compute_horc(file_path, specs)

Compute one curvature column per entry of `specs` for the hypergraph stored in
`file_path` and return them as a `DataFrame`.

The table has one row per hyperedge. Its first column, `hyperedge`, holds the
comma-joined node ids of the hyperedge; every spec then adds a column named
`spec.name`. When `hypergraph_curvatures` throws for a spec, or its result contains
no edge curvature, a warning is logged (in the former case) and the column is filled
with `missing`.

The wall-clock time of every spec is measured, and the mean and sample standard
deviation over all specs are logged via `@info`.
"""
function compute_horc(file_path::AbstractString, specs)
    println("Start $(file_path)")
    edges = parse_edgelist(file_path)
    incidence = generate_incidence_matrix(edges)

    # One row per hyperedge, labelled by its comma-joined node ids.
    tbl = DataFrame(hyperedge = map(edge -> join(edge, ","), edges))

    runtimes = Float64[]
    for spec in specs
        outcome = nothing
        elapsed = @elapsed begin
            # A failing spec must not abort the remaining ones; it is recorded
            # as `nothing` and turned into a column of `missing` below.
            outcome = try
                hypergraph_curvatures(
                    spec.dispersion,
                    spec.aggregation,
                    incidence,
                    spec.alpha,
                    Orchid.CostOndemand,
                )
            catch err
                @warn "HORC failed" spec=spec.name error=err
                nothing
            end
        end
        push!(runtimes, elapsed)

        column = if outcome === nothing
            nothing
        else
            extract_edge_curvature(outcome)
        end
        if column === nothing
            column = fill(missing, nrow(tbl))
        end
        tbl[!, spec.name] = column
    end

    if isempty(runtimes)
        return tbl
    end

    avg_time = mean(runtimes)
    sd_time = std(runtimes; corrected=true)  # sample standard deviation
    @info "spec runtime summary" avg_time=avg_time sd_time=sd_time n=length(runtimes)
    return tbl
end


"""
    process_and_save(file_path, output_tsv)

Compute the HORC table for the hyperedge list in `file_path` using every spec from
`build_horc_specs()`, write it to `output_tsv` as a tab-delimited file, and print a
confirmation line.
"""
function process_and_save(file_path::AbstractString, output_tsv::AbstractString)
    table = compute_horc(file_path, build_horc_specs())
    CSV.write(output_tsv, table; delim = '\t')
    println("Saved HORC table to $(output_tsv)")
end


process_and_save (generic function with 1 method)

In [4]:
# Input: the hyperedge list handed to compute_horc / process_and_save below.
file_path = "./derived_data/hyperedges.tsv"
# Output: where process_and_save writes the tab-delimited HORC table.
output_tsv = "./derived_data/horc.tsv"

"./derived_data/horc.tsv"

## computation time

In [5]:
# Timing run: the trailing `;` discards the returned table, so the only visible
# result of this cell is the runtime summary that compute_horc logs.
compute_horc(file_path, build_horc_specs());

Start ./derived_data/hyperedges.tsv


[36m[1m┌ [22m[39m[36m[1mInfo: [22m[39mspec runtime summary
[36m[1m│ [22m[39m  avg_time = 2.8382338888333334
[36m[1m│ [22m[39m  sd_time = 0.7748573014529637
[36m[1m└ [22m[39m  n = 36


## save

In [6]:
# Compute every HORC variant for `file_path` and write the table to `output_tsv`.
process_and_save(file_path, output_tsv)

Start ./derived_data/hyperedges.tsv
Saved HORC table to ./derived_data/horc.tsv


[36m[1m┌ [22m[39m[36m[1mInfo: [22m[39mspec runtime summary
[36m[1m│ [22m[39m  avg_time = 2.7521139537499995
[36m[1m│ [22m[39m  sd_time = 0.09524687596204647
[36m[1m└ [22m[39m  n = 36
