In [1]:
using Pkg

In [2]:
Pkg.activate("/work/users/s/h/shiyi/hypergraph_with_curvature/orchid")

[32m[1m  Activating[22m[39m project at `/work/users/s/h/shiyi/hypergraph_with_curvature/orchid`


In [3]:
using Orchid
using Printf
using TimerOutputs
using SparseArrays

In [4]:
"""
    parse_edgelist(fp)

Read the file at `fp` and return one vector of integers per non-empty line.
Each line is split on tab characters and every field is parsed as an `Int`.
"""
function parse_edgelist(fp)
    rows = filter(!isempty, readlines(fp))
    return [parse.(Int, split(row, '\t')) for row in rows]
end
"""
    parse_edgelist_collection(fp)

Read the file at `fp`, where every line is a whitespace-separated list of integers.
The first integer of a line is collected into `y`; the remaining integers of that
line are collected as one vector into `rc`.

Blank and whitespace-only lines are skipped (matching `parse_edgelist`, which also
ignores empty lines) instead of raising a `BoundsError`.

# Returns
A tuple `(y, rc)` with `y::Vector{Int}` and `rc::Vector{Vector{Int}}`, both with one
entry per non-blank line.
"""
function parse_edgelist_collection(fp)
    rc, y = Vector{Int}[], Int[]
    for r in readlines(fp)
        t = parse.(Int, split(r))
        # A blank line yields no tokens; there is no leading value to read.
        isempty(t) && continue
        push!(y, t[1])
        push!(rc, t[2:end])
    end
    return y, rc
end

parse_edgelist_collection (generic function with 1 method)

In [5]:
"""
    generate_incidence_matrix(H_list)

Build the sparse 0/1 incidence matrix of a hypergraph given as a list of hyperedges.

# Arguments
- `H_list`: a vector of hyperedges, each hyperedge being a vector of integer node ids.

# Returns
A `SparseMatrixCSC{Int,Int}` with one row per hyperedge (in the order of `H_list`)
and one column per distinct node id (sorted ascending). Entry `(i, j)` is `1` when
the `j`-th node id occurs in the `i`-th hyperedge. An empty hyperedge produces an
all-zero row.
"""
function generate_incidence_matrix(H_list::AbstractVector{<:AbstractVector{<:Integer}})
    # Collect the distinct node ids appearing in any hyperedge.
    node_set = Set{Int}()
    for edge in H_list
        union!(node_set, edge)
    end
    nodes = sort!(collect(node_set))

    # Node id -> column index; replaces a linear search per incidence.
    column_of = Dict{Int,Int}(node => j for (j, node) in enumerate(nodes))

    # Assemble (row, column) triplets directly so no dense matrix is ever allocated.
    rows, cols = Int[], Int[]
    for (i, edge) in enumerate(H_list), node in edge
        push!(rows, i)
        push!(cols, column_of[node])
    end
    vals = ones(Int, length(rows))

    # `max` as the combiner keeps the entry at 1 when a node is listed more than once
    # in the same hyperedge (the default combiner would add the duplicates).
    return sparse(rows, cols, vals, length(H_list), length(nodes), max)
end

generate_incidence_matrix (generic function with 1 method)

In [6]:
"""
    parse_syncl_name(fn)

Extract the three integers encoded in a file name of the form
`syn_cl(n,m,k=NUM1,NUM2,NUM3).tsv`. Only the base name of `fn` is inspected, so a
leading directory is allowed.

# Returns
A tuple of three `Int`s in file-name order, e.g. `(100, 1000, 4)`.

# Throws
An error (via `error`) when the base name does not match the expected pattern.
"""
function parse_syncl_name(fn::AbstractString)
    pattern = r"^syn_cl\(n,m,k=(\d+),(\d+),(\d+)\)\.tsv$"
    hit = match(pattern, basename(fn))
    hit === nothing && error("Filename does not match expected pattern: $fn")
    # The pattern has exactly three capture groups, one per number.
    values = map(c -> parse(Int, c), hit.captures)
    return Tuple(values)
end

parse_syncl_name (generic function with 1 method)

## test

In [9]:
# Pick one synthetic hypergraph file and decode the three integers in its name.
file_dir, file_name = "derived_data/", "syn_cl(n,m,k=100,1000,4).tsv"
full_path = joinpath(file_dir, file_name)

# The name encodes three numbers, reported here as n, m and k.
(n, m, k) = parse_syncl_name(file_name)
println("n = $n, m = $m, k = $k")

n = 100, m = 1000, k = 4


In [11]:
# Read the selected edge list and build its incidence matrix.
# The path assembled from `file_dir` and `file_name` is stored in `full_path`;
# in the cells shown, no global `file_path` exists yet at this point, so reading
# it here would raise an UndefVarError on a fresh run.
H_list = parse_edgelist(full_path)
H_Inc = generate_incidence_matrix(H_list);

In [12]:
# Wall-clock timing of one curvature computation on `H_Inc`.
# NOTE(review): if this is the first call in the session, the measurement presumably
# also includes compilation time — confirm before comparing numbers.
start_time = time()

hypergraph_curvatures(
    Orchid.DisperseUnweightedClique, Orchid.AggregateMean, H_Inc, 0.0, Orchid.CostOndemand
)

# Seconds elapsed since `start_time`; as the cell's last expression it is displayed.
elapsed_time = time() - start_time

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPreparing Input
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPreparing Neighborhoods
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPreparing Ondemand Cost Computation
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPreparing Dispersion
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mComputing Dispersions
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mComputing Directional Curvature
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mComputing Node Curvature Neighborhood
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mComputing Edge Curvature
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mComputing Node Curvature Edges


13.270262002944946

# record runtime

In [13]:
# Directory that is scanned for `.tsv` edge-list files.
file_dir = "derived_data/"

# One (n, m, k, t_horc) row per processed file: the three integers parsed from the
# file name plus the measured wall-clock seconds. Typed so the rows are not boxed
# in a Vector{Any}.
results = Tuple{Int,Int,Int,Float64}[]

for file_name in readdir(file_dir)
    # Everything that is not a .tsv file is ignored.
    endswith(file_name, ".tsv") || continue

    # Decode the three integers encoded in the file name.
    # Note: a .tsv file whose name does not match the expected pattern makes
    # `parse_syncl_name` throw, which aborts the whole loop.
    n, m, k = parse_syncl_name(file_name)

    # Load the hypergraph and build its incidence matrix (not part of the timing).
    file_path = joinpath(file_dir, file_name)
    H_list = parse_edgelist(file_path)
    H_Inc = generate_incidence_matrix(H_list)

    # Time only the curvature computation itself.
    start_time = time()
    hypergraph_curvatures(
        Orchid.DisperseUnweightedClique,
        Orchid.AggregateMean,
        H_Inc,
        0.01,
        Orchid.CostOndemand
    )
    t_horc = time() - start_time

    push!(results, (n, m, k, t_horc))
end

# Tab-separated results table.
results_file = "output/t_horc.txt"

# Create the output directory if needed so the timings gathered above are not lost
# to a missing-directory error when the file is opened.
mkpath(dirname(results_file))

open(results_file, "w") do f
    # Header row, then one row per processed file.
    write(f, "n\tm\tk\tt_horc\n")
    for (n_i, m_i, k_i, t_i) in results
        @printf(f, "%d\t%d\t%d\t%.2f\n", n_i, m_i, k_i, t_i)
    end
end

println("Computation times saved to $results_file")


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPreparing Input
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPreparing Neighborhoods
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPreparing Ondemand Cost Computation
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPreparing Dispersion
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mComputing Dispersions
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mComputing Directional Curvature
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mComputing Node Curvature Neighborhood
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mComputing Edge Curvature
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mComputing Node Curvature Edges
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPreparing Input
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPreparing Neighborhoods
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPreparing Ondemand Cost Computation
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPreparing Dispersion
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mComputing Disp

Computation times saved to output/t_horc.txt
