In [2]:
import numpy as np
import pandas as pd
import os
import pickle
from hyperbolicity.tree_fitting_methods.hccfit import HccLinkage
from hyperbolicity.tree_fitting_methods.tree_spanner import layering_approx_tree
from hyperbolicity.tree_fitting_methods.treerep import TreeRep
import networkx as nx
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import to_networkx

In [3]:
def merge_csv_files(directory, output_file):
    all_files = []
    
    # Traverse directory and subdirectories
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('hyperbolicity_results.csv'):
                all_files.append(os.path.join(root, file))
    
    df_list = []

    for file_path in all_files:
        df = pd.read_csv(file_path)
        df_list.append(df)

    merged_df = pd.concat(df_list, ignore_index=True)
    merged_df.to_csv(output_file, index=False)
    print(f'Merged {len(all_files)} CSV files into {output_file}')

# C-ELEGANS

In [4]:
base_path = '../datasets'

airport = 'D_celegan.pkl'
airport_path = os.path.join(base_path, airport)
with open(airport_path, 'rb') as f:
    distances = pickle.load(f)

## Gromov and HDTree

In [15]:
merge_csv_files('/share/home/houedry/projects/DifferentiableHyperbolicity/hyperbolicity/expes/results_expes/expe_celegan', '/share/home/houedry/projects/DifferentiableHyperbolicity/hyperbolicity/expes/results_expes/expe_celegan/results_celegan.csv')

Merged 1008 CSV files into /share/home/houedry/projects/DifferentiableHyperbolicity/hyperbolicity/expes/results_expes/expe_celegan/results_celegan.csv


In [16]:
df = pd.read_csv('/share/home/houedry/projects/DifferentiableHyperbolicity/hyperbolicity/expes/results_expes/expe_celegan/results_celegan.csv')

In [17]:
df.loc[df['mean_optim_distortion'].idxmin()]

n_epochs                         177
intermediate_distortion     2.072127
intermediate_l1             0.310326
mean_optim_l1                0.41561
min_optim_l1                0.403629
std_optim_l1                  0.0106
mean_no_optim_l1            1.136062
min_no_optim_l1             1.026068
std_no_optim_l1             0.036759
mean_optim_distortion       1.838259
min_optim_distortion         1.76303
std_optim_distortion        0.057191
mean_no_optim_distortion         3.3
min_no_optim_distortion          3.0
std_no_optim_distortion     0.458258
run_number                         0
dataset                      celegan
learning_rate                    0.1
distance_reg                     0.1
scale_delta                     10.0
epochs                           500
batch_size                        32
n_batches                        100
gpu                             True
Name: 989, dtype: object

## HCC

In [17]:
np.random.seed(42)
indices = np.random.choice(452, size=100, replace=False)
N = distances.shape[0]

In [7]:
l1 = []
distortion = []
denom = N*(N-1)
for root in indices:
    tree_hcc = HccLinkage(distances)
    tree_hcc.fit_tree(root)
    distortion.append(np.abs(tree_hcc.d_T-distances).max())
    l1.append(np.abs(tree_hcc.d_T-distances).sum() / denom)

print(np.mean(l1), np.std(l1))
print(np.mean(distortion), np.std(distortion))

0.8817505837568433 0.22667925078570564
4.31 0.4624932431938871


## Layering Tree

In [None]:
df = pd.read_csv('/share/home/houedry/projects/DifferentiableHyperbolicity/hyperbolicity/datasets/bio-celegans.csv')
df['id1'] -= 1
df['id2'] -= 1
G = nx.Graph()
G.add_edges_from(df[['id1', 'id2']].values)
l1 = []
distortion = []
denom = N*(N-1)
for root in indices:
    layering_tree = layering_approx_tree(G, root)
    distance_layering = nx.floyd_warshall_numpy(layering_tree)
    distortion.append(np.abs(distance_layering-distances).max())
    l1.append(np.abs(distance_layering-distances).sum() / denom)
print(np.mean(l1), np.std(l1))
print(np.mean(distortion), np.std(distortion))

done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
0.739044699095422 0.03490810432775373
5.07 0.2551470164434615


## TreeRep

# CS-PHD

In [23]:
base_path = '../datasets'

airport = 'D_csphd.pkl'
airport_path = os.path.join(base_path, airport)
with open(airport_path, 'rb') as f:
    distances = pickle.load(f)
distances = distances.astype(np.float64)

np.random.seed(42)
N = distances.shape[0]
indices = np.random.choice(N, size=100, replace=False)

## HCC

In [None]:
l1 = []
distortion = []
denom = N*(N-1)
for root in indices:
    tree_hcc = HccLinkage(distances)
    tree_hcc.fit_tree(root)
    distortion.append(np.abs(tree_hcc.d_T-distances).max())
    l1.append(np.abs(tree_hcc.d_T-distances).sum() / denom)

print(np.mean(l1), np.std(l1))
print(np.mean(distortion), np.std(distortion))

2.5996860899390244 1.1140393019427608
23.35 2.075451758051726


## Layering Tree

In [24]:
df = pd.read_csv('/share/home/houedry/projects/DifferentiableHyperbolicity/hyperbolicity/datasets/ca-CSphd.csv')
df['id1'] -= 1
df['id2'] -= 1
G = nx.Graph()
G.add_edges_from(df[['id1', 'id2']].values)
l1 = []
distortion = []
denom = N*(N-1)
for root in indices:
    layering_tree = layering_approx_tree(G, root)
    distance_layering = nx.floyd_warshall_numpy(layering_tree)
    distortion.append(np.abs(distance_layering-distances).max())
    l1.append(np.abs(distance_layering-distances).sum() / denom)
print(np.mean(l1), np.std(l1))
print(np.mean(distortion), np.std(distortion))

8.265081897865855 0.4943979995513763
25.48 0.6079473661428266


# Cora

In [25]:
base_path = '../datasets'

airport = 'D_cora.pkl'
airport_path = os.path.join(base_path, airport)
with open(airport_path, 'rb') as f:
    distances = pickle.load(f)
distances = distances.astype(np.float64)

np.random.seed(42)
N = distances.shape[0]
indices = np.random.choice(N, size=100, replace=False)

## HCC

In [26]:
l1 = []
distortion = []
denom = N*(N-1)
for root in indices:
    tree_hcc = HccLinkage(distances)
    tree_hcc.fit_tree(root)
    distortion.append(np.abs(tree_hcc.d_T-distances).max())
    l1.append(np.abs(tree_hcc.d_T-distances).sum() / denom)

print(np.mean(l1), np.std(l1))
print(np.mean(distortion), np.std(distortion))

2.4474343938024283 0.4383590871665283
12.28 0.96


## Layering Tree

In [28]:
dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]
G = to_networkx(data, to_undirected=True)
largest_cc_nodes = max(nx.connected_components(G), key=len)
G_lcc = G.subgraph(largest_cc_nodes).copy()
np.random.seed(42)
N = G_lcc.number_of_nodes()
indices = np.random.choice(N, size=100, replace=False)
distances = nx.floyd_warshall_numpy(G_lcc)

In [36]:
l1 = []
distortion = []
denom = N*(N-1)
for root in indices:
    layering_tree = layering_approx_tree(G_lcc, root)
    distance_layering = nx.floyd_warshall_numpy(layering_tree)
    distortion.append(np.abs(distance_layering-distances).max())
    l1.append(np.abs(distance_layering-distances).sum() / denom)
print(np.mean(l1), np.std(l1))
print(np.mean(distortion), np.std(distortion))

IndexError: index 2485 is out of bounds for axis 0 with size 2485

# Airport

In [38]:
base_path = '../datasets'

airport = 'D_airport.pkl'
airport_path = os.path.join(base_path, airport)
with open(airport_path, 'rb') as f:
    distances = pickle.load(f)
distances = distances.astype(np.float64)

np.random.seed(42)
N = distances.shape[0]
indices = np.random.choice(N, size=100, replace=False)

## HCC

In [39]:
l1 = []
distortion = []
denom = N*(N-1)
for root in indices:
    tree_hcc = HccLinkage(distances)
    tree_hcc.fit_tree(root)
    distortion.append(np.abs(tree_hcc.d_T-distances).max())
    l1.append(np.abs(tree_hcc.d_T-distances).sum() / denom)

print(np.mean(l1), np.std(l1))
print(np.mean(distortion), np.std(distortion))

1.1059447896980141 0.13988059753483872
7.71 0.7251896303726358


## Layering Tree

In [None]:
largest_cc_nodes = max(nx.connected_components(G), key=len)
G_lcc = G.subgraph(largest_cc_nodes).copy()
np.random.seed(42)
N = G_lcc.number_of_nodes()
indices = np.random.choice(N, size=100, replace=False)
distances = nx.floyd_warshall_numpy(G_lcc)

In [None]:
l1 = []
distortion = []
denom = N*(N-1)
for root in indices:
    layering_tree = layering_approx_tree(G_lcc, root)
    distance_layering = nx.floyd_warshall_numpy(layering_tree)
    distortion.append(np.abs(distance_layering-distances).max())
    l1.append(np.abs(distance_layering-distances).sum() / denom)
print(np.mean(l1), np.std(l1))
print(np.mean(distortion), np.std(distortion))

In [4]:
merge_csv_files('/share/home/houedry/projects/DifferentiableHyperbolicity/hyperbolicity/expes/results_expes/expe_phd', '/share/home/houedry/projects/DifferentiableHyperbolicity/hyperbolicity/expes/results_expes/expe_phd/results_phd.csv')

Merged 108 CSV files into /share/home/houedry/projects/DifferentiableHyperbolicity/hyperbolicity/expes/results_expes/expe_phd/results_phd.csv


In [5]:
df = pd.read_csv('/share/home/houedry/projects/DifferentiableHyperbolicity/hyperbolicity/expes/results_expes/expe_phd/results_phd.csv')

In [6]:
df.loc[df['mean_optim_distortion'].idxmin()]

n_epochs                         500
intermediate_distortion     8.674208
intermediate_l1             1.035947
mean_optim_l1               1.276931
min_optim_l1                1.177544
std_optim_l1                0.046907
mean_optim_distortion      10.150798
min_optim_distortion        9.467228
std_optim_distortion        0.489663
run_number                         0
dataset                          phd
learning_rate                    0.1
distance_reg                     0.1
scale_delta                     10.0
epochs                           500
batch_size                        32
n_batches                        100
gpu                             True
Name: 6, dtype: object