In [1]:
import os
import sys
sys.path.append("/home/romainlhardy/code/hyperbolic-cancer/Mixed-Curvature-Pathways")

import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch

from utils.load_graph import load_graph
from PoincareMaps.data import prepare_data, compute_rfa
from sklearn.decomposition import PCA

In [2]:
# Runs the Mixed-Curvature-Pathways `pytorch_hyperbolic.py` script in a subprocess
# on the bladder edge list and saves the resulting model under models/bladder_model.
# NOTE(review): the script path is relative, so this only works when the kernel's
# working directory is the folder that contains Mixed-Curvature-Pathways/ -- confirm.
# NOTE(review): the --dataset file is the edge list written by the graph-building
# cell further down this notebook; on a fresh checkout that cell must run first.
# Flag meanings (--euc/--sph/--hyp, --edim/--sdim/--dim, -l, --subsample, --riemann)
# are defined by the script itself; presumably they select the number and dimension
# of Euclidean / spherical / hyperbolic components -- TODO confirm against its --help.
!python Mixed-Curvature-Pathways/pytorch/pytorch_hyperbolic.py \
    --dataset /home/romainlhardy/code/hyperbolic-cancer/data/graphs/bladder_edges.txt \
    --batch-size 64 \
    -l 5.0 \
    --epochs 50 \
    --checkpoint-freq 10 \
    --subsample 32 \
    --euc 1 \
    --edim 2 \
    --sph 1 \
    --sdim 2 \
    --dim 2 \
    --hyp 1 \
    --riemann \
    --visualize \
    --model-save-file /home/romainlhardy/code/hyperbolic-cancer/models/bladder_model

2025-03-24T19:56:13 Commandline ['Mixed-Curvature-Pathways/pytorch/pytorch_hyperbolic.py', '--dataset', '/home/romainlhardy/code/hyperbolic-cancer/data/graphs/bladder_edges.txt', '--batch-size', '64', '-l', '5.0', '--epochs', '50', '--checkpoint-freq', '10', '--subsample', '32', '--euc', '1', '--edim', '2', '--sph', '1', '--sdim', '2', '--dim', '2', '--hyp', '1', '--riemann', '--visualize', '--model-save-file', '/home/romainlhardy/code/hyperbolic-cancer/models/bladder_model']

The scipy.sparse array containers will be used instead of matrices
in Networkx 3.0. Use `to_scipy_sparse_array` instead.
  GM = nx.to_scipy_sparse_matrix(G, nodelist=list(range(G.order())))
2025-03-24T19:56:13 Loaded Graph /home/romainlhardy/code/hyperbolic-cancer/data/graphs/bladder_edges.txt with 2075 nodes scale=1.0
2025-03-24T19:56:13 Building dataset
2025-03-24T19:56:13 	Final Matrix (2075, 2075)
2025-03-24T19:56:13 Built distance matrix with 1.0 factor

The scipy.sparse array containers will be used instead

In [None]:
# Load the bladder expression matrix and reduce it to 20 principal components.
# Produces `features`, a float64 torch tensor printed as (n_samples, 20).
data_dir = "/home/romainlhardy/data/hyperbolic-cancer/bladder"
dset = "bladder"

# Build the input path from data_dir so the dataset location is defined in one place
# (the resulting path is identical to the previously hard-coded one).
file_path = os.path.join(
    data_dir,
    "GSM4307111_GEO_processed_BC159-T_3_log2TPM_matrix_final.txt",
)
df = pd.read_csv(file_path, sep="\t", index_col=0)

# Transpose so that the file's columns become rows (one row per sample for PCA).
# NOTE(review): presumably the file is genes x cells, making rows cells -- confirm.
expression_matrix = df.values.astype(np.float32).T

# PCA
# random_state is fixed because scikit-learn may pick a randomized SVD solver for
# large inputs; without a seed the features (and everything derived from them)
# would differ between runs.
pca = PCA(n_components=20, random_state=0)
features = pca.fit_transform(expression_matrix)
features = torch.DoubleTensor(features)
print(features.shape)

In [None]:
# Compute the RFA matrix from the PCA features using PoincareMaps' `compute_rfa`
# (k-nearest-neighbour graph with k=30, Minkowski local distance, "MFIsym" kernel).
# NOTE(review): the graph-building code ranks these values in descending order to
# pick edges, so they appear to act as pairwise affinities in the original data
# space rather than raw distances -- confirm against PoincareMaps.data.
rfa = compute_rfa(
    features,
    mode="features",
    distlocal="minkowski",
    distfn="MFIsym",
    k_neighbours=30,
    sigma=1.0,
    connected=True,
)

In [None]:
# Build an undirected, unit-weight graph from the RFA matrix and save it as a
# weighted edge list for the embedding script.
NUM_NEIGHBORS = 3  # edges contributed by each row; was a bare literal in the loop

G = nx.Graph()
G.add_nodes_from(range(features.shape[0]))
for i in range(rfa.shape[0]):
    # Only columns j > i are ranked, so every unordered pair is considered once.
    # NOTE(review): this means later rows have fewer candidates (the last row has
    # none); confirm this is intended rather than a full-row nearest-neighbour pick.
    upper_row = rfa[i, i + 1:]
    ranked = torch.argsort(-upper_row)  # largest RFA value first
    for offset in ranked[:NUM_NEIGHBORS].tolist():
        # `offset` is relative to column i + 1, so shift it back to a node index.
        G.add_edge(i, i + 1 + offset, weight=1.0)
# Alternative considered earlier: connect every pair with rfa[i, j] > 0.01 and use
# the RFA value itself as the edge weight.
print(G.number_of_nodes(), G.number_of_edges())

# Save graph to file
output_path = f"/home/romainlhardy/code/hyperbolic-cancer/data/graphs/{dset}_edges.txt"
# Create the target directory if needed so a fresh checkout does not fail on write.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
nx.write_weighted_edgelist(G, output_path)

In [None]:
# Reload the saved bladder edge list and build its sparse adjacency (rows/columns
# ordered by node index 0..n-1), then display the edges.
G = load_graph("/home/romainlhardy/code/hyperbolic-cancer/data/graphs/bladder_edges.txt")
# `nx.to_scipy_sparse_matrix` is deprecated and removed in NetworkX 3.0; use
# `to_scipy_sparse_array` when available and fall back only on older installs.
# Note: the new function returns a SciPy sparse *array*, where `*` is element-wise;
# use `@` for matrix products on GM.
_to_sparse = getattr(nx, "to_scipy_sparse_array", None) or nx.to_scipy_sparse_matrix
GM = _to_sparse(G, nodelist=list(range(G.order())))
G.edges()