In [22]:
from ltp import extract_features, calculate_features_matrix
import numpy as np
import torch
import pandas as pd

from torch_geometric.data import Data

from graph_utils import Graph

In [23]:
# Read the organism subset, then build one Graph per entry of its "metacyc_id" column.
orgs = pd.read_csv("org_subset.csv")
graphs = [Graph.from_organism(org_id) for org_id in orgs["metacyc_id"]]

In [24]:
def graph_to_pyg(graph):
    """Convert a graph's adjacency matrix into a PyTorch Geometric ``Data`` object.

    Parameters
    ----------
    graph
        Object exposing an ``adj_matrix`` attribute. The matrix is passed to
        ``np.nonzero`` (unpacked into two index arrays) and its ``shape[0]``
        is used as the node count, so it is expected to be two-dimensional.

    Returns
    -------
    Data
        Carries only ``edge_index`` (one column per non-zero entry of the
        adjacency matrix, dtype ``torch.long``) and ``num_nodes``; no node
        or edge attributes are set here.
    """
    row, col = np.nonzero(graph.adj_matrix)
    # Stack into a single ndarray before handing it to torch: building a
    # tensor from a Python list of ndarrays is slow and makes PyTorch emit a
    # UserWarning recommending exactly this conversion.
    edge_index = torch.as_tensor(np.stack((row, col)), dtype=torch.long)
    return Data(edge_index=edge_index, num_nodes=graph.adj_matrix.shape[0])

In [26]:
# Convert every graph to its PyG form and extract the enabled feature families in one call.
features = extract_features(
    list(map(graph_to_pyg, graphs)),
    degree_sum=True,
    shortest_paths=True,
    edge_betweenness=True,
    jaccard_index=True,
    local_degree_score=True,
)

In [33]:
# Turn the extracted features into a single matrix using 50 bins.
# NOTE(review): presumably one row per input graph (a later cell labels the
# rows with orgs["metacyc_id"]) — confirm against the ltp documentation.
features_matrix = calculate_features_matrix(features, n_bins=50)
features_matrix

In [40]:
# Label the matrix rows with the organism ids, then flip it so each organism becomes a column.
df = pd.DataFrame(features_matrix, index=orgs["metacyc_id"]).transpose()
# index=False leaves the row labels out of the written file.
df.to_csv("ltp_embeddings.csv", index=False)
# Read the file back so the cell displays what was actually written to disk.
pd.read_csv("ltp_embeddings.csv")

Unnamed: 0,TAX-3888,TAX-3847,TAX-3880,TAX-1063,TAX-1061,TAX-287,TAX-4577,TAX-4081,TAX-9606,TAX-3708,...,TAX-2287,TAX-3055,TAX-1097,TAX-2190,TAX-1280,TAX-358,TAX-4058,TAX-3702,TAX-3988,TAX-1772
0,0.328189,0.283767,0.411523,0.518365,0.401408,0.291003,0.474236,0.317564,0.124437,0.361673,...,0.295627,0.222243,0.264588,0.129665,0.550220,0.315171,0.220443,0.502634,0.523169,0.392884
1,0.218107,0.208161,0.514403,0.000000,0.000000,0.254322,0.197598,0.177278,0.000603,0.346809,...,0.602410,0.482315,0.000000,0.375841,0.000000,0.657051,0.374173,0.142921,0.000000,0.000000
2,0.223251,0.208162,0.000000,0.000000,0.915493,0.025677,0.229518,0.183559,0.000235,0.000000,...,0.000000,0.435029,0.815217,0.255572,1.020408,0.000000,0.269753,0.199126,0.000000,1.022980
3,0.033951,0.009819,0.301293,0.888626,0.000000,0.001223,0.042560,0.006979,0.000168,0.401308,...,0.535475,0.000000,0.000000,0.163491,0.000000,0.475427,0.333565,0.011241,0.866965,0.000000
4,0.009259,0.000982,0.000000,0.000000,0.000000,0.002445,0.010640,0.007677,0.000201,0.000000,...,0.000000,0.245886,0.550629,0.048859,0.000000,0.000000,0.000000,0.011241,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
496,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
497,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
498,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
