In [None]:
%load_ext autoreload
%autoreload 2

import json
from pathlib import Path
import pandas as pd
from krxns.network import construct_reaction_network, SuperMultiDiGraph
from hydra import compose, initialize
from ergochemics.draw import draw_reaction, draw_molecule
from ergochemics.standardize import standardize_smiles
from IPython.display import SVG
import networkx as nx
from itertools import product
from functools import partial
from tqdm import tqdm

# Compose the "filepaths" Hydra config; `cfg` supplies the data directories
# (cfg.raw_data, cfg.interim_data) read by the loading cells.
with initialize(config_path="../configs/filepaths", version_base=None):
    cfg = compose(config_name="filepaths")

def get_node_idx(smi, G):
    """
    Look up the graph node whose 'smiles' property equals `smi`.

    Args
    ----
    smi
        SMILES value to search for.
    G
        Graph object exposing `get_nodes_by_prop(prop_name, value)`.

    Returns
    -------
    The first entry returned by `G.get_nodes_by_prop('smiles', smi)`,
    or None when that lookup comes back empty.
    """
    matches = G.get_nodes_by_prop('smiles', smi)
    # Guard clause: no node carries this SMILES.
    if len(matches) == 0:
        return None
    return matches[0]

In [None]:
# SMILES standardizer with its options fixed (quiet, "simple" neutralization).
std_smi = partial(standardize_smiles, neutralization_mode="simple", quiet=True)

# Compound table read from ccm_aa.csv in the raw-data directory.
ccm_aa_path = Path(cfg.raw_data) / "ccm_aa.csv"
ccm_aa = pd.read_csv(ccm_aa_path)

In [None]:
# Define the SMILES standardizer BEFORE its first use so this cell does not
# rely on a definition left behind by a previously executed cell.
std_smi = partial(standardize_smiles, quiet=True, neutralization_mode="simple")

# Re-read the raw table so that re-running this cell always standardizes from
# the same starting point, then standardize its SMILES column.
ccm_aa = pd.read_csv(Path(cfg.raw_data) / "ccm_aa.csv")
ccm_aa["smiles"] = ccm_aa["smiles"].apply(std_smi)

# Compound table and per-reaction mass contributions from the interim-data directory.
kcs = pd.read_csv(Path(cfg.interim_data) / "compounds.csv")

with open(Path(cfg.interim_data) / "mass_contributions.json", 'r') as f:
    mass_contributions = json.load(f)

# Source compound ids = the default ids stored on disk, followed by the id of
# every compound in `kcs` whose standardized SMILES appears in `ccm_aa`.
# `sources` is only ever bound to the final list (never to the DataFrame).
default_sources = pd.read_csv(Path(cfg.interim_data) / "default_sources.csv")["id"].tolist()
addtl_sources = kcs.loc[kcs["smiles"].apply(std_smi).isin(ccm_aa["smiles"]), "id"].tolist()
sources = default_sources + addtl_sources

In [59]:
# Thresholds forwarded to construct_reaction_network.
# NOTE(review): the "_lb" suffix suggests these are lower bounds — confirm
# their exact meaning against the krxns documentation.
pnmc_lb, rnmc_lb = 0.15, 0.0

# Build the edge and node collections of the reaction network.
edges, nodes = construct_reaction_network(
    compounds=kcs,
    sources=sources,
    mass_contributions=mass_contributions,
    rnmc_lb=rnmc_lb,
    pnmc_lb=pnmc_lb,
)

In [60]:
# Size of the constructed network as (number of edges, number of nodes).
len(edges), len(nodes)

(21251, 6364)

In [61]:
# Assemble the reaction-network graph: nodes are added first, then edges.
G = SuperMultiDiGraph()
G.add_nodes_from(nodes)
G.add_edges_from(edges)
# SMILES -> node lookup with this graph pre-bound as the `G` argument.
_get_node_idx = partial(get_node_idx, G=G)

In [62]:
# Attach each compound's graph node index, drop compounds that have no node in
# G (the lookup returns None, which pandas stores as a missing value), and cast
# the surviving indices to int.
# A single chain builds a fresh frame, so no column is ever assigned into a
# boolean-mask slice (which would trigger pandas' SettingWithCopyWarning).
ccm_aa = (
    ccm_aa
    .assign(nidx=ccm_aa["smiles"].apply(_get_node_idx))
    .dropna(subset=["nidx"])
    .astype({"nidx": int})
)
ccm_aa.head()

Unnamed: 0,id,smiles,nidx
0,succinate,O=C(O)CCC(=O)O,6215
1,fumarate,O=C(O)C=CC(=O)O,6161
2,malate,O=C(O)CC(O)C(=O)O,6202
3,oxaloacetate,O=C(O)CC(=O)C(=O)O,6177
4,ketoglutarate,O=C(O)CCC(=O)C(=O)O,6210


In [63]:
def show_path(source, target, topk = 1):
    """
    Print and draw the shortest path from `source` to `target` in the global graph `G`.

    The node path returned by `G.shortest_path` is printed first. Then, for each
    step of the edge path, the first `topk` edges of that step are shown: the
    edge's (key, value) pairs are printed and its "am_smarts" entry is drawn
    with `draw_reaction` and displayed as an SVG.

    Args
    ----
    source, target
        Forwarded unchanged to `G.shortest_path`.
    topk : int
        How many leading edges of each step to show.
    """
    node_path, edge_path = G.shortest_path(source, target)
    print(node_path)
    for i, parallel_edges in enumerate(edge_path):
        print(f"STEP #{i+1}")
        shown = parallel_edges[:topk]
        for rxn_edge in shown:
            print(list(rxn_edge.items()))
            rxn_svg = draw_reaction(rxn_edge["am_smarts"])
            display(SVG(rxn_svg))

In [64]:
# Scratch check (only valid once k_hop_neighbors has been built): k_hop_neighbors.edges(keys=True)

In [65]:
# Union of the radius-k ego graphs around every compound's node, with the
# running (node count, edge count) printed after each merge.
k = 2
for i, nidx in enumerate(ccm_aa['nidx']):
    ego = nx.ego_graph(G, nidx, radius=k)
    # The first ego graph seeds the union; each later one is composed into it.
    k_hop_neighbors = ego if i == 0 else nx.compose(k_hop_neighbors, ego)

    print(k_hop_neighbors.number_of_nodes(), k_hop_neighbors.number_of_edges())

52 1744
67 1881
96 2064
105 2156
120 2250
130 3368
154 3488
160 3529
160 3536
162 3541
168 3567
178 3614
178 3614
184 3653
191 3674
196 3696
210 4117
231 4230
233 4235
245 4283
258 4357
277 4437
282 4461
298 4534
321 4606
327 4628
333 4649


In [66]:
# Final size of the combined k-hop neighborhood as (nodes, edges).
k_hop_neighbors.number_of_nodes(), k_hop_neighbors.number_of_edges()

(333, 4649)

In [67]:
# Toy multigraph with two parallel edges between nodes 1 and 2, used to inspect
# what degree(), number_of_edges() and number_of_nodes() report for it.
toy_edges = [
    (1, 2, {'name': 'foo'}),
    (1, 2, {'name': 'bar'}),
    (2, 3, {'name': 'foo'}),
]
MG = nx.MultiGraph()
MG.add_edges_from(toy_edges)
dict(MG.degree()), MG.number_of_edges(), MG.number_of_nodes()

({1: 2, 2: 3, 3: 1}, 3, 3)