In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import networkx as nx
from src.utils import load_json
from src.config import filepaths
from src.post_processing import Expansion, find_paths_single_target

In [3]:
# Contents of jnimt_reverses.json from the rules directory; passed to Expansion as
# `operator_reverses` (presumably a mapping from operators to their reverses -- TODO confirm).
imt_reverses = load_json(filepaths['rules'] / "jnimt_reverses.json")
# Run names below are pickle file stems under filepaths['raw_expansions'].
# Alternative runs, currently disabled:
# forward = '1_steps_alpha_ketoglutarate_to_None_rules_JN3604IMT_rules_carbonyl_free_co_metacyc_coreactants_carbonyl_free_sampled_False_pruned_False'
# reverse = '1_steps_hopa_to_None_rules_JN3604IMT_rules_carbonyl_free_co_metacyc_coreactants_carbonyl_free_sampled_False_pruned_False'

# forward = '2_steps_ccm_v0_to_None_rules_JN3604IMT_rules_co_metacyc_coreactants_sampled_False_pruned_False'
reverse = '2_steps_bottle_targets_24_to_None_rules_JN3604IMT_rules_co_metacyc_coreactants_sampled_False_pruned_False'

forward = '2_steps_pivalic_acid_to_bottle_targets_24_rules_JN3604IMT_rules_co_metacyc_coreactants_sampled_False_pruned_True'

print("Loading expansion")
# Combined expansion built from both runs. Each conditional expression spans the whole
# path expression: a falsy run name is passed through unchanged instead of being
# turned into a ".pk" path.
pk = Expansion(
    forward=filepaths['raw_expansions'] / f"{forward}.pk" if forward else forward,
    reverse=filepaths['raw_expansions'] / f"{reverse}.pk" if reverse else reverse,
    operator_reverses=imt_reverses,
)

Loading expansion




In [4]:
# paths = pk.find_paths()

In [5]:
# Load each direction as its own Expansion so the forward and reverse runs can be
# inspected separately from the combined `pk` object.
# Forward-only: no reverse pickle and no operator_reverses are supplied.
fwd = Expansion(
    forward=filepaths['raw_expansions'] / f"{forward}.pk"
)

# Reverse-only: the reverse pickle together with the operator reverses.
rev = Expansion(
    reverse=filepaths['raw_expansions'] / f"{reverse}.pk",
    operator_reverses=imt_reverses,
)



In [6]:
len(fwd.compounds), len(rev.compounds)

(690, 102763)

In [7]:
def get_As(x):
    """Return the entries of ``x`` whose key starts with ``'C'``.

    Args:
        x: Mapping from string ids to arbitrary values (here, an expansion's
            ``compounds`` dict).

    Returns:
        A new dict holding only the ``'C'``-prefixed entries, in the original
        iteration order. Keys with any other prefix, and empty keys, are dropped.
    """
    # startswith() instead of k[0] so an empty key is skipped rather than raising IndexError.
    return {k: v for k, v in x.items() if k.startswith('C')}


def get_cpds_from_gen(x, gen):
    """Return the entries of ``x`` whose ``'Generation'`` field equals ``gen``.

    Args:
        x: Mapping from ids to dicts that each carry a ``'Generation'`` key.
        gen: Generation value to select.

    Returns:
        A new dict with only the matching entries (values are not copied).

    Raises:
        KeyError: If a value has no ``'Generation'`` key.
    """
    return {k: v for k, v in x.items() if v['Generation'] == gen}
# 'C'-prefixed compounds of each expansion.
fwd_As = get_As(fwd.compounds)
rev_As = get_As(rev.compounds)
# Compound ids present in both the forward and the reverse expansion.
checkpoints = fwd_As.keys() & rev_As.keys()
# Compounds whose 'Generation' equals each expansion's `generations` attribute.
final_fwd = get_cpds_from_gen(fwd.compounds, fwd.generations)
final_rev = get_cpds_from_gen(rev.compounds, rev.generations)
# Show the three counts together.
len(checkpoints), len(final_fwd), len(final_rev)

(121, 611, 102449)

In [8]:
n=0

In [9]:
# Stepping cell: every execution advances the shared counter `n` and displays the
# compound at that position in fwd.compounds. Not idempotent -- re-running moves on
# to the next entry, and `n` is also advanced by the reaction-browsing cell.
n+=1
fwd.compounds[list(fwd.compounds.keys())[n]]

{'ID': 'PYROPHOSPHATE_ACCEPTOR_CoF',
 '_id': 'X2afb0d1241a06a5b42485c9533e1ee325f9c9c21',
 'SMILES': 'Nc1ncnc2c1ncn2C1OC(COP(=O)(O)O)C(O)C1O',
 'InChI_key': 'UDMBCSSLTHHNCD',
 'Type': 'Coreactant',
 'Generation': 0,
 'atom_count': Counter({'H': 14, 'C': 10, 'O': 7, 'N': 5, 'P': 1}),
 'Expand': False,
 'Formula': 'C10H14N5O7P',
 'last_tani': 0}

In [10]:
# Stepping cell for reactions: reuses the same counter `n` as the compound-browsing
# cell, so the index shown depends on how many times either cell has been run.
n+=1
fwd.reactions[list(fwd.reactions.keys())[n]]

{'_id': 'R349246c13463b5aea2ffcf6da8fb0f82120230015e5a44ed8de20eaa1b604473',
 'Reactants': [(1, 'C743d3d3a4a0e2cf0e31d4d7623c0d79884571a95'),
  (1, 'X151d45ce062c088336cff9e45bc138b6a63626fe'),
  (1, 'X73bc8ef21db580aefe4dbc0af17d4013961d9d17')],
 'Products': [(1, 'C884bfe32dd0fc7e192d120e509978b660dff3f79'),
  (1, 'X281e38d790b809dc908f7c9749d5274a6d8280c4'),
  (1, 'X8dc023d8052d83fb6feadf8541387e57c199cad0')],
 'Operators': {'rule0005_60'},
 'SMILES_rxn': '(1) CC(C)(C)C(=O)O + (1) NC(=O)c1ccc[n+](C2OC(COP(=O)(O)OP(=O)(O)OCC3OC(n4cnc5c(N)ncnc54)C(OP(=O)(O)O)C3O)C(O)C2O)c1 + (1) O => (1) CC(C)(C)C=O + (1) NC(=O)C1=CN(C2OC(COP(=O)(O)OP(=O)(O)OCC3OC(n4cnc5c(N)ncnc54)C(OP(=O)(O)O)C3O)C(O)C2O)C=CC1 + (1) O=O'}

In [11]:
cpd_types = {c['Type'] for c in pk.compounds.values()}
cpd_types

{'Coreactant', 'Predicted', 'Starting Compound'}

In [12]:
def construct_network(compounds: dict, reactions: dict) -> "nx.DiGraph":
    """Build a directed bipartite compound/reaction graph that leaves out coreactants.

    Compound nodes are tagged ``bipartite=0`` and reaction nodes ``bipartite=1``.
    Every reaction contributes ``compound -> reaction`` edges for its reactants and
    ``reaction -> compound`` edges for its products.

    Args:
        compounds: Mapping of compound id to a dict with at least the keys
            ``"SMILES"``, ``"InChI_key"``, ``"Type"``, ``"Generation"`` and ``"_id"``.
            Entries whose ``"Type"`` is ``"Coreactant"`` get no node.
        reactions: Mapping of reaction id to a dict with at least the keys
            ``"Operators"``, ``"Reactants"`` and ``"Products"``; the latter two are
            iterables of ``(stoichiometry, compound_id)`` pairs.

    Returns:
        The assembled ``nx.DiGraph``. Its graph attribute ``smiles_to_cid`` maps each
        non-coreactant SMILES to its compound id (if two compounds share a SMILES,
        the one seen last wins).
    """
    cpd_node_list = []
    smiles_to_cid = {}

    # Compound nodes: everything except coreactants.
    for cid, cpd in compounds.items():
        if cpd["Type"] == "Coreactant":
            continue

        cpd_node_list.append(
            (
                cid,
                {
                    "SMILES": cpd["SMILES"],
                    "InChIKey": cpd["InChI_key"],
                    "Type": cpd["Type"],
                    "Generation": cpd["Generation"],
                    "_id": cpd["_id"],
                },
            )
        )
        smiles_to_cid[cpd["SMILES"]] = cid

    rxn_node_list = []
    edge_list = []

    # Reaction nodes plus their edges to/from the participating compounds.
    for rid, rxn in reactions.items():
        rxn_node_list.append(
            (
                rid,
                {
                    "Rule": rxn["Operators"],
                    "Type": "Reaction",
                    "feasible": None,
                    "Reactants": rxn["Reactants"],
                    "Products": rxn["Products"],
                },
            )
        )

        # Ids starting with 'X' are skipped so that no edge touches a coreactant.
        # NOTE(review): assumes every coreactant id (and only those) starts with 'X',
        # matching the "Type" filter used for the nodes above -- confirm.
        edge_list.extend(
            (c_id, rid) for _, c_id in rxn["Reactants"] if not c_id.startswith("X")
        )
        edge_list.extend(
            (rid, c_id) for _, c_id in rxn["Products"] if not c_id.startswith("X")
        )

    # Assemble the graph; the SMILES lookup travels with it as a graph attribute.
    DG = nx.DiGraph(smiles_to_cid=smiles_to_cid)
    DG.add_nodes_from(cpd_node_list, bipartite=0)
    DG.add_nodes_from(rxn_node_list, bipartite=1)
    DG.add_edges_from(edge_list)

    return DG



In [13]:
# Build the forward network twice for comparison: with this notebook's
# construct_network, and with the Expansion object's own construct_network method
# (which returns three values; only the graph is kept here).
DG_new = construct_network(fwd.compounds, fwd.reactions)
DG_old, _, _ = fwd.construct_network()

In [14]:
DG_new.number_of_nodes(), DG_new.number_of_edges(), DG_old.number_of_nodes(), DG_old.number_of_edges()

(1573, 2454, 1602, 2713)

In [27]:
fwd.starters

{'C743d3d3a4a0e2cf0e31d4d7623c0d79884571a95': 'pivalic_acid'}

In [28]:
# Time path finding on the graph from fwd.construct_network(), using only the first
# target. The call is repeated without %timeit because a name assigned inside the
# timed statement is not bound in the notebook namespace.
%timeit paths_old = find_paths_single_target(DG_old, list(fwd.starters.keys()), list(fwd.targets.keys())[0], fwd.generations)
paths_old = find_paths_single_target(DG_old, list(fwd.starters.keys()), list(fwd.targets.keys())[0], fwd.generations)

68.5 μs ± 684 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [29]:
# Same benchmark on the coreactant-free graph from this notebook's construct_network,
# again for the first target only. The call is repeated without %timeit so that
# paths_new is actually bound for the comparison cells that follow.
%timeit paths_new = find_paths_single_target(DG_new, list(fwd.starters.keys()), list(fwd.targets.keys())[0], fwd.generations)
paths_new = find_paths_single_target(DG_new, list(fwd.starters.keys()), list(fwd.targets.keys())[0], fwd.generations)

65.6 μs ± 1.08 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [30]:
len(paths_old), len(paths_new)

(6, 6)

In [33]:
set(paths_old) ^ set(paths_new)

set()

In [49]:
%timeit paths = nx.all_simple_paths(DG_new, source=list(fwd.starters.keys())[0], target=list(fwd.targets.keys()), cutoff=fwd.generations*2)
paths = nx.all_simple_paths(DG_new, source=list(fwd.starters.keys())[0], target=list(fwd.targets.keys()), cutoff=fwd.generations*2)

886 ns ± 7.2 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [50]:
# Reduce each networkx path to (starting compound, reaction, reaction, ...): nodes
# alternate compound/reaction, so elt[0] is the source and elt[1::2] picks the reactions.
# NOTE: this rebinds `paths` from a one-shot generator to a list, so the cell is not
# re-runnable without first re-running the cell that creates the generator.
paths = [tuple([elt[0]] + elt[1::2]) for elt in paths]
len(paths)

15

In [51]:
paths

[('C743d3d3a4a0e2cf0e31d4d7623c0d79884571a95',
  'R022d15344a76dbd79cf46e8c67679ce6aff193011ecd9fd035f0bb52746679bf'),
 ('C743d3d3a4a0e2cf0e31d4d7623c0d79884571a95',
  'R022d15344a76dbd79cf46e8c67679ce6aff193011ecd9fd035f0bb52746679bf',
  'Rfa06fdf1de128b497279c2e28639d50e249dd2ba1d6190519fd8cc4ad00c2e1d'),
 ('C743d3d3a4a0e2cf0e31d4d7623c0d79884571a95',
  'R022d15344a76dbd79cf46e8c67679ce6aff193011ecd9fd035f0bb52746679bf',
  'R12331e3c0d6b4e7199ec7d9d9bcc4d7cb71792b2bcad1aae82c8c20fbe94dbbb'),
 ('C743d3d3a4a0e2cf0e31d4d7623c0d79884571a95',
  'Ra70122a22903bfb5cde4f132c01565531ee96e8a1ae6a48b48526025b265eba6',
  'Rea76132ecc1eedc98147000d6f3bde0cc9885b8d222d9865e6ab6d7ca5399220'),
 ('C743d3d3a4a0e2cf0e31d4d7623c0d79884571a95',
  'Ra70122a22903bfb5cde4f132c01565531ee96e8a1ae6a48b48526025b265eba6',
  'Rad907da7aa348bc2c4625531d5f3bb640ca7b9bb6aa19b8e972b62e1e74f49d4'),
 ('C743d3d3a4a0e2cf0e31d4d7623c0d79884571a95',
  'Ra70122a22903bfb5cde4f132c01565531ee96e8a1ae6a48b48526025b265eba6',
  '

In [53]:
set(paths_new) - set(paths)

set()

In [58]:
# Paths found by networkx but not by find_paths_single_target. Sorted so that indexing
# into nx_only is reproducible: plain set iteration order over string tuples changes
# between kernel sessions because of string hash randomization.
nx_only = sorted(set(paths) - set(paths_new))

In [61]:
fwd.targets

{'Ca44a84be6f833fe631009a55d05e4807de958fe0': '3hpa',
 'Cce065e1e7b273fd0df7f455452f948b30517b232': 'dmb',
 'Cc2274a1b151121868789bd03eb37b9c4065ef3d1': 'dmhb'}

In [65]:
i = 0


In [72]:

# Stepping cell: print the products of every reaction in the i-th networkx-only path
# (element 0 of a path tuple is the starting compound, hence the [1:]), then advance
# `i`. Not idempotent -- each execution moves to the next path and will raise
# IndexError once `i` runs past the end of nx_only.
for r in nx_only[i][1:]:
    print(fwd.reactions[r]["Products"])
i += 1

[(1, 'C2d94449d2035580e3a2de661386c29c111c7bf71'), (1, 'Cce065e1e7b273fd0df7f455452f948b30517b232')]
[(1, 'Cc2274a1b151121868789bd03eb37b9c4065ef3d1'), (1, 'X151d45ce062c088336cff9e45bc138b6a63626fe'), (1, 'X73bc8ef21db580aefe4dbc0af17d4013961d9d17')]
