In [23]:
%load_ext autoreload
%autoreload 2

from src.rxn_ctr_mcs import *
from src.utils import load_json, rxn_entry_to_smarts, rm_atom_map_num
from src.pathway_utils import get_reverse_paths_to_starting, create_graph_from_pickaxe
from src.post_processing import *

from minedatabase.pickaxe import Pickaxe
from minedatabase.utils import get_compound_hash

from rdkit.Chem import AllChem

from collections import defaultdict
import pandas as pd
import csv
import pickle

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [24]:
# Set params

expansion_dir = '../data/raw_expansions/'
fn = "ccm_v0_to_methylene_molecules_gen_4_tan_sample_1_n_samples_1000.pk" # Expansion file name
generations = 4

# Load raw expansion object
pk = Pickaxe()
path = expansion_dir + fn
pk.load_pickled_pickaxe(path)

----------------------------------------
Intializing pickaxe object

Done intializing pickaxe object
----------------------------------------

Loading ../data/raw_expansions/ccm_v0_to_methylene_molecules_gen_4_tan_sample_1_n_samples_1000.pk pickled data.
Loaded 67701 compounds
Loaded 84312 reactions
Loaded 3604 operators
Loaded 2 targets
Took 6.442622900009155


Unnamed: 0_level_0,id
smiles,Unnamed: 1_level_1
C=C(CCC(=O)[O-])C(=O)[O-],2-methyleneglutarate
C=C(CC([NH3+])C(=O)[O-])C(=O)[O-],4-methylene-l-glutamate


In [25]:
# Create the initial graph

DG, rxn, edge = create_graph_from_pickaxe(pk, "Biology")
starting_nodes = []
bad_nodes = []
for n in DG.nodes():
    try:
        if DG.nodes()[n]["Type"] == "Starting Compound":
            starting_nodes.append(n)
    except:
        bad_nodes.append(n)

RDKit ERROR: [18:07:01] Can't kekulize mol.  Unkekulized atoms: 19 23 26 27 28 29 30 31 33 34
RDKit ERROR: 
[18:07:01] Can't kekulize mol.  Unkekulized atoms: 19 23 26 27 28 29 30 31 33 34

RDKit ERROR: [18:07:07] Can't kekulize mol.  Unkekulized atoms: 19 23 26 27 28 29 30 31 33 34
RDKit ERROR: 
[18:07:07] Can't kekulize mol.  Unkekulized atoms: 19 23 26 27 28 29 30 31 33 34

RDKit ERROR: [18:07:07] Can't kekulize mol.  Unkekulized atoms: 19 23 26 27 28 29 30 31 33 34
RDKit ERROR: 
[18:07:07] Can't kekulize mol.  Unkekulized atoms: 19 23 26 27 28 29 30 31 33 34

RDKit ERROR: [18:07:07] Can't kekulize mol.  Unkekulized atoms: 19 23 26 27 28 29 30 31 33 34
RDKit ERROR: 
[18:07:07] Can't kekulize mol.  Unkekulized atoms: 19 23 26 27 28 29 30 31 33 34

RDKit ERROR: [18:07:07] Can't kekulize mol.  Unkekulized atoms: 19 23 26 27 28 29 30 31 33 34
RDKit ERROR: 
[18:07:07] Can't kekulize mol.  Unkekulized atoms: 19 23 26 27 28 29 30 31 33 34

RDKit ERROR: [18:07:07] Can't kekulize mol.  Unkek

In [31]:
# Get pathways
max_depth = generations * 2
paths = defaultdict(list)

# Specify Targets / Starting Cpds
# target_cids = [get_compound_hash(smi)[0] for smi in pk.target_smiles]
# target_names = [target_smi_name.loc[smi, "id"] for smi in pk.target_smiles]
target_cids, target_names = [], []
for k,v in pk.targets.items():
    target_cids.append(get_compound_hash(v['SMILES'])[0])
    target_names.append(v['ID'])

starting_cpds = [get_compound_hash(val["SMILES"])[0] for val in pk.compounds.values() if val["Type"].startswith("Start")]

# Loop through targets and get pathways from targets to starting compounds
for i, this_target in enumerate(target_cids):
    this_paths = get_reverse_paths_to_starting(DG, begin_node=this_target, end_nodes=starting_cpds, max_depth=max_depth)
    # If we find paths then reverse those paths and assign to a dictionary
    if this_paths:
        this_paths = list(set([tuple(path[1::2]) for path in [[*reversed(ind_path)] for ind_path in this_paths]]))
        for elt in this_paths:
            for r in pk.reactions[elt[0]]["Reactants"]:
                if r[-1] in starting_cpds:
                    s_name = pk.compounds[r[-1]]["ID"]
                    t_name = target_names[i]
                    paths[(s_name, t_name)].append(pathway(rhashes=elt, starter_hash=r[-1], target_hash=this_target)) 

In [33]:
# Make predicted reaction dict

pred_rxns = {}
degen_rhashes = defaultdict(lambda : 1)
for st_pair in paths:
    for elt in paths[st_pair]:
        for this_rhash in elt.rhashes:
            if this_rhash not in pred_rxns:
                rxn_sma = rxn_hash_2_rxn_sma(this_rhash, pk)
                pred_rxns[this_rhash] = reaction(this_rhash, rxn_sma)
            else:
                degen_rhashes[this_rhash] += 1


RDKit ERROR: [18:20:24] Can't kekulize mol.  Unkekulized atoms: 19 23 26 27 28 29 30 31 33 34
RDKit ERROR: 
[18:20:24] Can't kekulize mol.  Unkekulized atoms: 19 23 26 27 28 29 30 31 33 34



In [35]:
# Load in IMT rule mapping

# Load rules
rules_path = '../data/rules/JN3604IMT_rules.tsv'
rule_df = pd.read_csv(rules_path, delimiter='\t')
rule_df.set_index('Name', inplace=True)

# Load mapping
rxn2rule = {}
db_names = ['_mc_v21', '_brenda', '_kegg']
suffix = '_imt_rules_enforce_cof.csv'
for name in db_names:
    mapping_path = '../data/mapping/mapping' + name + suffix
    with open(mapping_path, 'r') as f:
        reader = csv.reader(f)
        for row in reader:
            if len(row) == 1:
                rxn2rule[row[0]] = []
            else:
                rxn2rule[row[0]] = row[1:]

# Make rule2rxn
rule2rxn = {}
for k,v in rxn2rule.items():
    for elt in v:
        if elt not in rule2rxn:
            rule2rxn[elt] = [k]
        else:
            rule2rxn[elt].append(k)

# Load all known reaction json entries into dict
known_rxns = {}
pref = '../data/mapping/'
suffs = ['mc_v21_as_is.json', 'brenda_as_is.json', 'kegg_as_is.json']
for elt in suffs:
    known_rxns.update(load_json(pref + elt))

In [36]:
# Populate reaction objects in rxn dict w/ known reactions

for k, v in pred_rxns.items():
    this_rules = list(pk.reactions[k]["Operators"])
    this_known_rxns = []
    for elt in this_rules:
        if elt in rule2rxn:
            this_rxn_ids = rule2rxn[elt]
            for this_id in this_rxn_ids:
                this_sma = rxn_entry_to_smarts(known_rxns[this_id])
                this_known_rxns.append((None, this_sma, this_id))
    
    v.known_rxns = [list(elt) for elt in set(this_known_rxns)]


In [44]:
# pr_am_errors = [] # Track predicted rxn am errors
# kr_am_errors = [] # Track known rxn am errors
# alignment_issues = [] # Track substrate alignment issues
# Populate pred_rxns, known rxn prc-mcs slot
for x in range(1611, len(pred_rxns.keys())):
    h = list(pred_rxns.keys())[x]
    rxn_sma1 = pred_rxns[h].smarts

    # Skip pred reactions that trigger RXNMapper atom mapping errors
    try:
        am_rxn_sma1 = atom_map(rxn_sma1)
    except:
        pr_am_errors.append(h)
        continue

    a = 0 # Number known rxns analyzed
    for z, kr in enumerate(pred_rxns[h].known_rxns):
        rxn_sma2 = kr[1]

        # Catch stoichiometry mismatches stemming from pickaxe, early post-processing
        if tuple([len(elt.split('.')) for elt in rxn_sma2.split('>>')]) != tuple([len(elt.split('.')) for elt in rxn_sma1.split('>>')]):
            print(x, z, 'stoich_error')
            continue

        # Skip pred reactions that trigger RXNMapper atom mapping errors
        try:
            am_rxn_sma2 = atom_map(rxn_sma2)
        except:
            kr_am_errors.append((h, z, kr[-1]))
            continue

        # Construct reaction objects
        rxns = []
        for elt in [am_rxn_sma1, am_rxn_sma2]:
            temp = AllChem.ReactionFromSmarts(elt, useSmiles=True)
            temp.Initialize()
            rxns.append(temp)

        rc_atoms = [elt.GetReactingAtoms() for elt in rxns] # Get reaction center atom idxs

        # Construct rxn ctr mol objs
        try: # REMOVE after addressing KekulizationException in get_sub_mol
            rcs = []
            for i, t_rxn in enumerate(rxns):
                temp = []
                for j, t_mol in enumerate(t_rxn.GetReactants()):
                    temp.append(get_sub_mol(t_mol, rc_atoms[i][j]))
                rcs.append(temp)
        except:
            continue

        # Align substrates of the 2 reactions
        rc_idxs = [] # Each element: (idx for rxn 1, idx for rxn 2)
        remaining = [[i for i in range(len(elt))] for elt in rcs]
        while (len(remaining[0]) > 0) & (len(remaining[1]) > 0):
            idx_pair = align_substrates(rcs, remaining)

            if idx_pair is None:
                break
            else:
                rc_idxs.append(idx_pair)
                remaining[0].remove(idx_pair[0])
                remaining[1].remove(idx_pair[1])

        # Skip if you haven't aligned every reactant pred to known
        if len(rc_idxs) < len(rxn_sma1.split('>>')[0].split('.')):
            alignment_issues.append((h, z, kr[-1]))
            continue

        # For reaction 2 (known reaction) Re-order rcs, rc_atoms,
        # internal order of reactants in the reaction object in rxns
        # and the smarts stored in the known_reactions attribute of the
        # associated predicted reaction

        # Sort reaction 2 rc_idxs by reaction 1 rc_idxs
        rxn_1_rc_idxs, rxn_2_rc_idxs = list(zip(*rc_idxs))
        if rxn_1_rc_idxs != rxn_2_rc_idxs:
            rxn_2_rc_idxs, rxn_1_rc_idxs = sort_x_by_y(rxn_2_rc_idxs, rxn_1_rc_idxs)

            # Re-order atom-mapped smarts string, and then update known_rxns entry
            # with de-atom-mapped version of this string because atom mapper changes
            # reactant order and its this order that rcs, rcatoms, rc_idxs all come from
            am_ro_sma2 = am_rxn_sma2.split('>>')[0].split('.') # Get list of reactant strings
            am_ro_sma2 = '.'.join([am_ro_sma2[elt] for elt in rxn_2_rc_idxs]) # Re-join in new order
            am_rxn_sma2 = am_ro_sma2 + '>>' + am_rxn_sma2.split('>>')[1] # Join with products side

            # Re-construct reaction object from re-ordered, am smarts
            foo = rxns[1]
            temp = AllChem.ReactionFromSmarts(am_rxn_sma2, useSmiles=True)
            temp.Initialize()
            rxns[1] = temp
            bar = rxns[1]

            rc_atoms[1] = rxns[1].GetReactingAtoms() # Update rc_atoms
            rcs[1] = [get_sub_mol(elt, rc_atoms[1][i]) for i, elt in enumerate(rxns[1].GetReactants())] # Update rc mol obj
        
        pred_rxns[h].known_rxns[z][1] = rm_atom_map_num(am_rxn_sma2) # Update known_reaction entry w/ de-am smarts
        rxns = align_atom_map_nums(rxns, rcs, rc_atoms)

        # Compute MCS seeded by reaction center
        prc_mcs = get_prc_mcs(rxns, rcs, rc_atoms) 
        pred_rxns[h].known_rxns[z][0] = prc_mcs # Update pred_rxns
        
        a += 1 # Count known rxn analyzed
        pred_rxns[h].smarts = rm_atom_map_num(am_rxn_sma1) # Update pred_rxn smarts w/ de-am smarts

    print(x, ':', a / (z+1), 'of', z+1)

1611 : 0.15 of 40
1612 : 0.12790697674418605 of 86
1613 : 0.13636363636363635 of 88
1614 : 1.0 of 15
1615 : 0.13636363636363635 of 88
1616 : 0.723404255319149 of 47
1617 : 0.0 of 88
1618 : 0.13636363636363635 of 88
1619 : 1.0 of 38
1620 : 0.13636363636363635 of 88
1621 : 1.0 of 15
1622 : 1.0 of 23
1623 : 1.0 of 15
1624 : 0.13636363636363635 of 88
1625 : 0.9649122807017544 of 57
1626 : 0.13636363636363635 of 88
1627 : 0.13636363636363635 of 88
1628 : 1.0 of 15
1629 : 0.0 of 88
1630 : 0.6533333333333333 of 75
1631 : 0.9736842105263158 of 38
1632 : 0.9807692307692307 of 52
1633 : 1.0 of 15
1634 : 0.0 of 88
1635 : 0.723404255319149 of 47
1636 : 0.23809523809523808 of 84
1637 : 1.0 of 15
1638 : 0.0 of 88
1639 : 1.0 of 22
1640 : 0.13636363636363635 of 88
1641 : 0.0 of 2
1642 : 0.723404255319149 of 47
1643 : 1.0 of 15
1644 : 0.13636363636363635 of 88
1645 : 0.42857142857142855 of 56
1646 : 0.9882352941176471 of 85
1647 : 0.723404255319149 of 47
1648 : 0.13636363636363635 of 88
1649 : 0.081395

RDKit ERROR: [00:03:00] non-ring atom 0 marked aromatic
[00:03:00] non-ring atom 0 marked aromatic
RDKit ERROR: [00:03:00] non-ring atom 0 marked aromatic
[00:03:00] non-ring atom 0 marked aromatic
RDKit ERROR: [00:03:00] non-ring atom 0 marked aromatic
[00:03:00] non-ring atom 0 marked aromatic
RDKit ERROR: [00:03:00] non-ring atom 0 marked aromatic
[00:03:00] non-ring atom 0 marked aromatic
RDKit ERROR: [00:03:00] non-ring atom 0 marked aromatic
[00:03:00] non-ring atom 0 marked aromatic
RDKit ERROR: [00:03:00] non-ring atom 0 marked aromatic
[00:03:00] non-ring atom 0 marked aromatic
RDKit ERROR: [00:03:00] non-ring atom 0 marked aromatic
[00:03:00] non-ring atom 0 marked aromatic
RDKit ERROR: [00:03:00] non-ring atom 0 marked aromatic
[00:03:00] non-ring atom 0 marked aromatic
RDKit ERROR: [00:03:00] non-ring atom 0 marked aromatic
[00:03:00] non-ring atom 0 marked aromatic
RDKit ERROR: [00:03:00] non-ring atom 0 marked aromatic
[00:03:00] non-ring atom 0 marked aromatic
RDKit ERRO

1979 : 0.0 of 88
1981 : 0.13636363636363635 of 88
1982 : 0.13636363636363635 of 88
1983 : 0.9 of 10
1984 : 0.0 of 88
1985 : 0.0 of 88
1986 : 0.13636363636363635 of 88
1987 : 0.0 of 88
1988 : 0.13636363636363635 of 88
1989 : 0.13636363636363635 of 88
1990 : 0.0 of 88
1991 : 0.0 of 88
1992 : 0.15 of 40
1993 : 0.13636363636363635 of 88
1994 : 0.0 of 88
1995 : 0.15 of 40
1996 : 0.0 of 88
1997 : 0.0 of 88
1998 : 0.9 of 20
1999 : 0.0 of 88
2000 : 0.0 of 40
2001 : 1.0 of 2
2002 : 0.15 of 40
2003 : 0.13636363636363635 of 88
2004 : 0.0 of 88
2005 : 0.13636363636363635 of 88
2006 : 0.13636363636363635 of 88
2007 : 0.723404255319149 of 47
2008 : 1.0 of 20
2009 : 0.0 of 88
2010 : 0.15 of 40
2011 : 0.0 of 88
2012 : 0.0 of 40
2013 : 0.0 of 88
2014 : 0.13636363636363635 of 88
2015 : 0.0 of 88
2016 : 0.0 of 88
2017 : 0.13636363636363635 of 88
2018 : 0.15 of 40
2019 : 0.723404255319149 of 47
2020 : 0.0 of 88
2021 : 0.0 of 88
2022 : 0.13636363636363635 of 88
2023 : 0.0 of 88
2024 : 0.0 of 88
2025 : 0.0 

RDKit ERROR: [00:08:12] non-ring atom 0 marked aromatic
[00:08:12] non-ring atom 0 marked aromatic
RDKit ERROR: [00:08:12] non-ring atom 0 marked aromatic
[00:08:12] non-ring atom 0 marked aromatic
RDKit ERROR: [00:08:12] non-ring atom 0 marked aromatic
[00:08:12] non-ring atom 0 marked aromatic
RDKit ERROR: [00:08:12] non-ring atom 0 marked aromatic
[00:08:12] non-ring atom 0 marked aromatic
RDKit ERROR: [00:08:12] non-ring atom 0 marked aromatic
[00:08:12] non-ring atom 0 marked aromatic
RDKit ERROR: [00:08:12] non-ring atom 0 marked aromatic
[00:08:12] non-ring atom 0 marked aromatic
RDKit ERROR: [00:08:12] non-ring atom 0 marked aromatic
[00:08:12] non-ring atom 0 marked aromatic
RDKit ERROR: [00:08:12] non-ring atom 0 marked aromatic
[00:08:12] non-ring atom 0 marked aromatic
RDKit ERROR: [00:08:12] non-ring atom 0 marked aromatic
[00:08:12] non-ring atom 0 marked aromatic
RDKit ERROR: [00:08:13] non-ring atom 0 marked aromatic
[00:08:13] non-ring atom 0 marked aromatic
RDKit ERRO

2056 : 0.0 of 88
2057 : 0.15 of 40
2058 : 0.13636363636363635 of 88
2059 : 0.0 of 88
2060 : 0.0 of 88
2061 : 0.0 of 88
2062 : 0.13636363636363635 of 88
2063 : 0.0 of 88
2064 : 0.0 of 88
2065 : 0.0 of 88
2066 : 0.13636363636363635 of 88
2067 : 0.0 of 88
2068 : 0.13636363636363635 of 88
2069 : 0.0 of 88
2070 : 0.0 of 88
2071 : 0.9 of 10


RDKit ERROR: [00:09:27] non-ring atom 0 marked aromatic
[00:09:27] non-ring atom 0 marked aromatic
RDKit ERROR: [00:09:27] non-ring atom 0 marked aromatic
[00:09:27] non-ring atom 0 marked aromatic
RDKit ERROR: [00:09:27] non-ring atom 0 marked aromatic
[00:09:27] non-ring atom 0 marked aromatic
RDKit ERROR: [00:09:27] non-ring atom 0 marked aromatic
[00:09:27] non-ring atom 0 marked aromatic
RDKit ERROR: [00:09:27] non-ring atom 0 marked aromatic
[00:09:27] non-ring atom 0 marked aromatic
RDKit ERROR: [00:09:27] non-ring atom 0 marked aromatic
[00:09:27] non-ring atom 0 marked aromatic
RDKit ERROR: [00:09:28] non-ring atom 0 marked aromatic
[00:09:28] non-ring atom 0 marked aromatic
RDKit ERROR: [00:09:28] non-ring atom 0 marked aromatic
[00:09:28] non-ring atom 0 marked aromatic
RDKit ERROR: [00:09:28] non-ring atom 0 marked aromatic
[00:09:28] non-ring atom 0 marked aromatic
RDKit ERROR: [00:09:28] non-ring atom 0 marked aromatic
[00:09:28] non-ring atom 0 marked aromatic
RDKit ERRO

2072 : 0.0 of 88
2073 : 0.13636363636363635 of 88
2074 : 0.15 of 40
2075 : 0.0 of 88
2076 : 0.0 of 88
2077 : 0.0 of 88
2078 : 0.15 of 40
2080 : 0.0 of 88
2081 : 0.13636363636363635 of 88


RDKit ERROR: [00:10:09] non-ring atom 4 marked aromatic
[00:10:09] non-ring atom 4 marked aromatic
RDKit ERROR: [00:10:09] non-ring atom 4 marked aromatic
[00:10:09] non-ring atom 4 marked aromatic
RDKit ERROR: [00:10:10] non-ring atom 4 marked aromatic
[00:10:10] non-ring atom 4 marked aromatic
RDKit ERROR: [00:10:10] non-ring atom 4 marked aromatic
[00:10:10] non-ring atom 4 marked aromatic
RDKit ERROR: [00:10:10] non-ring atom 4 marked aromatic
[00:10:10] non-ring atom 4 marked aromatic
RDKit ERROR: [00:10:10] non-ring atom 4 marked aromatic
[00:10:10] non-ring atom 4 marked aromatic
RDKit ERROR: [00:10:10] non-ring atom 4 marked aromatic
[00:10:10] non-ring atom 4 marked aromatic
RDKit ERROR: [00:10:10] non-ring atom 4 marked aromatic
[00:10:10] non-ring atom 4 marked aromatic
RDKit ERROR: [00:10:10] non-ring atom 4 marked aromatic
[00:10:10] non-ring atom 4 marked aromatic
RDKit ERROR: [00:10:10] non-ring atom 4 marked aromatic
[00:10:10] non-ring atom 4 marked aromatic
RDKit ERRO

2082 : 0.0 of 88
2083 : 0.0 of 88
2084 : 0.0 of 88
2085 : 0.13636363636363635 of 88
2086 : 0.15 of 40
2087 : 0.15 of 40
2088 : 0.15 of 40
2089 : 0.0 of 88
2090 : 0.13636363636363635 of 88
2091 : 0.15 of 40
2092 : 0.13636363636363635 of 88
2093 : 0.0 of 88
2094 : 0.13636363636363635 of 88
2095 : 0.13636363636363635 of 88
2096 : 0.15 of 40
2097 : 0.15 of 40
2098 : 0.13636363636363635 of 88
2099 : 0.07954545454545454 of 88
2100 : 0.0 of 88
2101 : 0.13636363636363635 of 88
2102 : 0.13636363636363635 of 88
2103 : 0.13636363636363635 of 88
2104 : 0.0 of 88
2105 : 0.15 of 40
2106 : 0.13636363636363635 of 88
2107 : 0.15 of 40
2108 : 0.0 of 88
2109 : 0.0 of 88
2110 : 0.0 of 88
2111 : 0.15 of 40
2112 : 0.9 of 10
2113 : 0.13636363636363635 of 88
2114 : 0.0 of 88
2115 : 0.15 of 40
2116 : 0.15 of 40
2117 : 0.13636363636363635 of 88
2118 : 0.0 of 88
2119 : 0.0 of 40
2120 : 0.9 of 10
2121 : 0.723404255319149 of 47
2122 : 0.0 of 88
2123 : 0.0 of 88
2124 : 0.13636363636363635 of 88
2125 : 0.0 of 88
212

RDKit ERROR: [00:19:28] non-ring atom 2 marked aromatic
[00:19:28] non-ring atom 2 marked aromatic


2272 : 0.7505376344086021 of 465
2273 : 0.0 of 43
2274 : 0.0 of 40
2275 : 0.15 of 40
2277 : 0.85 of 20


RDKit ERROR: [00:20:01] Can't kekulize mol.  Unkekulized atoms: 19 23 26 27 28 29 30 31 33 34
RDKit ERROR: 
[00:20:01] Can't kekulize mol.  Unkekulized atoms: 19 23 26 27 28 29 30 31 33 34



2279 : 1.0 of 7
2280 : 0.29411764705882354 of 17
2281 : 0.7439024390243902 of 82
2282 : 0.8 of 45
2283 : 1.0 of 7
2284 : 1.0 of 7
2285 : 1.0 of 82
2286 : 1.0 of 2
2287 : 0.13636363636363635 of 88
2288 : 0.8198757763975155 of 322
2289 : 0.0 of 40
2290 : 0.0 of 2
2291 : 1.0 of 3
2292 : 0.0 of 26
2293 : 0.0 of 40
2294 : 1.0 of 69
2295 : 0.15 of 40
2296 : 0.975609756097561 of 41
2297 : 1.0 of 18
2298 : 1.0 of 11
2299 : 0.14814814814814814 of 27
2300 : 1.0 of 6
2301 : 1.0 of 4
2302 : 0.15 of 40
2303 : 0.9615384615384616 of 26
2304 : 0.13636363636363635 of 88
2305 : 0.8181818181818182 of 22
2306 : 0.7037037037037037 of 54
2307 : 0.0 of 37
2308 : 1.0 of 106
2309 : 0.868421052631579 of 38
2310 : 1.0 of 8
2311 : 1.0 of 11
2312 : 1.0 of 16
2313 : 0.7439024390243902 of 82
2314 : 0.9090909090909091 of 11
2315 : 0.15 of 40
2316 : 0.723404255319149 of 47
2317 : 0.9069767441860465 of 43
2318 : 0.15 of 40
2319 : 0.9090909090909091 of 11
2320 : 0.13636363636363635 of 88
2321 : 0.12790697674418605 of 86

In [45]:
# Save reactions dict and paths list (ultimately will replace with expansion object)

rxns_fn = 'predicted_reactions_' + fn
paths_fn = 'paths_' + fn
save_dir = '../data/processed_expansions/'
rxns_path = save_dir + rxns_fn
paths_path = save_dir + paths_fn

with open(rxns_path, 'wb') as f:
    pickle.dump(pred_rxns, f)

with open(paths_path, 'wb') as f:
    pickle.dump(paths, f)

In [46]:
len(alignment_issues), len(pr_am_errors), len(kr_am_errors)

(115545, 15, 11288)