# Identify Imbalanced reactions
This script is by Frowin.

Goal of this script:
Identify all imbalanced reactions in all models in a directory

Input:  
A directory containing GEMs as xml files

Output:  
A csv file showing all imbalanced reactions across all models in the input directory, how often and in which models they occur.

## Imports

In [1]:
import cobra
from cobra.io import read_sbml_model
from collections import defaultdict
import os
import pandas as pd
import csv

## Paths

In [2]:
# Path to your working directory
project_dir = "/home/lisa/Dokumente/Programmierung/"

# Path to the model files
model_path = project_dir+ "Models/before_mass_balance/"

# Path to the directory where you want to save the results
save_path = project_dir + "Models/mass_balance/"

## Functions

In [3]:
# Creates a list of all internal reactions in the model

def internal_reactions(model):
    
    # Get all reaction_ids in the model
    model_reactions = []
    for reaction in model.reactions:
        model_reactions.append(reaction.id)

    # Filter out all exchanges from the list of reactions
    indicator_exchanges = ['EX','sink','Growth']
    internal_reactions = list(filter(lambda i: all(indicator not in i for indicator in indicator_exchanges), model_reactions))
    
    return internal_reactions

In [4]:
# Confirms if a single reaction is mass balanced
# This function checks if the sum of the elements in the reactants equals the sum of the elements in the products
# It returns True if the reaction is mass balanced, and False otherwise

def is_mass_balanced(reaction):

    # Create a dict to store the sum of each element in the reaction
    balance = defaultdict(int)

    # Iterate over the metabolites in the reaction 
    for metabolite, coefficient in model.reactions.get_by_id(reaction).metabolites.items():

        # Check if the metabolite has elements
        if metabolite.elements is None or len(metabolite.elements) == 0:
            return False
        
        # Sum the elements in the balance dict using their coefficients
        for element, amount in (metabolite.elements).items():
            balance[element] += coefficient * amount

    # Returns True if all elements are balanced (in a margin of 1e-10)        
    return all(-1e-10 < amount < 1e-10 for amount in balance.values()) 

In [5]:
# This function checks all internal reactions in the model and returns a list of unbalanced reactions

def find_mass_unbalanced_reactions(model):

    # Get all internal reactions in the model
    reactions = internal_reactions(model)

    # Check each reaction for mass balance using the is_mass_balanced function
    # and return a list of unbalanced reactions
    return [rxn for rxn in reactions if not is_mass_balanced(rxn)]

In [6]:
# This function combines the list of all imbalanced reactions from all models
# and create a DataFrame showing how often each imbalanced reaction occurs throughout the models
def reaction_abundance(imbalanced_models):
    # Create a list to collect all imbalanced reactions
    # and a dict to store the models containing these reactions
    unique_imbalanced_reactions = []
    occurences_reactions = {}
    # Create the unique list of imbalanced reactions by iterating through all reaction list in the dict and appending reactions that are not already in the list
    for reaction_list in imbalanced_models.values():
        for reaction in reaction_list:
            if reaction not in unique_imbalanced_reactions:
                unique_imbalanced_reactions.append(reaction)
    # Iterate through the list and find all models containing this reaction
    for reaction in unique_imbalanced_reactions:
        occurences_reactions[reaction] = []
        for imbalanced_model in imbalanced_models.keys():
            if reaction in imbalanced_models[imbalanced_model]:
                occurences_reactions[reaction].append(imbalanced_model)
    # Create a table with all results
    imbalanced_reactions = pd.DataFrame({
    "Reaction": occurences_reactions.keys(),
    "Occurences": [len(v) for v in occurences_reactions.values()],
    "Model IDs": occurences_reactions.values()})
    return imbalanced_reactions

## Main

In [9]:
imbalanced_models = {}

for m in os.listdir(model_path):
    if m.endswith(".xml"):
        print(m)
        model = read_sbml_model(model_path+f'/{m}')
        if len(find_mass_unbalanced_reactions(model)) != 0:
            imbalanced_models[m] = find_mass_unbalanced_reactions(model)

imbalanced_reactions = reaction_abundance(imbalanced_models)

Adding exchange reaction EX_14glucan_e with default bounds for boundary metabolite: 14glucan_e.
Adding exchange reaction EX_25dkglcn_e with default bounds for boundary metabolite: 25dkglcn_e.
Adding exchange reaction EX_2m35mdntha_e with default bounds for boundary metabolite: 2m35mdntha_e.
Adding exchange reaction EX_2pglyc_e with default bounds for boundary metabolite: 2pglyc_e.
Adding exchange reaction EX_34dhbz_e with default bounds for boundary metabolite: 34dhbz_e.
Adding exchange reaction EX_35dnta_e with default bounds for boundary metabolite: 35dnta_e.
Adding exchange reaction EX_3mb_e with default bounds for boundary metabolite: 3mb_e.
Adding exchange reaction EX_3oxoadp_e with default bounds for boundary metabolite: 3oxoadp_e.
Adding exchange reaction EX_4abut_e with default bounds for boundary metabolite: 4abut_e.
Adding exchange reaction EX_4hbald_e with default bounds for boundary metabolite: 4hbald_e.
Adding exchange reaction EX_4hbz_e with default bounds for boundary me

AA5.xml


Ignoring reaction 'EX_14glucan_e' since it already exists.
Ignoring reaction 'EX_25dkglcn_e' since it already exists.
Ignoring reaction 'EX_2m35mdntha_e' since it already exists.
Ignoring reaction 'EX_2pglyc_e' since it already exists.
Ignoring reaction 'EX_34dhbz_e' since it already exists.
Ignoring reaction 'EX_35dnta_e' since it already exists.
Ignoring reaction 'EX_3mb_e' since it already exists.
Ignoring reaction 'EX_3oxoadp_e' since it already exists.
Ignoring reaction 'EX_4abut_e' since it already exists.
Ignoring reaction 'EX_4hbald_e' since it already exists.
Ignoring reaction 'EX_4hbz_e' since it already exists.
Ignoring reaction 'EX_4hpro_LT_e' since it already exists.
Ignoring reaction 'EX_5dglcn_e' since it already exists.
Ignoring reaction 'EX_5drib_e' since it already exists.
Ignoring reaction 'EX_5mdru1p_e' since it already exists.
Ignoring reaction 'EX_LalaDglu_e' since it already exists.
Ignoring reaction 'EX_LalaLglu_e' since it already exists.
Ignoring reaction 'EX_

AA4.xml


Ignoring reaction 'EX_12ppd__R_e' since it already exists.
Ignoring reaction 'EX_12ppd__S_e' since it already exists.
Ignoring reaction 'EX_14glucan_e' since it already exists.
Ignoring reaction 'EX_15dap_e' since it already exists.
Ignoring reaction 'EX_23camp_e' since it already exists.
Ignoring reaction 'EX_23ccmp_e' since it already exists.
Ignoring reaction 'EX_23cgmp_e' since it already exists.
Ignoring reaction 'EX_23cump_e' since it already exists.
Ignoring reaction 'EX_25dkglcn_e' since it already exists.
Ignoring reaction 'EX_26dap__M_e' since it already exists.
Ignoring reaction 'EX_2ameph_e' since it already exists.
Ignoring reaction 'EX_2ddglcn_e' since it already exists.
Ignoring reaction 'EX_2dhglcn_e' since it already exists.
Ignoring reaction 'EX_2hxmp_e' since it already exists.
Ignoring reaction 'EX_2m35mdntha_e' since it already exists.
Ignoring reaction 'EX_2pg_e' since it already exists.
Ignoring reaction 'EX_2pglyc_e' since it already exists.
Ignoring reaction 'E

AA6.xml


Ignoring reaction 'EX_15dap_e' since it already exists.
Ignoring reaction 'EX_25dkglcn_e' since it already exists.
Ignoring reaction 'EX_2ameph_e' since it already exists.
Ignoring reaction 'EX_2m35mdntha_e' since it already exists.
Ignoring reaction 'EX_2pglyc_e' since it already exists.
Ignoring reaction 'EX_34dhbz_e' since it already exists.
Ignoring reaction 'EX_35dnta_e' since it already exists.
Ignoring reaction 'EX_3h4atb_e' since it already exists.
Ignoring reaction 'EX_3hoxpac_e' since it already exists.
Ignoring reaction 'EX_3mb_e' since it already exists.
Ignoring reaction 'EX_3oxoadp_e' since it already exists.
Ignoring reaction 'EX_4abut_e' since it already exists.
Ignoring reaction 'EX_4hba_e' since it already exists.
Ignoring reaction 'EX_4hbald_e' since it already exists.
Ignoring reaction 'EX_4hbz_e' since it already exists.
Ignoring reaction 'EX_4hoxpac_e' since it already exists.
Ignoring reaction 'EX_4hphac_e' since it already exists.
Ignoring reaction 'EX_4hpro_LT_

AA1.xml


Ignoring reaction 'EX_13ppd_e' since it already exists.
Ignoring reaction 'EX_14glucan_e' since it already exists.
Ignoring reaction 'EX_15dap_e' since it already exists.
Ignoring reaction 'EX_2ddglcn_e' since it already exists.
Ignoring reaction 'EX_2dhglcn_e' since it already exists.
Ignoring reaction 'EX_2m35mdntha_e' since it already exists.
Ignoring reaction 'EX_35dnta_e' since it already exists.
Ignoring reaction 'EX_3mb_e' since it already exists.
Ignoring reaction 'EX_3oxoadp_e' since it already exists.
Ignoring reaction 'EX_4abut_e' since it already exists.
Ignoring reaction 'EX_4abutn_e' since it already exists.
Ignoring reaction 'EX_4hbald_e' since it already exists.
Ignoring reaction 'EX_4hbz_e' since it already exists.
Ignoring reaction 'EX_5dglcn_e' since it already exists.
Ignoring reaction 'EX_5drib_e' since it already exists.
Ignoring reaction 'EX_6atha_e' since it already exists.
Ignoring reaction 'EX_LalaLglu_e' since it already exists.
Ignoring reaction 'EX_R_3h4atb

AA3.xml


Ignoring reaction 'EX_12ppd__R_e' since it already exists.
Ignoring reaction 'EX_2ameph_e' since it already exists.
Ignoring reaction 'EX_2hxmp_e' since it already exists.
Ignoring reaction 'EX_2m35mdntha_e' since it already exists.
Ignoring reaction 'EX_35dnta_e' since it already exists.
Ignoring reaction 'EX_3amp_e' since it already exists.
Ignoring reaction 'EX_3cmp_e' since it already exists.
Ignoring reaction 'EX_3gmp_e' since it already exists.
Ignoring reaction 'EX_3h4atb_e' since it already exists.
Ignoring reaction 'EX_3ump_e' since it already exists.
Ignoring reaction 'EX_4abut_e' since it already exists.
Ignoring reaction 'EX_4ahmmp_e' since it already exists.
Ignoring reaction 'EX_4hba_e' since it already exists.
Ignoring reaction 'EX_4hbz_e' since it already exists.
Ignoring reaction 'EX_4hpro_DC_e' since it already exists.
Ignoring reaction 'EX_4hpro_LT_e' since it already exists.
Ignoring reaction 'EX_5drib_e' since it already exists.
Ignoring reaction 'EX_6pgc_e' since 

AA7.xml


Ignoring reaction 'EX_12dgr160_e' since it already exists.
Ignoring reaction 'EX_12dgr180_e' since it already exists.
Ignoring reaction 'EX_1ag160_e' since it already exists.
Ignoring reaction 'EX_1ag180_e' since it already exists.
Ignoring reaction 'EX_1ag181d9_e' since it already exists.
Ignoring reaction 'EX_1ag182d9d12_e' since it already exists.
Ignoring reaction 'EX_25dkglcn_e' since it already exists.
Ignoring reaction 'EX_2ameph_e' since it already exists.
Ignoring reaction 'EX_2ddglcn_e' since it already exists.
Ignoring reaction 'EX_2m35mdntha_e' since it already exists.
Ignoring reaction 'EX_34dhbz_e' since it already exists.
Ignoring reaction 'EX_35dnta_e' since it already exists.
Ignoring reaction 'EX_3h4atb_e' since it already exists.
Ignoring reaction 'EX_3mb_e' since it already exists.
Ignoring reaction 'EX_3oxoadp_e' since it already exists.
Ignoring reaction 'EX_4abz_e' since it already exists.
Ignoring reaction 'EX_4hba_e' since it already exists.
Ignoring reaction '

AA2.xml


Adding exchange reaction EX_12ppd__R_e with default bounds for boundary metabolite: 12ppd__R_e.
Adding exchange reaction EX_25dkglcn_e with default bounds for boundary metabolite: 25dkglcn_e.
Adding exchange reaction EX_2dhglcn_e with default bounds for boundary metabolite: 2dhglcn_e.
Adding exchange reaction EX_2m35mdntha_e with default bounds for boundary metabolite: 2m35mdntha_e.
Adding exchange reaction EX_2pglyc_e with default bounds for boundary metabolite: 2pglyc_e.
Adding exchange reaction EX_35dnta_e with default bounds for boundary metabolite: 35dnta_e.
Adding exchange reaction EX_3h4atb_e with default bounds for boundary metabolite: 3h4atb_e.
Adding exchange reaction EX_3mb_e with default bounds for boundary metabolite: 3mb_e.
Adding exchange reaction EX_3oxoadp_e with default bounds for boundary metabolite: 3oxoadp_e.
Adding exchange reaction EX_4ahmmp_e with default bounds for boundary metabolite: 4ahmmp_e.
Adding exchange reaction EX_4hba_e with default bounds for boundar

## Output the Result

In [10]:
imbalanced_reactions

Unnamed: 0,Reaction,Occurences,Model IDs
0,CMCBTFL,7,"[AA5.xml, AA4.xml, AA6.xml, AA1.xml, AA3.xml, ..."
1,SALCHS4FEabcpp,7,"[AA5.xml, AA4.xml, AA6.xml, AA1.xml, AA3.xml, ..."
2,CMCBTFabcpp,3,"[AA4.xml, AA7.xml, AA2.xml]"
3,DHBSZ3FEabcpp,4,"[AA4.xml, AA6.xml, AA3.xml, AA2.xml]"


Save the result

In [11]:
# Save the results
imbalanced_reactions.to_csv(save_path+"/Imbalanced_reactions.csv", sep="\t")