# Balancing metabolic models
This is a shortened version of Lisa's original notebook "02_balance_metabolic_models.ipynb" (see GitHub). It is meant to be more general so that it can also be used for other models.

## Import

In [31]:
import pandas as pd
import ast
import os
import csv
from collections import Counter
from cobra.io import read_sbml_model, write_sbml_model
from cobra.manipulation.validate import check_mass_balance

In [32]:
# Two CSV exports from BiGG are required: one with metabolite info and one with reaction info.
# They can also be downloaded programmatically (slow); see Lisa's original notebook for that.

# BiGG metabolites: some columns hold Python literals stored as strings in the CSV,
# so they are parsed back into real Python objects after reading.
df_bigg_met = pd.read_csv("../../bigg_metabolites_complete.csv", quotechar='"')
for literal_column in ("formulas", "charges"):
    df_bigg_met[literal_column] = df_bigg_met[literal_column].apply(ast.literal_eval)

# BiGG reactions: only the columns used in this notebook are read.
df_bigg_rea = pd.read_csv(
    "../../bigg_reactions_complete.csv",
    quotechar='"',
    usecols=['bigg_id', 'name', 'metabolites', 'equation'],
)
for literal_column in ("metabolites", "equation"):
    df_bigg_rea[literal_column] = df_bigg_rea[literal_column].apply(ast.literal_eval)

In [33]:
# Directory that contains the SBML (.xml) model files; every *.xml file in it is loaded in the next cell
models_path = "../../Models/02_mass_balance/"

In [34]:
# Load every SBML file found in `models_path` into `models`:
# key = file name without the ".xml" suffix, value = the cobra model.
models = {}
xml_files = [entry for entry in os.listdir(models_path) if entry.endswith(".xml")]
for model_name in xml_files:
    model = read_sbml_model(f"{models_path}/{model_name}")
    # NOTE(review): presumably requires a working CPLEX installation - confirm for other setups
    model.solver = "cplex"
    models[model_name[:-4]] = model  # drop the trailing ".xml"

# os.listdir() returns entries in arbitrary order, so rebuild the dict sorted by model name
models = dict(sorted(models.items(), key=lambda item: item[0]))

## Functions

In [35]:
# Runs cobra's mass/charge balance check on every reaction of a model
def check_balance(model, print_results=True):
    """Return the result of ``check_mass_balance(model)``.

    The returned mapping has one entry per unbalanced reaction (its length is
    used as the number of unbalanced reactions throughout this notebook).

    Parameters
    ----------
    model : cobra model to check.
    print_results : bool, default True
        If True, print a one-line summary with the number of unbalanced reactions.
    """
    imbalanced = check_mass_balance(model)
    if not print_results:
        return imbalanced
    summary = "There are {0} unbalanced reactions in {1}".format(len(imbalanced), model)
    print(summary)
    return imbalanced

In [36]:
# Builds a per-metabolite overview table (bigg_id, model_id, formula, charge) for one cobra model
# NOTE: bigg_id is only derived from the model ID by cutting off the last "_<suffix>" (the compartment),
#       so it is not guaranteed to be an ID that really exists on the BiGG website
def extract_met_info_model(model):
    """Collect ID, formula and charge of every metabolite in ``model``.

    Returns
    -------
    pandas.DataFrame
        One row per metabolite with the columns ``bigg_id`` (model ID without
        its last ``_``-separated part, e.g. ``glc__D_c`` -> ``glc__D``),
        ``model_id``, ``model_formula`` and ``model_charge``.
    """
    rows = [
        {
            "bigg_id": met.id.rsplit("_", 1)[0],
            "model_id": met.id,
            "model_formula": met.formula,
            "model_charge": met.charge,
        }
        for met in model.metabolites
    ]
    return pd.DataFrame(rows)

In [37]:
# Joins the model metabolite table with the BiGG metabolite table and flags whether formula/charge agree
def compare_bigg_modelMets(model_mets, list_unbalanced_mets):
    """Merge model metabolite info with BiGG info and add comparison columns.

    Parameters
    ----------
    model_mets : pandas.DataFrame
        Table as produced by ``extract_met_info_model`` (needs ``bigg_id``,
        ``model_id``, ``model_formula`` and ``model_charge``).
    list_unbalanced_mets : iterable
        Model metabolite IDs that take part in at least one unbalanced reaction.

    Returns
    -------
    pandas.DataFrame
        Left merge of ``model_mets`` with the global ``df_bigg_met`` on ``bigg_id``
        plus three boolean columns: ``charge_match``, ``formula_match`` and
        ``unbalanced``.
    """
    def _charge_known(row):
        # rows without a BiGG match have no list here -> no match
        known_charges = row["charges"]
        if not isinstance(known_charges, list):
            return False
        return row["model_charge"] in known_charges

    def _formula_known(row):
        known_formulas = row["formulas"]
        if not isinstance(known_formulas, (list, set)):
            return False
        return row["model_formula"] in known_formulas

    # Merge on the BiGG ID (tune how the compartment is stripped upstream if IDs do not match)
    merged = model_mets.merge(df_bigg_met, on="bigg_id", how="left")
    merged["charge_match"] = merged.apply(_charge_known, axis=1)
    merged["formula_match"] = merged.apply(_formula_known, axis=1)

    # True = metabolite is part of at least one unbalanced reaction, False = it is not
    # (append .astype(int) if 1/0 is preferred over True/False)
    merged['unbalanced'] = merged['model_id'].isin(list_unbalanced_mets)

    return merged

In [38]:
# Overwrite model metabolite info with BiGG info if the BiGG info is unambiguous, i.e. only one charge state/formula
def overwrite_with_BIGG_metabolites(model, merged_df):
    """Copy unambiguous BiGG charges/formulas onto the metabolites of ``model`` (in place).

    Parameters
    ----------
    model : cobra model whose metabolites are updated.
    merged_df : pandas.DataFrame
        Table as produced by ``compare_bigg_modelMets``; the columns ``model_id``,
        ``charges`` and ``formulas`` are read. Every row is processed; the
        ``unbalanced`` column is not consulted.

    Rules
    -----
    * exactly one BiGG charge  -> the model charge is set to it (as ``int``)
    * otherwise                -> the existing model charge is only cast to ``int``
      (needed so that the model can be written to SBML later); a missing charge
      is left untouched
    * exactly one BiGG formula and at least one BiGG charge -> the formula is
      taken over, unless it contains "X" or "R"
    * rows without a BiGG match (NaN after the left merge) count as "no BiGG info"

    Prints the number of unbalanced reactions before and after the update.
    """
    n_unbalanced = len(check_balance(model, print_results=False))

    # zip iterates positionally, so this also works if merged_df does not have a 0..n-1 index
    for model_id, charges, formulas in zip(merged_df["model_id"], merged_df["charges"], merged_df["formulas"]):
        met = model.metabolites.get_by_id(model_id)

        # metabolites without a BiGG entry carry NaN instead of a list after the left merge
        if not isinstance(charges, (list, tuple)):
            charges = []
        if not isinstance(formulas, (list, tuple)):
            formulas = []

        if len(charges) == 1:
            met.charge = int(charges[0])
        elif met.charge is not None and not pd.isna(met.charge):
            # no unambiguous BiGG charge: only normalise the datatype of the existing charge
            met.charge = int(met.charge)

        # only trust a single formula if BiGG also knows at least one charge state,
        # because otherwise the model formula could be the right one
        if len(formulas) == 1 and len(charges) != 0:
            formula = formulas[0]
            # skip formulas containing "X" or "R" (presumably generic/placeholder groups)
            if isinstance(formula, str) and "X" not in formula and "R" not in formula:
                met.formula = formula

    n_unbalanced_update = len(check_balance(model, print_results=False))
    print(f'{model.id}: There were {n_unbalanced} unbalanced reactions before and now there are {n_unbalanced_update} after overwriting metabolite info with BIGG data.')

In [39]:
# Overwrite the stoichiometry of unbalanced model reactions with the BiGG equation
def overwrite_with_BIGG_reactions(model):
    """Replace the metabolites of every unbalanced reaction with the BiGG stoichiometry (in place).

    The BiGG equation is looked up by reaction ID in the global ``df_bigg_rea``
    (first row per ID). A reaction is left unchanged, and reported at the end, if

    * BiGG has no (or an empty) equation for its ID, or
    * the BiGG equation uses a metabolite ID that does not exist in ``model``.

    Previously either case raised and aborted the whole run (or, for an empty
    equation, silently emptied the reaction).

    Prints the number of unbalanced reactions before and after the update.
    """
    unbalanced_rxns = [r.id for r in check_balance(model, print_results=False)]

    # Build the ID -> equation lookup once instead of scanning the DataFrame per reaction;
    # drop_duplicates keeps the first row per ID, i.e. the row `.iloc[0]` would have picked.
    bigg_rows = df_bigg_rea[df_bigg_rea["bigg_id"].isin(unbalanced_rxns)].drop_duplicates("bigg_id")
    bigg_equations = dict(zip(bigg_rows["bigg_id"], bigg_rows["equation"]))

    skipped = {}
    for rxn in unbalanced_rxns:
        new_react = bigg_equations.get(rxn)
        if not new_react:
            skipped[rxn] = "no BiGG equation available"
            continue

        missing = [met_id for met_id in new_react if met_id not in model.metabolites]
        if missing:
            skipped[rxn] = "metabolite(s) not in model: " + ", ".join(map(str, missing))
            continue

        # resolve all metabolites before touching the reaction so it is never left half-edited
        new_mets_dict = {model.metabolites.get_by_id(met_id): coeff for met_id, coeff in new_react.items()}

        reaction = model.reactions.get_by_id(rxn)
        reaction.subtract_metabolites(reaction.metabolites)
        reaction.add_metabolites(new_mets_dict)

    unbalanced_rxns_after = [r.id for r in check_balance(model, print_results=False)]

    print(f'{model.id}: There were {len(unbalanced_rxns)} unbalanced reactions before and now there are {len(unbalanced_rxns_after)} after overwriting reaction info with BIGG data.')
    if skipped:
        print(f'{model.id}: {len(skipped)} unbalanced reaction(s) were left unchanged:')
        for rxn, reason in skipped.items():
            print(f'  {rxn}: {reason}')

In [40]:
def overwrite_charge(model, rxn_id, new_charge):
    """Set the charge of one metabolite if it exists in ``model``.

    Despite its name, ``rxn_id`` is a *metabolite* ID (e.g. ``"g3p_c"``).
    Nothing happens if the model has no metabolite with that ID.
    """
    if rxn_id not in model.metabolites:
        return
    target = model.metabolites.get_by_id(rxn_id)
    target.charge = new_charge

In [41]:
def overwrite_formula(model, rxn_id, new_formula):
    """Set the formula of one metabolite if it exists in ``model``.

    Despite its name, ``rxn_id`` is a *metabolite* ID (e.g. ``"fmn_c"``).
    Nothing happens if the model has no metabolite with that ID.
    """
    if rxn_id not in model.metabolites:
        return
    target = model.metabolites.get_by_id(rxn_id)
    target.formula = new_formula

In [42]:
def overwrite_reaction(model, rxn_id, new_rxn_dict):
    """Replace the stoichiometry of reaction ``rxn_id`` with ``new_rxn_dict`` (in place).

    Parameters
    ----------
    model : cobra model that may contain the reaction.
    rxn_id : str
        Reaction ID; if the model has no such reaction nothing happens.
    new_rxn_dict : dict
        New stoichiometry, mapping metabolite IDs (or metabolite objects) to
        coefficients (negative = consumed, positive = produced).

    Raises
    ------
    KeyError
        If a metabolite ID in ``new_rxn_dict`` does not exist in ``model``. The
        check runs *before* the old stoichiometry is removed, so a failing call
        leaves the reaction untouched instead of empty or half-built.
    """
    if rxn_id not in model.reactions:
        return

    # only string keys can be validated by ID; metabolite objects are passed through unchanged
    missing = [met for met in new_rxn_dict if isinstance(met, str) and met not in model.metabolites]
    if missing:
        raise KeyError(
            f"cannot overwrite reaction {rxn_id} in {model.id}: "
            f"metabolite(s) not in model: {', '.join(missing)}"
        )

    rxn = model.reactions.get_by_id(rxn_id)
    rxn.subtract_metabolites(rxn.metabolites)  # clears the current stoichiometry
    rxn.add_metabolites(new_rxn_dict)

In [43]:
def delete_metabolite(model, met_id):
    """Remove metabolite ``met_id`` from ``model`` if no reaction uses it any more.

    * unknown ID             -> nothing happens
    * metabolite is orphaned -> it is removed via ``model.remove_metabolites`` so that
      cobra performs the removal itself (the previous ``model.metabolites.remove(met)``
      only dropped the entry from the metabolite list)
    * metabolite still used  -> it is kept and the blocking reactions are printed
    """
    if met_id not in model.metabolites:
        return

    met = model.metabolites.get_by_id(met_id)
    if len(met.reactions) == 0:
        model.remove_metabolites([met])
    else:
        print(f'metabolite {met_id} cannot be deleted from {model.id} because of reaction(s): {met.reactions}')

In [44]:
def delete_reaction(model, rxn_id):
    """Remove reaction ``rxn_id`` from ``model``; do nothing if the model has no such reaction."""
    if rxn_id not in model.reactions:
        return
    model.remove_reactions([model.reactions.get_by_id(rxn_id)])

## Get all charge unbalanced reactions for all models

In [45]:
# Unbalanced reactions per model: key = model name, value = result of check_balance() for that model.
# `models` is the dict of all models that were loaded with read_sbml_model() above.
unbalanced_reactions_dict = {
    name: check_balance(model, print_results=False)
    for name, model in models.items()
}

In [46]:
# Each model has its own set of unbalanced reactions - how large is the union over all models?
# A set keeps every reaction ID only once, no matter in how many models it is unbalanced.
unique_reactions = {
    reaction.id
    for unbalanced_reactions in unbalanced_reactions_dict.values()
    for reaction in unbalanced_reactions.keys()
}

# list of all reaction IDs that are unbalanced in at least one model
unique_reaction_ids = list(unique_reactions)

print("There are {0} charge unbalanced reactions throughout all models.".format(len(unique_reaction_ids)))

There are 808 charge unbalanced reactions throughout all models.


In [47]:
# Which metabolites take part in the unbalanced reactions?
# Every unique unbalanced reaction is inspected once, in the first model that contains it.
metabolite_counter_compartment = Counter()
metabolite_counter_name = Counter()
seen_reactions = set()  # reaction IDs that were already counted

for model in models.values():
    for rxn_id in unique_reaction_ids:
        if rxn_id in seen_reactions or rxn_id not in model.reactions:
            continue
        reaction = model.reactions.get_by_id(rxn_id)
        # compartment specific: e.g. h2o_c and h2o_p are different metabolites
        metabolite_counter_compartment.update(metabolite.id for metabolite in reaction.metabolites)
        # by name: h2o is counted under one key independent of the compartment
        metabolite_counter_name.update(metabolite.name for metabolite in reaction.metabolites)
        seen_reactions.add(rxn_id)

# number of distinct metabolite IDs (compartment specific, e.g. h2o_c and h2o_p are separate)
print(len(metabolite_counter_compartment))
# number of distinct metabolite names (h2o only exists once)
print(len(metabolite_counter_name))

# IDs of all metabolites that occur in the counted unbalanced reactions
unbalanced_mets = list(metabolite_counter_compartment)

990
880


In [48]:
# Metabolite info (formula and charge state) for every model:
# key = "<model name>_mets", value = DataFrame from extract_met_info_model()
model_mets = {}
for key, model in models.items():
    model_mets[key + "_mets"] = extract_met_info_model(model)

In [49]:
# Merge each model's metabolite table with the BiGG info:
# key = "<model name>_merged", value = merged DataFrame with the match columns for charge and formula
model_merged = {}
for key, mets in model_mets.items():
    model_merged[key.replace("_mets", "_merged")] = compare_bigg_modelMets(mets, unbalanced_mets)

## Overwrite model with BIGG information
We want to find out whether the information on BiGG can help us reduce the large number of unbalanced reactions in our models.
That means: for metabolites (and, in a second step, reactions) where the BiGG information differs from the information in our model, we try to overwrite the model information with the BiGG information.

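At the moment I only overwrite model info with BiGG info if BiGG lists exactly one charge/formula for the metabolite. If there are multiple possible charge states/formulas, manual curation is needed. The intention was to overwrite only metabolites that are part of an unbalanced reaction (see the `unbalanced` column created by `compare_bigg_modelMets`); note, however, that `overwrite_with_BIGG_metabolites` does not filter on that column and processes every metabolite of the model.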

In [50]:
# Take over unambiguous BiGG charges/formulas for every model that has a merged metabolite table
for model_name, model in models.items():
    merged_key = model_name + "_merged"
    if merged_key not in model_merged:
        continue
    overwrite_with_BIGG_metabolites(model, model_merged[merged_key])

AA1: There were 445 unbalanced reactions before and now there are 191 after overwriting metabolite info with BIGG data.
AA2: There were 473 unbalanced reactions before and now there are 212 after overwriting metabolite info with BIGG data.
AA3: There were 372 unbalanced reactions before and now there are 157 after overwriting metabolite info with BIGG data.
AA4: There were 410 unbalanced reactions before and now there are 167 after overwriting metabolite info with BIGG data.
AA5: There were 360 unbalanced reactions before and now there are 170 after overwriting metabolite info with BIGG data.
AA6: There were 451 unbalanced reactions before and now there are 205 after overwriting metabolite info with BIGG data.
AA7: There were 452 unbalanced reactions before and now there are 184 after overwriting metabolite info with BIGG data.


In [51]:
# Replace the stoichiometry of the still-unbalanced reactions with the BiGG equation.
# This step only needs the model itself; the merged metabolite tables are not used here,
# so the guard on `model_merged` (copied from the metabolite step) was dropped.
for model in models.values():
    overwrite_with_BIGG_reactions(model)

AA1: There were 191 unbalanced reactions before and now there are 185 after overwriting reaction info with BIGG data.
AA2: There were 212 unbalanced reactions before and now there are 207 after overwriting reaction info with BIGG data.
AA3: There were 157 unbalanced reactions before and now there are 152 after overwriting reaction info with BIGG data.
AA4: There were 167 unbalanced reactions before and now there are 163 after overwriting reaction info with BIGG data.
AA5: There were 170 unbalanced reactions before and now there are 164 after overwriting reaction info with BIGG data.
AA6: There were 205 unbalanced reactions before and now there are 200 after overwriting reaction info with BIGG data.
AA7: There were 184 unbalanced reactions before and now there are 179 after overwriting reaction info with BIGG data.


## Manual changes

In [52]:
def overwrite_manual(model):
    """Apply all hand-curated fixes to ``model`` in place.

    The fixes come in three parts, executed in this order:

    1. charge / formula corrections of single metabolites
       (``overwrite_charge`` / ``overwrite_formula``),
    2. replacement of complete reaction stoichiometries (``overwrite_reaction``),
    3. deletion of reactions and of one metabolite
       (``delete_reaction`` / ``delete_metabolite``).

    All helpers do nothing when the given metabolite/reaction ID is not part of
    ``model``, so the same list is applied to every model.

    NOTE(review): the original header comment described a ``change_list``
    argument (numbers 1 to 7 selecting the fixes that were first introduced for
    model 1..7). This signature has no such parameter; every fix is always
    applied. Author's remark that still holds: curation started with model 1 and
    many reactions/metabolites overlap, so fixes introduced for one model reduce
    the number of unbalanced reactions in the other models drastically as well.
    """

    # first all the more basic changes regarding formula and charge of a metabolite
    overwrite_charge(model, "2mpdhl_c", -1)
    overwrite_charge(model, "4cml_c", -2)
    overwrite_charge(model, "5aizc_c", -3)
    overwrite_charge(model, "23dhbzs3_c", -1) # og = 0, -1 alternative in bigg
    overwrite_charge(model, "3sala_c", -2) # og = 0, -2 alternative in bigg
    overwrite_formula(model, "3hsa_c", "C19H24O3")
    overwrite_formula(model, "34dhsa_c", "C19H24O4")
    overwrite_formula(model, "49dsha_c", "C19H23O6")
    overwrite_charge(model, "49dsha_c", -1)
    overwrite_charge(model, "4hoxpac_c", -1) # og = 0, -1 alternative in bigg
    overwrite_charge(model, "4hoxpac_e", -1)
    overwrite_charge(model, "4hoxpac_p", -1)
    overwrite_charge(model, "4hoxpac_L_c", -1)
    overwrite_formula(model, "2ameph_p", "C2H7NO3P") # og = C2H8NO3P, charge was changed from 0 to -1 automatically with bigg and formula now also needed to be changed
    overwrite_formula(model, "5ohhipcoa_c", "C34H50N7O19P3S")
    overwrite_charge(model, "5ohhipcoa_c", -4)
    overwrite_charge(model, "5ohhipcoa_e", -4)
    # NOTE(review): the next two calls repeat the 5ohhipcoa_c formula/charge set a few lines above (same values) - redundant
    overwrite_formula(model, "5ohhipcoa_c", "C34H50N7O19P3S")
    overwrite_charge(model, "5ohhipcoa_c", -4)
    overwrite_formula(model, "2ameph_e", "C2H7NO3P")
    overwrite_formula(model, "2ameph_c", "C2H7NO3P")
    overwrite_charge(model, "6pgg_c", -2) # og = 0, -2 alternative in bigg and in accordance with ecoli
    overwrite_formula(model, "9ohadd_c", "C19H24O3")

    overwrite_formula(model, "andrs14dn317dn_c", "C19H24O2")
    overwrite_charge(model, "ACP_c", 0)
    overwrite_charge(model, "air_c", -2)
    overwrite_charge(model, "aad_c", -2)
    overwrite_charge(model, "actACP_c", -1)
    overwrite_charge(model, "amacald_c", 1)
    overwrite_formula(model, "amacald_c", "C2H6NO")
    overwrite_charge(model, "aso3_c", -1)
    overwrite_charge(model, "aso3_e", -1)
    overwrite_charge(model, "aso3_p", -1)
    overwrite_formula(model, "aso3_c", "H2O3As")
    overwrite_formula(model, "aso3_e", "H2O3As")
    overwrite_formula(model, "aso3_p", "H2O3As")
    overwrite_formula(model, "aso4_c", "HO4As")
    overwrite_formula(model, "aso4_e", "HO4As")
    overwrite_formula(model, "aso4_p", "HO4As")
    overwrite_charge(model, "acysbmn_e", -1) # og = 0, -1 according to metacyc with same formula https://metacyc.org/compound?orgid=META&id=CPD1G-185
    overwrite_charge(model, "acysbmn_c", -1) # og = 0, -1
    overwrite_charge(model, "ah6p__D_c", -2) # og = 0, -2 must be because f6p is also -2 and they can be directly converted into each other
    overwrite_formula(model, "apoACP_c", "C373H582N94O136S2") # og = C373H583N94O136S2; charge was changed from 1 to 0 and now the amount of H also reflects that

    overwrite_charge(model, "but2eACP_c", -1)
    overwrite_charge(model, "bmn_c", 2) # og = 0, to balance BMNMSHS (bmn is just imported for this reaction)
    overwrite_charge(model, "bmn_e", 2)

    overwrite_charge(model, "cdigmp_c", -2)
    overwrite_formula(model, "cholc3coa_c", "C43H66N7O18P3S")
    overwrite_formula(model, "cholenec3coa_c", "C43H64N7O18P3S")
    overwrite_formula(model, "cholc5coa_c", "C45H70N7O18P3S")
    overwrite_formula(model, "cholenec5coa_c", "C45H68N7O18P3S")
    overwrite_formula(model, "cchol_c", "C27H42O3")
    overwrite_formula(model, "cholc8coa_c", "C48H76N7O18P3S")
    overwrite_formula(model, "cholenec8coa_c", "C48H74N7O18P3S")

    overwrite_formula(model, "decoa_c", "C31H48N7O17P3S")
    overwrite_charge(model, "dgal6p_c", -2)
    overwrite_charge(model, "dtbt_c", -1)

    overwrite_charge(model, "fad_c", -2)
    overwrite_charge(model, "fpram_c", -1)
    overwrite_formula(model, "fpram_c", "C8H15N3O8P")
    overwrite_charge(model, "ficytc_c", 1)
    overwrite_charge(model, "focytc_c", 1)
    overwrite_charge(model, "fmn_c", -2)
    overwrite_formula(model, "fmn_c", "C17H19N4O9P")
    overwrite_charge(model, "fmcbtt_c", 2) # og = 0 but it has fe2 in it
    overwrite_formula(model, "feoxam_c", "C25H46FeN6O8") # formula change according to bigg and ecoli
    overwrite_formula(model, "feoxam_e", "C25H46FeN6O8")
    overwrite_formula(model, "feoxam_p", "C25H46FeN6O8")
    overwrite_formula(model, "fe3dhbzs3_c", "C30FeH29N3O16") # og = C30FeH28N3O16, with H29 is in bigg and ecoli
    overwrite_formula(model, "fe3dhbzs3_e", "C30FeH29N3O16")
    overwrite_formula(model, "fe3dhbzs3_p", "C30FeH29N3O16")

    overwrite_charge(model, "g3p_c", -2)
    overwrite_charge(model, "gly_pro__L_c", 1)
    overwrite_formula(model, "gly_pro__L_c", "C7H13N2O3")
    overwrite_charge(model, "gly_pro__L_e", 1)
    overwrite_formula(model, "gly_pro__L_e", "C7H13N2O3")
    overwrite_formula(model, "gly_tyr_c", "C11H14N2O4")
    overwrite_formula(model, "gly_phe_c", "C11H14N2O3")
    overwrite_formula(model, "gly_leu_c", "C8H16N2O3")
    overwrite_formula(model, "gly_cys_c", "C5H10N2O3S")
    overwrite_charge(model, "glutrna_c", -3)
    overwrite_charge(model, "gcvHL_ADPr_c", -1)
    overwrite_formula(model, "gcvHL_ADPr_c", "C23H36N6O21P4S2")
    overwrite_charge(model, "gcvHL_nhLA_c", 0)
    overwrite_formula(model, "gcvHL_nhLA_c", "C8H16NO8P2S2")
    overwrite_charge(model, "gdptp_c", -7)
    overwrite_charge(model, "g6p_A_c", -2)

    overwrite_charge(model, "hethmpp_c", -2)
    overwrite_charge(model, "hemeO_c", -2)
    overwrite_formula(model, "hipecoa_c", "C34H48N7O19P3S")
    overwrite_charge(model, "hipecoa_c", -4)
    overwrite_formula(model, "hipohcoa_c", "C34H50N7O20P3S")
    overwrite_charge(model, "hipohcoa_c", -4)
    overwrite_formula(model, "hipocoa_c", "C34H48N7O20P3S")
    overwrite_charge(model, "hipocoa_c", -4)
    overwrite_formula(model, "hia_c", "C11H16O4")
    overwrite_formula(model, "hia_e", "C11H16O4")
    overwrite_formula(model, "hcholc3coa_c", "C43H66N7O19P3S")
    overwrite_formula(model, "hip_c", "C13H17O4")
    overwrite_charge(model, "hip_c", -1)
    overwrite_formula(model, "hipcoa_c", "C34H48N7O19P3S")
    overwrite_charge(model, "hipcoa_c", -4)
    overwrite_formula(model, "hcholc5coa_c", "C45H70N7O19P3S")
    overwrite_formula(model, "hcholc8coa_c", "C48H76N7O19P3S")
    overwrite_formula(model, "hchol_c", "C27H44O2")
    overwrite_charge(model, "hmbpp_c", -4) # pubchem C5H12O8P2 with charge 0; model=C5H8O8P2, so charge must be -4

    overwrite_charge(model, "istfrnA_e", -2)
    overwrite_formula(model, "istfrnA_e", "C17FeH19N2O14")
    overwrite_charge(model, "istfrnB_e", +1)
    overwrite_formula(model, "istfrnB_e", "C16FeH22N2O11")

    overwrite_charge(model, "lysglugly_c", 0)
    overwrite_charge(model, "lysglugly_e", 0)

    overwrite_charge(model, "met_L_ala__L_c", -1)
    overwrite_charge(model, "met_L_ala__L_e", -1)
    overwrite_formula(model, "met_L_ala__L_c", "C8H15N2O3S")
    overwrite_formula(model, "met_L_ala__L_e", "C8H15N2O3S")
    overwrite_charge(model, "mhpglu_c", -4)
    overwrite_formula(model, "mcbtt_c", "C47H77N5O10") # was wrongly overwritten by a false bigg formula = [C43H71N5O10], metacyc also has the original one that was in the model
    overwrite_charge(model, "mcbtt_c", 0)
    overwrite_charge(model, "mi3p__D_c", -2) # og = 0, -2 according to bigg

    overwrite_formula(model, "Nforglu_c", "C6H7NO5")

    overwrite_charge(model, "ocdcaACP_c", 0) # og = -1, 0 alternative in bigg and is in accordance with charge = 0 of ACP
    overwrite_charge(model, "ocACP_c", 0) # og = -1, 0 alternative in bigg and is in accordance with charge = 0 of ACP
    overwrite_formula(model, "ochol_c", "C27H42O2")
    overwrite_formula(model, "ocholc8coa_c", "C48H74N7O19P3S")
    overwrite_formula(model, "ocholc5coa_c", "C45H68N7O19P3S")

    overwrite_charge(model, "ppad_c", -2)
    overwrite_charge(model, "ptd1ino160_c", -1)
    overwrite_charge(model, "pqqh2_c", -3)
    overwrite_charge(model, "pqqh2_p", -3)
    overwrite_charge(model, "ppgpp_c", -6)
    overwrite_charge(model, "prohisglu_c", -1) # og = -2; tripeptide pro-his-glu, only glu has -1 charge and the other two are neutral
    overwrite_charge(model, "prohisglu_e", -1)

    overwrite_formula(model, "ribflv_c", "C17H20N4O6")
    overwrite_formula(model, "ribflv_e", "C17H20N4O6")

    overwrite_charge(model, "scys__L_c", -1)
    overwrite_charge(model, "stfrnA_e", -5)
    overwrite_formula(model, "stfrnA_e", "C17H19N2O14")
    overwrite_charge(model, "stfrnA_c", -5)
    overwrite_formula(model, "stfrnA_c", "C17H19N2O14")
    overwrite_charge(model, "stfrnB_e", -2)
    overwrite_formula(model, "stfrnB_e", "C16H22N2O11")
    overwrite_charge(model, "stfrnB_c", -2)
    overwrite_formula(model, "stfrnB_c", "C16H22N2O11")
    overwrite_charge(model, "salc_e", -1) # og = 0, -1 alternative in bigg
    overwrite_charge(model, "salc_c", -1)
    overwrite_charge(model, "scl_c", -7) # og = 0, -7 according to bigg
    overwrite_formula(model, "salchsx_c", "C16H20NO11") # og C16H21NO11; https://pubchem.ncbi.nlm.nih.gov/compound/135397946
    overwrite_formula(model, "salchsx_e", "C16H20NO11")
    overwrite_formula(model, "salchsx_p", "C16H20NO11")
    overwrite_charge(model, "salchs2fe_c", 3) # to match salchs4fe
    overwrite_charge(model, "salchs2fe_p", 3)
    overwrite_charge(model, "salchs2fe_e", 3)

    overwrite_charge(model, "tag6p__D_c", -2)
    overwrite_charge(model, "tagdp__D_c", -4)

    overwrite_charge(model, "udpacgal_c", -2) # og = 0, -2 alternative in bigg

    overwrite_charge(model, "vacc_c", -1)
    overwrite_charge(model, "vacc_p", -1)

    overwrite_charge(model, "xylan4_c", -1) # og = 0, no charge given in Bigg, but -1 fits equations
    overwrite_charge(model, "xylan4_e", -1)

    # now in the second part, there are all changes to actual reactions
    # (each dict maps metabolite ID -> stoichiometric coefficient; it replaces the whole reaction)
    overwrite_reaction(model, "ASR",
                       {"aso4_c": -1.0,
                        "gthrd_c": -2.0,
                        "h_c": -1.0,
                        "aso3_c": 1.0,
                        "gthox_c": 1.0,
                        "h2o_c": 1.0})

    overwrite_reaction(model, "ACPpds",
                       {"ACP_c": -1.0,
                        "h2o_c": -1.0,
                        "apoACP_c": 1.0,
                        "h_c": 2.0,
                        "pan4p_c": 1.0})

    overwrite_reaction(model, "ALDD31_1",
                       {"gly_c": 1.0,
                        "h_c": 2.0,
                        "h2o_c": -1.0,
                        "nad_c": -1.0,
                        "nadh_c": 1.0,
                        "amacald_c": -1})

    # https://biocyc.org/reaction?orgid=META&id=RXN-10737
    overwrite_reaction(model, "ASR2",
                       {"aso4_c": -1.0,
                        "trdrd_c": -1.0,
                        "h_c": -1.0,
                        "aso3_c": 1.0,
                        "h2o_c": 1.0,
                        "trdox_c": 1.0})

    # was overwritten by BIGG, but before that the H was in the reaction; it also equals the reaction BKDC that is e.g. in AA1
    overwrite_reaction(model, "AT_MBD2",
                       {"dhlam_c": -1.0,
                        "ibcoa_c": -1.0,
                        "2mpdhl_c": 1.0,
                        "coa_c": 1.0,
                        "h_c": 1.0})

    overwrite_reaction(model, "BEF",
                       {"betald_c": -1.0,
                        "fad_c": -1.0,
                        "h2o_c": -1.0,
                        "fadh2_c": 1.0,
                        "glyb_c": 1.0,
                        "h_c": 1.0})

    overwrite_reaction(model, "DHBZS2H",
                       {"23dhbzs2_c": -1.0,
                        "h2o_c": -1.0,
                        "h_c": 2.0, # new because of logic
                        "23dhbzs_c": 2.0})

    # https://biocyc.org/reaction?orgid=META&id=RXN-14477
    overwrite_reaction(model, "ENTERH",
                       {"enter_c": -1.0,
                        "h2o_c": -1.0,
                        "23dhbzs3_c": 1.0,
                        "h_c": 1.0}) # h was added

    overwrite_reaction(model, "FORGLUIH2",
                       {"forglu_c": -1.0,
                        "h2o_c": -1.0,
                        "Nforglu_c": 1.0,
                        "nh4_c": 1.0})

    overwrite_reaction(model, "FADD3",
                       {"atp_c": -1.0,
                        "coa_c": -1.0,
                        "hip_c": -1.0,
                        "hipcoa_c": 1.0,
                        "ppi_c": 1.0,
                        "amp_c": 1.0})

    overwrite_reaction(model, "FEDHBZS3R2",
                       {"fe3dhbzs3_c": -2.0,
                        "fmnh2_c": -1.0,
                        "23dhbzs3_c": 2.0,
                        "fe2_c": 2.0,
                        "h_c": 4.0,
                        "fmn_c": 1.0})

    overwrite_reaction(model, "FE3DHBZS3R",
                       {"fe3dhbzs3_c": -2.0,
                        "nadph_c": -1.0,
                        "23dhbzs3_c": 2.0,
                        "fe2_c": 2.0,
                        "h_c": 3.0,
                        "nadp_c": 1.0})

    overwrite_reaction(model, "FEDHBZS3R1",
                       {"fe3dhbzs3_c": -2.0,
                        "fadh2_c": -1.0,
                        "23dhbzs3_c": 2.0,
                        "fe2_c": 2.0,
                        "h_c": 4.0,
                        "fad_c": 1.0})

    overwrite_reaction(model, "FEDHBZS3R3",
                       {"fe3dhbzs3_c": -2.0,
                        "rbflvrd_c": -1.0,
                        "23dhbzs3_c": 2.0,
                        "fe2_c": 2.0,
                        "h_c": 4.0,
                        "ribflv_c": 1.0})

    # https://biocyc.org/reaction?orgid=META&id=1.18.1.2-RXN change of stoichiometry
    overwrite_reaction(model, "FPRA",
                       {"fdxrd_c": -2.0,
                        "h_c": -1.0,
                        "nadp_c": -1.0,
                        "fdxox_c": 2.0,
                        "nadph_c": 1.0})

    overwrite_reaction(model, "GLYTYRabc",
                       {"atp_c": -1.0,
                        "gly_tyr_e": -1.0,
                        "h2o_c": -1.0,
                        "adp_c": 1.0,
                        "gly_tyr_c": 1.0,
                        "pi_c": 1.0,
                        "h_c": 1.0})

    overwrite_reaction(model, "GLYLEUtr",
                       {"atp_c": -1.0,
                        "gly_leu_e": -1.0,
                        "h2o_c": -1.0,
                        "adp_c": 1.0,
                        "gly_leu_c": 1.0,
                        "pi_c": 1.0,
                        "h_c": 1.0})

    overwrite_reaction(model, "GLYPHEtr",
                       {"atp_c": -1.0,
                        "gly_phe_e": -1.0,
                        "h2o_c": -1.0,
                        "adp_c": 1.0,
                        "gly_phe_c": 1.0,
                        "pi_c": 1.0,
                        "h_c": 1.0})

    overwrite_reaction(model, "GLYCYSabc",
                       {"atp_c": -1.0,
                        "gly_cys_e": -1.0,
                        "h2o_c": -1.0,
                        "adp_c": 1.0,
                        "gly_cys_c": 1.0,
                        "pi_c": 1.0,
                        "h_c": 1.0})

    overwrite_reaction(model, "GLUTRS_3",
                       {"atp_c": -1.0,
                        "glu__L_c": -1.0,
                        "trnaglu_c": -1.0,
                        "amp_c": 1.0,
                        "glutrna_c": 1.0,
                        "ppi_c": 1.0})

    # https://metacyc.org/reaction?orgid=META&id=GLUTAMATE-SYNTHASE-FERREDOXIN-RXN#
    overwrite_reaction(model, "GLMS_syn",
                       {"fdxrd_c": -2.0,
                        "akg_c": -1.0,
                        "gln__L_c": -1.0,
                        "h_c": -2.0,
                        "glu__L_c": 2.0,
                        "fdxox_c": 2.0}) # replaces fdxo_2_2_c because we need +2 charge

    # https://modelseed.org/biochem/reactions/rxn28276 (still charge-imbalanced, but the mass is balanced)
    overwrite_reaction(model, "GCDH",
                       {"glutcoa_c": -1.0,
                        "b2coa_c": 1.0,
                        "h_c": 1.0,
                        "co2_c": 1.0})

    overwrite_reaction(model, "HSAC",
                       {"34dhsa_c": -1.0,
                        "o2_c": -1.0,
                        "49dsha_c": 1.0,
                        "h_c": 1.0})

    overwrite_reaction(model, "MS_1",
                       {"hcys__L_c": -1.0,
                        "mhpglu_c": -1.0,
                        "hpglu_c": 1.0,
                        "met__L_c": 1.0})

    overwrite_reaction(model, "MECDPDH3_syn",
                       {"2mecdp_c": -1.0,
                        "fdxrd_c": -2.0,
                        "h_c": -1.0,
                        "fdxox_c": 2.0, # replaces fdxo_2_2_c
                        "h2mb4p_c": 1.0,
                        "h2o_c": 1.0})

    overwrite_reaction(model, "PRAIS",
                       {"atp_c": -1.0,
                        "fpram_c": -1.0,
                        "adp_c": 1.0,
                        "air_c": 1.0,
                        "pi_c": 1.0,
                        "h_c": 2.0})

    overwrite_reaction(model, "PACPT_1",
                       {"amp_c": 1.0,
                        "coa_c": -1.0,
                        "ppcoa_c": 1.0,
                        "ppad_c": -1.0})

    # https://modelseed.org/biochem/reactions/rxn13395 our scl is only -7, in seed it is -8, so we need only one H
    overwrite_reaction(model, "PC2DHG",
                       {"dscl_c": -1.0,
                        "nadp_c": -1.0,
                        "nadph_c": 1.0,
                        "scl_c": 1.0,
                        "h_c": 1.0})

    overwrite_reaction(model, "QSDH",
                       {"pqq_c": -1.0,
                        "skm_c": -1.0,
                        "3dhsk_c": 1.0,
                        "pqqh2_c": 1.0})

    overwrite_reaction(model, "SMIA1",
                       {"fe3_e": -1.0,
                        "stfrnA_e": -1.0,
                        "istfrnA_e": 1.0})

    overwrite_reaction(model, "SMIB1",
                       {"fe3_e": -1.0,
                        "stfrnB_e": -1.0,
                        "istfrnB_e": 1.0})

    overwrite_reaction(model, "SMIA2abc",
                       {"atp_c": -1.0,
                        "h2o_c": -1.0,
                        "istfrnA_e": -1.0,
                        "adp_c": 1.0,
                        "fe3_c": 1.0,
                        "h_c": 1.0,
                        "pi_c": 1.0,
                        "stfrnA_c": 1.0})

    # NOTE(review): this "SMIA..." reaction is given the *B* species (istfrnB_e/stfrnB_c), whereas SMIA2abc above
    # uses the *A* species - confirm that the reaction ID / metabolites are the intended ones
    overwrite_reaction(model, "SMIA1abc",
                       {"atp_c": -1.0,
                        "h2o_c": -1.0,
                        "istfrnB_e": -1.0,
                        "adp_c": 1.0,
                        "fe3_c": 1.0,
                        "h_c": 1.0,
                        "pi_c": 1.0,
                        "stfrnB_c": 1.0})

    overwrite_reaction(model, "SALCHS1H",
                       {"h2o_c": -1.0,
                        "salchs1_c": -1.0,
                        "23dhbzs_c": 1.0,
                        "salchsx_c": 1.0,
                        "h_c": 2.0})

    overwrite_reaction(model, "SALCHS2H",
                       {"h2o_c": -1.0,
                        "salchs2_c": -1.0,
                        "salchs1_c": 1.0,
                        "salchsx_c": 1.0,
                        "h_c": 1.0})

    # https://modelseed.org/biochem/reactions/rxn10816
    overwrite_reaction(model, "THZSN_1",
                       {"cys__L_c": -1.0,
                        "dxyl_c": -1.0,
                        "fdxox_c": -1.0, # instead of fdx_2_2_c
                        "tyr__L_c": -1.0,
                        "4hba_c": 1.0,
                        "4mhetz_c": 1.0,
                        "co2_c": 1.0,
                        "fdxrd_c": 1.0,
                        "h2o_c": 1.0,
                        "h_c": 2.0, # 2 instead of 1;
                        "nh4_c": 1.0,
                        "pyr_c": 1.0})

    overwrite_reaction(model, "T6PK",
                       {"atp_c": -1.0,
                        "tag6p__D_c": -1.0,
                        "adp_c": 1.0,
                        "tagdp__D_c": 1.0,
                        "h_c": 1.0})

    # deletions (third part): reactions first, then the metabolite that was replaced by fdxox_c above
    delete_reaction(model, "CO2FO")
    delete_reaction(model, "GTPDPK_1")
    delete_reaction(model, "MECDPDH4E")
    delete_reaction(model, "PRFGS_1") # there is PRFGS; GPR of _1 is in PRFGS
    delete_reaction(model, "PENAM")
    delete_reaction(model, "PRFGCL")

    # only removed if no reaction uses it any more; otherwise delete_metabolite prints the blocking reactions
    # (in the recorded run it stays in AA6 because of FNOR and in AA7 because of POR_syn)
    delete_metabolite(model, "fdxo_2_2_c")

In [53]:
# Apply the manual curation to every loaded model (the models are modified in place)
for model in models.values():
    overwrite_manual(model)

metabolite fdxo_2_2_c cannot be deleted from AA6 because of reaction(s): frozenset({<Reaction FNOR at 0x74be25559330>})
metabolite fdxo_2_2_c cannot be deleted from AA7 because of reaction(s): frozenset({<Reaction POR_syn at 0x74be27542350>})


In [54]:
# Final check: print the number of reactions that are still unbalanced in each model
for model in models.values():
    check_balance(model)

There are 3 unbalanced reactions in AA1
There are 5 unbalanced reactions in AA2
There are 9 unbalanced reactions in AA3
There are 21 unbalanced reactions in AA4
There are 22 unbalanced reactions in AA5
There are 11 unbalanced reactions in AA6
There are 13 unbalanced reactions in AA7
