## Evaluation of the Solution Space under different conditions

In [None]:
# Ensure necessary packages are installed
!pip install cobra efmtool numpy pandas scipy

COBRApy definition of the DFKI toy network

In [1]:
import cobra
from cobra import Model, Reaction, Metabolite

# Create the model
model = Model('Toy_Model')

# Create the metabolites: the *_EX species are placed in compartment 'e',
# all other species in compartment 'c'
A_EX = Metabolite('A_EX', compartment='e')
A = Metabolite('A', compartment='c')
B = Metabolite('B', compartment='c')
D = Metabolite('D', compartment='c')
E = Metabolite('E', compartment='c')
E_EX = Metabolite('E_EX', compartment='e')
F = Metabolite('F', compartment='c')
G = Metabolite('G', compartment='c')
H = Metabolite('H', compartment='c')
I = Metabolite('I', compartment='c')
J = Metabolite('J', compartment='c')
K = Metabolite('K', compartment='c')
L = Metabolite('L', compartment='c')
L_EX = Metabolite('L_EX', compartment='e')
M = Metabolite('M', compartment='c')
M_EX = Metabolite('M_EX', compartment='e')
N = Metabolite('N', compartment='c')
N_EX = Metabolite('N_EX', compartment='e')
O = Metabolite('O', compartment='c')
P = Metabolite('P', compartment='c')
P_EX = Metabolite('P_EX', compartment='e')

# Create the reactions.
# Negative coefficients are consumed, positive coefficients are produced.
# Reactions whose lower bound is set to -1000.0 may carry flux in both
# directions; all others keep the bounds assigned by Reaction().

# Uptake of the extracellular substrate
a_in = Reaction('A_in')
a_in.name = 'A_EX to A'
a_in.add_metabolites({A_EX: -1, A: 1})

# Internal reactions R1-R14
reaction1 = Reaction('R1')
reaction1.name = 'A and G to B'
reaction1.add_metabolites({A: -1, G: -1, B: 1})

reaction2 = Reaction('R2')
reaction2.name = 'B to D'
reaction2.add_metabolites({B: -1, D: 1})

reaction3 = Reaction('R3')
reaction3.name = 'D to E and F'
reaction3.add_metabolites({D: -1, E: 1, F: 1})

reaction4 = Reaction('R4')
reaction4.name = 'B to I'
reaction4.add_metabolites({B: -1, I: 1})
reaction4.lower_bound= -1000.0

reaction5 = Reaction('R5')
reaction5.name = 'I to M and J'
reaction5.add_metabolites({I: -1, M: 1, J: 1})
reaction5.lower_bound= -1000.0

reaction6 = Reaction('R6')
reaction6.name = 'O to P and J'
reaction6.add_metabolites({O: -1, J: 1, P: 1})
reaction6.lower_bound= -1000.0

reaction7 = Reaction('R7')
reaction7.name = 'H to K and L'
reaction7.add_metabolites({H: -1, K: 1, L: 1})
reaction7.lower_bound= -1000.0

reaction8 = Reaction('R8')
reaction8.name = 'K and M to N'
reaction8.add_metabolites({K: -1, M: -1, N: 1})

reaction9 = Reaction('R9')
reaction9.name = 'A and J to I'
reaction9.add_metabolites({A: -1, J: -1, I: 1})
reaction9.lower_bound= -1000.0

reaction10 = Reaction('R10')
reaction10.name = 'F to G'
reaction10.add_metabolites({F: -1, G: 1})

reaction11 = Reaction('R11')
reaction11.name = 'E to H'
reaction11.add_metabolites({E: -1, H: 1})
reaction11.lower_bound= -1000.0

reaction12 = Reaction('R12')
reaction12.name = 'I to P'
reaction12.add_metabolites({I: -1, P: 1})
reaction12.lower_bound= -1000.0

reaction13 = Reaction('R13')
reaction13.name = 'O to M and L'
reaction13.add_metabolites({O: -1, M: 1, L: 1})
reaction13.lower_bound= -1000.0

reaction14 = Reaction('R14')
reaction14.name = 'P to K'
reaction14.add_metabolites({P: -1, K: 1})
reaction14.lower_bound= -1000.0


# Export reactions: move an intracellular product to its extracellular counterpart
E_EX_out = Reaction('E_EX_out')
E_EX_out.name = 'E to E_EX'
E_EX_out.add_metabolites({E: -1, E_EX: 1})

L_EX_out = Reaction('L_EX_out')
L_EX_out.name = 'L to L_EX'
L_EX_out.add_metabolites({L: -1, L_EX: 1})

M_EX_out = Reaction('M_EX_out')
M_EX_out.name = 'M to M_EX'
M_EX_out.add_metabolites({M: -1, M_EX: 1})

N_EX_out = Reaction('N_EX_out')
N_EX_out.name = 'N to N_EX'
N_EX_out.add_metabolites({N: -1, N_EX: 1})

P_EX_out = Reaction('P_EX_out')
P_EX_out.name = 'P to P_EX'
P_EX_out.add_metabolites({P: -1, P_EX: 1})

# Add the reactions to the model
model.add_reactions([a_in, reaction1, reaction2, reaction3, reaction4, reaction5, 
                     reaction6, reaction7, reaction8, reaction9, reaction10,
                     reaction11, reaction12, reaction13, reaction14, E_EX_out, L_EX_out, M_EX_out, N_EX_out, P_EX_out])



# Print model information (number of reactions, metabolites and genes)
print("Reaktionen im Modell:", len(model.reactions))
print("Metaboliten im Modell:", len(model.metabolites))
print("Gene im Modell:", len(model.genes))


Reaktionen im Modell: 20
Metaboliten im Modell: 21
Gene im Modell: 0


In [2]:
import numpy as np
import cobra


def get_stoichiometric_matrix(model):
    """Build the stoichiometric matrix of the intracellular part of ``model``.

    Rows correspond to the metabolites whose compartment is not ``'e'``,
    columns to the reactions whose id does not start with ``"EX_"``. Both keep
    the order in which they appear in ``model.metabolites`` and
    ``model.reactions``.

    Parameters
    ----------
    model : object exposing ``reactions`` and ``metabolites``
        Each reaction needs ``id`` and a ``metabolites`` mapping
        (metabolite -> coefficient); each metabolite needs ``compartment``.

    Returns
    -------
    numpy.ndarray
        Float array of shape (number of kept metabolites, number of kept
        reactions). Entry ``[i, j]`` is the coefficient of metabolite ``i`` in
        reaction ``j``, or 0 when the metabolite does not take part.
    """
    # Keep only reactions that are not "EX_" exchange reactions
    internal_reactions = [rxn for rxn in model.reactions if not rxn.id.startswith("EX_")]

    # Keep only metabolites outside the extracellular compartment
    internal_metabolites = [met for met in model.metabolites if met.compartment != 'e']

    # Row index of every kept metabolite, so each coefficient is placed directly
    row_of = {met: row for row, met in enumerate(internal_metabolites)}

    stoichiometric_matrix = np.zeros((len(internal_metabolites), len(internal_reactions)))

    for col, rxn in enumerate(internal_reactions):
        # Read the coefficient mapping once per reaction rather than once per
        # matrix entry, and visit only the metabolites the reaction contains.
        for met, coefficient in rxn.metabolites.items():
            row = row_of.get(met)
            if row is not None:  # extracellular metabolites have no row
                stoichiometric_matrix[row, col] = coefficient
    return stoichiometric_matrix


In [4]:
# List of experimental data points, padded with 0 for products that are not produced.
experimental_data_zeropadded_out = [
    {'A_in':1.0, 'N_EX_out': 0.5, 'E_EX_out': 0.0, 'P_EX_out': 0.0, 'L_EX_out': 0.0, 'M_EX_out': 0.0},
    {'A_in':1.0, 'N_EX_out': 0.0, 'E_EX_out': 1.0, 'P_EX_out': 0.0, 'L_EX_out': 0.0, 'M_EX_out': 0.0},
    {'A_in':1.0, 'N_EX_out': 0.0, 'E_EX_out': 0.0, 'P_EX_out': 1.0, 'L_EX_out': 1.0, 'M_EX_out': 0.0},
    {'A_in':1.0, 'N_EX_out': 0.5, 'E_EX_out': 0.0, 'P_EX_out': 0.0, 'L_EX_out': 2.0, 'M_EX_out': 0.0},
    {'A_in':1.0, 'N_EX_out': 0.0, 'E_EX_out': 0.0, 'P_EX_out': 0.0, 'L_EX_out': 3.0, 'M_EX_out': 1.0},
    {'A_in':1.0, 'N_EX_out': 0.0, 'E_EX_out': 0.0, 'P_EX_out': 0.0, 'L_EX_out': 0.0, 'M_EX_out': 1.0},
    {'A_in':1.0, 'N_EX_out': 0.0, 'E_EX_out': 0.0, 'P_EX_out': 0.0, 'L_EX_out': 1.5, 'M_EX_out': 1.0},
    {'A_in':1.0, 'N_EX_out': 0.5, 'E_EX_out': 0.0, 'P_EX_out': 0.0, 'L_EX_out': 1.25, 'M_EX_out': 0.0},
    {'A_in':1.0, 'N_EX_out': 0.0, 'E_EX_out': 0.0, 'P_EX_out': 1.0, 'L_EX_out': 0.0, 'M_EX_out': 0.0},
    {'A_in':1.0, 'N_EX_out': 0.5, 'E_EX_out': 0.0, 'P_EX_out': 0.0, 'L_EX_out': 0.5, 'M_EX_out': 0.0},
    {'A_in':1.0, 'N_EX_out': 0.25, 'E_EX_out': 0.0, 'P_EX_out': 0.5, 'L_EX_out': 0.0, 'M_EX_out': 0.0}
]

In [31]:
import numpy as np
from scipy.optimize import lsq_linear
from scipy.optimize import minimize





#print(reaction_ids)
# Funktion zur Ableitung des Zielvektors aus den experimentellen Daten
def create_target_vector(data_point, reaction_ids):
    """Turn one measured data point into a flux vector aligned with ``reaction_ids``.

    Parameters
    ----------
    data_point : dict
        Maps reaction id -> measured flux value.
    reaction_ids : list
        Reaction ids defining the order of the returned vector.

    Returns
    -------
    numpy.ndarray
        One entry per reaction id; measured reactions hold their value, all
        other entries are NaN. Keys of ``data_point`` that are not in
        ``reaction_ids`` are ignored.
    """
    # Start with "unknown" everywhere
    target_flux = np.full(len(reaction_ids), np.nan)

    # Resolve the position of every measurement that belongs to a known reaction
    measured = [
        (reaction_ids.index(rxn_id), value)
        for rxn_id, value in data_point.items()
        if rxn_id in reaction_ids
    ]
    for position, value in measured:
        target_flux[position] = value
    return target_flux


def least_square_min(model, S, experimental_data):
    """Score how well steady-state fluxes of ``model`` can reproduce measured fluxes.

    For every data point a flux vector ``x`` is searched that satisfies
    ``S @ x == 0`` and the reaction bounds while minimising the Euclidean
    distance between ``x`` and the measured values on the measured reactions.
    The residual distances of all data points are summed.

    Parameters
    ----------
    model : object exposing ``reactions``
        Each reaction needs ``id``, ``lower_bound`` and ``upper_bound``.
        Reactions whose id starts with ``"EX_"`` are ignored.
    S : array-like
        Stoichiometric matrix whose columns follow the same reaction order
        (as produced by ``get_stoichiometric_matrix``).
    experimental_data : list of dict
        Each dict maps reaction id -> measured flux.

    Returns
    -------
    tuple (float, int)
        ``(total_score, number_of_data_points)``. If the optimisation fails for
        at least one data point the score is the sentinel ``0.0`` (callers in
        this notebook treat exactly 0.0 as "invalid"); note that a perfect fit
        summing to exactly 0.0 is indistinguishable from that sentinel.
    """
    internal_reactions = [rxn for rxn in model.reactions if not rxn.id.startswith("EX_")]
    reaction_ids = [rxn.id for rxn in internal_reactions]

    # Bounds and the steady-state constraint do not depend on the data point,
    # so they are built once instead of once per iteration.
    bounds = [(rxn.lower_bound, rxn.upper_bound) for rxn in internal_reactions]
    constraints = {'type': 'eq', 'fun': lambda x: np.dot(S, x)}

    total_score = 0.0
    successful_optimizations = 0

    for data_point in experimental_data:
        # Target vector for the current data point (NaN where nothing was measured)
        target_flux = create_target_vector(data_point, reaction_ids)

        # Restrict the comparison to reactions with a measured value
        known_flux_indices = ~np.isnan(target_flux)
        target_flux_values = target_flux[known_flux_indices]

        def objective(x):
            return np.linalg.norm(x[known_flux_indices] - target_flux_values)

        initial_guess = np.zeros(len(reaction_ids))

        result = minimize(objective, initial_guess, constraints=constraints, bounds=bounds, tol=1e-10)

        if result.success:
            total_score += objective(result.x)
            successful_optimizations += 1
        else:
            print(f"Optimization failed for data point: {data_point}. Message: {result.message}")

    # Report the sentinel score 0.0 if any optimisation failed, otherwise the total score
    if successful_optimizations < len(experimental_data):
        return 0.0, len(experimental_data)
    else:
        return total_score, len(experimental_data)

 
# Sanity check: score the toy model defined above against the experimental data points
S = get_stoichiometric_matrix(model)
print(least_square_min(model, S, experimental_data_zeropadded_out))

(7.084858411749409e-08, 11)


In [76]:
import random
from cobra import Reaction

# Build random reactions with 1-2 substrate(s) and 1-2 product(s)
def generate_random_reactions(model, num_reactions=2000):
    """Generate random candidate reactions over the non-extracellular metabolites.

    Each reaction draws 1-2 substrates (coefficient -1.0) and 1-2 products
    (coefficient 1.0) from the metabolites of ``model`` whose compartment is
    not ``'e'``. A draw is repeated until no metabolite appears on both sides.
    With probability 0.5 the lower bound is set to -1000.0, otherwise to 0.0;
    the upper bound is always 1000.0.

    Parameters
    ----------
    model : object exposing ``metabolites``
    num_reactions : int, default 2000
        Number of reactions to generate.

    Returns
    -------
    list
        ``Reaction`` objects with ids ``Random_Rxn_1`` ... ``Random_Rxn_<n>``.
        They are not added to ``model``.
    """
    candidates = [met for met in model.metabolites if met.compartment != 'e']
    generated = []

    for number in range(1, num_reactions + 1):
        # Redraw until substrate and product sets do not overlap
        while True:
            n_substrates = random.randint(1, 2)
            n_products = random.randint(1, 2)
            substrates = random.sample(candidates, n_substrates)
            products = random.sample(candidates, n_products)
            if not (set(substrates) & set(products)):
                break

        rxn = Reaction(f'Random_Rxn_{number}')
        rxn.name = f'Random Reaction {number}'
        rxn.add_metabolites({met: -1.0 for met in substrates})
        rxn.add_metabolites({met: 1.0 for met in products})

        # 50% chance of allowing flux in both directions
        rxn.lower_bound = -1000.0 if random.random() < 0.5 else 0.0
        rxn.upper_bound = 1000.0

        generated.append(rxn)

    return generated



Total number of random reactions generated: 2000


In [33]:
import cobra
import random

def sample_reactions(model, n_reactions):
    """Randomly pick ``n_reactions`` internal reactions from ``model``.

    Reactions whose id ends with ``'_in'`` or ``'_out'`` are never picked.
    The id of every picked reaction is printed.

    Raises
    ------
    ValueError
        If fewer than ``n_reactions`` eligible reactions exist.

    Returns
    -------
    list
        The picked reaction objects.
    """
    # Uptake/export reactions are excluded from sampling
    candidates = [rxn for rxn in model.reactions if not rxn.id.endswith(('_in', '_out'))]

    if len(candidates) < n_reactions:
        raise ValueError("Requested number of reactions exceeds the total number of valid reactions in the model")

    chosen = random.sample(candidates, n_reactions)

    for picked in chosen:
        print(picked.id)

    return chosen


In [34]:
import random

def sample_reactions_from_list(reactions_list, n_reactions):
    """Randomly pick ``n_reactions`` entries from ``reactions_list``.

    Every picked entry is printed: its ``id`` attribute when it has one,
    otherwise the entry itself.

    Raises
    ------
    ValueError
        If ``n_reactions`` is larger than the list.

    Returns
    -------
    list
        The picked entries.
    """
    if len(reactions_list) < n_reactions:
        raise ValueError("Requested number of reactions exceeds the total number of reactions in the list")

    chosen = random.sample(reactions_list, n_reactions)

    for entry in chosen:
        # Fall back to the entry itself for plain values without an id
        print(getattr(entry, 'id', entry))

    return chosen


In [35]:
def model_edit_distance(model1, model2):
    """Count the reactions (by id) that occur in exactly one of the two models.

    Only reaction ids are compared; two reactions with the same id count as
    equal regardless of their stoichiometry or bounds.

    Returns
    -------
    int
        Size of the symmetric difference of the two sets of reaction ids.
    """
    ids_first = set(rxn.id for rxn in model1.reactions)
    ids_second = set(rxn.id for rxn in model2.reactions)

    only_in_first = ids_first - ids_second
    only_in_second = ids_second - ids_first

    return len(only_in_first) + len(only_in_second)



In [36]:
import pandas as pd

def filter_and_display(list1, list2, list3):
    """Combine three result lists into a table and drop rows without a usable score.

    Parameters
    ----------
    list1 : sequence
        Scores (column ``'Scores'``).
    list2 : sequence
        Edit distances (column ``'Edit Distances'``).
    list3 : sequence
        Computation counts (column ``'Computation Count'``).

    Returns
    -------
    pandas.DataFrame
        The rows whose score is neither missing (NaN) nor equal to 0.0; the
        original row index is kept.
    """
    table = pd.DataFrame({'Scores': list1, 'Edit Distances': list2, 'Computation Count': list3})

    # A row is kept only if its score is present and non-zero
    scores = table['Scores']
    usable = scores.notna() & (scores != 0.0)

    return table[usable]




## Generation of data points using sampling

In [37]:
import cobra
from cobra.sampling import sample

import pandas as pd


# Normalize the uptake/export fluxes of one sample by the flux of `normalization_id`
def normalize_row(row, normalization_id):
    """Keep the ``*_in`` / ``*_out`` entries of ``row`` and divide them by a reference flux.

    Parameters
    ----------
    row : mapping with ``items()`` (e.g. a pandas Series or dict)
        Maps reaction id -> flux value.
    normalization_id : str
        Key of the flux used as divisor.

    Returns
    -------
    dict
        Only the entries whose key ends with ``"_in"`` or ``"_out"``, each
        divided by ``row[normalization_id]``. If that divisor is 0, every
        kept entry is 0.0 instead.
    """
    scale = row[normalization_id]
    normalized = {}
    for col, value in row.items():
        # Everything that is not an uptake/export reaction is dropped
        if not col.endswith(("_in", "_out")):
            continue
        normalized[col] = (value / scale) if scale != 0 else 0.0
    return normalized


def create_synthetic_datapoints(model, amount, normalization_id):
    """Draw ``amount`` flux samples from ``model`` and normalize each one.

    Every sampled row is passed through ``normalize_row``, so each returned
    data point contains only ``*_in`` / ``*_out`` fluxes relative to the flux
    of ``normalization_id``.

    Returns
    -------
    list of dict
        One normalized data point per sampled row.
    """
    # `sample` is cobra.sampling.sample; its result is iterated row by row
    flux_samples = sample(model, amount)

    datapoints = []
    for _, fluxes in flux_samples.iterrows():
        datapoints.append(normalize_row(fluxes, normalization_id))

    return datapoints

In [38]:
# Declare exchange reactions, since they are needed to sample flux distributions.
# They are added to a copy so that `model` itself stays without "EX_" reactions.

model_data_gen = model.copy()

# One exchange reaction per extracellular metabolite
model_data_gen.add_boundary(A_EX, type="exchange")
model_data_gen.add_boundary(E_EX, type="exchange")
model_data_gen.add_boundary(L_EX, type="exchange")
model_data_gen.add_boundary(M_EX, type="exchange")
model_data_gen.add_boundary(N_EX, type="exchange")
model_data_gen.add_boundary(P_EX, type="exchange")

0,1
Reaction identifier,EX_P_EX
Name,exchange
Memory address,0x15a2868d0
Stoichiometry,P_EX <=>  <=>
GPR,
Lower bound,-1000.0
Upper bound,1000.0


In [39]:
import cobra

def check_for_deadends(model):
    """Count dead-end metabolites in compartment ``'c'``.

    A metabolite counts as a dead end when it is produced by some reaction but
    consumed by none, or consumed by some reaction but produced by none.
    Reactions flagged as reversible (``reaction.reversibility``) mark all of
    their metabolites as both produced and consumed; for all other reactions
    ``products`` count as produced and ``reactants`` as consumed.

    NOTE(review): direction is taken from ``reactants``/``products`` only; a
    reaction restricted to run backwards would presumably be read in the wrong
    direction - confirm whether such reactions can occur.

    Returns
    -------
    int
        Number of produced-only metabolites plus number of consumed-only
        metabolites.
    """
    cytosolic = [met for met in model.metabolites if met.compartment == 'c']
    is_produced = dict.fromkeys(cytosolic, False)
    is_consumed = dict.fromkeys(cytosolic, False)

    for rxn in model.reactions:
        if rxn.reversibility:
            # Either side can act as substrate or product
            for met in rxn.metabolites:
                is_produced[met] = is_consumed[met] = True
            continue

        # Irreversible: only intracellular metabolites are tracked
        for met in rxn.products:
            if met.compartment == 'c':
                is_produced[met] = True
        for met in rxn.reactants:
            if met.compartment == 'c':
                is_consumed[met] = True

    # Produced but never consumed
    produced_only = [met.id for met, made in is_produced.items()
                     if made and not is_consumed[met]]
    # Consumed but never produced
    consumed_only = [met.id for met, used in is_consumed.items()
                     if used and not is_produced[met]]

    return len(produced_only) + len(consumed_only)



In [40]:
def filter_datapoints(datapoints, reaction_list):
    """Restrict every data point to the reactions present in ``reaction_list``.

    Parameters
    ----------
    datapoints : list of dict
        Each dict maps reaction id -> value.
    reaction_list : iterable
        Objects with an ``id`` attribute.

    Returns
    -------
    list of dict
        One new dict per data point, containing only the keys that match a
        reaction id. The input dicts are not modified.
    """
    allowed_ids = {reaction.id for reaction in reaction_list}

    return [
        {key: value for key, value in point.items() if key in allowed_ids}
        for point in datapoints
    ]

In [41]:
from itertools import combinations

def evaluate_reaction_subsets(model, experimental_data, random_reactions, num_removed,
                              subset_sizes=(2, 3), score_tolerance=3e-7):
    """Count how many small reaction subsets repair a deliberately damaged model.

    ``num_removed`` randomly chosen internal reactions are removed from a copy
    of ``model``. Every subset (of the given sizes) drawn from the removed
    reactions plus ``random_reactions`` is then added back to the damaged model
    and scored with ``least_square_min`` against ``experimental_data``.

    Parameters
    ----------
    model : cobra.Model
        Reference model; it is copied, not modified.
    experimental_data : list of dict
        Data points mapping reaction id -> flux. Entries for reactions that
        are missing from the damaged model are ignored.
    random_reactions : list
        Additional candidate reactions.
    num_removed : int
        Number of reactions to remove. Reactions whose id ends with ``'_in'``
        or ``'_out'`` are never removed.
    subset_sizes : iterable of int, default (2, 3)
        Sizes of the candidate subsets that are tried.
    score_tolerance : float, default 3e-7
        Score allowed per 10 data points; the acceptance threshold is
        ``len(experimental_data) / 10 * score_tolerance``.

    Returns
    -------
    tuple (int, int)
        ``(adequate_subsets_count, num_subsets_unblocked)``: the number of
        subsets whose score is below the threshold (and not the 0.0 sentinel
        that ``least_square_min`` returns on failure), and how many of those
        yield a model for which ``check_for_deadends`` returns 0.

    Notes
    -----
    The removed reactions are drawn with ``random.sample``, so the result
    depends on the state of the ``random`` module. The reactions of the
    damaged model are printed.
    """
    threshold = len(experimental_data) / 10 * score_tolerance

    # Step 1: remove `num_removed` randomly chosen internal reactions from a copy of the model
    model_copy = model.copy()
    valid_reactions = [rxn for rxn in model_copy.reactions
                       if not rxn.id.endswith(('_in', '_out'))]
    removed_reactions = random.sample(valid_reactions, num_removed)
    model_copy.remove_reactions(removed_reactions)
    print(model_copy.reactions)

    # Measurements of reactions that no longer exist cannot be matched; drop them
    remaining_data = filter_datapoints(experimental_data, model_copy.reactions)

    # Step 2: assemble the pool of reactions that may be added back
    reaction_pool = removed_reactions + list(random_reactions)

    # Step 3: evaluate every candidate subset of the requested sizes
    adequate_subsets_count = 0
    num_subsets_unblocked = 0
    for subset_size in subset_sizes:
        for subset in combinations(reaction_pool, subset_size):
            # Add the subset to a fresh copy so candidates do not influence each other
            temp_model = model_copy.copy()
            temp_model.add_reactions(subset)

            S = get_stoichiometric_matrix(temp_model)
            score, _ = least_square_min(temp_model, S, remaining_data)

            # A score of exactly 0.0 marks a failed optimisation and is rejected
            if score < threshold and score != 0.0:
                adequate_subsets_count += 1

                if check_for_deadends(temp_model) == 0:
                    num_subsets_unblocked += 1

    return adequate_subsets_count, num_subsets_unblocked





In [77]:
# Fix both RNGs so that the random candidate reactions generated next are reproducible
random.seed(42)
np.random.seed(42)

In [78]:
test_random_reacs = generate_random_reactions(model, 13)
#remove_reacs = sample_reactions(model, 2)

In [79]:
random.seed(42)
np.random.seed(42)

In [80]:
synth_data20 = create_synthetic_datapoints(model_data_gen, 20, "A_in")

In [81]:
random.seed(42)
np.random.seed(42)

In [82]:
synth_data100 = create_synthetic_datapoints(model_data_gen, 100, "A_in")

In [83]:
random.seed(42)
np.random.seed(42)

In [84]:
synth_data500 = create_synthetic_datapoints(model_data_gen, 500, "A_in")

## Evaluating the number of adequate subsets for different experimental data

In [85]:
# evaluate_reaction_subsets picks the reactions to remove with random.sample;
# seeding right before the call fixes which reactions are removed.
random.seed(17)
np.random.seed(17)

In [86]:
num_adequate_subsets_efm, num_unblocked_efm = evaluate_reaction_subsets(model, experimental_data_zeropadded_out, test_random_reacs, 2)

[<Reaction A_in at 0x159513050>, <Reaction R1 at 0x1595132d0>, <Reaction R2 at 0x159513450>, <Reaction R3 at 0x159513c90>, <Reaction R4 at 0x159512e50>, <Reaction R5 at 0x159513ed0>, <Reaction R6 at 0x1595125d0>, <Reaction R8 at 0x159513d10>, <Reaction R10 at 0x159512990>, <Reaction R11 at 0x159510c50>, <Reaction R12 at 0x159512bd0>, <Reaction R13 at 0x159511ad0>, <Reaction R14 at 0x159510250>, <Reaction E_EX_out at 0x159513810>, <Reaction L_EX_out at 0x159512b50>, <Reaction M_EX_out at 0x159512610>, <Reaction N_EX_out at 0x159510f90>, <Reaction P_EX_out at 0x159512690>]


In [87]:
print(f"Number of subsets with adequate scores: {num_adequate_subsets_efm}")
print(f"Number of unblocked models: {num_unblocked_efm}")

Number of subsets with adequate scores: 113
Number of unblocked models: 113


In [88]:
random.seed(17)
np.random.seed(17)

In [89]:
num_adequate_subsets20, num_unblocked_20 = evaluate_reaction_subsets(model, synth_data20, test_random_reacs, 2)
print(f"Number of subsets with adequate scores: {num_adequate_subsets20}")
print(f"Number of unblocked models: {num_unblocked_20}")

[<Reaction A_in at 0x299ab1490>, <Reaction R1 at 0x299ab3f90>, <Reaction R2 at 0x299ab1990>, <Reaction R3 at 0x29996fd90>, <Reaction R4 at 0x29996df10>, <Reaction R5 at 0x29996e190>, <Reaction R6 at 0x29996e290>, <Reaction R8 at 0x29917b310>, <Reaction R10 at 0x15731b710>, <Reaction R11 at 0x157318e10>, <Reaction R12 at 0x299060e90>, <Reaction R13 at 0x16e3af010>, <Reaction R14 at 0x1577d2b50>, <Reaction E_EX_out at 0x15785a890>, <Reaction L_EX_out at 0x15785a650>, <Reaction M_EX_out at 0x157858450>, <Reaction N_EX_out at 0x16e40a950>, <Reaction P_EX_out at 0x16e408b50>]
Number of subsets with adequate scores: 117
Number of unblocked models: 117


In [90]:
random.seed(17)
np.random.seed(17)

In [91]:
num_adequate_subsets100, num_unblocked_100 = evaluate_reaction_subsets(model, synth_data100, test_random_reacs, 2)
print(f"Number of subsets with adequate scores: {num_adequate_subsets100}")
print(f"Number of unblocked models: {num_unblocked_100}")

[<Reaction A_in at 0x15a32df50>, <Reaction R1 at 0x15a32e350>, <Reaction R2 at 0x15a32fad0>, <Reaction R3 at 0x15a32f150>, <Reaction R4 at 0x15a32cd10>, <Reaction R5 at 0x15a32d290>, <Reaction R6 at 0x157f61650>, <Reaction R8 at 0x1578fda10>, <Reaction R10 at 0x157fb7f10>, <Reaction R11 at 0x15944e3d0>, <Reaction R12 at 0x15944c710>, <Reaction R13 at 0x15944d910>, <Reaction R14 at 0x15944cfd0>, <Reaction E_EX_out at 0x15944c850>, <Reaction L_EX_out at 0x15944dfd0>, <Reaction M_EX_out at 0x15a3bde10>, <Reaction N_EX_out at 0x15a3bd010>, <Reaction P_EX_out at 0x15a3bfb50>]
Number of subsets with adequate scores: 116
Number of unblocked models: 116


In [92]:
random.seed(17)
np.random.seed(17)

In [93]:
num_adequate_subsets500, num_unblocked_500 = evaluate_reaction_subsets(model, synth_data500, test_random_reacs, 2)
print(f"Number of subsets with adequate scores: {num_adequate_subsets500}")
print(f"Number of unblocked models: {num_unblocked_500}")

[<Reaction A_in at 0x157753050>, <Reaction R1 at 0x15a0a1810>, <Reaction R2 at 0x15a0a1cd0>, <Reaction R3 at 0x15a0028d0>, <Reaction R4 at 0x15a010850>, <Reaction R5 at 0x15a0f3010>, <Reaction R6 at 0x15a0f3a10>, <Reaction R8 at 0x15a09d450>, <Reaction R10 at 0x15a09d350>, <Reaction R11 at 0x15a09fc50>, <Reaction R12 at 0x15a09f790>, <Reaction R13 at 0x15a09f750>, <Reaction R14 at 0x15a09f890>, <Reaction E_EX_out at 0x15a09fc10>, <Reaction L_EX_out at 0x15a09fe50>, <Reaction M_EX_out at 0x15a09d810>, <Reaction N_EX_out at 0x15a09d950>, <Reaction P_EX_out at 0x15a09da50>]
Number of subsets with adequate scores: 116
Number of unblocked models: 116


Different seed (23): the same evaluations repeated with a different random choice of removed reactions

In [94]:
# Seeding with 23 instead of 17 makes evaluate_reaction_subsets (which uses
# random.sample) remove a different set of reactions.
random.seed(23)
np.random.seed(23)

In [95]:
num_adequate_subsets_efm23, num_unblocked_efm_23 = evaluate_reaction_subsets(model, experimental_data_zeropadded_out, test_random_reacs, 2)
print(f"Number of subsets with adequate scores: {num_adequate_subsets_efm23}")
print(f"Number of unblocked models: {num_unblocked_efm_23}")

[<Reaction A_in at 0x15a04f910>, <Reaction R1 at 0x15a0ed690>, <Reaction R2 at 0x15a302410>, <Reaction R3 at 0x15a3020d0>, <Reaction R4 at 0x16e3f3090>, <Reaction R6 at 0x15a373710>, <Reaction R7 at 0x15a09b3d0>, <Reaction R8 at 0x15a09ad50>, <Reaction R9 at 0x16e34a310>, <Reaction R10 at 0x16e429ed0>, <Reaction R11 at 0x16e429e10>, <Reaction R12 at 0x16e428a90>, <Reaction R14 at 0x15a0d8110>, <Reaction E_EX_out at 0x15a0db190>, <Reaction L_EX_out at 0x15a0893d0>, <Reaction M_EX_out at 0x15a0894d0>, <Reaction N_EX_out at 0x15a0897d0>, <Reaction P_EX_out at 0x15a08b090>]
Number of subsets with adequate scores: 102
Number of unblocked models: 102


In [96]:
random.seed(23)
np.random.seed(23)

In [97]:
num_adequate_subsets20_23, num_unblocked_20_23 = evaluate_reaction_subsets(model, synth_data20, test_random_reacs, 2)
print(f"Number of subsets with adequate scores: {num_adequate_subsets20_23}")
print(f"Number of unblocked models: {num_unblocked_20_23}")

[<Reaction A_in at 0x28f4a0ad0>, <Reaction R1 at 0x28f4a0a50>, <Reaction R2 at 0x28f4a2390>, <Reaction R3 at 0x29930f410>, <Reaction R4 at 0x29930eb10>, <Reaction R6 at 0x29930dbd0>, <Reaction R7 at 0x29930cd10>, <Reaction R8 at 0x29930dc10>, <Reaction R9 at 0x29930f6d0>, <Reaction R10 at 0x29930d410>, <Reaction R11 at 0x29930d290>, <Reaction R12 at 0x29930cf10>, <Reaction R14 at 0x29930f050>, <Reaction E_EX_out at 0x29930f510>, <Reaction L_EX_out at 0x29930c490>, <Reaction M_EX_out at 0x29930e590>, <Reaction N_EX_out at 0x29930ef90>, <Reaction P_EX_out at 0x29930c850>]
Number of subsets with adequate scores: 124
Number of unblocked models: 124


In [98]:
random.seed(23)
np.random.seed(23)

In [99]:
num_adequate_subsets100_23, num_unblocked_100_23 = evaluate_reaction_subsets(model, synth_data100, test_random_reacs, 2)
print(f"Number of subsets with adequate scores: {num_adequate_subsets100_23}")
print(f"Number of unblocked models: {num_unblocked_100_23}")

[<Reaction A_in at 0x15a1bf7d0>, <Reaction R1 at 0x15a1be610>, <Reaction R2 at 0x15a1bd810>, <Reaction R3 at 0x15a1bdc10>, <Reaction R4 at 0x15a1bdd50>, <Reaction R6 at 0x15a10fa10>, <Reaction R7 at 0x15a10c650>, <Reaction R8 at 0x15a10ded0>, <Reaction R9 at 0x15a10c8d0>, <Reaction R10 at 0x15a10e5d0>, <Reaction R11 at 0x15a10f350>, <Reaction R12 at 0x15a10e010>, <Reaction R14 at 0x15a10e590>, <Reaction E_EX_out at 0x15a10ca10>, <Reaction L_EX_out at 0x15a10f510>, <Reaction M_EX_out at 0x15a10ccd0>, <Reaction N_EX_out at 0x15a10fc50>, <Reaction P_EX_out at 0x15a10f6d0>]
Number of subsets with adequate scores: 111
Number of unblocked models: 111


In [100]:
random.seed(23)
np.random.seed(23)

In [101]:
num_adequate_subsets500_23, num_unblocked_500_23 = evaluate_reaction_subsets(model, synth_data500, test_random_reacs, 2)
print(f"Number of subsets with adequate scores: {num_adequate_subsets500_23}")
print(f"Number of unblocked models: {num_unblocked_500_23}")

[<Reaction A_in at 0x157caa190>, <Reaction R1 at 0x157ca8bd0>, <Reaction R2 at 0x157ca8fd0>, <Reaction R3 at 0x157cab850>, <Reaction R4 at 0x157cabf90>, <Reaction R6 at 0x157ca8150>, <Reaction R7 at 0x299258a90>, <Reaction R8 at 0x29933ec90>, <Reaction R9 at 0x157caa810>, <Reaction R10 at 0x1596c8610>, <Reaction R11 at 0x157730b10>, <Reaction R12 at 0x157732650>, <Reaction R14 at 0x157730190>, <Reaction E_EX_out at 0x157731850>, <Reaction L_EX_out at 0x157733990>, <Reaction M_EX_out at 0x157730610>, <Reaction N_EX_out at 0x157730a50>, <Reaction P_EX_out at 0x157733e90>]
Number of subsets with adequate scores: 94
Number of unblocked models: 94


## Evaluation of model reconstruction taking internal flux values into account

In [102]:
import cobra
from cobra.sampling import sample

import pandas as pd


# Normalize all fluxes of one sample, except "EX_" exchange fluxes, by the flux of `normalization_id`
def normalize_row_internal(row, normalization_id):
    """Drop the ``EX_*`` entries of ``row`` and divide the rest by a reference flux.

    Parameters
    ----------
    row : mapping with ``items()`` (e.g. a pandas Series or dict)
        Maps reaction id -> flux value.
    normalization_id : str
        Key of the flux used as divisor.

    Returns
    -------
    dict
        Every entry whose key does not start with ``"EX_"`` (internal as well
        as ``*_in`` / ``*_out`` fluxes), divided by ``row[normalization_id]``.
        If that divisor is 0, every kept entry is 0.0 instead.
    """
    scale = row[normalization_id]
    normalized = {}
    for col, value in row.items():
        # Only the "EX_" exchange reactions are left out
        if col.startswith("EX_"):
            continue
        normalized[col] = (value / scale) if scale != 0 else 0.0
    return normalized


def create_synthetic_datapoints_internal(model, amount, normalization_id):
    """Draw ``amount`` flux samples from ``model`` and normalize each one, keeping internal fluxes.

    Every sampled row is passed through ``normalize_row_internal``, so each
    returned data point contains all fluxes except the ``EX_*`` ones, relative
    to the flux of ``normalization_id``.

    Returns
    -------
    list of dict
        One normalized data point per sampled row.
    """
    # `sample` is cobra.sampling.sample; its result is iterated row by row
    flux_samples = sample(model, amount)

    datapoints = []
    for _, fluxes in flux_samples.iterrows():
        datapoints.append(normalize_row_internal(fluxes, normalization_id))

    return datapoints

In [103]:
random.seed(42)
np.random.seed(42)

In [104]:
synth_data_internal42 = create_synthetic_datapoints_internal(model_data_gen, 20, "A_in")

print(synth_data_internal42)

[{'A_in': 1.0, 'R1': 1.4099330919526516, 'R2': 1.4099330919526523, 'R3': 1.409933091952652, 'R4': 7.994916532655358e-17, 'R5': 0.05944189885176445, 'R6': -0.46937499080441825, 'R7': 1.1751508703062226, 'R8': 0.23178537303488572, 'R9': -0.4099330919526534, 'R10': 1.409933091952652, 'R11': 1.175150870306222, 'R12': -0.4693749908044184, 'R13': 0.46937499080441836, 'R14': -0.9433654972713353, 'E_EX_out': 0.23478222164642967, 'L_EX_out': 1.6445258611106406, 'M_EX_out': 0.2970315166212973, 'N_EX_out': 0.23178537303488606, 'P_EX_out': 0.004615515662500538}, {'A_in': 1.0, 'R1': 1.0046443576691568, 'R2': 1.0046443576691577, 'R3': 1.0046443576691602, 'R4': 2.72669793894444e-16, 'R5': 0.05064110673095897, 'R6': -0.0552854644001173, 'R7': 0.639597527067702, 'R8': 0.02239804028601917, 'R9': -0.0046443576691575375, 'R10': 1.0046443576691602, 'R11': 0.6395975270677017, 'R12': -0.05528546440011741, 'R13': 0.055285464400117376, 'R14': -0.6171994867816838, 'E_EX_out': 0.3650468306014578, 'L_EX_out': 0.6

In [105]:
random.seed(17)
np.random.seed(17)

In [106]:
# evaluate_reaction_subsets returns (adequate subsets, unblocked subsets);
# unpack the pair so both counts are reported under their own label.
num_adequate_subsets_internal17 = evaluate_reaction_subsets(model, synth_data_internal42, test_random_reacs, 2)
num_adequate_internal17, num_unblocked_internal17 = num_adequate_subsets_internal17
print(f"Number of subsets with adequate scores: {num_adequate_internal17}")
print(f"Number of unblocked models: {num_unblocked_internal17}")

[<Reaction A_in at 0x16fa50710>, <Reaction R1 at 0x16fa51150>, <Reaction R2 at 0x16fa50990>, <Reaction R3 at 0x16fa51b90>, <Reaction R4 at 0x16fa51f10>, <Reaction R5 at 0x16fa53090>, <Reaction R6 at 0x16fa53890>, <Reaction R8 at 0x16fa53a90>, <Reaction R10 at 0x16fa51010>, <Reaction R11 at 0x16fa50150>, <Reaction R12 at 0x16fa51c90>, <Reaction R13 at 0x16fa53390>, <Reaction R14 at 0x16fa51f50>, <Reaction E_EX_out at 0x16fa51e10>, <Reaction L_EX_out at 0x16fa514d0>, <Reaction M_EX_out at 0x16fa51410>, <Reaction N_EX_out at 0x16fa50590>, <Reaction P_EX_out at 0x16fa52210>]
Number of subsets with adequate scores: (14, 14)


In [107]:
random.seed(23)
np.random.seed(23)

In [108]:
# evaluate_reaction_subsets returns (adequate subsets, unblocked subsets);
# unpack the pair so both counts are reported under their own label.
num_adequate_subsets_internal23 = evaluate_reaction_subsets(model, synth_data_internal42, test_random_reacs, 2)
num_adequate_internal23, num_unblocked_internal23 = num_adequate_subsets_internal23
print(f"Number of subsets with adequate scores: {num_adequate_internal23}")
print(f"Number of unblocked models: {num_unblocked_internal23}")

[<Reaction A_in at 0x16fca1510>, <Reaction R1 at 0x16fca1d90>, <Reaction R2 at 0x16fca3190>, <Reaction R3 at 0x16fca2390>, <Reaction R4 at 0x16fca0c90>, <Reaction R6 at 0x16fca1210>, <Reaction R7 at 0x16fca2190>, <Reaction R8 at 0x16fca1850>, <Reaction R9 at 0x16fca2c50>, <Reaction R10 at 0x16fca06d0>, <Reaction R11 at 0x16fca3cd0>, <Reaction R12 at 0x16fca0550>, <Reaction R14 at 0x1593c0450>, <Reaction E_EX_out at 0x299ab3350>, <Reaction L_EX_out at 0x299ab3090>, <Reaction M_EX_out at 0x299ab2790>, <Reaction N_EX_out at 0x299ab0190>, <Reaction P_EX_out at 0x299ab0d50>]
Number of subsets with adequate scores: (13, 13)
