In [1]:
import cobra
from cobra.io import load_model, read_sbml_model
from cobra.flux_analysis import flux_variability_analysis
import pheflux
import pandas as pd
import scipy
from scipy.stats import pearsonr
import numpy as np
import matplotlib.pyplot as plt
import importlib
import gc
from cobra.sampling import sample
from matplotlib.ticker import MaxNLocator

Welcome to PheFlux ! 





In [2]:
# Show the current working directory; the relative paths used in this notebook are resolved against it.
%pwd

'/home/marcelo/jupyter/teraflux/fig5/Scerevisiae'

In [4]:
import sys
teraflux_path = '../../'
if teraflux_path not in sys.path:
    sys.path.append(teraflux_path)
import teraflux

# También puedes usar la función de autoreload para que Jupyter
# recargue tu librería si haces cambios en el archivo .py
%load_ext autoreload
%autoreload 2

In [13]:
# Organism label embedded in every input/output file name built by this notebook.
organism = "Scerevisiae"
# Path to the SBML model file (despite the name, this is a file path, not a directory).
model_dir = "./gems/iMM904.xml"
model = read_sbml_model(model_dir)
# ID of the first reaction that appears in the model's linear objective.
# The coefficients are read directly from the model rather than through
# model.summary()._objective, which is a private attribute of the summary object
# and requires building a full summary just to look up an ID.
objective_id = [rxn.id for rxn in cobra.util.solver.linear_reaction_coefficients(model)][0]

# Condition codes iterated over by the cells below.
medium_codes   = ["glc","glc_chem"]
# For each condition code: the reaction IDs whose lower bound is set when building the medium
# and whose measured flux is exported as a known flux. "Growth_rate" holds the objective reaction.
reaction_codes = {"glc":["EX_glc__D_e"],"glc_chem":["EX_glc__D_e"],"Growth_rate":[objective_id]}

In [14]:
# Largest absolute bound found on any reaction of the model; used below as the
# magnitude of an "open" bound.
max_mag = max(
    abs(bound)
    for rxn in model.reactions
    for bound in (rxn.lower_bound, rxn.upper_bound)
)

# A bound is considered standard when it is zero or +/- max_mag.
STANDARD_BOUNDS = {-max_mag, 0.0, max_mag}

for reaction in model.reactions:
    lb, ub = reaction.lower_bound, reaction.upper_bound

    # Report reactions whose bounds are both zero.
    if lb == 0 and ub == 0:
        print(reaction.id,(lb,ub),reaction.bounds,sep="\t")

    # Nothing to do when both bounds already are standard values.
    if lb in STANDARD_BOUNDS and ub in STANDARD_BOUNDS:
        continue

    # Replace the non-standard bounds by the standard pair with the same sign pattern.
    if lb >= 0:
        # Forward-only reaction: open it between 0 and max_mag.
        if ub > 0:
            reaction.bounds = (0, max_mag)
    elif ub <= 0:
        # Backward-only reaction: open it between -max_mag and 0.
        reaction.bounds = (-max_mag, 0)
    else:
        # Bounds straddle zero: open the reaction in both directions.
        reaction.bounds = (-max_mag, max_mag)

    # Log the original bounds next to the bounds now set on the reaction.
    print(reaction.id,(lb,ub),reaction.bounds,sep="\t")

ATPM	(1.0, 1.0)	(0, 999999.0)
EX_glc__D_e	(-10.0, 999999.0)	(-999999.0, 999999.0)
EX_o2_e	(-2.0, 999999.0)	(-999999.0, 999999.0)


In [7]:
# Create one medium file per condition: a tab-separated list of the metabolites
# (and their exchange reactions) that the model takes up under that condition.
for medium_code in medium_codes:
    # Start from a freshly loaded model so bounds changed elsewhere do not leak in.
    model = read_sbml_model(model_dir)
    # Define carbon source: first close glucose uptake, then open uptake
    # (lower bound -5.13) on the reaction(s) listed for this condition.
    model.reactions.EX_glc__D_e.bounds=(0,max_mag)
    for rxn in reaction_codes[medium_code]:
        model.reactions.get_by_id(rxn).bounds=(-5.13,max_mag)
    # Build the summary once and reuse it for both the printed report and the
    # medium file, so the two are guaranteed to describe the same solution.
    # (The summary computes its own flux solution; the previous separate
    # model.optimize() result was never used.)
    summary = model.summary()
    print(medium_code)
    print(summary)
    with open(f"mediums/{organism}_Medium_{medium_code}.csv", 'w', newline="\n") as f:
        print("Metabolite_Name", "Reaction_ID", sep="\t", file=f)
        for _, row in summary.uptake_flux.iterrows():
            print(row.metabolite, row.reaction, sep="\t", file=f)

glc
Objective
1.0 BIOMASS_SC5_notrace = 0.17216507286177776

Uptake
------
Metabolite    Reaction    Flux  C-Number  C-Flux
  glc__D_e EX_glc__D_e    5.13         6 100.00%
     nh4_e    EX_nh4_e  0.9635         0   0.00%
      o2_e     EX_o2_e       2         0   0.00%
      pi_e     EX_pi_e 0.03404         0   0.00%
     so4_e    EX_so4_e 0.01331         0   0.00%

Secretion
---------
Metabolite  Reaction       Flux  C-Number C-Flux
     co2_e  EX_co2_e     -9.612         1 39.09%
    etoh_e EX_etoh_e     -7.489         2 60.91%
     for_e  EX_for_e -0.0008901         1  0.00%
     h2o_e  EX_h2o_e     -4.177         0  0.00%
       h_e    EX_h_e    -0.8669         0  0.00%

glc_chem
Objective
1.0 BIOMASS_SC5_notrace = 0.17216507286177776

Uptake
------
Metabolite    Reaction    Flux  C-Number  C-Flux
  glc__D_e EX_glc__D_e    5.13         6 100.00%
     nh4_e    EX_nh4_e  0.9635         0   0.00%
      o2_e     EX_o2_e       2         0   0.00%
      pi_e     EX_pi_e 0.03404         

In [7]:
# Reduce the experimental flux table of each condition to two kinds of entries:
# the carbon-source uptake reaction(s) and the biomass growth rate.
for medium_code in medium_codes:

    experimentalFile = f"./experimental/{organism}_Fluxfile_{medium_code}.csv"
    known_fluxes_filename = f"./knownFluxes/{organism}_knownFluxes_{medium_code}.csv"
    # Tab-separated table indexed by its first column; the "Flux" column is read below.
    experimental = pd.read_csv(experimentalFile, index_col=0, sep="\t")
    reactions = reaction_codes[medium_code] + reaction_codes["Growth_rate"]

    with open(known_fluxes_filename, 'w', newline="\n") as f:
        print("Reaction_ID", "Metabolite_ID", "Reaction_Flux", sep="\t", file=f)
        for Reaction_ID in reactions:
            # The growth rate is stored under the "Growth_rate" row of the experimental
            # table, whereas every other flux is stored under its reaction ID.
            is_growth = Reaction_ID == reaction_codes["Growth_rate"][0]
            row_label = "Growth_rate" if is_growth else Reaction_ID
            Reaction_Flux = experimental.loc[row_label].Flux
            # The reaction ID is written in both the Reaction_ID and Metabolite_ID columns.
            print(Reaction_ID, Reaction_ID, Reaction_Flux, sep="\t", file=f)

In [8]:
# Run pheflux once per medium condition.

for medium_code in medium_codes:
    # One-row input table passed to pheflux.getFluxes (column name -> value, in file order).
    input_row = {
        "Organism": organism,
        "Condition": medium_code,
        "GeneExpFile": f"./transcriptomes/{organism}_Expfile_{medium_code}.csv",
        "Medium": f"./mediums/{organism}_Medium_{medium_code}.csv",
        "Network": model_dir,
        "KnownFluxes": f"./knownFluxes/{organism}_knownFluxes_{medium_code}.csv",
    }
    input_file_name = f"inputData_{organism}_{medium_code}.csv"
    with open(input_file_name, 'w', newline="\n") as f:
        # Header line followed by the single data line, both tab-separated.
        print(*input_row.keys(), sep="\t", file=f)
        print(*input_row.values(), sep="\t", file=f)

    # Output location and log prefix handed to pheflux for this condition.
    resultsDir = "./results/pheflux/"
    prefix_log_file = f"{organism}_phe_{medium_code}"
    verbosity = True
    fluxes = pheflux.getFluxes(input_file_name, resultsDir, prefix_log_file, verbosity)

[2025/11/27 16:53:50] Condition ejecuted: Scerevisiae - glc
[2025/11/27 16:53:50] Loading metabolic model: iMM904
[2025/11/27 16:53:51] Loading transcriptomic data...
[2025/11/27 16:53:51] Updating metabolic model...
[2025/11/27 16:53:51] Running pheflux...
((-14.0833*(R_BIOMASS_SC5_notrace-R_BIOMASS_SC5_notrace_reverse_353ca))-(0.4*(R_EX_glc__D_e-R_EX_glc__D_e_reverse_d552d)))


******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit https://github.com/coin-or/Ipopt
******************************************************************************

Total number of variables............................:     2062
                     variables with only lower bounds:        0
                variables with lower and upper bounds:     2062
                     variables with onl

In [15]:
# Run teraflux once per medium condition.

for medium_code in medium_codes:
    # Create the one-row, tab-separated InputData file passed to teraflux.getFluxes.
    input_file_name = f"inputData_{organism}_{medium_code}.csv"
    with open(input_file_name, 'w', newline="\n") as f:
        print("Organism\tCondition\tGeneExpFile\tMedium\tNetwork\tKnownFluxes", file=f)
        Organism = organism
        Condition = medium_code
        GeneExpFile = f"./transcriptomes/{organism}_Expfile_{medium_code}.csv"
        Medium =      f"./mediums/{organism}_Medium_{medium_code}.csv"
        Network = model_dir
        KnownFluxes = f"./knownFluxes/{organism}_knownFluxes_{medium_code}.csv"
        print(Organism, Condition, GeneExpFile, Medium, Network,KnownFluxes, sep="\t", file=f)

    # Save results
    resultsDir = "./results/teraflux/"
    prefix_log_file = f"{organism}_{medium_code}"
    # IPOPT options forwarded to teraflux. The dict literal previously listed
    # 'nlp_scaling_method' twice ('none', then 'gradient-based'); only the last
    # value of a duplicated key survives, so the dead 'none' entry was removed
    # and the effective value is kept in the key's original position.
    ipoptParams={
            'print_level': 0, # Suppress solver output for cleaner logs
            'sb': 'yes',       # Suppress IPOPT banner
            'tol': 1e-10,       # Overall tolerance (e.g., 1e-8, 1e-10, etc.)
            'acceptable_tol': 1e-10, # Tolerance at which a solution is still accepted
            'max_iter': 6500,   # Increase maximum iterations if needed
            'nlp_scaling_method': 'gradient-based',
            'constr_viol_tol' : 1e-10,
            'hessian_approximation': 'exact',
        }
    fluxes,fr,lagrange = teraflux.getFluxes(input_file_name, resultsDir, prefix_log_file, ipoptParams)

Found 1 conditions to process.

[2026/02/06 19:04:34] Loading metabolic model: iMM904.xml
[2026/02/06 19:04:35] Loading transcriptomic data: Scerevisiae_Expfile_glc.csv
[2026/02/06 19:04:35] Loading known fluxes: Scerevisiae_knownFluxes_glc.csv
[2026/02/06 19:04:35] Updating model (bounds, medium)...
Applying medium from: ./mediums/Scerevisiae_Medium_glc.csv
Starting data pre-processing...
Capping FPKM values at 95th percentile: 336.7053
Median 'g' value (E_g) for imputation: 5.2357e+01
Pre-processing finished in 0.01 seconds.
[2026/02/06 19:04:36] Running TeraFlux optimization...
Creating stoichiometric constraints...
Adding known flux constraints...
Solving with standard FBA to get an initial guess (x0)...
Adding known flux constraints...
R_EX_glc__D_e -14.0833333333333
R_BIOMASS_SC5_notrace 0.4
{'R_EX_glc__D_e': 14.0833333333333, 'R_EX_so4_e': inf, 'R_EX_nh4_e': inf, 'R_EX_o2_e': inf, 'R_EX_pi_e': inf}
FBA solution found. Objective value: 0.4000. Status: optimal
## Solving the non-l

In [16]:
# Run teraflux with all internal fluxes being reversible

# NOTE(review): `model` is whatever model object currently lives in the kernel, so the
# exchange bounds written to the SBML file depend on which cells ran before this one.
# Collect the exchange reaction IDs once instead of re-evaluating model.exchanges
# on every iteration; comparing plain ID strings also makes the membership test explicit.
exchange_ids = {exchange.id for exchange in model.exchanges}
for reaction in model.reactions:
    # Exchange reactions keep their current bounds; everything else is opened in both directions.
    if reaction.id in exchange_ids: continue
    reaction.bounds = (-1000,1000)
cobra.io.write_sbml_model(model,"./gems/iMM904_allReversible.xml")

for medium_code in medium_codes:
    # Create the one-row, tab-separated InputData file passed to teraflux.getFluxes.
    input_file_name = f"inputData_{organism}_{medium_code}.csv"
    with open(input_file_name, 'w', newline="\n") as f:
        print("Organism\tCondition\tGeneExpFile\tMedium\tNetwork\tKnownFluxes", file=f)
        Organism = organism
        Condition = medium_code
        GeneExpFile = f"./transcriptomes/{organism}_Expfile_{medium_code}.csv"
        Medium =      f"./mediums/{organism}_Medium_{medium_code}.csv"
        # Use the all-reversible model written above instead of the original network.
        Network = "./gems/iMM904_allReversible.xml"
        KnownFluxes = f"./knownFluxes/{organism}_knownFluxes_{medium_code}.csv"
        print(Organism, Condition, GeneExpFile, Medium, Network,KnownFluxes, sep="\t", file=f)

    # Save results
    resultsDir = "./results/teraflux_allReversible/"
    prefix_log_file = f"{organism}_{medium_code}"
    # IPOPT options forwarded to teraflux. The dict literal previously listed
    # 'nlp_scaling_method' twice ('none', then 'gradient-based'); only the last
    # value of a duplicated key survives, so the dead 'none' entry was removed
    # and the effective value is kept in the key's original position.
    ipoptParams={
            'print_level': 0, # Suppress solver output for cleaner logs
            'sb': 'yes',       # Suppress IPOPT banner
            'tol': 1e-10,       # Overall tolerance (e.g., 1e-8, 1e-10, etc.)
            'acceptable_tol': 1e-10, # Tolerance at which a solution is still accepted
            'max_iter': 6500,   # Increase maximum iterations if needed
            'nlp_scaling_method': 'gradient-based',
            'constr_viol_tol' : 1e-10,
            'hessian_approximation': 'exact',
        }
    fluxes,fr,lagrange = teraflux.getFluxes(input_file_name, resultsDir, prefix_log_file, ipoptParams)

Found 1 conditions to process.

[2026/02/06 19:04:58] Loading metabolic model: iMM904_allReversible.xml
[2026/02/06 19:04:59] Loading transcriptomic data: Scerevisiae_Expfile_glc.csv
[2026/02/06 19:04:59] Loading known fluxes: Scerevisiae_knownFluxes_glc.csv
[2026/02/06 19:04:59] Updating model (bounds, medium)...
Applying medium from: ./mediums/Scerevisiae_Medium_glc.csv
Starting data pre-processing...
Capping FPKM values at 95th percentile: 336.7053
Median 'g' value (E_g) for imputation: 5.2357e+01
Pre-processing finished in 0.01 seconds.
[2026/02/06 19:05:00] Running TeraFlux optimization...
Creating stoichiometric constraints...
Adding known flux constraints...
Solving with standard FBA to get an initial guess (x0)...
Adding known flux constraints...
R_EX_glc__D_e -14.0833333333333
R_BIOMASS_SC5_notrace 0.4
{'R_EX_glc__D_e': 14.0833333333333, 'R_EX_so4_e': inf, 'R_EX_nh4_e': inf, 'R_EX_o2_e': inf, 'R_EX_pi_e': inf}
FBA solution found. Objective value: 0.4000. Status: optimal
## Sol

In [17]:
# Show the current working directory; the relative paths used in this notebook are resolved against it.
%pwd

'/home/marcelo/jupyter/teraflux/fig5/Scerevisiae'