In [7]:
import cobra
from cobra.io import load_model, read_sbml_model
from cobra.flux_analysis import flux_variability_analysis
import pandas as pd
import scipy
from scipy.stats import pearsonr
import numpy as np
import matplotlib.pyplot as plt
import importlib
import gc
from cobra.sampling import sample
from matplotlib.ticker import MaxNLocator

In [8]:
import sys
teraflux_path = '../../'
if teraflux_path not in sys.path:
    sys.path.append(teraflux_path)
import teraflux
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Organism label; it is written to the "Organism" column of inputData.csv
# for every generated condition.
organism="Ecoli_core"

In [10]:
# Load the "textbook" model and overwrite each reaction's gene-protein-reaction
# rule with the reaction's own ID, so that every reaction is governed by a
# single pseudo-gene carrying the reaction's name.
# (Presumably this lets per-reaction "expression" values be keyed by
# reaction ID - confirm against how teraflux maps genes to reactions.)
model = load_model("textbook")
for rxn in model.reactions:
    rxn.gene_reaction_rule = rxn.id

In [11]:
# Save the model with the rewritten gene rules as SBML. "ecoli_core.xml" is
# the same file name that is listed in the "Network" column of inputData.csv.
cobra.io.write_sbml_model(model, "ecoli_core.xml")

In [6]:
# DO SAMPLING

In [13]:
# Sample flux distributions from the "textbook" model with widened bounds and
# save them to CSV (one row per sample, one column per reaction).
BOUND = 30000      # replacement magnitude for bounds that sit at +/-1000
N_SAMPLES = 1000   # number of flux distributions to draw
SEED = 42          # fixed seed so that re-running the notebook reproduces the same samples

modelS = load_model("textbook")
for reaction in modelS.reactions:
    LB, UB = reaction.bounds
    # Bounds exactly at +/-1000 (presumably the model's "unbounded"
    # placeholder - confirm) are widened to +/-BOUND; all others are kept.
    if LB == -1000:
        LB = -BOUND
    if UB == 1000:
        UB = BOUND
    reaction.bounds = (LB, UB)

# Glucose exchange: lower bound -10; ATP maintenance: lower bound 0.
modelS.reactions.EX_glc__D_e.bounds = (-10, BOUND)
modelS.reactions.ATPM.bounds = (0, BOUND)

s = sample(modelS, N_SAMPLES, seed=SEED)
s.to_csv('sampled_flux_distributions.csv')

In [5]:
# Reload the sampled flux distributions from the CSV written by the sampling
# cell; the first CSV column is used as the row index.
# NOTE(review): this rebinds `s`, replacing the DataFrame produced by the
# sampling cell - presumably so the notebook can continue without re-sampling.
s = pd.read_csv('sampled_flux_distributions.csv',index_col=0)

In [13]:
import os  # not part of the notebook's top import cell, so it is imported here

# Build the TeraFlux input set from the sampled flux distributions in `s`:
#   * knownFluxes/knownFluxes_<i>.csv      - flux of the intake reaction(s) of sample i
#   * transcriptomes/transcriptome_<i>.csv - |flux| of every reaction of sample i
#   * inputData.csv                        - one row per sample pointing at the two files above
# NOTE: despite the ".csv" extensions, every file written here is TAB-separated.

# Per-sample files go into their own sub-folders.
known_fluxes_dir = 'knownFluxes'
transcriptomes_dir = 'transcriptomes'
os.makedirs(known_fluxes_dir, exist_ok=True)    # exist_ok: the cell can be re-run safely
os.makedirs(transcriptomes_dir, exist_ok=True)
print(f"Ensured directories '{known_fluxes_dir}' and '{transcriptomes_dir}' exist.")

# Values shared by every condition: defined once instead of on every iteration.
intakeRxns = ["EX_glc__D_e"]   # reactions whose sampled flux is exported as a known flux
Organism = organism
Medium = "medium.csv"
Network = "ecoli_core.xml"

# The index file is opened once and receives the header plus one line per sample.
with open("inputData.csv", 'w', newline="\n") as input_data_file:
    print("Organism", "Condition", "GeneExpFile", "Medium", "Network", "KnownFluxes", sep="\t", file=input_data_file)

    for i in range(s.shape[0]):
        # Positional access: `i` counts rows, so `iloc` is used rather than a
        # label lookup, which would fail if the index of `s` is not 0..n-1.
        fluxesRef = s.iloc[i]
        # The absolute flux of each reaction is written as its "expression" value.
        expressions = fluxesRef.abs()

        known_fluxes_filename = os.path.join(known_fluxes_dir, f'knownFluxes_{i}.csv')
        transcriptome_filename = os.path.join(transcriptomes_dir, f'transcriptome_{i}.csv')

        # Known fluxes of sample i.
        with open(known_fluxes_filename, 'w', newline="\n") as f:
            print("Reaction_ID", "Metabolite_ID", "Reaction_Flux", sep="\t", file=f)
            for Reaction_ID in intakeRxns:
                Metabolite_ID = Reaction_ID[3:]   # drop the leading 3 characters ("EX_")
                Reaction_Flux = fluxesRef[Reaction_ID]
                print(Reaction_ID, Metabolite_ID, Reaction_Flux, sep="\t", file=f)

        # Pseudo-transcriptome of sample i; the "Gene_ID" column holds reaction IDs.
        with open(transcriptome_filename, 'w', newline="\n") as f:
            print("Gene_ID", "Expression", sep="\t", file=f)
            for reactionID, expression in expressions.items():
                print(reactionID, expression, sep="\t", file=f)

        # Index line for sample i; the file columns carry the sub-folder paths
        # (e.g. "knownFluxes/knownFluxes_0.csv").
        Condition = f"glucose_iter_{i}"
        GeneExpFile = transcriptome_filename
        KnownFluxes = known_fluxes_filename
        print(Organism, Condition, GeneExpFile, Medium, Network, KnownFluxes, sep="\t", file=input_data_file)

print("\nProcessing complete. 'inputData.csv' and sub-folder files have been created.")

Ensured directories 'knownFluxes' and 'transcriptomes' exist.

Processing complete. 'inputData.csv' and sub-folder files have been created.


In [14]:
# Run TeraFlux on the conditions listed in inputData.csv and keep the result
# in `fluxes`.
# NOTE(review): the roles of the three arguments (input table, output
# directory, log-file prefix) are inferred from the variable names - confirm
# against teraflux.getFluxes.
inputFile = "inputData.csv"
resultsDir = "./results"
prefix_log_file="teraflux_ecolicore"
fluxes=teraflux.getFluxes(inputFile,resultsDir,prefix_log_file) 

Found 1000 conditions to process.

[2026/02/06 19:28:43] Loading metabolic model: ecoli_core.xml
[2026/02/06 19:28:43] Loading transcriptomic data: transcriptome_0.csv
[2026/02/06 19:28:43] Loading known fluxes: knownFluxes_0.csv
[2026/02/06 19:28:43] Updating model (bounds, medium)...
Applying medium from: medium.csv
Starting data pre-processing...
Capping FPKM values at 95th percentile: 80.5596
Median 'g' value (E_g) for imputation: 3.1127e+00
Pre-processing finished in 0.01 seconds.
[2026/02/06 19:28:45] Running TeraFlux optimization...
Creating stoichiometric constraints...
Adding known flux constraints...
Solving with standard FBA to get an initial guess (x0)...
Adding known flux constraints...
R_EX_glc__D_e -9.879681412354817
{'R_EX_glc__D_e': 9.879681412354817, 'R_EX_nh4_e': inf, 'R_EX_o2_e': inf, 'R_EX_pi_e': inf}
FBA solution found. Objective value: 0.9056. Status: optimal
## Solving the non-linear optimization problem with IPOPT...
Solver finished. Status: Solve_Succeeded | S

In [15]:
# NOTE(review): debugging leftover - the displayed output is an absolute
# local path; consider removing this cell (or clearing its output) before sharing.
%pwd

'/home/marcelo/jupyter/teraflux_bkup5feb26/figs2_3/ecoli_core'