# Eflux2 applied to *S. elongatus*

In [1]:
import sys
sys.path.append('../src')
sys.path.append('/Users/mahs128/Repos/CONCERTO')
from concerto.helpers.load_model_from_git import load_model_from_git
from eflux2 import EFlux2
import cobra
import pandas as pd
import numpy as np
import gurobipy

## Load in inputs

### Read in transcriptomics data

In [2]:
transcriptomics_fname = "processed_data/cleaned_transcriptomics.csv"
transcriptomics_df = pd.read_csv(transcriptomics_fname, index_col="Label")
transcriptomics_df.head()

Unnamed: 0_level_0,Se_axen_d4_1,Se_axen_d4_2,Se_axen_d4_3,Se_axen_d6_1,Se_axen_d6_2,Se_axen_d6_3,Se_axen_d8_1,Se_axen_d8_2,Se_axen_d8_3
Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
SYNPCC7942_RS00005,16290,17761,15101,14567,13967,12653,7016,8855,8689
SYNPCC7942_RS00010,7185,7502,6565,10086,7546,7705,3747,6670,6960
SYNPCC7942_RS00015,24176,26304,22781,23926,21306,20412,13440,17253,17053
SYNPCC7942_RS00020,35119,35145,25895,37701,34175,30569,24835,39769,30280
SYNPCC7942_RS00025,6891,7803,6607,6347,6284,6844,3326,4146,3180


### Load SBML model

In [4]:
# syn_model = load_model_from_git('Synechococcus')
syn_model = cobra.io.validate_
('/Users/mahs128/Repos/CONCERTO/models/synechococcus_elongatus_pcc_7942/iJB785_w_sucrose_transport.xml')

Set parameter TokenServer to value "leghorn.emsl.pnl.gov"


CobraSBMLError: Something went wrong reading the SBML model. Most likely the SBML model is not valid. Please check that your model is valid using the `cobra.io.sbml.validate_sbml_model` function or via the online validator at https://sbml.org/validator_servlet/ .
	`(model, errors) = validate_sbml_model(filename)`
If the model is valid and cannot be read please open an issue at https://github.com/opencobra/cobrapy/issues .

### Load rates

In [None]:
### TODO: MISSING RATES FOR SYN MODEL ###
rates_fname = "processed_data/cleaned_metabolomic_abundance_rates.csv"
rates_df = pd.read_csv(rates_fname, index_col="Sample")
rates_df.head()

Unnamed: 0_level_0,Se_axen_d4_1,Se_axen_d4_2,Se_axen_d4_3,Se_axen_d6_1,Se_axen_d6_2,Se_axen_d6_3,Se_axen_d8_1,Se_axen_d8_2,Se_axen_d8_3
Sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
13ppd,-29836.5,-61719.5,12502.0,20379.0,31300.5,-10967.5,3339.0,-34320.5,12960.0
3hbz,-2978.0,-1145.0,-113.5,571.5,219.0,444.5,-437.0,-1084.5,-930.0
4hbz,-2529.5,1568.5,-1020.5,1889.5,-1916.0,810.0,-2246.5,818.0,1859.5
aso4,-4807.5,-3176.0,275.0,-795.0,-1199.0,-3490.0,1443.0,-2645.5,4379.5
bz,-83691.0,-34668.5,-18102.5,-13110.5,-25685.0,-52963.5,15482.5,-2626.0,21771.0


## Normalize relevant data with respect to reference strain/line

### Identify reference strain/line

In [6]:
ref_rep = transcriptomics_df.columns[4] # 'Se_axen_d6_2'

### Normalize transcriptomics data

In [7]:
normalized_transcriptomics_df = transcriptomics_df.div(transcriptomics_df[ref_rep], axis=0)
normalized_transcriptomics_df.head()

Unnamed: 0_level_0,Se_axen_d4_1,Se_axen_d4_2,Se_axen_d4_3,Se_axen_d6_1,Se_axen_d6_2,Se_axen_d6_3,Se_axen_d8_1,Se_axen_d8_2,Se_axen_d8_3
Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
SYNPCC7942_RS00005,1.166321,1.27164,1.081191,1.042958,1.0,0.905921,0.502327,0.633994,0.622109
SYNPCC7942_RS00010,0.95216,0.994169,0.869997,1.336602,1.0,1.021071,0.496554,0.883912,0.922343
SYNPCC7942_RS00015,1.134704,1.234582,1.069229,1.12297,1.0,0.95804,0.630808,0.809772,0.800385
SYNPCC7942_RS00020,1.027623,1.028383,0.757718,1.103175,1.0,0.894484,0.726701,1.163687,0.886028
SYNPCC7942_RS00025,1.096595,1.241725,1.0514,1.010025,1.0,1.089115,0.529281,0.659771,0.506047


### Normalize rates data

In [8]:
normalized_rates_df = rates_df.div(rates_df[ref_rep], axis=0)
display(normalized_rates_df)

NameError: name 'rates_df' is not defined

## Calculate fluxes

### Explore candidate uptake reactions

#### Exploring Carbon/Biomass
This would be a typical approach, but measurements for neither CO2 nor HCO3 are available in the metabolics data. Therefore, we can either (a) ignore uptake reactions and not use them to narrow the bounds for flux calculations, or (b) use growth rates instead


In [9]:
# # Explore CO2 exchange and transport from external compartment --> periplasm --> cytoplasm

# print("----")
# print("co2_e reactions")
# print("----")
# for r in syn_model.metabolites.co2_e.reactions:
#     print(r.id)

# print(" ")
# print("----")
# print("co2_p reactions")
# print("----")
# for r in syn_model.metabolites.co2_p.reactions:
#     print(r.id)



In [10]:
# display(syn_model.reactions.get_by_id('EX_co2_e'))
# display(syn_model.reactions.get_by_id('CO2tex'))
# display(syn_model.reactions.get_by_id('CO2tpp')) 

### Explore candiate secretion reactions

#### Exploring Sucrose output

In [11]:
# Explore sucrose exchange and transport from cytoplasm --> external compartment
print("----")
print("sucr_c reactions")
print("----")
for r in syn_model.metabolites.sucr_c.reactions:
    print(r.id)

print(" ")
print("----")
print("sucr_e reactions")
print("----")
for r in syn_model.metabolites.sucr_e.reactions:
    print(r.id)

----
sucr_c reactions
----


NameError: name 'syn_model' is not defined

In [12]:
display(syn_model.reactions.get_by_id('SUCR'))
display(syn_model.reactions.get_by_id('SUCRt2'))
display(syn_model.reactions.get_by_id('SPP'))
display(syn_model.reactions.get_by_id('EX_sucr_e')) # candidate for secretion target

NameError: name 'syn_model' is not defined

In [13]:
# Identify candiates for target intake metabolite and corresponding reaction
intake_target_met = None #
# intake_target_rxn = ""

# Identify candiates for target secretion metabolite and corresponding reaction
secrete_target_met = "sucr" # BiGG ID for sucrose
secrete_target_rxn = 'EX_sucr_e'

# List of replicates/strains/lines included in data
replicate_list = normalized_transcriptomics_df.columns

# Use teh Gurobi solver in the model
syn_model.solver = 'gurobi'

# TODO: generalize this as a function with various options for intake candidate availability, or using growth rates instead

fluxes = {}
for rep in replicate_list:
    with syn_model:
        # Collect rates for this replicate/strain/line

        # TODO: condition intake/secrete on if a candidate is chosen
        # intake_target = syn_model.reactions.id....[co2_intake] 
        secrete_target = normalized_rates_df.loc[secrete_target_met, rep]
      #  syn_model.reactions.get_by_id(secrete_target_rxn) # secrete_3hp = normalized_line_rates.loc[rep,'3hp_secretion_rates (mmol/gDCW * hr)']
       
        # Update model reaction bounds with experimentally calculated rate
        # if intake_target:
        #     getattr(syn_model.reactions, intake_target_rxn).reaction.upper_bound = intake_target
        if secrete_target:
            getattr(syn_model.reactions, secrete_target_rxn).lower_bound = secrete_target
        try:
            print(rep)
            fluxes[rep] = EFlux2(syn_model, normalized_transcriptomics_df[rep])
        except TypeError:
            print(f"Replicate {rep} with uptake of {intake_target_met} and secretion of {secrete_target_met} is infeasible")
fluxes[rep]

NameError: name 'syn_model' is not defined

In [78]:
normalized_transcriptomics_df[rep]

Label
SYNPCC7942_RS00005    1.166321
SYNPCC7942_RS00010    0.952160
SYNPCC7942_RS00015    1.134704
SYNPCC7942_RS00020    1.027623
SYNPCC7942_RS00025    1.096595
                        ...   
HTX97_RS00025         1.266990
HTX97_RS00030         1.038095
HTX97_RS00035         1.000000
HTX97_RS00040         0.849624
HTX97_RS00005         1.153846
Name: Se_axen_d4_1, Length: 2761, dtype: float64