# Code to turn FBA data and proteomics into estimates for Kapps
## Written by: Wheaton Schroeder
### Latest version: 10/25/2023

#### Make imports

In [1]:
import re
import pandas as pd
import numpy as np
import cobra
import statistics
import math
import shutil

#### Rules used taken from Dinh and Maranas, 2023
##### 1. Enzymes that catalyze multiple reactions are assumed to have the same Kapp for all reactions. This is calculated by summing flux rates and dividing by that enzyme's concentration.
###### assumption: absolute sum of flux rates
##### 2. In "and" relationships, the lowest abundant protein divided by its stoichiometry is used to set the enzyme concentration
##### 3. Any reaction with a "0" rate under pFBA will be assigned the median Kapp value so that it can still hold flux and still have some enzyme limitation

In [2]:
#specify default in and out folders
#def_in is the folder the input spreadsheets are read from
#def_out is the folder the kapp file is written to, and the folder that is copied over to ../GAMS/
def_in = "input_base"
def_out = "model_base"

#### read the proteomics file

In [3]:
#path of the proteomics measurements, inside the default input folder
proteomics_path = "./"+def_in+"/get_kapp/simplified_protein_measurements_av.xlsx"

#read the measurements and key the rows by locus tag in a single step
proteomics = pd.read_excel(proteomics_path).set_index("Locus Tags")

#for debugging
proteomics

Unnamed: 0_level_0,No,Fasta headers,Mass (Da),iBAQ (log2),g/gDCW
Locus Tags,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Clo1313_1249,2.0,CP002416.1_prot_ADU74312.1_1212[locus_tag=Clo1...,81307.705,14.367733,3.934364e-08
Clo1313_1075,3.0,CP002416.1_prot_ADU74139.1_1039[locus_tag=Clo1...,25442.325,14.999780,2.059554e-08
Clo1313_0956,4.0,CP002416.1_prot_ADU74024.1_924[locus_tag=Clo13...,46594.370,15.077650,4.018699e-08
Clo1313_0147,5.0,CP002416.1_prot_ADU73243.1_143[locus_tag=Clo13...,43346.730,15.084033,3.758075e-08
Clo1313_0842,6.0,CP002416.1_prot_ADU73911.1_811[locus_tag=Clo13...,26710.795,15.443570,3.103413e-08
...,...,...,...,...,...
Clo1313_0630,,,,,0.000000e+00
Clo1313_0628,,,,,0.000000e+00
Clo1313_1768,,,,,0.000000e+00
Clo1313_1487,,,,,0.000000e+00


#### Read in the enzyme stoich file to get the GPR for each reaction and each reaction name

In [4]:
#path of the curated enzyme stoichiometry workbook, inside the default input folder
rxn_data_path = "./"+def_in+"/ENZYME_stoich_curation_ctherm.xlsx"

#read the workbook and key the rows by reaction id (the "id" column)
rxn_data = pd.read_excel(rxn_data_path).set_index("id")

#for debugging
rxn_data

Unnamed: 0_level_0,rxn_src,enz,gpr,protein_stoich,subunit_comments,status,MW (g/mmol)
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
RXN-13PPDH_FWD-BDH,13PPDH,BDH,Clo1313_2130,Clo1313_2130:1,,protStoichAsgnAuto,43.33800
RXN-13PPDH_FWD-gbsB_1798,13PPDH,gbsB_1798,Clo1313_1798,Clo1313_1798:1,,protStoichAsgnAuto,96.85200
RXN-13PPDH_FWD-gbsB_1827,13PPDH,gbsB_1827,Clo1313_1827,Clo1313_1827:1,,protStoichAsgnAuto,42.48978
RXN-2D3DGLNR_FWD-kduD,2D3DGLNR,kduD,Clo1313_0815,Clo1313_0815:1,,protStoichAsgnAuto,27.91363
RXN-2D3DGLNR_REV-kduD,2D3DGLNR,kduD,Clo1313_0815,Clo1313_0815:1,,protStoichAsgnAuto,27.91363
...,...,...,...,...,...,...,...
RXN-Zn2divalent_FWD-SPONT,Zn2divalent,SPONT,SPONT,zeroCost,,protStoichAsgnManual,89.80625
RXN-Zn2metal_FWD-SPONT,Zn2metal,SPONT,SPONT,zeroCost,,protStoichAsgnAuto,0.00000
RXN-ZNabc_FWD-znuABC,ZNabc,znuABC,Clo1313_1688 and Clo1313_1689 and Clo1313_1690,"Clo1313_1688:1,Clo1313_1689:1,Clo1313_1690:1",,protStoichAsgnAuto,0.00000
RXN-ADCS_FWD-UNKNOWN,ADCS,UNKNOWN,UNKNOWN,zeroCost,,,


#### Read in the v_min_rxn values

In [5]:
#read the tab-separated flux table from the default input folder
#the folder comes from def_in (as for the other input files) rather than a hard-coded "input_base"
v_min_rxn = pd.read_csv("./"+def_in+"/get_kapp/get_v_min_flux_all_av.txt",sep="\t")

#change reaction name (the "RXN" column) to index
v_min_rxn = v_min_rxn.set_index("RXN")

#for debugging
v_min_rxn

Unnamed: 0_level_0,LB,flux,UB
RXN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
RXN-13PPDH_FWD-BDH,0.0,0.000000,1000.0
RXN-13PPDH_FWD-gbsB_1798,0.0,0.000000,1000.0
RXN-13PPDH_FWD-gbsB_1827,0.0,0.000000,1000.0
RXN-2D3DGLNR_FWD-kduD,0.0,0.000000,1000.0
RXN-2D3DGLNR_REV-kduD,0.0,0.000000,1000.0
...,...,...,...
BIOSYN-OTHER18,0.0,0.001443,1000.0
BIOSYN-OTHER19,0.0,0.000935,1000.0
BIOSYN-OTHER20,0.0,0.000725,1000.0
BIOSYN-OTHER21,0.0,0.001245,1000.0


In [6]:
#for debugging: spot-check the flux value stored for a single reaction
v_min_rxn.loc['RXN-EXCH_etoh_e_FWD-SPONT','flux']

3.27096

#### Use the dataframes to get the enzyme concentration limit for each reaction

In [7]:
#RBA model reaction names (the index of rxn_data)
rxn_names = rxn_data.index.to_list()

#RBA reaction name -> stoichiometric model reaction name (the basis of the RBA model name)
rxn_base_names = {}

#RBA reaction name -> True if at least one of its proteins has a measured abundance
#reactions left False are treated as unmeasured
rxn_prot_meas = {}

#RBA reaction name -> enzyme name
rxn_enz_name = {}

#non-redundant list of enzymes (set removes repeats, list makes it iterable in a fixed order)
enz_list = list(set(rxn_data.enz.to_list()))

#enzyme -> protein -> stoichiometry of that protein in the enzyme
enz_prot_stoic = {enz: {} for enz in enz_list}

#enzyme -> protein -> concentration of that enzyme component
enz_comp_conc = {enz: {} for enz in enz_list}

#enzyme -> reaction -> flux of reactions catalyzed by that enzyme
enz_rxn_rates = {enz: {} for enz in enz_list}

#enzyme -> True if at least one of its proteins is measured; every enzyme starts as unmeasured
enz_prot_meas = {enz: False for enz in enz_list}

#base reaction name -> list of enzyme names
base_to_enz = {}

#enzyme -> molecular weight
enz_MW = {}

#genes without a proteomics measurement
unmeasured = []

#genes with a proteomics measurement
measured = []

#RBA reaction name -> True if the reaction is zero cost
zero_cost = {}

#every gene seen so far, measured or not
all_genes = []

#reactions for which no proteomics measurement is present
no_meas = []

#walk every RBA reaction once and fill the lookup dictionaries initialized above:
#base reaction name, enzyme name, enzyme MW, base-to-enzyme map, zero-cost flag,
#per-enzyme protein stoichiometry and component concentration, and the measured/unmeasured gene lists
for rxn in rxn_names:

  #get the base reaction name, makes fluxes easier to look up
  #get the base reaction name by removing textual framework
  #first drop the "RXN-" prefix
  base_name = rxn.split("RXN-")

  #then cut at the first "_FWD" or "_REV"; element 0 is everything before the direction tag
  base_name2 = re.split("_(FWD|REV)",base_name[1])

  rxn_base_names[rxn] = base_name2[0]

  #protein stoichiometry string, e.g. "gene1:1,gene2:1", or "zeroCost"
  stoich_str_temp = rxn_data.loc[rxn, 'protein_stoich']

  #default to the reaction proteins being unmeasured
  rxn_prot_meas[rxn] = False

  #for debugging
  #print("rxn: ",rxn)
  #print("protein_stoich: ",stoich_str_temp)

  #split the protein stoichiometry into individual genes
  #NOTE(review): assumes every row has a string here; a blank (NaN) cell would fail on .split - confirm
  stoich_list_temp = stoich_str_temp.split(",")

  #for debugging
  #print('rxn: ',rxn,"\tstoich_list_temp: ",stoich_list_temp)

  rxn_enz_name[rxn] = rxn_data.loc[rxn, 'enz']

  #add enzyme MW, if not there already
  #the first reaction seen for an enzyme sets its MW
  if not rxn_enz_name[rxn] in enz_MW:

    enz_MW[rxn_enz_name[rxn]] = rxn_data.loc[rxn,"MW (g/mmol)"]

  #record which enzymes catalyze each base reaction, without repeats
  if base_name2[0] in base_to_enz:

    if not rxn_data.loc[rxn, 'enz'] in base_to_enz[base_name2[0]]:

      base_to_enz[base_name2[0]].append(rxn_data.loc[rxn, 'enz'])

  else:

    base_to_enz[base_name2[0]] = list()
    base_to_enz[base_name2[0]].append(rxn_data.loc[rxn, 'enz'])

  if(stoich_list_temp[0] == 'zeroCost'):

    #we can't get a kapp estimate if it is a zero cost reaction
    zero_cost[rxn] = True

  else:

    #is not a zero-cost reaction
    zero_cost[rxn] = False

    #for each gene
    for gene in stoich_list_temp:

      #each entry has the form "locus_tag:stoichiometry"
      gene_temp, stoich_temp = gene.split(":")

      if gene_temp in proteomics.index:

        #if here, then it is measured

        #that means at least one protein is measured
        rxn_prot_meas[rxn] = True

        #at least one protein in the enzyme is measured
        enz_prot_meas[rxn_data.loc[rxn, 'enz']] = True

        #get the measured amount, convert to mmol/gDCW
        #(g/gDCW divided by the mass in Da, i.e. g/mol, gives mol/gDCW; times 1000 gives mmol/gDCW)
        mmolgDW_temp = (proteomics.loc[gene_temp, "g/gDCW"] / proteomics.loc[gene_temp, "Mass (Da)"]) * 1000

        #save the stoichiometry and concentration
        enz_prot_stoic[rxn_data.loc[rxn, 'enz']][gene_temp] = float(stoich_temp)

        #save enzyme component concentrations if not already there
        if not (gene_temp in enz_comp_conc[rxn_enz_name[rxn]]):

          #save the concentration
          enz_comp_conc[rxn_enz_name[rxn]][gene_temp] = mmolgDW_temp

        #for debugging 
        #print("enzyme: ",rxn_enz_name[rxn],"\tgene: ",gene_temp,"\tstoich: ",stoich_temp,"\tconcentration: ",mmolgDW_temp," mmol/gDCW")

        #update list and counts only if gene not already in the gene list
        if gene_temp not in all_genes:

          measured.append(gene_temp)

          all_genes.append(gene_temp)

      else:

        #otherwise, unmeasured

        #for debugging
        #print("gene: ",gene_temp,"\tstoich: ",stoich_temp,"\tis unmeasured")

        #update list and counts only if gene not already in the gene list
        if gene_temp not in all_genes:

          unmeasured.append(gene_temp)

          all_genes.append(gene_temp)

  #for debugging for the reaction, report the enzyme concentration bound that we will use
  #print("reaction: ",rxn,"\tenzyme: ",rxn_enz_name[rxn],"\tat least one protein measured? ",rxn_prot_meas[rxn])

  #keep track of reactions with no measured protein (this includes zero-cost reactions)
  if not rxn_prot_meas[rxn]:

    no_meas.append(rxn)


In [8]:
#for "and" relations, where multiple proteins make up a heteromer, use the least abundant peptide
#(after dividing by its stoichiometry) to calculate the enzyme concentration
#note that "or" relations are already separated out, wherever there are multiple

#enzyme -> enzyme concentration, only filled for enzymes with at least one measured protein
enz_conc = dict()

#enzyme -> True if the enzyme is considered active
enz_active = dict()

#for each enzyme
for enz in enz_list:

  #an unmeasured enzyme is assumed inactive
  #the length check should agree with the measured flag; it is kept so that an enzyme with no stored
  #component is marked inactive instead of failing on a missing enz_conc entry further down
  if (not enz_prot_meas[enz]) or (len(enz_comp_conc[enz]) == 0):

    enz_active[enz] = False
    continue

  #for debugging
  #print("enzyme: ",enz,"\tamount: ",enz_comp_conc[enz])

  #the limiting component sets the enzyme concentration: smallest concentration / stoichiometry
  enz_conc[enz] = min(enz_comp_conc[enz][prot] / enz_prot_stoic[enz][prot] for prot in enz_comp_conc[enz])

  #enzymes below the 1E-10 threshold are treated as inactive
  #written as "not (x < threshold)" so that a NaN concentration is still flagged active, as before
  enz_active[enz] = not (enz_conc[enz] < 1E-10)

  #for debugging
  #print("\"and\"-based concentration: ",enz_conc[enz],"\tactive? ",enz_active[enz])

#don't flag spontaneous reactions as inactive
enz_active['SPONT'] = True

In [9]:
#reactions whose associated enzyme has already been deemed inactive, in rxn_names order
inactive = [rxn for rxn in rxn_names if not enz_active[rxn_enz_name[rxn]]]

#for debugging, comment out if unused
print(len(rxn_names))
print(inactive)
len(inactive)

1741
['RXN-3MOPDC_FWD-UNKNOWN', 'RXN-3OACOAS_FWD-UNKNOWN', 'RXN-4MOPDC_FWD-UNKNOWN', 'RXN-AACPS3MBUT_FWD-UNKNOWN', 'RXN-AACPS3MBUT_REV-UNKNOWN', 'RXN-AACPS3MCR_FWD-UNKNOWN', 'RXN-AACPS3MCR_REV-UNKNOWN', 'RXN-ACabc_FWD-UNKNOWN', 'RXN-ACGNMCT_FWD-ADU73322', 'RXN-ACLDC_FWD-UNKNOWN', 'RXN-ACNAM9PL_FWD-spsE_0230', 'RXN-ACNMCT_FWD-ADU73322', 'RXN-ACNPLYS_FWD-spsE_0230', 'RXN-ACt2_FWD-UNKNOWN', 'RXN-ADCL_FWD-UNKNOWN', 'RXN-ADEabc_FWD-nupOPQ', 'RXN-ADEabc_REV-nupOPQ', 'RXN-ADPCOAL_FWD-grsT', 'RXN-ADSK_FWD-tufA', 'RXN-AKP1_FWD-UNKNOWN', 'RXN-ALAALAr_FWD-pyrAB', 'RXN-ALCD22xi_FWD-cotJA', 'RXN-ALCD22xi2_FWD-cotJA', 'RXN-ALCD23xi2_FWD-cotJA', 'RXN-ALCD3MBOH_FWD-cotJA', 'RXN-ALCD3x_FWD-cotJA', 'RXN-AMAOTr_FWD-bioA', 'RXN-AMAOTr_REV-bioA', 'RXN-AMUAAH_FWD-cwlD', 'RXN-AMUAAH_FWD-cwlJ', 'RXN-AMUAAH_FWD-lysM', 'RXN-AMUAAH_FWD-sleB_0046', 'RXN-AMUAAH_FWD-sleB_2624', 'RXN-AMUAAH_FWD-SLH', 'RXN-AMUAAH_FWD-yqiI_1198', 'RXN-AOXSr_FWD-bioF', 'RXN-AOXSr_REV-bioF', 'RXN-AOXSr2_FWD-bioF', 'RXN-ASNS1_FWD-ADU7473

261

In [10]:
#list of active reactions, is the complement of the list of inactive reactions

#set copy of the inactive list so each membership test does not rescan the whole list
inactive_lookup = set(inactive)

#keep rxn_names order
active_rxns = [rxn for rxn in rxn_names if rxn not in inactive_lookup]

#for debugging, comment out if unused
print(active_rxns)
len(active_rxns)

['RXN-13PPDH_FWD-BDH', 'RXN-13PPDH_FWD-gbsB_1798', 'RXN-13PPDH_FWD-gbsB_1827', 'RXN-2D3DGLNR_FWD-kduD', 'RXN-2D3DGLNR_REV-kduD', 'RXN-2DHPFALDL_FWD-panB', 'RXN-2HBO_FWD-lctE', 'RXN-2HBO_FWD-lctE_1160', 'RXN-34DHOXPEGOX_FWD-gbsB_1798', 'RXN-34DHOXPEGOX_REV-gbsB_1798', 'RXN-3HAD100_FWD-fabZ', 'RXN-3HAD100_REV-fabZ', 'RXN-3HAD120_FWD-fabZ', 'RXN-3HAD120_REV-fabZ', 'RXN-3HAD140_FWD-fabZ', 'RXN-3HAD140_REV-fabZ', 'RXN-3HAD160_FWD-fabZ', 'RXN-3HAD160_REV-fabZ', 'RXN-3HAD180_FWD-fabZ', 'RXN-3HAD180_REV-fabZ', 'RXN-3HAD40_1_FWD-fabZ', 'RXN-3HAD40_1_REV-fabZ', 'RXN-3HAD60_FWD-fabZ', 'RXN-3HAD60_REV-fabZ', 'RXN-3HAD80_FWD-fabZ', 'RXN-3HAD80_REV-fabZ', 'RXN-3OAR100_FWD-fabG_1283', 'RXN-3OAR100_REV-fabG_1283', 'RXN-3OAR100_FWD-fabG_2272', 'RXN-3OAR100_REV-fabG_2272', 'RXN-3OAR120_FWD-fabG_1283', 'RXN-3OAR120_REV-fabG_1283', 'RXN-3OAR120_FWD-fabG_2272', 'RXN-3OAR120_REV-fabG_2272', 'RXN-3OAR140_FWD-fabG_1283', 'RXN-3OAR140_REV-fabG_1283', 'RXN-3OAR140_FWD-fabG_2272', 'RXN-3OAR140_REV-fabG_2272', 'R

1480

#### for each enzyme, compile the list of reactions it catalyzes or shares catalytic duties for, so that their fluxes can be summed

In [11]:
#enzyme -> list of reactions catalyzed by that enzyme; every enzyme starts with an empty list
enz_cat_list = {enz: [] for enz in enz_list}

#for debugging, comment out if unused. 
#enz_cat_list

In [12]:
#for each reaction, collect every reaction that shares its enzyme or its base reaction name
#and add them to the list kept for the reaction's enzyme
for rxn in rxn_names:

  #get the enzyme name
  enz_temp = rxn_enz_name[rxn]

  #get the base reaction name, enzymes with the same base name share catalytic duties
  base_temp = rxn_base_names[rxn]

  #all reactions, in rxn_names order, with:
  #a. same enzyme, or
  #b. same base name
  rxn_list_temp = [
    rxn2 for rxn2 in rxn_names
    if (rxn_enz_name[rxn2] == enz_temp) or (rxn_base_names[rxn2] == base_temp)
  ]

  #add this list of reactions to the reaction fluxes needing to be considered when calculating kapp
  #repeats are expected here, duplicates are removed in a separate pass
  enz_cat_list[enz_temp].extend(rxn_list_temp)

  #repeat for each reaction
      

In [13]:
#remove duplicates from each enzyme's reaction list, keeping the first occurrence of each reaction
for enz in enz_list:

  #dict keys are unique and keep insertion order, so this drops repeats without reordering
  enz_cat_list[enz] = list(dict.fromkeys(enz_cat_list[enz]))

  #for debugging
  #print("enzyme ",enz," catalyzes or is a catalyst of the following reactions: ",str(enz_cat_list[enz]))

#### use the v_min_rxn results to get Kapps

In [14]:
#dictionary for Kapps, keyed by RBA reaction name
kapp_vals = dict()

#keep numerators, denominators, and what went into them for debugging
kapp_num = dict()
kapp_denom = dict()
rxns_used = dict()
enzs_used = dict()

#reactions that have a flux value; built once so the membership test inside the loop is cheap
rxns_with_flux = set(v_min_rxn.index)

#for each reaction, calculate the Kapp, since Kapps are defined on a per-reaction basis in GAMS
for rxn in active_rxns:

  #for debugging, comment out if unused
  #print("working on ",rxn)

  #build the numerators and denominator terms to calculate Kapp
  kapp_num[rxn] = 0
  kapp_denom[rxn] = 0

  #build a list of enzymes used in kapp denominator
  enzs_used[rxn] = list()

  #build a list of metabolic reactions used in the kapp numerator
  rxns_used[rxn] = list()

  #From Hoang and Maranas, Metabolic Engineering, 2023 supplemental files
  #where multiple reactions are catalyzed by the same enzyme or multiple isozymes catalyze the same reaction, since we cannot ascertain individual 
  #enzyme-reaction catalysis load, we assume the same kapp value for all reactions and enzymes involved and the sum of metabolic fluxes and the sum of enzyme 
  #concentration values are used in kapp calculation

  #therefore, for each reaction we have to 
  #1. get the sum of all enzymes catalyzing that reaction

  #1a. get the list of all enzymes that catalyze the base reaction
  #anything spontaneous or unknown is left out, as it is not part of a proper GPR
  #a new list is built so that the lists stored in base_to_enz are not modified
  enz_list_temp = [enz for enz in base_to_enz[rxn_base_names[rxn]] if enz not in ('SPONT', 'UNKNOWN')]

  #for debugging, shows the enzymes actually used for this reaction
  if(rxn == "RXN-EXCH_cellb_e_REV-SBP_cellx"):

    print(enz_list_temp)

  #if list is empty, go to next reaction, reaction can have no Kapp
  if len(enz_list_temp) == 0:

    continue

  #1b. sum their concentrations
  for enz in enz_list_temp:

    #ignore if no concentration information
    if enz in enz_conc:

      kapp_denom[rxn] = kapp_denom[rxn] + enz_conc[enz]
      enzs_used[rxn].append(enz)

  #it is not just the rate of this reaction in the numerator, but the rates of all reactions catalyzed by any of these enzymes
  #2. get the rates of all reactions catalyzed by these enzymes
  #NOTE(review): rule 1 above speaks of an absolute sum of flux rates, the values are summed as-is here;
  #presumably fine because reactions are split into FWD and REV - confirm no flux can be negative
  for enz in enz_list_temp:

    #for each reaction catalyzed by one of these enzymes
    for rxn2 in enz_cat_list[enz]:

      #count each reaction once, and ignore it if it has no flux value
      if (rxn2 in rxns_with_flux) and (rxn2 not in rxns_used[rxn]):

        kapp_num[rxn] = kapp_num[rxn] + v_min_rxn.loc[rxn2,'flux']
        rxns_used[rxn].append(rxn2)

        #for debugging, comment out if unused
        #if rxn == "RXN-EXCH_cellb_e_REV-SBP_cellx":

          #print("reaction also considered with EXCH_cellb_e: ",rxn2,"\tcontribution: ",v_min_rxn.loc[rxn2,'flux'])

  #calculate kapp
  if kapp_denom[rxn] == 0:

    #none of the enzymes has a concentration, so no kapp can be estimated
    #store NaN explicitly rather than dividing by zero; non-finite values are filtered out later
    kapp_vals[rxn] = float("nan")

  else:

    kapp_vals[rxn] = kapp_num[rxn] / kapp_denom[rxn]
    

['SBP_cellx']


In [15]:
#save the kapp values in both unit systems

#units for comparison to brenda database, 1/s
kapp_invs = dict()

#units for use in the model, inverse hours, 1/h
kapp_invh = dict()

#loop over the reactions that have a calculated kapp
for rxn in list(kapp_vals.keys()):

  #for debugging, comment out if unused
  #print("reaction: ",rxn)

  #kapp_vals is treated as 1/h (it is converted to 1/s by the 1/3600 factor below), so keep it as is
  kapp_invh[rxn] = kapp_vals[rxn]

  #convert to inverse seconds, 3600 seconds per hour
  kapp_invs[rxn] =  kapp_vals[rxn] * (1/3600)

  #report values, for debugging
  #print('rxn: ',rxn,'\tkapp_vals: ',kapp_vals[rxn],'\tkapp_invs: ',kapp_invs[rxn],'\tkapp_invh: ',kapp_invh[rxn],'\n')

#### get the median kapp value, which is assigned to every reaction lacking a usable kapp estimate ("UNKNOWN" and "SPONT" reactions are excluded when the file is written)

In [16]:
#kapps to keep: reaction -> kapp, only where the value is positive and finite (not NaN, not infinite)
keep_kapps = {
  rxn: kapp_vals[rxn]
  for rxn in list(kapp_vals.keys())
  if (kapp_invs[rxn] > 0) and math.isfinite(kapp_vals[rxn])
}

#the same values as a plain list, this is what the statistics are applied to
good_kapps = list(keep_kapps.values())

#for debugging
#good_kapps

#for debugging
#good_kapps

In [17]:
#median of the usable kapp values, in the same units as kapp_vals
#this is the value later given to reactions that have no usable kapp of their own
med_kapp = statistics.median(good_kapps)

#for debugging
med_kapp

64828.83813494978

In [18]:
#for debugging: number of kapp values that went into the median
len(good_kapps)

854

In [19]:
#median kapp in 1/s, from the median already computed (3600 seconds per hour)
med_kapp_invs = med_kapp / 3600

#for debugging
med_kapp_invs

18.008010593041604

#### build the k_app list

In [20]:
#kapp value to use for every reaction that has a flux entry in v_min_rxn
#a reaction keeps its own kapp when it has a usable one, otherwise it is assigned the median value
final_kapps = {rxn: keep_kapps.get(rxn, med_kapp) for rxn in v_min_rxn.index.to_list()}

In [21]:
#gams-formatted file of the kapp values, written to the default output folder

#only write metabolic reactions: any reaction whose name contains one of these fragments is skipped
skip_tags = ('PROSYN-', 'ENZSYN-', 'ENZLOAD-', 'PROWASTE-', 'BIOSYN-', '-UNKNOWN', '-SPONT')

#the with block closes the file even if writing fails part way through
with open('./'+def_out+'/kapp_vals_av_bc.txt', 'w') as calc_kapp_f:

  #the entries are wrapped in a pair of "/" delimiters
  calc_kapp_f.write("/\n")

  for rxn in final_kapps:

    #for debugging
    #print("rxn: ",rxn)

    if not any(tag in rxn for tag in skip_tags):

      #one line per reaction: quoted reaction name, tab, kapp value
      calc_kapp_f.write("\'"+rxn+"\'\t"+str(final_kapps[rxn])+"\n")

  calc_kapp_f.write("/")

#### make sure the GAMS model file has the same version of the model by copying over the model files in build model

In [22]:
#replace the GAMS copy of the model folder with the one just written
try:

  shutil.rmtree('../GAMS/'+def_out)

except FileNotFoundError:

  #nothing to remove yet, e.g. on a first run
  pass

#copytree returns the destination path, which is shown as the cell output
shutil.copytree('./'+def_out+'/',"../GAMS/"+def_out+"/")

'../GAMS/model_base/'

In [23]:
#leftover debugging cell: a bare list literal that only displays itself, nothing else reads it
['Clo1313_2628']

['Clo1313_2628']

In [24]:
#for debugging: kapp numerator (summed flux) stored for one reaction
kapp_num['RXN-EXCH_cellb_e_REV-SBP_cellx']

4.2

In [25]:
#for debugging: kapp denominator (summed enzyme concentration) stored for the same reaction
kapp_denom['RXN-EXCH_cellb_e_REV-SBP_cellx']

0.000330727321849575

In [26]:
#for debugging: kapp value that is finally used for the same reaction
final_kapps['RXN-EXCH_cellb_e_REV-SBP_cellx']

12699.283435404499

In [27]:
#for debugging: recompute numerator / denominator by hand to compare against the stored final kapp
kapp_num['RXN-EXCH_cellb_e_REV-SBP_cellx']/kapp_denom['RXN-EXCH_cellb_e_REV-SBP_cellx']

12699.283435404499