In [2]:
#import necessary packages

import pandas as pd
import cobra
from collections import OrderedDict
from copy import deepcopy
import os
import re as re

from custom_functions_scRBA import *

#### Specifics of files/conventions used in building this model
##### hopefully allows all the editing to take place on a single line in future

In [13]:
#define default in and out locations
def_in = "input_base"
def_out = "model_base"

#input files
sbml_file = '/'+def_in+'/iCTH669_for_RBA.sbml'                         #stoichiometric network file
prot_st_file = '/'+def_in+'/PROTEIN_stoich_curation_ctherm.xlsx'       #protein information file
aa_map_file = '/'+def_in+'/PROTEIN_amino_acid_map_ctherm.txt'          #amino acid weight map file
dummy_pr_file = '/'+def_in+'/PROTEIN_dummy_prot_calc_ctherm.xlsx'      #dummy protein file
enzy_st_file = '/'+def_in+'/ENZYME_stoich_curation_ctherm.xlsx'        #enzyme stoichiometry file
rna_st_file = '/'+def_in+'/RNA_stoich_ctherm.xlsx'                     #rRNA stoichiometry file
rib_comp_file = '/'+def_in+'/RIBOSOME_composition_ctherm.xlsx'         #ribosome composition file
biomass_file = '/'+def_in+'/BIOMASS_RBA_ctherm.xlsx'                   #biomass composition file
cell_av_conc_file = '/'+def_in+'/av_cellulosome_estimate.xlsx'
cell_cb_conc_file = '/'+def_in+'/cb_cellulosome_estimate.xlsx'
exchange_tag = 'RXN-EXCH_'                                            #string tag used to mark exchange reactions
dummy_length = 276                                                #length of dummy proteins

#output file(s) for the cellulosome
cellulosome_memb_file = '/'+def_out+'/RBA_rxns_prosyn_cell.txt'
cellulosome_waste_file = '/'+def_out+'/RBA_rxns_prowaste_cell.txt'
cell_av_conc_gams_file = '/'+def_out+'/prot_flux_cell_exp_av.txt'
cell_cb_conc_gams_file = '/'+def_out+'/prot_flux_cell_exp_cb.txt'

In [14]:
sbml_file

'/input_base/iCTH669_for_RBA.sbml'

#### Load data

In [15]:
#get the current working directory, this way I don't have to specify filepath all the time
curr_dir = os.getcwd()

# Metabolic model (COBRApy json)
model = cobra.io.read_sbml_model(curr_dir+sbml_file)

# Read protein data including ID, gene name, uniprot ID, location, cofactors (like ions), molecular weight, sequence and status
df_pro = pd.read_excel(curr_dir+prot_st_file)
df_pro.index = df_pro.id.to_list()
df_aamap = pd.read_csv(curr_dir+aa_map_file, sep='\t')
df_aamap.index = df_aamap.aa_abbv.to_list()

# Dummy protein
df_aa_dummy = pd.read_excel(curr_dir+dummy_pr_file)
df_aa_dummy.index = df_aa_dummy.aa_abbv.to_list()
dummy_medianL = int(round(df_aa_dummy.loc['A', 'Unnamed: 5'], 0))
# Add 1e-5 g/mmol to prevent number round lost of protein MW
dummy_MW = round(df_aa_dummy.loc['C', 'Unnamed: 5'], 5) + 1e-5

# Enzyme stoichiometry
df_enz = pd.read_excel(curr_dir+enzy_st_file)

# RNA stoichiometry
df_rnas = pd.read_excel(curr_dir+rna_st_file)
df_rnas.index = df_rnas.RNAid.to_list()

# Ribosome
df_ribo = pd.read_excel(curr_dir+rib_comp_file)

#not sure what this does, 
#df_ribo = df_ribo[df_ribo.paralog.isnull()]

# Biomass
df_biom = pd.read_excel(curr_dir+biomass_file, sheet_name='RBABioRxns')

#read avicel cellulosome file
av_cell_conc = pd.read_excel(curr_dir+cell_av_conc_file)

#set identifiers to the dataframe index
av_cell_conc = av_cell_conc.set_index('identifier')

#read avicel cellulosome file
cb_cell_conc = pd.read_excel(curr_dir+cell_cb_conc_file)

#set identifiers to the dataframe index
cb_cell_conc = cb_cell_conc.set_index('identifier')

In [16]:
model

0,1
Name,iCTH669
Memory address,319736680
Number of metabolites,883
Number of reactions,979
Number of genes,711
Number of groups,0
Objective expression,1.0*BIOMASS - 1.0*BIOMASS_reverse_69053
Compartments,"Cytosol, Extracellular space"


#### if marked as a cellulosome protein, add it to a list of cellulosome proteins, these will be allowed to have protein waste reactions

In [17]:
#create a list of cellulosomal protein synthesis reactions
cellulosome_membs = list()

#create a list of cellulosomal protein waste reactions
cellulosome_wastes = list()

#create a list for a gams-readable file for C therm synthesis of the cellulosome
av_conc_strings = list()
cb_conc_strings = list()

for prot_id in df_pro.index:

  if re.search(r'cellulosome', str(df_pro.loc[prot_id,'status'])):

    #build the protein synthesis string
    syn_string = "\'PROSYN-"+prot_id+"\'"
    waste_string = "\'PROWASTE-"+prot_id+"\'"
    av_conc_string = "\'PROSYN-"+prot_id+"\'\t"+str(av_cell_conc.loc[prot_id,'estimated molar abundance (mmol/gDCW)'])
    cb_conc_string = "\'PROSYN-"+prot_id+"\'\t"+str(cb_cell_conc.loc[prot_id,'estimated molar abundance (mmol/gDCW)'])

    #for debugging, comment out if unused
    #print(conc_string)

    cellulosome_membs.append(syn_string)
    cellulosome_wastes.append(waste_string)
    av_conc_strings.append(av_conc_string)
    cb_conc_strings.append(cb_conc_string)

#add forward slashes
cellulosome_membs = ['/'] + cellulosome_membs + ['/']
cellulosome_wastes = ['/'] + cellulosome_wastes + ['/']
av_conc_strings = ['/'] + av_conc_strings + ['/']
cb_conc_strings = ['/'] + cb_conc_strings + ['/']

In [18]:
#write the cellulosome protein file
#with open(curr_dir+cellulosome_memb_file, 'w') as f:
#    f.write('\n'.join(cellulosome_membs))

In [19]:
#write the cellulosome protein file
with open(curr_dir+cellulosome_waste_file, 'w') as f:
    f.write('\n'.join(cellulosome_wastes))

In [20]:
#write the cellulosome protein file
#with open(curr_dir+cell_av_conc_gams_file, 'w') as f:
#    f.write('\n'.join(av_conc_strings))

In [21]:
#write the cellulosome protein file
#with open(curr_dir+cell_cb_conc_gams_file, 'w') as f:
#    f.write('\n'.join(cb_conc_strings))

In [22]:
#get the list of rRNAs from the rna stoichiometry file
rnas = df_rnas.RNAid.to_list()

rnas

['rrna23s', 'rrna5s', 'rrna16s']

#### Assemble reactions

In [23]:
df_enz

Unnamed: 0,id,rxn_src,enz,gpr,protein_stoich,subunit_comments,status,MW (g/mmol)
0,RXN-13PPDH_FWD-BDH,13PPDH,BDH,Clo1313_2130,Clo1313_2130:1,,protStoichAsgnAuto,43.33800
1,RXN-13PPDH_FWD-gbsB_1798,13PPDH,gbsB_1798,Clo1313_1798,Clo1313_1798:1,,protStoichAsgnAuto,96.85200
2,RXN-13PPDH_FWD-gbsB_1827,13PPDH,gbsB_1827,Clo1313_1827,Clo1313_1827:1,,protStoichAsgnAuto,42.48978
3,RXN-2D3DGLNR_FWD-kduD,2D3DGLNR,kduD,Clo1313_0815,Clo1313_0815:1,,protStoichAsgnAuto,27.91363
4,RXN-2D3DGLNR_REV-kduD,2D3DGLNR,kduD,Clo1313_0815,Clo1313_0815:1,,protStoichAsgnAuto,27.91363
...,...,...,...,...,...,...,...,...
1736,RXN-Zn2divalent_FWD-SPONT,Zn2divalent,SPONT,SPONT,zeroCost,,protStoichAsgnManual,89.80625
1737,RXN-Zn2metal_FWD-SPONT,Zn2metal,SPONT,SPONT,zeroCost,,protStoichAsgnAuto,0.00000
1738,RXN-ZNabc_FWD-znuABC,ZNabc,znuABC,Clo1313_1688 and Clo1313_1689 and Clo1313_1690,"Clo1313_1688:1,Clo1313_1689:1,Clo1313_1690:1",,protStoichAsgnAuto,0.00000
1739,RXN-ADCS_FWD-UNKNOWN,ADCS,UNKNOWN,UNKNOWN,zeroCost,,,


In [24]:
df_enz.iloc[591]['id']

'RXN-EXCH_cell5_e_FWD-SPONT'

In [25]:
for i in df_enz.index:
    
    rxn_id = df_enz.id[i]

    _,rxn_base_id,rxn_dir,enz_id = extract_details_from_rxnid(rxn_id)
    
    if rxn_id[:len(exchange_tag)] == exchange_tag:

        print("id: ",rxn_id,"\tbase id: ",rxn_base_id,"\tdir: ",rxn_dir,"\tenz_id: ",enz_id)

    if rxn_id[:len(exchange_tag)] == exchange_tag:

        print("id: ",rxn_id,"\tbase id: ",rxn_base_id,"\tdir: ",rxn_dir,"\tenz_id: ",enz_id)

id:  RXN-EXCH_2mbtoh_e_FWD-SPONT 	base id:  EXCH_2mbtoh_e 	dir:  FWD 	enz_id:  SPONT
id:  RXN-EXCH_2mbtoh_e_FWD-SPONT 	base id:  EXCH_2mbtoh_e 	dir:  FWD 	enz_id:  SPONT
id:  RXN-EXCH_4crsol_e_FWD-SPONT 	base id:  EXCH_4crsol_e 	dir:  FWD 	enz_id:  SPONT
id:  RXN-EXCH_4crsol_e_FWD-SPONT 	base id:  EXCH_4crsol_e 	dir:  FWD 	enz_id:  SPONT
id:  RXN-EXCH_5oxpro_e_FWD-SPONT 	base id:  EXCH_5oxpro_e 	dir:  FWD 	enz_id:  SPONT
id:  RXN-EXCH_5oxpro_e_FWD-SPONT 	base id:  EXCH_5oxpro_e 	dir:  FWD 	enz_id:  SPONT
id:  RXN-EXCH_ac_e_FWD-SPONT 	base id:  EXCH_ac_e 	dir:  FWD 	enz_id:  SPONT
id:  RXN-EXCH_ac_e_FWD-SPONT 	base id:  EXCH_ac_e 	dir:  FWD 	enz_id:  SPONT
id:  RXN-EXCH_ade_e_FWD-SPONT 	base id:  EXCH_ade_e 	dir:  FWD 	enz_id:  SPONT
id:  RXN-EXCH_ade_e_FWD-SPONT 	base id:  EXCH_ade_e 	dir:  FWD 	enz_id:  SPONT
id:  RXN-EXCH_adocbl_e_FWD-SPONT 	base id:  EXCH_adocbl_e 	dir:  FWD 	enz_id:  SPONT
id:  RXN-EXCH_adocbl_e_FWD-SPONT 	base id:  EXCH_adocbl_e 	dir:  FWD 	enz_id:  SPONT
id:  RXN

In [26]:
df_eqn = pd.DataFrame(columns=['id', 'type', 'coupling_type', 'coupling_species', 'reaction'])
#c = df_eqn.shape[0] - 1
c = -1

"""
### Metabolic network reaction
# Exchange reactions
for rxn in model.reactions:
    if rxn.id[:len(exchange_tag)] == exchange_tag:
        
        met = [i for i in rxn.metabolites.keys()][0]
        
        c += 1
        new_id = 'RXN-' + rxn.id + '_FWD-SPONT'
        df_eqn.loc[c, 'id'] = new_id
        df_eqn.loc[c, 'type'] = 'metabolic'
        df_eqn.loc[c, 'reaction'] = 'MET-' + met.id + ' -->'
        
        c += 1
        new_id = 'RXN-' + rxn.id + '_REV-SPONT'
        df_eqn.loc[c, 'id'] = new_id
        df_eqn.loc[c, 'type'] = 'metabolic'
        df_eqn.loc[c, 'reaction'] = '-->' + 'MET-' + met.id
        
"""


# Reactions that are not exchange reactions
for i in df_enz.index:
    rxn_id = df_enz.id[i]
    _,rxn_base_id,rxn_dir,enz_id = extract_details_from_rxnid(rxn_id)
    
    """
    #updated to the exchange reaction tag I use
    if rxn_base_id[:len(exchange_tag)] == exchange_tag:
        continue
    """

    #skip empty items
    if rxn_base_id[:len(exchange_tag)] == exchange_tag:
        continue

    #from wheaton, for debugging, comment out when no bug problems in this section
    print(rxn_base_id,"\n")
    
    c += 1
    rxn_base = model.reactions.get_by_id(rxn_base_id)
    
    met_dict = metabolites_dict_from_reaction_equation_RBA(rxn_base.reaction)
    met_dict = {k:v for k,v in met_dict.items() if k != ''}
    met_dict = {'MET-' + k:v for k,v in met_dict.items()}
    
    if rxn_dir == 'REV':

        #if reverse, flip coefficients, I think
        met_dict = {k:-v for k,v in met_dict.items()}

    elif rxn_dir == 'FWD':

        #if forward, keep copefficients same, I think
        None

    else:
        
        #some error handling seems to exist
        print("Unknown ID that indicate reaction direction, only accepting 'FWD' and 'REV'")
    
    if enz_id not in ['SPONT', 'UNKNOWN']:
        df_eqn.loc[c, 'coupling_type'] = 'rxn_enz'
        df_eqn.loc[c, 'coupling_species'] = enz_id
    
    df_eqn.loc[c, 'id'] = rxn_id
    df_eqn.loc[c, 'type'] = 'metabolic'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(met_dict, arrow='-->')
    
### Enzyme synthesis network reaction
enz_stoich = OrderedDict()
for i in df_enz.index:
    enz_stoich[df_enz.enz[i]] = df_enz.protein_stoich[i]

c = df_eqn.shape[0] - 1

#looks like here, the protein building reactions are starting
for enz_id,prot_str in enz_stoich.items():

    if prot_str == 'zeroCost':
        continue

    #added by wheaton for debugging, comment out when not in use
    if enz_id == "pupG":
        
        print("row #: ",c,"\tenz_id: ",enz_id,"\tprot_str: ",prot_str)
    
    c += 1
    prot_str = prot_str.split(',')
    coeffs = OrderedDict({'PRO-' + i.split(':')[0]:-int(i.split(':')[1]) for i in prot_str})
    coeffs['ENZ-' + enz_id] = 1
    
    df_eqn.loc[c, 'id'] = 'ENZSYN-' + enz_id
    df_eqn.loc[c, 'type'] = 'enzyme'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(coeffs, arrow='-->')

#these ENZLOAD equations also need to have their MW listed in the enz_mw_g_per_mmol file
#initialize an enzload sting
enzload_str = ""

for i in df_enz.index:
    
    if df_enz.protein_stoich[i] == 'zeroCost':
        continue
        
    c += 1
    coeffs = OrderedDict()
    coeffs['ENZ-' + df_enz.enz[i]] = -1

    enzload_id = 'ENZLOAD-' + df_enz.id[i][4:]

    #for debugging, comment out if unused
    print("i: ",i,"\tenzload_id: ", enzload_id)
    
    df_eqn.loc[c, 'id'] = enzload_id
    df_eqn.loc[c, 'type'] = 'enzymeRxnLoad'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(coeffs, arrow='-->')

    #get the associated mw
    enz_mw = getattr(df_enz,"MW (g/mmol)")[i]

    new_line = enzload_id + "\t" + str(enz_mw) + "\n"
    enzload_str = enzload_str + new_line

#write the file resulting from enzload_str
with open('./'+def_out+'/enz_mw_g_per_mmol.txt', 'w') as f:
    f.write(enzload_str)
    
### Ribosome
c = df_eqn.shape[0] - 1

#for debugging, comment out when not using
#print(rnas)

for rna in rnas:
    
    c += 1
    rna_stoich = OrderedDict({i:0 for i in ['MET-'+rna, 'MET-atp_c', 'MET-ctp_c',
                                            'MET-gtp_c', 'MET-utp_c', 'MET-ppi_c']})
    rna_stoich['RIBO-'+rna] = 1
    rna_stoich['MET-atp_c'] = -int(df_rnas.A[rna])
    rna_stoich['MET-ctp_c'] = -int(df_rnas.C[rna])
    rna_stoich['MET-gtp_c'] = -int(df_rnas.G[rna])
    rna_stoich['MET-utp_c'] = -int(df_rnas.U[rna])
    rna_stoich['MET-ppi_c'] = int(df_rnas.loc[rna, ['A','C','G','U']].sum())
    rna_stoich['BIO-rrna'] = df_rnas.loc[rna, 'MW (g/mmol)']

    #for debugging, comment out if not used
    #print(rna_stoich)
    
    df_eqn.loc[c, 'id'] = 'RIBOSYN-' + rna
    df_eqn.loc[c, 'type'] = 'ribosome'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(rna_stoich, arrow='-->')
    
c += 1
ribo_stoich = OrderedDict()

for i in df_ribo.index:
    if df_ribo.id[i] in rnas:
        ribo_stoich['RIBO-' + df_ribo.id[i]] = -1
    else:
        ribo_stoich['PRO-' + df_ribo.id[i]] = -1
df_eqn.loc[c, 'id'] = 'RIBOSYN-ribo'
df_eqn.loc[c, 'type'] = 'ribosome'
df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(ribo_stoich, arrow='-->')

13PPDH 

13PPDH 

13PPDH 

2D3DGLNR 

2D3DGLNR 

2DHPFALDL 

2HBO 

2HBO 

34DHOXPEGOX 

34DHOXPEGOX 

3HAD100 

3HAD100 

3HAD120 

3HAD120 

3HAD140 

3HAD140 

3HAD160 

3HAD160 

3HAD180 

3HAD180 

3HAD40_1 

3HAD40_1 

3HAD60 

3HAD60 

3HAD80 

3HAD80 

3MOPDC 

3OACOAS 

3OAR100 

3OAR100 

3OAR100 

3OAR100 

3OAR120 

3OAR120 

3OAR120 

3OAR120 

3OAR140 

3OAR140 

3OAR140 

3OAR140 

3OAR160 

3OAR160 

3OAR160 

3OAR160 

3OAR18B 

3OAR18B 

3OAR18B 

3OAR18B 

3OAR40 

3OAR40 

3OAR40 

3OAR40 

3OAR60 

3OAR60 

3OAR60 

3OAR60 

3OAR80 

3OAR80 

3OAR80 

3OAR80 

3OAS100 

3OAS100 

3OAS100 

3OAS100 

3OAS100 

3OAS120 

3OAS120 

3OAS120 

3OAS120 

3OAS120 

3OAS140 

3OAS140 

3OAS140 

3OAS140 

3OAS140 

3OAS160 

3OAS160 

3OAS160 

3OAS160 

3OAS160 

3OAS60 

3OAS80 

3OAS80 

3OAS80 

3OAS80 

3OAS80 

3SALATA 

3SALATA 

3SALATA 

3SALATA 

4crsolt 

4crsolt 

4HTHRS 

4MOPDC 

5FLURAPRT2 

6TINS5MPOR 

6TXAN5MPAML 

AABHH 

AACPS3MBUT 

AACPS3MBUT 

AACPS3

#### Build files related to MW

In [27]:
#build the pro_mw_g_per_mmol.txt file

data = getattr(df_pro,'MW (g/mmol)').to_dict()
ids = getattr(df_pro,'id').to_list()

#build the string to write
prot_mw_str = ""

#for each id
for id in ids:
    
    new_line = id + "\t" + str(data[id]) + "\n"

    #for debugging, comment out if unused
    #print(new_line)

    prot_mw_str = prot_mw_str + new_line

#write to output
with open('./'+def_out+'/pro_mw_g_per_mmol.txt', 'w') as f:
    f.write(prot_mw_str)

In [28]:
#get MW list
mw_list = getattr(df_enz,"MW (g/mmol)").to_list()

#get enz list
enz_list = getattr(df_enz,"enz").to_list()

#create string for writing output file
enz_mw_str = "enz_id\tMW (g/mmol)\n"

#add to the string for enzyme entries
#need to initialize a counter
c = 0

#list to keep track of existing entries
#predefined values prevent adding unknown
existing_pro = ['nan','unknown']

for enz in enz_list:

    #only add a new line if unique entry
    if not(str(enz) in existing_pro):

        #for debugging, comment out if unused
        #print("enz: |",enz,"|, type: ",type(enz))
    
        new_line = str(enz) + "\t" + str(mw_list[c]) + "\n"

        #for debugging, comment out if unused
        #print(new_line)

        enz_mw_str = enz_mw_str + new_line

        existing_pro.append(enz)

    c = c + 1

#write to output
with open('./'+def_out+'/enz_mw_g_per_mmol_norxnmapped.txt', 'w') as f:
    f.write(enz_mw_str)

#### Protein file

In [29]:
### Protein

#stores protein lengths
pro_lengths = {}

c = df_eqn.shape[0] - 1

for i in df_pro.index:  

    c += 1

    #for debugging, comment out if not using
    #print(i)

    prot_st = OrderedDict()

    for met in ['MET-atp_c', 'MET-h2o_c',
                'MET-adp_c', 'MET-pi_c', 'MET-h_c', 'MET-gtp_c',
                'MET-gdp_c']:
        prot_st[met] = 0

    seq = df_pro.sequence[i][:-1]

    #added code to get protein lengths
    pro_lengths[i] = len(seq) - 1

    #for debugging, comment out when not using
    #print("i: ",i,"\tlen: ",pro_lengths[i])

    #print("seq: ",seq)

    #build trna stoich for protein synthesis
    for aa in df_aamap.index:

        #for debugging comment out if unused
        #print("aa: ",aa,"\tcount: ",seq.count(aa))

        prot_st[df_aamap.tRNA_in[aa]] = -seq.count(aa)
        prot_st[df_aamap.tRNA_out[aa]] = seq.count(aa)
        
    cofs_str = df_pro.cofactor_stoich[i]
    if pd.isnull(cofs_str) == False:

        #for debugging, comment out when not used
        print("protein: ",i,"\tcofacs: |",cofs_str,"|")

        cofs_st = cofs_str.split(',')
        cofs_st = OrderedDict({c.split(':')[0]:c.split(':')[1] for c in cofs_st})
        for k,v in cofs_st.items():
            prot_st['MET-' + k] = -int(v)
                
    prot_st['PRO-' + df_pro.id[i]] = 1
    
    df_eqn.loc[c, 'coupling_type'] = 'prot_ribo'
    df_eqn.loc[c, 'coupling_species'] = 'ribo'
        
    # Set protein to occupy cellular space in cytosol
    prot_st['BIO-protcyt'] = df_pro.loc[i, 'MW (g/mmol)']
    
    # Cost: Initiation: 1 ATP + 2 GTP (initiate and bind Methionine)
    # Elongation: 2 GTP / cycle
    # (elongation process excludes Methionine since it is already bound in initiation process)
    for met in ['MET-atp_c', 'MET-h2o_c']:
        prot_st[met] -= 1
    for met in ['MET-adp_c', 'MET-pi_c', 'MET-h_c']:
        prot_st[met] += 1
                
    for met in ['MET-gtp_c', 'MET-h2o_c']:
        prot_st[met] -= 2*len(seq)
    for met in ['MET-gdp_c', 'MET-pi_c', 'MET-h_c']:
        prot_st[met] += 2*len(seq)
    
    df_eqn.loc[c, 'id'] = 'PROSYN-' + df_pro.id[i]
    df_eqn.loc[c, 'type'] = 'protein'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(prot_st, arrow='-->')

#write the protein length file
len_str = "/\n"

for prot in pro_lengths:

    new_line = "'PROSYN-" + prot + "' " + str(pro_lengths[prot]) + "\n"

    #for debugging, comment out if unused
    #print(new_line)

    len_str = len_str + new_line

#finally, add in the dummy
len_str = len_str + "'PROSYN-PROTDUMMY' "+str(dummy_length)+"\n"

len_str = len_str + "/"

with open('./'+def_out+'/RBA_proteinLength.txt', 'w') as f:
    f.write(len_str)
    
### Dummy protein
prot_st = OrderedDict()
for met in ['MET-atp_c', 'MET-h2o_c',
            'MET-adp_c', 'MET-pi_c', 'MET-h_c', 'MET-gtp_c',
            'MET-gdp_c']:
    prot_st[met] = 0

seq = df_pro.sequence[i][:-1]
for aa in df_aamap.index:
    prot_st[df_aamap.tRNA_in[aa]] = -round(df_aa_dummy.N_AA[aa], 4)
    prot_st[df_aamap.tRNA_out[aa]] = round(df_aa_dummy.N_AA[aa], 4)

for met in ['MET-atp_c', 'MET-h2o_c']:
    prot_st[met] -= 1
for met in ['MET-adp_c', 'MET-pi_c', 'MET-h_c']:
    prot_st[met] += 1

for met in ['MET-gtp_c', 'MET-h2o_c']:
    prot_st[met] -= 2*dummy_medianL
for met in ['MET-gdp_c', 'MET-pi_c', 'MET-h_c']:
    prot_st[met] += 2*dummy_medianL

c += 1
prot_st['BIO-protdummy'] = dummy_MW
df_eqn.loc[c, 'id'] = 'PROSYN-PROTDUMMY'
df_eqn.loc[c, 'coupling_type'] = 'prot_ribo'
df_eqn.loc[c, 'coupling_species'] = 'ribo'
df_eqn.loc[c, 'type'] = 'protein'
df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(prot_st,
                                                        arrow='-->', floatdecimal=6)
# Protein waste reactions
c = df_eqn.shape[0] - 1
for i in df_pro.index:     
    c += 1
    prot_st = OrderedDict()
    prot_st['PRO-' + df_pro.id[i]] = -1
    
    df_eqn.loc[c, 'id'] = 'PROWASTE-' + df_pro.id[i]
    df_eqn.loc[c, 'type'] = 'proteinWaste'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(prot_st, arrow='-->')
    
### Biomass
for i in df_biom.index:
    c += 1
    df_eqn.loc[c, 'id'] = df_biom.rxn_id[i]
    df_eqn.loc[c, 'type'] = 'biomass'
    df_eqn.loc[c, 'reaction'] = df_biom.rxn_equation[i]

protein:  Clo1313_0021 	cofacs: | 4fe4s_c:1 |
protein:  Clo1313_0061 	cofacs: | fmn_c:1,4fe4s_c:2 |
protein:  Clo1313_0062 	cofacs: | fmn_c:1 |
protein:  Clo1313_0063 	cofacs: | fmn_c:1 |
protein:  Clo1313_0066 	cofacs: | 4fe4s_c:3 |
protein:  Clo1313_0068 	cofacs: | mg2_c:1 |
protein:  Clo1313_0076 	cofacs: | zn2_c:1 |
protein:  Clo1313_0098 	cofacs: | zn2_c:1 |
protein:  Clo1313_0099 	cofacs: | mg2_c:1 |
protein:  Clo1313_0101 	cofacs: | mg2_c:2 |
protein:  Clo1313_0113 	cofacs: | pydx5p_c:1 |
protein:  Clo1313_0118 	cofacs: | 4fe4s_c:1 |
protein:  Clo1313_0124 	cofacs: | 4fe4s_c:1 |
protein:  Clo1313_0137 	cofacs: | 4fe4s_c:1 |
protein:  Clo1313_0138 	cofacs: | 4fe4s_c:1 |
protein:  Clo1313_0139 	cofacs: | 4fe4s_c:1 |
protein:  Clo1313_0142 	cofacs: | 4fe4s_c:1 |
protein:  Clo1313_0152 	cofacs: | 4fe4s_c:1 |
protein:  Clo1313_0175 	cofacs: | nh4K_c:1 |
protein:  Clo1313_0185 	cofacs: | mg2_c:1 |
protein:  Clo1313_0186 	cofacs: | zn2_c:1 |
protein:  Clo1313_0217 	cofacs: | mg2_c:1 |


#### Save excel files

In [30]:
df_eqn.to_excel('./'+def_out+'/RBA_stoichiometry.xlsx', index=None)