In [1]:
import cobra
import os
from cobra import Model, Reaction, Metabolite
import pandas as pd

# Project root and its data directory (absolute paths on the author's machine).
path = "/Users/maureencarey/local_documents/work/comparative_parasite_models/paradigm"
data_path = "/Users/maureencarey/local_documents/work/comparative_parasite_models/paradigm/data"
# Work from the data directory: the relative file names read in the following
# cells are resolved against the current working directory.
os.chdir(data_path)

In [2]:
# Load the iPfal17 reconstruction (relative to the current working directory).
pf_model = cobra.io.read_sbml_model("./other_models/iPfal17.xml")

# update gene identifiers
# 'name1' is used as the target ID; every other column holds an alias for it.
aliases = pd.read_csv('Pfalciparum3D7_GeneAliases.csv')
alias_columns = ['name2','name3','name4','Unnamed: 4','Unnamed: 5','Unnamed: 6','Unnamed: 7']

# alias -> target ID.
# pandas reads empty CSV cells as float NaN, not as the string 'nan', so a
# comparison against 'nan' never filters anything. Keep only real strings,
# for the alias and for the target (a NaN target would break the '.' handling
# further down).
rename_dictionary1 = dict()
for index, row in aliases.iterrows():
    target_id = row['name1']
    if not isinstance(target_id, str):
        continue
    for column in alias_columns:
        alias = row[column]
        if isinstance(alias, str):
            rename_dictionary1[alias] = target_id

# Keep only aliases that are actually gene IDs in the model. Aliases starting
# with 'MAL' are also tried with '.' replaced by '_'.
# The gene IDs are collected once into a set instead of rebuilding a list for
# every alias.
model_gene_ids = {gene.id for gene in pf_model.genes}
rename_dictionary = dict()
for key, value in rename_dictionary1.items():
    if key in model_gene_ids:
        rename_dictionary[key] = value
    elif key.startswith('MAL'):
        s = key.replace('.','_')
        if s in model_gene_ids:
            rename_dictionary[s] = value

# Truncate each target ID at its first '.' (IDs without a '.' are unchanged).
rename_dictionary = {key: value.split('.')[0]
                     for key, value in rename_dictionary.items()}

def rename_genes_updated(cobra_model, rename_dict):
    """Rename genes of ``cobra_model`` in place according to ``rename_dict``.

    NOTE(review): this looks like an adapted copy of cobra's own gene-renaming
    helper (see the inline "Added"/"Removed" remarks) -- confirm the origin.

    For every ``old_name -> new_name`` pair:

    * both genes are in the model and are different objects: the old gene is
      queued for removal and its reactions are queued so their rule is
      re-assigned at the end;
    * only the old gene is in the model: that gene object is renamed in place;
    * otherwise nothing happens (no new, reaction-less gene is created).

    Afterwards ``cobra_model.repair()`` is called, every non-empty compiled
    gene-reaction rule is rewritten with the new names, the queued reactions
    get their rule re-assigned, and the queued genes are removed from
    ``cobra_model.genes``.

    Parameters
    ----------
    cobra_model
        Model whose ``genes`` list and reaction rules are modified.
    rename_dict
        Mapping of old gene ID -> new gene ID.

    Returns
    -------
    None
    """

    from six import iteritems
    from ast import NodeTransformer
    from cobra.core import Gene, Metabolite, Reaction
    from cobra.core.gene import ast2str
    from cobra.manipulation.delete import get_compiled_gene_reaction_rules

    recompute_reactions = set()  # reactions whose gene rule must be re-assigned
    remove_genes = []  # old genes superseded by an already-present new gene
    for old_name, new_name in iteritems(rename_dict):
        # Result is undefined if a value also appears as a different key,
        # because the outcome then depends on dict iteration order.
        try:
            gene_index = cobra_model.genes.index(old_name)
        except ValueError:
            # old_name is not a gene in this model
            gene_index = None
        old_gene_present = gene_index is not None
        new_gene_present = new_name in cobra_model.genes
        if old_gene_present and new_gene_present:
            old_gene = cobra_model.genes.get_by_id(old_name)
            if old_gene != cobra_model.genes.get_by_id(new_name): # Added in case not renaming some
                remove_genes.append(old_gene)
                recompute_reactions.update(old_gene._reaction)
        elif old_gene_present and not new_gene_present:
            # rename old gene to new gene
    #         old_gene = cobra_model.genes.get_by_id(old_name) #added
            gene = cobra_model.genes[gene_index]
            # trick DictList into updating index: drop the old ID from the
            # private lookup dict, change the ID, then re-assign the same slot
            cobra_model.genes._dict.pop(gene.id)  # ugh
            gene.id = new_name
            cobra_model.genes[gene_index] = gene
    #         recompute_reactions.update(old_gene._reaction) # ADDED
        elif not old_gene_present and new_gene_present:
            pass # already fixed
        else:  # not old gene_present and not new_gene_present
            # Deliberately a no-op: the original behaviour of appending a new
            # Gene here was removed (see the commented line below).
            # cobra_model.genes.append(Gene(new_name)) # Removed, otherwise, adds genes that are unassigned to reactions
            pass

    cobra_model.repair()

    class Renamer(NodeTransformer):
        """AST transformer that swaps gene names found in ``rename_dict``."""

        def visit_Name(self, node):
            # Names without an entry in rename_dict are left unchanged.
            node.id = rename_dict.get(node.id, node.id)
            return node

    gene_renamer = Renamer()
    # Rewrite the stored rule string of every reaction that has a compiled rule.
    for rxn, rule in iteritems(get_compiled_gene_reaction_rules(cobra_model)):
        if rule is not None:
            rxn._gene_reaction_rule = ast2str(gene_renamer.visit(rule))

    # Re-assign the rule through the public attribute for reactions that used a
    # superseded gene (presumably so cobra rebuilds the gene links -- TODO confirm).
    for rxn in recompute_reactions:
        rxn.gene_reaction_rule = rxn._gene_reaction_rule
    for i in remove_genes:
        cobra_model.genes.remove(i)
    
# Apply the alias-derived renames; pf_model is modified in place.
rename_genes_updated(cobra_model=pf_model, rename_dict=rename_dictionary)

In [3]:
# ana's curation for glutathione
# HMGLB: add +1 h2o2_c and -2 h_c to the stoichiometry and record the references.
hmglb = pf_model.reactions.get_by_id('HMGLB')
hmglb.add_metabolites({
    pf_model.metabolites.get_by_id('h2o2_c'): 1.,
    pf_model.metabolites.get_by_id('h_c'): -2.,
})
hmglb.notes['References'] = 'doi: 10.1073/pnas.0601876103; DOI: 10.1111/j.1365-2141.1975.tb00540.x'

# Metabolites already in the model that the new reactions use.
pheme_c, gthrd_c, gthox_c, pheme_fv, h2o2_c = (
    pf_model.metabolites.get_by_id(met_id)
    for met_id in ('pheme_c', 'gthrd_c', 'gthox_c', 'pheme_fv', 'h2o2_c')
)

# New 'degraded heme' metabolites (empty formula), one per compartment.
heme_degraded_c = Metabolite(
    'heme_degraded_c', formula='', name='degraded heme', compartment='c')
heme_degraded_fv = Metabolite(
    'heme_degraded_fv', formula='', name='degraded heme', compartment='fv')

# gthrd_heme: pheme_c + gthrd_c --> gthox_c + heme_degraded_c
new_rxn = Reaction('gthrd_heme', name='gthrd_heme',
                   lower_bound=0., upper_bound=1000.)
new_rxn.add_metabolites({
    pheme_c: -1,
    gthrd_c: -1,
    gthox_c: +1,
    heme_degraded_c: +1,
})
new_rxn.notes['References'] = 'doi: 10.1074/jbc.270.42.24876'
pf_model.add_reactions([new_rxn])

# perox_heme: pheme_fv + h2o2_c --> heme_degraded_fv
new_rxn = Reaction('perox_heme', name='perox_heme',
                   lower_bound=0., upper_bound=1000.)
new_rxn.add_metabolites({
    pheme_fv: -1,
    h2o2_c: -1,
    heme_degraded_fv: +1,
})
new_rxn.notes['References'] = 'doi: 10.1042/bj1740893'
pf_model.add_reactions([new_rxn])

# Sinks for both degraded-heme pools.
# NOTE(review): lb=0 is requested, but the output recorded for this cell shows
# a lower bound of -1000 for SK_heme_degraded_fv -- confirm how the installed
# cobra version treats 'lb' for type="sink".
# The second call stays the last expression so its summary is displayed.
pf_model.add_boundary(
    heme_degraded_c, type="sink", reaction_id="SK_heme_degraded_c",
    lb=0, ub=1000.0)
pf_model.add_boundary(
    heme_degraded_fv, type="sink", reaction_id="SK_heme_degraded_fv",
    lb=0, ub=1000.0)


0,1
Reaction identifier,SK_heme_degraded_fv
Name,degraded heme sink
Memory address,0x011242e160
Stoichiometry,heme_degraded_fv <=> degraded heme <=>
GPR,
Lower bound,-1000.0
Upper bound,1000.0


In [4]:
# Make dictionary to make all metabolite IDs compatible with bigg
# IN FUTURE, EXPAND TO ALL MODELS # for model in [pf_curated, chominis, leish]:
universal_model = cobra.io.load_json_model('universal_model_may2018.json')

# Universal metabolite IDs with their last two characters removed (presumably
# the '_<compartment>' suffix). Built once as a set instead of rebuilding a
# list for every model metabolite.
universal_base_ids = {y.id[:-2] for y in universal_model.metabolites}

d_list = list()            # model metabolite IDs that contain '_D_'
mets_not_in_bigg = list()  # model metabolite IDs with no universal counterpart
for met in pf_model.metabolites:
    # the '_ap' suffix is three characters long, every other suffix two
    suffix_len = 3 if met.id.endswith('_ap') else 2
    candidate_id = met.id
    if '_D_' in met.id:
        d_list.append(met.id)
        # Rewrite BEFORE stripping the suffix: for an ID such as 'xxx_D_c' the
        # stripped form 'xxx_D' no longer contains '_D_', so rewriting after
        # stripping never changed it. IDs that already contain '__' are left
        # alone, matching the rename loop below.
        if '__' not in met.id:
            candidate_id = met.id.replace('_D_','__D_')
    if candidate_id[:-suffix_len] not in universal_base_ids:
        mets_not_in_bigg.append(met.id)
        # # print("'"+met.id+"'"+":"+"'',") ## COPY AND PASTE THIS TO NEXT SECTION

# switch _D_ to __D_ to be BiGG compatible
for met_id in [x for x in d_list if '__' not in x]:
    met = pf_model.metabolites.get_by_id(met_id)
    met.name = met.name.replace('_D','__D')
    met.id = met.id.replace('_D_','__D_')
       

In [5]:
# Manually curated map: current model metabolite ID -> ID it should carry.
# Entries whose key equals their value are intentionally kept as they are (the
# trailing comment gives the reason); only entries with a different value lead
# to a rename in the loop that consumes this dict.
# NOTE(review): the '_L_' IDs are not handled uniformly (m5/m6/m7mpdol_L_c lose
# the 'L', m4/m8mpdol_L_c become '__L') -- confirm the intended BiGG IDs.
met_dict = {'3oodcoa_c':'3ohodcoa_c', 
#'5mti_c':'',
'5mtr1p_c':'5mtr1p_c', # new met
'Asn_X_Ser_FSLASH_Thr_c':'Asn_X_Ser_Thr_c',
'acgpail_c':'acgpail_c', # consistent, i.e. acgpail_hs
'cdpdag_c':'cdpdag_c', # consistent, species specific IDs
'cdpdag_e':'cdpdag_e', # consistent, species specific IDs
'crm_c':'crm_c', # consistent i.e. crm_hs
'dag_c':'dag_c', # consistent i.e. dag_hs
'dag_e':'dag_e', # consistent i.e. dag_hs
'dhcrm_c':'dhcrm_c', # consistent i.e. dhcrm_hs
'doldp_L_c':'doldp_c',
'dolmanp_L_c':'dolmanp_c',
'g3m8mpdol_L_c':'g3m8mpdol_c',
'gacpail_c':'gacpail_c', # consistent i.e. gacpail_cho
'glc__D_e_c':'glc__D_c', # CHECK THAT ITS C
'gluside_c':'gluside_c', # consistent i.e. gluside_hs
'gpail_c':'gpail_c', # consistent i.e. gpail_hs
'hb_c':'hb_c', # hemoglobin, new metabolite
'hb_e':'hb_e', # hemoglobin, new metabolite
'hcys_L_c':'hcys__L_c',
'lgt_S_c':'lgt__S_c',
'lpchol_c':'lpchol_c', # consistent i.e. lpchol_hs
'm5mpdol_L_c':'m5mpdol_c', 
'm6mpdol_L_c':'m6mpdol_c',
'm7mpdol_L_c':'m7mpdol_c',
'pail_c':'pail_c', # consistent i.e. pail_hs
'pc_c':'pc_c', # 'host' pc (aggregate)
'pc_e':'pc_e', # 'host' pc (aggregate)
'pe_c':'pe_c', # 'host' pe (aggregate)
'pe_e':'pe_e', # 'host' pe (aggregate)
'ptd145bp_c':'ptd145bp_c', # consistent, species specific IDs
'ptd1ino_c':'ptd1ino_c', # consistent, species specific IDs
'ptd1ino_e':'ptd1ino_e', # consistent, species specific IDs
'ptd3ino_c':'ptd3ino_c', # consistent, species specific IDs
'ptd4ino_c':'ptd4ino_c', # consistent, species specific IDs
'saccrp_L_c':'saccrp__L_c',
'sertrna_sec__c':'sertrna_sec_c',
'sphmyln_c':'sphmyln_c', # consistent i.e. sphmyln_hs
'tag_c':'tag_c', # AGGREGATE
'up4u_c':'up4u_c', #new metabolite
'xolest2_c':'xolest2_c', # consistent, i.e. xolest2_hs
'xolest2_e':'xolest2_e', # consistent, i.e. xolest2_hs
'pail_e':'pail_e', # consistent i.e. pail_hs
'Asn_X_Ser_FSLASH_Thr_e':'Asn_X_Ser_Thr_e',
'naglc2p_L_c':'naglc2p__L_c',
'm4mpdol_L_c':'m4mpdol__L_c',
'm8mpdol_L_c':'m8mpdol__L_c',
'hemeA_fv':'hemeA_fv', # new compartment
#'psertrna_sec_c':'',
#'pnte_c':'',
#'2aeth_c':'',
'hemozoin_fv':'hemozoin_fv', # new met and compartment
'hemozoin_e':'hemozoin_e', # new met 
# 'pyrdat_c':'',
'gluside_e':'gluside_e', # consistent i.e. gluside_hs
'mgacpail_c':'mgacpail_c', # consistent i.e. mgacpail_hs
'sphmyln_e':'sphmyln_e', # consistent i.e. sphmyln_hs
'ROOH_ap':'ROOH_ap', # aggregate
'ROH_ap':'ROH_ap', # aggregate
'protein_t_c':'protein_t_c', # host specific
'gthox_protein_c':'gthox_protein_c', # host specific
'gthox_protein_e':'gthox_protein_e', # host specific
'protein_t_e':'protein_t_e', # host specific
'ROOH_c':'ROOH_c', # aggregate
'ROH_c':'ROH_c', # aggregate
'ROOH_m':'ROOH_m', # aggregate
'ROH_m':'ROH_m', # aggregate
'proteinSS_c':'protdt_c',
'proteinSHSH_c':'protds_c',
'proteinSS_ap':'protdt_ap',
'proteinSHSH_ap':'protds_ap',
'proteinSS_m':'protdt_m',
'proteinSHSH_m':'protds_m',
'all_pc_c':'all_pc_c', # AGGREGATE
'all_pe_c':'all_pe_c', # AGGREGATE
'all_ps_c':'all_ps_c', # AGGREGATE
'all_pi_c':'all_pi_c', # AGGREGATE
'all_pg_c':'all_pg_c', # AGGREGATE
'all_apg_c':'all_apg_c', # AGGREGATE
'all_dgl_c':'all_dgl_c', # AGGREGATE
'lipid_c':'lipid_c'} # AGGREGATE

# Give every metabolite listed in met_dict its mapped ID; metabolites that are
# not listed, or that map to their current ID, are left untouched.
for x in pf_model.metabolites:
    mapped_id = met_dict.get(x.id, x.id)
    if mapped_id != x.id:
        x.id = mapped_id

In [6]:
# THINGS TO CURATE
# The reaction count is printed before and after each change as a quick check.
print(len(pf_model.reactions))

# duplicate with EX_hcys___L_e
hcys_exchange = pf_model.reactions.get_by_id('hcys_ex')
hcys_exchange.remove_from_model()
print(len(pf_model.reactions))

# add an exchange reaction for protein_t_e
protein_t_e = pf_model.metabolites.get_by_id('protein_t_e')
pf_model.add_boundary(protein_t_e, type="exchange")
print(len(pf_model.reactions))

# [x.reaction for x in pf_curated.metabolites.get_by_id('protein_t_e').reactions]
# # pf_curated.reactions.PUNP8 # CURATED SOMETHING WRONG
# pf_curated.reactions.UP4UH1
# # pf_curated.reactions.get_by_id('PYRDAT') # SOMETHING WRONG, genes don't make sense
# # not bigg canocnical rxns PNTK2, PNTH

1196
1195
1196


In [7]:
# temp_dict = dict() # get products and reactants for every reaction
# for rxn in universal_model.reactions:
#     rxn_dict = dict()
#     check_rxn_products = rxn.products
#     check_rxn_reactants = rxn.reactants
#     rxn_dict['reactants'] = [x.id for x in check_rxn_reactants]
#     rxn_dict['products'] = [x.id for x in check_rxn_products]
#     temp_dict[rxn.id] = rxn_dict

# # list duplicate reactions
# l = list() # list of duplicate reactions, bounds might be different
# for key, value in temp_dict.items():
#     temp_dict2 = temp_dict.copy()
#     del temp_dict2[key]
#     if value in [value2 for value2 in temp_dict2.values()]:
#         l.append(key)

# # map duplicate reactions to each other
# for x in l: 
#     if x in skip_these:
#         continue
#     else:
#         reactants_products = temp_dict[x] 
#         usable_l = list(set(l) - set(skip_these) - set([x]))
#         all_other_rxns_reactants_products = [temp_dict[y] for y in usable_l]
#         if reactants_products in all_other_rxns_reactants_products:
#             del temp_dict[x]
#             keys_to_delete = list()
#             for key in temp_dict.keys():
#                 if temp_dict[key]['products'] == reactants_products['products'] and \
#                 temp_dict[key]['reactants'] == reactants_products['reactants']:
#                     keys_to_delete.append(key)
#             for keys_delete in keys_to_delete:
#                 del temp_dict[keys_delete]
#                 skip_these.append(keys_delete)
#             duplicates2[x] = keys_to_delete
#     skip_these.append(x)
    
# # print
# for key, value in duplicates2.items():
#     print(key)
#     print(universal_model.reactions.get_by_id(key).reaction)
#     print(universal_model.reactions.get_by_id(key).bounds)
    
#     print(value)
#     for x in value:
#         print(universal_model.reactions.get_by_id(x).reaction)
#         print(universal_model.reactions.get_by_id(x).bounds)
#     print('')

In [8]:
# similarity_exceptions = duplicates2
import json
# class JSONEncoder(json.JSONEncoder):
#     def default(self, obj):
#         if hasattr(obj, 'to_json'):
#             return obj.to_json(orient='records')
#         return json.JSONEncoder.default(self, obj)
# with open('similarity_exceptions_july23.json', 'w') as fp:
#     json.dump(similarity_exceptions, fp, cls=JSONEncoder)
#     # THESE ARE REACTIONS WITH DIFFERENT IDS BUT HAVE SIMILAR/SAME FUNCTION

# Load the previously saved map of reactions that have different IDs but
# similar/same function. The 'with' block closes the file handle, which a bare
# json.load(open(...)) leaves open.
with open('/Users/maureencarey/local_documents/work/comparative_parasite_models/paradigm/similarity_exceptions_july23.json') as fp:
    similarity_exceptions = json.load(fp)
# 

In [9]:
# FIND/ REMOVE DUPLICATE REACTIONS
# Goal: for every universal reaction, find the model reactions that have a
# different ID but exactly the same ordered reactant and product ID lists.

duplicates = dict()  # universal reaction ID -> model reaction ID(s), joined by ', '

# get products and reactants for every reaction
temp_dict = {
    rxn.id: {'reactants': [x.id for x in rxn.reactants],
             'products': [x.id for x in rxn.products]}
    for rxn in pf_model.reactions
}

model_rxn_ids = {x.id for x in pf_model.reactions}
print('temp dict made')
print([key for key in temp_dict.keys() if key not in model_rxn_ids])

# Index the model reactions by (reactants, products) so each universal reaction
# needs one lookup, instead of rebuilding its ID lists for every model
# reaction. The lists keep model order, so matches are visited in the same
# order as in a nested scan over temp_dict.
signature_index = dict()
for pf_id, sides in temp_dict.items():
    signature = (tuple(sides['reactants']), tuple(sides['products']))
    signature_index.setdefault(signature, []).append(pf_id)

for rxn in universal_model.reactions:
    signature = (tuple(x.id for x in rxn.reactants),
                 tuple(x.id for x in rxn.products))
    for key in signature_index.get(signature, ()):
        if key == rxn.id:
            continue  # same ID in both models: not a rename candidate
        if rxn.id not in duplicates.keys():
            duplicates[rxn.id] = key
        elif key in duplicates[rxn.id]:
            # NOTE(review): this is a substring test on the joined string, so
            # an ID contained in an already recorded ID (e.g. 'ABC' in 'ABC2')
            # is skipped as well. Kept as-is because the renaming step expects
            # single-ID values -- confirm before changing.
            continue
        else:
            duplicates[rxn.id] = duplicates[rxn.id]+', '+key

print('duplicates made')
print([value for value in duplicates.values() if value not in model_rxn_ids])

temp dict made
[]
duplicates made
[]


In [10]:
# #  replace pf reaction ids with BiGG id for same reaction string
# # NOTE: some have different reaction bounds. see:
# for key, value in duplicates.items():
#     print(value)
#     print(pf_curated.reactions.get_by_id(value).reaction)
#     print(pf_curated.reactions.get_by_id(value).bounds)
#     print(key)
#     print(universal_model.reactions.get_by_id(key).reaction)
#     print(universal_model.reactions.get_by_id(key).bounds)
#     print(' ')

# Reaction IDs that are never renamed by this step.
protected_ids = ['SUCCt2r','MALt2r','FUMt2r','ASPt2r']

exception_keys = similarity_exceptions.keys()
# NOTE(review): if the JSON values are lists of IDs (as the commented-out code
# that produced the file suggests), a plain ID never equals a value and the
# '.values()' tests below are always False -- confirm the file layout.
exception_values = list(similarity_exceptions.values())

# Current model reaction IDs, kept in sync with the renames done in the loop so
# the list does not have to be rebuilt for every pair.
model_rxn_ids = {x.id for x in pf_model.reactions}

for key, value in duplicates.items():
    # key is universal id
    # value is pf id
    if key in protected_ids or value in protected_ids:
        continue
    elif (key in exception_keys or key in exception_values) and \
    (value in exception_keys or value in exception_values):
        # Both IDs are known similarity exceptions. The second clause used to
        # test 'key' against the values again instead of 'value'.
        continue # continue to next key value pair
    elif key in model_rxn_ids:
#         print('replacement string')
        print(key)
        print('conflicts with reaction in model:')
        print(pf_model.reactions.get_by_id(key).reaction)
        print('but we wanted to use that id for:')
        print(value)
        # 'value' can be a ', '-joined list of IDs, which is not a reaction ID
        if value in model_rxn_ids:
            print(pf_model.reactions.get_by_id(value).reaction)
    elif value in model_rxn_ids:
        pf_model.reactions.get_by_id(value).id = key # now BiGG compatible
        model_rxn_ids.discard(value)
        model_rxn_ids.add(key)

pf_model.reactions.get_by_id('ATPtm').id = 'ATPPHm'
# pf_model.reactions.get_by_id('ATPADPexm').id = 'ATPtm'

ACCOAL
conflicts with reaction in model:
ac_c + atp_c + coa_c --> accoa_c + adp_c + pi_c
but we wanted to use that id for:
ACCOAL2
atp_c + coa_c + ppa_c --> adp_c + pi_c + ppcoa_c


In [11]:
# Switch to the other_models folder and save the curated model there as SBML.
other_models_dir = data_path+'/other_models'
os.chdir(other_models_dir)
cobra.io.write_sbml_model(pf_model, "iPfal18_pregapfilling.xml")

In [12]:
# Quick check that the curated model still solves; the value returned by
# slim_optimize() is displayed as the cell output.
pf_model.slim_optimize()

29.866835591947282