In [1]:
import os.path as op
import json
import requests
import pandas as pd

In [1]:
def request_json(link, outfile, outdir=None, force_rerun_flag=True, timeout=30):
    """Download a JSON document from a web request and save it to disk.

    Args:
        link: URL of the web request.
        outfile: Name of the output file.
        outdir: Directory of the output file. A falsy value (``None`` or ``''``)
            makes ``outfile`` relative to the current working directory.
        force_rerun_flag: If true (the default), always re-download the file.
            If false and the output file already exists, its contents are
            loaded and returned without making a request.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled server cannot hang the notebook indefinitely.

    Returns:
        dict: contents of the JSON request

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status. Nothing is written to ``outfile`` in that case.
    """
    outfile = op.join(outdir or '', outfile)

    # Reuse a previous download only when the caller explicitly opted in.
    if not force_rerun_flag and op.exists(outfile):
        with open(outfile, 'r') as f:
            return json.load(f)

    response = requests.get(link, timeout=timeout)
    # Fail loudly instead of saving an error payload as if it were data.
    response.raise_for_status()
    my_dict = response.json()
    with open(outfile, 'w') as f:
        json.dump(my_dict, f)

    return my_dict

In [59]:
# BiGG model id substituted into every API URL built in the cells below.
bigg_model = 'iJO1366'
# Root directory under which the downloaded JSON files and generated CSVs are written.
home_dir = '/Applications/labfile/senior_design/data/database'

In [64]:
# BiGG reaction ids to export as 'Reactions' nodes.
rxn_name_list = ['DURADx', 'HXAND', 'PYROX', 'RNDR1', 'RNDR1b', 'RNDR2', 'RNDR2b', 'RNDR3',
                 'RNDR3b', 'RNDR4', 'RNDR4b', 'RNTR1c2', 'RNTR2c2', 'RNTR3c2', 'RNTR4c2', 'XAND']
rxn_dir = op.join(home_dir, 'reactions')

rxn_list = []
for rxn_name in rxn_name_list:
    # request_json saves the response under rxn_dir and returns the parsed
    # payload, so the file does not need to be read back.
    rxn_dict = request_json(
        link='http://bigg.ucsd.edu/api/v2/models/{}/reactions/{}'.format(bigg_model, rxn_name),
        outfile='{}_{}.json'.format(bigg_model, rxn_name),
        outdir=rxn_dir)

    # Node-specific columns go first so they lead the CSV header; the raw API
    # fields are merged in afterwards.
    rxn_dict_new = {
        ':ID': rxn_dict['bigg_id'],
        ':LABEL': 'Reactions',
        'Subsystem': rxn_dict['results'][0]['subsystem'],
        'Reaction_carrier': rxn_dict['name'],
        'Reaction_equation': rxn_dict['results'][0]['reaction_string'],
    }
    rxn_dict_new.update(rxn_dict)
    # The API's descriptive name is kept in 'Reaction_carrier'; 'name' is
    # overwritten with the BiGG id.
    rxn_dict_new['name'] = rxn_dict['bigg_id']
    rxn_list.append(rxn_dict_new)

rxn_df = pd.DataFrame.from_records(rxn_list)
rxn_df.to_csv(op.join(rxn_dir, 'PPP_rxn.csv'))

In [6]:
# Gather every distinct '<bigg_id>_<compartment_bigg_id>' metabolite id that
# appears in the reactions table, keeping first-seen order.
met_name_list = []
met_com = rxn_df['metabolites']
for rxn_mets in met_com:
    for met_entry in rxn_mets:
        met_id = met_entry['bigg_id'] + '_' + met_entry['compartment_bigg_id']
        if met_id not in met_name_list:
            met_name_list.append(met_id)

In [71]:
met_dir = op.join(home_dir, 'metabolites')

met_list = []
for met_name in met_name_list:
    # request_json saves the response under met_dir and returns the parsed
    # payload, so the file does not need to be read back.
    met_dict = request_json(
        link='http://bigg.ucsd.edu/api/v2/models/{}/metabolites/{}'.format(bigg_model, met_name),
        outfile='{}_{}.json'.format(bigg_model, met_name),
        outdir=met_dir)

    # Node-specific columns go first; the raw API fields are merged in afterwards.
    met_dict_new = {
        ':ID': met_name,
        ':LABEL': 'Metabolites',
        'Metabolite_name': met_dict['name'],
    }
    met_dict_new.update(met_dict)
    # Store the compartment-qualified id (the one used for ':ID') as 'bigg_id'.
    met_dict_new['bigg_id'] = met_name
    met_list.append(met_dict_new)

met_df = pd.DataFrame.from_records(met_list)
met_df.to_csv(op.join(met_dir, 'PPP_met.csv'))

In [42]:
# Gather every distinct gene id listed under the first 'results' entry of each
# reaction, keeping first-seen order.
gene_name_list = []
gene_com = rxn_df['results']
for rxn_results in gene_com:
    for gene_entry in rxn_results[0]['genes']:
        gene_id = gene_entry['bigg_id']
        if gene_id not in gene_name_list:
            gene_name_list.append(gene_id)
                   

In [137]:
gene_dir = op.join(home_dir, 'genes')

gene_list = []
for gene_name in gene_name_list:
    # request_json saves the response under gene_dir and returns the parsed
    # payload, so the file does not need to be read back. `gene_dict` stays a
    # cell-level name because a later cell displays it.
    gene_dict = request_json(
        link='http://bigg.ucsd.edu/api/v2/models/{}/genes/{}'.format(bigg_model, gene_name),
        outfile='{}_{}.json'.format(bigg_model, gene_name),
        outdir=gene_dir)

    gene_dict_new = {':ID': gene_name, ':LABEL': 'Genes'}
    gene_dict_new.update(gene_dict)
    # Move the API's 'name' value to 'translated_protein', then reuse 'name'
    # for the BiGG id. pop + re-insert keeps both keys at the end of the
    # record, matching the original column order.
    gene_dict_new['translated_protein'] = gene_dict_new.pop('name')
    gene_dict_new['name'] = gene_dict['bigg_id']
    gene_list.append(gene_dict_new)

# Use a dedicated name: the previous `met_df` here silently replaced the
# metabolite table built earlier in the notebook.
gene_df = pd.DataFrame.from_records(gene_list)
gene_df.to_csv(op.join(gene_dir, 'PPP_gene.csv'))

In [53]:
# Display whatever record `gene_dict` currently holds, to inspect the gene schema.
gene_dict

{'bigg_id': 'b4238',
 'chromosome_ncbi_accession': 'NC_000913.3',
 'database_links': {'ASAP': [{'id': 'ABE-0013865',
    'link': 'http://identifiers.org/asap/ABE-0013865'}],
  'EcoGene': [{'id': 'EG11417',
    'link': 'http://identifiers.org/ecogene/EG11417'}],
  'NCBI Entrez Gene': [{'id': '948755',
    'link': 'http://identifiers.org/ncbigene/948755'}],
  'NCBI GI': [{'id': '16132060',
    'link': 'http://identifiers.org/ncbigi/16132060'}],
  'UniProt': [{'id': 'P28903',
    'link': 'http://identifiers.org/uniprot/P28903'}]},
 'genome_name': 'NC_000913.3',
 'genome_ref_string': 'ncbi_accession:NC_000913.3',
 'leftpos': 4460521,
 'mapped_to_genbank': True,
 'model_bigg_id': 'iJO1366',
 'name': 'nrdD',
 'old_identifiers': ['b4238'],
 'reactions': [{'bigg_id': 'RNTR1c2',
   'gene_reaction_rule': '(b0684 and b3924 and b4238 and b4237) or (b2895 and b3924 and b4238 and b4237) or (b0684 and b4238) or (b2895 and b4238)',
   'name': 'Ribonucleoside-triphosphate reductase (ATP) (flavodoxin)'}

In [168]:
# One 'Reaction_Carrier' node per reaction, identified by '<reaction bigg_id>_crr'.
rxn_crr_list = [
    {
        ':ID': rxn['bigg_id'] + '_crr',
        ':LABEL': 'Reaction_Carrier',
        'name': rxn['Reaction_carrier'],
        'reaction_rule': rxn['results'][0]['gene_reaction_rule'],
        # Placeholder columns; no values are filled in here.
        'concentration': 'N/A',
        'other_info': 'N/A',
    }
    for rxn in rxn_list
]

crr_df = pd.DataFrame.from_records(rxn_crr_list)
crr_df.to_csv(home_dir + '/complexes/PPP_crr.csv')
    

In [100]:
# Metabolite -> reaction edges for every metabolite with negative stoichiometry.
rxn_rls_cons = []
# Also reset here: the production-edge cell appends to this list.
rxn_rls_prod = []
for rxn in rxn_list:
    for met_entry in rxn['metabolites']:
        if met_entry['stoichiometry'] < 0:
            rxn_rls_cons.append({
                ':START_ID': met_entry['bigg_id'] + '_' + met_entry['compartment_bigg_id'],
                ':END_ID': rxn['bigg_id'],
                # NOTE(review): 'COMSUMPTION' looks like a misspelling of
                # 'CONSUMPTION'; kept as-is because it is written to the CSV
                # and downstream consumers may match on it. Confirm before renaming.
                ':TYPE': 'COMSUMPTION',
                'stoichiometry': met_entry['stoichiometry'],
                'other_info': 'N/A',
            })

cons_df = pd.DataFrame.from_records(rxn_rls_cons)
cons_df.to_csv(home_dir + '/relationships/PPP_met_cons.csv')


In [101]:
# Reaction -> metabolite edges for every metabolite with positive stoichiometry.
# The list is initialised in this cell so that re-running the cell rebuilds the
# edges from scratch instead of appending duplicates to a list created elsewhere.
rxn_rls_prod = []
for rxn in rxn_list:
    for met_entry in rxn['metabolites']:
        if met_entry['stoichiometry'] > 0:
            rxn_rls_prod.append({
                ':START_ID': rxn['bigg_id'],
                ':END_ID': met_entry['bigg_id'] + '_' + met_entry['compartment_bigg_id'],
                ':TYPE': 'PRODUCTION',
                'stoichiometry': met_entry['stoichiometry'],
                'other_info': 'N/A',
            })

prod_df = pd.DataFrame.from_records(rxn_rls_prod)
prod_df.to_csv(home_dir + '/relationships/PPP_met_prod.csv')

In [169]:
# Carrier -> reaction edges: '<bigg_id>_crr' CARRY_OUT_RXN '<bigg_id>'.
# ':END_ID' is listed before ':START_ID' to keep the original CSV column order.
rxn_rls_crr = [
    {
        ':END_ID': rxn['bigg_id'],
        ':START_ID': rxn['bigg_id'] + '_crr',
        ':TYPE': 'CARRY_OUT_RXN',
        'kinetic_info': 'N/A',
        'other_info': 'N/A',
    }
    for rxn in rxn_list
]

rxn_crr_df = pd.DataFrame.from_records(rxn_rls_crr)
rxn_crr_df.to_csv(home_dir + '/relationships/PPP_rxn_crr.csv')

In [130]:
# Gene -> carrier edges: one per gene listed under each reaction's first
# 'results' entry, pointing at that reaction's '<bigg_id>_crr' node.
crr_rls_gene = [
    {
        ':START_ID': gene_entry['bigg_id'],
        ':END_ID': rxn['bigg_id'] + '_crr',
        ':TYPE': 'TRANSLATION',
        'Thermodynamics': 'N/A',
        'other_info': 'N/A',
    }
    for rxn in rxn_list
    for gene_entry in rxn['results'][0]['genes']
]

crr_gene_df = pd.DataFrame.from_records(crr_rls_gene)
crr_gene_df.to_csv(home_dir + '/relationships/PPP_crr_gene.csv')

In [164]:
# Load the protein-protein interaction table and display it.
ppi = pd.read_csv(home_dir + '/relationships/ppi.csv')
ppi

Unnamed: 0,node1_external_id,node2_external_id,neighborhood_on_chromosome,gene_fusion,phylogenetic_cooccurrence,homology,coexpression,experimentally_determined_interaction,database_annotated,automated_textmining,combined_score
0,511145.b2868,511145.b2866,0.89,0.892,0.778,0.0,0.297,0.709,0.9,0.608,0.999
1,511145.b2867,511145.b2866,0.89,0.788,0.778,0.0,0.135,0.856,0.9,0.609,0.999
2,511145.b2147,511145.b2146,0.915,0.567,0.733,0.0,0.772,0.761,0.9,0.763,0.999
3,511145.b2675,511145.b2235,0.347,0.0,0.151,0.0,0.969,0.395,0.952,0.752,0.999
4,511145.b2235,511145.b2234,0.643,0.0,0.778,0.0,0.986,0.986,0.976,0.931,0.999
5,511145.b2676,511145.b2234,0.345,0.0,0.0,0.0,0.969,0.4,0.952,0.682,0.999
6,511145.b2676,511145.b2675,0.894,0.456,0.778,0.0,0.992,0.548,0.952,0.849,0.999
7,511145.b2868,511145.b2867,0.88,0.899,0.778,0.0,0.149,0.861,0.9,0.735,0.999
8,511145.b4238,511145.b4237,0.676,0.0,0.778,0.0,0.613,0.272,0.54,0.906,0.998
9,511145.b1012,511145.b1007,0.811,0.0,0.718,0.0,0.15,0.0,0.8,0.343,0.993


In [167]:
# Forward interaction edges: node1 -> node2.
# The node ids look like '<prefix>.<gene id>'; only the part after the first
# '.' is used as the edge endpoint.
ppi_list1 = []
for i in range(len(ppi)):
    ppi_list1.append({
        ':START_ID': ppi['node1_external_id'][i].split('.')[1],
        ':END_ID': ppi['node2_external_id'][i].split('.')[1],
        ':TYPE': 'PROTEIN-PROTEIN INTERACTION',
        'coexpression': ppi['coexpression'][i],
        'experimentally_determined_interaction': ppi['experimentally_determined_interaction'][i],
        'automated_textmining': ppi['automated_textmining'][i],
        'combined_score': ppi['combined_score'][i],
    })

# Build the frame from the list populated above; the earlier code referenced an
# undefined `ppi_list` here and in the loop.
ppi_df1 = pd.DataFrame.from_records(ppi_list1)
ppi_df1.to_csv(home_dir + '/relationships/PPP_ppi1.csv')

In [166]:
# Reverse interaction edges: node2 -> node1.
# ':END_ID' is listed before ':START_ID' to keep the original CSV column order.
ppi_list2 = []
for row_idx in range(len(ppi['node1_external_id'])):
    ppi_list2.append({
        ':END_ID': ppi['node1_external_id'][row_idx].split('.')[1],
        ':START_ID': ppi['node2_external_id'][row_idx].split('.')[1],
        ':TYPE': 'PROTEIN-PROTEIN INTERACTION',
        'coexpression': ppi['coexpression'][row_idx],
        'experimentally_determined_interaction': ppi['experimentally_determined_interaction'][row_idx],
        'automated_textmining': ppi['automated_textmining'][row_idx],
        'combined_score': ppi['combined_score'][row_idx],
    })

ppi_df2 = pd.DataFrame.from_records(ppi_list2)
ppi_df2.to_csv(home_dir + '/relationships/PPP_ppi2.csv')