In [3]:
from cobra.io import read_sbml_model
from cobra.io import write_sbml_model
from cobra import Model, Reaction, Metabolite

import Simulator
import cobra
import copy
import pandas as pd
import numpy as np
import pandas as pd
import os
import glob
import time


def run_MetScore_simulation( filename, biomass_rxn, target_rxn, constdic={} ):
    simulator_obj = Simulator.Simulator()
    simulator_obj.read_model( filename )
    a,b,flux_dic = simulator_obj.run_FBA(internal_flux_minimization = True)
    wild_opt_flux = flux_dic
    print '#constraints', constdic
    #constdic[ biomass_rxn ] = [ 0.0 , 1000 ]
    
    new_objective_target = target_rxn        
    
    a,b,flux_dic = simulator_obj.run_FBA(new_objective = new_objective_target, flux_constraints = constdic, internal_flux_minimization = True, mode='max')
    max_const = flux_dic[ new_objective_target ]
     
    a,b,flux_dic = simulator_obj.run_FBA(new_objective = new_objective_target, flux_constraints = constdic, internal_flux_minimization = True, mode='min')    
    min_const = flux_dic[ new_objective_target ]    

    flux_dist_dic_set = {}   
    
    count = 1    
    for each_flux_const in np.linspace(min_const, max_const, 10):
        constdic2 = {}
        constdic2 = copy.deepcopy(constdic)
        count+=1
        constdic2[ new_objective_target ] = [ each_flux_const-each_flux_const*0.05, each_flux_const+each_flux_const*0.05]
        
        stat,obj_val,opt_flux_dic = simulator_obj.run_FBA(new_objective = biomass_rxn, flux_constraints = constdic2, mode='max')
        print opt_flux_dic[biomass_rxn]
        
        if stat != 2:
            continue
        
        constdic2[ biomass_rxn ] = [opt_flux_dic[biomass_rxn]-opt_flux_dic[biomass_rxn]*0.05, opt_flux_dic[biomass_rxn]+opt_flux_dic[biomass_rxn]*0.05]
        print constdic2
        
        stat,obj_val,flux_dic = simulator_obj.run_MOMA( wild_flux=wild_opt_flux, flux_constraints=constdic2)

        if stat == 2:
            flux_dist_dic_set[ each_flux_const ] = flux_dic
            
        if abs(flux_dic[biomass_rxn]) <= 10e-6:
            break

    flux_df = pd.DataFrame.from_dict(flux_dist_dic_set)
    flux_corr_df = flux_df.abs().T.corr()
    flux_cov_df = flux_df.abs().T.cov()
    return flux_df, flux_corr_df, flux_cov_df

def calculate_MetScore_sum(cobra_model, covariance_data):
    branch_metabolite_data = {}
    neighboring_rxns = {}

    for each_metabolite in cobra_model.metabolites:
        branch_metabolite_data[each_metabolite.id] = 0.0
        reactions = each_metabolite.reactions
        
        count = 0
        for each_reaction in reactions:
            reactants = [met.id for met in each_reaction.reactants]
            if each_metabolite.id in reactants:
                if each_reaction.id in covariance_data:
                    branch_metabolite_data[each_metabolite.id] += abs(covariance_data[each_reaction.id])
    
    return branch_metabolite_data

def select_candidates(cobra_model, target_reaction, output_file, metscore_df, corr_df, cov_df, corr_threshold=0, cov_threshold=0.1):
    pcorr_df = corr_df[corr_df > corr_threshold]
    pcov_df = cov_df[cov_df > cov_threshold]
    pcorr_df = pcorr_df.dropna()
    pcov_df = pcov_df.dropna()
    
    positive_candidate_reactions = list(set(pcorr_df.index)&set(pcov_df.index))
    
    ncorr_df = corr_df[corr_df < -corr_threshold]
    ncov_df = cov_df[cov_df < -cov_threshold]
    ncorr_df = ncorr_df.dropna()
    ncov_df = ncov_df.dropna()
    
    negative_candidate_reactions = list(set(ncorr_df.index)&set(ncov_df.index))
    
    print len(positive_candidate_reactions), len(negative_candidate_reactions)
    
    fp = open(output_file, 'w')
    print >>fp, '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s'%('Metabolite', 'Score', 'Normalized score', 'No. of reactions', 'No. of positive reactions', 'No. of negative reactions', 'candidate reactions', 'positive reactions', 'negative reactions', 'Positive score', 'Negative score')
    for each_row, each_df in metscore_df.iterrows():
        cobra_metabolite = cobra_model.metabolites.get_by_id(each_row)
        candidate_reactions = []

        for each_reaction in cobra_metabolite.reactions:
            for each_reactant in each_reaction.reactants:
                if each_reactant.id == each_row:
                    candidate_reactions.append(each_reaction.id)
        
        candidate_reactions = list(set(candidate_reactions))   
        pos_candidate_reactions = list(set(positive_candidate_reactions)&set(candidate_reactions))
        neg_candidate_reactions = list(set(negative_candidate_reactions)&set(candidate_reactions))
        
        positive_score = 0.0
        negative_score = 0.0
        for rxn in pos_candidate_reactions:
            positive_score+=float(pcov_df.loc[rxn])
        for rxn in neg_candidate_reactions:
            negative_score+=float(ncov_df.loc[rxn])
        if each_df[target_reaction] != 0.0 and np.sqrt(float(len(candidate_reactions))) != 0.0:
            normalized_score = each_df[target_reaction]/np.sqrt(float(len(candidate_reactions)))
        else:
            normalized_score = 0.0
        
        print >>fp, '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s'%(each_row, each_df[target_reaction], normalized_score, len(candidate_reactions), len(pos_candidate_reactions), len(neg_candidate_reactions), ';'.join(candidate_reactions), ';'.join(pos_candidate_reactions), ';'.join(neg_candidate_reactions),positive_score, negative_score)
    
    fp.close()
    return

def read_target_rxn(filename):
    fp = open(filename, 'r')
    lines = fp.read()
    lines = lines.replace('[', '')
    lines = lines.replace(']', '')
    lines = lines.replace("'", "")
    fp.close()
    target_rxn = lines.strip().split(',')
    print len(target_rxn)
    return target_rxn
    

def write_header(filename, taget_id):
    fp = open(filename, 'r')
    lines = fp.read()
    fp.close()
    fp = open(filename, 'w')
    print >>fp, '%s,%s'%('reaction', taget_id)
    print >>fp, lines.strip()
    fp.close()
    return

if __name__ == '__main__':
    #target_rxn_list = read_target_rxn('./target_product_reactions.txt')
    model_file = './input/ijo1366_IRR_indirubin.xml'
    original_model_file = './input/ijo1366_IRR.xml'
    biomass_rxn = 'Ec_biomass_iJO1366_core_53p95M'
    output_dir = './results/'
    target_rxn_list = ['EX_INDIRUBIN_LPAREN_e_RPAREN_']
    
    #Strain phenotype
    constrict={}
    constrict['PYK'] = [0.0, 0.0]
    
    
    cobra_model = read_sbml_model(model_file)
    original_cobra_model = read_sbml_model(original_model_file)

    for target_reaction in target_rxn_list:
        print target_reaction
        model_reactions = [reaction.id for reaction in cobra_model.reactions]

        corr_output = output_dir+'corr_%s.csv'%(target_reaction)
        cov_output = output_dir+'cov_%s.csv'%(target_reaction)

        flux_df, flux_corr_df, flux_cov_df = run_MetScore_simulation(model_file, biomass_rxn, target_reaction, constrict)
        flux_corr_df = flux_corr_df[target_reaction].ix[model_reactions]
        flux_corr_df.to_csv(corr_output)
        flux_cov_df = flux_cov_df[target_reaction].ix[model_reactions]
        flux_cov_df.to_csv(cov_output)

        write_header(corr_output, target_reaction)
        write_header(cov_output, target_reaction)

        df = pd.read_csv(cov_output, index_col=0, header=0)
        
        final_dic = {}
        for each_col in df.columns:
            final_dic[each_col]={}
            fluxsum_dic = calculate_MetScore_sum(cobra_model,  dict(df[each_col]))
            final_dic[each_col]=fluxsum_dic

        final_df = pd.DataFrame.from_dict(final_dic)
        final_df.to_csv(output_dir+'MetScore_%s.csv'%(target_reaction))

        flux_corr_df = pd.read_csv(corr_output, index_col=0)
        flux_cov_df = pd.read_csv(cov_output, index_col=0)
        output_file = output_dir+'Final_MetScore_%s.txt'%(target_reaction)
        select_candidates(cobra_model, target_reaction, output_file, final_df, flux_corr_df, flux_cov_df, 0, 0.0)
        
    




EX_INDIRUBIN_LPAREN_e_RPAREN_
#constraints {'PYK': [0.0, 0.0]}
0.982371812726
{'EX_INDIRUBIN_LPAREN_e_RPAREN_': [0.0, 0.0], 'PYK': [0.0, 0.0], 'Ec_biomass_iJO1366_core_53p95M': [0.9332532220900549, 1.0314904033626922]}
0.880321706871
{'EX_INDIRUBIN_LPAREN_e_RPAREN_': [0.5622737686139749, 0.6214604810996565], 'PYK': [0.0, 0.0], 'Ec_biomass_iJO1366_core_53p95M': [0.8363056215274212, 0.9243377922145182]}
0.777386552028
{'EX_INDIRUBIN_LPAREN_e_RPAREN_': [1.1245475372279499, 1.242920962199313], 'PYK': [0.0, 0.0], 'Ec_biomass_iJO1366_core_53p95M': [0.7385172244264583, 0.8162558796292433]}
0.674451397185
{'EX_INDIRUBIN_LPAREN_e_RPAREN_': [1.6868213058419248, 1.8643814432989694], 'PYK': [0.0, 0.0], 'Ec_biomass_iJO1366_core_53p95M': [0.6407288273255901, 0.7081739670440733]}
0.571516242342
{'EX_INDIRUBIN_LPAREN_e_RPAREN_': [2.2490950744558997, 2.485841924398626], 'PYK': [0.0, 0.0], 'Ec_biomass_iJO1366_core_53p95M': [0.5429404302246883, 0.6000920544588659]}
0.46858183453
{'EX_INDIRUBIN_LPAREN_e_R

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


171 464


In [6]:
cobra_model = cobra.io.load_json_model("./universal_model.json")
write_sbml_model(cobra_model, './bigg_universal_model.xml', use_fbc_package=False)


import pandas as pd
import os
import glob
from cobra.io import read_sbml_model
from cobra.io import write_sbml_model
import numpy as np

def make_candidate_reaction_sets(df, baasename):
    ex_metabolites = []
    with open('./excluded_metabolites.txt', 'r') as fp:
        for line in fp:
            ex_metabolites.append(line.strip())
            
    score_info = {}
    for each_met, each_df in df.groupby('Metabolite'):
            
        if each_met[-1] == 'c':
            pos_reaction_num = each_df['No. of positive reactions'].values[0]
            neg_reaction_num = each_df['No. of negative reactions'].values[0]

            pos_score = each_df['Positive score'].values[0]
            neg_scroe = each_df['Negative score'].values[0]
            
            final_pos_scroe = pos_score/np.sqrt(1+pos_reaction_num)
            final_neg_scroe = neg_scroe/np.sqrt(1+neg_reaction_num)
            score_info[each_met] = [final_pos_scroe, final_neg_scroe]

    negative_score_info = {}
    positive_score_info = {}
    
    for met in score_info:
        if abs(score_info[met][0]) > abs(score_info[met][1]):
            positive_score_info[met] = score_info[met][0]
        else:
            negative_score_info[met] = score_info[met][1]
      
    fp = open('./application_results/Candidate_%s'%(baasename), 'w')
    print >>fp, '%s\t%s\t%s\t%s'%('Negative metabolite', 'Positive metabolite', 'Negative score', 'Positive score')
    for negative_met in negative_score_info:
        negative_score = negative_score_info[negative_met]
        
        for positive_met in positive_score_info:
            
            if negative_met in ex_metabolites or positive_met in ex_metabolites:
                continue
            
            positive_score = positive_score_info[positive_met]
            print >>fp, '%s\t%s\t%s\t%s'%(negative_met, positive_met, negative_score, positive_score)
            
    fp.close()
    
files = glob.glob('./results/Final_*.txt')
print len(files)
for filename in files:
    baasename = os.path.basename(filename)
    df = pd.read_table(filename)
    make_candidate_reaction_sets(df, baasename)


1


In [7]:
def check_reaction(filename, original_model_met_info, universal_model_met_info, flux_corr_df, flux_cov_df):
    
    
        
    basename = os.path.basename(filename)
    ex_reaction_id = basename.split('Final_MetScore_')[1].strip()
    ex_reaction_id = ex_reaction_id.replace('.txt', '')
    
    df = pd.read_table(filename)
    fp = open('./application_result2/New_reaction_candidate_%s'%(basename), 'w')
    print >>fp, '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s'%('Target', 'Gene source', 'Negative metabolite', 
                                                  'Positive metabolite', 'Negative score', 'Positive score', 
                                                  'Reaction', 'Equation', 'Corr', 'Cov')
    
    for each_row, each_df in df.iterrows():
        negative_met = each_df['Negative metabolite']
        positive_met = each_df['Positive metabolite']

        negative_score = each_df['Negative score']
        positive_score = each_df['Positive score']
        
        for each_met_set in original_model_met_info:
            if negative_met in each_met_set[0] and positive_met in each_met_set[1]:
                target_reaction = each_met_set[2]
                if target_reaction not in flux_corr_df.index:
                    print >>fp, '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s'%(ex_reaction_id, 'Native', negative_met, positive_met, 
                                                                      negative_score, positive_score, each_met_set[2], 
                                                                      each_met_set[3], 'NA', 'NA')
                else:
                    corr_val = float(flux_corr_df.ix[target_reaction])
                    cov_val = float(flux_cov_df.ix[target_reaction])
                    print >>fp, '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s'%(ex_reaction_id, 'Native', negative_met, positive_met, 
                                                                      negative_score, positive_score, each_met_set[2], 
                                                                      each_met_set[3], corr_val, cov_val)
                    
        
        for each_met_set in universal_model_met_info:
            if negative_met+'_' in each_met_set[0] and positive_met+'_' in each_met_set[1]:
                target_reaction = each_met_set[2]
                if target_reaction not in flux_corr_df.index:
                    print >>fp, '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s'%(ex_reaction_id, 'Universal model', negative_met, positive_met, 
                                                                      negative_score, positive_score, each_met_set[2], 
                                                                      each_met_set[3], 'NA', 'NA')
    fp.close()
    return

def metabolite_set(cobra_model):
    met_set_info = []
    for each_reaction in cobra_model.reactions:
        reactants = [str(met.id) for met in each_reaction.reactants]
        products = [str(met.id) for met in each_reaction.products]
        met_set_info.append([reactants, products, each_reaction.id, each_reaction.reaction])
        
    return met_set_info

def metabolite_set2(cobra_model):
    met_set_info = []
    for each_reaction in cobra_model.reactions:
        reactants = [str(met.id) for met in each_reaction.reactants]
        products = [str(met.id) for met in each_reaction.products]

        compartments = []
        for each_met in reactants+products:
            each_cmp = each_met[-2:]
            compartments.append(each_cmp)
            
        compartments = list(set(compartments))
        if len(compartments) == 1:
            if compartments[0] == 'c_':
                met_set_info.append([reactants, products, each_reaction.id, each_reaction.reaction])
        
    return met_set_info

original_model = './input/ijo1366_IRR.xml'
universal_model = './bigg_universal_model.xml'

original_cobra_model = read_sbml_model(original_model)
universal_cobra_model = read_sbml_model(universal_model)
#     print >>fp, '%s\t%s\t%s\t%s\t%s'%('Negative metabolite', 'Positive metabolite', 'Negative score', 'Positive score', 'Merged score')

original_model_met_set_info = metabolite_set(original_cobra_model)
universal_model_met_set_info = metabolite_set2(universal_cobra_model)
print len(original_model_met_set_info)
print len(universal_model_met_set_info)

files = glob.glob('./application_results/*.txt')
cov_dir = './results/'

cnt = 1
for each_file in files:
    s = time.time()
    basename = os.path.basename(each_file)
    print basename
    ex_reaction_id = basename.split('Final_MetScore_')[1].strip()
    ex_reaction_id = ex_reaction_id.replace('.txt', '')

    corr_file = glob.glob(cov_dir+'corr*%s*.csv'%(ex_reaction_id))[0]
    cov_file = glob.glob(cov_dir+'cov*%s*.csv'%(ex_reaction_id))[0]

    flux_corr_df = pd.read_csv(corr_file, index_col=0)
    flux_cov_df = pd.read_csv(cov_file, index_col=0)

    print ex_reaction_id, cnt

    check_reaction(each_file, original_model_met_set_info, universal_model_met_set_info, flux_corr_df, flux_cov_df)
    cnt+=1
    e = time.time()
    print e-s






3193
11948
Candidate_Final_MetScore_EX_INDIRUBIN_LPAREN_e_RPAREN_.txt
EX_INDIRUBIN_LPAREN_e_RPAREN_ 1


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


131.529567003


In [10]:
def parse_reactions(reaction, ex_metabolites):
    reaction = reaction.replace('>', '')
    reaction = reaction.replace('<', '')
    
    sptlist = reaction.split('--')
    reactants = sptlist[0].strip().split(' + ')
    products = sptlist[0].strip().split(' + ')
    new_reactants = []
    new_products = []
    
    for met in reactants:
        if met not in ex_metabolites:
            new_reactants.append(met)
            
    for met in products:
        if met not in ex_metabolites:
            new_products.append(met)
            
    return new_reactants, new_products

def check_duplicate(reaction, known_reaction_info, ex_metabolites):
    flag = False
    reactants, products = parse_reactions(reaction, ex_metabolites)
    if len(reactants) > 0 and len(products) > 0: 
        for rxn in known_reaction_info:
            reactants2 = known_reaction_info[rxn][0]
            products2 = known_reaction_info[rxn][1]

            if  set(reactants).issubset(set(reactants2)) and set(products).issubset(set(products2)):
                flag = True
                return flag

            if  set(products).issubset(set(reactants2)) and set(reactants).issubset(set(products2)):
                flag = True
                return flag
        
    return flag

ex_metabolites = []
with open('./excluded_metabolites.txt', 'r') as fp:
    for line in fp:
        ex_metabolites.append(line.strip())

original_model = './input/ijo1366_IRR.xml'
original_cobra_model = read_sbml_model(original_model)

known_reaction_info = {}
for each_reaction in original_cobra_model.reactions:
    reactants, products = parse_reactions(each_reaction.reaction, ex_metabolites)
    known_reaction_info[each_reaction] = [reactants, products]
    
        
files = glob.glob('./application_result2/*.txt')
fp = open('./Application4_final_summary.txt', 'w')
print >>fp, '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s'%('Target', 'Gene source', 'Negative metabolite', 
                                              'Positive metabolite', 'Negative score', 'Positive score', 
                                              'Reaction', 'Equation', 'Corr', 'Cov')
for each_file in files:
    with open(each_file, 'r') as fp2:
        fp2.readline()
        for line in fp2:
            sptlist = line.strip().split('\t')
            equation = sptlist[7].strip()
            met1 = sptlist[2].strip()
            met2 = sptlist[3].strip()
            if met1 not in ex_metabolites and met2 not in ex_metabolites:
                print >>fp, line.strip()

fp.close()
