# Import packages

In [1]:
import cobra
from cobra import Model, Reaction, Metabolite
from cobra.io import read_sbml_model
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
import glob
import pandas as pd
import time
import numpy as np
import sys
sys.path.append('../code/')
from path_analysis_function import *

# Input and output files

In [2]:
# --- Inputs ---
modelfile = '../data/external/model/'  # folder containing the models (*.xml files are globbed from it)
biggid2name = pd.read_csv("../data/external/biggid2name.csv", index_col=0)  # all metabolite IDs and names of BiGG

# --- Metabolite-ID conventions and model groups ---
specialgroup = ['coa', 'ACP', 'thf']  # these names often appear at the end of the metabolite ID
specialgroup_begin = ['udp', 'cdp', 'adp', 'gdp', 'uac']  # these names often appear at the beginning of the metabolite ID; uac stands for UDP-N-acetyl
precusors = ['e4p', 'pep', 'r5p', 'oaa', '3pg', 'pyr', 'akg', 'accoa', 'f6p', 'g6p', 'g3p', 'succoa']  # precursor metabolite IDs (without compartment suffix)
euk_model_list = ['iAM_Pb448', 'iAM_Pc455', 'iAM_Pf480', 'iAM_Pk459']  # eukaryotic model list

# --- Outputs ---
interim_save_path = '../data/interim/bowtie_path/'  # folder containing intermediate results
result_save_path = '../data/result/bowtie_path/'  # bow-tie result folder for the different models
# Both result tables live in result_save_path; derive them from it so that
# changing the folder in one place keeps every output together.
bowtie_path_outputfile = result_save_path + 'bowtie_path_total_output.csv'  # combined table: bow-tie category of each BiGG metabolite per model
bowtie_path_analysis_tablefile = result_save_path + 'bowtie_path_analysis_table.csv'  # Table 2 of the manuscript

# Get seed metabolites

In [3]:
# Build one precursor-connectivity table per model.
# For every model file, each ordered pair of distinct precursors that are both
# present in the same compartment of the preprocessed model is passed to path();
# pairs for which path() returns "no path" are dropped and the remaining rows are
# written to <interim_save_path><model_name>_precusor_connectivity.csv.
modelfile_list = sorted(glob.glob(modelfile+'*.xml'), reverse=False)

# Create the output folder once, before any model is processed; exist_ok makes
# this a no-op when the folder is already there.
os.makedirs(interim_save_path, exist_ok=True)

for eachmodelfile in modelfile_list:
    # os.path.basename handles the separators of the current platform, so the
    # same line works on Windows (where glob returns '\\'-separated paths) and
    # on POSIX without switching between two hand-written split() variants.
    model_name = os.path.basename(eachmodelfile).split('.')[0]
    print(model_name)
    model = read_sbml_model(eachmodelfile)

    #process the model to get a newmodel
    compartments = get_compartments(model)
    newmodel = model_preprocess(model, specialgroup, specialgroup_begin, compartments)

    # calculate pathways between precursors, select seed metabolites
    paths = []
    for pre in precusors:
        for pre1 in precusors:
            if pre1 == pre:
                continue
            for compp in compartments:
                # metabolite IDs are '<precursor>_<compartment>'
                pre2 = pre + '_' + compp
                pre3 = pre1 + '_' + compp
                if pre2 not in newmodel.metabolites or pre3 not in newmodel.metabolites:
                    continue
                metpair = path(newmodel, pre2, pre3, interim_save_path, specialgroup, specialgroup_begin)
                # path() signals an unconnected pair with the string "no path"
                if metpair != "no path":
                    paths.append(metpair)

    # NOTE: 'formula', 'reduction degree' and 'Crd' each appear twice (substrate
    # side, then product side); the header is kept as-is so existing consumers of
    # the CSV keep working.
    result_df = pd.DataFrame(paths, columns=['substrate', 'formula', 'Cs', 'reduction degree', 'Crd', 'product', 'formula', 'Cp', 'reduction degree', 'Crd', 'rate', 'path yield', 'rdyield', 'difference'])
    result_df.to_csv(interim_save_path+model_name+'_precusor_connectivity.csv', header=True, index=False, mode='w')
print('finish')

iAF692
iAF987
iAM_Pb448
iAM_Pc455
iAM_Pf480
iAM_Pk459
iHN637
iJN1463
iML1515
iPC815
iSDY_1059
iSF_1195
iSSON_1240
iSbBS512_1146
iYL1228
finish


# Get bow tie structure

In [None]:
#Use parallel computing to process multiple models at once and obtain results quickly
def main():
    """Run bow_tie_structure_for_model for every model file in a process pool.

    One task is submitted per '*.xml' file found in ``modelfile``. Each task's
    outcome is collected as it completes: without calling ``result()`` an
    exception raised inside a worker would be discarded silently, leaving a
    model without results and no hint why. A failing model is reported and
    the remaining models still run.
    """
    modelfile_list = sorted(glob.glob(modelfile+'*.xml'), reverse=False)
    with ProcessPoolExecutor() as executor:
        # Map each future back to its model file so failures can be attributed.
        futures = {executor.submit(bow_tie_structure_for_model,eachmodelfile,specialgroup,specialgroup_begin,euk_model_list,\
                                   result_save_path): eachmodelfile for eachmodelfile in modelfile_list}
        for future in as_completed(futures):
            try:
                # result() re-raises whatever the worker raised.
                future.result()
            except Exception as exc:
                # Top-level boundary: report and continue with the other models.
                print('%s failed: %r' % (futures[future], exc))
        
if __name__ == '__main__':
    # Time the complete run of main() and report the elapsed seconds.
    start = time.perf_counter()
    main()
    end = time.perf_counter()
    elapsed = end - start
    print('Took %.10f seconds.' % elapsed)
    print('finish')


#If the program stops during parallel computing, you can use the following code to run the models one by one
#modelfile_list = sorted(glob.glob(modelfile+'*.xml'), reverse=False)
#print(len(modelfile_list))
#singlemodelfile = modelfile_list[8]
#bow_tie_structure_for_model(singlemodelfile, specialgroup, specialgroup_begin, euk_model_list, result_save_path)

iAF692
iAM_Pb448iAF987
iAM_Pc455iAM_Pf480iAM_Pk459



iHN637
iJN1463
iML1515
iPC815
iSDY_1059
iSF_1195
iSSON_1240
iSbBS512_1146
iYL1228


In [35]:
#Read the 'bowtie' sheet of every per-model result workbook and record, for each
#metabolite already listed in biggid2name, its bow-tie category in that model
#(one column per model). The combined table is written to bowtie_path_outputfile.
globfiles = sorted(glob.glob(result_save_path+'*_results.xlsx'), reverse=False)
for filename in globfiles:
    print(filename)
    # os.path.basename handles the separators of the current platform, so this
    # works on Windows as well as on POSIX.
    model_name = os.path.basename(filename).split('_results')[0]
    model_bowtie = pd.read_excel(filename, sheet_name='bowtie', index_col='metabolite')
    # Only the 'bowtie' column is needed, so iterate that Series directly
    # instead of materialising a full row object per metabolite.
    for index, category in model_bowtie['bowtie'].items():
        # metabolites that are not in the BiGG table are skipped
        if index in biggid2name.index:
            biggid2name.loc[index, model_name] = category

biggid2name.to_csv(bowtie_path_outputfile, header=True, index=True, mode='w')
print('finish')


../data/result/bowtie_path/iAF692_results.xlsx
../data/result/bowtie_path/iAF987_results.xlsx
../data/result/bowtie_path/iAM_Pb448_results.xlsx
../data/result/bowtie_path/iAM_Pc455_results.xlsx
../data/result/bowtie_path/iAM_Pf480_results.xlsx
../data/result/bowtie_path/iAM_Pk459_results.xlsx
../data/result/bowtie_path/iHN637_results.xlsx
../data/result/bowtie_path/iJN1463_results.xlsx
../data/result/bowtie_path/iML1515_results.xlsx
../data/result/bowtie_path/iPC815_results.xlsx
../data/result/bowtie_path/iSDY_1059_results.xlsx
../data/result/bowtie_path/iSF_1195_results.xlsx
../data/result/bowtie_path/iSSON_1240_results.xlsx
../data/result/bowtie_path/iSbBS512_1146_results.xlsx
../data/result/bowtie_path/iYL1228_results.xlsx
finish


In [36]:
#Table 2: for every model, count the metabolites that have a bow-tie category
#and how many of them fall into each of GSC / IN / OUT / IS.
modelfile_list = sorted(glob.glob(modelfile+'*.xml'), reverse=False)
bowtie_path_data = pd.read_csv(bowtie_path_outputfile, index_col=0)
bowtie_analysis_table = pd.DataFrame()
for eachmodelfile in modelfile_list:
    # os.path.basename handles the separators of the current platform, so this
    # works on Windows as well as on POSIX.
    model_name = os.path.basename(eachmodelfile).split('.')[0]
    print(model_name)
    # models without a column in the combined table get no row
    if model_name not in bowtie_path_data.columns:
        continue
    # empty cells are read back as NaN by read_csv: they mark metabolites that
    # have no category in this model
    record_has_data = bowtie_path_data[model_name].dropna().tolist()
    bowtie_analysis_table.loc[model_name, 'All Metabolites'] = len(record_has_data)
    for category in ('GSC', 'IN', 'OUT', 'IS'):
        bowtie_analysis_table.loc[model_name, category] = record_has_data.count(category)

bowtie_analysis_table.to_csv(bowtie_path_analysis_tablefile, header=True, index=True, mode='w')
print('finish')

iAF692
iAF987
iAM_Pb448
iAM_Pc455
iAM_Pf480
iAM_Pk459
iHN637
iJN1463
iML1515
iPC815
iSDY_1059
iSF_1195
iSSON_1240
iSbBS512_1146
iYL1228
