# Construct a panGEM of 55 E. coli strains from BiGG

In [1]:
# import packages
import os
import cobra
import pickle
import cobra
import pandas as pd

### Step 1: Read models

In [4]:
# Read all models
def read_all_models(bigg_models_path):
    '''
    Returns a dictionary of all models used for panModel reconstruction

    Every file directly inside ``bigg_models_path`` whose name ends with
    ``.json`` is loaded with ``cobra.io.load_json_model``, except files whose
    name contains one of the excluded tokens listed below.

    Parameters
    ----------
    bigg_models_path : str
        Directory that holds the JSON model files.

    Returns
    -------
    dict
        Maps ``model.id`` to the loaded model, in the order the files were
        read. If two files yield the same ``model.id``, the one read later
        replaces the earlier one.
    '''

    # When re-run at later stage, ensure that all constructed models are not included in this list
    excluded_tokens = (
        'CP007799.1',  # EcN models (new assembly)
        'CP022686.1',  # EcN models (old assembly)
        'universal',   # pan-ecoli and BIGG universal model
    )

    # endswith() (rather than a substring test) keeps out names such as
    # 'model.json.bak'. os.listdir() returns names in arbitrary order, so sort
    # case-insensitively to read the models in the same order on every
    # platform; the exact name is the tie-breaker.
    model_files_list = sorted(
        (
            file for file in os.listdir(bigg_models_path)
            if file.endswith('.json')
            and not any(token in file for token in excluded_tokens)
        ),
        key=lambda file: (file.upper(), file),
    )

    models_dict = dict()
    for model_file in model_files_list:
        model_path = os.path.join(bigg_models_path, model_file)
        print('Reading model: ', model_file)
        model = cobra.io.load_json_model(model_path)
        models_dict[model.id] = model

    return models_dict

# Run function: load every model file found in the models directory.
# The path is relative, so it depends on the directory the notebook is run from.
bigg_models_path = '../data/models/'
# models_dict maps each model's id to the loaded model; later cells read from it.
models_dict = read_all_models(bigg_models_path)

Reading model:  iAPECO1_1312.json
Reading model:  iB21_1397.json
Reading model:  iBWG_1329.json
Reading model:  ic_1306.json
Reading model:  iE2348C_1286.json
Reading model:  iEC042_1314.json
Reading model:  iEC55989_1330.json
Reading model:  iECABU_c1320.json
Reading model:  iECBD_1354.json
Reading model:  iECB_1328.json
Reading model:  iECDH10B_1368.json
Reading model:  iECDH1ME8569_1439.json
Reading model:  iEcDH1_1363.json
Reading model:  iECD_1391.json
Reading model:  iEcE24377_1341.json
Reading model:  iECED1_1282.json
Reading model:  iECH74115_1262.json
Reading model:  iEcHS_1320.json
Reading model:  iECIAI1_1343.json
Reading model:  iECIAI39_1322.json
Reading model:  iECNA114_1301.json
Reading model:  iECO103_1326.json
Reading model:  iECO111_1330.json
Reading model:  iECO26_1355.json
Reading model:  iECOK1_1307.json
Reading model:  iEcolC_1368.json
Reading model:  iECP_1309.json
Reading model:  iECS88_1305.json
Reading model:  iECSE_1348.json
Reading model:  iECSF_1327.json
Re

### Step 2.1: Get panreactome

In [5]:
def get_panreactome(models_dict):
    '''
    Return a list of panreactome IDs and dataframe

    Parameters
    ----------
    models_dict : dict
        Maps model id to a model exposing ``reactions``; each reaction must
        provide ``id`` and ``gene_reaction_rule``.

    Returns
    -------
    panreactome : list
        Unique reaction IDs over all models, in first-seen order.
    df_panreactome : pandas.DataFrame
        One row per reaction ID and one column per model id. A cell holds the
        reaction's gene_reaction_rule in that model, or '' when the model
        does not contain the reaction.
    '''

    model_ids_list = list(models_dict)

    # A dict is used as an ordered set so the returned IDs (and the table
    # rows) come out in a reproducible first-seen order.
    rxn_order = {}
    rules_by_model = {}

    for model_id in model_ids_list:
        model_rules = {}
        for rxn in models_dict[model_id].reactions:
            model_rules[rxn.id] = rxn.gene_reaction_rule
            rxn_order.setdefault(rxn.id, None)
        rules_by_model[model_id] = pd.Series(model_rules, dtype=object)

    panreactome = list(rxn_order)

    # Build the table in one go: enlarging a DataFrame one cell at a time
    # with .loc re-allocates the frame for every new row.
    df_panreactome = pd.DataFrame(
        rules_by_model,
        index=panreactome,
        columns=model_ids_list,
        dtype=object,
    )
    df_panreactome = df_panreactome.fillna('')

    return panreactome, df_panreactome

# Run function: collect the reaction IDs and the reaction-by-model table
# of gene-reaction rules from all loaded models.
panreactome, df_panreactome = get_panreactome(models_dict)
# Save the table as CSV (path is relative to the notebook's working directory).
df_panreactome.to_csv('../tables/panreactome.csv')

### Step 2.2: Create panreactome model
This model contains the union of all reactions from the strain models, but it has no objective function.

In [22]:
def create_panreactome(models_dict):
    '''
    Return a pan-E. coli model

    Starting from an empty ``cobra.Model`` named "universal_reactions", the
    models are visited in dictionary order and every reaction whose id is not
    yet present is added. When several models share a reaction id, the
    reaction object from the first model visited is the one kept.

    Parameters
    ----------
    models_dict : dict
        Maps model id to a model exposing ``reactions``.

    Returns
    -------
    cobra.Model
        Model holding one reaction per distinct reaction id.
    '''

    # Load empty model
    universal = cobra.Model("universal_reactions")

    # NOTE(review): reactions are handed over as the same objects held by the
    # source models, not as copies - confirm whether cobrapy re-parents them
    # and whether the source models must stay intact afterwards.
    for model_id, model in models_dict.items():
        print('Adding reactions from the model', model_id)

        # Reactions of this model that the pan-model does not have yet
        new_reactions = [
            rxn for rxn in model.reactions
            if rxn.id not in universal.reactions
        ]

        # Model.add_reaction is deprecated in cobrapy; add_reactions takes
        # the whole batch in one call.
        if new_reactions:
            universal.add_reactions(new_reactions)
        counter = len(new_reactions)

        print('Added', counter, 'reactions. Total number of reactions is', len(universal.reactions))

    return universal
                
# Build the pan-E. coli reaction model from all loaded strain models
universal = create_panreactome(models_dict)

# Write the combined model to disk as JSON (pretty-printing switched off)
cobra.io.save_json_model(universal, '../data/models/panecoli_universal.json', pretty=False)

Adding reactions from the model iAPECO1_1312
Added 2735 reactions. Total number of reactions is 2735
Adding reactions from the model iB21_1397
Added 61 reactions. Total number of reactions is 2796
Adding reactions from the model iBWG_1329
Added 27 reactions. Total number of reactions is 2823
Adding reactions from the model ic_1306
Added 8 reactions. Total number of reactions is 2831
Adding reactions from the model iE2348C_1286
Added 16 reactions. Total number of reactions is 2847
Adding reactions from the model iEC042_1314
Added 3 reactions. Total number of reactions is 2850
Adding reactions from the model iEC55989_1330
Added 1 reactions. Total number of reactions is 2851
Adding reactions from the model iECABU_c1320
Added 0 reactions. Total number of reactions is 2851
Adding reactions from the model iECBD_1354
Added 0 reactions. Total number of reactions is 2851
Adding reactions from the model iECB_1328
Added 0 reactions. Total number of reactions is 2851
Adding reactions from the mode

### Step 3. Get pangenome
The pangenome is an over-representation: some genes are counted more than once because they have different identifiers in different models.

In [23]:
def get_pangenome(models_dict):
    '''
    Return a list of pangenome IDs and dataframe

    Gene IDs are collected from every model in dictionary order. An ID that
    occurs in several models is listed once, at its first occurrence. Genes
    that carry different IDs in different models are not reconciled and
    therefore still appear once per distinct ID.

    Parameters
    ----------
    models_dict : dict
        Maps model id to a model exposing ``genes``; each gene must provide
        ``id``.

    Returns
    -------
    pangenome : list
        Unique gene IDs in first-seen order.
    df_pangenome : pandas.DataFrame
        Single-column frame built from ``pangenome``.
    '''

    # dict.fromkeys drops repeated IDs while keeping first-seen order
    pangenome = list(dict.fromkeys(
        gene.id
        for model in models_dict.values()
        for gene in model.genes
    ))

    df_pangenome = pd.DataFrame(pangenome)
    return pangenome, df_pangenome

# Run function: collect the gene IDs of all loaded models.
pangenome, df_pangenome = get_pangenome(models_dict)
# Save the gene ID table as CSV (path is relative to the notebook's working directory).
df_pangenome.to_csv('../tables/pangenome.csv')

### Step 4. Get panmetabolome

In [59]:
def get_panmetabolome(models_dict):
    '''
    Return a dataframe of the panmetabolome

    Models are visited in dictionary order. A metabolite is recorded the
    first time its id is seen; later models with the same metabolite id do
    not overwrite the stored name, formula or compartment.

    Parameters
    ----------
    models_dict : dict
        Maps model id to a model exposing ``metabolites``; each metabolite
        must provide ``id``, ``name``, ``formula`` and ``compartment``.

    Returns
    -------
    pandas.DataFrame
        Indexed by metabolite id (index name 'ID') with the columns 'name',
        'formula' and 'compartment'.
    '''

    columns = ['name', 'formula', 'compartment']

    # Collect the rows in a dict first: membership tests are O(1) and the
    # DataFrame is built once instead of being enlarged row by row.
    records = {}

    # Store each metabolite
    for model_id, model in models_dict.items():
        print('Adding metabolites from the model', model_id)
        total_before = len(records)

        for mtb in model.metabolites:
            if mtb.id not in records:
                records[mtb.id] = (mtb.name, mtb.formula, mtb.compartment)

        counter = len(records) - total_before
        print('Added', counter, 'metabolites. Total number of metabolites is', len(records))

    panmetabolome_df = pd.DataFrame(
        list(records.values()),
        index=list(records),
        columns=columns,
        dtype=object,
    )
    panmetabolome_df.index.name = 'ID'

    return panmetabolome_df

# Run function: build the metabolite table (name, formula, compartment per
# metabolite id) from all loaded models.
panmetabolome_df = get_panmetabolome(models_dict)
# Save the table as CSV (path is relative to the notebook's working directory).
panmetabolome_df.to_csv('../tables/panmetabolome.csv')

Adding metabolites from the model iAPECO1_1312
Added 1942 metabolites. Total number of metabolites is 1942
Adding metabolites from the model iB21_1397
Added 31 metabolites. Total number of metabolites is 1973
Adding metabolites from the model iBWG_1329
Added 19 metabolites. Total number of metabolites is 1992
Adding metabolites from the model ic_1306
Added 3 metabolites. Total number of metabolites is 1995
Adding metabolites from the model iE2348C_1286
Added 6 metabolites. Total number of metabolites is 2001
Adding metabolites from the model iEC042_1314
Added 0 metabolites. Total number of metabolites is 2001
Adding metabolites from the model iEC55989_1330
Added 0 metabolites. Total number of metabolites is 2001
Adding metabolites from the model iECABU_c1320
Added 0 metabolites. Total number of metabolites is 2001
Adding metabolites from the model iECBD_1354
Added 0 metabolites. Total number of metabolites is 2001
Adding metabolites from the model iECB_1328
Added 0 metabolites. Total n