In [1]:
import mackinac
import cobra
import pandas as pd
import json
import os
import numpy as np

In [2]:
# Load the strain table that links each organism in the screen to its PATRIC genome ID.
# Every column is read as a string (dtype=str) so that IDs are kept exactly as written in the
# file instead of being parsed as numbers.
id_table = pd.read_table('../data/study_strain_subset_w_patric.tsv',sep='\t',dtype=str)
# Missing cells become empty strings, so a strain without a PATRIC genome ID ends up with a
# falsy '' value that downstream cells can test for directly.
id_table = id_table.fillna('')
# Map each strain's designation in the screen to its PATRIC genome ID ('' when none is known).
species_to_id = dict(zip(id_table["designation in screen"],id_table["PATRIC genome ID"]))

In [3]:
# Display the loaded strain table to check the designations and PATRIC genome IDs by eye.
id_table

Unnamed: 0.1,Unnamed: 0,designation in screen,strain,TaxID,source,metabolic model,source of metabolic model,assembly,selection criteria,pre-inoculation media,PATRIC genome ID
0,8,B. clarus,"Bacteroides clarus A 20, YIT 12056, DSM 22519,...",762984,DSM 22519,Bacteroides_clarus_YIT_12056,"Magnusdottir et al., 2016",GCA_000195615.1_ASM19561v1,>= 10^-2 rel. abundance (in at least one samp...,mGAM,762984.1
1,24,B. longum subsp. longum,Bifidobacterium longum subsp. longum E194b (Va...,1679,DSM 20219,no,no,NT5028,>= 10^-2 rel. abundance (in at least one samp...,BHI++,
2,33,B. vulgatus HM-720,"Bacteroides vulgatus CL09T03C04, HM-720",997891,HM-720 (BEI Resources),no,no,GCA_000273295.1_Bact_vulg_CL09T03C04_V1,>= 10^-2 rel. abundance (in at least one samp...,mGAM,997891.3
3,44,C. saccharolyticum,"Clostridium saccharolyticum WM1, DSM 2544, ATC...",610130,DSM 2544,no,no,GCA_000144625.1_ASM14462v1,forming separate metabolic clade represented b...,GMM,610130.3
4,67,L. lactis,"Lactococcus lactis subsp. lactis IL1403, CIRM ...",272623,CIRM BIA 96 = CNRZ 1342 (INRA collection),Lactococcus lactis subsp. lactis IL1403,http://systemsbiology.ucsd.edu/InSilicoOrganis...,GCA_000006865.1_ASM686v1,probiotic,mGAM,272623.7
5,69,L. plantarum,"Lactobacillus plantarum WCFS1, LMG 9211, NCIMB...",220668,LMG 9211 (BCCM collection),Lactobacillus_plantarum_WCFS1,"Magnusdottir et al., 2016",GCA_000203855.3_ASM20385v3,probiotic,mGAM,220668.9
6,78,P. distasonis,"Parabacteroides distasonis DSM 20701, ATCC 850...",435591,DSM 20701,Parabacteroides_distasonis_ATCC_8503,"Magnusdottir et al., 2016",GCA_000012845.1_ASM1284v1,>= 10^-2 rel. abundance (in at least one samp...,mGAM,435591.13
7,80,P. merdae,"Parabacteroides merdae VPI T4-1, DSM 19495, AT...",411477,DSM 19495,Parabacteroides_merdae_ATCC_43184,"Magnusdottir et al., 2016",GCA_000154105.1_ASM15410v1,>= 10^-2 rel. abundance (in at least one samp...,mGAM,411477.4
8,82,R. gnavus,"Ruminococcus gnavus VPI C7-9, ATCC 29149",411470,ATCC 29149,Ruminococcus_gnavus_ATCC_29149,"Magnusdottir et al., 2016",GCA_000169475.1_ASM16947v1,>= 10^-2 rel. abundance (in at least one samp...,GMM,411470.41


In [4]:
# Authenticate with PATRIC so that the reconstruction calls below are authorized.
# The account name defaults to the original author's account but can be overridden by setting
# the PATRIC_USERNAME environment variable, so other users do not have to edit the notebook.
# Only the account name is passed here; no password is stored in the notebook.
patric_username = os.environ.get('PATRIC_USERNAME', 'gregmedlock_seed')
mackinac.get_token(patric_username)


patric password:  ···············


'gregmedlock_seed@patricbrc.org'

In [5]:
# Fetch a universal model and store it on disk so it can be reused later for gapfilling.
# The source is a public template from Mike Mundy's workspace. Although it is labelled
# "gramneg", the g+ and g- templates differ only in biomass composition, and that part is
# not used during gapfilling (the GENREs will already have their own biomass function).
universal_template_ref = '/mmundy/public/modelsupport/templates/MicrobialNegativeResolved.modeltemplate'
universal_model_path = '../data/universal_mundy.json'
gramneg = mackinac.create_universal_model(universal_template_ref)
cobra.io.save_json_model(gramneg, universal_model_path)

In [6]:
# Reconstruct a draft GENRE for every strain that has a PATRIC genome ID and no saved model yet,
# and keep the reconstruction info and gapfill solution for each strain in JSON files.

def _load_json_or_empty(path):
    """Return the dictionary stored in the JSON file at `path`, or {} if the file does not exist."""
    if os.path.exists(path):
        with open(path) as jsonfile:
            return json.load(jsonfile)
    return {}

model_dir = '../data/modelseed_models'
recon_info_path = '../data/patric_recon_info.json'
gapfill_solutions_path = '../data/patric_gapfill_solutions.json'

# Start from the metadata saved by earlier runs. Strains whose model file already exists are
# skipped below, so starting from empty dictionaries would overwrite their saved reconstruction
# info and gapfill solutions with nothing when this cell is re-run.
name_to_recon_info = _load_json_or_empty(recon_info_path)
name_to_gapfill_solution = _load_json_or_empty(gapfill_solutions_path)

# Make sure the output directory exists before starting any (slow) remote reconstruction.
os.makedirs(model_dir, exist_ok=True)

try:
    for species, species_id in species_to_id.items():
        model_path = os.path.join(model_dir, species + '.json')
        # Skip strains without a PATRIC ID (stored as an empty string) and strains that
        # already have a saved GENRE.
        if not species_id or os.path.exists(model_path):
            continue

        # reconstruct model; function returns a dictionary with reconstruction info, NOT the model
        print("Reconstructing GENRE for " + species)
        recon_info = mackinac.create_patric_model(species_id,species)
        name_to_recon_info[species] = recon_info
        # Get the reactions contained in the gapfill solution. This is on complete media
        name_to_gapfill_solution[species] = mackinac.get_patric_gapfill_solutions(species)[0]
        # convert to a cobra model
        model = mackinac.create_cobra_model_from_patric_model(species)
        # Save model in json format
        cobra.io.save_json_model(model, model_path)

        # Save the model with gapfilled reactions removed
        gapfilled_reactions = list(name_to_gapfill_solution[species]['reactions'].keys())
        model.remove_reactions(gapfilled_reactions, remove_orphans=True)
        model.repair()
        cobra.io.save_json_model(model, os.path.join(model_dir, species + '_gapfill_removed.json'))
finally:
    # Write the metadata even if a reconstruction fails part-way through: a saved model file
    # makes later runs skip that strain, so its metadata must not be lost.

    # save conversion dict for id:original_name:SEED_name mapping
    with open(recon_info_path,'w') as jsonfile:
        json.dump(name_to_recon_info,jsonfile)

    # save the gapfill solutions
    with open(gapfill_solutions_path,'w') as jsonfile:
        json.dump(name_to_gapfill_solution,jsonfile)
    

Reconstructing GENRE for B. clarus
Reconstructing GENRE for B. vulgatus HM-720
Reconstructing GENRE for C. saccharolyticum
Reconstructing GENRE for L. lactis
Reconstructing GENRE for L. plantarum
Reconstructing GENRE for P. distasonis
Reconstructing GENRE for P. merdae
Reconstructing GENRE for R. gnavus


In [7]:
# Show the designation -> PATRIC genome ID mapping; strains without a PATRIC ID map to ''.
species_to_id

{'B. clarus': '762984.10',
 'B. longum subsp. longum': '',
 'B. vulgatus HM-720': '997891.3',
 'C. saccharolyticum': '610130.3',
 'L. lactis': '272623.7',
 'L. plantarum': '220668.9',
 'P. distasonis': '435591.13',
 'P. merdae': '411477.4',
 'R. gnavus': '411470.41'}