In [1]:
from IPython.display import display, HTML, Math, Markdown
# Inject CSS that sets elements with class `container` to 95% width.
display(HTML("<style>.container { width:95% !important; }</style>"))

import sys
import os
# Location of the coralme checkout, relative to this notebook; the trailing
# comment keeps the absolute path that was used previously.
coralme_dir = '../'#'/home/chris/zuniga/coralme/'
# Put the checkout first on sys.path, ahead of the `import coralme` lines below.
sys.path.insert(0, coralme_dir)
# Original note: "code for enabling this notebook to work within cursor".
# NOTE(review): presumably this refers to the sys.path insertion above — confirm.


import importlib
import coralme
import coralme.solver.solver
import coralme.builder.main
import coralme.core.model
# NOTE(review): `os` is already imported above; this second import is redundant but harmless.
import os
import pandas as pd
import json
import cobra

from coralme.builder.main import MEBuilder

from Bio import Entrez, SeqIO
# E-mail address set on Biopython's Entrez module before any NCBI request is made.
# NOTE(review): this is a hard-coded personal address — consider reading it from
# an environment variable before sharing the notebook.
Entrez.email = 'cdalldorf@sdsu.edu'

# Function to download GenBank file
def download_genbank(ref_id, output_dir, verbose = False, filename = 'genome.gb'):
    """Fetch one GenBank record with ``Entrez.efetch`` and write it to disk.

    Parameters
    ----------
    ref_id : str
        Identifier passed to ``Entrez.efetch`` as ``id`` (``db="nucleotide"``,
        ``rettype="gb"``, ``retmode="text"``).
    output_dir : str
        Directory the record is written into. It must already exist; if it
        does not, opening the output file fails and 0 is returned.
    verbose : bool, optional
        When True, progress and error messages are printed. When False the
        function prints nothing, including on failure.
    filename : str, optional
        Name of the file created inside ``output_dir``. Defaults to
        ``'genome.gb'``, the name this function has always used.

    Returns
    -------
    int
        1 if a non-empty record was written, 0 if anything went wrong
        (fetch error, empty response, or write error). Exceptions are
        caught here and never propagate to the caller.
    """
    try:
        if verbose: print(f"Fetching {ref_id}...")
        with Entrez.efetch(db="nucleotide", id=ref_id, rettype="gb", retmode="text") as handle:
            gb_record = handle.read()

        # An empty response must not be saved: callers use the file's mere
        # existence to decide whether a download is still needed, so an empty
        # file would block every later retry.
        if not gb_record.strip():
            raise ValueError("empty response, nothing written")

        output_file = os.path.join(output_dir, filename)
        with open(output_file, "w") as f:
            f.write(gb_record)
        if verbose: print(f"Saved {ref_id} to {output_file}")
        return 1
    except Exception as e:
        # Best-effort by design: report only when asked to, and signal the
        # failure through the return value.
        if verbose: print(f"Error fetching {ref_id}: {e}")
        return 0

In [None]:
# Go through each row of the genome-information spreadsheet and, for every
# strain that does not yet have a final model file:
#   1. create the per-strain inputs/ and outputs/ folders,
#   2. fetch the GenBank record (the spreadsheet index is used as the NCBI id),
#   3. write organism.json and input.json,
#   4. convert the MATLAB M-model to JSON when only the .mat file exists,
#   5. build the ME-model with coralme and run its troubleshooter.
base_dir = os.path.join(coralme_dir, 'species_files', 'Pseudomonas_files')
info_df = pd.read_excel(os.path.join(base_dir,'Strain_models_genome-information_predictedM9.xlsx'), index_col = 0)
for index, row in info_df.iterrows():
    # the row indexed 'Reference' is never built
    if index == 'Reference': continue

    # check if run
    out_dir = base_dir+'/individual_species/'+index+'/'
    inputs_dir = os.path.join(out_dir, 'inputs')
    outputs_dir = os.path.join(out_dir, 'outputs')
    model_name = row['organism'].replace(' ', '_')+'_'+row['strain'].replace(' ','_')
    # NOTE(review): the "-TS" step3 pickle is presumably the file written by
    # builder.troubleshoot below — confirm against coralme.
    out_model = os.path.join(outputs_dir, 'MEModel-step3-'+model_name+'-TS.pkl')
    if os.path.exists(out_model):
        print(str(index)+' skipped, model already exists')
        continue

    # make directories; makedirs also creates a missing 'individual_species'
    # parent and is a no-op when the folders already exist
    os.makedirs(inputs_dir, exist_ok=True)
    os.makedirs(outputs_dir, exist_ok=True)

    # pull genome.gb file from NCBI
    if not os.path.exists(os.path.join(inputs_dir, 'genome.gb')):
        # download_genbank returns 0 on failure and prints nothing by default,
        # so surface the failure here instead of letting it pass silently
        if not download_genbank(index, inputs_dir):
            print(str(index)+' GenBank download failed; the model build below will likely fail')

    # TODO - automatic download of Biolog files
    # you tried this before without success, perhaps do manually
    # NOTE(review): this flag gates the "biocyc.*" inputs below, so "Biolog"
    # presumably means BioCyc — confirm.
    biolog_exists = False

    # let's create organism.json and input.json files
    # (built in pieces so the shared keys are written only once; key order is
    # the same as when both variants were spelled out in full)
    input_json = {
        "m-model-path": "./inputs/model.json",
        "genbank-path": "./inputs/genome.gb",
    }
    if biolog_exists:
        input_json.update({
            "biocyc.genes": "./inputs/genes.txt",
            "biocyc.prots": "./inputs/proteins.txt",
            "biocyc.TUs": "./inputs/TUs.txt",
            "biocyc.RNAs": "./inputs/RNAs.txt",
            "biocyc.seqs": "./inputs/sequences.fasta",
        })
    input_json.update({
        "df_gene_cplxs_mods_rxns": "./outputs/building_data/automated-org-with-refs.xlsx",
        "out_directory": "./outputs",
        "log_directory": "./outputs",
        "run_bbh_blast": True,
        "e_value_cutoff": 1e-10,
        "dev_reference": True,
        "include_pseudo_genes": True,
        "locus_tag" : "locus_tag",
    })
    organism_json = {
        "ME-Model-ID" : model_name,
        "growth_key" : "mu1",
        "complex_cofactors" : {},
        "lipoprotein_precursors" : {},
        "lipid_modifications" : [],
        "flux_of_biomass_constituents" : {},
        "flux_of_lipid_constituents" : {},
        "braun's_lipoprotein" : [],
        "braun's_lpp_flux" : -0.0,
        "braun's_murein_flux" : -0.0,
        "gr_data_doublings_per_hour" : [0, 0.6, 1.0, 1.5, 2.0, 2.5],
        "percent_dna_data" : [0.0592, 0.0512, 0.0330, 0.0252, 0.0222, 0.0208],
        "defer_to_rxn_matrix" : [],
    }

    # paths handed to MEBuilder; kept as plain out_dir + name so the strings
    # coralme receives are unchanged
    organism = out_dir+'organism.json'
    inputs = out_dir+'input.json'
    with open(organism, 'w') as f:
        json.dump(organism_json, f, indent = 4)
    with open(inputs, 'w') as f:
        json.dump(input_json, f, indent = 4)

    # convert model file to json if necessary
    matlab_file = base_dir+'/matlab-strain-models/'+index+'.mat'
    json_file = os.path.join(inputs_dir, 'model.json')
    if not os.path.exists(json_file) and os.path.exists(matlab_file):
        model = cobra.io.load_matlab_model(matlab_file)
        cobra.io.save_json_model(model, json_file)

    # create the model
    try:
        builder = MEBuilder(*[organism, inputs])
        builder.generate_files(overwrite=True)
        builder.build_me_model(overwrite=True)
    except Exception as e:
        # one strain failing to build must not stop the remaining strains
        print(f"Error building model: {e}")
        continue

    # gapfill the model
    # (deliberately outside the try block above: an exception raised here
    # still stops the whole loop)
    builder.troubleshoot(growth_key_and_value = { builder.me_model.mu : 0.001 })

CP065866 skipped, model already exists
CP041013 skipped, model already exists
CP039749 skipped, model already exists
CP065865 skipped, model already exists
CP065867 skipped, model already exists
LR590473 skipped, model already exists
CP008749.1 skipped, model already exists
AE004091.2 skipped, model already exists
Initiating file processing...
~ Processing files for Pseudomonas_aeruginosa_A39-1...


Checking M-model metabolites...                                            : 100
Checking M-model genes...                                                  : 100
Checking M-model reactions...                                              : 100
Syncing optional genes file...                                             : 0.0
Looking for duplicates within datasets...                                  : 100
Gathering ID occurrences across datasets...                                : 100
Solving duplicates across datasets...                                      : 0.0
Pruning GenBank...                                                         : 100
Updating Genbank file with optional files...                               : 0.0
Syncing optional files with genbank contigs...                             : 100
Modifying metabolites with manual curation...                              : 0.0
Modifying metabolic reactions with manual curation...                      : 0.0
Adding manual curation of co

Reading Pseudomonas_aeruginosa_A39-1 done.


Gathering M-model compartments...                                          : 100
Fixing compartments in M-model metabolites...                              : 100
Fixing missing names in M-model reactions...                               : 100


~ Processing files for iJL1678b...


Checking M-model metabolites...                                            : 100
Checking M-model genes...                                                  : 100
Checking M-model reactions...                                              : 100
Looking for duplicates within datasets...                                  : 100
Gathering ID occurrences across datasets...                                : 100
Solving duplicates across datasets...                                      : 0.0
Getting sigma factors...                                                   : 100
Getting TU-gene associations from optional TUs file...                     : 100
Adding protein location...                                                 : 100
Purging M-model genes...                                                   : 100


Reading iJL1678b done.
~ Running BLAST with 4 threads...


Converting Genbank contigs to FASTA for BLAST...                           : 100
Converting Genbank contigs to FASTA for BLAST...                           : 100


BLAST done.


Updating translocation machinery from homology...                          : 100
Updating protein location from homology...                                 : 100
Updating translocation multipliers from homology...                        : 100
Updating lipoprotein precursors from homology...                           : 100
Updating cleaved-methionine proteins from homology...                      : 100
Mapping M-metabolites to E-metabolites...                                  : 100
Updating generics from homology...                                         : 100
Updating folding from homology...                                          : 100
Updating ribosome subreaction machinery from homology...                   : 100
Updating tRNA synthetases from homology...                                 : 100
Updating peptide release factors from homology...                          : 100
Updating transcription subreactions machinery from homology...             : 100
Updating translation initiat

File processing done.
Initiating ME-model reconstruction...


Adding biomass constraint(s) into the ME-model...                          : 100
Adding Metabolites from M-model into the ME-model...                       : 100
Adding Reactions from M-model into the ME-model...                         : 100
Adding Transcriptional Units into the ME-model...                          : 0.0
Adding features from contig CP068238.1 into the ME-model...                : 100
Updating all TranslationReaction and TranscriptionReaction...              : 100
Removing SubReactions from ComplexData...                                  : 100
Adding ComplexFormation into the ME-model...                               : 100
Adding Generic(s) into the ME-model...                                     : 100
Processing StoichiometricData in ME-model...                               : 100


ME-model was saved in the ../species_files/Pseudomonas_files/individual_species/CP068238/./outputs directory as MEModel-step1-Pseudomonas_aeruginosa_A39-1.pkl


Adding tRNA synthetase(s) information into the ME-model...                 : 100
Adding tRNA modification SubReactions...                                   : 0.0
Associating tRNA modification enzyme(s) to tRNA(s)...                      : 0.0
Adding SubReactions into TranslationReactions...                           : 100
Adding RNA Polymerase(s) into the ME-model...                              : 100
Associating a RNA Polymerase to each Transcriptional Unit...               : 0.0
Processing ComplexData in ME-model...                                      : 100
Adding ComplexFormation into the ME-model...                               : 100
Adding SubReactions into TranslationReactions...                           : 100
Adding Transcription SubReactions...                                       : 100
Processing StoichiometricData in SubReactionData...                        : 100
Adding reaction subsystems from M-model into the ME-model...               : 100
Processing StoichiometricDat

ME-model was saved in the ../species_files/Pseudomonas_files/individual_species/CP068238/./outputs directory as MEModel-step2-Pseudomonas_aeruginosa_A39-1.pkl
ME-model reconstruction is done.
Number of metabolites in the ME-model is 6604 (+300.73%, from 1648)
Number of reactions in the ME-model is 11983 (+500.05%, from 1997)
Number of genes in the ME-model is 1627 (+15.23%, from 1412)
The MINOS and quad MINOS solvers are a courtesy of Prof Michael A. Saunders. Please cite Ma, D., Yang, L., Fleming, R. et al. Reliable and efficient solution of genome-scale models of Metabolism and macromolecular Expression. Sci Rep 7, 40863 (2017). https://doi.org/10.1038/srep40863

~ Troubleshooting started...
  Checking if the ME-model can simulate growth without gapfilling reactions...
