In [3]:
from IPython.display import display, HTML, Math, Markdown
# widen the notebook container to 95% of the browser window
display(HTML("<style>.container { width:95% !important; }</style>"))

import sys
import os
# Put coralme_dir first on sys.path so the `import coralme` statements below
# search that directory before any other location.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
coralme_dir = '/home/chris/zuniga/coralme/'
sys.path.insert(0, coralme_dir)
# code for enabling this notebook to work within cursor


import importlib
import coralme
import coralme.solver.solver
import coralme.builder.main
import coralme.core.model
# os is already imported above; this repeat is harmless
import os
import pandas as pd
import json
import cobra

from coralme.builder.main import MEBuilder

from Bio import Entrez, SeqIO
# contact address Biopython attaches to NCBI Entrez requests
Entrez.email = 'cdalldorf@sdsu.edu'

# Function to download GenBank file
def download_genbank(ref_id, output_dir, verbose = False):
    """Download one GenBank record from NCBI and save it as ``genome.gb``.

    Parameters
    ----------
    ref_id : str
        Identifier passed to ``Entrez.efetch`` as ``id`` (``nucleotide`` database).
    output_dir : str
        Directory that receives the file. It is not created here; the record
        is written to ``<output_dir>/genome.gb``, overwriting any existing file.
    verbose : bool, optional
        When true, print progress and error messages.

    Returns
    -------
    int
        1 if the record was fetched and written, 0 if any exception occurred
        (the exception is reported only when ``verbose`` is true).
    """
    try:
        if verbose: print(f"Fetching {ref_id}...")
        # read the whole record first so the network handle is closed
        # before anything is written to disk
        with Entrez.efetch(db="nucleotide", id=ref_id, rettype="gb", retmode="text") as handle:
            gb_record = handle.read()
        output_file = os.path.join(output_dir, 'genome.gb')
        with open(output_file, "w") as f:
            f.write(gb_record)
        if verbose: print(f"Saved {ref_id} to {output_file}")
        return 1
    except Exception as e:
        # fixed: this previously referenced the undefined name `verbos`, so
        # every failure raised NameError here instead of returning 0
        if verbose: print(f"Error fetching {ref_id}: {e}")
        return 0

In [4]:
# Go through each row of the genome-information sheet and, for every strain:
# prepare its working directory, fetch the genome, write the organism.json and
# input.json configuration files, convert the M-model to JSON if needed, then
# build and troubleshoot (gapfill) the ME-model.
base_dir = os.path.join(coralme_dir, 'species_files', 'Pseudomonas_files')
info_df = pd.read_excel(os.path.join(base_dir,'Strain_models_genome-information_predictedM9.xlsx'), index_col = 0)
for index, row in info_df.iterrows():
    # the row labelled 'Reference' is not built here
    if index == 'Reference': continue

    # per-strain locations (out_dir keeps its trailing slash, as before)
    out_dir = base_dir+'/individual_species/'+index+'/'
    inputs_dir = os.path.join(out_dir, 'inputs')
    outputs_dir = os.path.join(out_dir, 'outputs')
    model_name = row['organism'].replace(' ', '_')+'_'+row['strain'].replace(' ','_')

    # check if run: skip strains whose step-3 "-TS" pickle already exists
    # (presumably written by the troubleshoot step below; confirm)
    out_model = os.path.join(outputs_dir, 'MEModel-step3-'+model_name+'-TS.pkl')
    if os.path.exists(out_model):
        print(str(index)+' skipped, model already exists')
        continue

    # make directories; makedirs also creates out_dir and any missing parent
    # (os.mkdir failed when 'individual_species' did not exist yet)
    os.makedirs(inputs_dir, exist_ok=True)
    os.makedirs(outputs_dir, exist_ok=True)

    # pull genome.gb file from NCBI; download_genbank returns 0 on failure,
    # in which case there is nothing to build from
    genome_file = os.path.join(inputs_dir, 'genome.gb')
    if not os.path.exists(genome_file):
        if not download_genbank(index, inputs_dir):
            print(str(index)+' skipped, could not download genome.gb')
            continue

    # TODO - automatic download of Biolog files
    # you tried this before without success, perhaps do manually
    # NOTE(review): the optional inputs gated by this flag are "biocyc.*"
    # paths; confirm whether "biolog" is the intended name.
    biolog_exists = False

    # let's create organism.json and input.json files
    # input.json is assembled in the same key order as before; the biocyc.*
    # entries are only included when those files are available
    input_json = {
        "m-model-path": "./inputs/model.json",
        "genbank-path": "./inputs/genome.gb",
    }
    if biolog_exists:
        input_json.update({
            "biocyc.genes": "./inputs/genes.txt",
            "biocyc.prots": "./inputs/proteins.txt",
            "biocyc.TUs": "./inputs/TUs.txt",
            "biocyc.RNAs": "./inputs/RNAs.txt",
            "biocyc.seqs": "./inputs/sequences.fasta",
        })
    input_json.update({
        "df_gene_cplxs_mods_rxns": "./outputs/building_data/automated-org-with-refs.xlsx",
        "out_directory": "./outputs",
        "log_directory": "./outputs",
        "run_bbh_blast": True,
        "e_value_cutoff": 1e-10,
        "dev_reference": True,
        "include_pseudo_genes": True,
        "locus_tag" : "locus_tag",
    })
    organism_json = {
        "ME-Model-ID" : model_name,
        "growth_key" : "mu1",
        "complex_cofactors" : {},
        "lipoprotein_precursors" : {},
        "lipid_modifications" : [],
        "flux_of_biomass_constituents" : {},
        "flux_of_lipid_constituents" : {},
        "braun's_lipoprotein" : [],
        "braun's_lpp_flux" : -0.0,
        "braun's_murein_flux" : -0.0,
        "gr_data_doublings_per_hour" : [0, 0.6, 1.0, 1.5, 2.0, 2.5],
        "percent_dna_data" : [0.0592, 0.0512, 0.0330, 0.0252, 0.0222, 0.0208],
        "defer_to_rxn_matrix" : [],
    }

    # paths of the two configuration files handed to MEBuilder
    organism = out_dir+'organism.json'
    inputs = out_dir+'input.json'
    with open(organism, 'w') as f:
        json.dump(organism_json, f, indent = 4)
    with open(inputs, 'w') as f:
        json.dump(input_json, f, indent = 4)

    # convert model file to json if necessary
    matlab_file = base_dir+'/matlab-strain-models/'+index+'.mat'
    json_file = os.path.join(inputs_dir, 'model.json')
    if not os.path.exists(json_file) and os.path.exists(matlab_file):
        model = cobra.io.load_matlab_model(matlab_file)
        cobra.io.save_json_model(model, json_file)

    # create the model; a failure for one strain is reported and the loop
    # moves on to the next strain
    try:
        builder = MEBuilder(organism, inputs)
        builder.generate_files(overwrite=True)
        builder.build_me_model(overwrite=True)
    except Exception as e:
        print(f"Error building model: {e}")
        continue

    # gapfill the model
    builder.troubleshoot(growth_key_and_value = { builder.me_model.mu : 0.001 })

CP065866 skipped, model already exists
CP041013 skipped, model already exists
CP039749 skipped, model already exists
CP065865 skipped, model already exists
CP065867 skipped, model already exists
LR590473 skipped, model already exists
CP008749.1 skipped, model already exists
AE004091.2 skipped, model already exists
CP068238 skipped, model already exists
CP014784 skipped, model already exists
CP061848 skipped, model already exists
CP061335 skipped, model already exists
CP053697 skipped, model already exists
CP070982 skipped, model already exists
LS483372 skipped, model already exists
CP012830 skipped, model already exists
CP012831 skipped, model already exists
CP008896 skipped, model already exists
CP015225 skipped, model already exists
CP060288 skipped, model already exists
CP022562 skipped, model already exists
CP050291 skipped, model already exists
CP043179 skipped, model already exists
AP024503 skipped, model already exists
AP022324 skipped, model already exists
CP022560 skipped, mod