In [105]:
# imports
import os
import sys

# code for enabling this notebook to work within cursor
coralme_dir = '../'#/home/chris/zuniga/coralme/'
sys.path.insert(0, coralme_dir)

import cobra
import coralme
import coralme.solver.solver
import coralme.builder.main
import coralme.core.model
import pickle
import requests
import pandas as pd
import numpy as np

def create_m_to_me_mapping(m_model, me_model):
    """
    Create a dictionary mapping M model reaction IDs to their corresponding ME model reaction IDs
    
    Parameters:
    -----------
    m_model : cobra.Model
        Original M model
    me_model : coralme.core.MEModel
        ME model generated from M model
        
    Returns:
    --------
    dict
        Dictionary with M model reaction IDs as keys and lists of corresponding ME reaction IDs as values
    """
    m_to_me = {}
    
    # For each M model reaction, find corresponding ME reactions
    for m_rxn in m_model.reactions:
        # Find all ME reactions that contain this M reaction ID
        me_rxns = [me_rxn.id for me_rxn in me_model.reactions 
                  if m_rxn.id in me_rxn.id]
        
        if me_rxns:
            m_to_me[m_rxn.id] = me_rxns
            
    return m_to_me

In [2]:
# look for default starting media and annotate pathways
path = os.path.join(coralme_dir, 'data', 'default_model_exchanges.pkl')
if os.path.exists(path):
    pickle_in = open(path, 'rb')
    default_exchanges = pickle.load(pickle_in)
    pickle_in.close()
else:
    default_exchanges = {}

# create EC to pathway mapping
resp = requests.get('https://rest.kegg.jp/link/ec/pathway')
EC_to_pathways = {}
for line in resp.text.split('\n')[:-1]:
    if 'path:ec' in line: continue
    ec = line.split('\t')[1][3:]
    path = line.split('\t')[0][5:]
    if ec not in EC_to_pathways:
        EC_to_pathways.update({ec : set()})
    EC_to_pathways[ec].add(path)

# now create mapping from pathway to name
resp = requests.get('https://rest.kegg.jp/list/pathway')
pathway_to_name = {}
for line in resp.text.split('\n')[:-1]:
    pathway_to_name.update({line.split('\t')[0] : line.split('\t')[1]})

# loop through each model
spec_to_neg_EX = {}
base_dir = os.path.join(coralme_dir, 'species_files', 'Pseudomonas_files')
for f in os.listdir(os.path.join(base_dir, 'individual_species')):
    if f == 'Reference': continue
        
    # load in ME_model
    out_path = os.path.join(base_dir, 'individual_species', f, 'outputs')
    if os.path.exists(os.path.join(out_path, 'pathway_annotated_model.pkl')):
        print(str(f)+' : pathway annotated model already created, skipping')
        continue
    out_dict = {}
    opts = os.listdir(out_path)
    model_name = None
    for opt in opts:
        if 'step3' in opt:
            model_name = opt
    if not model_name:
        print(str(f)+' : model doesn\'t exist')
        continue
    print(str(f)+' : loading model...', end = '')
    ME_model = coralme.io.pickle.load_pickle_me_model(os.path.join(out_path, model_name))
    model_path = os.path.join(base_dir, 'individual_species', f, 'inputs', 'model.json')
    M_model = cobra.io.load_json_model(model_path)
    
    # look through exchange reactions to figure out what the media type is
    print('finding media exchanges...', end = '')
    spec_to_neg_EX.update({f : set()})
    for rxn in ME_model.reactions:
        if 'EX_' not in rxn.id:
            continue
        elif rxn.lower_bound < 0:
            spec_to_neg_EX[f].add((rxn.id, rxn.lower_bound, rxn.upper_bound))

    # let's also create subsystem annotations in the ME model
    print('mapping pathways...', end = '')
    mapping = create_m_to_me_mapping(M_model, ME_model)
    for M_rxn, ME_rxns in mapping.items():
        if 'ec-code' not in M_model.reactions.get_by_id(M_rxn).annotation: continue
        pathways = set()
        for ec_code in M_model.reactions.get_by_id(M_rxn).annotation['ec-code']:
            if ec_code not in EC_to_pathways: continue
            pathways = pathways.union(EC_to_pathways[ec_code])
        pathways = list(pathways)
        pathways.sort()
        for rxn in ME_rxns:
            ME_model.reactions.get_by_id(rxn).annotation.update({'pathways' : pathways})

    # let's also pass along the GPR rules
    for M_rxn, ME_rxns in mapping.items():
        M_rxn = M_model.reactions.get_by_id(M_rxn)
        for ME_rxn in ME_rxns:
            ME_model.reactions.get_by_id(ME_rxn).annotation.update({'genes' : set(M_rxn.gpr.genes)})
        
    # now let's save the ME model with the annotations
    print('saving model...', end = '')
    coralme.io.pickle.save_pickle_me_model(ME_model, os.path.join(out_path, 'pathway_annotated_model.pkl'))
    print(' done!')

# save off changes
pickle_out = open(path, 'wb')
pickle.dump(default_exchanges, pickle_out)
pickle_out.close()

CP014784 : pathway annotated model already created, skipping
CP061848 : pathway annotated model already created, skipping
CP022560 : model doesn't exist
CP024478 : model doesn't exist
LS483372 : loading model...finding media exchanges...mapping pathways...saving model... done!
CP015225 : loading model...finding media exchanges...mapping pathways...saving model... done!
CP008896 : loading model...finding media exchanges...mapping pathways...saving model... done!
CP069317 : model doesn't exist
CP058975 : model doesn't exist
CP022561 : model doesn't exist
CP000712 : model doesn't exist
CP012830 : loading model...finding media exchanges...mapping pathways...saving model... done!
CP065866 : pathway annotated model already created, skipping
AP014655.1 : model doesn't exist
CP020100 : model doesn't exist
CP070982 : loading model...finding media exchanges...mapping pathways...saving model... done!
AE004091.2 : pathway annotated model already created, skipping
CP045359 : model doesn't exist
CP0

In [95]:
# check for solutions, run if nonexistant
base_dir = os.path.join(coralme_dir, 'species_files', 'Pseudomonas_files')
for f in os.listdir(os.path.join(base_dir, 'individual_species')):
    if 'Reference' in f: continue
    
    # look to see if solution already exists
    sol_path = os.path.join(base_dir, 'individual_species', f, 'outputs', 'flux_solution.pkl')
    flux_path = os.path.join(base_dir, 'individual_species', f, 'outputs', 'flux_solution_dict.pkl')
    if os.path.exists(sol_path) and os.path.exists(flux_path):
        pass#print(str(f)+' : already run') zzz xxx todo uncomment this
        #continue
    
    # load in ME_model
    out_path = os.path.join(base_dir, 'individual_species', f, 'outputs')
    if not os.path.exists(os.path.join(out_path, 'pathway_annotated_model.pkl')):
        print(str(f)+' : pathway annotated model does not exist, skipping')
        continue
    print(str(f)+' : working...', end = '')
    ME_model = coralme.io.pickle.load_pickle_me_model(os.path.join(out_path, 'pathway_annotated_model.pkl'))
    
    # run on this default medium
    solution = ME_model.optimize(max_mu = 0.5, min_mu = 0.01, tolerance = 1e-4, maxIter = 20)
    
    # if there is a solution, create pathway to flux and annotation, save them
    if solution:
        print('saving solution...', end = '')
        rxns = []
        fluxes = []
        pathways = []
        for rxn, flux in ME_model.solution.fluxes.items():
            ME_rxn = ME_model.reactions.get_by_id(rxn)
            if 'pathways' not in ME_rxn.annotation: continue
            rxns.append(rxn)
            fluxes.append(flux)
            pathways.append(ME_rxn.annotation['pathways'])
        flux_df = pd.DataFrame(index = rxns)
        flux_df['flux'] = fluxes
        flux_df['pathways'] = pathways
        flux_df.to_pickle(flux_path)
        
        # save off solution
        pickle_out = open(sol_path, 'wb')
        pickle.dump(ME_model.solution, pickle_out)
        pickle_out.close()
    else:
        print('no solution...', end = '')
        # save something bogus to prevent rerunning this
        pickle_out = open(flux_path, 'wb')
        pickle.dump(False, pickle_out)
        pickle_out.close()
        
        # save off solution
        pickle_out = open(sol_path, 'wb')
        pickle.dump(False, pickle_out)
        pickle_out.close()
    print(' done!')

CP014784 : working...Iteration	 Solution to check	Solver Status
---------	------------------	-------------
        1	0.2550000000000000	Not feasible
        2	0.1325000000000000	Not feasible
        3	0.0712500000000000	Not feasible
        4	0.0406250000000000	Not feasible
        5	0.0253125000000000	Not feasible
        6	0.0176562500000000	Not feasible
        7	0.0138281250000000	Not feasible
        8	0.0119140625000000	Not feasible
        9	0.0109570312500000	Not feasible
       10	0.0104785156250000	Not feasible
       11	0.0102392578125000	Not feasible
       12	0.0101196289062500	Not feasible
       13	0.0100598144531250	Not feasible
no solution... done!
CP061848 : working...Iteration	 Solution to check	Solver Status
---------	------------------	-------------
        1	0.2550000000000000	Not feasible
        2	0.1325000000000000	Not feasible
        3	0.0712500000000000	Not feasible
        4	0.0406250000000000	Not feasible
        5	0.0253125000000000	Not feasible
        6

KeyboardInterrupt: 

In [108]:
# plotting
pathways_df = pd.DataFrame(index = pathway_to_name.keys())

# let's loop through all of these to pull out flux_dfs
base_dir = os.path.join(coralme_dir, 'species_files', 'Pseudomonas_files')
for f in os.listdir(os.path.join(base_dir, 'individual_species')):
    if 'Reference' in f: continue
    
    # look to see if solution already exists
    sol_path = os.path.join(base_dir, 'individual_species', f, 'outputs', 'flux_solution.pkl')
    flux_path = os.path.join(base_dir, 'individual_species', f, 'outputs', 'flux_solution_dict.pkl')
    if not os.path.exists(sol_path) or not os.path.exists(flux_path):
        print(str(f)+' : not yet run')
        continue
    
    # load in results
    pickle_in = open(sol_path, 'rb')
    sol = pickle.load(pickle_in)
    pickle_in.close()
    pickle_in = open(flux_path, 'rb')
    flux_df = pickle.load(pickle_in)
    pickle_in.close()
    if type(flux_df) == bool or len(flux_df) == 0:
        continue
    
    # reorganize by pathway groups
    pathway_to_fluxes = {}
    for index, row in flux_df.iterrows():
        pathways = row['pathways']
        for pathway in pathways:
            if pathway not in pathway_to_fluxes:
                pathway_to_fluxes.update({pathway : []})
            pathway_to_fluxes[pathway].append(row['flux'])
    
    # take max of each to represent the flux through the map (not sure what the best strategy is, maybe manually pick bottlenecks?)
    vals = []
    for pathway in pathways_df.index:
        if pathway in pathway_to_fluxes:
            # copying from https://www.nature.com/articles/s41467-020-17612-8
            total = sum(pathway_to_fluxes[pathway])
            abs_total = sum([abs(val) for val in pathway_to_fluxes[pathway]])
            if abs_total == 0:
                vals.append(0)
            else:
                vals.append(total / abs_total)
        else:
            vals.append(None)
    pathways_df[f] = vals

# now we can plot
pathways_df = pathways_df.dropna(0, how = 'all')
x_vals = []
y_vals = []
i = 0
for index in pathways_df.index:
    for col in pathways_df.columns:
        val = pathways_df.loc[index][col]
        x_vals.append(i)
        y_vals.append(val)
x_vals = np.range(0, len(pathways_df))
plt.plot(x_vals, y_vals)
plt.xticks(np.range(0, max(x_vals)), [pathway_to_name[pathway] for pathway in pathway_df.index], rotation = 45)
plt.xlabel('Pathways')
plt.ylabel('Flux by Pathway (mmol/gDW/h)')
plt.savefig(os.path.join(coralme_dir, 'figures', 'pathway_flux.pdf'), transparent = True)
plt.show()

CP022560 : not yet run
CP024478 : not yet run
CP069317 : not yet run
CP058975 : not yet run
CP022561 : not yet run
CP000712 : not yet run
CP012830 : not yet run
CP065866 : not yet run
AP014655.1 : not yet run
CP020100 : not yet run
CP070982 : not yet run
AE004091.2 : not yet run
CP045359 : not yet run
CP045349 : not yet run
CP012831 : not yet run
CP053697 : not yet run
CP026386 : not yet run
CP039749 : not yet run
CP065867 : not yet run
CP045416 : not yet run
LR590473 : not yet run
CP022562 : not yet run
CP038001 : not yet run
CP068238 : not yet run
CP061335 : not yet run
CP008749.1 : not yet run
CP073105 : not yet run
CP032419 : not yet run
AP024503 : not yet run
CP043320 : not yet run
AP022324 : not yet run
CP063780 : not yet run
CP076683 : not yet run
CP050291 : not yet run
CP060288 : not yet run
CP041013 : not yet run
CP065865 : not yet run
CP043179 : not yet run


  pathways_df = pathways_df.dropna(0, how = 'all')


AttributeError: module 'numpy' has no attribute 'range'

In [None]:
np.linra