In [1]:
import cobra
from cobra.flux_analysis import parsimonious
from cobra.flux_analysis.variability import find_essential_genes, find_essential_reactions
from cobra.medium.minimal_medium import minimal_medium

In [2]:
# Load the iYL1228 reconstruction from its JSON model file.
# NOTE(review): this is an absolute, machine-specific path, so the cell fails anywhere else;
# consider a path relative to a configurable data directory.
iYL1228 = cobra.io.load_json_model('/home/mjenior/Desktop/active_projects/klebsiella/iYL1228.json')

In [8]:
# Set of every gene identifier in the model, for fast membership checks
test = {gene.id for gene in iYL1228.genes}

In [24]:
'KPN_49390' in test

False

In [27]:
# Second '_'-separated piece of every model gene ID, e.g. 'KPN_00061' -> '00061'
KPN_loci = {gene.id.split('_')[1] for gene in iYL1228.genes}

# Same piece of the locus tag found on every FASTA header line ('>') of sequence.txt
KPHS_loci = set()
with open('sequence.txt', 'r') as inFile:
    for line in inFile:
        if not line.startswith('>'):
            continue
        fields = line.split()
        # Prefer the explicit [locus_tag=...] field wherever it sits in the header,
        # so an extra field ahead of it cannot be mistaken for the locus tag.
        # Fall back to the second field, which is where this parser always looked.
        tagged = [field for field in fields if field.startswith('[locus_tag=')]
        field = tagged[0] if tagged else fields[1]
        locus = field.split('=')[1].rstrip(']')
        KPHS_loci.add(locus.split('_')[1])

# Suffixes present in both sets.
# NOTE(review): this rebinds `test`, which an earlier cell uses for the set of gene IDs.
test = KPN_loci.intersection(KPHS_loci)

In [33]:
len(KPN_loci)

1229

In [31]:
len(iYL1228.genes)

1229

In [15]:
iYL1228.reactions.DHAD1

0,1
Reaction identifier,DHAD1
Name,"Dihydroxy-acid dehydratase (2,3-dihydroxy-3-methylbutanoate)"
Memory address,0x07f4d9ad5c2d0
Stoichiometry,"23dhmb_c --> 3mob_c + h2o_c  (R)-2,3-Dihydroxy-3-methylbutanoate --> 3-Methyl-2-oxobutanoate + H2O H2O"
GPR,KPN_04270
Lower bound,0.0
Upper bound,1000.0


In [None]:
iYL1228.genes.get_by_id('KPN_04422')

In [3]:
iYL1228

0,1
Name,iYL1228
Memory address,0x07f2f843fc450
Number of metabolites,1658
Number of reactions,2262
Number of groups,0
Objective expression,1.0*BIOMASS_ - 1.0*BIOMASS__reverse_091e5
Compartments,"periplasm, cytosol, extracellular space"


In [6]:
# Function to calculate doubling time from objective value
def doublingTime(model):
    """Print a doubling-time estimate derived from the model's objective value.

    The model is optimized once inside a ``with model`` context and the result
    is reported on stdout; nothing is returned.

    Parameters
    ----------
    model : cobra.Model
        Model to optimize. Only ``slim_optimize`` and the context-manager
        protocol are used.
    """
    with model as m:
        # Solve once and reuse the value. error_value=0. sends a failed
        # optimization down the "no objective flux" branch.
        objective_value = m.slim_optimize(error_value=0.)
        if objective_value < 1e-6:
            print('GENRE has no objective flux')
        else:
            # NOTE(review): this reports (1 / objective) * 3600 and labels it minutes.
            # A doubling time is normally ln(2) / growth rate, so confirm the intended
            # units before relying on the number.
            growth = (1. / float(objective_value)) * 3600.
            print(str(round(growth, 2)) + ' minutes doubling time')


# Identifies blocked reactions using COBRApy's default settings
def blockedReactions(model):
    """Find blocked reactions and print the share of the model they make up.

    ``find_blocked_reactions`` is called with no cutoff or fraction-of-optimum
    arguments, so the library defaults apply.

    Parameters
    ----------
    model : cobra.Model
        Model to analyse; the search runs inside a ``with model`` context.

    Returns
    -------
    list
        Whatever ``find_blocked_reactions`` returns for the model. This covers
        all blocked reactions, regardless of whether they carry a GPR.
    """
    with model as m:
        blocked = cobra.flux_analysis.variability.find_blocked_reactions(m)

    # Guard the percentage so a model with no reactions does not divide by zero
    total = len(model.reactions)
    if total:
        fraction = round((float(len(blocked)) / float(total)) * 100., 2)
    else:
        fraction = 0.0
    print(str(fraction) + '% reactions are blocked')

    return blocked


# Identify potentially gapfilled reactions, checks against pFBA solution
def missingGPR(model):
    """Report reactions that have no gene association and carry flux in pFBA.

    Reactions annotated with SBO term ``SBO:0000629`` are ignored. Gene-less
    boundary reactions are excluded from both percentages.

    Parameters
    ----------
    model : cobra.Model
        Model to inspect; it is optimized with parsimonious FBA.

    Returns
    -------
    set
        IDs of non-boundary reactions without genes whose absolute pFBA flux
        exceeds 1e-5.
    """
    # Collected once so the loop does not re-evaluate model.boundary per reaction
    boundary_ids = set(rxn.id for rxn in model.boundary)

    noGene = []
    exclude = []
    for rxn in model.reactions:
        if len(rxn.genes) > 0:
            continue
        # .get() keeps reactions that have no SBO annotation at all from raising KeyError
        if rxn.annotation.get('sbo') == 'SBO:0000629':
            continue
        if rxn.id in boundary_ids:
            exclude.append(rxn.id)
        else:
            noGene.append(rxn.id)

    solution = parsimonious.pfba(model)
    active_rxns = set(rxn.id for rxn in model.reactions if abs(solution.fluxes[rxn.id]) > 1e-5)
    active_rxns = active_rxns.difference(exclude)
    noGene_active = set(noGene).intersection(active_rxns)

    # Both denominators can be zero (empty model, or no flux anywhere); report 0.0 then
    considered = len(model.reactions) - len(exclude)
    if considered:
        fraction = round((float(len(noGene)) / float(considered)) * 100., 2)
    else:
        fraction = 0.0
    print(str(fraction) + '% reactions without GPRs')

    if active_rxns:
        fraction = round((float(len(noGene_active)) / float(len(active_rxns))) * 100., 2)
    else:
        fraction = 0.0
    print(str(fraction) + '% of reactions used in pFBA solution have no GPR')

    return noGene_active


# Checks which cytosolic metabolites are generated for free (bacteria only)
def checkFreeMass(model, cytosol='cytosol'):
    """List metabolites of one compartment that can be produced with all uptake closed.

    Every boundary reaction's lower bound is set to 0, then a temporary demand
    reaction is maximized for each metabolite in the chosen compartment. All
    changes happen inside a ``with model`` context.

    Parameters
    ----------
    model : cobra.Model
        Model to test.
    cytosol : str
        Compartment to test, given either as the compartment ID stored on the
        metabolites (e.g. ``'c'``) or as the matching name in
        ``model.compartments`` (e.g. ``'cytosol'``).

    Returns
    -------
    list
        IDs of metabolites whose demand reaction reaches a flux above 1e-8.
    """
    free = []
    with model as m:

        # Metabolites store the compartment ID, so a compartment *name* has to be
        # translated first. Comparing the name directly matches nothing and the
        # check silently reports 0% free metabolites.
        targets = {cytosol}
        for comp_id, comp_name in m.compartments.items():
            if comp_name == cytosol:
                targets.add(comp_id)
        candidates = [cpd for cpd in m.metabolites if cpd.compartment in targets]
        if len(candidates) == 0:
            print('No metabolites found in compartment ' + repr(cytosol))

        # Close all exchanges
        for rxn in m.boundary:
            rxn.lower_bound = 0.

        # Create demand for each metabolite and optimize individually
        reset_rxn = m.reactions[0].id
        for cpd in candidates:
            demand = cobra.Reaction('demand')
            demand.bounds = (0., 1000.)
            demand.add_metabolites({cpd: -1.0})
            m.add_reactions([demand])
            m.objective = demand
            obj_val = m.slim_optimize()
            if obj_val > 1e-8:
                free.append(cpd.id)
            # Move the objective off the demand reaction before removing it
            m.objective = reset_rxn
            m.remove_reactions([demand])

    total = len(model.metabolites)
    if total:
        fraction = round((float(len(free)) / float(total)) * 100., 2)
    else:
        fraction = 0.0
    print(str(fraction) + '% metabolites are generated for free')

    return free


# Check for mass and charge balance in reactions
def checkBalance(model):
    """List reactions whose mass balance check reports an elemental imbalance.

    Reactions annotated with SBO term ``SBO:0000629``, boundary reactions, and
    reactions whose ``check_mass_balance`` raises ``ValueError`` are skipped.
    A reaction is reported only when its imbalance involves at least one
    chemical element, so a charge-only imbalance is not counted.

    Parameters
    ----------
    model : cobra.Model
        Model to inspect.

    Returns
    -------
    list
        IDs of mass-imbalanced reactions; empty when no metabolite carries
        elemental data.
    """
    massImbal = []
    with model as m:

        # Every element that appears in any metabolite formula
        elements = set()
        for cpd in m.metabolites:
            try:
                elements |= set(cpd.elements.keys())
            except Exception:
                # Best effort: a metabolite with unusable formula data contributes nothing
                pass

        if len(elements) == 0:
            print('No elemental data associated with metabolites!')
            return massImbal

        # Collected once so the loop does not re-evaluate m.boundary per reaction
        boundary_ids = set(rxn.id for rxn in m.boundary)

        for rxn in m.reactions:
            # .get() keeps reactions with no SBO annotation from raising KeyError
            if rxn.annotation.get('sbo') == 'SBO:0000629':
                continue
            if rxn.id in boundary_ids:
                continue

            try:
                imbalance = rxn.check_mass_balance()
            except ValueError:
                continue

            # Ignore entries that are not chemical elements
            if set(imbalance).intersection(elements):
                massImbal.append(rxn.id)

    total = len(model.reactions)
    if total:
        fraction = round((float(len(massImbal)) / float(total)) * 100., 2)
    else:
        fraction = 0.0
    print(str(fraction) + '% reactions are mass imbalanced')

    return massImbal


def basicCheck(model):
    """Print a short structural summary of the model, then its doubling time.

    Reports whether the model has more or fewer reactions than metabolites,
    the number of compartments and the number of genes, and finishes by
    calling ``doublingTime``.
    """
    n_reactions = len(model.reactions)
    n_metabolites = len(model.metabolites)

    # Determination: nothing is printed when the two counts are equal
    if n_reactions < n_metabolites:
        print('GENRE is overdetermined')
    elif n_reactions > n_metabolites:
        print('GENRE is underdetermined')

    # Compartments
    n_compartments = len(model.compartments)
    print('GENRE has {} compartment(s)'.format(n_compartments))

    # Genes
    n_genes = len(model.genes)
    if n_genes == 0:
        print('GENRE has no gene data')
    else:
        print('GENRE has {} genes'.format(n_genes))

    # Growth
    doublingTime(model)



In [5]:
# Allow unrestricted flux in both directions through every exchange reaction
for exchange in iYL1228.exchanges:
    exchange.bounds = (-1000., 1000.)

In [10]:
# Run each quality check on the model; every call prints its own summary
basicCheck(iYL1228)
# NOTE(review): despite the name, this holds ALL blocked reactions. blockedReactions
# returns its full list, not only the reactions without a GPR.
draft_noGPRblocked = blockedReactions(iYL1228)
draft_free = checkFreeMass(iYL1228)
draft_massImbal = checkBalance(iYL1228)
draft_nogene = missingGPR(iYL1228)

GENRE is underdetermined
GENRE has 3 compartment(s)
GENRE has 1229 genes
53.94 minutes doubling time
22.37% reactions are blocked
0.0% metabolites are generated for free
No elemental data associated with metabolites!
3.86% reactions without GPRs
2.63% of reactions used in pFBA solution have no GPR


In [46]:
iYL1228.metabolites.arab__L_e

0,1
Metabolite identifier,arab__L_e
Name,L-Arabinose
Memory address,0x07f2f9c7b3810
Formula,
Compartment,e
In 2 reaction(s),"EX_arab__L_e, ARBtex"


In [12]:
iYL1228.reactions.ARAI

0,1
Reaction identifier,ARAI
Name,L-arabinose isomerase
Memory address,0x07f2070ab3050
Stoichiometry,arab__L_c <=> rbl__L_c  L-Arabinose <=> L-Ribulose
GPR,KPN_00061
Lower bound,-1000.0
Upper bound,1000.0


In [47]:
iYL1228.genes.KPN_00061

0,1
Gene identifier,KPN_00061
Name,araA
Memory address,0x07f2f9c687450
Functional,True
In 1 reaction(s),ARAI


In [67]:
iYL1228.metabolites.get_by_id('lyx__L_c')

0,1
Metabolite identifier,lyx__L_c
Name,L-Lyxose
Memory address,0x07f2f9c776a90
Formula,
Compartment,c
In 2 reaction(s),"LYXI, LYXt2pp"


In [69]:
iYL1228.reactions.get_by_id('MAN6PI')

0,1
Reaction identifier,MAN6PI
Name,Mannose-6-phosphate isomerase
Memory address,0x07f2f84277790
Stoichiometry,man6p_c <=> f6p_c  D-Mannose 6-phosphate <=> D-Fructose 6-phosphate
GPR,KPN_01515
Lower bound,-1000.0
Upper bound,1000.0


In [None]:
# Reactions from Mary:
# GLYCDH = Glycerol -> 3-HPA + H2O
# 13PPDH = 3-HPA + NADH2 -> 13ppd_c + NAD
# TODO: check whether these reactions are already present in the model


In [None]:


# D-mannose
# D/L-galactose

# Arabinose
# https://pubmed.ncbi.nlm.nih.gov/17189171/
# gene = KPN_00061



# https://pubmed.ncbi.nlm.nih.gov/350845/
