In [1]:
import pandas as pd
import cobra
from cobra.io import load_json_model
from cobra.flux_analysis import single_gene_deletion, single_reaction_deletion, double_gene_deletion
import numpy as np

import re
from glob import glob
from Bio import Entrez, SeqIO

# I. Gene comparison EcN aerobic vs. anaerobic

In [2]:
# Load EcN model
EcN_ID = 'CP022686.1'
EcN_model = cobra.io.load_json_model('../data/models/%s_cur_4.8.json'%EcN_ID)

In [3]:
#Establish a definition that initializes models to an in silico representation of M9 media
def m9(model):
    for reaction in model.reactions:
        if 'EX_' in  reaction.id:
            reaction.lower_bound=0 
            
    model.reactions.EX_ca2_e.lower_bound=-1000
    model.reactions.EX_cl_e.lower_bound=-1000
    model.reactions.EX_co2_e.lower_bound=-1000
    model.reactions.EX_cobalt2_e.lower_bound=-1000
    model.reactions.EX_cu2_e.lower_bound=-1000
    model.reactions.EX_fe2_e.lower_bound=-1000
    model.reactions.EX_fe3_e.lower_bound=-1000
    model.reactions.EX_h_e.lower_bound=-1000
    model.reactions.EX_h2o_e.lower_bound=-1000
    model.reactions.EX_k_e.lower_bound=-1000
    model.reactions.EX_mg2_e.lower_bound=-1000
    model.reactions.EX_mn2_e.lower_bound=-1000
    model.reactions.EX_mobd_e.lower_bound=-1000
    model.reactions.EX_na1_e.lower_bound=-1000
    model.reactions.EX_tungs_e.lower_bound=-1000
    model.reactions.EX_zn2_e.lower_bound=-1000
    model.reactions.EX_ni2_e.lower_bound=-1000
    model.reactions.EX_sel_e.lower_bound=-1000
    model.reactions.EX_slnt_e.lower_bound=-1000
    model.reactions.EX_glc__D_e.lower_bound=-20
    model.reactions.EX_so4_e.lower_bound=-1000
    model.reactions.EX_nh4_e.lower_bound=-1000
    model.reactions.EX_pi_e.lower_bound=-1000
    model.reactions.EX_cbl1_e.lower_bound=-.01
    model.reactions.EX_o2_e.lower_bound=-20
       
    return model

In [4]:
# Get growth rate for each single gene knockout
with EcN_model:
    m9(EcN_model)
    wt_growth_ae = EcN_model.optimize().objective_value
    ae_results = single_gene_deletion(EcN_model)
    
with EcN_model:
    m9(EcN_model)
    EcN_model.reactions.EX_o2_e.lower_bound=0
    wt_growth_an = EcN_model.optimize().objective_value
    an_results = single_gene_deletion(EcN_model)

ae_results.head()

Unnamed: 0,ids,growth,status
0,{CIW80_06350},1.092734e-15,optimal
1,{CIW80_17865},1.180973,optimal
2,{CIW80_01255},1.180973,optimal
3,{CIW80_06590},2.015547e-13,optimal
4,{CIW80_23035},1.180973,optimal


In [5]:
# Get binary representation of gene essentiality
ae_results['binary_ae']= ae_results['growth'].map(lambda x: 0 if x < (0.05 * wt_growth_ae) else 1)
an_results['binary_an']= an_results['growth'].map(lambda x: 0 if x < (0.05 * wt_growth_an) else 1)

In [6]:
# Check whether there are genes for which the status is not "optimal"
ae_results[ae_results.status != 'optimal']

Unnamed: 0,ids,growth,status,binary_ae


In [7]:
# Check whether there are genes for which the status is not "optimal"
infeasible_an = an_results[an_results.status != 'optimal'].index.tolist()

# Assume infeasible result as growth deficient
for gene in infeasible_an:
    an_results.loc[gene, 'binary_an'] = 0
    
an_results[an_results.status != 'optimal']

Unnamed: 0,ids,growth,status,binary_an
456,{CIW80_08355},,infeasible,0
538,{CIW80_01800},,infeasible,0
896,{CIW80_07580},,infeasible,0


In [8]:
# Unpack the index values from the frozenset to enable comparison to the ortho_matrices
for del_result in [ae_results, an_results]:
    for value in del_result.index:
        unpacked, = del_result.loc[value, 'ids']
        del_result.loc[value, 'gene'] = unpacked

    # Set the unpacked gene name as index
    del_result.set_index('gene', inplace=True)

In [9]:
# Remove the other two columsn
ae_results.drop(columns=['growth', 'status'], inplace=True)
an_results.drop(columns=['growth', 'status'], inplace=True)

# Merge the two dataframes and 
del_results = pd.merge(ae_results, an_results, left_index=True, right_index=True)

# Get an overview of the total number of essential genes
print('total =', len(del_results))
print('Aerobic essential =', len(del_results) - del_results['binary_ae'].sum())
print('Anaerobic essential =', len(del_results) - del_results['binary_an'].sum())

# Identify the genes that are only essential in one of the two conditions
del_results.drop(columns=['ids_x', 'ids_y'], inplace=True) # Drop the ids columns
del_results['diff'] = del_results['binary_ae'] - del_results['binary_an']
del_results[del_results['diff'] != 0]

total = 1533
Aerobic essential = 198
Anaerobic essential = 202


Unnamed: 0_level_0,binary_ae,binary_an,diff
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CIW80_08355,1,0,1
CIW80_14315,1,0,1
CIW80_01800,1,0,1
CIW80_07580,1,0,1


In [10]:
del_results_ae = del_results[del_results['binary_ae'] == 0]
del_results_an = del_results[del_results['binary_an'] == 0]

# Save the resulting dataframe as .csv
del_results.to_csv('../tables/gene_ess_del_results.csv')
del_results_ae.to_csv('../tables/gene_ess_del_results_ae.csv')
del_results_an.to_csv('../tables/gene_ess_del_results_an.csv')

In [11]:
print(del_results.sum())

binary_ae    1335
binary_an    1331
diff            4
dtype: int64


In [12]:
# Find out which genes were essential only in anaerobic conditions
for gene in del_results[del_results['diff'] != 0].index:
    name = EcN_model.genes.get_by_id(gene).name
    print(name)

pgk
hemN
gapA
eno


### Investigate hemN

In [13]:
import escher
from escher import Builder

In [14]:
builder = Builder(
    map_name='iJO1366.Central metabolism',
    model_name='iJO1366')

Downloading Map from https://escher.github.io/1-0-0/6/maps/Escherichia%20coli/iJO1366.Central%20metabolism.json
Downloading Model from https://escher.github.io/1-0-0/6/models/Escherichia%20coli/iJO1366.json


In [15]:
builder

Builder()

In [16]:
with EcN_model:
    m9(EcN_model)
    EcN_model.reactions.EX_o2_e.lower_bound=0
    solution = EcN_model.optimize()

In [17]:
builder.reaction_data = solution.fluxes

In [18]:
EcN_model.genes.CIW80_14315

0,1
Gene identifier,CIW80_14315
Name,hemN
Memory address,0x1ec3c9f9900
Functional,True
In 1 reaction(s),CPPPGO2


In [19]:
EcN_model.genes.CIW80_06055

0,1
Gene identifier,CIW80_06055
Name,hemF
Memory address,0x1ec3c9980a0
Functional,True
In 1 reaction(s),CPPPGO


In [20]:
EcN_model.reactions.CPPPGO2

0,1
Reaction identifier,CPPPGO2
Name,Oxygen Independent coproporphyrinogen-III oxidase
Memory address,0x1ec3e1bf520
Stoichiometry,2.0 amet_c + cpppg3_c --> 2.0 co2_c + 2.0 dad_5_c + 2.0 met__L_c + pppg9_c  2.0 S-Adenosyl-L-methionine + Coproporphyrinogen III --> 2.0 CO2 CO2 + 2.0 5'-Deoxyadenosine + 2.0 L-Methionine + Protoporphyrinogen IX
GPR,CIW80_14315
Lower bound,0.0
Upper bound,1000.0


In [21]:
EcN_model.reactions.CPPPGO

0,1
Reaction identifier,CPPPGO
Name,Coproporphyrinogen oxidase (O2 required)
Memory address,0x1ec3cae1870
Stoichiometry,cpppg3_c + 2.0 h_c + o2_c --> 2.0 co2_c + 2.0 h2o_c + pppg9_c  Coproporphyrinogen III + 2.0 H+ + O2 O2 --> 2.0 CO2 CO2 + 2.0 H2O H2O + Protoporphyrinogen IX
GPR,CIW80_06055
Lower bound,0.0
Upper bound,1000.0


The oxygen-dependency of hemF makes it unable to rescue grown anaerobically for an hemN knockout strain.

# II. Essential reactions EcN & subsystem

In [22]:
# Get growth rate for each single gene knockout
with EcN_model:
    m9(EcN_model)
    wt_growth_ae = EcN_model.optimize().objective_value
    ae_rxn = single_reaction_deletion(EcN_model)

# Get binary representation of gene essentiality
ae_rxn['binary']= ae_rxn['growth'].map(lambda x: 0 if x < (0.05 * wt_growth_ae) else 1)

# Check whether there are genes for which the status is not "optimal"
inf_rxn_ae = ae_rxn[ae_rxn.status != 'optimal'].index.tolist()

# Assume infeasible result as growth deficient
for gene in inf_rxn_ae:
    ae_rxn.loc[gene, 'binary'] = 0
ae_rxn[ae_rxn.status != 'optimal']

Unnamed: 0,ids,growth,status,binary
2643,{GLCtex},,infeasible,0
2978,{EX_glc__D_e},,infeasible,0


In [23]:
# Unpack the index values from the frozenset to enable comparison to the ortho_matrices
for value in ae_rxn.index:
    unpacked, = ae_rxn.loc[value, 'ids']
    ae_rxn.loc[value, 'reaction'] = unpacked
    
# Set the unpacked gene name as index
ae_rxn = ae_rxn.set_index('reaction')
ae_rxn.rename(columns = {'binary':'ae_rxn_growth'}, inplace=True)

# Get the subsystem for all reactions
for rxn in ae_rxn.index.tolist():
    ae_rxn.loc[rxn, 'Subsystem'] = EcN_model.reactions.get_by_id(rxn).subsystem
    
print('total =', len(ae_rxn))
print('Essential =', len(ae_rxn) - ae_rxn.ae_rxn_growth.sum())
print('Non-essential =', ae_rxn.ae_rxn_growth.sum())

ae_rxn

total = 3143
Essential = 278
Non-essential = 2865


Unnamed: 0_level_0,ids,growth,status,ae_rxn_growth,Subsystem
reaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACACT5r,{ACACT5r},1.164489,optimal,1,Membrane Lipid Metabolism
S2FE2SS2,{S2FE2SS2},1.180973,optimal,1,Cofactor and Prosthetic Group Biosynthesis
GALUi,{GALUi},1.180973,optimal,1,Cell Envelope Biosynthesis
AGt3,{AGt3},1.180973,optimal,1,Inorganic Ion Transport and Metabolism
EX_g3ps_e,{EX_g3ps_e},1.180973,optimal,1,Extracellular exchange
...,...,...,...,...,...
GSNt3pp,{GSNt3pp},1.180973,optimal,1,"Transport, Inner Membrane"
GLYMETtpp,{GLYMETtpp},1.180973,optimal,1,"Transport, Inner Membrane"
LPLIPAL1E141pp,{LPLIPAL1E141pp},1.180973,optimal,1,Glycerophospholipid Metabolism
CLBTS10,{CLBTS10},1.180973,optimal,1,Secondary metabolite biosynthesis


In [25]:
gene_ess_sub = ae_rxn[ae_rxn['ae_rxn_growth'] == 0 ]['Subsystem'].value_counts()
gene_ess_sub

Cofactor and Prosthetic Group Biosynthesis            77
Cell Envelope Biosynthesis                            17
Purine and Pyrimidine Biosynthesis                    17
Tyrosine, Tryptophan, and Phenylalanine Metabolism    16
Extracellular exchange                                16
Glycerophospholipid Metabolism                        15
Valine, Leucine, and Isoleucine Metabolism            14
Transport, Outer Membrane                             12
Threonine and Lysine Metabolism                       12
Lipopolysaccharide Biosynthesis / Recycling           12
Arginine and Proline Metabolism                       12
Methionine Metabolism                                  9
Histidine Metabolism                                   9
Cysteine Metabolism                                    6
Transport, Inner Membrane                              5
Nucleotide Salvage Pathway                             5
Inorganic Ion Transport and Metabolism                 4
Citric Acid Cycle              

In [26]:
gene_ess_sub.to_csv('../tables/gene_ess_sub.csv')

# VI. Double gene essentiality

In [None]:
# Create a list of all non-essential genes
ae_non = del_results[del_results['binary_ae'] == 1].index.tolist()

# Run the double gene deletion with only the non-essential genes
with EcN_model:
    m9(EcN_model)
    double_ae = double_gene_deletion(EcN_model, ae_non, return_frame=True).round(4)
    
double_ae.head()

In [None]:
# Get binary representation of gene essentiality
double_ae['binary']= double_ae['growth'].map(lambda x: 0 if x < (0.05 * wt_growth_ae) else 1)

# Check whether there are genes for which the status is not "optimal"
inf_double = double_ae[double_ae.status != 'optimal'].index.tolist()

# Assume infeasible result as growth deficient
for gene in inf_double:
    double_ae.loc[gene, 'binary'] = 0
    
double_ae[double_ae.status != 'optimal']

In [None]:
double_ess = double_ae[double_ae.status == 'optimal']
double_ess = double_ess[double_ess['binary'] == 0]
print(len(double_ess))
double_ess.head()

In [None]:
# Save the resulting dataframe as .csv
double_ess.to_csv('../tables/gene_ess_double_ess.csv')

### Anaerobic

In [None]:
# Create a list of all non-essential genes
an_non = del_results[del_results['binary_an'] == 1].index.tolist()

# Run the double gene deletion with only the non-essential genes
with EcN_model:
    m9(EcN_model)
    EcN_model.reactions.EX_o2_e.lower_bound=0
#     wt_growth_an = EcN_model.optimize().objective_value
    double_an = double_gene_deletion(EcN_model, an_non, return_frame=True).round(4)
    
double_an.head()

In [None]:
# Get binary representation of gene essentiality
double_an['binary']= double_an['growth'].map(lambda x: 0 if x < (0.05 * wt_growth_an) else 1)

# Check whether there are genes for which the status is not "optimal"
inf_double_an = double_an[double_an.status != 'optimal'].index.tolist()

# Assume infeasible result as growth deficient
for gene in inf_double_an:
    double_an.loc[gene, 'binary'] = 0
    
double_an[double_an.status != 'optimal']

In [None]:
double_ess_an = double_an[double_an.status == 'optimal']
double_ess_an = double_ess_an[double_ess_an['binary'] == 0]
print(len(double_ess_an))
double_ess_an.head()

In [None]:
# Save the resulting dataframe as .csv
double_ess_an.to_csv('../tables/gene_ess_double_ess_an.csv')

In [None]:
# Get the names of the genes in the right format to compare
EcN_double = double_ess.copy()

for EcN_loc in EcN_double.index.tolist():
    EcN_id = EcN_double.loc[EcN_loc, 'ids']
    EcN_genes = []
    for gene in EcN_id:
        EcN_genes.append(str(gene))
    EcN_double.loc[EcN_loc, 'EcN'] = str(EcN_genes)

EcN_double_an = double_ess_an.copy()
for EcN_loc in EcN_double_an.index.tolist():
    EcN_id = EcN_double_an.loc[EcN_loc, 'ids']
    EcN_genes = []
    for gene in EcN_id:
        EcN_genes.append(str(gene))
    EcN_double_an.loc[EcN_loc, 'EcN'] = str(EcN_genes)

In [None]:
# Create two sets
set_ae = set(EcN_double['EcN'])
set_an = set(EcN_double_an['EcN'])

diff_ae = set_ae - set_an
diff_an = set_an - set_ae
similar = set.intersection(set_ae, set_an)

print(len(diff_ae), len(diff_an), len(similar))

In [None]:
def gene_names(gene_set):
    df = pd.DataFrame(columns=['EcN_id', 'EcN_id_1', 'EcN_id_2', 'EcN_name_1', 'EcN_name_2', 'EcN_names'])

    df['EcN_id'] = list(gene_set)
    for loc in df.index:
        
        #Split the name in two gene_ids
        name = df.loc[loc, 'EcN_id']
        name = name.strip("['").strip("]'").split("', '")
        df.loc[loc, 'EcN_id_1'] = name[0]
        df.loc[loc, 'EcN_id_2'] = name[1]
        
        # Identify the gene names
        df.loc[loc, 'EcN_name_1'] = EcN_model.genes.get_by_id(name[0]).name
        df.loc[loc, 'EcN_name_2'] = EcN_model.genes.get_by_id(name[1]).name
        name = sorted(name)
        df.loc[loc, 'EcN_names'] = str(EcN_model.genes.get_by_id(name[0]).name + ', ' + EcN_model.genes.get_by_id(name[1]).name)
        
    return df

In [None]:
# Get the names of the aerobic double essential genes
gene_overview_ae = gene_names(diff_ae)
gene_overview_ae

In [None]:
# Get the names of the anaerobic double essential genes
gene_overview_an = gene_names(diff_an)
gene_overview_an

The gene combination "iscU/sufD", "trxB/gor" and "xdhC/guaB" are present in both, but in different order. Only "hemF/hemN" is only found in aerobic conditions and "rpe/pgl", "rpe/zwf", "carA/ybcF" and "carB/ybcF" are anaerobic specific.

### Check differences

In [None]:
# Get the reactions
EcN_model.genes.CIW80_06055 #hemF
EcN_model.genes.CIW80_14315 #hemN

In [None]:
# Check the double lethal only for aerobic
with EcN_model:
    m9(EcN_model)
    EcN_model.reactions.EX_o2_e.lower_bound=0
    EcN_model.remove_reactions(['CPPPGO2', 'CPPPGO'])
    print(EcN_model.slim_optimize())

This deletion is lethal in both cases, but was not included for anaerobic, as the deletion of only CIW80_14315 (hemN/CPPPGO2) is already lethal in anaerobic conditions

In [None]:
# Check the double lethal only for anaerobic
with EcN_model:
    m9(EcN_model)
#     EcN_model.reactions.EX_o2_e.lower_bound=0
    EcN_model.remove_reactions(['RPE', 'G6PDH2r'])
    print(EcN_model.slim_optimize())

- 'CIW80_17845', 'CIW80_20730'/'CBPS', 'CBMKr'/ carB + ybcF > Indeed not lethal aerobically
- 'CIW80_17840', 'CIW80_20730'/'CBPS', 'CBMKr'/ carA + ybcF > Indeed not lethal aerobically
<br> CIW80_17840 and CIW80_17845 are together responsible for the reaction CBPS


- 'CIW80_11440', 'CIW80_21825'/'RPE', 'PGL'/ rpe + pgl >  Indeed not lethal aerobically
- 'CIW80_02180', 'CIW80_11440'/'G6PDH2r', 'RPE'/ zwf + rpe > Indeed not lethal aerobically

# VII. Gene comparison EcN & MG1655 model

In [None]:
# Load iML1515 model
k12_model = cobra.io.load_json_model('../data/models/iML1515.json')

# Get growth rate for each single gene knockout
with k12_model:
    m9(k12_model)
    del_result_k12 = single_gene_deletion(k12_model)

In [None]:
# Get a binary representation of the gene essentiality
k12_growth = k12_model.optimize().objective_value

del_result_k12['binary']= del_result_k12['growth'].map(lambda x: 0 if x < (0.05 * k12_growth) else 1)

In [None]:
# Check whether there are genes for which the status is not "optimal"
del_result_k12[del_result_k12.status != 'optimal']

In [None]:
# Unpack the index values from the frozenset to enable comparison to the ortho_matrices
for value in del_result_k12.index:
    unpacked, = del_result_k12.loc[value, 'ids']
    del_result_k12.loc[value, 'gene'] = unpacked
                                   

# Set the unpacked gene name as index
del_result_k12 = del_result_k12.set_index('gene')
del_result_k12.rename(columns = {'binary':'K12_growth'}, inplace=True)
del_result_k12.drop(columns=['ids', 'growth', 'status'], inplace=True)
del_result_k12.head()

In [None]:
# Get the ortho matrix of EcN vs. the reference strains
orthoIDs_matrix = pd.read_csv('../tables/orthoIDs_matrix.csv')
orthoIDs_matrix.set_index('Unnamed: 0', inplace=True)
geneIDs_sub = orthoIDs_matrix.loc[:, 'U00096'] # Get the gene ID orthologs for MG1655

# Merge the ortho_matrix with the EcN gene essentiality matrix
geneIDs_ess = pd.merge(geneIDs_sub, del_results, left_index = True, right_index = True)
geneIDs_ess.head()

In [None]:
# Combine with previously generated EcN essentiality data merged with K12 gene IDs
ess_comp = pd.merge(geneIDs_ess, del_result_k12, left_on ='U00096', right_index =True)

# Find the differences
ess_comp['difference'] = ess_comp['binary_ae'] - ess_comp['K12_growth']
ess_comp[ess_comp['difference'] != 0] 

In [None]:
# Save the resulting dataframe as .csv
ess_comp_k12 = ess_comp#[ess_comp['K12_growth'] == 0]
ess_comp_k12.to_csv('../tables/gene_ess_comp_k12.csv')

# Get the number of shared genes
len(ess_comp[ess_comp['U00096'].str.contains('b')])

In [None]:
for gene in ess_comp[ess_comp['difference'] != 0]['U00096']:
    name = k12_model.genes.get_by_id(gene).name
    print(name)

In [None]:
for gene in ess_comp[ess_comp['difference'] != 0].index:
    name = EcN_model.genes.get_by_id(gene).name
    print(name)

## K12 ess
### ZupT
heavy metal divalent cation transporter ZupT
https://ecocyc.org/gene?orgid=ECOLI&id=EG11167

### proB (corrected)
glutamate 5-kinase
https://ecocyc.org/gene?orgid=ECOLI&id=EG10768

### proA (corrected)
glutamate-5-semialdehyde dehydrogenase
https://ecocyc.org/gene?orgid=ECOLI&id=EG10767

## EcN ess
### argI/argF (corrected)
ornithine carbamoyltransferase
https://ecocyc.org/gene?orgid=ECOLI&id=EG10069

### leuB
3-isopropylmalate dehydrogenase
https://ecocyc.org/gene?orgid=ECOLI&id=EG11577

### can
carbonic anhydrase 2
https://ecocyc.org/gene?orgid=ECOLI&id=EG12319

In [None]:
## zupT
# EcN_model.genes.CIW80_09665
# EcN_model.reactions.COBALT2tpp
# EcN_model.metabolites.cobalt2_c
# EcN_model.reactions.COBALT2t3pp

# EcN_model.reactions.CU2tpp
# EcN_model.metabolites.cu2_p 
# EcN_model.reactions.CU2abcpp

## proA/B
# EcN_model.genes.CIW80_19485
# EcN_model.genes.CIW80_19490
# EcN_model.reactions.G5SD
# EcN_model.metabolites.glu5sa_c 
# EcN_model.reactions.NACODA
# EcN_model.reactions.ACODA
# EcN_model.metabolites.orn_c

## argI/argF
# EcN_model.genes.CIW80_16625
# EcN_model.reactions.OCBT

## leuB
# EcN_model.genes.CIW80_18070
# EcN_model.reactions.IPMD

# can
# EcN_model.genes.CIW80_18385
# EcN_model.reactions.HCO3E

In [None]:
# Check effect of specific reactions encoded by these genes
with k12_model:
    k12_model.remove_reactions(['OCBT'])
    growth = k12_model.slim_optimize()
    print(growth)

# Comparison double lethals

In [None]:
# Create a list of all non-essential genes
ae_non_k12 = del_result_k12[del_result_k12['K12_growth'] == 1].index.tolist()

# Run the double gene deletion with only the non-essential genes
with k12_model:
    m9(k12_model)
    double_ae_k12 = double_gene_deletion(k12_model, ae_non_k12, return_frame=True).round(4)
    
double_ae_k12.head()

In [None]:
# Get binary representation of gene essentiality
double_ae_k12['binary']= double_ae_k12['growth'].map(lambda x: 0 if x < (0.05 * k12_growth) else 1)

# Check whether there are genes for which the status is not "optimal"
inf_double_k12 = double_ae_k12[double_ae_k12.status != 'optimal'].index.tolist()

# Assume infeasible result as growth deficient
for gene in inf_double_k12:
    double_ae_k12.loc[gene, 'binary'] = 0
    
double_ae_k12[double_ae_k12.status != 'optimal']

In [None]:
double_ess_k12 = double_ae_k12[double_ae_k12.status == 'optimal']
double_ess_k12 = double_ess_k12[double_ess_k12['binary'] == 0]

print(len(double_ess_k12))
double_ess_k12.head()

In [None]:
# Add an empty row for EcN as placeholder
double_ess_k12['EcN'] = 0

for k12_loc in double_ess_k12.index.tolist():
    k12_id = double_ess_k12.loc[k12_loc, 'ids']
    EcN_genes = []
    for gene in k12_id:
        EcN_gene = geneIDs_ess[geneIDs_ess['U00096'] == gene].index[0]
        EcN_genes.append(str(EcN_gene))

    double_ess_k12.loc[k12_loc,'EcN'] = str(EcN_genes)

double_ess_k12.head()

In [None]:
# Save the resulting dataframe as .csv
double_ess_k12.to_csv('../tables/gene_ess_double_ess_k12.csv')

In [None]:
# Create two sets
set_1 = set(EcN_double['EcN'].values)
set_2 = set(double_ess_k12['EcN'].values)

# Find the difference & similarity
diff_k12 = set_2 - set_1
diff_EcN = set_1 - set_2
same = set.intersection(set_1, set_2)

print(len(diff_k12), len(diff_EcN), len(same))

The order of some of the genes is different, correct to see the true overlap of the sets

In [None]:
# Get the names of the different gene names
gene_overview_k12 = gene_names(diff_k12)
gene_overview_k12

In [None]:
# Get the names of the different gene names
gene_overview_EcN = gene_names(diff_EcN)
gene_overview_EcN

In [None]:
# Create two sets
set_1 = set(gene_overview_EcN['EcN_names'].values)
set_2 = set(gene_overview_k12['EcN_names'].values)

# Find the difference & similarity
diff_k12 = set_2 - set_1
diff_EcN = set_1 - set_2
same = set.intersection(set_1, set_2)

print(len(diff_k12), len(diff_EcN), len(same))

diff_k12

Actually 38 of the double lethals are the same (20 + 18). The only one specific to k12 is a combination of fpr and pfo.

### Investigate the difference in double lethals

In [None]:
EcN_model.genes.CIW80_25730
# EcN_model.genes.CIW80_14630

In [None]:
# Check the double lethal only for aerobic
for rxn_1 in k12_model.genes.b1378.reactions:
    for rxn_2 in k12_model.genes.b3924.reactions:
        with k12_model:
            m9(k12_model)
#             k12_model.reactions.EX_o2_e.lower_bound=0   
            k12_model.remove_reactions([rxn_1.id, rxn_2.id])
            print(rxn_1.id, rxn_2.id, k12_model.slim_optimize())

In [None]:
# Check the double lethal only for aerobic
for rxn_1 in EcN_model.genes.CIW80_25730.reactions:
    for rxn_2 in EcN_model.genes.CIW80_14630.reactions:
        with EcN_model:
            m9(EcN_model)
#             EcN_model.reactions.EX_o2_e.lower_bound=0    
            EcN_model.remove_reactions([rxn_1.id, rxn_2.id])
            print(rxn_1.id, rxn_2.id, EcN_model.slim_optimize())

In [None]:
# Check the reactions
k12_model.reactions.FLDR2
k12_model.reactions.POR5

# Check the metabolites and compare with EcN
EcN_model.metabolites.flxso_c.summary()
k12_model.metabolites.flxso_c.summary()

In [None]:
# The only difference in consumption/production of these flavodoxin metabolites is MECDPDH5
with k12_model:
    m9(k12_model)
#     k12_model.reactions.EX_o2_e.lower_bound=0   
    k12_model.remove_reactions(['MECDPDH5'])
    print(k12_model.slim_optimize())

In [None]:
# Check the difference in metabolism of k12 and EcN
k12_model.reactions.MECDPDH5
EcN_model.metabolites.get_by_id('2mecdp_c')
k12_model.metabolites.get_by_id('2mecdp_c')

In [None]:
EcN_model.reactions.MECDPDH2

The EcN model includes another reaction, MECDPH2, which used NADH as electron acceptor, instead of flavodoxin