In [1]:
import cobra, os, re
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
# Every CSV this notebook writes goes under ./output/; create the directory
# up front so the to_csv calls further down do not fail on a fresh checkout.
dir_out = './output/'
if not os.path.exists(dir_out):
    os.makedirs(dir_out)

# Reaction id used to read the growth (biomass) flux out of flux solutions.
biomId = 'BIOMASS_Ecoli_core_w_GAM'

In [3]:
# Load the model and pin the glucose / oxygen uptake limits (both values are
# marked as the model defaults) before computing the reference solution.
modelStart = cobra.io.load_json_model('./input/e_coli_core.json')
for ex_id, uptake_limit in (('EX_glc__D_e', -10.), ('EX_o2_e', -1000.)):
    modelStart.reactions.get_by_id(ex_id).lower_bound = uptake_limit

# Reference (unperturbed) pFBA solution and the biomass flux it achieves;
# gMinMedium is the growth baseline used by the essentiality screen below.
pfba_M9 = cobra.flux_analysis.pfba(modelStart)
gMinMedium = pfba_M9.fluxes[biomId]

In [4]:
# Show the reference biomass flux taken from the unperturbed pFBA solution.
gMinMedium

0.8739215069684309

Essential rxns from essential genes

In [5]:
# Load the PEC table and keep only rows that are genes (feature type 1) and
# classified as essential (class 1), as encoded in the column headers.
df_ess = pd.read_csv('./input/Ecoli_ess_PECData.dat', sep='\t')
is_gene = df_ess['Feature Type(1:gene 2:rRNA 3:tRNA 4:ncRNA 7:tmRNA 8:sRNA)'] == 1
is_essential = df_ess['Class(1:essential 2:noessential 3:unknown)'] == 1
df_ess = df_ess[is_gene & is_essential]

# Re-index the table by the first b-number (b followed by four digits) found
# in each row's 'Alternative name' so it can be matched against model gene ids.
# Raw string: '\d' in a plain string literal is an invalid escape sequence.
b_number_re = re.compile(r'b\d{4}')
b_numbers = []
for row_label, alt_names in zip(df_ess.index, df_ess['Alternative name']):
    match = b_number_re.search(alt_names)
    if match is None:
        # Fail loudly and say which row is the problem instead of a bare IndexError.
        raise ValueError('no b-number found in "Alternative name" of row %r: %r'
                         % (row_label, alt_names))
    b_numbers.append(match.group(0))
df_ess.index = b_numbers

In [6]:
# Model genes that also appear in the essential-gene table.
genes_ess = [g.id for g in modelStart.genes if g.id in df_ess.index]
rxns_from_ess_gene = []

model_raw = modelStart.copy()  # never modified; source of the original bounds
model = modelStart.copy()      # working copy that is knocked out and restored

for gid in genes_ess:
    g = model.genes.get_by_id(gid)
    g.knock_out()

    # A reaction whose bounds differ from the untouched copy was disabled by
    # this gene knockout; record it as dependent on an essential gene.
    for rxn in g.reactions:
        if rxn.bounds != model_raw.reactions.get_by_id(rxn.id).bounds:
            rxns_from_ess_gene.append(rxn.id)

    # Undo the knockout completely before testing the next gene: re-enable the
    # gene itself (resetting bounds alone does not clear its knocked-out
    # state) and put the original bounds back on its reactions.
    g.functional = True
    for rxn in g.reactions:
        rxn.bounds = model_raw.reactions.get_by_id(rxn.id).bounds

Exchange and transport rxns

In [7]:
# Exchange reactions are recognised by their 'EX_' id prefix.
exrxns = [rxn.id for rxn in model.reactions if rxn.id.startswith('EX_')]

# Transport reactions are picked with a crude id heuristic (a lowercase 't' or
# 'abc' anywhere in the id). Ids that match the heuristic but must not be
# treated as transporters are listed explicitly: the PEP:phosphotransferase
# reactions, and EX_etoh_e, which only matches because of the 't' in 'etoh'.
not_transport = ['EX_etoh_e', 'FRUpts2', 'GLCpts']
tprxns = [rxn.id for rxn in model.reactions
          if ('t' in rxn.id or 'abc' in rxn.id)
          and rxn.id not in not_transport]

Essential rxns in silico

In [8]:
model_raw = modelStart.copy()  # never modified; source of the original bounds
model = modelStart.copy()      # working copy that is knocked out and restored

# A reaction is called essential in silico when knocking it out leaves less
# than this fraction of the reference growth rate (or no feasible solution).
ess_growth_fraction = 0.25

rxns_ess = []
for rxn in model.reactions:
    rxn.knock_out()
    fbako = model.optimize()

    # Report knockouts that make the model infeasible.
    infeasible = fbako.status in [None, 'infeasible']
    if infeasible:
        print(rxn.id)

    # An infeasible knockout has no meaningful objective value, so classify it
    # explicitly instead of relying on how None/NaN happens to compare.
    if infeasible or fbako.objective_value < ess_growth_fraction * gMinMedium:
        rxns_ess.append(rxn.id)

    # Restore the original bounds before testing the next reaction.
    rxn.bounds = model_raw.reactions.get_by_id(rxn.id).bounds

EX_glc__D_e
GLCpts




No GPR reactions

In [9]:
# Gene-reaction rules treated as "no gene association": an empty rule, or the
# single gene s0001 (presumably a placeholder gene -- confirm against the model).
no_gpr_rules = ('', 's0001')
rxns_nogpr = []
for rxn in model.reactions:
    if rxn.gene_reaction_rule in no_gpr_rules:
        rxns_nogpr.append(rxn.id)

Special case: TKT and Biomass reaction

In [10]:
# Reactions excluded by hand: the biomass reaction plus TKT1 and TKT2.
rxns_spec = [biomId]
rxns_spec.extend(['TKT1', 'TKT2'])

Gather all reactions to be excluded from the single-reaction knockout (KO) mutants

In [11]:
# Concatenate every exclusion list, in the same order as before.
rxns_excl = []
for excluded_group in (rxns_from_ess_gene, exrxns, tprxns,
                       rxns_ess, rxns_spec, rxns_nogpr):
    rxns_excl.extend(excluded_group)

# Knockout candidates: every model reaction that is not excluded, in model order.
excluded_ids = set(rxns_excl)
rxns_check = [rxn.id for rxn in model.reactions if rxn.id not in excluded_ids]

Run and record flux

In [12]:
# One row per reaction, one column per condition: the reference run ('WT')
# followed by one '<reaction id>_ko' column per knockout candidate.
rxns_all = [rxn.id for rxn in model.reactions]
cols = ['WT']
cols.extend(rxnid + '_ko' for rxnid in rxns_check)
df_pfba = pd.DataFrame(index=rxns_all, columns=cols)

model_raw = modelStart.copy()  # never modified; source of the original bounds
model = modelStart.copy()      # working copy that is knocked out and restored

# Reference fluxes come from the pFBA solution computed at the top.
df_pfba.loc[rxns_all, 'WT'] = pfba_M9.fluxes[rxns_all]

for rxnid in rxns_check:
    target = model.reactions.get_by_id(rxnid)
    original_bounds = model_raw.reactions.get_by_id(rxnid).bounds

    # Knock out, solve pFBA on the perturbed model, record the full flux vector.
    target.knock_out()
    ko_solution = cobra.flux_analysis.pfba(model)
    df_pfba.loc[rxns_all, rxnid + '_ko'] = ko_solution.fluxes[rxns_all]

    # Restore the reaction so each mutant carries exactly one knockout.
    target.bounds = original_bounds

In [13]:
# Write the full flux table (reactions x conditions) as a tab-separated file.
df_pfba.to_csv('./output/pfba_ecoli_core.csv', sep='\t')

Calculate pairwise Euclidean distance (normalized by number of dimensions)

In [14]:
def pairwise_flux_distance(flux_df, columns, rxn_ids):
    """Return the pairwise distance matrix between flux columns.

    The distance between two columns is the Euclidean norm of the difference
    of their absolute flux values over ``rxn_ids``, divided by
    ``len(rxn_ids)``.

    NOTE(review): distances compare flux magnitudes (abs is taken per value
    before subtracting), so a flux that only flips sign contributes zero --
    confirm this is intended.

    Parameters
    ----------
    flux_df : pandas.DataFrame
        Flux table indexed by reaction id, one column per condition.
    columns : list of str
        Column labels of ``flux_df`` to compare.
    rxn_ids : list of str
        Row labels of ``flux_df`` that make up each flux vector.

    Returns
    -------
    pandas.DataFrame
        Symmetric float matrix indexed and labelled by ``columns`` with a
        zero diagonal.
    """
    magnitudes = flux_df.loc[rxn_ids, columns].astype(float).abs().values
    n_rxns = len(rxn_ids)
    dist = np.zeros((len(columns), len(columns)))
    for k in range(len(columns)):
        # Difference of every column against column k, all reactions at once.
        delta = magnitudes - magnitudes[:, [k]]
        dist[k, :] = np.sqrt((delta ** 2).sum(axis=0)) / n_rxns
    return pd.DataFrame(dist, index=columns, columns=columns)


cols = ['WT'] + [i + '_ko' for i in rxns_check]
rxns_all = [rxn.id for rxn in model.reactions]
df_pair_dist = pairwise_flux_distance(df_pfba, cols, rxns_all)

In [15]:
# Snap distances below the tolerance to exactly 0 so that the duplicate
# detection further down can test for equality with zero.
zero_tol = 1e-6
label_pairs = [(row_label, col_label) for row_label in cols for col_label in cols]
for row_label, col_label in label_pairs:
    if not df_pair_dist.loc[row_label, col_label] < zero_tol:
        continue
    df_pair_dist.loc[row_label, col_label] = 0

In [17]:
# Collect every condition that has zero distance to a condition listed before
# it. Only the later member of such a pair is collected, so the earliest
# column of each group of identical flux profiles is kept.
mutants = df_pair_dist.columns

muts_none_all = set()
for pos, mut1 in enumerate(mutants[:-1]):
    for mut2 in mutants[pos + 1:]:
        if df_pair_dist.loc[mut1, mut2] == 0:
            muts_none_all.add(mut2)

In [18]:
# Keep only the conditions that were not flagged as duplicates, preserving order.
idx = []
for mut in df_pair_dist.index:
    if mut in muts_none_all:
        continue
    idx.append(mut)

# Restrict the distance matrix to the kept labels on both axes.
df_pair_dist = df_pair_dist.loc[idx, idx]

In [19]:
# Write the de-duplicated pairwise distance matrix as a tab-separated file.
df_pair_dist.to_csv('./output/pfba_pairwise_distance.csv', sep='\t')

In [20]:
# Display the de-duplicated pairwise distance matrix.
df_pair_dist

Unnamed: 0,WT,AKGDH_ko,ATPS4r_ko,FBA_ko,FUM_ko,G6PDH2r_ko,GLUDy_ko,MDH_ko,PDH_ko,PGI_ko,PPC_ko,PYK_ko,RPE_ko,TALA_ko
WT,0.0,0.144243,1.01538,0.809949,0.589936,0.165631,0.0939073,0.209194,0.306937,0.163794,0.0885523,0.064035,0.13704,0.151593
AKGDH_ko,0.144243,0.0,1.05119,0.791993,0.573338,0.243635,0.168013,0.276239,0.341794,0.189607,0.0699987,0.172717,0.22107,0.232424
ATPS4r_ko,1.01538,1.05119,0.0,1.46415,1.29449,1.00328,1.05374,1.09594,0.945041,1.06264,1.0273,1.02712,1.00344,1.00323
FBA_ko,0.809949,0.791993,1.46415,0.0,0.26965,0.904152,0.80916,0.875164,0.656574,0.677601,0.788758,0.831493,0.888072,0.896404
FUM_ko,0.589936,0.573338,1.29449,0.26965,0.0,0.695295,0.596358,0.676202,0.460425,0.451412,0.566693,0.61583,0.677049,0.686474
G6PDH2r_ko,0.165631,0.243635,1.00328,0.904152,0.695295,0.0,0.181456,0.196331,0.409045,0.302876,0.215369,0.145638,0.0314989,0.01583
GLUDy_ko,0.0939073,0.168013,1.05374,0.80916,0.596358,0.181456,0.0,0.203422,0.337638,0.192607,0.131129,0.102794,0.157506,0.169498
MDH_ko,0.209194,0.276239,1.09594,0.875164,0.676202,0.196331,0.203422,0.0,0.441132,0.315217,0.265797,0.152084,0.18801,0.191605
PDH_ko,0.306937,0.341794,0.945041,0.656574,0.460425,0.409045,0.337638,0.441132,0.0,0.229493,0.307194,0.338492,0.389388,0.399407
PGI_ko,0.163794,0.189607,1.06264,0.677601,0.451412,0.302876,0.192607,0.315217,0.229493,0.0,0.153482,0.201788,0.27766,0.290545


In [22]:
# Keep only the flux columns of the conditions that survived de-duplication
# (idx holds the labels retained in the distance matrix).
df_pfba_filtered = df_pfba.loc[:, idx]

In [23]:
# Display the flux table restricted to the retained conditions.
df_pfba_filtered

Unnamed: 0,WT,AKGDH_ko,ATPS4r_ko,FBA_ko,FUM_ko,G6PDH2r_ko,GLUDy_ko,MDH_ko,PDH_ko,PGI_ko,PPC_ko,PYK_ko,RPE_ko,TALA_ko
ACALD,0,0,-2.81331e-14,0,0,0,0,0,0,0,0,0,0,0
ACALDt,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ACKr,1.93788e-14,0,-14.3123,0,7.68923e-14,0,1.46274e-14,0,0,-2.22767e-14,-1.24219e-14,0,0,0
ACONTa,6.00725,5.83681,0.403757,0.759585,0.878546,7.8033,6.47117,8.33976,4.75726,4.53818,5.57361,6.6635,7.4653,7.63524
ACONTb,6.00725,5.83681,0.403757,0.759585,0.878546,7.8033,6.47117,8.33976,4.75726,4.53818,5.57361,6.6635,7.4653,7.63524
ACt2r,1.93788e-14,0,-14.3123,0,7.68923e-14,0,1.46274e-14,0,0,-2.22767e-14,-1.24219e-14,0,0,0
ADK1,0,0,0,4.60743,3.76289,0,0,3.67464,0,0,0,0,0,0
AKGDH,5.06438,0,0,0,0,6.87133,5.55261,7.44879,3.89771,3.60692,2.13896,5.73033,6.53128,6.70225
AKGt2r,0,0,0,0,0,0,0,0,0,0,0,0,0,0
ALCD2x,0,0,-2.81331e-14,0,0,0,0,0,0,0,0,0,0,0


In [24]:
# Write the filtered flux table as a tab-separated file.
df_pfba_filtered.to_csv('./output/pfba_ecoli_core_filtered.csv', sep='\t')