In [1]:
import cobra, os, re, json
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
# Identifier of the biomass reaction; used below to read its flux from the pFBA solution.
biomId = 'Biomass_Rt_Clim'

In [3]:
# Load the draft model, set the lower bounds of the glucose and oxygen exchanges,
# then run a reference pFBA on the unmodified ("WT") model.
modelStart = cobra.io.load_json_model(
    '../yeast_model/yeast_model/build_iRhto/output/rt_draft/rt_draft8C.json')
glucose_exchange = modelStart.reactions.get_by_id('EX_glc__D_e')
oxygen_exchange = modelStart.reactions.get_by_id('EX_o2_e')
glucose_exchange.lower_bound = -10.  # basis 10
oxygen_exchange.lower_bound = -1000.  # default

pfba_M9 = cobra.flux_analysis.pfba(modelStart)
# Keep the full flux vector on disk and remember the biomass flux as the reference value.
pfba_M9.fluxes.to_csv('./output/pfba/pfba_WT_M9.csv', sep=',')
gMinMedium = pfba_M9.fluxes[biomId]

In [4]:
# Display the biomass flux taken from the reference pFBA solution.
gMinMedium

0.7541272865518587

#### Selection of mutants

1) Essential reactions from essential genes

In [5]:
# Read the gene essentiality table, index it by gene id, and keep only the rows
# whose `essential` column equals 'Yes'.
df_ess = pd.read_csv(
    '../yeast_model/yeast_model/build_iRhto/output/D3_gene_essentiality_prediction.csv',
    sep='\t')
df_ess.index = df_ess['gene_id'].tolist()
is_essential = df_ess['essential'] == 'Yes'
df_ess = df_ess[is_essential]

In [6]:
# Collect every reaction whose bounds change when a gene listed in df_ess is knocked out.
genes_ess = [gene.id for gene in modelStart.genes if gene.id in df_ess.index]
rxns_from_ess_gene = []

# `model` is the working copy that gets perturbed; `model_raw` keeps the original bounds.
model = modelStart.copy()
model_raw = modelStart.copy()

for gid in genes_ess:
    gene = model.genes.get_by_id(gid)
    gene.knock_out()

    # A reaction counts as affected if its bounds now differ from the untouched copy.
    rxns_from_ess_gene.extend(
        rxn.id for rxn in gene.reactions
        if rxn.bounds != model_raw.reactions.get_by_id(rxn.id).bounds)

    # Undo the knockout so the next gene is tested against the original model.
    gene.functional = True
    for rxn in gene.reactions:
        rxn.bounds = model_raw.reactions.get_by_id(rxn.id).bounds

2) Exchange rxns

In [7]:
# Reactions whose id starts with the 'EX_' prefix.
rxns_ex = [rxn.id for rxn in model.reactions if rxn.id.startswith('EX_')]

3) Transport rxns (even GPR associated ones, relevant for cases where transport GPR is poorly characterized)

In [8]:
# Reactions whose subsystem annotation is exactly 'Transport'.
rxns_tp = [
    rxn.id
    for rxn in model.reactions
    if rxn.subsystem == 'Transport'
]

4) Essential rxns in silico

In [9]:
# Single-reaction knockout scan: a reaction is recorded as essential in silico when
# knocking it out makes the model infeasible or drops the objective below 25% of
# the reference biomass flux (gMinMedium).
model_raw = modelStart.copy()
model = modelStart.copy()

rxns_ess = []
rxns_check = [rxn.id for rxn in model.reactions]
for rxnid in tqdm(rxns_check, leave=False):
    rxn = model.reactions.get_by_id(rxnid)
    rxn.knock_out()
    fbako = model.optimize()
    # One combined test so each reaction is appended at most once. The status check
    # comes first and short-circuits, so the objective of an infeasible solution
    # (which may be None or otherwise meaningless) is never compared.
    if fbako.status in [None, 'infeasible'] or fbako.f < 0.25*gMinMedium:
        rxns_ess.append(rxn.id)

    # Restore the original bounds before testing the next reaction.
    rxn.bounds = model_raw.reactions.get_by_id(rxn.id).bounds

                                                   

In [10]:
# Persist the in-silico essential reaction ids, one id per line.
essential_text = '\n'.join(rxns_ess)
with open('./output/Step1_essential_rxns_iRhto1108.txt', 'w') as out_file:
    out_file.write(essential_text)

In [11]:
# Reload the essential reaction ids from disk (one id per line).
with open('./output/Step1_essential_rxns_iRhto1108.txt') as in_file:
    essential_text = in_file.read()
rxns_ess = essential_text.split('\n')

5) No GPR reactions

In [12]:
# Reactions whose gene-reaction rule is empty or one of the placeholder values below.
rxns_nogpr = [
    rxn.id
    for rxn in model.reactions
    if rxn.gene_reaction_rule in ('', 'UNKNOWN', 'SPONT', 'TRUE')
]

6) Special case: pseudoreactions and ATPM maintenance

In [13]:
# Reactions annotated with the 'Pseudoreaction' subsystem, plus ATPM_c added explicitly.
rxns_spec = [rxn.id for rxn in model.reactions if rxn.subsystem == 'Pseudoreaction']
rxns_spec.append('ATPM_c')

7) Use FVA to find zero flux reactions

In [14]:
# Flux variability analysis over every reaction, with no requirement on the
# objective value (fraction_of_optimum=0).
rxns_model = [rxn.id for rxn in model.reactions]
fva = cobra.flux_analysis.flux_variability_analysis(
    model,
    reaction_list=rxns_model,
    fraction_of_optimum=0,
)

In [15]:
# A reaction is treated as zero-flux when both FVA extremes are below 1e-6 in magnitude.
fva_min_is_zero = fva.minimum.abs() < 1e-6
fva_max_is_zero = fva.maximum.abs() < 1e-6
rxns_zero = fva[fva_min_is_zero & fva_max_is_zero].index.tolist()

In [41]:
# Number of model reactions minus the number of entries in rxns_zero.
# NOTE(review): rxns_zero is reassigned by later cells and this cell's execution
# count is out of sequence, so the displayed value depends on execution order.
len(model.reactions) - len(rxns_zero)

710

In [43]:
# Ad-hoc inspection of the subsystem annotation of one reaction (ABTDD_c).
model.reactions.ABTDD_c.subsystem

u'Alternative carbon metabolism'

Gather all reactions to be excluded from knockout when generating mutants

In [16]:
# Map each exclusion category to its list of reaction ids and save the mapping as JSON.
rxns_excl = dict(
    Essential_in_vivo=rxns_from_ess_gene,
    Essential_in_silico=rxns_ess,
    Exchange=rxns_ex,
    Transport=rxns_tp,
    No_gpr=rxns_nogpr,
    Special_case=rxns_spec,
    Zero_fva_flux=rxns_zero,
)

excluded_json = json.dumps(rxns_excl)
with open('./output/Step1_rxns_excluded.json', 'w') as out_file:
    out_file.write(excluded_json)

#### Run pFBA

Run and record flux

In [None]:
# Reload the exclusion lists and keep, as knockout candidates, every model reaction
# that appears in none of them.
with open('./output/Step1_rxns_excluded.json') as f:
    rxns_excl = json.load(f)

# Build the union of excluded ids once. The previous form rebuilt the concatenated
# list for every reaction and then scanned it linearly.
excluded_ids = set()
for excluded_list in rxns_excl.values():
    excluded_ids.update(excluded_list)

rxns_check = [rxn.id for rxn in model.reactions if rxn.id not in excluded_ids]

In [23]:
# Flux table: one row per reaction, one column for the WT solution and one per
# single-reaction knockout ('<reaction id>_ko').
cols = ['WT'] + [i + '_ko' for i in rxns_check]
rxns_all = [rxn.id for rxn in model.reactions]
df_pfba = pd.DataFrame(index=rxns_all, columns=cols)

# `model` is perturbed in the loop; `model_raw` supplies the bounds to restore.
model_raw = modelStart.copy()
model = modelStart.copy()

df_pfba.loc[rxns_all, 'WT'] = pfba_M9.fluxes[rxns_all]

for rxnid in tqdm(rxns_check, leave=False):
    rxn = model.reactions.get_by_id(rxnid)
    rxn.knock_out()
    try:
        try:
            pfba = cobra.flux_analysis.pfba(model)
        except Exception:
            # Retry with a slightly relaxed optimum if the default pFBA fails.
            # `Exception` (not a bare except) keeps KeyboardInterrupt usable to
            # stop this long-running loop.
            pfba = cobra.flux_analysis.pfba(model, fraction_of_optimum=0.999)
        df_pfba.loc[rxns_all, rxnid + '_ko'] = pfba.fluxes[rxns_all]
    finally:
        # Always restore the original bounds, even if both pFBA attempts fail,
        # so the working model is never left with a reaction knocked out.
        rxn.bounds = model_raw.reactions.get_by_id(rxn.id).bounds

                                                 

In [24]:
# Save the WT and knockout flux table as a tab-separated file.
df_pfba.to_csv(
    './output/Step1_pfba_iRhto1108.csv',
    sep='\t',
)

Calculate pairwise Euclidean distance (normalized by number of dimensions)

In [25]:
# Reload the flux table, snap negligible fluxes to zero, and drop reactions that
# carry no flux in any column.
df_pfba = pd.read_csv('./output/Step1_pfba_iRhto1108.csv', sep='\t', index_col=0)

# Vectorised form of the element-wise loop: every |flux| < 1e-6 becomes exactly 0.
df_pfba[df_pfba.abs() < 1e-6] = 0

# Rows whose every entry equals 0 (a NaN entry does not count as zero).
row_is_all_zero = (df_pfba == 0).all(axis=1)
rxns_zero = df_pfba.index[row_is_all_zero].tolist()

rxns = df_pfba.index[~row_is_all_zero].tolist()
df_pfba = df_pfba.loc[rxns, :]

                                                   

In [26]:
# Reload the exclusion lists and rebuild the list of knockout candidates: every
# model reaction that appears in none of the lists.
with open('./output/Step1_rxns_excluded.json') as f:
    rxns_excl = json.load(f)

# Build the union of excluded ids once. The previous form rebuilt the concatenated
# list for every reaction and then scanned it linearly.
excluded_ids = set()
for excluded_list in rxns_excl.values():
    excluded_ids.update(excluded_list)

rxns_check = [rxn.id for rxn in model.reactions if rxn.id not in excluded_ids]

In [27]:
# Symmetric matrix of pairwise distances between flux vectors (WT and each knockout).
cols = ['WT'] + [i + '_ko' for i in rxns_check]
df_pair_dist = pd.DataFrame(index=cols, columns=cols)
for i in cols:
    df_pair_dist.loc[i, i] = 0

rxns_all = df_pfba.index.tolist()
n_dims = len(rxns_all)  # number of reactions (rows) used to normalise the distance
for i in tqdm(range(0, len(cols)-1), leave=False):
    # Start at i+1: each unordered pair is computed once and mirrored below.
    # Starting at 1 recomputed every pair twice and overwrote the diagonal.
    for j in range(i+1, len(cols)):
        eu_dist = np.linalg.norm(df_pfba[cols[i]] - df_pfba[cols[j]]) / n_dims
        df_pair_dist.loc[cols[i], cols[j]] = eu_dist
        df_pair_dist.loc[cols[j], cols[i]] = eu_dist

                                                 

In [28]:
# Snap every pairwise distance below 1e-6 to exactly zero.
for row_label in cols:
    for col_label in cols:
        distance = df_pair_dist.loc[row_label, col_label]
        if distance < 1e-6:
            df_pair_dist.loc[row_label, col_label] = 0

In [29]:
# Save the thresholded pairwise-distance matrix as a tab-separated file.
df_pair_dist.to_csv(
    './output/Step1_pfba_pairwise_distance_iRhto1108_raw.csv',
    sep='\t',
)

Map mutants with identical flux vectors

In [30]:
# Reload the raw pairwise-distance matrix; the first column holds the row labels.
df_pair_dist = pd.read_csv(
    './output/Step1_pfba_pairwise_distance_iRhto1108_raw.csv',
    sep='\t',
    index_col=0,
    header=0,
)

In [31]:
# Group mutants whose pairwise distance is exactly zero.
#   mut_identical: representative mutant -> list of later mutants at distance 0 from it
#   mut_unique:    mutants that are neither a representative nor a listed duplicate
mutants = df_pair_dist.columns
mut_identical = dict()
mut_already_check = []
already_grouped = set()  # same members as mut_already_check, for O(1) lookups
for i in range(0, len(mutants)-1):
    mut1 = mutants[i]

    for j in range(i+1, len(mutants)):
        mut2 = mutants[j]
        # A mutant already recorded as a duplicate is not assigned again.
        if mut2 in already_grouped:
            continue

        val = df_pair_dist.loc[mut1, mut2]
        if val == 0:
            mut_already_check.append(mut2)
            already_grouped.add(mut2)
            mut_identical.setdefault(mut1, []).append(mut2)

# list(...) makes the concatenation valid on Python 3 as well; the result on
# Python 2 is unchanged.
mut_in_identical = list(mut_identical.keys()) + sum(mut_identical.values(), [])
grouped_lookup = set(mut_in_identical)
mut_unique = [mut for mut in mutants if mut not in grouped_lookup]

In [32]:
# Report how one mutant was classified: not a candidate, unique, or a member of an
# identical-flux group (in which case the group is printed).
# Single-argument print(...) calls give the same output on Python 2 as the former
# print statements and also run on Python 3.
mut_check = 'RPE_c_ko'
if mut_check[:-3] not in rxns_check:  # strip the '_ko' suffix to get the reaction id
    print('{} not in mutant candidate'.format(mut_check))
elif mut_check in mut_unique:
    print('{} in unique mutants (flux vector is unique)'.format(mut_check))
else:
    for mut,val in mut_identical.items():
        if mut_check in val:
            print('{} {}'.format(mut, val))
        if mut_check == mut:
            print('{} {}'.format(mut, val))

RPE_c_ko in unique mutants (flux vector is unique)


In [33]:
# Save the identical-flux groups as JSON and the unique mutants as one id per line.
identical_json = json.dumps(mut_identical)
with open('./output/Step1_mut_identical.json', 'w') as json_file:
    json_file.write(identical_json)

unique_text = '\n'.join(mut_unique)
with open('./output/Step1_mut_unique.txt', 'w') as text_file:
    text_file.write(unique_text)

In [34]:
# Keep one representative per identical-flux group plus all unique mutants, with
# 'WT' first and the rest sorted, then save the reduced distance matrix.
# list(...) makes the concatenation valid on Python 3 as well; the result on
# Python 2 is unchanged.
idx = list(mut_identical.keys()) + mut_unique
idx = ['WT'] + sorted([i for i in idx if i != 'WT'])
df_pair_dist = df_pair_dist.loc[idx, idx]
df_pair_dist.to_csv('./output/Step1_pfba_pairwise_distance_iRhto1108.csv', sep='\t')

#### Filter pfba results

In [35]:
# Reload the reduced distance matrix; its row labels are the mutants to keep.
df_pair_dist = pd.read_csv(
    './output/Step1_pfba_pairwise_distance_iRhto1108.csv',
    sep='\t',
    index_col=0,
)
idx = df_pair_dist.index.tolist()

In [36]:
# Reload the full flux table (rows: reactions, columns: WT and knockouts).
df_pfba = pd.read_csv(
    './output/Step1_pfba_iRhto1108.csv',
    sep='\t',
    index_col=0,
)

In [37]:
# Snap negligible fluxes to zero, drop reactions that are zero in every column of
# the full table, and keep only the retained mutant columns (idx).
# The earlier `df_pfba_filtered = df_pfba.loc[:, idx]` was overwritten before any
# use and has been removed.

# Vectorised form of the element-wise loop: every |flux| < 1e-6 becomes exactly 0.
df_pfba[df_pfba.abs() < 1e-6] = 0

# Rows whose every entry equals 0, judged across all columns, not only those in idx.
row_is_all_zero = (df_pfba == 0).all(axis=1)
rxns_zero = df_pfba.index[row_is_all_zero].tolist()

rxns = df_pfba.index[~row_is_all_zero].tolist()
df_pfba_filtered = df_pfba.loc[rxns, idx]

                                                   

In [38]:
# Save the filtered flux table as a tab-separated file.
df_pfba_filtered.to_csv(
    './output/pfba_iRhto1108_filtered.csv',
    sep='\t',
)

In [39]:
# Largest biomass-reaction flux across the retained columns (WT and kept mutants).
# NOTE(review): the row label duplicates the value assigned to biomId earlier in the notebook.
df_pfba_filtered.loc['Biomass_Rt_Clim', :].max()

0.7541272865685728