In [1]:
# Session configuration: choose the solver used by cobra and set the working directory.
import os
import cobra
# select Gurobi as the solver on cobra's Configuration object
cobra.Configuration().solver = "gurobi"
# NOTE(review): hard-coded, user-specific absolute path. The cells below build their
# paths from Path().cwd(), so this must point at the repository root on the machine
# that runs the notebook.
os.chdir('C:/Users/prins/GitHub/Human1_RPE-PR') 

In [2]:
# load all models in the cs_mods/July2023 folder by cycling through the file names
from cobra.io import read_sbml_model    
from pathlib import Path
import pandas as pd

# do not change directory but still go to the folder with the models
folder = Path().cwd() / "cs_mods" / "July2023"
model_list = []
# Path.glob yields entries in arbitrary (file-system dependent) order; sort the paths
# so that model_list has the same, reproducible order on every machine.
for file in sorted(folder.glob("*.xml")):
    model = read_sbml_model(file)
    # use the file name without the .xml extension as both id and name
    model.id = file.stem
    model.name = file.stem
    model_list.append(model)

Set parameter Username
Academic license - for non-commercial use only - expires 2024-02-03


No objective coefficients in model. Unclear what should be optimized
No objective coefficients in model. Unclear what should be optimized
No objective coefficients in model. Unclear what should be optimized
No objective coefficients in model. Unclear what should be optimized


In [3]:
# load old combined RPE_PR models and give them recognisable ids / names
mod_RPE_PR = read_sbml_model(Path().cwd() / "models" / "mod_RPE_PR.xml")
mod_RPE_PR.id = 'RPE_PR_old'
mod_RPE_PR.name = 'RPE_PR_old'
# NOTE(review): mod_Human1_Human1 is loaded and renamed but not added to model_list
# below — confirm that this is intended.
mod_Human1_Human1 = read_sbml_model(Path().cwd() / "models" /  "mod_Human1_Human1.xml")
mod_Human1_Human1.id = 'RPE_PR_control_old'
mod_Human1_Human1.name = 'RPE_PR_control_old'
mod = read_sbml_model(Path().cwd() / 'models/Human-GEM.xml')
mod.id = 'Human1_old'
mod.name = 'Human1_old'

# add old models to model_list
# Entries that already carry one of these ids are replaced instead of appended again,
# so re-running this cell does not leave duplicate models in model_list.
old_models = [mod, mod_RPE_PR]
old_ids = {m.id for m in old_models}
model_list = [m for m in model_list if m.id not in old_ids] + old_models

# check out model_list
model_list

[<Model Human-GEM_28062023 at 0x1bc7f76e020>,
 <Model mod_RPE_PRcone_Liang at 0x1bc09c08ac0>,
 <Model mod_RPE_PRcone_Liang_singlePR at 0x1bc4b8f2980>,
 <Model mod_RPE_PRcone_Liang_singleRPE at 0x1bc567b3ac0>,
 <Model mod_RPE_PRcone_Lukowski at 0x1bc63751bd0>,
 <Model mod_RPE_PRcone_Lukowski_singlePR at 0x1bc685899c0>,
 <Model mod_RPE_PRcone_Lukowski_singleRPE at 0x1bc6fd896c0>,
 <Model mod_RPE_PRcontrol at 0x1bc83ebb8b0>,
 <Model mod_RPE_PRrod_Liang at 0x1bc990975e0>,
 <Model mod_RPE_PRrod_Liang_singlePR at 0x1bcde7646d0>,
 <Model mod_RPE_PRrod_Liang_singleRPE at 0x1bce24d3910>,
 <Model mod_RPE_PRrod_Lukowski at 0x1bce65ee2c0>,
 <Model mod_RPE_PRrod_Lukowski_singlePR at 0x1bcefb3ed70>,
 <Model mod_RPE_PRrod_Lukowski_singleRPE at 0x1bcf15e2fb0>,
 <Model Human1_old at 0x1bd0ef47190>,
 <Model RPE_PR_old at 0x1bcf608e650>]

In [4]:
### COMPARE REACTIONS IN ROD MODELS

# look up the two single-photoreceptor rod models and the Human1 reference model by id
# (first match in model_list; IndexError if an id is missing)
rod_Liang = [m for m in model_list if m.id == 'mod_RPE_PRrod_Liang_singlePR'][0]
rod_Lukowski = [m for m in model_list if m.id == 'mod_RPE_PRrod_Lukowski_singlePR'][0]
Human1 = [m for m in model_list if m.id == 'Human-GEM_28062023'][0]

# reaction ids of each model as sets
liang_rxns = {rxn.id for rxn in rod_Liang.reactions}
lukowski_rxns = {rxn.id for rxn in rod_Lukowski.reactions}
human1_rxns = {rxn.id for rxn in Human1.reactions}

# ids only in the Liang model, only in the Lukowski model, and shared by both
liang_not_lukowski = liang_rxns.difference(lukowski_rxns)
lukowski_not_liang = lukowski_rxns.difference(liang_rxns)
common_rxns = liang_rxns & lukowski_rxns

In [5]:
# report the sizes of the reaction-id sets computed above
print(f'number of reactions in Human1: {len(human1_rxns)}')
print(f'number of reactions in Liang rod AND in Lukowski rod: {len(common_rxns)}')
print(f'number of reactions in Liang rod but not in Lukowski rod: {len(liang_not_lukowski)}')
print(f'number of reactions in Lukowski rod but not in Liang rod: {len(lukowski_not_liang)}')

number of reactions in Human1: 13085
number of reactions in Liang rod AND in Lukowski rod: 5599
number of reactions in Liang rod but not in Lukowski rod: 655
number of reactions in Lukowski rod but not in Liang rod: 861


In [6]:
# keep only the part of each reaction id before the first underscore
# (drops the cell-specific suffix; ids without an underscore are kept whole)
liang_rxns_short = [rxn_id.partition('_')[0] for rxn_id in liang_rxns]
lukowski_rxns_short = [rxn_id.partition('_')[0] for rxn_id in lukowski_rxns]

In [7]:
from src.get_info import make_rxn_df

# turn the id sets into lists for label-based row selection
liang_not_lukowski_list = list(liang_not_lukowski)
lukowski_not_liang_list = list(lukowski_not_liang)
common_rxns_list = list(common_rxns)

# reaction tables restricted to the model-specific and to the shared reactions
liang_not_lukowski_rxns = make_rxn_df(rod_Liang).loc[liang_not_lukowski_list]
lukowski_not_liang_rxns = make_rxn_df(rod_Lukowski).loc[lukowski_not_liang_list]
common_rxns_rxns = make_rxn_df(rod_Liang).loc[common_rxns_list]

# write the tables to one workbook in results/compare_rod_models/ (folder is created
# if it does not exist), one sheet per table
from pathlib import Path

compare_dir = Path().cwd() / 'results' / 'compare_rod_models'
compare_dir.mkdir(parents=True, exist_ok=True)
with pd.ExcelWriter(compare_dir / 'compare_rod_models.xlsx') as writer:
    for sheet, table in (
        ('liang_not_lukowski', liang_not_lukowski_rxns),
        ('lukowski_not_liang', lukowski_not_liang_rxns),
        ('common_rxns', common_rxns_rxns),
    ):
        table.to_excel(writer, sheet_name=sheet)
    # the full Human1 reaction table goes on its own sheet
    make_rxn_df(Human1).to_excel(writer, sheet_name='Human1')

In [8]:
# modify models: set the objective and the default exchange bounds

# for all models in the list set objective function (ATP hydrolysis, in PR for combined models)
for m in model_list:
    rxn_ids = {r.id for r in m.reactions}   # build the id lookup once per model
    # preference order: PR copy, then RPE copy, then the unsuffixed reaction
    for candidate in ('MAR03964_PR', 'MAR03964_RPE', 'MAR03964'):
        if candidate in rxn_ids:
            m.objective = candidate
            break
    else:
        print('no ATP hydrolysis reaction (MAR03964) in model: ' + m.id)

# list combined and single models
combined_models = [m for m in model_list if 'single' not in m.id and 'Human' not in m.id]
single_models = [m for m in model_list if 'single' in m.id]
Human1_models = [m for m in model_list if 'Human' in m.id]

# Exchange reactions are identified here as reactions without products.
# Combined models: close every exchange reaction, except the RPE ones ('_RPE' in the
# id), which are opened for efflux only.
for m in combined_models:
    for r in m.reactions:
        if len(r.products) == 0:
            r.bounds = (0, 1000) if '_RPE' in r.id else (0, 0)

# Single models and Human1 models: allow efflux only on every exchange reaction.
for m in single_models + Human1_models:
    for r in m.reactions:
        if len(r.products) == 0:
            r.bounds = (0, 1000)



In [9]:
# set exchange bounds for blood and maximise the objective of every model
from src.modify_model import set_exchange_bounds

# (lower, upper) bounds per exchange reaction id
ex_bounds = {
    'MAR09048': (-3.51, 1000),   # oxygen
    'MAR09034': (-4.18, 1000),   # glucose
    'MAR09135': (7.25, 1000),    # lactate
}

# objective value per model id after applying the bounds
results_dict = {}
for m in model_list:
    set_exchange_bounds(m, ex_bounds)
    m.optimize()
    results_dict[m.id] = m.objective.value

# one row per model, single column with the objective value
objective_label = 'max ATP hydrolysis (pmol/s/mm^2)'
df1 = pd.DataFrame(results_dict, index=[objective_label]).T
df1

Unnamed: 0,max ATP hydrolysis (pmol/s/mm^2)
Human-GEM_28062023,24.7325
mod_RPE_PRcone_Liang,23.9
mod_RPE_PRcone_Liang_singlePR,11.135
mod_RPE_PRcone_Liang_singleRPE,23.9
mod_RPE_PRcone_Lukowski,24.7325
mod_RPE_PRcone_Lukowski_singlePR,22.235
mod_RPE_PRcone_Lukowski_singleRPE,23.9
mod_RPE_PRcontrol,24.7325
mod_RPE_PRrod_Liang,23.9
mod_RPE_PRrod_Liang_singlePR,13.91


In [12]:
# FLUX VARIABILITY ANALYSIS

from cobra.flux_analysis import flux_variability_analysis

# results per model id: FBA fluxes, regular FVA and loopless FVA
fba = {}
fva = {}
fva_loopless = {}

# Use the rod models that were selected by id (rod_Liang / rod_Lukowski) instead of
# positional indices into model_list, which silently pick other models as soon as the
# list order or length changes.
for m in [rod_Liang, rod_Lukowski]:
    set_exchange_bounds(m, ex_bounds)
    fba[m.id] = pd.DataFrame(m.optimize().fluxes)
    fva[m.id] = flux_variability_analysis(m, loopless=False)
    fva_loopless[m.id] = flux_variability_analysis(m, loopless=True)

# pickle fba, fva and fva_loopless so the analysis does not have to be repeated
import pickle
with open(Path().cwd() / 'results' / 'compare_rod_models' / 'fba.pkl', 'wb') as f:
    pickle.dump(fba, f)
with open(Path().cwd() / 'results' / 'compare_rod_models' / 'fva.pkl', 'wb') as f:
    pickle.dump(fva, f)
with open(Path().cwd() / 'results' / 'compare_rod_models' / 'fva_loopless.pkl', 'wb') as f:
    pickle.dump(fva_loopless, f)


In [13]:
import pickle

# reload the pickled FBA / FVA result dicts from results/compare_rod_models/
pickle_dir = Path().cwd() / 'results' / 'compare_rod_models'
with open(pickle_dir / 'fba.pkl', 'rb') as pkl_file:
    fba = pickle.load(pkl_file)
with open(pickle_dir / 'fva.pkl', 'rb') as pkl_file:
    fva = pickle.load(pkl_file)
with open(pickle_dir / 'fva_loopless.pkl', 'rb') as pkl_file:
    fva_loopless = pickle.load(pkl_file)

In [14]:
# Compare regular and loopless FVA results for both single-PR rod models and export
# the combined tables to Excel.

# pull the result frames out of the dicts (these are the stored objects, not copies)
fva_Liang = fva['mod_RPE_PRrod_Liang_singlePR']
fva_Lukowski = fva['mod_RPE_PRrod_Lukowski_singlePR']
fva_loopless_Liang = fva_loopless['mod_RPE_PRrod_Liang_singlePR']
fva_loopless_Lukowski = fva_loopless['mod_RPE_PRrod_Lukowski_singlePR']

# combine fva and fva_loopless results
# NOTE: the column renames are done in place, so the frames stored in the fva /
# fva_loopless dicts get the new column names as well.
fva_loopless_Liang.columns = ['min_loopless','max_loopless'] # rename columns
fva_Liang.columns = ['min','max'] # rename columns
fva_Liang = pd.concat([fva_Liang, fva_loopless_Liang], axis=1).sort_values(by='min', ascending=False) # put regular and loopless ranges side by side
fva_Liang = fva_Liang[(fva_Liang.T != 0).any()] # drop rows with all zeros
fva_Liang_rxns = make_rxn_df(rod_Liang).loc[fva_Liang.index] # reaction info for the remaining rows
fva_Liang = pd.concat([fva_Liang, fva_Liang_rxns], axis=1).sort_values(by='min', ascending=False) # merge with rxns df
# select rows with similar (less than .1 difference) values in min and min_loopless, AND in max and max_loopless columns
fva_Liang_same = fva_Liang[(abs(fva_Liang['min'] - fva_Liang['min_loopless']) < .1) & (abs(fva_Liang['max'] - fva_Liang['max_loopless']) < .1)]
# select rows with different (at least .1 difference) values in min and min_loopless, OR in max and max_loopless columns
fva_Liang_diff = fva_Liang[(abs(fva_Liang['min'] - fva_Liang['min_loopless']) >= .1) | (abs(fva_Liang['max'] - fva_Liang['max_loopless']) >= .1)]

# do same for Lukowski
fva_loopless_Lukowski.columns = ['min_loopless','max_loopless']
fva_Lukowski.columns = ['min','max']
fva_Lukowski = pd.concat([fva_Lukowski, fva_loopless_Lukowski], axis=1).sort_values(by='min', ascending=False)
fva_Lukowski = fva_Lukowski[(fva_Lukowski.T != 0).any()]
fva_Lukowski_rxns = make_rxn_df(rod_Lukowski).loc[fva_Lukowski.index] # make rxn dfs
fva_Lukowski = pd.concat([fva_Lukowski, fva_Lukowski_rxns], axis=1).sort_values(by='min', ascending=False)
fva_Lukowski_same = fva_Lukowski[(abs(fva_Lukowski['min'] - fva_Lukowski['min_loopless']) < .1) & (abs(fva_Lukowski['max'] - fva_Lukowski['max_loopless']) < .1)]
fva_Lukowski_diff = fva_Lukowski[(abs(fva_Lukowski['min'] - fva_Lukowski['min_loopless']) >= .1) | (abs(fva_Lukowski['max'] - fva_Lukowski['max_loopless']) >= .1)]

# save to excel, one sheet per table (the results folder is created if needed)
# NOTE(review): the sheet name 'Luowski_loopless_rxns' below is misspelled (missing 'k');
# left unchanged here because other code may look the sheet up by this name — confirm.
Path(Path().cwd() / 'results' / 'compare_rod_models').mkdir(parents=True, exist_ok=True)
with pd.ExcelWriter(Path().cwd() / 'results' / 'compare_rod_models' / 'fva_loops_Liang_Lukowski_singlerods.xlsx') as writer:
    fva_Liang.to_excel(writer, sheet_name = 'Liang_all_possible_fluxes')
    fva_Liang_same.to_excel(writer, sheet_name = 'Liang_loopless_rxns')
    fva_Liang_diff.to_excel(writer, sheet_name = 'Liang_loopy_rxns')
    fva_Lukowski.to_excel(writer, sheet_name = 'Lukowski_all_possible_fluxes')
    fva_Lukowski_same.to_excel(writer, sheet_name = 'Luowski_loopless_rxns')
    fva_Lukowski_diff.to_excel(writer, sheet_name = 'Lukowski_loopy_rxns')


In [15]:
### FVA MODIFIED MODELS ###
# Block the "loopy" reactions of both rod models, i.e. reactions from the *_diff tables
# whose loopless FVA range is numerically (0, 0), so that FVA can be repeated afterwards.

# rows whose loopless minimum AND maximum are both within the tolerance around zero
# NOTE(review): the tolerance is 1e-4 for Liang but 1e-3 for Lukowski — confirm that
# the difference is intended.
liang_zero_loopless = (fva_Liang_diff['min_loopless'].abs() < .0001) & (fva_Liang_diff['max_loopless'].abs() < .0001)
lukowski_zero_loopless = (fva_Lukowski_diff['min_loopless'].abs() < .001) & (fva_Lukowski_diff['max_loopless'].abs() < .001)
Liang_loop_rxns = list(fva_Liang_diff[liang_zero_loopless].index)
Lukowski_loop_rxns = list(fva_Lukowski_diff[lukowski_zero_loopless].index)

# set the bounds of the selected reactions to (0,0) in the corresponding model
for rod_model, blocked_ids in ((rod_Liang, Liang_loop_rxns), (rod_Lukowski, Lukowski_loop_rxns)):
    for r_id in blocked_ids:
        rod_model.reactions.get_by_id(r_id).bounds = (0, 0)


In [16]:
# FLUX VARIABILITY ANALYSIS on the modified rod models
from cobra.flux_analysis import flux_variability_analysis

# regular and loopless FVA results per model id
fva_modified = {}
fva_loopless_modified = {}
for m in (rod_Liang, rod_Lukowski):
    set_exchange_bounds(m, ex_bounds)
    fva_modified[m.id] = flux_variability_analysis(m, loopless=False)
    fva_loopless_modified[m.id] = flux_variability_analysis(m, loopless=True)

# pickle both result dicts (pickle itself is imported in an earlier cell)
for pkl_name, pkl_content in (
    ('fva_modified.pkl', fva_modified),
    ('fva_loopless_modified.pkl', fva_loopless_modified),
):
    with open(Path().cwd() / 'results' / 'compare_rod_models' / pkl_name, 'wb') as f:
        pickle.dump(pkl_content, f)

In [106]:
# short names for the regular-FVA results of the modified rod models
# (the same DataFrame objects that are stored in fva_modified, not copies)
fva_Liang_modified, fva_Lukowski_modified = (
    fva_modified['mod_RPE_PRrod_Liang_singlePR'],
    fva_modified['mod_RPE_PRrod_Lukowski_singlePR'],
)

In [107]:
#### FVA MODIFIED ####

# Combine the regular / loopless FVA results of the original and the modified rod
# models into one table per model and split it into "same" and "different" reactions.

# pull the result frames out of the dicts (these are the stored objects, not copies)
fva_Liang = fva['mod_RPE_PRrod_Liang_singlePR']
fva_Lukowski = fva['mod_RPE_PRrod_Lukowski_singlePR']
fva_loopless_Liang = fva_loopless['mod_RPE_PRrod_Liang_singlePR']
fva_loopless_Lukowski = fva_loopless['mod_RPE_PRrod_Lukowski_singlePR']
fva_modified_Liang = fva_modified['mod_RPE_PRrod_Liang_singlePR']
fva_modified_Lukowski = fva_modified['mod_RPE_PRrod_Lukowski_singlePR']
fva_loopless_modified_Liang = fva_loopless_modified['mod_RPE_PRrod_Liang_singlePR']
fva_loopless_modified_Lukowski = fva_loopless_modified['mod_RPE_PRrod_Lukowski_singlePR']

# combine fva, fva_modified and fva_loopless results
# NOTE: the renames happen in place, so the frames stored in the result dicts get the
# new column names as well.
fva_loopless_Liang.columns = ['min_loopless','max_loopless'] # rename columns
fva_Liang.columns = ['min','max'] # rename columns
fva_modified_Liang.columns = ['min_modified','max_modified'] # rename columns
fva_loopless_modified_Liang.columns = ['min_loopless_modified','max_loopless_modified'] # rename columns
# fva_modified_Liang is used directly (it is the same object as the fva_Liang_modified
# alias), mirroring the Lukowski code below
fva_Liang = pd.concat([fva_Liang, fva_modified_Liang, fva_loopless_Liang, fva_loopless_modified_Liang], axis=1).sort_values(by='min', ascending=False)
# select rows that have values different from 0 plus or minus 0.01; the former filter
# OR-ed the same "!= 0" test with itself and so kept rows that only hold numerical
# noise, unlike the Lukowski filter below
fva_Liang = fva_Liang[((fva_Liang.T >= .01).any()) | ((fva_Liang.T <= -.01).any())]
fva_Liang_rxns = make_rxn_df(rod_Liang).loc[fva_Liang.index] # make rxn dfs
fva_Liang = pd.concat([fva_Liang, fva_Liang_rxns], axis=1).sort_values(by='min', ascending=False) # merge with rxns df
# select rows with similar (less than .1 difference) values in min and min_loopless, AND in max and max_loopless columns
fva_Liang_same = fva_Liang[(abs(fva_Liang['min'] - fva_Liang['min_loopless']) < .1) & (abs(fva_Liang['max'] - fva_Liang['max_loopless']) < .1)]
# select rows with different (at least .1 difference) values in min and min_loopless, OR in max and max_loopless columns
fva_Liang_diff = fva_Liang[(abs(fva_Liang['min'] - fva_Liang['min_loopless']) >= .1) | (abs(fva_Liang['max'] - fva_Liang['max_loopless']) >= .1)]

# do same for Lukowski
fva_loopless_Lukowski.columns = ['min_loopless','max_loopless']
fva_Lukowski.columns = ['min','max']
fva_modified_Lukowski.columns = ['min_modified','max_modified']
fva_loopless_modified_Lukowski.columns = ['min_loopless_modified','max_loopless_modified']
fva_Lukowski = pd.concat([fva_Lukowski, fva_modified_Lukowski, fva_loopless_Lukowski, fva_loopless_modified_Lukowski], axis=1).sort_values(by='min', ascending=False)
# select rows that have values different from 0 plus or minus 0.01
fva_Lukowski = fva_Lukowski[((fva_Lukowski.T >= .01).any()) | ((fva_Lukowski.T <= -.01).any())]
fva_Lukowski_rxns = make_rxn_df(rod_Lukowski).loc[fva_Lukowski.index] # make rxn dfs
fva_Lukowski = pd.concat([fva_Lukowski, fva_Lukowski_rxns], axis=1).sort_values(by='min', ascending=False)
fva_Lukowski_same = fva_Lukowski[(abs(fva_Lukowski['min'] - fva_Lukowski['min_loopless']) < .1) & (abs(fva_Lukowski['max'] - fva_Lukowski['max_loopless']) < .1)]
fva_Lukowski_diff = fva_Lukowski[(abs(fva_Lukowski['min'] - fva_Lukowski['min_loopless']) >= .1) | (abs(fva_Lukowski['max'] - fva_Lukowski['max_loopless']) >= .1)]

In [108]:
# function that extracts the Ensembl gene IDs from a GPR (gene-protein-reaction) rule
def get_Ensembl_ids(GPR):
    """Return all Ensembl gene IDs found in ``GPR``, in order of appearance.

    ``GPR`` is converted with ``str()`` first, so non-string values (e.g. None or NaN)
    simply give an empty list. An ID is 'ENSG' followed by 11 digits; repeated IDs are
    returned as often as they occur.
    """
    import re
    ensembl_pattern = re.compile(r'ENSG\d{11}')
    return [hit.group() for hit in ensembl_pattern.finditer(str(GPR))]

# function to make a dict from two columns of a df
def cols2dict(df, col1, col2):
    """Map the values of ``df[col1]`` (keys) to the values of ``df[col2]``, row by row.

    If a key occurs more than once, the value of its last occurrence is kept.
    """
    return dict(zip(df[col1], df[col2]))

In [109]:
# load expression data from context_specific_models/expression_data, a folder located
# next to the current working directory
expression_dir = Path().cwd().parent / 'context_specific_models' / 'expression_data'
Lukowski_gene_exp = pd.read_excel(expression_dir / 'expression_data_Lukowski.xlsx')
Sysgo_gene_exp = pd.read_excel(expression_dir / 'expression_data_SysGO_2020_RPE_PR.xlsx')


In [110]:
# select the gene identifier columns and the cone / rod expression columns, and give
# the expression columns short names
Lukowski_expr = Lukowski_gene_exp[
    ['GeneSymbol', 'Ensembl', 'C10 Cone PR (%)', 'Average Rod PR C0, C1, C2, C3, C4, C7 (%)']
].rename(columns={
    'C10 Cone PR (%)': 'Cone_Lukowski',
    'Average Rod PR C0, C1, C2, C3, C4, C7 (%)': 'Rod_Lukoswki',
})
Liang_expr = Sysgo_gene_exp[
    ['GeneSymbol', 'Ensembl', 'ConePhotoreceptors__LiangEtAl_2019_', 'RodPhotoreceptors__LiangEtAl_2019_']
].rename(columns={
    'ConePhotoreceptors__LiangEtAl_2019_': 'Cone_Liang',
    'RodPhotoreceptors__LiangEtAl_2019_': 'Rod_Liang',
})

# lookup dicts keyed by Ensembl ID: rod expression per dataset and gene symbol
Ensembl_Rod_Liang_expression = cols2dict(Liang_expr, 'Ensembl', 'Rod_Liang')
Ensembl_Rod_Lukowski_expression = cols2dict(Lukowski_expr, 'Ensembl', 'Rod_Lukoswki')
Ensembl_GeneSymbol = cols2dict(Liang_expr, 'Ensembl', 'GeneSymbol')

def add_expression_info(df):
    """Return a copy of ``df`` with gene and rod-expression columns added.

    The Ensembl IDs are extracted from the 'GPR' column with get_Ensembl_ids and looked
    up in the module-level dicts Ensembl_GeneSymbol, Ensembl_Rod_Liang_expression and
    Ensembl_Rod_Lukowski_expression. Each new cell holds a list with one entry per ID;
    IDs missing from a dict give None.
    """
    return (
        df.assign(Ensembl_list=lambda d: d['GPR'].apply(get_Ensembl_ids))
        .assign(GeneSymbol=lambda d: d['Ensembl_list'].apply(
            lambda ids: [Ensembl_GeneSymbol.get(i, None) for i in ids]))
        .assign(Rod_Liang_expression=lambda d: d['Ensembl_list'].apply(
            lambda ids: [Ensembl_Rod_Liang_expression.get(i, None) for i in ids]))
        .assign(Rod_Lukowski_expression=lambda d: d['Ensembl_list'].apply(
            lambda ids: [Ensembl_Rod_Lukowski_expression.get(i, None) for i in ids]))
    )

# add expression info to the combined FVA tables of both rod models
# (full table, "same" subset and "diff" subset each)
(fva_Liang, fva_Liang_same, fva_Liang_diff,
 fva_Lukowski, fva_Lukowski_same, fva_Lukowski_diff) = [
    add_expression_info(table)
    for table in (fva_Liang, fva_Liang_same, fva_Liang_diff,
                  fva_Lukowski, fva_Lukowski_same, fva_Lukowski_diff)
]

In [111]:
def discretize_data(df):
    """Return a deep copy of ``df`` with every column from the third one on discretised.

    Per column, the 25% and 75% quantiles are taken with ``Series.quantile`` and each
    value is replaced by the code of the first rule that matches:
        -1  value below 1e-5
         1  value in [1e-5, q25]
         2  value in [q25, q75]
         3  value above q75
         0  value is NaN
    Values matching no rule become None. The first two columns are left untouched.
    """
    import numpy as np
    import pandas as pd
    discretized = df.copy(deep=True)
    level_codes = [-1, 1, 2, 3, 0]
    for col in df.columns[2:]:
        values = discretized.loc[:, col]
        q25, q75 = (values.quantile([q]).values[0] for q in (0.25, 0.75))
        # order matters: np.select uses the first condition that is True
        rules = [
            values < .00001,
            values.between(.00001, q25),
            values.between(q25, q75),
            values > q75,
            values.isna(),
        ]
        discretized[col] = np.select(rules, level_codes, default=None)
    return discretized
    
# Discretise both expression tables and build Ensembl -> discretised rod expression dicts.
Lukowski_exp_discretized = discretize_data(Lukowski_expr)
# remove rows that have NaN in the Ensembl column (done for the Lukowski table only)
Lukowski_exp_discretized = Lukowski_exp_discretized[Lukowski_exp_discretized['Ensembl'].notna()]
Liang_expr_discretized  = discretize_data(Liang_expr)
# keep only the Ensembl ID and the rod column of each table
# NOTE: Lukowski_exp_discretized_Rod is a DataFrame here and is rebound to a dict below.
Lukowski_exp_discretized_Rod = Lukowski_exp_discretized[['Ensembl','Rod_Lukoswki']]
Liang_expr_discretized_Rod = Liang_expr_discretized[['Ensembl','Rod_Liang']]
# fuse Lukowski and Liang expression data (outer merge keeps Ensembl IDs found in either table)
Lukowski_Liang_exp_discretized_Rod = pd.merge(Lukowski_exp_discretized_Rod, Liang_expr_discretized_Rod, on='Ensembl', how='outer')

# make dicts with Ensembl IDs as keys and discretised rod expression levels as values
# NOTE: Liang_exp_discretized_Rod (dict, 'exp') is a different name from
# Liang_expr_discretized_Rod (DataFrame, 'expr') defined above.
Lukowski_exp_discretized_Rod = cols2dict(Lukowski_Liang_exp_discretized_Rod, 'Ensembl', 'Rod_Lukoswki')
Liang_exp_discretized_Rod = cols2dict(Lukowski_Liang_exp_discretized_Rod, 'Ensembl', 'Rod_Liang')

def add_discretised_expression_info(df):
    """Return a copy of ``df`` with gene and discretised rod-expression columns added.

    The Ensembl IDs are extracted from the 'GPR' column with get_Ensembl_ids and looked
    up in the module-level dicts Ensembl_GeneSymbol, Lukowski_exp_discretized_Rod and
    Liang_exp_discretized_Rod. Each new cell holds a list with one entry per ID; IDs
    missing from a dict give None.
    """
    return (
        df.assign(Ensembl_list=lambda d: d['GPR'].apply(get_Ensembl_ids))
        .assign(GeneSymbol=lambda d: d['Ensembl_list'].apply(
            lambda ids: [Ensembl_GeneSymbol.get(i, None) for i in ids]))
        .assign(Lukowski_exp_discretized_Rod=lambda d: d['Ensembl_list'].apply(
            lambda ids: [Lukowski_exp_discretized_Rod.get(i, None) for i in ids]))
        .assign(Liang_exp_discretized_Rod=lambda d: d['Ensembl_list'].apply(
            lambda ids: [Liang_exp_discretized_Rod.get(i, None) for i in ids]))
    )

# add discretised expression info to the combined FVA tables of both rod models
# (full table, "same" subset and "diff" subset each)
(fva_Liang, fva_Liang_same, fva_Liang_diff,
 fva_Lukowski, fva_Lukowski_same, fva_Lukowski_diff) = [
    add_discretised_expression_info(table)
    for table in (fva_Liang, fva_Liang_same, fva_Liang_diff,
                  fva_Lukowski, fva_Lukowski_same, fva_Lukowski_diff)
]

In [113]:
# save the annotated FVA tables to one workbook, one sheet per table
# (results/compare_rod_models/ is created if it does not exist)
modified_dir = Path().cwd() / 'results' / 'compare_rod_models'
modified_dir.mkdir(parents=True, exist_ok=True)
# sheet name -> table, written in this order
# NOTE(review): 'Luowski_loopless_rxns' is misspelled (missing 'k'); kept as is.
modified_sheets = {
    'Liang_all_possible_fluxes': fva_Liang,
    'Liang_loopless_rxns': fva_Liang_same,
    'Liang_loopy_rxns': fva_Liang_diff,
    'Lukowski_all_possible_fluxes': fva_Lukowski,
    'Luowski_loopless_rxns': fva_Lukowski_same,
    'Lukowski_loopy_rxns': fva_Lukowski_diff,
}
with pd.ExcelWriter(modified_dir / 'MODIFIED_fva_loops_Liang_Lukowski_singlerods.xlsx') as writer:
    for sheet_title, sheet_table in modified_sheets.items():
        sheet_table.to_excel(writer, sheet_name=sheet_title)

In [114]:
######## COMPARE NON-ZERO FLUXES FOR LIANG AND LUKOWSKI ROD MODELS ########

liang_key = 'mod_RPE_PRrod_Liang_singlePR'
lukowski_key = 'mod_RPE_PRrod_Lukowski_singlePR'

# take each result table out of its dict and keep only the rows (reactions) that have
# at least one value different from zero
fba_Liang = fba[liang_key]
fba_Liang = fba_Liang.loc[(fba_Liang != 0).any(axis=1)]
fba_Lukowski = fba[lukowski_key]
fba_Lukowski = fba_Lukowski.loc[(fba_Lukowski != 0).any(axis=1)]

fva_Liang = fva[liang_key]
fva_Liang = fva_Liang.loc[(fva_Liang != 0).any(axis=1)]
fva_Lukowski = fva[lukowski_key]
fva_Lukowski = fva_Lukowski.loc[(fva_Lukowski != 0).any(axis=1)]

fva_loopless_Liang = fva_loopless[liang_key]
fva_loopless_Liang = fva_loopless_Liang.loc[(fva_loopless_Liang != 0).any(axis=1)]
fva_loopless_Lukowski = fva_loopless[lukowski_key]
fva_loopless_Lukowski = fva_loopless_Lukowski.loc[(fva_loopless_Lukowski != 0).any(axis=1)]

In [115]:
##### FBA #####
# Reactions that appear in the non-zero FBA table of one rod model but not in that of
# the other, joined with their reaction info and sorted by absolute flux.

# get list of index ids in Lukowski but not in Liang
Lukowski_not_Liang = list(set(fba_Lukowski.index) - set(fba_Liang.index)) # set difference of the two indexes
Lukowski_not_Liang_rxns = make_rxn_df(rod_Lukowski).loc[Lukowski_not_Liang] # make rxn dfs
# make df with fba results and fuse with rxns
Lukowski_not_Liang_rxns['absolute_fluxes'] = abs(fba_Lukowski.loc[Lukowski_not_Liang]) # helper column, only used for sorting
fba_Lukowski_not_Liang = pd.concat([fba_Lukowski.loc[Lukowski_not_Liang], Lukowski_not_Liang_rxns], axis=1).sort_values(by='absolute_fluxes', ascending=False)
fba_Lukowski_not_Liang = fba_Lukowski_not_Liang.drop(columns=['absolute_fluxes']) # remove absolute fluxes column

# get list of index ids in Liang but not in Lukowski
Liang_not_Lukowski = list(set(fba_Liang.index) - set(fba_Lukowski.index)) # set difference of the two indexes
Liang_not_Lukowski_rxns = make_rxn_df(rod_Liang).loc[Liang_not_Lukowski] # make rxn dfs
# make df with fba results and fuse with rxns
Liang_not_Lukowski_rxns['absolute_fluxes'] = abs(fba_Liang.loc[Liang_not_Lukowski]) # helper column, only used for sorting
fba_Liang_not_Lukowski = pd.concat([fba_Liang.loc[Liang_not_Lukowski], Liang_not_Lukowski_rxns], axis=1).sort_values(by='absolute_fluxes', ascending=False)
fba_Liang_not_Lukowski = fba_Liang_not_Lukowski.drop(columns=['absolute_fluxes']) # remove absolute fluxes column

In [116]:
##### FVA #####
# Reactions that appear in the non-zero FVA table of one rod model but not in that of
# the other, joined with their reaction info and sorted by absolute minimum flux.
# NOTE: the 'min' / 'max' column names used here exist because earlier cells renamed
# the columns of the frames stored in the fva dict in place.

# get list of index ids in Lukowski but not in Liang
Lukowski_not_Liang = list(set(fva_Lukowski.index) - set(fva_Liang.index)) # set difference of the two indexes
Lukowski_not_Liang_rxns = make_rxn_df(rod_Lukowski).loc[Lukowski_not_Liang] # make rxn dfs
# make df with fva results and fuse with rxns
Lukowski_not_Liang_rxns['absolute_min'] = abs(fva_Lukowski.loc[Lukowski_not_Liang,'min']) # helper column used for sorting
Lukowski_not_Liang_rxns['absolute_max'] = abs(fva_Lukowski.loc[Lukowski_not_Liang,'max']) # helper column (dropped again below)

fva_Lukowski_not_Liang = pd.concat([fva_Lukowski.loc[Lukowski_not_Liang], Lukowski_not_Liang_rxns], axis=1).sort_values(by='absolute_min', ascending=False)
fva_Lukowski_not_Liang = fva_Lukowski_not_Liang.drop(columns=['absolute_min','absolute_max']) # remove absolute min and max columns

# get list of index ids in Liang but not in Lukowski
Liang_not_Lukowski = list(set(fva_Liang.index) - set(fva_Lukowski.index)) # set difference of the two indexes
Liang_not_Lukowski_rxns = make_rxn_df(rod_Liang).loc[Liang_not_Lukowski] # make rxn dfs
# make df with fva results and fuse with rxns
Liang_not_Lukowski_rxns['absolute_min'] = abs(fva_Liang.loc[Liang_not_Lukowski,'min']) # helper column used for sorting
Liang_not_Lukowski_rxns['absolute_max'] = abs(fva_Liang.loc[Liang_not_Lukowski,'max']) # helper column (dropped again below)
fva_Liang_not_Lukowski = pd.concat([fva_Liang.loc[Liang_not_Lukowski], Liang_not_Lukowski_rxns], axis=1).sort_values(by='absolute_min', ascending=False)
fva_Liang_not_Lukowski = fva_Liang_not_Lukowski.drop(columns=['absolute_min','absolute_max']) # remove absolute min and max columns


In [117]:
##### FVA LOOPLESS #####
# Reactions that appear in the non-zero loopless-FVA table of one rod model but not in
# that of the other, joined with their reaction info and sorted by absolute minimum flux.
# NOTE: the 'min_loopless' / 'max_loopless' column names exist because earlier cells
# renamed the columns of the frames stored in the fva_loopless dict in place.

# get list of index ids in Lukowski but not in Liang
Lukowski_not_Liang = list(set(fva_loopless_Lukowski.index) - set(fva_loopless_Liang.index)) # set difference of the two indexes
Lukowski_not_Liang_rxns = make_rxn_df(rod_Lukowski).loc[Lukowski_not_Liang] # make rxn dfs
# make df with fva results and fuse with rxns
Lukowski_not_Liang_rxns['absolute_min'] = abs(fva_loopless_Lukowski.loc[Lukowski_not_Liang,'min_loopless']) # helper column used for sorting
Lukowski_not_Liang_rxns['absolute_max'] = abs(fva_loopless_Lukowski.loc[Lukowski_not_Liang,'max_loopless']) # helper column (dropped again below)
fva_loopless_Lukowski_not_Liang = pd.concat([fva_loopless_Lukowski.loc[Lukowski_not_Liang], Lukowski_not_Liang_rxns], axis=1).sort_values(by='absolute_min', ascending=False)
fva_loopless_Lukowski_not_Liang = fva_loopless_Lukowski_not_Liang.drop(columns=['absolute_min','absolute_max']) # remove absolute min and max columns

# get list of index ids in Liang but not in Lukowski
Liang_not_Lukowski = list(set(fva_loopless_Liang.index) - set(fva_loopless_Lukowski.index)) # set difference of the two indexes
Liang_not_Lukowski_rxns = make_rxn_df(rod_Liang).loc[Liang_not_Lukowski] # make rxn dfs
# make df with fva results and fuse with rxns
Liang_not_Lukowski_rxns['absolute_min'] = abs(fva_loopless_Liang.loc[Liang_not_Lukowski,'min_loopless']) # helper column used for sorting
Liang_not_Lukowski_rxns['absolute_max'] = abs(fva_loopless_Liang.loc[Liang_not_Lukowski,'max_loopless']) # helper column (dropped again below)
fva_loopless_Liang_not_Lukowski = pd.concat([fva_loopless_Liang.loc[Liang_not_Lukowski], Liang_not_Lukowski_rxns], axis=1).sort_values(by='absolute_min', ascending=False)
fva_loopless_Liang_not_Lukowski = fva_loopless_Liang_not_Lukowski.drop(columns=['absolute_min','absolute_max']) # remove absolute min and max columns


In [118]:
##### FVA MODIFIED #####
# Reactions that appear in the FVA table of one modified rod model but not in that of
# the other, joined with their reaction info and sorted by absolute minimum flux.
# NOTE(review): unlike the fba / fva / fva_loopless tables, the fva_modified_* frames
# do not appear to have had their all-zero rows dropped in the cells shown, so these
# differences presumably reflect which reactions exist in each model — confirm.

# get list of index ids in Lukowski but not in Liang
Lukowski_not_Liang = list(set(fva_modified_Lukowski.index) - set(fva_modified_Liang.index)) # set difference of the two indexes
Lukowski_not_Liang_rxns = make_rxn_df(rod_Lukowski).loc[Lukowski_not_Liang] # make rxn dfs
# make df with fva results and fuse with rxns
Lukowski_not_Liang_rxns['absolute_min'] = abs(fva_modified_Lukowski.loc[Lukowski_not_Liang,'min_modified']) # helper column used for sorting
Lukowski_not_Liang_rxns['absolute_max'] = abs(fva_modified_Lukowski.loc[Lukowski_not_Liang,'max_modified']) # helper column (dropped again below)
fva_modified_Lukowski_not_Liang = pd.concat([fva_modified_Lukowski.loc[Lukowski_not_Liang], Lukowski_not_Liang_rxns], axis=1).sort_values(by='absolute_min', ascending=False)
fva_modified_Lukowski_not_Liang = fva_modified_Lukowski_not_Liang.drop(columns=['absolute_min','absolute_max']) # remove absolute min and max columns

# get list of index ids in Liang but not in Lukowski
Liang_not_Lukowski = list(set(fva_modified_Liang.index) - set(fva_modified_Lukowski.index)) # set difference of the two indexes
Liang_not_Lukowski_rxns = make_rxn_df(rod_Liang).loc[Liang_not_Lukowski] # make rxn dfs
# make df with fva results and fuse with rxns
Liang_not_Lukowski_rxns['absolute_min'] = abs(fva_modified_Liang.loc[Liang_not_Lukowski,'min_modified']) # helper column used for sorting
Liang_not_Lukowski_rxns['absolute_max'] = abs(fva_modified_Liang.loc[Liang_not_Lukowski,'max_modified']) # helper column (dropped again below)
fva_modified_Liang_not_Lukowski = pd.concat([fva_modified_Liang.loc[Liang_not_Lukowski], Liang_not_Lukowski_rxns], axis=1).sort_values(by='absolute_min', ascending=False)
fva_modified_Liang_not_Lukowski = fva_modified_Liang_not_Lukowski.drop(columns=['absolute_min','absolute_max']) # remove absolute min and max columns

In [119]:
##### FVA LOOPLESS MODIFIED #####
# Reactions that appear in the loopless-FVA table of one modified rod model but not in
# that of the other, joined with their reaction info and sorted by absolute minimum flux.
# NOTE(review): the fva_loopless_modified_* frames do not appear to have had their
# all-zero rows dropped in the cells shown — confirm that this is intended.

# get list of index ids in Lukowski but not in Liang
Lukowski_not_Liang = list(set(fva_loopless_modified_Lukowski.index) - set(fva_loopless_modified_Liang.index)) # set difference of the two indexes
Lukowski_not_Liang_rxns = make_rxn_df(rod_Lukowski).loc[Lukowski_not_Liang] # make rxn dfs
# make df with fva results and fuse with rxns
Lukowski_not_Liang_rxns['absolute_min'] = abs(fva_loopless_modified_Lukowski.loc[Lukowski_not_Liang,'min_loopless_modified']) # helper column used for sorting
Lukowski_not_Liang_rxns['absolute_max'] = abs(fva_loopless_modified_Lukowski.loc[Lukowski_not_Liang,'max_loopless_modified']) # helper column (dropped again below)
fva_loopless_modified_Lukowski_not_Liang = pd.concat([fva_loopless_modified_Lukowski.loc[Lukowski_not_Liang], Lukowski_not_Liang_rxns], axis=1).sort_values(by='absolute_min', ascending=False)
fva_loopless_modified_Lukowski_not_Liang = fva_loopless_modified_Lukowski_not_Liang.drop(columns=['absolute_min','absolute_max']) # remove absolute min and max columns

# get list of index ids in Liang but not in Lukowski
Liang_not_Lukowski = list(set(fva_loopless_modified_Liang.index) - set(fva_loopless_modified_Lukowski.index)) # set difference of the two indexes
Liang_not_Lukowski_rxns = make_rxn_df(rod_Liang).loc[Liang_not_Lukowski] # make rxn dfs
# make df with fva results and fuse with rxns
Liang_not_Lukowski_rxns['absolute_min'] = abs(fva_loopless_modified_Liang.loc[Liang_not_Lukowski,'min_loopless_modified']) # helper column used for sorting
Liang_not_Lukowski_rxns['absolute_max'] = abs(fva_loopless_modified_Liang.loc[Liang_not_Lukowski,'max_loopless_modified']) # helper column (dropped again below)
fva_loopless_modified_Liang_not_Lukowski = pd.concat([fva_loopless_modified_Liang.loc[Liang_not_Lukowski], Liang_not_Lukowski_rxns], axis=1).sort_values(by='absolute_min', ascending=False)
fva_loopless_modified_Liang_not_Lukowski = fva_loopless_modified_Liang_not_Lukowski.drop(columns=['absolute_min','absolute_max']) # remove absolute min and max columns

In [120]:
# function to remove rows that contain zeros in both columns with names containing min or max
def remove_zeros(df: pd.DataFrame, tol: float = 1e-3) -> pd.DataFrame:
    """Keep only the rows that have a non-negligible value in a min/max column.

    A column takes part in the test when its label contains the substring
    'min' or 'max'. A row is kept when at least one of those columns holds a
    value ``>= tol`` or ``<= -tol``; NaN never satisfies either comparison.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter. Column labels must support ``'min' in label``.
    tol : float, default 1e-3
        Magnitude below which a value is treated as zero. Expected to be
        non-negative.

    Returns
    -------
    pandas.DataFrame
        The retained rows with all original columns, in their original order.
        Empty when no column label contains 'min' or 'max'.
    """
    # columns whose label contains 'min' or 'max'
    min_max_cols = [col for col in df.columns if 'min' in col or 'max' in col]
    bounds = df[min_max_cols]
    # row-wise reduction (axis=1) gives the same mask as transposing first, without the transposes
    keep = ((bounds >= tol) | (bounds <= -tol)).any(axis=1)
    return df[keep]

In [125]:
# keep the two identifier columns plus one cone and one rod expression column per dataset,
# giving the expression columns short labels
# NOTE(review): 'Rod_Lukoswki' (sic) is the label looked up further down in this cell;
# correct the spelling in both places together or not at all
Lukowski_expr = Lukowski_gene_exp[
    ['GeneSymbol', 'Ensembl', 'C10 Cone PR (%)', 'Average Rod PR C0, C1, C2, C3, C4, C7 (%)']
].rename(columns={
    'C10 Cone PR (%)': 'Cone_Lukowski',
    'Average Rod PR C0, C1, C2, C3, C4, C7 (%)': 'Rod_Lukoswki',
})
Liang_expr = Sysgo_gene_exp[
    ['GeneSymbol', 'Ensembl', 'ConePhotoreceptors__LiangEtAl_2019_', 'RodPhotoreceptors__LiangEtAl_2019_']
].rename(columns={
    'ConePhotoreceptors__LiangEtAl_2019_': 'Cone_Liang',
    'RodPhotoreceptors__LiangEtAl_2019_': 'Rod_Liang',
})

# lookups built with cols2dict from the 'Ensembl' column to rod expression levels / gene symbols
Ensembl_Rod_Liang_expression = cols2dict(Liang_expr, 'Ensembl', 'Rod_Liang')
Ensembl_Rod_Lukowski_expression = cols2dict(Lukowski_expr, 'Ensembl', 'Rod_Lukoswki')
Ensembl_GeneSymbol = cols2dict(Liang_expr, 'Ensembl', 'GeneSymbol')

# the fba / fva / loopless-fva comparison frames as they are at this point;
# this list is not read again within this cell
df_list = [
    fba_Lukowski_not_Liang, fba_Liang_not_Lukowski,
    fva_Lukowski_not_Liang, fva_Liang_not_Lukowski,
    fva_loopless_Lukowski_not_Liang, fva_loopless_Liang_not_Lukowski,
]

# run add_expression_info over every comparison frame (same call order as before)
# and rebind each name to the returned frame
(
    fba_Lukowski_not_Liang,
    fba_Liang_not_Lukowski,
    fva_modified_Liang_not_Lukowski,
    fva_modified_Lukowski_not_Liang,
    fva_Lukowski_not_Liang,
    fva_Liang_not_Lukowski,
    fva_loopless_Lukowski_not_Liang,
    fva_loopless_Liang_not_Lukowski,
    fva_loopless_modified_Lukowski_not_Liang,
    fva_loopless_modified_Liang_not_Lukowski,
) = [
    add_expression_info(comparison_df)
    for comparison_df in (
        fba_Lukowski_not_Liang,
        fba_Liang_not_Lukowski,
        fva_modified_Liang_not_Lukowski,
        fva_modified_Lukowski_not_Liang,
        fva_Lukowski_not_Liang,
        fva_Liang_not_Lukowski,
        fva_loopless_Lukowski_not_Liang,
        fva_loopless_Liang_not_Lukowski,
        fva_loopless_modified_Lukowski_not_Liang,
        fva_loopless_modified_Liang_not_Lukowski,
    )
]

# filter every FVA comparison frame with remove_zeros, i.e. drop the rows whose
# min/max columns are all (near) zero; the two fba frames are not filtered
(
    fva_modified_Liang_not_Lukowski,
    fva_modified_Lukowski_not_Liang,
    fva_Lukowski_not_Liang,
    fva_Liang_not_Lukowski,
    fva_loopless_Lukowski_not_Liang,
    fva_loopless_Liang_not_Lukowski,
    fva_loopless_modified_Lukowski_not_Liang,
    fva_loopless_modified_Liang_not_Lukowski,
) = [
    remove_zeros(comparison_df)
    for comparison_df in (
        fva_modified_Liang_not_Lukowski,
        fva_modified_Lukowski_not_Liang,
        fva_Lukowski_not_Liang,
        fva_Liang_not_Lukowski,
        fva_loopless_Lukowski_not_Liang,
        fva_loopless_Liang_not_Lukowski,
        fva_loopless_modified_Lukowski_not_Liang,
        fva_loopless_modified_Liang_not_Lukowski,
    )
]

# run add_expression_info over every comparison frame and rebind each name to the result
# NOTE(review): the same function was already applied to these same frames earlier in
# this cell; confirm that this second pass is intended
(
    fba_Lukowski_not_Liang,
    fba_Liang_not_Lukowski,
    fva_modified_Liang_not_Lukowski,
    fva_modified_Lukowski_not_Liang,
    fva_Lukowski_not_Liang,
    fva_Liang_not_Lukowski,
    fva_loopless_Lukowski_not_Liang,
    fva_loopless_Liang_not_Lukowski,
    fva_loopless_modified_Lukowski_not_Liang,
    fva_loopless_modified_Liang_not_Lukowski,
) = [
    add_expression_info(comparison_df)
    for comparison_df in (
        fba_Lukowski_not_Liang,
        fba_Liang_not_Lukowski,
        fva_modified_Liang_not_Lukowski,
        fva_modified_Lukowski_not_Liang,
        fva_Lukowski_not_Liang,
        fva_Liang_not_Lukowski,
        fva_loopless_Lukowski_not_Liang,
        fva_loopless_Liang_not_Lukowski,
        fva_loopless_modified_Lukowski_not_Liang,
        fva_loopless_modified_Liang_not_Lukowski,
    )
]

# run add_discretised_expression_info over every comparison frame (same call order as
# before) and rebind each name to the returned frame
(
    fba_Lukowski_not_Liang,
    fba_Liang_not_Lukowski,
    fva_modified_Liang_not_Lukowski,
    fva_modified_Lukowski_not_Liang,
    fva_Lukowski_not_Liang,
    fva_Liang_not_Lukowski,
    fva_loopless_Lukowski_not_Liang,
    fva_loopless_Liang_not_Lukowski,
    fva_loopless_modified_Lukowski_not_Liang,
    fva_loopless_modified_Liang_not_Lukowski,
) = [
    add_discretised_expression_info(comparison_df)
    for comparison_df in (
        fba_Lukowski_not_Liang,
        fba_Liang_not_Lukowski,
        fva_modified_Liang_not_Lukowski,
        fva_modified_Lukowski_not_Liang,
        fva_Lukowski_not_Liang,
        fva_Liang_not_Lukowski,
        fva_loopless_Lukowski_not_Liang,
        fva_loopless_Liang_not_Lukowski,
        fva_loopless_modified_Lukowski_not_Liang,
        fva_loopless_modified_Liang_not_Lukowski,
    )
]


In [124]:
# write every comparison frame to its own sheet of a single workbook in
# results/compare_rod_models/ (the folder is created if it does not exist)
compare_rod_models_dir = Path.cwd() / 'results' / 'compare_rod_models'
compare_rod_models_dir.mkdir(parents=True, exist_ok=True)

# sheet name -> frame, in the order the sheets are written.
# Excel limits sheet names to 31 characters: the two loopless-modified sheets were
# previously named 'fva_loopless_mod_..._not_...' (35 characters), which XlsxWriter
# rejects and openpyxl only writes with a warning, so they use the shorter 'fva_ll_mod_' prefix.
compare_rod_models_sheets = {
    'fba_Lukowski_not_Liang': fba_Lukowski_not_Liang,
    'fba_Liang_not_Lukowski': fba_Liang_not_Lukowski,
    'fva_mod_Lukowski_not_Liang': fva_modified_Lukowski_not_Liang,
    'fva_mod_Liang_not_Lukowski': fva_modified_Liang_not_Lukowski,
    'fva_Lukowski_not_Liang': fva_Lukowski_not_Liang,
    'fva_Liang_not_Lukowski': fva_Liang_not_Lukowski,
    'fva_loopless_Lukowski_not_Liang': fva_loopless_Lukowski_not_Liang,
    'fva_loopless_Liang_not_Lukowski': fva_loopless_Liang_not_Lukowski,
    'fva_ll_mod_Lukowski_not_Liang': fva_loopless_modified_Lukowski_not_Liang,
    'fva_ll_mod_Liang_not_Lukowski': fva_loopless_modified_Liang_not_Lukowski,
}
# fail early, before any file is created, if a sheet name would be invalid in Excel
assert all(len(_sheet_name) <= 31 for _sheet_name in compare_rod_models_sheets), 'Excel sheet names must be <= 31 characters'

with pd.ExcelWriter(compare_rod_models_dir / 'compare_rod_models_fba_fva.xlsx') as writer:
    for _sheet_name, _sheet_df in compare_rod_models_sheets.items():
        _sheet_df.to_excel(writer, sheet_name=_sheet_name)

