In [1]:
# Compare the flux values stored in several flux output files (spreadsheets or plain-text flux listings).
import pandas as pd
import os, sys, re
from collections import OrderedDict
# Relative path to the model root; every input path used below is built from it.
model_root_path = '../'
# Make the modules in <model root>/pycore/ importable.
sys.path.append(model_root_path+'pycore/')
# NOTE(review): star import; presumably the source of read_spreadsheet and
# extract_details_from_rxnid used in later cells -- confirm, and consider importing them by name.
from gsm_custom_functions import *

In [2]:
# Inputs for the comparison: protein curation table, flux files to compare, and the
# list of proteins that have no modeled function.
# NOTE(review): this file has a .tsv extension but is read with pd.read_excel -- confirm it is
# really an Excel workbook; if it is tab-separated text, pd.read_csv(..., sep='\t') is
# presumably what is wanted. df_pro is not used by the cells shown below.
df_pro = pd.read_excel(model_root_path+'build_model/model/PROTEIN_stoich_curation.tsv')
# flux_paths = {'../scRBA/application/output_max_withoutMP/ibutoh/flux.escher.csv',
# 			  model_root_path+'/output_max_withoutMP/ibutoh/flux.escher.csv'}
# Ordered list (not a set): the FIRST entry is the reference the other files are compared
# against, and flux_paths_list[1] is inspected below. A set of strings has no stable
# iteration order across kernel restarts, which made both of those choices arbitrary.
flux_paths = [
	model_root_path+'parameterization/kapp/datasets/Rabinowitz2023_batchGlc_using_MFA_and_scRBA_methods/min_flux_violation/enz_alloc.flux_unscaled.txt',
	model_root_path+'parameterization/kapp/datasets/PinheiroEtAl2020_batchXyl/min_flux_violation/enz_alloc.flux_unscaled.txt',
	model_root_path+'application/max_growth_xyl/gxf1_disabled/mu_max_on_glc_with_xyl/runRBA.flux.txt',
]
flux_paths_list = list(flux_paths)
# path -> flux data loaded from that file (filled in by the next cell)
fluxes = dict()
# one protein ID per line
with open(model_root_path+'build_model/proteins_without_modeled_functions.txt') as f:
	proteins_without_modeled_functions = f.read().splitlines()

In [3]:
# Load every file in flux_paths into fluxes[path].
for path in flux_paths:
	fluxes[path] = dict()
	if path.endswith(('.csv', '.tsv', '.xlsx', '.xls', '.xlsm')):
		# spreadsheet-like file: delegate to read_spreadsheet (not defined in this notebook;
		# presumably provided by the gsm_custom_functions star import)
		fluxes[path] = read_spreadsheet(path)
		continue
	# Plain-text flux listing: one record per line, three whitespace-separated fields
	# (reaction ID, a type field that is not used here, value).
	with open(path) as flux_file:
		stripped_lines = [raw_line.strip() for raw_line in flux_file.readlines()]
	for record in stripped_lines:
		# skip lines that are empty or consist only of '/'
		if record in ('', '/'):
			continue
		rxn_id, _vtype, raw_val = re.split(r'\s+', record)
		tag, rxn_base_id, rxn_dir, enz_id = extract_details_from_rxnid(rxn_id)
		entry = {
			'tag': tag,
			'rxn_base_id': rxn_base_id,
			'rxn_dir': rxn_dir,
			'enz_id': enz_id,
			'val': float(raw_val),
		}
		fluxes[path][rxn_id] = entry
		# flag PROWASTE reactions whose rxn_dir field is listed in proteins_without_modeled_functions
		if tag == 'PROWASTE' and rxn_dir in proteins_without_modeled_functions:
			entry['extra'] = 'PROWASTE_NO_MODEL'

In [4]:
# For the file at index 1 of flux_paths_list, print the rxn_dir field and the flux value
# of every entry that was flagged with an 'extra' key while loading.
for _rxn_id, details in fluxes[flux_paths_list[1]].items():
	if 'extra' not in details:
		continue
	print(details['rxn_dir'], details['val'])

YTM1 2.057956478042165e-09
YML6 4.963901394217523e-08
TSF1 5.554002263642265e-10
TIF35 1.119281854772302e-07
TIF32 2.012042084641845e-08
SEY1 7.857717616234133e-09
RTC6 2.197044329767784e-08
rt7512 3.864024327977934e-07
rt6582 1.591659055047174e-09
rt6418 2.752472573938974e-06
rt6199 5.900970667789397e-09
rt5391 5.949992525898592e-07
rt4871 1.04251727437457e-09
rt3794 6.953662223646551e-08
rt3625 4.454770305890363e-09
rt2393 3.064407143202021e-08
rt2265 9.481210563305133e-10
RT16858 1.354061508864203e-09
RT16853 1.194955110134348e-09
RT16840 5.48546165952107e-09
RT16825 5.244361430995048e-08
RT16818 3.176476499778482e-09
RT16815 2.883424694279194e-09
RT16807 1.122954094932286e-09
RT16793 2.082855338169748e-10
RT16790 1.059044578640992e-10
RT16786 3.734939089641114e-09
RT16778 7.483425840055338e-10
RT16777 1.178101127456399e-10
RT16774 1.706241414076008e-09
RT16773 5.28872171474987e-09
RT16752 5.220175564141228e-09
RT16736 1.661460501064889e-09
RT16731 2.838305773146344e-08
RT16722 1.23

In [5]:
# Make a CSV file showing the flux of every reaction in each path, plus the % difference
# from the first path (the reference).
#
# Layout of flux_comparison.csv: one row per reaction ID (in order of first appearance),
# one column per path, and a single 'diff' column.
# NOTE(review): 'diff' is written once per path, so with more than two paths a later path
# overwrites the value written for an earlier one; each row ends up holding the % difference
# of the LAST path that contains that reaction. Confirm whether one diff column per path is wanted.
reference_path = next(iter(fluxes), None)
reference_fluxes = fluxes[reference_path] if reference_path is not None else {}

# Collect plain dicts first and build the DataFrame once; enlarging a DataFrame one cell at a
# time with df.loc copies the frame on every new row/column.
comparison_columns = {}  # column name -> {reaction ID: value}, in column-creation order
row_order = {}           # reaction IDs in order of first appearance (dict used as an ordered set)
for path in fluxes:
	for r in fluxes[path]:
		val = fluxes[path][r]['val']
		row_order.setdefault(r, None)
		comparison_columns.setdefault(path, {})[r] = val
		# compare to first path
		if path == reference_path:
			pct_diff = 0.0
		else:
			try:
				reference_val = reference_fluxes[r]['val']
				pct_diff = (val - reference_val) / reference_val * 100
			except (KeyError, TypeError, ZeroDivisionError):
				# reaction absent from the reference file, non-numeric value, or zero
				# reference flux: leave the cell empty in the CSV
				pct_diff = float('nan')
		comparison_columns.setdefault('diff', {})[r] = pct_diff

df = pd.DataFrame(comparison_columns, index=list(row_order))
df.to_csv('flux_comparison.csv')