In [1]:
import cobra as cb
import pandas as pd
import numpy as np

In [2]:
GROWTH_MIN_OBJ = 0.01

# Functions for breaking down iiFBA tasks

### Functions to clean and streamline
- write code for pFBA
- write seperate code for Sampling
- Write code for iteration
- Write code to re-initialize environment
	
	Lastly:
- Write wrapper to run all combined
	

## pFBA function

In [None]:
def ii_pfba(model, iter, org_fluxes=None):      
	"""
	Summary:
	
	-------
	Params:
	- model: cobrapy.Model
	
	- iter: INT
	Numeric indexer for what iteration is being conducted
	- org_fluxes: pd.DataFrame
	Dataframe class, stores the Iteration Fluxes for the specified 
	model, for all iterations. 

	-------
	Returns:
	- org_fluxes (optional): pd.Dataframe
	default = None
	Updated fluxes for all reactions in given iteration.

	"""                            
	# run pFBA
	sol1 = model.slim_optimize()
	if sol1 > GROWTH_MIN_OBJ:
		sol = cb.flux_analysis.parsimonious.pfba(model)
		# standardize and save output                   
		df = pd.DataFrame([sol.fluxes],columns=sol.fluxes.index,index=[iter])
	else:
		rxnid = []
		for i in range(len(model.reactions)): 
			rxnid.append(model.reactions[i].id)
		df = pd.DataFrame([np.zeros(len(model.reactions))],columns=rxnid,index=[iter])
	
	if iter == 0:
		org_fluxes = df
	else:
		org_fluxes = pd.concat([org_fluxes,df])

	return org_fluxes

## Sampling

In [None]:
def ii_sampling(model, org_fluxes, m_vals, iter, m1_idx):
	# run flux sampling
	total_sample_ct = m_vals[0] * m_vals[1]
	sample_ct = total_sample_ct if iter == 0 else m_vals[0]
	sol = cb.sampling.sample(model, sample_ct)

	# standardize and save output
	arrays = [[iter]*total_sample_ct, list(sol.index +  m1_idx*m_vals[1])] # m1_idx is always zero for iter 0
	tuples = list(zip(*arrays))
	multi_idx = pd.MultiIndex.from_tuples(tuples,names=['iteration','run'])
	sol.index = multi_idx

	org_fluxes = pd.concat([org_fluxes, sol])

## Environment Set up

In [None]:
def init_env(models, media, m_vals = None):
	# get index type for env. flux log
	if m_vals is not None:
		# store fluxes of all exchange reactions for the overall model based on media
		arrays = [[0]*m_vals[0]*m_vals[1], list(range(m_vals[0]*m_vals[1]))]
		tuples = list(zip(*arrays))
		index = pd.MultiIndex.from_tuples(tuples,names=['iteration','run'])
	else: 
		index = [0]
	
	# extract all exchange reactions
	cols = set()
	for model in models:
		for model_ex in range(len(model.exchanges)):
			cols.add(model.exchanges[model_ex].id)

	# compile initial media conditions
	env_fluxes = pd.DataFrame([np.zeros(len(cols))],
					 columns=list(cols),
					 index=index,dtype=float)
	for media_idx in range(len(media)):
		exid = media.iloc[media_idx]['Reaction']
		ex_flux = media.iloc[media_idx]['LB']
		env_fluxes.loc[:,exid] = ex_flux

	return env_fluxes

In [None]:
def set_env(model, env_f, iter, run=None):
	# reset exchanges
	for ex in model.exchanges:
		ex.lower_bound = 0
		ex.upper_bound = 1000
	
	# change environment bounds of model
	for env_ex in range(len(env_f.columns)):
		# run is analog for pfba or sampling and index type & all 0th iter runs have same start env.
		index = iter if run is None else (iter,run if iter != 0 else 0) 
		ex_lb = env_f.loc[index][env_f.columns[env_ex]].item()
		if ex_lb != 0:
			ex_id = env_f.columns[env_ex]
			if ex_id in model.exchanges:
				model.exchanges.get_by_id(ex_id).lower_bound = ex_lb
	
	return model

## Env. Flux update

In [None]:
def update_flux(models, env_fluxes, org_fluxes, flow, rel_abund, iter, 
				m1_idx=None, M_iter=None, m_vals=None): # sampling specific
	# adjust for sampling
	if m_vals is not None: # sampling
		if iter == 0: 
			sampling_end_ct = m_vals[0]*m_vals[1]
		else:
			sampling_end_ct = m_vals[1]
	else: # pFBA
		sampling_end_ct = 1


	for m2_idx in range(sampling_end_ct): 
	# condense iter 0 & 1+ and condense pFBA vs sampling 
	# caused by indexing
		if m_vals is not None:
			index = (iter, M_iter)
			f0_index = (0,0)
			if iter == 0:
				summing_idx = (iter, m2_idx)
				new_index = pd.MultiIndex.from_tuples([(iter+1,M_iter)],names=["iteration","run"])
			else:
				summing_idx = (iter, m2_idx + m1_idx * m_vals[1])
				new_index = pd.MultiIndex.from_tuples([(iter+1,m2_idx + m1_idx*m_vals[1])],names=["iteration","run"])
		else:
			index = iter
			f0_index = 0
			summing_idx = iter
			new_index = [iter+1]
			
		# begin updating fluxes
		env_tmp = env_fluxes.loc[[index]].copy(deep=True) #temporary dataframe for base environment from iteration 0,0
		for ex_idx in range(len(env_tmp.columns)):# for each exchange flux in environment
			ex_flux_sum = 0
			ex_flux_id = env_tmp.columns[ex_idx]
			for org_idx, model in enumerate(models):# for each organism sum up flux * relative abundance
				if ex_flux_id in model.exchanges:
					if org_fluxes[org_idx].loc[summing_idx][ex_flux_id] != 0:
						ex_flux_sum += org_fluxes[org_idx].loc[summing_idx][ex_flux_id] * rel_abund[org_idx]
			env_tmp.loc[index, ex_flux_id] = (1-flow)*(env_tmp.loc[index][ex_flux_id].item()-ex_flux_sum) + flow*env_fluxes.loc[f0_index][ex_flux_id].item() # update flux
		
		#re-index tmp dataframe & append
		df_tt = pd.DataFrame([env_tmp.loc[index]],columns = env_tmp.columns, index = new_index)
		env_fluxes = pd.concat([env_fluxes,df_tt])
			
	return env_fluxes                     

## Wrapper

In [None]:
def iipfba(community_model, media, relative_abundance,
		  flow=0.5, iterations=10, v=False):
	# initialize environmental flux logging
	env_f = init_env(community_model, media)

	# store organism fluxes here
	org_F = [] # use multiindexing with bacteria?

	# iterations
	for iter in range(iterations):
		print("Iteration", iter)

		for org_idx, org_model in enumerate(community_model):
			with org_model as model:
				# reset exchanges and set env.
				model = set_env(model, env_f, iter)

				# run optimization
				if iter == 0:
					org_F.append(ii_pfba(model, iter))
				else:
					org_F[org_idx] = ii_pfba(model, iter, org_F[org_idx])

				# flux update
				env_f = update_flux(community_model, env_f, org_F, flow, relative_abundance, iter)
				
	return env_f, org_F

# Test 

#### Original iiFBA function

In [None]:
# Simple non-sampling
def iifba(community_model, media, relative_abundance,
		  flow=0.5, solution_type="pFBA", 
		  iterations=10,
		  m_vals=[1,1], v=False):
	"""
	Summary:

	
	Params:
	- community_model: LIST type of Cobra Models (len number of unique bacteria)
	descr.

	- media: pd.DataFrame ()
	descr.

	- relative_abundance: LIST type of FLOAT (len number of unique bacteria)
	relative abundances of each bacteria in community. If sum(relative_abundance) > 1,
	relative abundances will be scaled by the sum. 

	- flow (optional): FLOAT
	default = 0.5
	Input flow rate of new metabolites/exchanges in media

	- solution type (optional): STR
	default = "pFBA"
	Type of optimization for FLux balance
	can be "pFBA", "sampling"

	- iterations (optional): INT
	default = 10
	THe number of interations until completion. Must be greater than 1 iteration.

	- m_vals (optional): LIST type of INT (2,)
	default = [1, 1]
	Number of initial flux points to use in flux sampling and number of runs per 
	iterations. If both values are 1, then simple 1-to-1 iterations are done.

	- v (optional) BOOL
	default = False
	Turn on verbose or turn off

	
	Returns:
	- flux_log: pandas.Dataframe 
	Contains values of all fluxes in exchanges of the community. Dataframe is
	multi-indexed by (iteration, run), run will always be 0 if using pFBA.

	- F: LIST of pandas.Dataframe
	Each index of list corresponds to the model of community_model. Each dataframe 
	contains all the fluxes of the appropriate model. Dataframe is multi-indexed 
	by (iteration, run), run will always be 0 if using pFBA.

	
	"""
	# convert all numeric to ints to ensure proper variable useage
	m_vals[0] = int(m_vals[0])
	m_vals[1] = int(m_vals[1])
	iterations = int(iterations)
	if solution_type.lower() == "pfba":
		print("Using Parsimonious FBA")
		m_vals = [1,1]
	elif solution_type.lower() == "sampling":
		print("Using Flux Sampling")
	else:
		print("Defaulting to Using Parsimonious FBA")
		solution_type = "pfba"
	solution_type = solution_type.lower()

	if sum(relative_abundance) >1:
		print("Scaling Abundance") if v else None
		relative_abundance = [r/sum(relative_abundance) for r in relative_abundance]

	print("Initializing Iterations") if v else None
	M = np.zeros((m_vals[0], iterations -1), dtype=int)
	for i in range(iterations-1):
		Mcol = np.sort(np.random.choice(m_vals[0]*m_vals[1],m_vals[0],replace=False))
		M[:,i]=Mcol
		

	# store fluxes of all exchange reactions for the overall model based on media
	print("Initializing Exchanges Logging") if v else None
	arrays = [[0]*m_vals[0]*m_vals[1],list(range(m_vals[0]*m_vals[1]))]
	tuples = list(zip(*arrays))
	multi_idx = pd.MultiIndex.from_tuples(tuples,names=['iteration','run'])

	# extract all exchange reactions
	cols = set()
	for model_idx in range(len(community_model)):
		for model_ex in range(len(community_model[model_idx].exchanges)):
			cols.add(community_model[model_idx].exchanges[model_ex].id)

	# compile initial media conditions
	print("Initializing Environment Logging") if v else None
	flux_log = pd.DataFrame([np.zeros(len(cols))],
					 columns=list(cols),
					 index=multi_idx,dtype=float)
	for media_ex in range(len(media)):
		exid = media.iloc[media_ex]['Reaction']
		ex_flux = media.iloc[media_ex]['LB']
		flux_log.loc[:,exid] = ex_flux
	
	# initialize organism flux dataframes
	F = []  

	# iterations
	print("Running Iterations") if v else None
	for iter in range(iterations):
		print("Iteration:", iter)
		
		if iter == 0:
			# use media for the first time around for all models
			for org_idx in range(len(community_model)):
				print("Organism:", org_idx)
				with community_model[org_idx] as model_iter:
					# reset exchanges for environment setting
					print("Reset Exchanges") if v else None
					for ex in model_iter.exchanges:
						ex.lower_bound = 0
						ex.upper_bound = 1000
					
					# Set Environment for 0th run (same initial env. for all runs)
					print("Set Environment") if v else None
					for env_ex in range(len(flux_log.columns)):
						ex_lb = flux_log.loc[(0,0)][flux_log.columns[env_ex]] #initial environment is the same for all runs, so use the 0th run
						if ex_lb != 0:
							ex_id = flux_log.columns[env_ex]
							if ex_id in model_iter.exchanges:
								model_iter.exchanges.get_by_id(ex_id).lower_bound = ex_lb
					
					# run optimization with pfba
					if solution_type == 'pfba':
						print("Running Optimization") if v else None
						multi_idx = pd.MultiIndex.from_tuples([(0,0)],names=["iteration","run"])                                       
						# run pFBA
						sol1 = model_iter.slim_optimize()
						if sol1 > 0.001:
							sol = cb.flux_analysis.parsimonious.pfba(model_iter)
							# standardize and save output                   
							df = pd.DataFrame([sol.fluxes],columns=sol.fluxes.index,index=multi_idx)
							F.append(df)
						else:
							# if no growth and cannot use the solution
							rxnid = []
							for i in range(len(model_iter.reactions)): 
								rxnid.append(model_iter.reactions[i].id)
							df = pd.DataFrame([np.zeros(len(model_iter.reactions))],columns=rxnid,index=multi_idx)
							F.append(df)
					
					# run optimization with flux sampling
					if solution_type == 'sampling':
						print("Running Optimization") if v else None
						# run flux sampling
						sol = cb.sampling.sample(model_iter, m_vals[0]*m_vals[1])
						# standardize and save output
						arrays = [[0]*m_vals[0]*m_vals[1],list(sol.index)]
						tuples = list(zip(*arrays))
						multi_idx = pd.MultiIndex.from_tuples(tuples,names=['iteration','run'])
						sol.index = multi_idx
						F.append(sol)

			# update f
			for run_idx in range(m_vals[0]*m_vals[1]): 
				print("Updating Fluxes") if v else None
				env_tmp = flux_log.loc[[(iter,0)]].copy(deep=True) #temporary dataframe for base environment from iteration 0,0
				for env_ex in range(len(flux_log.columns)):# for each exchange flux in environment
					ex_flux_sum = 0
					ex_flux_id = flux_log.columns[env_ex]
					#sum total flux of all bacteria in model
					for org_idx in range(len(community_model)):# for each organism sum up flux * relative abundance
						if ex_flux_id in community_model[org_idx].exchanges:
							if F[org_idx].loc[(0,run_idx)][ex_flux_id] != 0:
								ex_flux_sum += F[org_idx].loc[(0,run_idx)][ex_flux_id] * relative_abundance[org_idx]

					#iifba update for ex
					env_tmp.loc[(0,0),ex_flux_id] = (1-flow)*(flux_log.loc[(0,0)][ex_flux_id].item()-ex_flux_sum) + flow*flux_log.loc[(0,0)][ex_flux_id].item() # update flux
				
				#re-index tmp dataframe
				multi_idx = pd.MultiIndex.from_tuples([(1,run_idx)],names=["iteration","run"])
				df_tt = pd.DataFrame([env_tmp.loc[(0,0)]],columns = env_tmp.columns, index = multi_idx)
				flux_log = pd.concat([flux_log,df_tt])
		
		# re-run for other iterations
		else:       
			# if flux sampling, repeat for multiple points
			for m1_idx in range(m_vals[0]):
				M_iter = M[m1_idx, iter-1]

				# run iteration for all bacteria in community
				for org_idx in range(len(community_model)):
					print('organism:',org_idx)

					with community_model[org_idx] as model_iter:
						# reset exchanges for environment setting
						print("Reset Exchanges") if v else None
						for ex in model_iter.exchanges:
							ex.lower_bound = 0
							ex.upper_bound = 1000
					
						# Set Environment
						print("Set Environment") if v else None
						for env_ex in range(len(flux_log.columns)):
							ex_lb = flux_log.loc[(iter,M_iter)][flux_log.columns[env_ex]]
							if ex_lb != 0:
								ex_id = flux_log.columns[env_ex]
								if ex_id in model_iter.exchanges:
									model_iter.exchanges.get_by_id(ex_id).lower_bound = ex_lb
						

						if solution_type == 'pfba':
							print("Running Optimization") if v else None
							multi_idx = pd.MultiIndex.from_tuples([(iter,0)],names=["iteration","run"])                                       
							# run pFBA
							sol1 = model_iter.slim_optimize()
							if sol1 > 0.001:
								sol = cb.flux_analysis.parsimonious.pfba(model_iter)
								# standardize and save output                   
								df = pd.DataFrame([sol.fluxes],columns=sol.fluxes.index,index=multi_idx)
								F[org_idx] = pd.concat([F[org_idx],df])
							else:
								rxnid = []
								for i in range(len(model_iter.reactions)): 
									rxnid.append(model_iter.reactions[i].id)
								df = pd.DataFrame([np.zeros(len(model_iter.reactions))],columns=rxnid,index=multi_idx)
								F[org_idx] = pd.concat([F[org_idx],df])

						if solution_type == 'sampling':
							print("Running Optimization") if v else None
							# run flux sampling
							sol = cb.sampling.sample(model_iter,m_vals[0])
							# standardize and save output
							arrays = [[iter]*m_vals[0]*m_vals[1],list(sol.index+m1_idx*m_vals[1])]
							tuples = list(zip(*arrays))
							multi_idx = pd.MultiIndex.from_tuples(tuples,names=['iteration','run'])
							sol.index = multi_idx
							F[org_idx] = pd.concat([F[org_idx],sol])
			
			# update fluxes
			for m2_idx in range(m_vals[1]):
				print("Updating Fluxes") if v else None
				env_tmp = flux_log.loc[[(iter,M_iter)]].copy(deep=True) #temporary dataframe for base environment from iteration 0,0
				for ex_idx in range(len(flux_log.columns)):# for each exchange flux in environment
					ex_flux_sum = 0
					ex_flux_id = flux_log.columns[ex_idx]
					for org_idx in range(len(community_model)):# for each organism sum up flux * relative abundance
						if ex_flux_id in community_model[org_idx].exchanges:
							if F[org_idx].loc[(iter, m2_idx+m1_idx*m_vals[1])][ex_flux_id] != 0:
								ex_flux_sum += F[org_idx].loc[(iter,m2_idx+m1_idx*m_vals[1])][ex_flux_id] * relative_abundance[org_idx]

					env_tmp.loc[(iter,M_iter),ex_flux_id] = (1-flow)*(flux_log.loc[(iter,M_iter)][ex_flux_id].item()-ex_flux_sum) + flow*flux_log.loc[(0,0)][ex_flux_id].item() # update flux
				#re-index tmp dataframe
				multi_idx = pd.MultiIndex.from_tuples([(iter+1,m2_idx+m1_idx*m_vals[1])],names=["iteration","run"])
				df_tt = pd.DataFrame([env_tmp.loc[(iter,M_iter)]],columns = env_tmp.columns, index = multi_idx)
				flux_log = pd.concat([flux_log,df_tt])

	return flux_log, F

### Test Functions

In [None]:
# model_pre_processing
mod_paths = ['../AGORA2_Models/Escherichia_coli_str_K_12_substr_MG1655.mat',
			 "../AGORA2_Models/Bacteroides_thetaiotaomicron_3731.mat"]
S_matrix = [] #list of models
# Load Models and Save in S vector
for i in range(len(mod_paths)):
	model = cb.io.load_matlab_model(mod_paths[i])
	S_matrix.append(model) #append models to list

# Define input environment f_0
# this should be defined as a pandas dataframe with columns "Reaction" and "LB"
# glucose minimal medium
# Define Medium Components
glc_min_med = ['EX_glc_D(e)','EX_so4(e)','EX_nh4(e)','EX_no3(e)','EX_pi(e)','EX_cys_L(e)',
			   'EX_mn2(e)','EX_cl(e)','EX_ca2(e)','EX_mg2(e)','EX_cu2(e)','EX_cobalt2(e)','EX_fe2(e)','EX_fe3(e)','EX_zn2(e)','EX_k(e)']
# Define medium uptake flux bounds
glc_min_med_flux = [-10,-100,-100,-100,-100,-100,
					-100,-100,-100,-100,-100,-100,-100,-100,-100,-100]

glc_f0 = pd.DataFrame(data={'Reaction': glc_min_med,'LB': glc_min_med_flux})
glc_f0 = dict(zip(glc_min_med, glc_min_med_flux))
print(glc_f0)

No defined compartments in model model. Compartments will be deduced heuristically using regular expressions.
Using regular expression found the following compartments:c, e, p
No defined compartments in model model. Compartments will be deduced heuristically using regular expressions.
Using regular expression found the following compartments:c, e, p


{'EX_glc_D(e)': -10, 'EX_so4(e)': -100, 'EX_nh4(e)': -100, 'EX_no3(e)': -100, 'EX_pi(e)': -100, 'EX_cys_L(e)': -100, 'EX_mn2(e)': -100, 'EX_cl(e)': -100, 'EX_ca2(e)': -100, 'EX_mg2(e)': -100, 'EX_cu2(e)': -100, 'EX_cobalt2(e)': -100, 'EX_fe2(e)': -100, 'EX_fe3(e)': -100, 'EX_zn2(e)': -100, 'EX_k(e)': -100}


In [None]:
# Simple non-sampling
def iifba(community_model, media, relative_abundance,
		  flow=0.5, solution_type="pFBA", 
		  iterations=10,
		  m_vals=[1,1], v=False):

	# convert all numeric to ints to ensure proper variable useage
	m_vals[0] = int(m_vals[0])
	m_vals[1] = int(m_vals[1])
	iterations = int(iterations)
	if solution_type.lower() == "pfba":
		print("Using Parsimonious FBA")
		m_vals = [1,1]
	elif solution_type.lower() == "sampling":
		print("Using Flux Sampling")
	else:
		print("Defaulting to Using Parsimonious FBA")
		solution_type = "pfba"
	solution_type = solution_type.lower()

	if sum(relative_abundance) >1:
		print("Scaling Abundance") if v else None
		relative_abundance = [r/sum(relative_abundance) for r in relative_abundance]

	print("Initializing Iterations") if v else None
	M = np.zeros((m_vals[0], iterations), dtype=int)
	for i in range(iterations-1):
		Mcol = np.sort(np.random.choice(m_vals[0]*m_vals[1],m_vals[0],replace=False))
		M[:,i+1]=Mcol
		

	# store fluxes of all exchange reactions for the overall model based on media
	print("Initializing Exchanges Logging") if v else None
	flux_log = init_env(community_model, media, m_vals=None)
	
	# initialize organism flux dataframes
	F = []  

	# iterations
	print("Running Iterations") if v else None
	for iter in range(iterations):
		print("Iteration:", iter)
		
		if iter == 0:
			# use media for the first time around for all models
			for org_idx in range(len(community_model)):
				print("Organism:", org_idx)
				with community_model[org_idx] as model_iter:
					# reset exchanges for environment setting
					print("Reset Exchanges") if v else None
					for ex in model_iter.exchanges:
						ex.lower_bound = 0
						ex.upper_bound = 1000
					
					# Set Environment for 0th run (same initial env. for all runs)
					print("Set Environment") if v else None
					for env_ex in range(len(flux_log.columns)):
						index = 0 if solution_type == "pfba" else (0,0)
						ex_lb = flux_log.loc[index][flux_log.columns[env_ex]] #initial environment is the same for all runs, so use the 0th run
						if ex_lb != 0:
							ex_id = flux_log.columns[env_ex]
							if ex_id in model_iter.exchanges:
								model_iter.exchanges.get_by_id(ex_id).lower_bound = ex_lb
					
					# run optimization with pfba
					if solution_type == 'pfba':
						print("Running Optimization") if v else None
						if iter == 0:
							F.append(ii_pfba(model_iter, iter, None ))
						else:
							F[org_idx] = ii_pfba(model_iter, iter, F[org_idx] )
						
					
					# run optimization with flux sampling
					if solution_type == 'sampling':
						print("Running Optimization") if v else None
						# run flux sampling
						sol = cb.sampling.sample(model_iter, m_vals[0]*m_vals[1])
						# standardize and save output
						arrays = [[0]*m_vals[0]*m_vals[1],list(sol.index)]
						tuples = list(zip(*arrays))
						multi_idx = pd.MultiIndex.from_tuples(tuples,names=['iteration','run'])
						sol.index = multi_idx
						F.append(sol)

			# update f
			for run_idx in range(m_vals[0]*m_vals[1]): 
				print("Updating Fluxes") if v else None
				index = iter if solution_type == "pfba" else (iter,run_idx)
				env_tmp = flux_log.loc[[(iter,0)]].copy(deep=True) #temporary dataframe for base environment from iteration 0,0
				for env_ex in range(len(flux_log.columns)):# for each exchange flux in environment
					ex_flux_sum = 0
					ex_flux_id = flux_log.columns[env_ex]
					#sum total flux of all bacteria in model
					for org_idx in range(len(community_model)):# for each organism sum up flux * relative abundance
						if ex_flux_id in community_model[org_idx].exchanges:
							if F[org_idx].loc[index][ex_flux_id] != 0:
								ex_flux_sum += F[org_idx].loc[0][ex_flux_id] * relative_abundance[org_idx]
						

					#iifba update for ex
					index_2 = 
					env_tmp.loc[index,ex_flux_id] = (1-flow)*(flux_log.loc[(0,0)][ex_flux_id].item()-ex_flux_sum) + flow*flux_log.loc[(0,0)][ex_flux_id].item() # update flux
				
				#re-index tmp dataframe
				multi_idx = pd.MultiIndex.from_tuples([(1,run_idx)],names=["iteration","run"])
				df_tt = pd.DataFrame([env_tmp.loc[(0,0)]],columns = env_tmp.columns, index = multi_idx)
				flux_log = pd.concat([flux_log,df_tt])
		
		# re-run for other iterations
		else:       
			# if flux sampling, repeat for multiple points
			for m1_idx in range(m_vals[0]):
				M_iter = M[m1_idx, iter-1]

				# run iteration for all bacteria in community
				for org_idx in range(len(community_model)):
					print('organism:',org_idx)

					with community_model[org_idx] as model_iter:
						# reset exchanges for environment setting
						print("Reset Exchanges") if v else None
						for ex in model_iter.exchanges:
							ex.lower_bound = 0
							ex.upper_bound = 1000
					
						# Set Environment
						print("Set Environment") if v else None
						for env_ex in range(len(flux_log.columns)):
							index = iter if solution_type == "pfba" else (iter,M_iter)
							ex_lb = flux_log.loc[(iter,M_iter)][flux_log.columns[env_ex]]
							if ex_lb != 0:
								ex_id = flux_log.columns[env_ex]
								if ex_id in model_iter.exchanges:
									model_iter.exchanges.get_by_id(ex_id).lower_bound = ex_lb
						

						if solution_type == 'pfba':
							print("Running Optimization") if v else None
							if iter == 0:
								F.append(ii_pfba(model_iter, iter, None ))
							else:
								F[org_idx] = ii_pfba(model_iter, iter, F[org_idx] )

						if solution_type == 'sampling':
							print("Running Optimization") if v else None
							# run flux sampling
							sol = cb.sampling.sample(model_iter,m_vals[0])
							# standardize and save output
							arrays = [[iter]*m_vals[0]*m_vals[1],list(sol.index+m1_idx*m_vals[1])]
							tuples = list(zip(*arrays))
							multi_idx = pd.MultiIndex.from_tuples(tuples,names=['iteration','run'])
							sol.index = multi_idx
							F[org_idx] = pd.concat([F[org_idx],sol])
			
			# update fluxes
			for m2_idx in range(m_vals[1]):
				print("Updating Fluxes") if v else None
				index = iter if solution_type == "pfba" else (iter,m2_idx+m1_idx*m_vals[1])
				
				env_tmp = flux_log.loc[[(iter,M_iter)]].copy(deep=True) #temporary dataframe for base environment from iteration 0,0
				for ex_idx in range(len(flux_log.columns)):# for each exchange flux in environment
					ex_flux_sum = 0
					ex_flux_id = flux_log.columns[ex_idx]
					for org_idx in range(len(community_model)):# for each organism sum up flux * relative abundance
						if ex_flux_id in community_model[org_idx].exchanges:
							if F[org_idx].loc[index][ex_flux_id] != 0:
									ex_flux_sum += F[org_idx].loc[index][ex_flux_id] * relative_abundance[org_idx]
							   
								
			   
					env_tmp.loc[(iter,M_iter),ex_flux_id] = (1-flow)*(flux_log.loc[(iter,M_iter)][ex_flux_id].item()-ex_flux_sum) + flow*flux_log.loc[(0,0)][ex_flux_id].item() # update flux
				#re-index tmp dataframe
				multi_idx = pd.MultiIndex.from_tuples([(iter+1,m2_idx+m1_idx*m_vals[1])],names=["iteration","run"])
				df_tt = pd.DataFrame([env_tmp.loc[(iter,M_iter)]],columns = env_tmp.columns, index = multi_idx)
				flux_log = pd.concat([flux_log,df_tt])

	return flux_log, F

SyntaxError: invalid syntax (3708645438.py, line 102)

In [None]:
f_test, F_test = iifba(S_matrix, glc_f0, [1],
				  flow=0.5, solution_type="pFBA", 
				  iterations=2,
				  m_vals=[1,1], v=False)

Using Parsimonious FBA
Iteration: 0
Organism: 0


KeyError: "None of [Index([(0, 0)], dtype='object')] are in the [index]"

In [23]:
f, F = iipfba(S_matrix, glc_f0, [0.5, 0.5], flow=0.49)

Iteration 0
Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9


In [20]:
F[0]["biomass525"]

0    0.784637
1    0.588478
2    0.539438
3    0.527178
4    0.524113
5    0.523347
6    0.523155
7    0.523107
8    0.523095
9    0.523092
Name: biomass525, dtype: float64

In [25]:
F[0]["biomass525"]

0    0.784637
1    0.584555
2    0.533534
3    0.520523
4    0.517206
5    0.516360
6    0.516144
7    0.516089
8    0.516075
9    0.516071
Name: biomass525, dtype: float64

# Lin. Alg. Based Functions


## Environment


In [None]:
def init_iifba(models, media, iterations, m_vals=[1,1]):
	# get list of all unique rxns and exchanges
	org_exs = set()
	org_rxns = set()
	for model in models:
		exs_set = set(model.exchanges.list_attr("id"))
		org_exs = org_exs | exs_set # exchanges
		rxns_set = set(model.reactions.list_attr("id"))
		org_rxns = org_rxns | rxns_set # reactions

	# initialize env
	rows = (iterations) * m_vals[0] * m_vals[1] + 1 # add one iteration for final env
	cols = len(org_exs)
	env_f = np.zeros((rows, cols))
	env0_masks = [np.array(list(org_exs)) == rxn_id for rxn_id in list(media.keys()) ]
	for flux_idx, flux in enumerate(list(media.values())):
		env_f[0][env0_masks[flux_idx]] = flux
	
	#set columns for multi-indexing
	iters_col = np.repeat(np.arange(1, iterations+1), m_vals[0] * m_vals[1]) 
	run_col = np.tile(np.arange(m_vals[0] * m_vals[1]), iterations)
	iters_col = np.insert(iters_col, 0, 0) # add 0th iteration
	run_col = np.insert(run_col, 0, 0) # add 0th run 
	multi_idx = [iters_col , run_col]
	env_f = pd.DataFrame(env_f, columns=list(org_exs), index=multi_idx) # convert to interprettable df
	env_f.index.names = ["Iteration", "Run"]

	# initialize org_fluxes
	rows = iterations * m_vals[0] * m_vals[1] * len(models)
	cols = len(org_rxns)
	org_F = np.zeros((rows, cols)) # pfba will drop run column
	
	# create unique multi-index for 
	models_col = np.tile(np.arange(len(models)), iterations * m_vals[0] * m_vals[1]) 
	iters_col = np.repeat(np.arange(iterations), m_vals[0] * m_vals[1] * len(models)) 
	run_col = np.tile(np.repeat(np.arange(m_vals[0] * m_vals[1]), len(models)), iterations) 
	multi_idx = [models_col, iters_col , run_col]
	org_F = pd.DataFrame(org_F, columns=list(org_rxns), index=multi_idx)	# convert to interprettable df
	org_F.index.names = ["model", "Iteration", "Run"]
	
	return env_f, org_F

In [None]:
def set_env(model, env_f, iter, run):
	for ex in model.exchanges:
		ex.lower_bound = env_f.loc[iter, run][ex.id]
	
	return model

## Optimization

In [None]:
def run_pfba(model, model_idx, iter, org_F):
	# run pFBA
	sol1 = model.slim_optimize()
	if sol1 > GROWTH_MIN_OBJ:
		sol = cb.flux_analysis.parsimonious.pfba(model)
		
		org_F.loc[(model_idx, iter, 0), list(sol.fluxes.index)] = sol.fluxes.values
	# do nothing otherwise - already initiated as zeros!

	return org_F

In [66]:
def run_sampling(model, model_idx, iter, org_F, m_vals, rep_idx):
	# run flux sampling
	if iter == 0:
		sample_ct = m_vals[0] * m_vals[1]
	else:
		sample_ct = m_vals[1]
	sol = cb.sampling.sample(model, sample_ct)
	
	# standardize and save output
	arrays = [[model_idx] * sample_ct, [iter] * sample_ct, list(sol.index + rep_idx * sample_ct)]
	tuples = list(zip(*arrays))
	multi_idx = pd.MultiIndex.from_tuples(tuples, names=['model', 'Iteration', 'Run'])
	sol.index = multi_idx
	
	org_F.loc[sol.index, sol.columns] = sol

	return org_F

	


## Flux Update

In [78]:
def update_pfba_env(env_f, org_F, flow, rel_abund, iter):
	# get initial env. for flow
	init_env = env_f.loc[0,0].to_numpy()
	#pull iter info
	env_tmp = env_f.loc[iter, 0][:].to_numpy()
	run_exs = org_F.loc[:, iter, 0][env_f.columns].to_numpy()
		
	# run update
	flux_sums = (run_exs.T @ rel_abund).flatten()
	env_f.loc[iter+1, 0] = (1-flow)*(env_tmp - flux_sums) + flow*init_env
	
	return env_f


In [99]:
def update_sampling_env(env_f, org_F, flow, rel_abund, iter, m_vals, Mi, rep_idx):
	# get initial env. for flow
	init_env = env_f.loc[(0,0)].to_numpy()

	sample_ct = m_vals[0] * m_vals[1] if iter == 0 else m_vals[1]
	for sample_idx in range(sample_ct):
		#pull run info
		env_tmp = env_f.loc[iter, Mi][:].to_numpy()
		run_exs = org_F.loc[:, iter, Mi][env_f.columns].to_numpy()

		# run update
		flux_sums = (run_exs.T @ rel_abund).flatten()
		env_f.loc[iter+1, sample_idx+ m_vals[1]*rep_idx] = (1-flow)*(env_tmp - flux_sums) + flow*init_env

	return env_f

In [53]:
f, F = iipfba(S_matrix, glc_f0, np.array([0.5, 0.5]).reshape((-1,1)))


Iteration: 0
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9


## Wrapper Function


In [None]:
def iipfba(models, media, rel_abund,
		   iters=10, flow=0.5):
	env_fluxes, org_fluxes = init_iifba(models, media, iters)

	for iter in range(iters):
		print("Iteration:", iter)

		for org_idx, org_model in enumerate(models):
			with org_model as model:
				# set exchanges
				model = set_env(model, env_fluxes, iter, 0) # only 0 runs

				# run optim
				org_fluxes = run_pfba(model, org_idx, iter, org_fluxes)
				
		# update fluxes
		env_fluxes = update_pfba_env(env_fluxes, org_fluxes, flow, rel_abund, iter)

	# pfba has no use for Run index
	env_fluxes = env_fluxes.droplevel("Run")
	org_fluxes =org_fluxes.droplevel("Run")

	return env_fluxes, org_fluxes

In [None]:
def iisampling(models, media, rel_abund, iters=10, flow=0.5, m_vals=[1,1]):
	# mapping of what flux sampling to iterate
	M = np.zeros([m_vals[0],iters],dtype=int) #randomly pre-assign sampling initial point matrix
	for i in range(1, iters):
		Mcol = np.sort(np.random.choice(m_vals[0]*m_vals[1],m_vals[0],replace=False))
		M[:,i]=Mcol
	print(M)

	# initialize env and org fluxes
	env_fluxes, org_fluxes = init_iifba(models, media, iters, m_vals)

	for iter in range(iters):
		print("Iteration:", iter)

		# number of times to re-sample per iteration
		repeat_ct = 1 if iter == 0 else m_vals[0] 
		for rep_idx in range(repeat_ct):
			Mi = M[rep_idx, iter]

			# samples taken
			samples = m_vals[0] * m_vals[1] if iter == 0 else m_vals[1]
			for org_idx, org_model in enumerate(models):
				with org_model as model:
					# set exchanges
					model = set_env(model, env_fluxes, iter, Mi)

					# run optim
					org_fluxes = run_sampling(model, org_idx, iter, org_fluxes, m_vals, rep_idx=rep_idx)
				
		# update fluxes
		env_fluxes = update_sampling_env(env_fluxes, org_fluxes, flow, rel_abund, iter, m_vals, Mi, rep_idx)


	return env_fluxes, org_fluxes, M

In [100]:
f, F = iisampling(S_matrix, glc_f0, np.array([0.5, 0.5]).reshape((-1,1)), 
				  iters=10, flow=0.49, m_vals=[10,10])

# print((f.loc[0,:].to_numpy()).sum(axis=1))
# print(f)
# print(F)

[[ 0  3  3  0  0  0 21  8  7  4]
 [ 0 16  5 10  2  1 27 26 12  5]
 [ 0 27 10 16  4 21 33 67 37 17]
 [ 0 32 12 18 23 30 38 69 38 20]
 [ 0 58 14 28 30 38 42 73 39 30]
 [ 0 60 23 40 44 45 49 77 50 31]
 [ 0 78 28 49 50 65 53 84 77 48]
 [ 0 79 46 76 72 68 64 86 84 49]
 [ 0 82 80 87 75 77 90 97 87 55]
 [ 0 90 88 94 78 92 92 98 91 61]]
Iteration: 0
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9


In [101]:
print(F.loc[0,: ,:]["biomass525"])

Iteration  Run
0          0      1.482879e-04
           1      1.428481e-04
           2      1.731319e-05
           3      4.165337e-04
           4      4.924318e-04
                      ...     
9          95     1.902370e-14
           96    -1.326598e-14
           97    -1.258432e-14
           98    -1.761411e-14
           99    -2.616479e-14
Name: biomass525, Length: 1000, dtype: float64


In [362]:
print(F.loc[0, :]["biomass525"])

Iteration
0    0.784637
1    0.505684
2    0.472349
3    0.465576
4    0.464291
5    0.463970
6    0.463890
7    0.463870
8    0.463865
9    0.463863
Name: biomass525, dtype: float64
