### Simulate bulk competition experiments using empirical traits from Warringer 2003

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from bulk_simulation_code import run_pairwise_experiment, run_bulk_experiment
from bulk_simulation_code import CalcRelativeYield,CalcReferenceFrequency
from bulk_simulation_code import CalcTotalSelectionCoefficientLogit, CalcTotalSelectionCoefficientLog
from m3_model import CalcRelativeSaturationTime as CalcSaturationTimeExact

In [None]:
### Update dependent parameters according to input
import os
import os.path
from os import path

## create export directory if necessary
## foldernames for output plots/lists produced in this notebook
import os
# Root folder for every figure written by this notebook; created on demand.
FIG_DIR = './figures/bulk_fitness/'
os.makedirs(FIG_DIR, exist_ok=True)
print(f"All  plots will be stored in: \n{FIG_DIR}")

In [None]:
### execute script to load modules here
# NOTE: the original author reported an error when running this command.
# The script runs in the notebook's global namespace, so every name it
# defines becomes available to the cells below (presumably DPI and
# PAD_INCHES, which are used later but not defined in this notebook -- TODO confirm).
# A context manager closes the file handle deterministically.
with open('setup_aesthetics.py') as _setup_script:
    exec(_setup_script.read())

# manual fix

# Figure geometry in inches; all plots below derive their size from these.
FIGSIZE_A4 = (8.27, 11.69) # a4 format in inches

# A "triplet" panel is 0.3*2 = 60 % of the A4 width, with height = 0.75 * width.
FIGWIDTH_TRIPLET = FIGSIZE_A4[0]*0.3*2
FIGHEIGHT_TRIPLET = FIGWIDTH_TRIPLET*0.75


In [None]:
# Colour used when plotting the raw dataset (e.g. the initial-OD histogram).
DATASET_COLOR = 'darkorange'


In [None]:
# Sub-folder name shared by the figure directory and the output directory.
SUFFIX_DATASET = 'all_traits_vary/'

FIG_DIR_DATASET = FIG_DIR + SUFFIX_DATASET
OUTPUT_DIR_DATASET = './output/' + SUFFIX_DATASET

# Create both target folders if they do not exist yet.
for _folder in (FIG_DIR_DATASET, OUTPUT_DIR_DATASET):
    os.makedirs(_folder, exist_ok=True)

### set colorscheme

In [None]:
# Qualitative matplotlib colormap from which the named plot colours are sampled.
cmap = plt.get_cmap('tab20c')

In [None]:
# Named colours for the different quantities plotted below.  Each colour is
# sampled from `cmap` at (k + 0.5)/20, i.e. at the centre of the k-th of 20
# equal-width bins of the colormap range.
color_B1 = cmap(4.5/20)
color_error1 = cmap(5.5/20) #cmap(13.5/20)
color_error2 = cmap(7.5/20)

color_stotal = cmap(16.5/20)
color_B2 = cmap(0.5/20)
color_ratio = cmap(13.5/20) #cmap(10.5/20)

color_wt = cmap(10.5/20)

In [None]:

# Colormap intended for telling individual knockout lineages apart.
knockout_cmap = plt.get_cmap('flag')

In [None]:
# Drawn as a dashed horizontal guide line in the relative-error plots.
rel_threshold = 0.01 ### threshold for relative error plots

### Load wild-type traits

In [None]:
# Positions of the CSV columns that form the row index of the trait table.
INDEX_COL = [0,1,2,3,4]
# Placeholder strings in the CSV that are parsed as missing values.
list_na_representations = ['not_present', 'failed_to_compute']

In [None]:
# Trait table: the columns listed in INDEX_COL become the row index and the
# strings in list_na_representations are read as missing values.
PCWS_TRAITS_WARRINGER = './output/df_M3_traits.csv'
df_warringer = pd.read_csv(
    PCWS_TRAITS_WARRINGER,
    header=0,
    index_col=INDEX_COL,
    float_precision=None,
    na_values=list_na_representations,
)


In [None]:
### define default wild_type
# Rows of the trait table that are flagged as wild-type.
df_wildtypes = df_warringer.loc[df_warringer['is_wildtype'] == True]

# Default wild-type: column-wise median over the numeric columns of those rows.
WILDTYPE = df_wildtypes.median(axis=0, numeric_only=True)

### Load mutant data (averaged)

In [None]:

# Averaged trait table; read with the default integer row index.
PCWS_TRAITS_WARRINGER_AVERAGED = './output/df_M3_traits_averaged.csv'
df_averaged = pd.read_csv(
    PCWS_TRAITS_WARRINGER_AVERAGED,
    header=0,
    float_precision=None,
)

In [None]:
### assign wild-type label
def is_wildtype(row):
    """Return True when the row's 'genotype' entry is the wild-type 'BY4741'."""
    return bool(row['genotype'] == 'BY4741')
    

# Smoke test: apply the helper to the first row of the averaged table.
row = df_averaged.iloc[0]
is_wildtype(row)

In [None]:
# Flag the averaged rows whose genotype is the wild-type strain 'BY4741'.
df_averaged['is_wildtype'] = df_averaged['genotype'].eq('BY4741')

In [None]:
### append mutant values (averaged) to set of individual wild-type strains
# Knockout rows are all averaged rows that are not flagged as wild-type.
df_knockouts = df_averaged[~df_averaged['is_wildtype']]
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent (same row order, original row labels kept).
df_input = pd.concat([df_wildtypes.reset_index(), df_knockouts.reset_index()])

In [None]:
### restore index
# Re-use the index level names of the full trait table for the combined table.
index_col_names = df_warringer.index.names
df_input = df_input.set_index(index_col_names)


### Load trait data into the standard form required by Michael's code

In [None]:
# Number of knockout rows; the trait arrays below have one extra slot (index 0)
# for the wild-type.
n_knockouts = df_knockouts.shape[0]

In [None]:
def _stack_trait(column):
    """Return a float array: wild-type value at index 0, knockout values after it."""
    values = np.zeros(n_knockouts + 1)
    values[0] = WILDTYPE[column]
    values[1:] = df_knockouts[column].values
    return values


### growth rates, rescaled to growth rate per hour
gs = _stack_trait('gmax') * 60

### lag times, rescaled to hours
ls = _stack_trait('lag') / 60

### yield
Ys = _stack_trait('yield')


### Define initial condition for bulk growth cycle

In [None]:
### set initial resource concentrations

# presumably 20 g/L glucose divided by a molar mass of 180 g/mol -- TODO confirm
CONCENTRATION_GLUCOSE = 20/180 * 1e3 # concentrations are recorded in millimolar, to match the units of yield
print(CONCENTRATION_GLUCOSE)

In [None]:
### define default initial_OD
OD_START = 0.05  #df_warringer['od_start'].median()

### compare to initial OD in the monoculture cycles
fig, ax = plt.subplots(figsize=(FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))

# Draw the histogram of measured initial ODs directly into the axes created above.
hist_style = dict(bins=41, color=DATASET_COLOR, alpha=0.6, log=True, rasterized=True)
ax = df_warringer['od_start'].hist(ax=ax, **hist_style)

# NOTE(review): the label says "median value" although OD_START is hard-coded
# above -- confirm that 0.05 is indeed the median of the data.
ax.axvline(OD_START, color='tab:red', label=f'median value: $N_0={OD_START:.3f}$')
ax.legend()
ax.set_xlabel('initial OD')
ax.set_ylabel('no. growth curves')

### Calculate effective yield

In [None]:
from bulk_simulation_code import CalcRelativeYield

In [None]:
### calculate effective yields
# One value per strain (index 0 = wild-type), computed from the biomass yields
# for the chosen initial glucose concentration and initial OD.
nus = CalcRelativeYield(Ys, R0 = CONCENTRATION_GLUCOSE, N0 = OD_START)


### Simulate pairwise competition growth cycles (scenario A)

The frequencies for scenario A can be summarized as 

    frequency of the focal mutant strain: x2 = 1/N
        frequency of the wildtype strain: x1 = 1 - x2

where $N$ is some population size (number of cells or biomass?). Intuitively, a spontaneous mutation initially forms only a small fraction $x_0 = 1/N$ of the population. The population size in nature is largely unknown, but it can be approximated in two ways.

- by the effective population size $N_e$, which is inferred from the genomic variation across a set of natural isolates, and leads to estimates of $N\approx 10^8$ cells [see papers by Howard Ochman]
- by the bottleneck size $N$ in laboratory evolution experiments like the LTEE, which leads to an estimate of $N=5\cdot 10^6$ cells. According to the first paper on the LTEE, there are $5\cdot10^5$ cells per ml at the start of the growth cycle, and the total volume is 10 ml.



In [None]:
# Population size; the invading mutant starts at frequency 1/N in the
# pairwise competitions below.
N = 1e6 # default value: 1e6

In [None]:
# Run one pairwise competition per strain against the wild-type (index 0 of the
# trait arrays), each strain starting at frequency 1/N.  Only the first three
# of the six returned values are kept.
xs_pair, xs_pair_final, tsats, _,_,_ = run_pairwise_experiment(gs=gs,ls=ls,nus = nus, g1=gs[0],l1=ls[0],nu1=nus[0],
                                                               x0 = 1/N)

# Selection coefficient of every strain from its initial and final frequency.
s_pair = CalcTotalSelectionCoefficientLogit(xs_pair,xs_pair_final)

#### Plot  frequency trajectory in pairwise competition for example mutant

In [None]:
## set axis limits
# time axis (TMIN, TMAX) and frequency axis (XMIN, XMAX) of the trajectory plots
TMIN,TMAX = 0,15
XMIN,XMAX = 5e-8,2

In [None]:

## set timepoints for each trajectory
# NOTE(review): every trajectory is drawn from t = 0 to TMAX, not to the
# per-competition saturation times in `tsats` -- confirm this is intended.
tvec = np.array([0, TMAX])
## set frequency values for each trajectory
# row 0 = initial frequencies, row 1 = final frequencies, one column per strain
trajectories = np.vstack((xs_pair,xs_pair_final))

## plot
fig, ax = plt.subplots(figsize = (FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))
ax.plot(tvec,trajectories[:,1:], color = 'dimgrey', label = 'mutants')
ax.plot(tvec,trajectories[:,0], color = 'orange', label = 'wild-type')

ax.set_xlabel('time')
ax.set_ylabel('frequency')

# fix axis limits
ax.set_yscale('log')
ax.set_ylim(XMIN,XMAX)
ax.set_xlim(TMIN,TMAX)
fig.tight_layout()

#### Plot distribution of fitness effects in the pairwise competition

In [None]:
# Histogram of the pairwise selection coefficients of all strains.
fig, ax = plt.subplots(figsize=(FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))
ax.hist(s_pair, bins=42, color='dimgrey')

# Solid line: wild-type entry (index 0); dashed line: mean over the mutants.
mean_mutant_fitness = s_pair[1:].mean()
ax.axvline(s_pair[0], color='orange')
ax.axvline(mean_mutant_fitness, color='tab:red', ls='--')
ax.set_xlabel('s_21 in pairwise experiment')

### Simulate bulk competition with background mutants added at invasion frequency (scenario B1)

The frequencies for this scenario can be summarized as 

    frequency of the mutants:             xi = 1/N
    frequency of the wildtype strain:     x1 = 1 - (x2 + x3 + ...)

where each mutant lineage is added at the frequency used in the pairwise competition. 

In [None]:
### set initial frequencies
# total frequency of the mutant library, shared equally among all knockouts
mutant_library_freq = 0.01

xs = np.zeros_like(gs)
n_knockouts = len(xs[1:])
xs[1:] = mutant_library_freq/n_knockouts                  # mutant lineages
xs[0] = 1- xs[1:].sum()       # wildtype population




In [None]:
# Total initial frequency of all mutant lineages in scenario B1.
mutant_ratio_B1 = xs[1:].sum()

print(f"Proportion of mutants: {mutant_ratio_B1:.8f} ")
print(f"Proportion of wild-type: {xs[0]:.8f} ")

In [None]:
## calculate final frequencies
# NOTE: `xs` is rebound to the initial frequencies returned by the simulation.
xs, xs_final, tsat = run_bulk_experiment(gs=gs, ls = ls, nus =nus, xs=xs)

## calculate total selection coefficient
sigma_bulk_B1 = CalcTotalSelectionCoefficientLogit(xs,xs_final)

## compute pairwise selection coefficient in bulk
# frequencies are first re-expressed relative to the wild-type (strain 0)
xi1 = CalcReferenceFrequency(xs,ref_strains = [0]) 
xi1_final = CalcReferenceFrequency(xs_final,ref_strains = [0])
s_bulk_B1 = CalcTotalSelectionCoefficientLogit(xi1,xi1_final)




#### Plot frequency trajectory for all barcodes

In [None]:

## set timepoints for each trajectory
# start of the growth cycle and the saturation time returned by the bulk simulation
tvec = np.array([0, tsat])
## set frequency values for each trajectory
# row 0 = initial frequencies, row 1 = final frequencies, one column per strain
trajectories = np.vstack((xs,xs_final))

## plot
fig, ax = plt.subplots(figsize = (FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))
ax.plot(tvec,trajectories[:,1:], color = 'dimgrey', label = 'mutants')
ax.plot(tvec,trajectories[:,0], color = 'orange', label = 'wild-type')

ax.set_xlabel('time')
ax.set_ylabel('frequency')

# fix axis limits
ax.set_yscale('log')
ax.set_ylim(XMIN,XMAX)
ax.set_xlim(TMIN,tsat)
fig.tight_layout()

#### Calculate error to pairwise competition

In [None]:
### compare in a plot
# Absolute error of the two scenario-B1 fitness estimates with respect to the
# pairwise-competition fitness s_pair.  Entry 0 (the wild-type) is left out of
# every scatter series.

fig, axes = plt.subplots(1,1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)

ax = axes
x = s_pair

## plot data
y = sigma_bulk_B1
ax.scatter(x[1:],y[1:]-x[1:], rasterized = True, color = color_stotal, alpha = 1,
           label = r'total scoeff. $\sigma$', marker = 'o')

y = s_bulk_B1
ax.scatter(x[1:],y[1:]-x[1:], rasterized = True, color = color_B1,
           label = 'pairwise scoeff. $s$', marker = 'o')

## make the y-axis symmetric around zero and mark the zero-error line
ymin,ymax = ax.get_ylim()
yabs = np.max(np.abs([ymin,ymax]))
ax.set_ylim(-yabs,yabs)
ax.axhline(0, ls = '--', color = 'black')
ax.set_ylabel('absolute error')
ax.set_xlabel('mutant fitness in pairwise competition')
error1_min,error1_max = ax.get_ylim()

## export: savefig's keyword is lower-case `dpi`; the former `DPI=` keyword is
## not a savefig argument, so the requested resolution was never applied
fig.savefig(FIG_DIR_DATASET + 'absolute_error_bulk_competition_B.pdf',
            dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

In [None]:
### compare in a plot
# Relative error of the two scenario-B1 fitness estimates with respect to the
# pairwise-competition fitness s_pair, on a logarithmic y-axis.  Entry 0 (the
# wild-type) is left out of every scatter series.

def _relative_error(estimate, reference):
    """Return |estimate - reference| / |reference|; NaN where reference is 0."""
    # `where=` without `out=` leaves the skipped entries uninitialised, so the
    # output is pre-filled with NaN.
    ratio = np.divide(estimate - reference, reference,
                      out = np.full(np.shape(reference), np.nan),
                      where = reference != 0)
    return np.abs(ratio)


fig, axes = plt.subplots(1,1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)

ax = axes
x = s_pair

## plot data
y = _relative_error(sigma_bulk_B1, s_pair)
ax.scatter(x[1:],y[1:], rasterized = True, color = color_stotal, alpha = 1,
           label = r'total scoeff. $\sigma$', marker = 'o')

y = _relative_error(s_bulk_B1, s_pair)
ax.scatter(x[1:],y[1:], rasterized = True, color = color_B1,
           label = 'pairwise scoeff. $s$', marker = 'o')

## guide line at the relative-error threshold
ax.axhline(rel_threshold, ls = '--', color = 'black')

ax.set_yscale('log')
# NOTE(review): a line at y = 0 cannot be shown on a logarithmic axis; the call
# is kept unchanged from the original.
ax.axhline(0, ls = '--', color = 'black')

ax.set_ylabel('relative error')
ax.set_xlabel('mutant fitness in pairwise competition')
error1_min,error1_max = ax.get_ylim()

## export: savefig's keyword is lower-case `dpi`; the former `DPI=` keyword is
## not a savefig argument, so the requested resolution was never applied
fig.savefig(FIG_DIR_DATASET + 'relative_error_bulk_competition_B.pdf',
            dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

### Compare predictions of the ranking

In [None]:
## load data on selection coefficients
data = pd.DataFrame({
    'sigma_bulk_B1': sigma_bulk_B1,
    's_bulk_B1': s_bulk_B1,
    's_pair': s_pair,
})
## exclude wild-type, has different offset
# .copy() makes the slice an independent frame, so adding the rank columns
# below does not write into a view of the original table
data = data.iloc[1:].copy()
## create rank variables
# ascending=True gives rank 1 to the SMALLEST selection coefficient (the
# earlier "fittest first" comment was wrong); ties share the lowest rank
for _column in ('sigma_bulk_B1', 's_bulk_B1', 's_pair'):
    data[_column + '_rank'] = data[_column].rank(ascending = True, method = 'min')

In [None]:
### compare in a plot
# Rank differences between the fitness estimates, plotted against the pairwise
# fitness of the mutants (wild-type entry excluded).

fig, axes = plt.subplots(1,2, figsize = (2.5*FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True, sharey=True)

x = s_pair[1:]

ax = axes[0]
## rank difference: total fitness in bulk vs. pairwise fitness in bulk
y = data['sigma_bulk_B1_rank'] - data['s_bulk_B1_rank']
ax.scatter(x,y, rasterized = True, color = 'silver', alpha = 1, marker = 'o')
ax.set_ylabel('rank difference: total fitness in bulk\nvs. pairwise fitness in bulk')

ax = axes[1]
## rank difference: pairwise fitness in bulk vs. pairwise competition
y = data['s_bulk_B1_rank'] - data['s_pair_rank']
ax.scatter(x,y, rasterized = True, color = color_B1, alpha = 1, marker = 'o')
ax.set_ylabel('rank difference: pairwise fitness in bulk\nvs. pairwise fitness in pairwise competition')
# the shared y-axis hides tick labels on the right panel; switch them back on
ax.tick_params(labelleft=True)

fig.subplots_adjust( wspace= 0.5)

for ax in axes:
    ## symmetric y-range around the zero line
    ymin,ymax = ax.get_ylim()
    yabs = np.max(np.abs([ymin,ymax]))
    ax.set_ylim(-yabs,yabs)
    ax.axhline(0, ls = '--', color = 'black')

    ax.set_xlabel('mutant fitness in pairwise competition')

## export: savefig's keyword is lower-case `dpi`; the former `DPI=` keyword is
## not a savefig argument, so the requested resolution was never applied
fig.savefig(FIG_DIR_DATASET + 'ranking_mismatch_bulk_B1.pdf',
            dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

### Calculate trait components of the selection coefficient

In [None]:
from m3_model import CalcApproxSijComponentsMultitype, CalcApproxSijComponents

In [None]:
%%time

# Containers for the three components of the selection coefficient in bulk.
s_bulk_growth, s_bulk_lag, s_bulk_coupling = (np.zeros_like(s_bulk_B1) for _ in range(3))

# Component triple of every strain i relative to the wild-type (strain 0),
# evaluated for the full bulk population.
n_strains = len(gs)
for i in range(n_strains):
    components = CalcApproxSijComponentsMultitype(i,0,xs,gs,ls,nus)
    s_bulk_growth[i], s_bulk_lag[i], s_bulk_coupling[i] = components



In [None]:
%%time

# Containers for the three components of the selection coefficient in the
# pairwise competitions.
s_pair_growth, s_pair_lag, s_pair_coupling = (np.zeros_like(s_pair) for _ in range(3))

# The resident (wild-type) traits and the invader frequency are the same for
# every pair.
g1, l1, nu1 = gs[0], ls[0], nus[0]
x0 = 1/N

for i in range(len(gs)):
    g2, l2, nu2 = gs[i], ls[i], nus[i] # get traits of the invader
    # two-strain population: index 0 = wild-type, index 1 = invader
    s_pair_growth[i], s_pair_lag[i], s_pair_coupling[i] = CalcApproxSijComponentsMultitype(
        1, 0, xs=[1-x0,x0], gs=[g1,g2], ls=[l1,l2], nus=[nu1,nu2])

In [None]:
## calculate error components

# difference between bulk and pairwise competition, per component
delta_s_lag = s_bulk_lag - s_pair_lag
delta_s_growth = s_bulk_growth - s_pair_growth
# plotted below as the "linear error": lag plus growth contribution
delta_s_one = delta_s_lag + delta_s_growth
# plotted below as the "quadratic error"
# NOTE(review): only the bulk coupling term is used; s_pair_coupling is not
# subtracted -- confirm this is intended.
delta_s_two = s_bulk_coupling

#### Plot the error components

In [None]:
### compare in a plot
# Absolute error components (linear and quadratic) of the bulk fitness,
# plotted against the pairwise fitness; entry 0 (wild-type) is left out.

fig, axes = plt.subplots(1,1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)

ax = axes
x = s_pair

## plot data
y = delta_s_one
ax.scatter(x[1:],y[1:], rasterized = True, color = color_error1, zorder = 2,
           label = r'linear error')

y = delta_s_two
ax.scatter(x[1:],y[1:], rasterized = True, color = color_error2,
           label = r'quadratic error')

## make the y-axis symmetric around zero and mark the zero-error line
ymin,ymax = ax.get_ylim()
yabs = np.max(np.abs([ymin,ymax]))
ax.set_ylim(-yabs,yabs)
ax.axhline(0, ls = '--', color = 'black')
ax.set_ylabel('absolute error')
ax.set_xlabel('mutant fitness in pairwise competition')
error1_min,error1_max = ax.get_ylim()

## export: savefig's keyword is lower-case `dpi`; the former `DPI=` keyword is
## not a savefig argument, so the requested resolution was never applied
fig.savefig(FIG_DIR_DATASET + 'absolute_error_components_bulk_competition_B.PDF',
            dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

In [None]:
### compare in a plot
# Error components (linear and quadratic) relative to the pairwise fitness,
# on a logarithmic y-axis; entry 0 (wild-type) is left out.

def _abs_ratio(numerator, denominator):
    """Return |numerator / denominator|; NaN where denominator is 0."""
    # `where=` without `out=` leaves the skipped entries uninitialised, so the
    # output is pre-filled with NaN.
    ratio = np.divide(numerator, denominator,
                      out = np.full(np.shape(denominator), np.nan),
                      where = denominator != 0)
    return np.abs(ratio)


fig, axes = plt.subplots(1,1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)

ax = axes
x = s_pair

## plot data
y = _abs_ratio(delta_s_one, s_pair)
ax.scatter(x[1:],y[1:], rasterized = True, color = color_error1, zorder = 2,
           label = r'linear error')

y = _abs_ratio(delta_s_two, s_pair)
ax.scatter(x[1:],y[1:], rasterized = True, color = color_error2,
           label = r'quadratic error')

# NOTE(review): this y = 0 line cannot be shown once the axis is switched to
# log scale below; the call is kept unchanged from the original.
ax.axhline(0, ls = '--', color = 'black')
ax.set_ylabel('relative error')
ax.set_xlabel('mutant fitness in pairwise competition')
error1_min,error1_max = ax.get_ylim()

ax.set_yscale('log')

## export: savefig's keyword is lower-case `dpi`; the former `DPI=` keyword is
## not a savefig argument, so the requested resolution was never applied
fig.savefig(FIG_DIR_DATASET + 'relative_error_components_bulk_competition_B.pdf',
            dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

### Calculate traits of the coarse-grained population

In [None]:
# total frequency of the mutant library (all strains except index 0)
xlib = xs[1:].sum()
# library yield: frequency-weighted harmonic mean of the mutant yields
Ylib = 1/(np.sum(xs[1:]/Ys[1:])/xlib)
nulib = CalcRelativeYield(Ylib, R0 = CONCENTRATION_GLUCOSE, N0 = OD_START)

# reciprocal growth rate of every strain
taus = 1/gs
# library value: harmonic mean of taus over the mutants, weighted by x/Y
taulib = 1/(np.sum(xs[1:]/Ys[1:]/taus[1:])/(xlib/Ylib))
glib = 1/taulib

In [None]:
### compute population lag time
# mean of the mutant lag times, weighted by x/(Y*tau) and normalised by the
# sum of those weights (= xlib/(taulib*Ylib))
lambda_lib = np.sum(ls[1:]*xs[1:]/Ys[1:]/taus[1:])*(taulib*Ylib/xlib)

In [None]:
# display the library lag time
lambda_lib

In [None]:
# display the effective yield of the wild-type (index 0)
nus[0]

### Calculate traits of the leave-one-out population

In [None]:
### compute the leave-one-out library abundance
# entry i >= 1: library frequency without strain i; entry 0: the full library
xlib_no = np.zeros_like(xs)
xlib_no[0] = xlib
xlib_no[1:] = xlib - xs[1:]

### compute the leave-one-out biomass yield
# The sum over all mutants j != i equals the total sum minus the term of
# strain i, which replaces the former O(n^2) double loop by one vectorised
# expression (results agree up to floating-point rounding).
inv_yield_terms = xs[1:]/Ys[1:]
Ylib_no = np.zeros_like(Ys)
Ylib_no[0] = Ylib
n = len(Ylib_no)
Ylib_no[1:] = 1/((inv_yield_terms.sum() - inv_yield_terms)/xlib_no[1:])
    


In [None]:
### compute the individual leave-one-out growth rate
# Entry i >= 1 is the library value of tau without strain i; entry 0 is the
# value for the full library.  The sum over all mutants j != i equals the
# total sum minus the term of strain i, which replaces the former O(n^2)
# double loop (results agree up to floating-point rounding).
rate_terms = xs[1:]/Ys[1:]/taus[1:]
taulib_no = np.zeros_like(gs)
taulib_no[0] = taulib
taulib_no[1:] = 1/((rate_terms.sum() - rate_terms)/(xlib_no[1:]/Ylib_no[1:]))

In [None]:
# calculate the leave-one-out lag time

lambdalib_no = np.zeros_like(gs) # prepare data container

n = len(lambdalib_no)

# parts of the inner sum that do not depend on the focal strain i
# (arrays over the library strains j = 1 .. n-1)
weights = xs[1:]/(Ys[1:]*taus[1:])
lag_vs_wt = ls[1:] - ls[0]
tau_vs_wt = taus[1:] - taus[0]

for i in range(1,n):
    ## calculate prefactor
    # NOTE: delta_tau is 0 for a mutant with exactly the wild-type growth
    # rate; the divisions below are then not finite (as in the original loop)
    delta_tau = taus[i] - taus[0]

    ## calculate the sum over all other strains in the library
    # vectorised over j; replaces the former inner Python loop
    terms = weights*((taus[i]-taus[1:])*lag_vs_wt - (ls[i]-ls[1:])*tau_vs_wt)
    terms[i-1] = 0.0  # leave strain i itself out of the sum
    tmp = terms.sum()

    ## calculate first term
    the_bracket = (Ylib_no[i]*taulib_no[i]/xlib_no[i]) *tmp
    first_term = 1/delta_tau * the_bracket
    ## calculate the second term
    second_term = 1/delta_tau * ( (taus[i]-taulib_no[i])*ls[0] + (taulib_no[i]-taus[0])*ls[i] )
    lambdalib_no[i] = first_term + second_term

### define lib value for lag time
# the library value is the mean of the leave-one-out values; it is stored in
# slot 0, like the library values of the other leave-one-out arrays
lambdalib = lambdalib_no[1:].mean()
lambdalib_no[0] = lambdalib

In [None]:
# Three histograms (log-scaled counts) of the leave-one-out background traits:
# biomass yield, tau and lag time.  Each panel marks the mean with a red line
# and quotes the wild-type (and, where available, library) value in its title.
fig, axes = plt.subplots(1,3, figsize = (4*0.8*FIGWIDTH_TRIPLET, 0.8*FIGHEIGHT_TRIPLET))

# panel 1: leave-one-out biomass yield
ax = axes[0]
y = Ylib_no[1:] 
ax.hist(y, bins = 42, color = 'dimgrey', log= True)
## add mean
mean = Ylib_no[1:].mean()
label = r"mean value: $Y_{\mathrm{lib}/i}$" + f"={mean:.6f}"
ax.axvline(mean, color = 'tab:red',label = label)
ax.legend(frameon=False)
## add information in title
title = r'wild-type $Y_{\mathrm{wt}}$' + f'={Ys[0]:.6f}'
title += ', '+ r'library $Y_{\mathrm{lib}}$' + f'={Ylib_no[0]:.6f}'
ax.set_title(title, loc = 'right')
ax.set_xlabel(r'background biomass yield $Y_{\mathrm{lib}/i}$ [OD/mM glucose]')


# panel 2: leave-one-out tau
ax = axes[1]
y = taulib_no[1:]
ax.hist(y, bins = 42, color = 'dimgrey', log= True)
## add mean
mean = taulib_no[1:].mean()
label = r"mean value: $\tau_{\mathrm{lib}/i}$" + f"={mean:.5f}"
ax.axvline(mean, color = 'tab:red', label = label)
ax.legend(frameon=False)
## add information in title
title = r'wild-type $\tau_{\mathrm{wt}}$' + f'={taus[0]:.5f}'
title += ', '+ r'library $\tau_{\mathrm{lib}}$' + f'={taulib_no[0]:.5f}'
ax.set_title(title, loc = 'right')
ax.set_xlabel(r'background doubling time $\tau_{\mathrm{lib}/i}$ [hours]')

# panel 3: leave-one-out lag time (the title shows only the wild-type value)
ax = axes[2]
y = lambdalib_no[1:] 
ax.hist(y, bins = 42, color = 'dimgrey', log= True)
## add mean
mean = lambdalib_no[1:].mean()
label = r"mean value: $\lambda_{\mathrm{lib}/i}$"+ f"={mean:.5f}"
ax.axvline(mean, color = 'tab:red', label = label)
ax.legend(frameon=False)
## add information in title
title = r'wild-type $\lambda_{\mathrm{wt}}$' + f'={ls[0]:.5f}'
ax.set_title(title, loc = 'right')


ax.set_xlabel(r'background lag time $\lambda_{\mathrm{lib}/i}$ [hours]')


In [None]:
# smallest entry of taulib_no (slot 0, the full-library value, is included)
taulib_no.min()

In [None]:
## doubling time of the library
# tau*ln(2) is the doubling time; the factor 60 converts hours to minutes
# (growth rates were rescaled to per hour above)
taulib*np.log(2)*60

In [None]:
## doubling time of the wild-type
# tau*ln(2) is the doubling time; the factor 60 converts hours to minutes
# (growth rates were rescaled to per hour above)
taus[0]*np.log(2)*60

In [None]:
### analyze the outliers
# For each leave-one-out background trait, find the strain whose removal gives
# the most extreme background value and print that strain's own traits.

data = pd.DataFrame()
data['taulib_no'] = taulib_no
data['lambdalib_no'] = lambdalib_no
# fixed: this column was filled with lambdalib_no, so the yield outlier below
# was actually searched in the lag-time values
data['Ylib_no'] = Ylib_no

# Row 0 holds the full-library values and is skipped (.loc[1:] is label-based,
# and the labels of this frame are the strain indices).
index = data.loc[1:,'lambdalib_no'].idxmin()

print(f"Outlier with lower background lag time: index = {index}")
print('biomass yield: %.6f' % Ys[index])
print("lag time: %.6f" %ls[index])

index = data.loc[1:,'taulib_no'].idxmin()

print(f"Outlier with lower background doubling time: index = {index}")
print('biomass yield: %.6f' % Ys[index])
print("doubling time: %.6f" %taus[index])


index = data.loc[1:,'Ylib_no'].idxmax()

print(f"Outlier with higher background biomass yield: index = {index}")
print('biomass yield: %.6f' % Ys[index])
print("doubling time: %.6f" %taus[index])
print("lag time: %.6f" %ls[index])

In [None]:
# Reference values: traits of the wild-type (index 0 of the trait arrays).
print("Compare to wild-type traits")
print(f"biomass yield {Ys[0]:.6f} OD/mM glucose")
print(f"doubling time {taus[0]:.6f} hours")
print(f"lag time {ls[0]:.6f} hours")

In [None]:
## compare to bulk competition traits
# Weighted harmonic means over ALL strains, written with the same
# normalisation as Ylib and taulib.  The former expressions divided by the
# total frequency instead of multiplying by it; both forms agree only while
# xs sums to exactly 1.
Ybar_bulk  = 1/(np.sum(xs/Ys)/xs.sum())
taubar_bulk = 1/(np.sum(xs/Ys/taus)/(xs.sum()/Ybar_bulk))



print("Compare to bulk competition traits")
print("biomass yield %.6f OD/mM glucose" %Ybar_bulk)
print("doubling time %.6f hours" %taubar_bulk)

In [None]:
### compare to triplet traits
# Same weighted harmonic means, but with the whole mutant library collapsed
# into one effective strain (xlib, Ylib, taulib) next to the wild-type.
x_total_triplet = xs[0] + xlib
Ybar_triplet = 1/((xs[0]/Ys[0] + xlib/Ylib)/x_total_triplet)
# normalised like taulib; the former expression agreed with this one only
# while the two frequencies sum to exactly 1
taubar_triplet = 1/((xs[0]/Ys[0]/taus[0] + xlib/Ylib/taulib)/(x_total_triplet/Ybar_triplet))

print("Compare to  triplet competition traits")
print("biomass yield %.6f OD/mM glucose" %Ybar_triplet)
print("doubling time %.6f hours" %taubar_triplet)

In [None]:
### also calculate as relative yield
# same conversion as for the individual strains, applied to the leave-one-out yields
nulib_no = CalcRelativeYield(Ylib_no, R0 = CONCENTRATION_GLUCOSE, N0 = OD_START)


In [None]:
%%time

# Components of the selection coefficient in a three-strain ("triplet")
# population: wild-type, focal strain i, and the rest of the library collapsed
# into one effective strain with leave-one-out traits.
s_triplet_growth = np.zeros_like(s_bulk_growth)
s_triplet_lag = np.zeros_like(s_bulk_growth)
s_triplet_coupling = np.zeros_like(s_bulk_growth)

# the wild-type traits are the same for every triplet
g1, l1, nu1 = gs[0], ls[0], nus[0]

for i in range(len(gs)):
    g2, l2, nu2 = gs[i], ls[i], nus[i] # get traits of the invader
    # use leave-one-out traits for the background; the alternative would be
    # the traits of the library as a whole (1/taulib, lambdalib, nulib)
    g3, l3, nu3 = 1/taulib_no[i], lambdalib_no[i], nulib_no[i]
    # Fixed: the focal strain now enters at its frequency in the bulk
    # competition, consistent with xlib_no = xlib - xs, instead of
    # 1/n_knockouts.  Entry 0 (wild-type traits as "invader") has no mutant
    # frequency of its own, so the mean mutant frequency is used there.
    x_focal = xs[i] if i > 0 else xs[1:].mean()
    xs_triplet = [1-xlib, x_focal, xlib - x_focal]
    s_triplet_growth[i], s_triplet_lag[i], s_triplet_coupling[i] = CalcApproxSijComponentsMultitype(
        1, 0, xs=xs_triplet, gs=[g1,g2,g3], ls=[l1,l2,l3], nus=[nu1,nu2,nu3])

In [None]:
# One panel per component: difference between the triplet and the bulk value,
# plotted against the bulk value.
fig, axes = plt.subplots(1,3, figsize = (3.*FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET))

panels = [
    (s_bulk_growth, s_triplet_growth, "s_growth: triplet - bulk"),
    (s_bulk_lag, s_triplet_lag, "s_lag: triplet - bulk"),
    (s_bulk_coupling, s_triplet_coupling, "s_coupling: triplet - bulk"),
]

for ax, (bulk_values, triplet_values, ylabel) in zip(axes, panels):
    ax.scatter(bulk_values, triplet_values - bulk_values)
    ax.set_ylabel(ylabel)

    # zero line and symmetric y-range
    ax.axhline(0, ls = '--', color = 'black')
    ymin,ymax = ax.get_ylim()
    ylim = np.max(np.abs([ymin,ymax]))
    ax.set_ylim(-ylim,ylim)

    # NOTE(review): the x-values are the bulk components, not the pairwise
    # fitness named in this label -- confirm the intended label.
    ax.set_xlabel('mutant fitness in pairwise competition')

fig.tight_layout()

### Compare to Michael's formula for the effective lag time

\begin{align}
\lambda_{MM} = - \frac{\sum_{j=1}^n \exp(-\lambda_j g_j)}{\sum_{j=1}^n g_j \exp(-\lambda_j g_j)}
\log \left( \sum_{j=1}^n \exp(-\lambda_j g_j) \right)
\end{align}

In [None]:
## building blocks of the effective lag time lambda_MM, summed over the mutants
# NOTE(review): each term is weighted by the strain's share of the library,
# xs/xlib; the formula quoted in the text above shows no such weights -- confirm.

nominator = np.sum(xs[1:]/xlib*np.exp(-ls[1:]*gs[1:]))
denominator = np.sum(xs[1:]/xlib*gs[1:]* np.exp(-ls[1:]*gs[1:]))
log_term = np.log(nominator)

In [None]:
# effective lag time: -(numerator/denominator) * log(numerator)
lambda_MM = -nominator/denominator * log_term

In [None]:
# display the effective lag time ...
lambda_MM

In [None]:
# ... and, for comparison, the mean leave-one-out lag time
lambdalib

### Simulate bulk competition with background mutants and added wild-type lineage  (scenario B2)

The frequencies for this scenario can be summarized as 

    frequency of the mutant strains:      xi = 1/(k+1)
    frequency of the wildtype strain:     x1 = 1/(k+1)

where `k` is the number of knockout strains. Here all lineages, the mutants and the wild-type, have the same initial frequency. This roughly resembles scenario 'Bfull', but with a barcoded wild-type spiked into the culture.

In [None]:
# number of knockout strains
k = n_knockouts

In [None]:
### set initial frequencies
# total frequency of the mutant library, shared equally among all knockouts
# NOTE(review): this leaves the wild-type at 1 - 0.9999 = 1e-4, not at the
# 1/(k+1) stated in the text above -- confirm.
mutant_library_freq = 0.9999

xs = np.zeros_like(gs)
n_knockouts = len(xs[1:])
xs[1:] = mutant_library_freq/n_knockouts                  # mutant lineages
xs[0] = 1- xs[1:].sum()       # wildtype population



In [None]:
# Total initial frequency of all mutant lineages in scenario B2.
mutant_ratio_B2 = xs[1:].sum()

print(f"Proportion of mutants: {mutant_ratio_B2:.8f} ")
print(f"Proportion of wild-type: {xs[0]:.8f} ")

In [None]:
## calculate final frequencies
# NOTE: `xs` is rebound to the initial frequencies returned by the simulation.
xs, xs_final,tsat = run_bulk_experiment(gs=gs, ls = ls, nus =nus, xs=xs)

## calculate total selection coefficient
sigma_bulk_B2 = CalcTotalSelectionCoefficientLogit(xs,xs_final)

## compute pairwise selection coefficient in bulk
# frequencies are first re-expressed relative to the wild-type (strain 0)
xi1 = CalcReferenceFrequency(xs,ref_strains = [0]) 
xi1_final = CalcReferenceFrequency(xs_final,ref_strains = [0])
s_bulk_B2 = CalcTotalSelectionCoefficientLogit(xi1,xi1_final)



#### Plot frequency trajectory for all barcodes

In [None]:

## set timepoints for each trajectory
# start of the growth cycle and the saturation time returned by the bulk simulation
tvec = np.array([0, tsat])
## set frequency values for each trajectory
# row 0 = initial frequencies, row 1 = final frequencies, one column per strain
trajectories = np.vstack((xs,xs_final))

## plot
fig, ax = plt.subplots(figsize = (FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))
ax.plot(tvec,trajectories[:,1:], color = 'dimgrey', label = 'mutants')
ax.plot(tvec,trajectories[:,0], color = 'orange', label = 'wild-type')

ax.set_xlabel('time')
ax.set_ylabel('frequency')

# fix axis limits
ax.set_yscale('log')
ax.set_ylim(XMIN,XMAX)
ax.set_xlim(TMIN,tsat)

fig.tight_layout()

In [None]:
## plot
# Stacked area chart of the lineage frequencies between the start (t = 0) and
# the end (t = tsat) of the growth cycle: band i spans from the cumulative
# frequency of strains 0..i-1 up to that of strains 0..i.
fig, ax = plt.subplots(figsize = (FIGHEIGHT_TRIPLET, 0.3* FIGHEIGHT_TRIPLET))

ax.tick_params(left = False, labelleft = False, bottom = False, labelbottom=False)

n = len(xs)
# fixed: `i/k+1` evaluates to (i/k)+1 >= 1, so every lineage got the same
# colormap entry; i/(k+1) spreads the lineages over [0, 1)
colors = [knockout_cmap(i/(k+1)) for i in range(n)]
colors[0] = color_wt

# fixed: the band edges must be CUMULATIVE frequencies; the former code used
# the raw frequencies of neighbouring strains, so the bands did not stack
upper = np.vstack((np.cumsum(xs), np.cumsum(xs_final)))  # rows: start, end
upper[:, -1] = 1  # close the stack exactly at 1, as the original did
lower = np.hstack((np.zeros((2, 1)), upper[:, :-1]))

for i in range(n):
    # fixed: the prepared colours are now actually passed to the plot
    ax.fill_between(x=[0,tsat], y1=lower[:, i], y2=upper[:, i], color=colors[i])

ax.set_xlim(0,tsat)
ax.set_ylim(0,1)

In [None]:
### compare in a plot
# Absolute error of the pairwise fitness measured in bulk, for the two bulk
# scenarios (B1 and B2), plotted against the fitness in pairwise competition.
# Entry 0 (the wild-type) is left out of every scatter series.

fig, axes = plt.subplots(1,1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)

ax = axes
x = s_pair

## plot data
y =  s_bulk_B2
ax.scatter(x[1:],y[1:]-x[1:], rasterized = True, color = color_B2, zorder = 2,
           label = f'mutant ratio:{mutant_ratio_B2*100:.2f}%', marker = 'o')

y =  s_bulk_B1
ax.scatter(x[1:],y[1:]-x[1:], rasterized = True, color = color_B1, zorder = 5,
           label = f'mutant ratio:{mutant_ratio_B1*100:6.2f}%', marker = 'o')

## make the y-axis symmetric around zero and mark the zero-error line
ymin,ymax = ax.get_ylim()
yabs = np.max(np.abs([ymin,ymax]))
ax.set_ylim(-yabs,yabs)
ax.axhline(0, ls = '--', color = 'black')
ax.set_ylabel('absolute error')
ax.set_xlabel('mutant fitness in pairwise competition')
error1_min,error1_max = ax.get_ylim()

## export: savefig's keyword is lower-case `dpi`; the former `DPI=` keyword is
## not a savefig argument, so the requested resolution was never applied
fig.savefig(FIG_DIR_DATASET + 'absolute_error_bulk_competition_comparison.pdf',
            dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

In [None]:
### compare in a plot
# Relative error of the pairwise fitness measured in bulk, for the two bulk
# scenarios (B1 and B2), on a logarithmic y-axis.  Entry 0 (the wild-type) is
# left out of every scatter series.

def _relative_error(estimate, reference):
    """Return |estimate - reference| / |reference|; NaN where reference is 0."""
    # `where=` without `out=` leaves the skipped entries uninitialised, so the
    # output is pre-filled with NaN.
    ratio = np.divide(estimate - reference, reference,
                      out = np.full(np.shape(reference), np.nan),
                      where = reference != 0)
    return np.abs(ratio)


fig, axes = plt.subplots(1,1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)

ax = axes
x = s_pair

## plot data
y = _relative_error(s_bulk_B1, s_pair)
ax.scatter(x[1:],y[1:], rasterized = True, color = color_B1, zorder = 5,
           label = f'mutant ratio:{mutant_ratio_B1*100:6.2f}%', marker = 'o')

y = _relative_error(s_bulk_B2, s_pair)
ax.scatter(x[1:],y[1:], rasterized = True, color = color_B2, zorder = 2,
           label = f'mutant ratio:{mutant_ratio_B2*100:6.2f}%', marker = 'o')

## guide line at the relative-error threshold
ax.axhline(rel_threshold, ls = '--', color = 'black')

ax.set_yscale('log')
# NOTE(review): a line at y = 0 cannot be shown on a logarithmic axis; the call
# is kept unchanged from the original.
ax.axhline(0, ls = '--', color = 'black')

ax.set_ylabel('relative error')
ax.set_xlabel('mutant fitness in pairwise competition')
error1_min,error1_max = ax.get_ylim()

## export: savefig's keyword is lower-case `dpi`; the former `DPI=` keyword is
## not a savefig argument, so the requested resolution was never applied
fig.savefig(FIG_DIR_DATASET + 'relative_error_bulk_competition_comparison.pdf',
            dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

In [None]:
### compare in a plot
# Same relative-error comparison of scenarios B1 and B2, but plotted against
# the MAGNITUDE of the pairwise fitness.  Entry 0 (the wild-type) is left out
# of every scatter series.

def _relative_error(estimate, reference):
    """Return |estimate - reference| / |reference|; NaN where reference is 0."""
    # `where=` without `out=` leaves the skipped entries uninitialised, so the
    # output is pre-filled with NaN.
    ratio = np.divide(estimate - reference, reference,
                      out = np.full(np.shape(reference), np.nan),
                      where = reference != 0)
    return np.abs(ratio)


fig, axes = plt.subplots(1,1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)

ax = axes
x = np.abs(s_pair)

## plot data
y = _relative_error(s_bulk_B1, s_pair)
ax.scatter(x[1:],y[1:], rasterized = True, color = color_B1, zorder = 5,
           label = f'mutant ratio:{mutant_ratio_B1*100:6.2f}%', marker = 'o')

y = _relative_error(s_bulk_B2, s_pair)
ax.scatter(x[1:],y[1:], rasterized = True, color = color_B2, zorder = 2,
           label = f'mutant ratio:{mutant_ratio_B2*100:6.2f}%', marker = 'o')

## guide line at the relative-error threshold
ax.axhline(rel_threshold, ls = '--', color = 'black')

ax.set_yscale('log')
# NOTE(review): a line at y = 0 cannot be shown on a logarithmic axis; the call
# is kept unchanged from the original.
ax.axhline(0, ls = '--', color = 'black')

ax.set_ylabel('relative error')
ax.set_xlabel('mutant fitness magnitude\n in pairwise competition')
error1_min,error1_max = ax.get_ylim()

## export: savefig's keyword is lower-case `dpi`; the former `DPI=` keyword is
## not a savefig argument, so the requested resolution was never applied
fig.savefig(FIG_DIR_DATASET + 'relative_error_bulk_competition_comparison_abs.pdf',
            dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

### Plot with inset

In [None]:
### compare in a plot
# Main panel: absolute error of the two scenario-B1 fitness estimates.
# Inset: absolute error of the pairwise fitness in bulk for scenarios B1 and
# B2.  Entry 0 (the wild-type) is left out of every scatter series.

fig, axes = plt.subplots(1,1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)

ax = axes
x = s_pair

## plot data
y = sigma_bulk_B1
ax.scatter(x[1:],y[1:]-x[1:], rasterized = True, color = color_stotal, alpha = 1,
           label = r'total scoeff. $\sigma$', marker = 'o')

y = s_bulk_B1
ax.scatter(x[1:],y[1:]-x[1:], rasterized = True, color = color_B1,
           label = 'pairwise scoeff. $s$', marker = 'o')

## symmetric y-range, enlarged by a factor 1.5 compared to the data range
ymin,ymax = ax.get_ylim()
yabs = np.max(np.abs([ymin,ymax]))
yabs = 1.5*yabs
ax.set_ylim(-yabs,yabs)
ax.axhline(0, ls = '--', color = 'black')
ax.set_ylabel('absolute error')
ax.set_xlabel('mutant fitness in pairwise competition')
error1_min,error1_max = ax.get_ylim()

### add inset
# position and size are given in axes coordinates of the main panel:
# [x0, y0, width, height]
inset = ax.inset_axes([0.75, 0.045, 0.38, 0.38])

# the data are drawn on a twin axis, so the inset's tick marks sit on the right
ax = inset.twinx()

## plot data
y =  s_bulk_B2
ax.scatter(x[1:],y[1:]-x[1:], rasterized = True, color = color_B2, zorder = 2,
           label = f'mutant ratio:{mutant_ratio_B2*100:.2f}%', marker = 'o')

y =  s_bulk_B1
ax.scatter(x[1:],y[1:]-x[1:], rasterized = True, color = color_B1, zorder = 5,
           label = f'mutant ratio:{mutant_ratio_B1*100:6.2f}%', marker = 'o')

ymin,ymax = ax.get_ylim()
yabs = np.max(np.abs([ymin,ymax]))
ax.set_ylim(-yabs,yabs)
ax.axhline(0, ls = '--', color = 'black')
ax.tick_params(labelbottom = False, bottom = False)

ax = inset
ax.tick_params(labelleft =False, left = False, bottom = False, labelbottom = False)
## add labels for inset
ax.set_ylabel('abs. error')

## export: savefig's keyword is lower-case `dpi`; the former `DPI=` keyword is
## not a savefig argument, so the requested resolution was never applied
fig.savefig(FIG_DIR_DATASET + 'absolute_error_bulk_competition_B_with_inset.pdf',
            dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

### Compare predictions of the ranking

In [None]:
data = pd.DataFrame()
## load data on selection coefficients
data['sigma_bulk_B1'] = sigma_bulk_B1
data['s_bulk_B1'] = s_bulk_B1
data['sigma_bulk_B2'] = sigma_bulk_B2
data['s_bulk_B2'] = s_bulk_B2
data['s_pair'] = s_pair

## exclude wild-type, has different offset
## (take an explicit copy: the rank columns below are added to this frame, and
## adding columns to a plain slice triggers pandas' SettingWithCopyWarning)
data = data.iloc[1:].copy()
## create rank variables; with ascending=True rank 1 goes to the smallest
## value, i.e. the least fit mutant comes first. Ties share the lowest rank.
data['sigma_bulk_B1_rank'] = data['sigma_bulk_B1'].rank(ascending = True, method = 'min')
data['s_bulk_B1_rank'] = data['s_bulk_B1'].rank(ascending = True, method = 'min')
data['sigma_bulk_B2_rank'] = data['sigma_bulk_B2'].rank(ascending = True, method = 'min')
data['s_bulk_B2_rank'] = data['s_bulk_B2'].rank(ascending = True, method = 'min')
data['s_pair_rank'] = data['s_pair'].rank(ascending = True, method = 'min')

In [None]:
### compare in a plot: rank differences between the fitness statistics,
### shown for both bulk scenarios (B1 and B2) against the pairwise fitness

fig, axes = plt.subplots(1,2, figsize = (2.5*FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True, sharey=True)


## entry 0 is dropped to match the rows of `data`
x = s_pair[1:]

ax = axes[0]
## plot ranking total bulk vs pairwise bulk
y = data['sigma_bulk_B1_rank'] - data['s_bulk_B1_rank']
ax.scatter(x,y, rasterized = True, color = color_B1, alpha = 1, marker = 'o', zorder = 5)
y = data['sigma_bulk_B2_rank'] - data['s_bulk_B2_rank']
ax.scatter(x,y, rasterized = True, color = color_B2, alpha = 1, marker = 'o', zorder = 2)
ax.set_ylabel('rank difference: total fitness in bulk\nvs. pairwise fitness in bulk')
ax = axes[1]
## plot ranking pairwise bulk vs pairwise
y = data['s_bulk_B1_rank'] - data['s_pair_rank']
ax.scatter(x,y, rasterized = True, color = color_B1, alpha = 1, marker = 'o',zorder = 5)
y = data['s_bulk_B2_rank'] - data['s_pair_rank']
ax.scatter(x,y, rasterized = True, color = color_B2, alpha = 1, marker = 'o', zorder =2)
ax.set_ylabel('rank difference: pairwise fitness in bulk\nvs. pairwise fitness in pairwise')
## sharey hides the tick labels of the second panel; switch them back on
ax.tick_params(labelleft=True)


## plot ranking pairwise bulk vs pairwise
#y = data['s_bulk_B1_rank'] - data['s_pair_rank']
#ax.scatter(x,y, rasterized = True, color = 'dimgrey', alpha = 1, marker = 'o')
#ax.set_ylabel('rank difference between: \n pairwise fit. (bulk) - pairwise comp.')


fig.subplots_adjust( wspace= 0.5)

for ax in axes:

    ## symmetric y-limits around zero
    ymin,ymax = ax.get_ylim()
    yabs = np.max(np.abs([ymin,ymax]))
    ax.set_ylim(-yabs,yabs)
    ax.axhline(0, ls = '--', color = 'black')

    ax.set_xlabel('mutant fitness in pairwise competition')


#fig.tight_layout()
## NOTE: savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR_DATASET + 'ranking_mismatch_bulk_comparison.pdf',
             dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

### Find the optimum frequency for measuring bulk competition

The frequencies for this scenario can be summarized as 

    frequency of the mutant strain s:     xi = x/k
    frequency of the wildtype strain:     x1 = 1-x
    
where `k` is the number of knockout strains and `x` is the total proportion of mutants. This scenario is an interpolation between the scenario **B1** with a dominating wild-type (`x<<1`) and the scenario **Bfull** with no wild-type (`x=1`). 


In [None]:
### choose range of frequencies to test
## 30 geometrically (log-)spaced values of the total mutant library frequency,
## from 1e-6 up to 0.99999
#xrange = np.linspace(1/N,0.99, num = 10)
xrange = np.geomspace(1e-6,.99999, num = 30)

In [None]:
%%time 

## result tables: one row per tested library frequency, one column per strain
## (wild-type in column 0 followed by the knockouts)
sigma_bulk_Bx = np.zeros((len(xrange),n_knockouts+1))
s_bulk_Bx = np.zeros_like(sigma_bulk_Bx)


for i, xlib in enumerate(xrange):

    ### initial frequencies: the wild-type takes 1-xlib and the library
    ### fraction xlib is split evenly among the knockout lineages
    xs = np.zeros_like(gs)
    xs[0] = 1-xlib
    xs[1:] = xlib/n_knockouts

    ## simulate the bulk competition to obtain the final frequencies
    xs, xs_final,_ = run_bulk_experiment(gs=gs, ls = ls, nus =nus, xs=xs)

    ## total selection coefficient from the frequencies in the whole culture
    sigma_bulk_Bx[i] = CalcTotalSelectionCoefficientLogit(xs,xs_final)

    ## pairwise selection coefficient in bulk: same statistic, but evaluated on
    ## the frequencies returned by CalcReferenceFrequency for reference strain 0
    xi1 = CalcReferenceFrequency(xs,ref_strains = [0])
    xi1_final = CalcReferenceFrequency(xs_final,ref_strains = [0])
    s_bulk_Bx[i] = CalcTotalSelectionCoefficientLogit(xi1,xi1_final)

In [None]:
xrange

### Plot error to pairwise competition as a function of mutant frequency

In [None]:
## repeat the pairwise selection coefficients once per tested frequency so that
## `truth` has the same shape as s_bulk_Bx (frequencies x strains)
truth = np.outer(np.ones(len(xrange)),s_pair) # need the right shape
error_abs = s_bulk_Bx - truth
## relative error is undefined where the true value is exactly zero. Without an
## explicit `out` array numpy leaves those entries uninitialised (arbitrary
## memory content), so they are set to NaN here.
error_rel = np.divide(error_abs, truth, out = np.full_like(error_abs, np.nan), where = truth != 0)

In [None]:
### define traits of the coarse-grained population
## NOTE(review): `xs` is whatever the most recently executed cell left behind
## (after the frequency sweep: the last tested frequency) -- confirm intended
xlib = xs[1:].sum()                      # total frequency of all mutants
## effective yield: frequency-weighted harmonic mean of the mutant yields
Ylib = 1/(np.sum(xs[1:]/Ys[1:])/xlib)
nulib = CalcRelativeYield(Ylib, R0 = CONCENTRATION_GLUCOSE, N0 = OD_START)

## `taus` is the reciprocal of the growth rate (printed below as doubling time)
taus = 1/gs
## effective tau: harmonic mean of the mutant taus with weights x_i/Y_i
## (note that xlib/Ylib equals the sum of x_i/Y_i)
taulib = 1/(np.sum(xs[1:]/Ys[1:]/taus[1:])/(xlib/Ylib))
glib = 1/taulib
## effective lag time: mean of the mutant lag times with weights x_i/(Y_i*tau_i)
lambda_lib = np.sum(ls[1:]*xs[1:]/Ys[1:]/taus[1:])*(taulib*Ylib/xlib)

### Calculate more complex error bound
## define wild-type traits
Ywt = Ys[0]
xwt = xs[0]
lwt = ls[0]
gwt = gs[0]
tauwt = 1/gwt

In [None]:
## summary of the coarse-grained library traits next to the wild-type traits
print("\n".join([
    "lag times",
    "library: %.3f" % lambda_lib,
    "wild-type: %.3f" % lwt,
    "doubling times",
    "library: %.3f" % taulib,
    "wild-type: %.3f" % tauwt,
    "growth rates",
    "library: %.3f" % glib,
    "wild-type: %.3f" % gwt,
    "biomass yield",
    "library: %.4f" % Ylib,
    "wild-type: %.4f" % Ywt,
]))


In [None]:
### specify the relative error threshold used in the error bounds below
## NOTE(review): the plots use `rel_threshold` (also 0.01) for the same
## purpose -- keep both values in sync
theta_rel = 0.01

In [None]:
### calculate simple error bound


## the bound scales with the threshold and with the inverse relative
## growth-rate difference between library and wild-type
## (division by zero if glib == gwt)
x_lib_max = gwt/np.abs(glib-gwt) * theta_rel
print(f"The maximum tolerated mutant library frequency from our equation is: {x_lib_max}")
print(f"mutant library growth rate: {glib}")
print(f"wild-type growth rate: {gwt}")

In [None]:
### calculate complex error bound

## find maximum Bj value
LFCpairs = np.log(nus)
## one Bj per mutant: every term is sliced with [1:] so that each mutant is
## compared to the wild-type with its own lag time and its own doubling time.
## (The previous `taus[1]` used the first mutant's doubling time for all mutants.)
## NOTE(review): a mutant with exactly the wild-type doubling time yields
## inf/nan here -- confirm whether such mutants occur in the dataset
Bj = np.abs(LFCpairs[1:] + (ls[1:]-lwt)/(taus[1:]-tauwt))*tauwt
Bj_max = Bj.max()

In [None]:
### calculate the mutant-specific maximum library abundance

## the sign conventions below (factor -1 instead of abs) rely on glib < gwt
assert glib < gwt, "The formula assumes this case!"

prefactor = gwt/((-1)*(glib-gwt))
eps = theta_rel       # tolerated relative error
## magnitude of the library-vs-wild-type term, evaluated once per mutant
## through the mutant-specific entry of LFCpairs
spair_lib_j = np.abs(-(lambda_lib-lwt)*glib - (taulib-tauwt)*LFCpairs[1:]*glib)
libfactor = spair_lib_j *glib/((-1)*(glib-gwt))
## one maximum library abundance per mutant
xlib_max_j =  prefactor * (eps/ (eps + libfactor * Bj))

In [None]:
xlib_max_j.min()

In [None]:
LFC_value = np.log(100)

In [None]:
## `prefactor` and `eps` are reused from the mutant-specific calculation above
assert glib < gwt, "The formula assumes this case!"

## same expression as for the mutant-specific bound, but with one fixed
## LFC_value and the largest Bj, giving a single library abundance
spair_lib = -(lambda_lib-lwt)*glib - (taulib-tauwt)*LFC_value*glib
libfactor = np.abs(spair_lib) *glib/np.abs(glib-gwt)
xlib_star =  prefactor * (eps/ (eps + libfactor * Bj_max))

In [None]:
xlib_star

In [None]:
print("selection coefficient:")
print("spair_lib: %.3f" % spair_lib)


In [None]:
### collect plot data into a long-format DataFrame
# wide table first: one row per genotype, one column per tested library
# frequency; melting then yields one row per (genotype, frequency) pair while
# the genotype number is kept in the 'index' column
df_abs = (
    pd.DataFrame(data = np.abs(error_rel.T), columns = xrange)
    .assign(s_pair = s_pair)
    .melt(id_vars = 's_pair', var_name = 'xlib', value_name = 'error_rel', ignore_index = False)
    .reset_index(drop = False)
    .sort_values('s_pair', ascending = True)
)


In [None]:
from matplotlib.colors import TwoSlopeNorm

In [None]:
### interpolate relative abundance that gives 1% from data
## rows are genotypes, columns are the tested library frequencies
df_search = pd.DataFrame(data= np.abs(error_rel.T), columns = xrange)
## for every tested frequency take the genotype with the largest absolute
## relative error; the set collects all genotypes that are worst at least once
high_error_mutants = set(df_search.idxmax(axis = 0).values)
high_error_mutants

In [None]:
eps

In [None]:
## interpolation nodes: tested library frequencies (xp) and the absolute
## relative error of one genotype at those frequencies (fp)
xp = df_search.columns
## NOTE(review): 3407 is a hard-coded genotype index, presumably taken from
## `high_error_mutants` -- confirm after any change to the dataset
fp = df_search.loc[3407]
def fun(x):
    """Return the squared distance between the interpolated error at x and eps."""
    return np.square(np.interp(x, xp =xp, fp = fp) - eps)

In [None]:
fun(xlib_star)

In [None]:
from scipy.optimize import minimize_scalar

In [None]:
## NOTE(review): the second positional argument of minimize_scalar is
## `bracket`, not `bounds`, so the search is not restricted to this interval --
## confirm whether bounds=(...) with method='bounded' was intended
minimize_scalar(fun, [0.0001,0.01])

In [None]:
df_search

In [None]:
df_abs

In [None]:
df_abs['error_rel'].idxmax()

In [None]:
### choose graphical settings

# possible palettes to try: jet, nipy_spectral, icefire, coolwarm, viridis, crest
## NOTE: this rebinds the global `cmap` that was set to 'tab20c' at the top
cmap = sns.color_palette('viridis', as_cmap=True)
#norm = plt.Normalize(-1, 1, clip = True) # basic norm, same slope on both sides of zero, with cutoff at boundary
## colour scale centred on neutral fitness (0), with separate slopes for the
## negative and the positive side
## NOTE(review): presumably needs s_pair.min() < 0 < s_pair.max() -- confirm
norm = TwoSlopeNorm(vcenter = 0, vmin = s_pair.min(),vmax = s_pair.max())

## hue order for the line plot: pairwise fitness values in ascending order
order = np.sort(s_pair)

In [None]:
import warnings
## relative error of the pairwise fitness in bulk as a function of the library
## frequency; one line per genotype, coloured by its pairwise fitness.
## FutureWarnings are silenced while plotting.
with warnings.catch_warnings():
    warnings.simplefilter(action='ignore', category=FutureWarning)

    fig, ax = plt.subplots(1,1, figsize = (1.25*FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)

    ## units='index' together with estimator=None draws every genotype as its
    ## own line instead of aggregating them
    sns.lineplot(df_abs, x = 'xlib', y = 'error_rel', units = 'index', estimator = None, ax = ax,
                rasterized = True, hue = 's_pair', palette = cmap, hue_norm = norm, hue_order = order[::1],
                 legend = False, alpha = 1.0, zorder = -2,) #color = 'silver'

    ## add the colorbar (replaces the legend, which is switched off above)
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])

    label = 'mutant fitness in pairwise competition'
    ax.figure.colorbar(sm, label = label,use_gridspec=True)


    ax.axhline(0, ls = '--', color = 'black')
    ax.set_ylabel(r'relative error')

    ## analytical bounds on the library frequency (simple and mutant-specific)
    ax.axvline(x_lib_max, color='black', lw = 2)
    ax.axvline(xlib_max_j.min(), color='black', lw = 2)
    #ax.axvline(x_lib_max/2, color='magenta', lw = 2)
    ## tolerated relative error and the library frequencies of scenarios B1/B2
    ax.axhline(rel_threshold,color = 'black', ls = '--',lw = 1)
    ax.axvline(mutant_ratio_B1, color = color_B1, lw = 3)
    ax.axvline(mutant_ratio_B2, color = color_B2, lw = 3)

    ax.set_xlabel('relative abundance\nof the mutant library' + r' $x_{\mathrm{lib}}$')
    #ax.set_xlabel(r'mutant-to-wildtype ratio $x_{\mathrm{lib}}$')
    ax.set_yscale('log')
    ax.set_xscale('log')
    #ax.set_xlim(xrange[0],xrange[-1])
    #ax.set_xlim(xrange[0],xrange[-1]*1.1)

    ax.set_ylim(1e-12,1e2)

    ax.spines['right'].set_visible(False)

    #fig.tight_layout()
    ## artists with zorder below -1 (the genotype lines) are rasterized
    ax.set_rasterization_zorder(-1)

    ## NOTE: savefig expects the lowercase keyword `dpi`
    fig.savefig(FIG_DIR_DATASET + 'relative_error_vs_mutant-to-wildtype_ratio_colorbar.pdf',
                 dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)



### Look at the mutant genotypes with a high relative error

In [None]:
## signed relative error, transposed: rows are genotypes, columns are the
## tested library frequencies
df= pd.DataFrame(error_rel).T
df.shape

In [None]:
## rank the genotypes within every frequency column (rank 1 = largest value),
## then average each genotype's rank across all frequencies
## NOTE(review): `df` holds the signed relative error, so large negative errors
## receive a low priority -- confirm that the absolute value is not intended
df_ranks = df.rank(axis=0, ascending = False,)
df_ranks_avg = df_ranks.mean(axis=1)

In [None]:
### looking for the ten genotype indices with the smallest average rank,
### i.e. those that rank highest in relative error across frequencies
top_ten = df_ranks_avg.sort_values(ascending = True).index[:10]

In [None]:
top_ten

In [None]:
## rank of every strain in each trait: rank 1 is the highest growth rate, the
## shortest lag time and the highest yield, respectively
g_ranks = pd.DataFrame(gs).rank(ascending = False)
l_ranks = pd.DataFrame(ls).rank(ascending = True)
Y_ranks = pd.DataFrame(Ys).rank(ascending = False)

In [None]:
## one report per high-error genotype: fitness statistics, traits, trait ranks
for index in top_ten:
    report = [
        "####",
        f"rank in relative error: {df_ranks_avg.loc[index]}",
        f"mutant genotype index: {index}",
        f"relative fitness s^pair: {s_pair[index]}",
        f"relative fitness s^bulk: {s_bulk_B1[index]}",
        f"growth rate: {gs[index]}",
        f"rank in growth rate: {g_ranks.loc[index, 0]}",
        f"lag time: {ls[index]}",
        f"rank in lag time: {l_ranks.loc[index, 0]}",
        f"biomass yield: {Ys[index]}",
        f"rank in yield: {Y_ranks.loc[index, 0]}",
    ]
    print("\n".join(report))
    

Observations on the mutants with high relative error:

- The mutants have selection coefficients of small magnitude, so they are close to neutral. 
- The mutants always have a beneficial lag time but a deleterious growth rate. This means they are nearly neutral overall, but only because of a trade-off between the two traits.
- It is interesting that we only see mutants on one side of this trade-off, namely those with a beneficial lag time. I guess this makes sense, since we neglected the lag time variation.

In [None]:
### compare to the wild-type
## same report as for the high-error mutants, for the strain at index 0
index = 0
report = [
    f"mutant genotype index: {index}",
    f"relative fitness s^pair: {s_pair[index]}",
    f"relative fitness s^bulk: {s_bulk_B1[index]}",
    f"rank in relative error: {df_ranks_avg.loc[index]}",
    f"growth rate: {gs[index]}",
    f"rank in growth rate: {g_ranks.loc[index, 0]}",
    f"lag time: {ls[index]}",
    f"rank in lag time: {l_ranks.loc[index, 0]}",
    f"biomass yield: {Ys[index]}",
    f"rank in yield: {Y_ranks.loc[index, 0]}",
]
print("\n".join(report))

### Compute the discarded terms in error calculation - use d

\begin{equation}
        C_{231} = s^{\mathrm{pair}}_{21} \Delta\tau_{31} 
                -       \frac{1}{\tau_2}
        (\Delta\tau_{21}\Delta\lambda_{31} - \Delta\tau_{31}\Delta\lambda_{21})
\end{equation}


In [None]:
## wild-type (strain 1 in the equation) reciprocal growth rate. It is defined
## here because this cell needs tau1, which is otherwise only assigned in a
## later cell, so running the notebook top to bottom would fail.
tau1 = 1/gs[0]
## first term of the equation: s_pair * (tau3 - tau1); entry 0 stays zero
## NOTE(review): `tau3` is not assigned in this part of the notebook --
## presumably the background (library) value; confirm
c_laggrowth = np.zeros_like(gs)
c_laggrowth[1:] = s_pair[1:]*(tau3-tau1)

In [None]:
## reference traits of the strain at index 0: lag time and reciprocal growth rate
l1 = ls[0]
tau1 = 1/gs[0]

In [None]:
## second term of the equation above:
## -(1/tau_2) * (dtau_21 * dlambda_31 - dtau_31 * dlambda_21); entry 0 stays zero
## NOTE(review): `lstars` and `tau3` are not assigned in this part of the
## notebook -- presumably the traits of the third (background) strain; confirm
c_coupling = np.zeros_like(gs)
c_coupling[1:] = -1/taus[1:] * ( (taus[1:]-tau1)*(lstars[1:]-l1) - (tau3-tau1)*(ls[1:]-l1))

In [None]:
## scatter both discarded terms against the pairwise selection coefficient
fig, ax = plt.subplots(1,1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)

## plot data
x = s_pair
y = c_laggrowth
ax.scatter(x,y, rasterized = True, color = color_error1, zorder = 2, 
           label = r'laggrowth')

y = c_coupling
ax.scatter(x,y, rasterized = True, color = color_error2, 
           label = r'coupling')


### Compute the discarded terms in error calculation - use rewritten decomposition

\begin{equation}
        C_{231} = \frac{-\Delta \tau_{21}}{\tau_2} \Big[\Delta \tau_{31} \eta^{\mathrm{pair}} + \Delta\lambda_{31}\Big]
\end{equation}


In [None]:
## first term of the rewritten decomposition:
## -(dtau_21/tau_2) * dtau_31 * log(1 + nus[0]); entry 0 stays zero
## NOTE(review): log(1+nus[0]) presumably plays the role of eta^pair -- confirm
c_laggrowth = np.zeros_like(gs)
#c_laggrowth[1:] = s_pair_growth[1:]*(tau3-tau1)
c_laggrowth[1:] = -((taus[1:]-tau1)/taus[1:])*((tau3-tau1)*np.log(1+nus[0]))

In [None]:
## second term of the rewritten decomposition:
## -(dtau_21/tau_2) * dlambda_31; entry 0 stays zero
c_coupling = np.zeros_like(gs)
c_coupling[1:] = -((taus[1:]-tau1)/taus[1:])* (lstars[1:]-l1)


In [None]:
## scatter both terms of the rewritten decomposition against the pairwise
## selection coefficient
fig, ax = plt.subplots(1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)


## plot data
x = s_pair
y = c_laggrowth
ax.scatter(x,y, rasterized = True, color = color_error1, zorder = 2, 
           label = r'laggrowth')

## the coupling term is drawn on top (higher zorder)
y = c_coupling
ax.scatter(x,y, rasterized = True, color = color_error2, zorder = 4,
           label = r'coupling')

ax.set_ylabel('absolute error pairwise fitness\n(bulk competition B)')
ax.set_xlabel('true mutant fitness\n(pairwise competition A)')



### Simulate bulk competition with background mutants only (scenario Bfull)

The frequencies for this scenario can be summarized as 

    frequency of the mutant strains:      xi = 1/k
    frequency of the wildtype strain:     x1 = 0

where `k` is the number of knockout strains. Here the culture consists only of background mutants and contains no wild-type. 

In [None]:
k = n_knockouts

In [None]:
### set initial frequencies
## scenario Bfull: every knockout starts at the same frequency 1/k and the
## wild-type is absent
xs = np.zeros_like(gs)
xs[1:] = 1/k           # mutant lineages
xs[0] = 0              # wildtype population



In [None]:
## sanity check of the initial composition (mutants should sum to 1, wild-type 0)
print("Proportion of mutants: %.8f " % xs[1:].sum() )
print("Proportion of wild-type: %.8f " % xs[0] )

In [None]:
## calculate final frequencies
## (the third return value `tsat` is used below as the final time point)
xs, xs_final, tsat = run_bulk_experiment(gs=gs, ls = ls, nus =nus, xs=xs)

## calculate total selection coefficient
## both statistics are computed from the same initial and final frequencies,
## once with the logit and once with the log variant of the function
sigma_bulk_Bfull = CalcTotalSelectionCoefficientLogit(xs,xs_final)
sigma_bulk_Bfull_log = CalcTotalSelectionCoefficientLog(xs,xs_final)


#### Plot frequency trajectory for all barcodes

In [None]:

## set timepoints for each trajectory
## only the start (0) and the end (tsat) are known, so each trajectory is
## drawn as a straight segment between initial and final frequency
tvec = np.array([0, tsat])
## set frequency values for each trajectory
## rows: time points, columns: strains
trajectories = np.vstack((xs,xs_final))

## plot
fig, ax = plt.subplots(figsize = (FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))
ax.plot(tvec,trajectories[:,1:], color = 'dimgrey', label = 'mutants')
ax.plot(tvec,trajectories[:,0], color = 'orange', label = 'wild-type')

ax.set_xlabel('time')
ax.set_ylabel('frequency')

# fix axis limits
# NOTE: in this scenario the wild-type frequency is zero, which cannot be
# shown on a logarithmic axis
ax.set_yscale('log')
ax.set_ylim(XMIN,XMAX)
ax.set_xlim(TMIN,tsat)

fig.tight_layout()

In [None]:
## plot the composition of the culture as stacked bands: band i spans from the
## cumulative frequency of strains 0..i-1 to that of strains 0..i, at the start
## (t=0) and at the end (t=tsat) of the competition
fig, ax = plt.subplots(figsize = (FIGHEIGHT_TRIPLET, 0.3* FIGHEIGHT_TRIPLET))

ax.tick_params(left = False, labelleft = False, bottom = False, labelbottom=False)

n = len(xs)
## one colour per strain, spread over the colormap range [0,1)
## (the previous `i/k+1` was always >= 1 and mapped every strain to one colour)
colors = [knockout_cmap(i/n) for i in range(n)]
colors[0] = color_wt

## band boundaries are cumulative frequencies, not individual frequencies
cum_start = np.cumsum(xs)
cum_final = np.cumsum(xs_final)

for i in range(n):
    ## lower boundary: the first band starts at zero
    if i == 0: y1 = [0,0]
    else: y1 = [cum_start[i-1], cum_final[i-1]]

    ## upper boundary: the last band is pinned to one to avoid rounding gaps
    if i == n-1: y2 = [1,1]
    else: y2 = [cum_start[i], cum_final[i]]

    ax.fill_between(x=[0,tsat], y1=y1,y2=y2, color = colors[i])

ax.set_xlim(0,tsat)
ax.set_ylim(0,1)

In [None]:
### compare in a plot: absolute error of the total selection coefficient in
### scenario Bfull, once in logit and once in log encoding

fig, axes = plt.subplots(1,2, figsize = (2*FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)

x = s_pair

ax = axes[0]
## plot data for total selection coefficient in logit (entry 0 is skipped)
y = sigma_bulk_Bfull
ax.scatter(x[1:],y[1:]-x[1:], rasterized = True, color = color_stotal, alpha = 1, marker = 'o')
ax.set_title('using logit encoding', loc = 'right')

ax = axes[1]
## plot data for total selection coefficient in log
y = sigma_bulk_Bfull_log
ax.scatter(x[1:],y[1:]-x[1:], rasterized = True, color = color_stotal, alpha = 1, marker = 'o')
ax.set_title('using log encoding', loc = 'right')


for ax in axes:
    ## symmetric y-limits around zero
    ymin,ymax = ax.get_ylim()
    yabs = np.max(np.abs([ymin,ymax]))
    ax.set_ylim(-yabs,yabs)
    ax.axhline(0, ls = '--', color = 'black')
    ax.set_ylabel('absolute error total fitness\n(bulk competition C)')
    ax.set_xlabel('true mutant fitness\n(pairwise competition A)')
    #ax.legend(loc = 'lower right', frameon=False)


fig.tight_layout()
## NOTE: savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR_DATASET + 'absolute_error_bulk_competition_full.pdf',
             dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

In [None]:
## selection coefficients of scenario Bfull next to the pairwise reference
data = pd.DataFrame({
    'sigma_bulk_Bfull': sigma_bulk_Bfull,
    'sigma_bulk_Bfull_log': sigma_bulk_Bfull_log,
    's_pair': s_pair,
})
## add a '<name>_rank' column for every statistic; with ascending=False the
## largest value gets rank 1, so the fittest mutants come first
for column in ['sigma_bulk_Bfull', 'sigma_bulk_Bfull_log', 's_pair']:
    data[column + '_rank'] = data[column].rank(ascending = False)

In [None]:
### compare in a plot: rank differences between the fitness statistics in
### scenario Bfull

fig, axes = plt.subplots(1,3, figsize = (3.*FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)


x = s_pair

ax = axes[0]
## plot ranking total fitness (logit) vs pairwise fitness; entry 0 is skipped
y = data['sigma_bulk_Bfull_rank'] - data['s_pair_rank']
ax.scatter(x[1:],y[1:], rasterized = True, color = color_stotal, alpha = 1, marker = 'o',zorder = 5)
ax.set_ylabel('rank diff. between statistics: \n total fit. (logit) - pairwise fit. (pair)')

ax = axes[1]
## plot ranking total fitness (log) vs pairwise fitness
y = data['sigma_bulk_Bfull_log_rank'] - data['s_pair_rank']
ax.scatter(x[1:],y[1:], rasterized = True, color = color_stotal, alpha = 1, marker = 'o',zorder = 5)
ax.set_ylabel('rank diff. between statistics: \n total fit. (log) - pairwise fit. (pair)')

ax = axes[2]
## plot ranking total fitness (log) vs total fitness (logit)
y = data['sigma_bulk_Bfull_log_rank'] - data['sigma_bulk_Bfull_rank']
ax.scatter(x[1:],y[1:], rasterized = True, color = color_stotal, alpha = 1, marker = 'o', zorder = 5)
ax.set_ylabel('rank diff. between statistics: \n total fit. (log) - total fit. (logit)')
for ax in axes:

    ## symmetric y-limits around zero; the zero line is drawn above the points
    ymin,ymax = ax.get_ylim()
    yabs = np.max(np.abs([ymin,ymax]))
    ax.set_ylim(-yabs,yabs)
    ax.axhline(0, ls = '--', color = 'black', zorder = 10)

    ax.set_xlabel('true mutant fitness\n(pairwise competition A)')



fig.tight_layout()
## NOTE: savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR_DATASET + 'ranking_mismatch_bulk_Bfull.pdf',
             dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

In [None]:
### compare in a plot: relative error of the total selection coefficient in
### scenario Bfull against the pairwise selection coefficient

fig, axes = plt.subplots(1,1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)


ax = axes
x = s_pair

## plot data
## the relative error is undefined where s_pair is exactly zero; an explicit
## `out` array filled with NaN keeps those entries from holding uninitialised
## values (matplotlib does not draw NaN points)
y = np.abs(np.divide( sigma_bulk_Bfull - s_pair, s_pair,
                     out = np.full_like(s_pair, np.nan, dtype = float), where = s_pair !=0))
ax.scatter(x[1:],y[1:], rasterized = True, color = color_stotal, alpha = 1,
           label = r'total scoeff. $\sigma$', marker = 'o')


## tolerated relative error
ax.axhline(rel_threshold, ls = '--', color = 'black')

ymin,ymax = ax.get_ylim()
yabs = np.max(np.abs([ymin,ymax]))
ax.set_yscale('log')
ax.axhline(0, ls = '--', color = 'black')

ax.set_ylabel('relative error\n(bulk competition C)')
ax.set_xlabel('true mutant fitness\n(pairwise competition A)')
ax.legend(loc = 'upper left', frameon=False)
error1_min,error1_max = ax.get_ylim()


#fig.tight_layout()
## NOTE: savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR_DATASET + 'relative_error_bulk_competition_C.pdf',
             dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)