### Simulate bulk competition experiments using empirical traits from Warringer 2003

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from bulk_simulation_code import run_pairwise_experiment, run_bulk_experiment
from bulk_simulation_code import CalcRelativeYield,CalcReferenceFrequency
from bulk_simulation_code import CalcTotalSelectionCoefficientLogit
from m3_model import CalcRelativeSaturationTime as CalcSaturationTimeExact

In [None]:
### Update dependent parameters according to input
import os
import os.path
from os import path

## make sure the export directory exists
## (folder for the output plots/lists produced in this notebook)
import os
FIG_DIR = './figures/log_fitness/'
os.makedirs(FIG_DIR, exist_ok=True)
print("All  plots will be stored in: \n", FIG_DIR, sep="")

In [None]:
### execute the shared setup script in the notebook namespace
# NOTE: the original author reported getting an error from this command; the
# figure-size constants below were added as a manual fix.
# NOTE(review): setup_aesthetics.py presumably also defines DPI and PAD_INCHES,
# which the savefig calls in this notebook rely on — verify.
# The file is opened in a `with` block so the handle is closed after reading.
with open('setup_aesthetics.py') as setup_script:
    exec(setup_script.read())

# manual fix

FIGSIZE_A4 = (8.27, 11.69) # a4 format in inches

# width of one panel: 0.3 of the A4 width, doubled; height uses a 4:3 aspect ratio
FIGWIDTH_TRIPLET = FIGSIZE_A4[0]*0.3*2
FIGHEIGHT_TRIPLET = FIGWIDTH_TRIPLET*0.75


In [None]:
# color used when plotting the empirical dataset (e.g. the initial-OD histogram)
DATASET_COLOR = 'darkorange'


In [None]:
### sub-folder name shared by the figure and the output directory of this run
SUFFIX_DATASET = 'all_traits_vary/'

FIG_DIR_DATASET = FIG_DIR + SUFFIX_DATASET
OUTPUT_DIR_DATASET = './output/' + SUFFIX_DATASET

## create both folders up front; an already existing folder is left untouched
for _dataset_dir in (FIG_DIR_DATASET, OUTPUT_DIR_DATASET):
    os.makedirs(_dataset_dir, exist_ok=True)

### set colorscheme

In [None]:
# base palette from which the named plot colors of this notebook are sampled
cmap = plt.get_cmap('tab20c')

In [None]:
# Each named color is sampled from `cmap` at position k/20 with half-integer k.
# NOTE(review): presumably this targets the middle of one slot of the 20-entry
# 'tab20c' palette — confirm against the matplotlib colormap reference.

# pairwise selection coefficient measured in the bulk competition (scatter plots)
color_B1 = cmap(4.5/20)
color_error1 = cmap(5.5/20) #cmap(13.5/20)
color_error2 = cmap(7.5/20)

# total selection coefficient measured in the bulk competition (scatter plots)
color_stotal = cmap(16.5/20)
color_B2 = cmap(0.5/20)
color_ratio = cmap(13.5/20) #cmap(10.5/20)

color_wt = cmap(10.5/20)

In [None]:

# colormap named 'flag'; no use of it is visible in this part of the notebook
knockout_cmap = plt.get_cmap('flag')

In [None]:
# drawn as a dashed horizontal reference line in the relative-error plot
rel_threshold = 0.01 ### threshold for relative error plots

### Load wild-type traits

In [None]:
# positions of the CSV columns that form the row index of the trait table
INDEX_COL = [0,1,2,3,4]
# strings in the CSV that are parsed as missing values (NaN)
list_na_representations = ['not_present', 'failed_to_compute']

In [None]:
### trait table: the columns listed in INDEX_COL become the row index and the
### placeholder strings in list_na_representations are parsed as missing values
PCWS_TRAITS_WARRINGER = './output/df_M3_traits.csv'
df_warringer = pd.read_csv(
    PCWS_TRAITS_WARRINGER,
    header=0,
    index_col=INDEX_COL,
    na_values=list_na_representations,
    float_precision=None,
)


In [None]:
### define default wild_type
## keep only the rows flagged as wild-type ...
_wildtype_mask = df_warringer['is_wildtype'].eq(True)
df_wildtypes = df_warringer[_wildtype_mask]

## ... and take the column-wise median of their numeric traits as the reference
WILDTYPE = df_wildtypes.median(numeric_only=True, axis=0)

### Load mutant data (averaged)

In [None]:

### averaged trait table; read with the default integer row index
PCWS_TRAITS_WARRINGER_AVERAGED = './output/df_M3_traits_averaged.csv'
df_averaged = pd.read_csv(
    PCWS_TRAITS_WARRINGER_AVERAGED,
    float_precision=None,
    header=0,
)

In [None]:
### assign wild-type label
def is_wildtype(row):
    """Return True if the row's 'genotype' entry equals the label 'BY4741'.

    `row` is anything indexable by the key 'genotype' (e.g. a DataFrame row).
    The result is always a plain bool.
    """
    return bool(row['genotype'] == 'BY4741')
    

# quick sanity check of is_wildtype on the first row of the averaged table
# (as the last expression of the cell, its result is shown as the cell output)
row = df_averaged.iloc[0]
is_wildtype(row)

In [None]:
# flag every row of the averaged table with the result of is_wildtype(row)
df_averaged['is_wildtype'] = df_averaged.apply(is_wildtype, axis =1)

In [None]:
### append mutant values (averaged) to set of individual wild-type strains
df_knockouts = df_averaged[~df_averaged['is_wildtype']]
# DataFrame.append was removed in pandas 2.0; pd.concat with its default
# arguments stacks the two frames row-wise in the same way.
# NOTE(review): df_averaged is read without index_col, so reset_index() on the
# knockouts presumably adds an extra 'index' column here — confirm that is wanted.
df_input = pd.concat([df_wildtypes.reset_index(), df_knockouts.reset_index()])

In [None]:
### restore index
# reuse the index level names of the trait table, so that the combined table
# is indexed by the same columns as df_warringer
index_col_names = df_warringer.index.names
df_input = df_input.set_index(index_col_names)


### Load trait data into the standard form required by Michael's code

In [None]:
## number of knockout strains = number of rows in the non-wild-type table
n_knockouts = len(df_knockouts)

In [None]:
### layout of all trait arrays: entry 0 is the wild-type, entries 1.. are the
### knockouts in the row order of df_knockouts

### growth rates
## every mutant is assigned the wild-type growth rate; the per-mutant
## assignment from df_knockouts['gmax'] is disabled
## NOTE(review): SUFFIX_DATASET is 'all_traits_vary/' although only the lag
## time differs between strains here — confirm this is intended
gs = np.full(n_knockouts + 1, WILDTYPE['gmax'], dtype=float)

### lag times
ls = np.zeros(n_knockouts + 1)
ls[0] = WILDTYPE['lag']
ls[1:] = df_knockouts['lag'].values

### adjust units of time
gs = gs * 60  # change units to growth rate per hour
ls = ls / 60  # change units to hour

### yield
## as for the growth rate, all mutants share the wild-type yield; the
## per-mutant assignment from df_knockouts['yield'] is disabled
Ys = np.full(n_knockouts + 1, WILDTYPE['yield'], dtype=float)


### Define initial condition for bulk growth cycle

In [None]:
### set initial resource concentrations

# NOTE(review): presumably 20 g/L glucose divided by a molar mass of 180 g/mol,
# scaled by 1e3 to give millimolar — confirm against the medium composition
CONCENTRATION_GLUCOSE = 20/180 * 1e3 # concentrations are recorded in milliMolar, to match the units of yield
print(CONCENTRATION_GLUCOSE)

In [None]:
### define default initial_OD
## fixed value; the median of the measured initial ODs
## (df_warringer['od_start'].median()) is not used
OD_START = 0.05

### compare to initial OD in the monoculture cycles
fig, ax = plt.subplots(figsize=(FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))

## histogram of the measured initial ODs with logarithmic counts
ax = df_warringer['od_start'].hist(
    ax=ax,
    bins=41,
    color=DATASET_COLOR,
    alpha=0.6,
    log=True,
    rasterized=True,
)

## mark the value used for the simulations
## NOTE(review): the legend calls this the median although OD_START is
## hard-coded above — confirm the label
ax.axvline(OD_START, color='tab:red', label=f'median value: $N_0={OD_START:.3f}$')
ax.legend()
ax.set(xlabel='initial OD', ylabel='no. growth curves')

### Calculate effective yield

In [None]:
from bulk_simulation_code import CalcRelativeYield

In [None]:
### calculate effective yields
# computed from the yields Ys, the initial glucose concentration (R0) and the
# initial OD (N0); the formula itself lives in bulk_simulation_code
# NOTE(review): presumably returns one value per strain, like gs and ls — verify
nus = CalcRelativeYield(Ys, R0 = CONCENTRATION_GLUCOSE, N0 = OD_START)


### Simulate pairwise competition growth cycles (case I)

In [None]:
# the pairwise competitions are started with x0 = 1/N
# NOTE: N is rebound to the number of mutant lineages when the initial
# frequencies of the bulk competition are set
N = 1e6

In [None]:
## run one pairwise competition per strain against the wild-type traits
## (entry 0 of gs/ls/nus), started at frequency x0 = 1/N;
## the last three return values are not needed here
xs_pair, xs_pair_final, tsats, _, _, _ = run_pairwise_experiment(
    gs=gs,
    ls=ls,
    nus=nus,
    g1=gs[0],
    l1=ls[0],
    nu1=nus[0],
    x0=1 / N,
)

## selection coefficient of each strain from its initial and final frequency
## NOTE(review): presumably the change in logit frequency — see bulk_simulation_code
s_pair = CalcTotalSelectionCoefficientLogit(xs_pair, xs_pair_final)

#### Plot  frequency trajectory in pairwise competition for example mutant

In [None]:
## set axis limits
## time axis of the trajectory plots
TMIN = 0
TMAX = 15
## frequency axis of the trajectory plots (shown on a log scale)
XMIN = 5e-8
XMAX = 2

In [None]:
### pick an example mutant
index = 1
### read mutant frequencies (start and end of the pairwise growth cycle)
xmut0 = xs_pair[index]
xmutf = xs_pair_final[index]
tsat = tsats[index]

## plot
fig, ax = plt.subplots(figsize=(FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))
## only the two endpoints are available, so each trajectory is drawn as a
## straight segment from t=0 to t=tsat
ax.plot([0, tsat], [xmut0, xmutf], color='dimgrey')          # mutant
ax.plot([0, tsat], [1 - xmut0, 1 - xmutf], color='orange')   # complement 1-x

# fix axis limits
ax.set_yscale('log')
ax.set_ylim(XMIN, XMAX)
ax.set_xlim(TMIN, tsat)

ax.set(xlabel='time [hours]', ylabel='frequency')


#### Plot distribution of fitness effects in the pairwise competition

In [None]:
## histogram of the pairwise selection coefficients of all strains
fig, ax = plt.subplots(figsize=(FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))
ax.hist(s_pair, bins=42, color='dimgrey')

## orange: entry 0 (the wild-type slot); red dashed: mean over entries 1..
ax.axvline(s_pair[0], color='orange')
ax.axvline(s_pair[1:].mean(), color='tab:red', ls='--')
ax.set_xlabel('s_21 in pairwise experiment')

### Simulate bulk competition with background mutants added at invasion frequency (case II)

In [None]:
### set initial frequencies
## total frequency shared equally by all mutant lineages
mutant_library_freq = 0.9999

xs = np.zeros_like(gs)
## NOTE: N is rebound here to the number of mutant lineages
N = xs.size - 1
xs[1:] = mutant_library_freq / N   # mutant lineages
xs[0] = 1 - xs[1:].sum()           # wildtype population




In [None]:
# report the initial composition of the bulk culture; by construction
# xs[0] is one minus the summed mutant frequencies
mutant_ratio_B1 = xs[1:].sum()

print("Proportion of mutants: %.8f " % mutant_ratio_B1 )
print("Proportion of wild-type: %.8f " % xs[0] )

In [None]:
from bulk_simulation_code import CalcTotalSelectionCoefficientLog

In [None]:
## calculate final frequencies
# NOTE: xs and tsat are rebound to the values returned by run_bulk_experiment
# (tsat previously held tsats[index] of the pairwise example)
xs, xs_final, tsat = run_bulk_experiment(gs=gs, ls = ls, nus =nus, xs=xs)

In [None]:
## calculate total selection coefficient
## (from the raw initial and final frequencies of the bulk culture)
sigma_bulk_B1 = CalcTotalSelectionCoefficientLog(xs, xs_final)

## compute pairwise selection coefficient in bulk
## both frequency vectors are first re-expressed with strain 0 as reference
## NOTE(review): presumably frequencies relative to the wild-type — see
## CalcReferenceFrequency in bulk_simulation_code
xi1, xi1_final = (
    CalcReferenceFrequency(freqs, ref_strains=[0]) for freqs in (xs, xs_final)
)
s_bulk_B1 = CalcTotalSelectionCoefficientLog(xi1, xi1_final)

#### Plot frequency trajectory for all barcodes

In [None]:

## set timepoints for each trajectory (start and end of the growth cycle)
tvec = np.array([0, tsat])
## set frequency values for each trajectory: row 0 = initial, row 1 = final,
## one column per strain
trajectories = np.vstack((xs, xs_final))

## plot
fig, ax = plt.subplots(figsize=(FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))
## column 0 is the wild-type, all remaining columns are mutants
ax.plot(tvec, trajectories[:, 1:], color='dimgrey', label='mutants')
ax.plot(tvec, trajectories[:, 0], color='orange', label='wild-type')

ax.set(xlabel='time', ylabel='frequency')

# fix axis limits
ax.set_yscale('log')
ax.set_ylim(XMIN, XMAX)
ax.set_xlim(TMIN, tsat)
fig.tight_layout()

In [None]:
### set colors for lineages


#### Calculate error to pairwise competition

In [None]:
### compare in a plot: absolute error of the bulk estimates with respect to
### the pairwise selection coefficient (entries 1.. only, entry 0 is skipped)

fig, axes = plt.subplots(1,1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)


ax = axes
x = s_pair

## plot data
## error of the total selection coefficient
y = sigma_bulk_B1
ax.scatter(x[1:],y[1:]-x[1:], rasterized = True, color = color_stotal, alpha = 1,
           label = r'total scoeff. $\sigma$', marker = 'o')

## error of the pairwise selection coefficient computed from the bulk data
y = s_bulk_B1
ax.scatter(x[1:],y[1:]-x[1:], rasterized = True, color = color_B1,
           label = 'pairwise scoeff. $s$', marker = 'o')


## make the y-range symmetric around the zero-error line
ymin,ymax = ax.get_ylim()
yabs = np.max(np.abs([ymin,ymax]))
ax.set_ylim(-yabs,yabs)
ax.axhline(0, ls = '--', color = 'black')
ax.set_ylabel('absolute error\n(bulk competition B)')
ax.set_xlabel('true mutant fitness\n(pairwise competition A)')
ax.legend(loc = 'lower right', frameon=False)
error1_min,error1_max = ax.get_ylim()


#fig.tight_layout()
## savefig's resolution keyword is lowercase `dpi`; `DPI=` is not a savefig
## option, so the requested resolution was never applied.
## DPI and PAD_INCHES are not defined in this notebook; presumably they come
## from setup_aesthetics.py — verify.
fig.savefig(FIG_DIR_DATASET + 'absolute_error_bulk_competition_B.pdf',
            dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

In [None]:
### compare in a plot: relative error of the bulk estimates with respect to
### the pairwise selection coefficient (entries 1.. only, entry 0 is skipped)

fig, axes = plt.subplots(1,1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)


ax = axes
x = s_pair

## plot data
## relative error |estimate - s_pair| / |s_pair|.
## Entries with s_pair == 0 are excluded from the division. An explicit `out`
## array pre-filled with NaN is required: without it np.divide leaves those
## entries uninitialised and arbitrary values could end up in the plot.
## NaN points are simply not drawn by scatter.
y = np.abs(np.divide( sigma_bulk_B1 - s_pair, s_pair,
                      out = np.full_like(s_pair, np.nan, dtype = float),
                      where = s_pair !=0))
ax.scatter(x[1:],y[1:], rasterized = True, color = color_stotal, alpha = 1,
           label = r'total scoeff. $\sigma$', marker = 'o')

y = np.abs(np.divide( s_bulk_B1 - s_pair, s_pair,
                      out = np.full_like(s_pair, np.nan, dtype = float),
                      where = s_pair !=0))
ax.scatter(x[1:],y[1:], rasterized = True, color = color_B1,
           label = 'pairwise scoeff. $s$', marker = 'o')


## reference line at the relative-error threshold
ax.axhline(rel_threshold, ls = '--', color = 'black')

## yabs is computed as in the absolute-error plot but is not used in this cell
ymin,ymax = ax.get_ylim()
yabs = np.max(np.abs([ymin,ymax]))
ax.set_yscale('log')
## NOTE(review): a line at y=0 cannot be shown on a logarithmic axis; this call
## looks like a leftover from the absolute-error plot — confirm and remove
ax.axhline(0, ls = '--', color = 'black')

ax.set_ylabel('relative error\n(bulk competition B)')
ax.set_xlabel('true mutant fitness\n(pairwise competition A)')
ax.legend(loc = 'upper left', frameon=False)
error1_min,error1_max = ax.get_ylim()


#fig.tight_layout()
## savefig's resolution keyword is lowercase `dpi`; `DPI=` is not a savefig
## option, so the requested resolution was never applied.
## DPI and PAD_INCHES are not defined in this notebook; presumably they come
## from setup_aesthetics.py — verify.
fig.savefig(FIG_DIR_DATASET + 'relative_error_bulk_competition_B.pdf',
            dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)