### Simulate bulk competition experiments using 3 model strains

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
### Update dependent parameters according to input
import os
import os.path
from os import path

## create export directory if necessary
## foldernames for output plots/lists produced in this notebook
import os
# Root folder for all figures produced by this notebook; created if it is missing.
# Plain string literal: the former f-prefix had no placeholders to interpolate.
FIG_DIR = './figures/bulk_fitness/'
os.makedirs(FIG_DIR, exist_ok=True)
print("All  plots will be stored in: \n" + FIG_DIR)

In [None]:


### execute script to load modules here
# NOTE(review): exec() runs the script inside this notebook's global namespace, so every
# name it defines becomes a notebook global. Only use it with a trusted, local file.
# The context manager closes the file handle, which the bare open(...).read() left open.
with open('setup_aesthetics.py') as setup_file:
    exec(setup_file.read())

In [None]:
# Color assigned to this dataset; presumably consumed by plotting code. It is not
# referenced in the cells visible here (TODO confirm it is still needed).
DATASET_COLOR = 'darkorange'


In [None]:
## dataset-specific subfolder, appended to both the figure root and the output root
SUFFIX_DATASET = '3strains/'
FIG_DIR_DATASET = FIG_DIR + SUFFIX_DATASET
OUTPUT_DIR_DATASET = './output/' + SUFFIX_DATASET

## create both folders if they do not exist yet
for dataset_dir in (FIG_DIR_DATASET, OUTPUT_DIR_DATASET):
    os.makedirs(dataset_dir, exist_ok=True)

### Define trait data

In [None]:
### traits of the two fixed strains
g1, g3 = 1., 0.9   # growth rates: wildtype (population 1), background mutant (population 3)
l1 = l3 = 3.       # lag times
Y1 = Y3 = 1.       # biomass yields

### traits of the focal strains (population 2)
## growth rates are drawn from a normal distribution under a fixed seed, so the
## sample is reproducible; lag time and yield are copied from the background mutant
np.random.seed(17961985)
g2_sample = np.random.normal(loc = 0.9, scale = 0.15, size = 200)
l2_sample = np.full_like(g2_sample, l3)
Y2_sample = np.full_like(g2_sample, Y3)

### Define initial condition for bulk growth cycle

In [None]:
### set initial resource concentrations
# passed as `R0` to CalcRelativeYield when the effective yields are computed
R0 = 1

In [None]:
### define default initial_OD
# passed as `N0` to CalcRelativeYield when the effective yields are computed
OD_START = 0.01

In [None]:
from bulk_simulation_code import CalcRelativeYield

In [None]:
### calculate effective yields
## wild-type and background mutant go through one call; the result is unpacked in that order
Ys_fixed = np.array([Y1, Y3])
nu1, nu3 = CalcRelativeYield(Ys = Ys_fixed, R0 = R0, N0 = OD_START)
## same conversion for all focal strains at once
nu2_sample = CalcRelativeYield(Ys = Y2_sample, R0 = R0, N0 = OD_START)

### Simulate pairwise competition growth cycles (scenario A)

The frequencies for scenario A can be summarized as 

    frequency of the focal mutant strain: x2 = 1/N
        frequency of the wildtype strain: x1 = 1 - x2

where $N$ is some population size (number of cells or biomass?). Intuitively, a spontaneous mutation initially forms only a small fraction $x_2 = 1/N$ of the population. The values of the population size in nature are largely unknown, but can be approximated in two ways. 

- by the effective population size $N_e$, which is inferred from the genomic variation across a set of natural isolates, and leads to estimates of $N\approx 10^8$ cells [see papers by Howard Ochman]
- by the bottleneck size $N$ in laboratory evolution experiments like the LTEE, which leads to an estimate of $N=5\cdot 10^6$ cells. According to the first paper on the LTEE, there are $5\cdot10^5$ cells per ml at the starting point of the growth cycle, and the total volume is 10 ml. 



In [None]:
from bulk_simulation_code import run_pairwise_experiment,CalcTotalSelectionCoefficientLogit

In [None]:
# population size; the focal mutant starts at frequency 1/N in the pairwise
# competition and in scenario B1
N = 1e6

In [None]:
## scenario A: each focal strain is competed against the wildtype, starting at frequency 1/N
x0_invasion = 1/N
xs_pair, xs_pair_final = run_pairwise_experiment(
    gs = g2_sample, ls = l2_sample, nus = nu2_sample,
    g1 = g1, l1 = l1, nu1 = nu1,
    x0 = x0_invasion,
)

## selection coefficient from the initial and final frequencies
s2_pair = CalcTotalSelectionCoefficientLogit(xs_pair, xs_pair_final)

### Plot distribution of fitness effects in the pairwise competition

In [None]:
## distribution of pairwise selection coefficients across the focal strains
fig, ax = plt.subplots()
# draw on the explicit Axes instead of going through the pyplot state machine
ax.hist(s2_pair, bins = 21)
ax.set_xlabel('s_21 in pairwise competition')
ax.set_ylabel('number of focal strains');

### Simulate bulk competition with background mutants added at invasion frequency (scenario B1)

The frequencies for this scenario can be summarized as 

    frequency of the focal mutant strain: x2 = 1/N
    frequency of the background mutants:  x3 = (k-1)/N
    frequency of the wildtype strain:     x1 = 1 - x2 - x3

where `k` is the number of knockout strains in this bulk competition. All strains except the focal strain are grouped into a population of background mutants, which forms an initial fraction `x3` of the culture biomass. Here we assume that all knockouts are present at the same initial frequency as the focal strain.

In [None]:
# number of knockout strains in the bulk competition (one focal strain + k-1 background mutants)
k = 1000

In [None]:
### set initial frequencies, stored in the order [wild-type, focal mutant, background mutants]
x2_init = 1/N                      # focal mutant (population 2)
x3_init = (k-1)/N                  # background mutants (population 3)
x1_init = 1 - x2_init - x3_init    # wild-type (population 1) takes the remainder
xtriplet = np.array([x1_init, x2_init, x3_init])

In [None]:
# show the initial wild-type frequency (population 1) as a sanity check
print(xtriplet[0])

In [None]:
from bulk_simulation_code import run_bulk_experiment

In [None]:
### run one bulk growth cycle per focal strain and record the focal mutant (population 2)
x2s_final = np.zeros_like(g2_sample)     # final frequency in the whole culture
x21s_final = np.zeros_like(g2_sample)    # final frequency relative to wild-type + focal mutant

for i, g2 in enumerate(g2_sample):
    l2, nu2 = l2_sample[i], nu2_sample[i]

    ## triplet order is [wild-type, focal mutant, background mutants]
    _, xtriplet_final = run_bulk_experiment(gs = [g1, g2, g3], ls = [l1, l2, l3],
                                            nus = [nu1, nu2, nu3], xs = xtriplet)
    x_wt_final, x_focal_final = xtriplet_final[0], xtriplet_final[1]
    x2s_final[i] = x_focal_final
    x21s_final[i] = x_focal_final/(x_wt_final + x_focal_final)

### initial frequencies of the focal mutant, broadcast to the same shape as the final ones
x2s = np.full_like(g2_sample, xtriplet[1])
x21s = np.full_like(g2_sample, xtriplet[1]/(xtriplet[0] + xtriplet[1]))

In [None]:
## total coefficient (frequency in the whole culture) and pairwise coefficient
## (frequency relative to the wild-type), computed by the same routine in that order
s2_bulk_B1, s21_bulk_B1 = (
    CalcTotalSelectionCoefficientLogit(x_init, x_final)
    for x_init, x_final in ((x2s, x2s_final), (x21s, x21s_final))
)

    

### Calculate error Type I: higher-order interactions in the selection coefficient

In [None]:
### compare in a plot

fig, axes = plt.subplots(1,2, figsize = (2*FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)
ax_corr, ax_resid = axes

s_pairwise, s_bulk = s2_pair, s21_bulk_B1

# left panel: bulk estimate against the pairwise reference
ax_corr.scatter(s_pairwise, s_bulk, rasterized = True)
ax_corr.set_ylabel('s_21 in bulk competition B1')

# right panel: deviation from the pairwise reference, with the y-range symmetric around zero
ax_resid.scatter(s_pairwise, s_bulk - s_pairwise, rasterized = True)
y_extent = max(abs(lim) for lim in ax_resid.get_ylim())
ax_resid.set_ylim(-y_extent, y_extent)
ax_resid.axhline(0, ls = '--', color = 'black')

for ax in axes:
    ax.set_xlabel('s_21 in pairwise competition')
    

### Simulate bulk competition with background mutants and added wild-type lineage  (scenario B2)

The frequencies for this scenario can be summarized as 

    frequency of the focal mutant strain: x2 = 1/(k+1)
    frequency of the background mutants:  x3 = (k-1)/(k+1)
    frequency of the wildtype strain:     x1 = 1/(k+1)

where `k` is the number of knockout strains. Here all lineages, the mutants and the wild-type, have the same initial frequency. This roughly resembles scenario 'B2', but with a barcoded wild-type spiked into the culture.

In [None]:
### set initial frequencies: every lineage (k knockouts + 1 wild-type) starts equally abundant
n_lineages = k+1
x_lineage = 1/n_lineages
xtriplet = np.array([
    x_lineage,           # wild-type (population 1)
    x_lineage,           # focal mutant (population 2)
    (k-1)/n_lineages,    # background mutants (population 3)
])

In [None]:
# show the initial wild-type frequency (population 1) as a sanity check
print(xtriplet[0])

In [None]:
### run one bulk growth cycle per focal strain and record the focal mutant (population 2)
x2s_final = np.zeros_like(g2_sample)     # final frequency in the whole culture
x21s_final = np.zeros_like(g2_sample)    # final frequency relative to wild-type + focal mutant

for i, g2 in enumerate(g2_sample):
    l2, nu2 = l2_sample[i], nu2_sample[i]

    ## triplet order is [wild-type, focal mutant, background mutants]
    _, xtriplet_final = run_bulk_experiment(gs = [g1, g2, g3], ls = [l1, l2, l3],
                                            nus = [nu1, nu2, nu3], xs = xtriplet)
    x_wt_final, x_focal_final = xtriplet_final[0], xtriplet_final[1]
    x2s_final[i] = x_focal_final
    x21s_final[i] = x_focal_final/(x_wt_final + x_focal_final)

### initial frequencies of the focal mutant, broadcast to the same shape as the final ones
x2s = np.full_like(g2_sample, xtriplet[1])
x21s = np.full_like(g2_sample, xtriplet[1]/(xtriplet[0] + xtriplet[1]))

In [None]:
## total coefficient (frequency in the whole culture) and pairwise coefficient
## (frequency relative to the wild-type), computed by the same routine in that order
s2_bulk_B2, s21_bulk_B2 = (
    CalcTotalSelectionCoefficientLogit(x_init, x_final)
    for x_init, x_final in ((x2s, x2s_final), (x21s, x21s_final))
)

    

#### Calculate error Type II: frequency-dependence in the selection coefficient

In [None]:
### compare in a plot

fig, axes = plt.subplots(1,2, figsize = (2*FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)
ax_corr, ax_resid = axes

# Both axes are labelled as the pairwise coefficient s_21 (focal mutant relative to the
# wild-type), so the reference must be s21_bulk_B1. The total coefficient s2_bulk_B1 that
# was plotted before is a different quantity and would add to the B1-vs-B2 difference.
s21_B1, s21_B2 = s21_bulk_B1, s21_bulk_B2

# left panel: B2 estimate against the B1 reference
ax_corr.scatter(s21_B1, s21_B2, rasterized = True)
ax_corr.set_ylabel('s_21 in bulk competition B2')

# right panel: deviation B2 - B1, with the y-range symmetric around zero
ax_resid.scatter(s21_B1, s21_B2 - s21_B1, rasterized = True)
ymin, ymax = ax_resid.get_ylim()
yabs = np.max(np.abs([ymin, ymax]))
ax_resid.set_ylim(-yabs, yabs)
ax_resid.axhline(0, ls = '--', color = 'black')

for ax in axes: ax.set_xlabel('s_21 in bulk competition B1')
    

### Simulate bulk competition with background mutants at variable frequency x (scenario Bx)

The frequencies for this scenario can be summarized as 

    frequency of the focal mutant strain: x2 = 1/k * x
    frequency of the background mutants:  x3 = (k-1)/k  * x
    frequency of the wildtype strain:     x1 = 1-x 

where `k` is the number of knockout strains and `x` is the fraction of mutants in the total culture. The goal is to identify an optimal fraction $x^*$ that provides the best trade-off between low noise in barcode counts (high `x` needed) and low error relative to the pairwise competition (low `x` needed). 

In [None]:
# fraction of the culture made up of mutants (focal + background); the wild-type gets 1-x
x = 0.5

In [None]:
### set initial frequencies: the mutant fraction x is split evenly over the k knockouts
x_focal = x/(k)             # focal mutant (population 2)
x_background = x*(k-1)/k    # background mutants (population 3)
x_wildtype = 1-x            # wild-type (population 1)
xtriplet = np.array([x_wildtype, x_focal, x_background])

print(xtriplet[0])

In [None]:
### run one bulk growth cycle per focal strain and record the focal mutant (population 2)
x2s_final = np.zeros_like(g2_sample)     # final frequency in the whole culture
x21s_final = np.zeros_like(g2_sample)    # final frequency relative to wild-type + focal mutant

for i, g2 in enumerate(g2_sample):
    l2, nu2 = l2_sample[i], nu2_sample[i]

    ## triplet order is [wild-type, focal mutant, background mutants]
    _, xtriplet_final = run_bulk_experiment(gs = [g1, g2, g3], ls = [l1, l2, l3],
                                            nus = [nu1, nu2, nu3], xs = xtriplet)
    x_wt_final, x_focal_final = xtriplet_final[0], xtriplet_final[1]
    x2s_final[i] = x_focal_final
    x21s_final[i] = x_focal_final/(x_wt_final + x_focal_final)

### initial frequencies of the focal mutant, broadcast to the same shape as the final ones
x2s = np.full_like(g2_sample, xtriplet[1])
x21s = np.full_like(g2_sample, xtriplet[1]/(xtriplet[0] + xtriplet[1]))

## total selection coefficient in bulk (frequency in the whole culture)
s2_bulk_Bx = CalcTotalSelectionCoefficientLogit(x2s, x2s_final)

## pairwise selection coefficient in bulk (frequency relative to the wild-type)
s21_bulk_Bx = CalcTotalSelectionCoefficientLogit(x21s, x21s_final)

#### Calculate error for pairwise selection coefficient: Type I + Type II

In [None]:
### compare in a plot

fig, axes = plt.subplots(1,2, figsize = (2*FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)
ax_corr, ax_resid = axes

# Dedicated names for the plotted data: the global `x` holds the mutant fraction of
# scenario Bx, and rebinding it to an array here would break a re-run of the cell that
# builds the Bx initial frequencies.
s_pairwise, s_bulk = s2_pair, s21_bulk_Bx

# left panel: bulk estimate against the pairwise reference
ax_corr.scatter(s_pairwise, s_bulk, rasterized = True)
ax_corr.set_ylabel('s_21 in bulk competition Bx')

# right panel: deviation bulk - pairwise, with the y-range symmetric around zero
ax_resid.scatter(s_pairwise, s_bulk - s_pairwise, rasterized = True)
ymin, ymax = ax_resid.get_ylim()
yabs = np.max(np.abs([ymin, ymax]))
ax_resid.set_ylim(-yabs, yabs)
ax_resid.axhline(0, ls = '--', color = 'black')

for ax in axes: ax.set_xlabel('s_21 in pairwise competition')
    

#### Calculate error for bulk selection coefficient:  Type III

In [None]:
### compare in a plot

fig, axes = plt.subplots(1,2, figsize = (2*FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)
ax_corr, ax_resid = axes

# Dedicated names for the plotted data: the global `x` holds the mutant fraction of
# scenario Bx and must not be rebound to an array here.
# Horizontal axis: pairwise coefficient s_21; vertical axis: total coefficient s_2.
s_pairwise_Bx, s_total_Bx = s21_bulk_Bx, s2_bulk_Bx

# left panel: total coefficient against the pairwise coefficient within the same culture
ax_corr.scatter(s_pairwise_Bx, s_total_Bx, rasterized = True)
# the axis labels were swapped relative to the data; each now names the plotted quantity
ax_corr.set_ylabel('s_2 in bulk competition Bx')

# right panel: deviation s_2 - s_21, y-range symmetric around zero with a 10% margin
ax_resid.scatter(s_pairwise_Bx, s_total_Bx - s_pairwise_Bx, rasterized = True)
ymin, ymax = ax_resid.get_ylim()
yabs = 1.1*np.max(np.abs([ymin, ymax]))
ax_resid.set_ylim(-yabs, yabs)
ax_resid.axhline(0, ls = '--', color = 'black')

for ax in axes: ax.set_xlabel('s_21 in bulk competition Bx')
    

### Simulate bulk competition with background mutants only (scenario Bfull)

The frequencies for this scenario can be summarized as 

    frequency of the focal mutant strain: x2 = 1/k
    frequency of the background mutants:  x3 = (k-1)/k
    frequency of the wildtype strain:     x1 = 0

where `k` is the number of knockout strains. Here the culture consists only of mutants (the focal strain plus the background mutants) and contains no wild-type. 

In [None]:
### set initial frequencies: the culture holds only the k knockouts, no wild-type
xtriplet = np.array([
    0.0,          # wild-type (population 1)
    1/(k),        # focal mutant (population 2)
    (k-1)/(k),    # background mutants (population 3)
])

In [None]:
# show the initial wild-type frequency (population 1); it is set to 0 in this scenario
print(xtriplet[0])

In [None]:
### run one bulk growth cycle per focal strain and record the focal mutant (population 2)
x2s_final = np.zeros_like(g2_sample)     # final frequency in the whole culture
x21s_final = np.zeros_like(g2_sample)    # final frequency relative to wild-type + focal mutant

for i, g2 in enumerate(g2_sample):
    l2, nu2 = l2_sample[i], nu2_sample[i]

    ## triplet order is [wild-type, focal mutant, background mutants]
    _, xtriplet_final = run_bulk_experiment(gs = [g1, g2, g3], ls = [l1, l2, l3],
                                            nus = [nu1, nu2, nu3], xs = xtriplet)
    x_wt_final, x_focal_final = xtriplet_final[0], xtriplet_final[1]
    x2s_final[i] = x_focal_final
    x21s_final[i] = x_focal_final/(x_wt_final + x_focal_final)

### initial frequencies of the focal mutant, broadcast to the same shape as the final ones
x2s = np.full_like(g2_sample, xtriplet[1])
x21s = np.full_like(g2_sample, xtriplet[1]/(xtriplet[0] + xtriplet[1]))

In [None]:
## compute total selection coefficient in bulk
# based on the focal mutant's frequency in the whole culture (x2s -> x2s_final)
s2_bulk_Bfull = CalcTotalSelectionCoefficientLogit(x2s,x2s_final)

## compute pairwise selection coefficient in bulk
# NOTE(review): the wild-type frequency is set to 0 in this scenario, so the initial relative
# frequency x21s equals 1 for every strain; presumably the logit is then non-finite — confirm
# against CalcTotalSelectionCoefficientLogit before using this quantity.
s21_bulk_Bfull = CalcTotalSelectionCoefficientLogit(x21s, x21s_final) # this is not well-defined without wild-type

    

In [None]:
# inspect the pairwise coefficient of the wild-type-free scenario (flagged as not
# well-defined where it is computed); this displays the whole array
s21_bulk_Bfull

#### Calculate total error: Type I + Type II + Type III

In [None]:
### compare in a plot

fig, axes = plt.subplots(1,2, figsize = (2*FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET), sharex = True)
ax_corr, ax_resid = axes

# Dedicated names for the plotted data: the global `x` holds the mutant fraction of
# scenario Bx, and rebinding it to an array here would break a re-run of the cell that
# builds the Bx initial frequencies.
s_pairwise, s_bulk = s2_pair, s2_bulk_Bfull

# left panel: total coefficient in the mutant-only culture against the pairwise reference
ax_corr.scatter(s_pairwise, s_bulk, rasterized = True)
ax_corr.set_ylabel('s_2 in bulk competition with full mutants')

# right panel: deviation bulk - pairwise, with the y-range symmetric around zero
ax_resid.scatter(s_pairwise, s_bulk - s_pairwise, rasterized = True)
ymin, ymax = ax_resid.get_ylim()
yabs = np.max(np.abs([ymin, ymax]))
ax_resid.set_ylim(-yabs, yabs)
ax_resid.axhline(0, ls = '--', color = 'black')

for ax in axes: ax.set_xlabel('s_21 in pairwise competition')
    