### Simulate bulk competition experiments using empirical traits from Warringer 2003

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from bulk_simulation_code import run_pairwise_experiment, run_bulk_experiment
from bulk_simulation_code import CalcRelativeYield,CalcReferenceFrequency
from bulk_simulation_code import CalcTotalSelectionCoefficientLogit, CalcTotalSelectionCoefficientLog
from m3_model import CalcRelativeSaturationTime as CalcSaturationTimeExact

In [None]:
### Update dependent parameters according to input
import os
import os.path
from os import path

## create export directory if necessary
## foldernames for output plots/lists produced in this notebook
import os

# Root folder for every figure written by this notebook. This is a plain
# string literal: the original f-string had no placeholders, so the f-prefix
# was redundant.
FIG_DIR = './figures/pairwise_fitness/'
os.makedirs(FIG_DIR, exist_ok=True)  # no error if the folder already exists
print("All  plots will be stored in: \n" + FIG_DIR)

In [None]:
### execute script to load modules here
# I get some error with this command
# NOTE(review): exec() runs the whole script inside this notebook's namespace;
# only use it on a trusted, local setup_aesthetics.py.
# The context manager closes the file handle, which the bare open() call never did.
with open('setup_aesthetics.py') as aesthetics_file:
    exec(aesthetics_file.read())

# manual fix
# NOTE(review): presumably these constants are normally provided by
# setup_aesthetics.py and are redefined here because the exec above fails — confirm.

FIGSIZE_A4 = (8.27, 11.69) # a4 format in inches

# Panel width: 0.3*2 = 60% of the A4 page width.
FIGWIDTH_TRIPLET = FIGSIZE_A4[0]*0.3*2
# Panel height: three quarters of the panel width.
FIGHEIGHT_TRIPLET = FIGWIDTH_TRIPLET*0.75


In [None]:
# Colour used when plotting the empirical dataset (e.g. the initial-OD histogram).
DATASET_COLOR = 'darkorange'


In [None]:
# Sub-folder name appended to both the figure tree and the output tree.
SUFFIX_DATASET = 'all_traits_vary/'

FIG_DIR_DATASET = FIG_DIR + SUFFIX_DATASET
OUTPUT_DIR_DATASET = './output/' + SUFFIX_DATASET

# Create the figure folder first, then the output folder; folders that
# already exist are left untouched.
for _dataset_dir in (FIG_DIR_DATASET, OUTPUT_DIR_DATASET):
    os.makedirs(_dataset_dir, exist_ok=True)

### set colorscheme

In [None]:
# Matplotlib colormap from which the named plot colours of this notebook are sampled.
cmap = plt.get_cmap('tab20c')

In [None]:
# Each colour is sampled at a fractional position (k + 0.5)/20 of the colormap.
# NOTE(review): presumably this targets the centre of the k-th of 20 colour
# bins of 'tab20c' — confirm against the matplotlib colormap reference.
color_B1 = cmap(4.5/20)
color_error1 = cmap(5.5/20) #cmap(13.5/20)
color_error2 = cmap(7.5/20)

color_stotal = cmap(16.5/20)
color_B2 = cmap(0.5/20)
color_ratio = cmap(13.5/20) #cmap(10.5/20)

color_wt = cmap(10.5/20)

In [None]:

# NOTE(review): not referenced in the visible part of this notebook — confirm it is used further down.
knockout_cmap = plt.get_cmap('flag')

In [None]:
# NOTE(review): not referenced in the visible part of this notebook — confirm it is used further down.
rel_threshold = 0.01 ### threshold for relative error plots

### Load wild-type traits

In [None]:
# Column positions that form the row index when the trait table is read.
INDEX_COL = [0,1,2,3,4]
# Placeholder strings that the CSV reader should treat as missing values.
list_na_representations = ['not_present', 'failed_to_compute']

In [None]:
# Trait table: the columns listed in INDEX_COL become the row index and the
# strings in list_na_representations are parsed as missing values.
PCWS_TRAITS_WARRINGER = './output/df_M3_traits.csv'
df_warringer = pd.read_csv(
    PCWS_TRAITS_WARRINGER,
    header=0,
    index_col=INDEX_COL,
    float_precision=None,
    na_values=list_na_representations,
)


In [None]:
### define default wild_type
# Keep only the rows whose 'is_wildtype' flag equals True.
_wildtype_rows = df_warringer['is_wildtype'].eq(True)
df_wildtypes = df_warringer[_wildtype_rows]

# Reference wild-type: column-wise median over the numeric traits of those rows.
WILDTYPE = df_wildtypes.median(axis=0, numeric_only=True)

### Load mutant data (averaged)

In [None]:

# Averaged trait table; no index columns are requested, so rows keep the
# default integer labels.
PCWS_TRAITS_WARRINGER_AVERAGED = './output/df_M3_traits_averaged.csv'
df_averaged = pd.read_csv(
    PCWS_TRAITS_WARRINGER_AVERAGED,
    header=0,
    float_precision=None,
)

In [None]:
### assign wild-type label
def is_wildtype(row):
    """Return True if the row's 'genotype' entry is the wild-type strain 'BY4741'.

    `row` is any mapping-like object that supports ``row['genotype']``
    (e.g. a DataFrame row); every other genotype value yields False.
    """
    return bool(row['genotype'] == 'BY4741')
    

# Quick sanity check: evaluate the labelling function on the first row.
row = df_averaged.iloc[0]
is_wildtype(row)

In [None]:
# Row-wise flag column: True for the wild-type genotype, False otherwise.
df_averaged['is_wildtype'] = df_averaged.apply(is_wildtype, axis =1)

In [None]:
### append mutant values (averaged) to set of individual wild-type strains
# Knockouts are all averaged rows that are not flagged as wild-type.
df_knockouts = df_averaged[~df_averaged['is_wildtype']]

# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat stacks the same two frames row-wise and keeps their row labels.
# NOTE(review): df_averaged is read without index columns, so reset_index()
# on the knockouts adds its old row labels as an extra 'index' column —
# confirm this is intended.
df_input = pd.concat([df_wildtypes.reset_index(), df_knockouts.reset_index()])

In [None]:
### restore index
# Reuse the index level names of the original trait table so that the combined
# frame is indexed the same way as df_warringer.
index_col_names = df_warringer.index.names
df_input = df_input.set_index(index_col_names)


### Load trait data into the standard form required by Michael's code

In [None]:
# Number of knockout rows; the trait arrays built from it hold one extra leading entry for the wild-type.
n_knockouts = df_knockouts.shape[0]

In [None]:
def _wildtype_then_knockouts(trait):
    """Return a float array holding the wild-type value of `trait` followed by all knockout values."""
    values = np.zeros(n_knockouts + 1)
    values[0] = WILDTYPE[trait]
    values[1:] = df_knockouts[trait].values
    return values


### growth rates, multiplied by 60 (original note: change units to growth rate per hour)
gs = _wildtype_then_knockouts('gmax') * 60

### lag times, divided by 60 (original note: change units to hour)
ls = _wildtype_then_knockouts('lag') / 60
#ls[1:] = ls[0]

### yield: every strain is assigned the wild-type yield
Ys = np.full(n_knockouts + 1, WILDTYPE['yield'], dtype=float)
#Ys[1:] = df_knockouts['yield'].values

### Define initial condition for bulk growth cycle

In [None]:
### set initial resource concentrations

# NOTE(review): presumably 20 g/L glucose divided by its molar mass (~180 g/mol), converted to mM — confirm.
CONCENTRATION_GLUCOSE = 20/180 * 1e3 # concentrations are recorded in milliMolar, to match the units of yield
print(CONCENTRATION_GLUCOSE)

In [None]:
### define default initial_OD
# Hard-coded value; the median of the measured start ODs is kept as a
# commented-out alternative.
OD_START = 0.05  #df_warringer['od_start'].median()

### compare to initial OD in the monoculture cycles
fig, ax = plt.subplots(figsize = (FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))

# Histogram of the 'od_start' column with a logarithmic count axis; `ax` is
# rebound to the axes object returned by pandas.
ax = df_warringer['od_start'].hist(bins=41, color = DATASET_COLOR, alpha = 0.6, log = True, rasterized = True)


# NOTE(review): the legend text says "median value", but OD_START is the
# hard-coded constant above — confirm that it matches the data median.
ax.axvline(OD_START, color = 'tab:red', label = f'median value: $N_0={OD_START:.3f}$')
ax.legend()
ax.set_xlabel('initial OD')
ax.set_ylabel('no. growth curves')

### Calculate effective yield

In [None]:
from bulk_simulation_code import CalcRelativeYield

In [None]:
### calculate effective yields
# Computed from the yields, the initial glucose concentration (R0) and the initial OD (N0).
nus = CalcRelativeYield(Ys, R0 = CONCENTRATION_GLUCOSE, N0 = OD_START)


### Simulate pairwise competition growth cycles (low frequency)

In [None]:
# Initial frequency passed as x0 to the low-frequency pairwise competitions (presumably the mutant's share — confirm).
x0_low = 1e-6

In [None]:
# Pairwise competitions at x0_low, passing the wild-type traits (entry 0 of
# each trait array) as g1/l1/nu1; the last three return values are discarded.
xs_pair, xs_pair_final, tsats, _,_,_ = run_pairwise_experiment(gs=gs,ls=ls,nus = nus, g1=gs[0],l1=ls[0],nu1=nus[0],
                                                               x0 = x0_low)

# Selection coefficients from the initial and final frequencies
# (logit variant, going by the helper's name).
s_pair_low = CalcTotalSelectionCoefficientLogit(xs_pair,xs_pair_final)

#### Plot  frequency trajectory in pairwise competition for example mutant

In [None]:
## set axis limits
# Time-axis and (log-scaled) frequency-axis ranges shared by both trajectory plots.
TMIN,TMAX = 0,15
XMIN,XMAX = 5e-8,2

In [None]:

## set timepoints for each trajectory
# Only two time points are drawn: 0 and TMAX.
# NOTE(review): the final frequencies are placed at TMAX rather than at the
# saturation times in tsats — confirm that the time axis is only schematic.
tvec = np.array([0, TMAX])
## set frequency values for each trajectory
# Row 0: initial frequencies, row 1: final frequencies (presumably one column per strain).
trajectories = np.vstack((xs_pair,xs_pair_final))

## plot
fig, ax = plt.subplots(figsize = (FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))
# Column 0 is drawn as the wild-type, all remaining columns as mutants.
ax.plot(tvec,trajectories[:,1:], color = 'dimgrey', label = 'mutants')
ax.plot(tvec,trajectories[:,0], color = 'orange', label = 'wild-type')

ax.set_xlabel('time')
ax.set_ylabel('frequency')

# fix axis limits
ax.set_yscale('log')
ax.set_ylim(XMIN,XMAX)
ax.set_xlim(TMIN,TMAX)
fig.tight_layout()

### Simulate pairwise competition growth cycles (high frequency)

In [None]:
# Initial frequency passed as x0 to the high-frequency pairwise competitions (presumably the mutant's share — confirm).
x0_high = 1e-2

In [None]:
# Same competition as in the low-frequency case but started at x0_high; this
# overwrites xs_pair, xs_pair_final and tsats from the low-frequency run.
xs_pair, xs_pair_final, tsats, _,_,_ = run_pairwise_experiment(gs=gs,ls=ls,nus = nus, g1=gs[0],l1=ls[0],nu1=nus[0],
                                                               x0 = x0_high)

# Selection coefficients from the initial and final frequencies
# (logit variant, going by the helper's name).
s_pair_high = CalcTotalSelectionCoefficientLogit(xs_pair,xs_pair_final)

#### Plot  frequency trajectory in pairwise competition for example mutant

In [None]:

## set timepoints for each trajectory
# Only two time points are drawn: 0 and TMAX.
# NOTE(review): the final frequencies are placed at TMAX rather than at the
# saturation times in tsats — confirm that the time axis is only schematic.
tvec = np.array([0, TMAX])
## set frequency values for each trajectory
# Row 0: initial frequencies, row 1: final frequencies (presumably one column per strain).
trajectories = np.vstack((xs_pair,xs_pair_final))

## plot
fig, ax = plt.subplots(figsize = (FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))
# Column 0 is drawn as the wild-type, all remaining columns as mutants.
ax.plot(tvec,trajectories[:,1:], color = 'dimgrey', label = 'mutants')
ax.plot(tvec,trajectories[:,0], color = 'orange', label = 'wild-type')

ax.set_xlabel('time')
ax.set_ylabel('frequency')

# fix axis limits
ax.set_yscale('log')
ax.set_ylim(XMIN,XMAX)
ax.set_xlim(TMIN,TMAX)
fig.tight_layout()

### Plot distribution of fitness effects in the pairwise competition

In [None]:
fig, ax = plt.subplots(figsize = (FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))
# Both histograms share one axes; the second is drawn on top of the first
# and no transparency is set, so overlapping bars of the first are covered.
ax.hist(s_pair_low , bins = 42, color = color_B1, label = f"x0={x0_low:.1e}")
ax.hist(s_pair_high , bins = 42, color = color_B2, label = f"x0={x0_high:.1e}")

ax.legend()
ax.set_xlabel('s_21 in pairwise experiment')

In [None]:
### Plot correlation
# Three panels, all plotted against the low-frequency selection coefficient:
# the high-frequency coefficient, the absolute difference and the relative
# difference between the two.

fig, axes = plt.subplots(1,3, figsize = (3*FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET))

x = s_pair_low
ax = axes[0]
y = s_pair_high
ax.scatter(x, y, color = 'dimgrey')
label = f"x0={x0_high:.1e}"
ax.set_ylabel("s:pairwise competition " + label)

ax = axes[1]
y = s_pair_low-s_pair_high
ax.scatter(x, y, color = 'dimgrey')
ax.set_ylabel("absolute difference: low - high")

ax = axes[2]
# np.divide(..., where=mask) leaves the masked-out entries uninitialised unless
# an `out` array is supplied. Pre-fill the output with NaN so that strains with
# s_pair_low == 0 get an undefined ratio instead of arbitrary memory contents.
rel_diff = np.full(np.shape(s_pair_low), np.nan)
np.divide(s_pair_low-s_pair_high, s_pair_low, out = rel_diff, where = s_pair_low!=0)
# The magnitude is plotted because the y-axis is logarithmic.
y = np.abs(rel_diff)
ax.scatter(x, y, color = 'dimgrey')
ax.set_ylabel("relative difference: (low - high)/low")
ax.set_yscale('log')

# All panels share the same x-axis quantity and label.
for ax in axes:
    label = f"x0={x0_low:.1e}"
    ax.set_xlabel("s:pairwise competition " + label)

fig.tight_layout()

### Show the variation in the taubar

In [None]:
# tau is the reciprocal of the growth rate, element-wise for every strain.
taus = 1/gs

In [None]:
# Harmonic mean of wild-type and mutant yield, weighted by (1 - x0_high) and x0_high.
# Since all entries of Ys are set to the wild-type yield, this equals Ys[0] up to rounding.
Ybars_high = 1/( (1-x0_high)/Ys[0] + x0_high/Ys)
# Effective tau of the mixed culture: the same weighting applied to yield*tau, divided by the effective yield.
taubars_high = 1/Ybars_high * 1/( (1-x0_high)/(Ys[0]*taus[0]) + x0_high/np.multiply(Ys,taus))

In [None]:
# Harmonic mean of wild-type and mutant yield, weighted by (1 - x0_low) and x0_low.
# Since all entries of Ys are set to the wild-type yield, this equals Ys[0] up to rounding.
Ybars_low = 1/( (1-x0_low)/Ys[0] + x0_low/Ys)
# Effective tau of the mixed culture: the same weighting applied to yield*tau, divided by the effective yield.
taubars_low = 1/Ybars_low * 1/( (1-x0_low)/(Ys[0]*taus[0]) + x0_low/np.multiply(Ys,taus))

In [None]:
fig, ax = plt.subplots(1,1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET))

# Both effective-tau series are plotted against the low-frequency selection coefficient.
x = s_pair_low
y = taubars_low
ax.scatter(x,y, color = color_B1, label = f"x0={x0_low:.1e}")
y = taubars_high
ax.scatter(x,y, color = color_B2, label = f"x0={x0_high:.1e}")

# Horizontal reference line at the wild-type tau; `ls` here is the keyword
# argument of axhline, not the lag-time array of the same name.
ax.axhline(taus[0], ls = '-', label = 'wildtype', lw = 2)
ax.legend(frameon=False)

label = f"x0={x0_low:.1e}"
ax.set_xlabel("s:pairwise competition " + label)
ax.set_ylabel("effective tau: pairwise competition")  

### Correlate the absolute error with growth rate difference

In [None]:
### predicted trend of the error

def f(tau_2, tau_1=None, x0=None):
    """Return -(tau_2 - tau_1)*x0 / (tau_2*(1 - x0) + tau_1*x0).

    Parameters
    ----------
    tau_2 : float or array-like
        Reciprocal growth rate(s) of the mutant(s); arrays are handled element-wise.
    tau_1 : float, optional
        Reciprocal growth rate of the reference strain. Defaults to
        ``1/gs[0]`` from the notebook namespace (the original behaviour).
    x0 : float, optional
        Initial frequency. Defaults to the notebook-level ``x0_high``
        (the original behaviour).

    Returns
    -------
    float or numpy.ndarray
        The predicted trend, with the same shape as `tau_2`.
    """
    # Fall back to the notebook globals only when the caller gives no value,
    # so that existing calls f(tau_2) keep working unchanged.
    if tau_1 is None:
        tau_1 = 1/gs[0]
    if x0 is None:
        x0 = x0_high

    delta_tau = tau_2 - tau_1
    numerator = delta_tau*x0
    denominator = tau_2*(1-x0) + tau_1*x0
    return -numerator/denominator

## test
# Evaluate the predicted trend for all strains at once; the wild-type entry (index 0) gives zero.

f(1/gs)

In [None]:
### Plot correlation

fig, ax = plt.subplots(1,1, figsize = (FIGHEIGHT_TRIPLET, FIGHEIGHT_TRIPLET))

# x: difference of reciprocal growth rates (1/g) between each strain and the wild-type.
x = 1/gs-1/gs[0]
y = s_pair_low-s_pair_high
ax.scatter(x, y, color = 'dimgrey')

# Predicted difference from f(); it is computed but not drawn, because the
# scatter call below is commented out.
y = np.multiply(s_pair_low, f(taus))
#ax.scatter(x,y)
# NOTE(review): the label says "growth rate difference", but x is a difference
# of reciprocal growth rates — confirm the wording.
ax.set_xlabel(r'mutant growth rate difference $\Delta \tau_{21}$')
#ax.set_ylabel("absolute difference in s pairwise")
ax.set_ylabel("absolute difference: low - high ")


In [None]:
### Show the variation in 