In [None]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats

import seaborn as sns


In [None]:


### execute script to load modules here
# Runs the shared setup script inside this notebook's namespace.
# NOTE(review): later cells use FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET, DPI and PAD_INCHES
# without defining them, so they are presumably defined by this script -- confirm.
# The context manager makes sure the file handle is closed after reading.
with open('setup_aesthetics.py') as _setup_file:
    exec(_setup_file.read())

In [None]:
## make sure the export directory exists
## FIG_DIR is the folder that receives all plots/lists produced in this notebook
import os

FIG_DIR = './figures/alternative_encodings/'
os.makedirs(FIG_DIR, exist_ok=True)   # no error if the folder is already there
print("All  plots will be stored in: \n" + FIG_DIR)

## plot isoclines for selection coefficient

In [None]:
def eval_statistic(xf,x0, phi = lambda x: np.log(x/(1-x))):
    """Change of the encoded frequency between the initial and the final time point.

    Parameters
    ----------
    xf : final frequency (scalar or array)
    x0 : initial frequency (scalar or array)
    phi : encoding function applied to both frequencies; defaults to the logit log(x/(1-x))

    Returns
    -------
    phi(xf) - phi(x0)
    """
    encoded_final = phi(xf)
    encoded_initial = phi(x0)
    return encoded_final - encoded_initial

In [None]:
# sanity check with the default (logit) encoding, using two frequencies symmetric around 0.5
eval_statistic(xf = 0.55, x0 = 0.45)

In [None]:
def eval_statistic_s(xf,x0):
    """Statistic under the logit encoding: log(xf/(1-xf)) - log(x0/(1-x0))."""
    def log_odds(x):
        return np.log(x/(1-x))

    return eval_statistic(xf=xf, x0=x0, phi=log_odds)


def eval_statistic_deltalog(xf,x0):
    """Statistic under the log encoding: log(xf) - log(x0)."""
    return eval_statistic(xf=xf, x0=x0, phi=np.log)

In [None]:
def get_isocline_deltalog(x0, level ):
    """Final frequency on the isocline of the log-encoded statistic.

    Solves log(xf) - log(x0) = level for xf, i.e. xf = x0 * exp(level).

    Parameters
    ----------
    x0 : initial frequency (scalar or array-like)
    level : value of the statistic that is held constant along the isocline
    """
    fold_change = np.exp(level)
    return np.multiply(x0, fold_change)


### test: at level 0 the isocline is the diagonal, so the input comes back unchanged
get_isocline_deltalog(x0 = np.geomspace(0.01,0.1,num=10), level = 0.0)

In [None]:
def get_isocline_s(x0, level ):
    """Final frequency on the isocline of the logit-encoded statistic.

    Solves logit(xf) - logit(x0) = level for xf, which gives
    xf = x0*exp(level) / (1 - x0 + x0*exp(level)).

    Parameters
    ----------
    x0 : initial frequency (scalar or array-like)
    level : value of the statistic that is held constant along the isocline
    """
    scaled = np.multiply(x0, np.exp(level))
    denominator = 1 + scaled - x0
    return np.divide(scaled, denominator)


### test: at level 0 the isocline is the diagonal, so the input comes back unchanged
get_isocline_s(x0 = np.geomspace(0.01,0.1,num=10), level = 0.0)

In [None]:
### helper: logit encoding
def logit(x):
    """Return the log-odds log(x/(1-x)) of a frequency x (scalar or numpy array)."""
    odds = np.divide(x, 1-x)
    return np.log(odds)


## test: even odds map to zero
logit(0.5)

### plot logistic frequency trajectory

In [None]:
from scipy.special import expit as logistic

In [None]:
# sanity check of the imported logistic function (scipy.special.expit)
logistic(0.1)

In [None]:
def sol_exact_logistic(t, r=1, x_0 = 0.01):
    """Closed-form solution of the logistic equation.

    x(t) = 1 / (1 + (1/x_0 - 1) * exp(-r*t)),
    see https://mathworld.wolfram.com/LogisticEquation.html

    Parameters
    ----------
    t : time point(s), scalar or array-like
    r : growth rate, must be positive
    x_0 : initial value, must lie in (0, 1]

    Raises
    ------
    AssertionError if r or x_0 violate the constraints above.
    """
    assert x_0 > 0
    assert x_0 <=1
    assert r > 0

    initial_odds_against = (1/x_0) - 1
    decay = np.exp(np.multiply(-r,t))
    return 1/ (1 + initial_odds_against * decay)

### test
# quick visual check: plot the logistic trajectory on t in [0, 10] for r = 1, x_0 = 0.01
fig, ax = plt.subplots()
t= np.linspace(0,10, num = 100)
y = sol_exact_logistic(t=t, r=1,x_0 = 0.01)
ax.plot(t,y)

In [None]:
# seaborn "Set2" colour palette; the plotting cells pick colours from it by index
palette = sns.color_palette("Set2")

In [None]:
### make plot: one logistic trajectory shown under the three encodings
## define sample
t = np.linspace(0,12, num= 100)
x_vec = sol_exact_logistic(t, r=1, x_0 = 0.01)

fig, axes = plt.subplots(1,3, figsize = (3.3*FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET))

## one panel per encoding: (encoded trajectory, palette index, y-axis label)
## raw strings keep the LaTeX backslashes intact; the logit label now carries
## the parentheses of log(x/(1-x))
panels = [
    (x_vec, 0, 'relative abundance $x$'),
    (np.log(x_vec), 2, r'log relative abundance $\log(x)$'),
    (logit(x_vec), 1, r'logit relative abundance $\log(x/(1-x))$'),
]

for ax, (trajectory, color_index, ylabel) in zip(axes, panels):
    ax.plot(t, trajectory, lw = 3, color = palette[color_index])
    ax.set_xlabel('time')
    ax.set_ylabel(ylabel)
    ax.set_xlim(0,12)
    sns.despine(ax=ax, top = True)

## savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR + 'example_trajectories.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)
    

### plot overview of encodings

In [None]:
# seaborn "Set2" colour palette; the plotting cells pick colours from it by index
palette = sns.color_palette("Set2")

In [None]:
## compare the three encoding functions m(x) on the open interval (0, 1)
fig, ax = plt.subplots(figsize = (FIGHEIGHT_TRIPLET,FIGHEIGHT_TRIPLET))

# stay away from 0 and 1, where log and logit diverge
x0_vec = np.linspace(0.0001,0.9999, num = 100)

# raw strings keep the LaTeX backslashes intact; the logit label now carries
# the parentheses of log(x/(1-x))
ax.plot(x0_vec,x0_vec, color = palette[0], label = '$m=x$', lw = 3)
ax.plot(x0_vec, logit(x0_vec), color = palette[1], ls = '-', label = r'$m=\log(x/(1-x))$', lw = 3)
ax.plot(x0_vec, np.log(x0_vec), color = palette[2], ls = '-', label = r'$m=\log(x)$', lw = 3)

ax.axhline(0, color = 'black', ls = 'dotted')
ax.set_ylim(-3,3)
ax.set_xlim(0,1)

ax.set_xlabel('input strain frequency $x$')
ax.set_ylabel('output $m$ from encoding function')

ax.legend(loc = 'upper left')

# savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR + 'example_encodings.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)
              

### plot residuals of regression

In [None]:
# scratch check: fold change that corresponds to a log difference of 0.1
np.exp(0.1)

In [None]:
### set up an example change of frequency

x_start = 0.01
# disabled alternative: derive x_final from x_start by multiplying the odds with exp(5)
#z = np.exp(5)*(x_start/(1-x_start))
#x_final = z/(1+z)
x_final = 0.5
print(x_final)

In [None]:
### create sample errors: binomially sampled frequencies around x_start and x_final
rs = np.random.RandomState(27021997)

### size of the random vector
### basically, number of replicate experiments
size = 100

n_success  = 100 # number of counted colonies with mutant genotype that we require to count

## collect one frame per true frequency and concatenate once at the end
## (DataFrame.append was removed in pandas 2.0)
frames = []
for xtrue in [x_start, x_final]:
    # sample size chosen such that the expected mutant count equals n_success;
    # binomial needs an integer number of trials
    n_sampled = int(round(n_success/xtrue))

    ## sample raw frequencies
    dist = rs.binomial(n=n_sampled,p=xtrue,size = size)/n_sampled

    df_raw = pd.DataFrame({'true frequency': np.full_like(dist, xtrue), 'value': dist})
    df_raw['type'] = 'no encoding'
    frames.append(df_raw)

# no ignore_index: each part keeps its own 0..size-1 index, as with the former append
df_data = pd.concat(frames)


    
    

In [None]:
### convert: add the logit- and log-encoded versions of the sampled frequencies

observed = df_data['value'].values
df_data['logit'] = logit(observed)
df_data['log'] = np.log(observed)

In [None]:
### plot residuals of a linear regression of the encoded value on the true frequency
### top panel: log encoding, bottom panel: logit encoding

fig, axes = plt.subplots(2,1, figsize = (FIGWIDTH_TRIPLET, FIGHEIGHT_TRIPLET), sharex=True, sharey = True)

for ax, encoding in zip(axes, ['log', 'logit']):
    # pass the target axes explicitly; the logit panel used to rely on the
    # implicit "current axes" of pyplot
    sns.residplot(x = 'true frequency', y = encoding, data = df_data, ax = ax)

# the panels share the x-axis, so only the bottom one keeps its label
axes[0].set_xlabel("")

### plot a binomial distribution

In [None]:
### create a sample of observed frequencies for three true frequencies
rs = np.random.RandomState(15031998)

### size of the random vector
### basically, number of replicate experiments
size = 100000

n_success  = 100


def _add_residual_columns(df):
    """Add 'rescaled' (z-score) and 'residual' (mean-centred) columns of df['value']; returns df."""
    mean, std = df['value'].mean(), df['value'].std()
    df['rescaled'] = (df['value'] - mean)/std
    df['residual'] = df['value'] - mean
    return df


def _encode_sample(df_raw, encoding, label):
    """Copy df_raw, replace 'value' by encoding(value), tag it with label and recompute the residual columns."""
    df_encoded = df_raw.copy(deep=True)
    # vectorised over the whole column instead of a Python-level loop
    df_encoded['value'] = encoding(df_raw['value'].to_numpy())
    df_encoded['type'] = label
    # Replacing infinite with nan (log/logit of an observed frequency of exactly 0 or 1)
    df_encoded = df_encoded.replace([np.inf, -np.inf], np.nan)
    return _add_residual_columns(df_encoded)


## (true frequency, frequency that sets the sample size n_success/frequency)
## NOTE(review): 0.99 is paired with 0.01, presumably so that the rarer genotype is
## counted about n_success times -- confirm this is intended
sampling_design = [(0.99, 0.01), (0.5, 0.5), (0.01, 0.01)]

## collect all frames and concatenate once (DataFrame.append was removed in pandas 2.0)
frames = []
for xtrue, x_reference in sampling_design:
    # number of balls drawn from the urn at each replicate experiment (integer for binomial)
    n_sampled = int(round(n_success/x_reference))

    ## sample raw frequencies
    dist = rs.binomial(n=n_sampled,p=xtrue,size = size)/n_sampled
    df_raw = pd.DataFrame({'true frequency': np.full_like(dist, xtrue), 'value': dist})
    df_raw = _add_residual_columns(df_raw)
    df_raw['type'] = 'no encoding'
    frames.append(df_raw)

    ### evaluate under logit transform
    frames.append(_encode_sample(df_raw, logit, 'encoded with logit'))

    ## evaluate under log transform
    frames.append(_encode_sample(df_raw, np.log, 'encoded with log'))

df_data = pd.concat(frames)


    
    

In [None]:


# Replacing infinite with nan
# (log and logit are infinite for an observed frequency of exactly 0 or 1)
df_data = df_data.replace([np.inf, -np.inf], np.nan)

In [None]:
# number of missing values; pandas does the summation instead of a Python-level loop over every row
int(df_data['value'].isna().sum())

In [None]:
# seaborn "Set2" colour palette; the plotting cells pick colours from it by index
palette = sns.color_palette("Set2")

In [None]:
## sort by encoding type, then by true frequency (both in descending order)
df_data = df_data.sort_values(['type', 'true frequency'], ascending = False)

In [None]:
## residual distributions per true frequency, one panel per encoding
fig, axes = plt.subplots(1, 3, figsize = (1.7*FIGHEIGHT_TRIPLET,FIGHEIGHT_TRIPLET), sharey = True)

# panel order; palette[i] is used for panel i
labels = ['no encoding', 'encoded with logit', 'encoded with log']

for i, (ax, label) in enumerate(zip(axes, labels)):
    data_to_plot = df_data[df_data['type'] == label]
    sns.violinplot(x='residual', y = 'true frequency', data=data_to_plot, ax = ax, orient = 'h',
                   label=label, color = palette[i], scale = 'count', rasterized = True,
                   inner = None, cut = 0)
    ax.set_xlabel(label)

    if i == 0:
        # only the left-most panel keeps the shared y-axis decoration
        sns.despine(ax=ax)
    else:
        ax.set_ylabel("")
        sns.despine(ax=ax, left = True)
        ax.tick_params(left=False)

    ### symmetrize the x-range around zero
    xmin,xmax = ax.get_xlim()
    max_abs = np.abs([xmin,xmax]).max()
    ax.set_xlim(-max_abs,max_abs)

# savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR + 'example_distributions_after_encoding.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)
              

In [None]:
## standard deviation of 'value' for every (encoding, true frequency) combination
print("Standard deviations")
for label in ['no encoding', 'encoded with logit', 'encoded with log']:
    print(label)
    has_type = df_data['type'] == label
    for xtrue in [0.99, 0.5, 0.01]:
        has_frequency = df_data['true frequency'] == xtrue
        print(df_data.loc[has_type & has_frequency, 'value'].std())

### plot phase diagram without sample

In [None]:
## isoclines of both statistics in the (x0, xf) plane
fig, ax = plt.subplots(figsize = (FIGHEIGHT_TRIPLET,FIGHEIGHT_TRIPLET))

# six negative and six positive levels of the statistic
levels = np.outer([-1,1],np.linspace(0.001,1,num = 6)).flatten()
x0_vec = np.linspace(0.0001,0.9999, num = 100)
color_s = 'tab:grey'
color_deltalog = 'navy'

## each isocline is the set of (x0, xf) pairs that share one value of the statistic
for level in levels:
    ax.plot(x0_vec, get_isocline_deltalog(x0 = x0_vec, level = level), color = color_deltalog)
    ax.plot(x0_vec, get_isocline_s(x0 = x0_vec, level = level), color = color_s)

## plot diagonal
ax.plot([0,1],[0,1], color = 'red', ls = '--', label = 'y=x')

## add legend items (empty lines that only carry the labels)
ax.plot([],[], color = color_deltalog, label = r'$\Delta \log$ isocline')
ax.plot([],[], color = color_s, label = '$s$ isocline')

ax.set_xlim(0,1)
ax.set_ylim(0,1)
ax.legend()

ax.set_xlabel('initial mutant frequency $x_0$')
ax.set_ylabel('final mutant frequency $x_f$')

# savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR + 'isoclines_s_vs_deltalog.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)
              

### Simulate final frequencies for a sample of initial frequencies (fixed lag time effect)

In [None]:
from selection_coefficient import Problem_M3, get_ODE_solution, plot_solution

In [None]:
### define solver params
# keyword arguments that are unpacked into every get_ODE_solution(...) call
# NOTE(review): the meaning of the individual keys is defined in selection_coefficient, not visible here
SOLVER_PARAMS = {'t_final' : 100, 'timestep' : 10, 'adaptive_timewindow' : True, 'rtol' : 1e-8, 'atol' : 1e-12, 'scoeff_atol':1e-8, 'scoeff_rtol':1e-6}

In [None]:
## strain parameters and initial conditions that are passed to Problem_M3
## NOTE(review): meaning of 'lam', 'g', 'Y', 'N_0', 'R_0' is defined in selection_coefficient -- confirm there
strain_params = {'lam': [2,1], 'g': [1,1], 'Y':[1,1]}
initial_conditions = {'N_0': 0.01, 'R_0': 1}

## solve and plot the default problem with initial frequency x = 0.5
problem_default = Problem_M3(**strain_params,**initial_conditions, x = 0.5)
sol = get_ODE_solution(problem_default, **SOLVER_PARAMS)

fig, ax = plt.subplots()
plot_solution(sol, ax = ax)

# savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR + 'growthycle_default_for_fixed_lag_time_effect.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

In [None]:
def sol2frequencies(sol):
    """Return (x_f, x_0) for a solution object.

    x_0 is read from sol.params['x']. x_f is computed from the first two entries
    of the last column of sol.y as second / (first + second).
    NOTE(review): the local names treat the first entry as wild-type and the second
    as mutant abundance -- confirm against the ODE model.
    """
    x_0 = sol.params['x']

    final_abundances = sol.y[:2,-1]
    Nw_f, Nm_f = final_abundances
    return Nm_f/(Nm_f + Nw_f), x_0

## test
# apply to the default solution `sol`; the result is the pair (x_f, x_0)
sol2frequencies(sol)

In [None]:
# fix the global NumPy seed so that the sample below is reproducible
np.random.seed(29071997)

### sample set of  initial frequency
# 100 draws from a normal distribution with mean 0.5 and standard deviation 0.1
x0_sample = np.random.normal(0.5, 0.1, size = 100)

In [None]:
### calculate solutions: one problem and one ODE solution per sampled initial frequency

list_problems = [Problem_M3(**strain_params,**initial_conditions, x =v ) for v in x0_sample]
list_sols = [get_ODE_solution(v, **SOLVER_PARAMS) for v in list_problems]

## extract (x_f, x_0) once per solution and derive all statistics from these pairs
frequency_pairs = [sol2frequencies(v) for v in list_sols]
xf_vec = np.array([pair[0] for pair in frequency_pairs])
scoeff_logit_vec = np.array([eval_statistic_s(*pair) for pair in frequency_pairs])
scoeff_deltalog_vec = np.array([eval_statistic_deltalog(*pair) for pair in frequency_pairs])

In [None]:
## isoclines of both statistics in the (x0, xf) plane, overlaid with the simulated points
fig, ax = plt.subplots(figsize = (FIGHEIGHT_TRIPLET,FIGHEIGHT_TRIPLET))

# six negative and six positive levels of the statistic
levels = np.outer([-1,1],np.linspace(0.001,1,num = 6)).flatten()
x0_vec = np.linspace(0.0001,0.9999, num = 100)
color_s = 'tab:grey'
color_deltalog = 'navy'

## each isocline is the set of (x0, xf) pairs that share one value of the statistic
for level in levels:
    ax.plot(x0_vec, get_isocline_deltalog(x0 = x0_vec, level = level), color = color_deltalog)
    ax.plot(x0_vec, get_isocline_s(x0 = x0_vec, level = level), color = color_s)

## plot diagonal
ax.plot([0,1],[0,1], color = 'red', ls = '--', label = 'y=x')

## add legend items (empty lines that only carry the labels)
ax.plot([],[], color = color_deltalog, label = r'$\Delta \log$ isocline')
ax.plot([],[], color = color_s, label = '$s$ isocline')

### plot cloud of points

ax.scatter(x0_sample,xf_vec, color = 'tab:orange', marker = 'o', s = 20)

ax.set_xlim(0,1)
ax.set_ylim(0,1)
ax.legend()

ax.set_xlabel('initial mutant frequency $x_0$')
ax.set_ylabel('final mutant frequency $x_f$')

# savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR + 'isoclines_s_vs_deltalog_with_fixed_lag_time_effect.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)
              

In [None]:
## both statistics as a function of the sampled initial frequency
fig, ax = plt.subplots()

ax.scatter(x0_sample, scoeff_logit_vec, label = 'logit encoding')
ax.scatter(x0_sample, scoeff_deltalog_vec, label = 'log encoding')
ax.legend()

ax.set_xlabel('initial mutant frequency')
ax.set_ylabel('relative fitness')

# savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR + 'relative_fitness_vs_initial_frequency_for_fixed_lag_time_effect.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

### Sample a bivariate set of frequencies

In [None]:

# fix the global NumPy seed so that the following sample is reproducible
np.random.seed(29071997)

In [None]:
# choose a centre point (centroid) in the (x0, xf) plane
mean = (0.5,0.7) #x0,xf

# sample a set of initial and final frequencies around it
# (covariance 0.001 * identity: independent noise of equal variance in x0 and xf)
x0_sample, xf_sample= np.random.multivariate_normal(mean, cov=0.001*np.eye(2),size=100).T


In [None]:
## isoclines of both statistics in the (x0, xf) plane, overlaid with the bivariate sample
fig, ax = plt.subplots(figsize = (FIGHEIGHT_TRIPLET,FIGHEIGHT_TRIPLET))

# six negative and six positive levels of the statistic
levels = np.outer([-1,1],np.linspace(0.001,1,num = 6)).flatten()
x0_vec = np.linspace(0.0001,0.9999, num = 100)
color_s = 'tab:grey'
color_deltalog = 'navy'

## each isocline is the set of (x0, xf) pairs that share one value of the statistic
for level in levels:
    ax.plot(x0_vec, get_isocline_deltalog(x0 = x0_vec, level = level), color = color_deltalog)
    ax.plot(x0_vec, get_isocline_s(x0 = x0_vec, level = level), color = color_s)

## plot diagonal
ax.plot([0,1],[0,1], color = 'red', ls = '--', label = 'y=x')

## add legend items (empty lines that only carry the labels)
ax.plot([],[], color = color_deltalog, label = r'$\Delta \log$ isocline')
ax.plot([],[], color = color_s, label = '$s$ isocline')

### plot cloud of points

ax.scatter(x0_sample,xf_sample, color = 'tab:orange', marker = 'o', s = 5)

ax.set_xlim(0,1)
ax.set_ylim(0,1)
ax.legend()

ax.set_xlabel('initial mutant frequency $x_0$')
ax.set_ylabel('final mutant frequency $x_f$')

# savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR + 'isoclines_s_vs_deltalog_with_sample.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)
              

In [None]:
## create data frame with one row per sampled (x0, xf) pair
df_sample = pd.DataFrame({'x0': x0_sample, 'xf': xf_sample})

### evaluate fitness statistics
### both functions work element-wise on whole columns, so no row-wise apply is needed
df_sample['s'] = eval_statistic_s(xf=df_sample['xf'], x0=df_sample['x0'])
df_sample['deltalog'] = eval_statistic_deltalog(xf=df_sample['xf'], x0=df_sample['x0'])


In [None]:
## create ranks

# add an ascending rank column "<statistic>_rank" for each statistic
for statistic in ("s", "deltalog"):
    df_sample[statistic + "_rank"] = df_sample[statistic].rank(ascending = True)

In [None]:
# show the two samples with the highest rank in s
df_sample.sort_values('s_rank').tail(2)

In [None]:
# plot correlation between the ranks under both statistics

fig, ax = plt.subplots(figsize = (FIGHEIGHT_TRIPLET,FIGHEIGHT_TRIPLET))

x = df_sample['s_rank']
y = df_sample['deltalog_rank']
ax.scatter(x,y, color = 'tab:orange')

# dashed lines at 80% of the sample size, i.e. the boundary of the top 20% of ranks
ax.axhline(x.shape[0]*0.8, ls = '--')
ax.axvline(x.shape[0]*0.8, ls = '--')

# x and y are already ranks, so the Pearson and Spearman coefficients coincide here;
# the p-values are kept under separate names instead of overwriting each other
r, pval_pearson = stats.pearsonr(x,y)
rho, pval_spearman = stats.spearmanr(x,y)

title = fr'Pearson $r={r:.2f}$, Spearman $\rho={rho:.2f}$'
ax.set_title(title, loc = 'right')

ax.set_xlabel("rank in terms of\nselection coefficient $s$")
# escaped backslashes keep the LaTeX commands intact next to the real newline
ax.set_ylabel("rank in terms of\nalternative statistic $\\Delta \\log$")

# savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR + 'correlation_s_vs_deltalog_with_sample.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

### Sample a univariate set of frequencies

In [None]:
# fix the global NumPy seed so that the following sample is reproducible
np.random.seed(29071997)

In [None]:
# choose a centre point (centroid) in the (x0, xf) plane
mean = (0.5,0.7) #x0,xf

# sample a set of initial and final frequencies around it
# (the covariance has zero variance in x0, so only xf varies between samples)
x0_sample, xf_sample= np.random.multivariate_normal(mean, cov=0.001*np.array([[0,0],[0,1]]),size=100).T


In [None]:
## isoclines of both statistics in the (x0, xf) plane, overlaid with the univariate sample
fig, ax = plt.subplots(figsize = (FIGHEIGHT_TRIPLET,FIGHEIGHT_TRIPLET))

# six negative and six positive levels of the statistic
levels = np.outer([-1,1],np.linspace(0.001,1,num = 6)).flatten()
x0_vec = np.linspace(0.0001,0.9999, num = 100)
color_s = 'tab:grey'
color_deltalog = 'navy'

## each isocline is the set of (x0, xf) pairs that share one value of the statistic
for level in levels:
    ax.plot(x0_vec, get_isocline_deltalog(x0 = x0_vec, level = level), color = color_deltalog)
    ax.plot(x0_vec, get_isocline_s(x0 = x0_vec, level = level), color = color_s)

## plot diagonal
ax.plot([0,1],[0,1], color = 'red', ls = '--', label = 'y=x')

## add legend items (empty lines that only carry the labels)
ax.plot([],[], color = color_deltalog, label = r'$\Delta \log$ isocline')
ax.plot([],[], color = color_s, label = '$s$ isocline')

### plot cloud of points

ax.scatter(x0_sample,xf_sample, color = 'tab:orange', marker = 'o', s = 5)

ax.set_xlim(0,1)
ax.set_ylim(0,1)
ax.legend()

ax.set_xlabel('initial mutant frequency $x_0$')
ax.set_ylabel('final mutant frequency $x_f$')

# savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR + 'isoclines_s_vs_deltalog_with_sample_univariate.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)
              

In [None]:
## create data frame with one row per sampled (x0, xf) pair
df_sample = pd.DataFrame({'x0': x0_sample, 'xf': xf_sample})

### evaluate fitness statistics
### both functions work element-wise on whole columns, so no row-wise apply is needed
df_sample['s'] = eval_statistic_s(xf=df_sample['xf'], x0=df_sample['x0'])
df_sample['deltalog'] = eval_statistic_deltalog(xf=df_sample['xf'], x0=df_sample['x0'])


In [None]:
## create ranks

# add an ascending rank column "<statistic>_rank" for each statistic
for statistic in ("s", "deltalog"):
    df_sample[statistic + "_rank"] = df_sample[statistic].rank(ascending = True)

In [None]:
# show the two samples with the highest rank in s
df_sample.sort_values('s_rank').tail(2)

In [None]:
# plot correlation between the ranks under both statistics

fig, ax = plt.subplots(figsize = (FIGHEIGHT_TRIPLET,FIGHEIGHT_TRIPLET))

x = df_sample['s_rank']
y = df_sample['deltalog_rank']
ax.scatter(x,y, color = 'tab:orange')

# dashed lines at 80% of the sample size, i.e. the boundary of the top 20% of ranks
ax.axhline(x.shape[0]*0.8, ls = '--')
ax.axvline(x.shape[0]*0.8, ls = '--')

# x and y are already ranks, so the Pearson and Spearman coefficients coincide here;
# the p-values are kept under separate names instead of overwriting each other
r, pval_pearson = stats.pearsonr(x,y)
rho, pval_spearman = stats.spearmanr(x,y)

title = fr'Pearson $r={r:.2f}$, Spearman $\rho={rho:.2f}$'
ax.set_title(title, loc = 'right')

ax.set_xlabel("rank in terms of\nselection coefficient $s$")
# escaped backslashes keep the LaTeX commands intact next to the real newline
ax.set_ylabel("rank in terms of\nalternative statistic $\\Delta \\log$")

# savefig expects the lowercase keyword `dpi`
fig.savefig(FIG_DIR + 'correlation_s_vs_deltalog_with_sample_univariate.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)