### Plot all the measured competitions in a fitness ranking

The data are available in the Supplemental Material of Good et al., Nature (2017).
They can be downloaded from Ben Good's GitHub repository [here](https://github.com/benjaminhgood/LTEE-metagenomic/blob/master/additional_data/Concatenated.LTEE.data.all.csv).

In [None]:
### load data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

In [None]:
import os
import os.path
from os import path

## make sure the export directory exists
## FIG_DIR is the folder that receives every plot/list written by this notebook
import os
FIG_DIR = './figures/LTEE_competitions/'
os.makedirs(name = FIG_DIR, exist_ok = True)
print("All  plots will be stored in: ", FIG_DIR, sep = "\n")

In [None]:
# Load the table of competition measurements used by every cell below.
# NOTE(review): this reads a file from ./output/, not the raw CSV linked in the
# introduction -- presumably a preprocessed version; confirm which notebook or
# script produces it.
df = pd.read_csv('./output/LTEE_all_data.csv')

In [None]:


### execute the shared setup script in this notebook's namespace
# NOTE(review): later cells use `sns`, FIGHEIGHT_TRIPLET, DPI and PAD_INCHES without
# defining them; presumably they come from setup_aesthetics.py -- confirm.
# Read the script inside a `with` block so the file handle is closed before it is executed.
with open('setup_aesthetics.py') as setup_file:
    setup_source = setup_file.read()
exec(setup_source)

### Prepare data for plotting

In [None]:
# constant label for all rows, so that plotting code written for labeled data can be reused
df['label'] = 'evolved'

# rank the competitions under each fitness statistic (ascending; ties share the lowest rank)
df['logit_percycle_rank'] = df.loc[:, 'logit_percycle'].rank(method = 'min', ascending = True)
df['logit_pergen_rank'] = df.loc[:, 'logit_pergen'].rank(method = 'min', ascending = True)

# signed rank difference: per-generation rank minus per-cycle rank
df['deltarank'] = df['logit_pergen_rank'].sub(df['logit_percycle_rank'])

In [None]:


# Alias, not a copy: df_output and df refer to the same DataFrame object, so a
# column added through either name is visible through both.
df_output = df # second name for the same frame, so plotting code written for `df_output` can be reused

In [None]:
## order the competitions by their signed rank difference and pick the first one
# NOTE(review): 'deltarank_abs' is stored here but the sort uses the signed 'deltarank',
# so `select` holds the row with the most negative rank difference -- confirm this is intended.
df_output['deltarank_abs'] = df_output['deltarank'].abs()
df_sorted = df_output.sort_values(by = 'deltarank')
# list-style indexing keeps `select` an Index (of length one) that can be iterated over
select = df_sorted.index[[0]]

In [None]:
# display the selected row(s): `select` is the index label of the first row of df_sorted
df_sorted.loc[select]

### plot correlation

In [None]:
### plot per-generation against per-cycle relative fitness, one point per competition
# NOTE(review): `sns`, FIGHEIGHT_TRIPLET, DPI and PAD_INCHES are not defined in this
# notebook's visible cells; presumably setup_aesthetics.py provides them -- confirm.
fig, ax = plt.subplots(figsize = (FIGHEIGHT_TRIPLET*1.25, FIGHEIGHT_TRIPLET))

x_var = 'logit_percycle'
y_var = 'logit_pergen'
data = df_output
sns.scatterplot(data = data, x = x_var, y = y_var, rasterized = True, ax = ax,
                hue = 'Generation', palette = 'crest', legend=False)

### highlight the selected point(s) with a large translucent marker behind the data
## is_labeled = True suppresses the legend entry; initialise it to False to attach the
## label 'max. disagreement' to the first highlighted point only
is_labeled = True
for i in select:
    A, B = float(data.loc[i, x_var]), float(data.loc[i, y_var])
    if not is_labeled:
        label = 'max. disagreement'
        is_labeled = True
    else:
        label = None
    ax.scatter(A,B,s=200,color ='tab:blue', zorder = -1,label = label, alpha = 0.25)

### add colorbar
## the scatterplot is drawn without a legend; build a ScalarMappable with the same palette,
## normalised to the min/max of 'Generation', to feed the colorbar
cmap = sns.color_palette('crest', as_cmap=True)
norm = plt.Normalize(data['Generation'].min(), data['Generation'].max())
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])

## pass `ax` explicitly: the mappable is not attached to any Axes, so the figure
## cannot infer which Axes the colorbar should take its space from
ax.figure.colorbar(sm, ax = ax, label = '# generations in evo. experiment',use_gridspec=True)

### annotate
ax.set_xlabel('relative fitness per-cycle:' + r'  $s^{\mathrm{logit}}_{\mathrm{cycle}}$')
ax.set_ylabel('relative fitness per-generation:' + r'  $s^{\mathrm{logit}}_{\mathrm{gen}}$')

title = f"n = {sum(data['label']=='evolved')} measured competitions"
ax.set_title(title, loc = 'left')

## the savefig keyword is lowercase `dpi`; an uppercase `DPI=` is not a savefig parameter
fig.savefig(FIG_DIR + f"correlation_{x_var}_vs_{y_var}.pdf", dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)


### plot misranking

In [None]:
### plot the rank difference ('deltarank') against per-cycle relative fitness
# NOTE(review): `sns`, FIGHEIGHT_TRIPLET, DPI and PAD_INCHES are not defined in this
# notebook's visible cells; presumably setup_aesthetics.py provides them -- confirm.
fig, ax = plt.subplots(figsize = (FIGHEIGHT_TRIPLET*1.25, FIGHEIGHT_TRIPLET))

x_var = 'logit_percycle'
y_var = 'deltarank'
data = df_output
sns.scatterplot(data = data, x = x_var, y = y_var, rasterized = True, ax = ax,
                hue = 'Generation', palette = 'crest', legend=False)

### highlight the selected point(s) with a large translucent marker behind the data
## is_labeled = True suppresses the legend entry; initialise it to False to attach the
## label 'max. disagreement' to the first highlighted point only
is_labeled = True
for i in select:
    A, B = float(data.loc[i, x_var]), float(data.loc[i, y_var])
    if not is_labeled:
        label = 'max. disagreement'
        is_labeled = True
    else:
        label = None
    ax.scatter(A,B,s=200,color ='tab:blue', zorder = -1,label = label, alpha = 0.25)

### plot horizontal line for orientation (zero rank difference)
ax.axhline(0,ls = '--', color = 'black')

### add colorbar
## the scatterplot is drawn without a legend; build a ScalarMappable with the same palette,
## normalised to the min/max of 'Generation', to feed the colorbar
cmap = sns.color_palette('crest', as_cmap=True)
norm = plt.Normalize(data['Generation'].min(), data['Generation'].max())
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])

## pass `ax` explicitly: the mappable is not attached to any Axes, so the figure
## cannot infer which Axes the colorbar should take its space from
ax.figure.colorbar(sm, ax = ax, label = '#generations in evo. experiment',use_gridspec=True)

### annotate
ax.set_xlabel('relative fitness per-cycle:' + r'  $s^{\mathrm{logit}}_{\mathrm{cycle}}$')
ax.set_ylabel('rank difference between fitness\nper-generation and fitness per-cycle')

title = f"n = {sum(data['label']=='evolved')} measured competitions"
ax.set_title(title, loc = 'left')

## the savefig keyword is lowercase `dpi`; an uppercase `DPI=` is not a savefig parameter
fig.savefig(FIG_DIR + f"residuals_{x_var}_vs_{y_var}.pdf", dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)


In [None]:
# smallest (most negative) rank difference; `data` is the frame assigned in the
# preceding plotting cell (df_output)
data['deltarank'].min()

### plot on foldchange phase diagram

In [None]:
### plot cloud of points in the plane of log fold-changes (wild-type on x, mutant on y)
# NOTE(review): `sns`, FIGHEIGHT_TRIPLET, DPI and PAD_INCHES are not defined in this
# notebook's visible cells; presumably setup_aesthetics.py provides them -- confirm.
fig, ax = plt.subplots(figsize = (FIGHEIGHT_TRIPLET*1.25, FIGHEIGHT_TRIPLET))

x_var = 'logfc_wt'
y_var = 'logfc_mut'
data = df_output
sns.scatterplot(data = data, x = x_var, y = y_var, rasterized = True, ax = ax,
                hue = 'Generation', palette = 'crest', legend=False)

## find value limits; the cone construction below divides by the x-value of the
## selected point, so all log fold-changes are required to be positive
fcmax = np.max([data[x_var].max(),data[y_var].max()])
fcmin = np.min([data[x_var].min(),data[y_var].min()])
assert fcmin > 0

### find axis limits (as chosen automatically by the scatterplot)
xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()
fcwt_vec = np.linspace(xmin,xmax, num = 100)
fcwt_vec = np.concatenate((-fcwt_vec,fcwt_vec))
color_percycle = 'tab:grey'
color_pergen = 'navy'

## plot diagonal y = x across the whole visible range, then restore the axis limits
## so that the extra line does not rescale the plot
diag_lo, diag_hi = min(xmin, ymin), max(xmax, ymax)
ax.plot([diag_lo,diag_hi],[diag_lo,diag_hi], color = 'black', ls = '--')
ax.set_xlim(xmin,xmax)
ax.set_ylim(ymin,ymax)

## highlight each selected point and shade the red cone attached to it:
## the region between the line of slope 1 through the point (A, B)
## and the straight line through the origin and (A, B)
for i in select:
    A, B = float(data.loc[i, x_var]), float(data.loc[i, y_var])
    # no legend entry for the highlight marker (explicit None instead of a
    # `label` variable left over from an earlier cell)
    ax.scatter(A,B,s=200,color ='tab:blue', zorder = -1,label = None, alpha = 0.25)

    x_fill = np.linspace(fcwt_vec[0],fcwt_vec[-1])
    y_fill = B/A*x_fill

    ax.fill_between(x_fill, (x_fill - A) + B, y_fill, color='tab:red', alpha=0.25)

### add colorbar
## the scatterplot is drawn without a legend; build a ScalarMappable with the same palette,
## normalised to the min/max of 'Generation', to feed the colorbar
cmap = sns.color_palette('crest', as_cmap=True)
norm = plt.Normalize(data['Generation'].min(), data['Generation'].max())
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])

## pass `ax` explicitly: the mappable is not attached to any Axes, so the figure
## cannot infer which Axes the colorbar should take its space from
ax.figure.colorbar(sm, ax = ax, label = '#generations in evo. experiment',use_gridspec=True)

### annotate
ax.set_xlabel(r"wild-type log fold-change: $\mathrm{LFC}_{\mathrm{wt}}$")
ax.set_ylabel(r"mutant log fold-change: $\mathrm{LFC}_{\mathrm{mut}}$")

title = f"n = {sum(data['label']=='evolved')} measured competitions"
ax.set_title(title, loc = 'left')

## the savefig keyword is lowercase `dpi`; an uppercase `DPI=` is not a savefig parameter
fig.savefig(FIG_DIR + 'scatterplot_logfc_wt_vs_logfc_mut.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

### store axis limits of this plot under dedicated names
lfc_xmin, lfc_xmax = ax.get_xlim()
lfc_ymin, lfc_ymax = ax.get_ylim()

### Plot misranking between log and logit encoding

In [None]:
# signed rank difference: rank of 'log_percycle' minus rank of 'logit_percycle'
# NOTE(review): rank() is called with its defaults here, whereas the ranks stored in
# 'logit_percycle_rank' / 'logit_pergen_rank' use method='min'; tied values are
# therefore ranked differently -- confirm that this is intended.
df_output['deltarank_log'] = df_output['log_percycle'].rank() - df_output['logit_percycle'].rank()

In [None]:
### plot the rank difference between log and logit encoding ('deltarank_log')
### against per-cycle relative fitness
# NOTE(review): `sns`, FIGHEIGHT_TRIPLET, DPI and PAD_INCHES are not defined in this
# notebook's visible cells; presumably setup_aesthetics.py provides them -- confirm.
fig, ax = plt.subplots(figsize = (FIGHEIGHT_TRIPLET*1.25, FIGHEIGHT_TRIPLET))

x_var = 'logit_percycle'
y_var = 'deltarank_log'
data = df_output
sns.scatterplot(data = data, x = x_var, y = y_var, rasterized = True, ax = ax,
                hue = 'Generation', palette = 'crest', legend=False)

### highlight the selected point(s) with a large translucent marker behind the data
## is_labeled = True suppresses the legend entry; initialise it to False to attach the
## label 'max. disagreement' to the first highlighted point only
is_labeled = True
for i in select:
    A, B = float(data.loc[i, x_var]), float(data.loc[i, y_var])
    if not is_labeled:
        label = 'max. disagreement'
        is_labeled = True
    else:
        label = None
    ax.scatter(A,B,s=200,color ='tab:blue', zorder = -1,label = label, alpha = 0.25)

### plot horizontal line for orientation (zero rank difference)
ax.axhline(0,ls = '--', color = 'black')

### add colorbar
## the scatterplot is drawn without a legend; build a ScalarMappable with the same palette,
## normalised to the min/max of 'Generation', to feed the colorbar
cmap = sns.color_palette('crest', as_cmap=True)
norm = plt.Normalize(data['Generation'].min(), data['Generation'].max())
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])

## pass `ax` explicitly: the mappable is not attached to any Axes, so the figure
## cannot infer which Axes the colorbar should take its space from
ax.figure.colorbar(sm, ax = ax, label = '#generations in evo. experiment',use_gridspec=True)

### annotate
ax.set_xlabel('relative fitness logit-encoding:' + r'  $s^{\mathrm{logit}}_{\mathrm{cycle}}$')
ax.set_ylabel('rank difference between fitness\nunder log and logit encoding')

title = f"n = {sum(data['label']=='evolved')} measured competitions"
ax.set_title(title, loc = 'left')

## the savefig keyword is lowercase `dpi`; an uppercase `DPI=` is not a savefig parameter
fig.savefig(FIG_DIR + f"residuals_{x_var}_vs_{y_var}.pdf", dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)


### plot on frequency phase diagram

In [None]:
# list the available column names (the plots below use 'xmut.0' and 'xmut.1')
df_output.columns

In [None]:
### plot cloud of points in the plane of initial ('xmut.0') vs. final ('xmut.1') mutant frequency
# NOTE(review): `sns`, FIGHEIGHT_TRIPLET, DPI and PAD_INCHES are not defined in this
# notebook's visible cells; presumably setup_aesthetics.py provides them -- confirm.
fig, ax = plt.subplots(figsize = (FIGHEIGHT_TRIPLET*1.25, FIGHEIGHT_TRIPLET))

x_var = 'xmut.0'
y_var = 'xmut.1'
data = df_output
sns.scatterplot(data = data, x = x_var, y = y_var, rasterized = True, ax = ax,
                hue = 'Generation', palette = 'crest', legend=False)

## plot diagonal y = x across the whole visible range, then restore the axis limits
## so that the extra line does not rescale the plot
xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()
diag_lo, diag_hi = min(xmin, ymin), max(xmax, ymax)
ax.plot([diag_lo,diag_hi],[diag_lo,diag_hi], color = 'black', ls = '--')
ax.set_xlim(xmin,xmax)
ax.set_ylim(ymin,ymax)

### add colorbar
## the scatterplot is drawn without a legend; build a ScalarMappable with the same palette,
## normalised to the min/max of 'Generation', to feed the colorbar
cmap = sns.color_palette('crest', as_cmap=True)
norm = plt.Normalize(data['Generation'].min(), data['Generation'].max())
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])

## pass `ax` explicitly: the mappable is not attached to any Axes, so the figure
## cannot infer which Axes the colorbar should take its space from
ax.figure.colorbar(sm, ax = ax, label = '#generations in evo. experiment',use_gridspec=True)

### annotate
ax.set_xlabel(r"initial mutant frequency $x(t_0)$")
ax.set_ylabel(r"final mutant frequency $x(t_f)$")

title = f"n = {sum(data['label']=='evolved')} measured competitions"
ax.set_title(title, loc = 'left')

## the savefig keyword is lowercase `dpi`; an uppercase `DPI=` is not a savefig parameter
# NOTE(review): the output filename spells 'frequedncy'; kept unchanged here because
# renaming the file could break references to it elsewhere -- fix deliberately if desired.
fig.savefig(FIG_DIR + 'scatterplot_initial_vs_final_frequedncy.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)
              

### compare with isoclines in terms of frequency

In [None]:
def eval_statistic(xf,x0, phi = lambda x: np.log(x/(1-x))):
    """Return the change of the encoded frequency, phi(xf) - phi(x0).

    xf  -- final frequency
    x0  -- initial frequency
    phi -- encoding applied to both frequencies; the default is the
           logit, log(x / (1 - x))
    """
    encoded_final = phi(xf)
    encoded_initial = phi(x0)
    return encoded_final - encoded_initial

## quick smoke call with the default encoding (the result is discarded)
eval_statistic(0.55, 0.45)

def eval_statistic_s(xf,x0):
    """Evaluate eval_statistic with the logit encoding log(x / (1 - x))."""
    def logit(x):
        return np.log(x/(1-x))
    return eval_statistic(xf, x0, phi = logit)

def eval_statistic_deltalog(xf,x0):
    """Evaluate eval_statistic with the plain logarithm as encoding: log(xf) - log(x0)."""
    return eval_statistic(xf, x0, phi = np.log)

def get_isocline_deltalog(x0, level ):
    """Return the final frequency x0 * exp(level) for each initial frequency x0.

    These are the points at which log(xf) - log(x0) equals `level`.
    x0 may be a scalar or an array-like; the result follows numpy broadcasting.
    """
    fold_change = np.exp(level)
    return np.multiply(x0, fold_change)

### smoke call on a small grid (the result is discarded)
get_isocline_deltalog(x0 = np.geomspace(0.01,0.1,num=10), level = 0.0)

def get_isocline_s(x0, level ):
    """Return the final frequency x0*exp(level) / (1 + x0*exp(level) - x0).

    These are the points at which the logit difference
    log(xf/(1-xf)) - log(x0/(1-x0)) equals `level`.
    x0 may be a scalar or an array-like; the result follows numpy broadcasting.
    """
    numerator = np.multiply(x0, np.exp(level))
    denominator = 1 + numerator - x0
    return np.divide(numerator, denominator)


### smoke call on a small grid (the result is discarded)
get_isocline_s(x0 = np.geomspace(0.01,0.1,num=10), level = 0.0)

In [None]:


### plot cloud of points (with marginal distributions) and overlay the isoclines of both statistics
# NOTE(review): `sns`, FIGHEIGHT_TRIPLET, DPI and PAD_INCHES are not defined in this
# notebook's visible cells; presumably setup_aesthetics.py provides them -- confirm.
grid = sns.jointplot(data = df, x = 'xmut.0', y = 'xmut.1', color = 'tab:orange',
             height = FIGHEIGHT_TRIPLET, space = 0, hue='Generation', palette = 'crest',
                    joint_kws = {'rasterized':True})

ax = grid.ax_joint

## isocline levels: 6 evenly spaced values in [0.001, 1], once negated and once as they are
levels = np.outer([-1,1],np.linspace(0.001,1,num = 6)).flatten()
## grid of initial frequencies that stays strictly inside (0, 1)
x0_vec = np.linspace(0.0001,0.9999, num = 100)
color_s = 'tab:grey'
color_deltalog = 'navy'

for level in levels:
    ### plot deltalog isoclines
    y = get_isocline_deltalog(x0 = x0_vec, level = level)
    ax.plot(x0_vec, y, color = color_deltalog)

    ## plot s isoclines
    y = get_isocline_s(x0 = x0_vec, level = level)
    ax.plot(x0_vec, y, color = color_s)

## plot diagonal
ax.plot([0,1],[0,1], color = 'red', ls = '--', label = 'y=x')

## add legend items via empty lines (the isoclines themselves are drawn without labels)
## raw string: '\D' and '\l' are not valid escape sequences in a normal string literal
ax.plot([],[], color = color_deltalog, label = r'$\Delta \log$ isocline')
ax.plot([],[], color = color_s, label = '$s$ isocline')

ax.set_xlim(0,1)
ax.set_ylim(0,1)

ax.set_xlabel('initial mutant frequency $x_0$')
ax.set_ylabel('final mutant frequency $x_f$')

ax.legend(loc = 'upper left', bbox_to_anchor = (1.3,1))

## the savefig keyword is lowercase `dpi`; an uppercase `DPI=` is not a savefig parameter
grid.fig.savefig(FIG_DIR + 'isoclines_s_vs_deltalog_with_sample.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)

In [None]:
### plot frequencies in overlay: one line segment per row of df,
### from the initial frequency at x = 0 to the final frequency at x = 1
# NOTE(review): `sns`, FIGHEIGHT_TRIPLET, DPI and PAD_INCHES are not defined in this
# notebook's visible cells; presumably setup_aesthetics.py provides them -- confirm.
fig, ax = plt.subplots(1,1, sharex=True, figsize = (FIGHEIGHT_TRIPLET,FIGHEIGHT_TRIPLET))

### initial and final frequency as the two columns of y; x places them at 0 and 1
y = df[['xmut.0', 'xmut.1']]
x = np.multiply(np.ones_like(y), [0,1],)

## transpose so that every row of df becomes its own two-point line
_ = ax.plot(x.T,y.T,ls = '-',color = 'silver', alpha = 0.5)

## helper column that puts all final frequencies at x = 1 for the scatterplot
df['tmp'] = 1
sns.scatterplot(data = df, x = 'tmp',  y = 'xmut.1', palette  = 'crest', hue = 'Generation',
                ax = ax, zorder = 3)

ax.legend(loc = 'upper left', bbox_to_anchor = (1.05,1))
ax.legend_.set_title('Generation')

ax.set_xlabel('time')
ax.set_ylabel('frequency x')

ax.set_ylim(0,1)

## the savefig keyword is lowercase `dpi`; an uppercase `DPI=` is not a savefig parameter
fig.savefig(FIG_DIR + 'lineplot_LTEE_competition_freq_in_overlay.pdf', dpi = DPI, bbox_inches = 'tight', pad_inches = PAD_INCHES)