In [None]:
import sys
import pathlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import pickle

In [None]:
from scipy import stats

In [None]:
import importlib
import evotsc_lib
import evotsc_plot
importlib.reload(evotsc_lib)
importlib.reload(evotsc_plot)

In [None]:
import met_brewer

In [None]:
# Shared styling constants read (as globals) by the plotting functions below.
label_fontsize=20   # font size of axis labels
tick_fontsize=15    # font size of tick labels
legend_fontsize=15  # font size of legend entries
dpi=300             # resolution used when creating and saving figures

In [None]:
# NOTE(review): these are absolute, machine-specific paths; the notebook only
# runs as-is on a machine where this directory layout exists.
sc_path = pathlib.Path('/Users/theotime/Desktop/evotsc/phd/epistasis/with-sc/')
control_path = pathlib.Path('/Users/theotime/Desktop/evotsc/phd/epistasis/control/')
only_sc_path = pathlib.Path('/Users/theotime/Desktop/evotsc/phd/epistasis/only-sc/')
# Last generation kept when loading the statistics (rows with 'Gen' > gen are dropped)
gen = 50_000
gene_types = ['AB', 'A', 'B'] # Name of each gene type
gene_type_color = ['tab:blue', 'tab:red', 'tab:green'] #AB, A, B
# Colors used for the SC and control curves in plot_rel_fitness_agrege
sc_color = 'tab:green'
control_color = 'tab:red'

In [None]:
# Parameters are read from a single run directory and reused for the whole experiment
sc_params = evotsc_lib.read_params(sc_path.joinpath('wt_00/shuffle_00/rep_00')) # Only the seed is different
# Number of 'wt*' sub-directories found directly under the experiment directory
nb_sc_wt = len([d for d in sc_path.iterdir() if (d.is_dir() and d.name.startswith('wt'))])

In [None]:
only_sc_params = evotsc_lib.read_params(only_sc_path.joinpath('wt_00/shuffle_00/rep_00'))

In [None]:
control_params = evotsc_lib.read_params(control_path.joinpath('wt_00/shuffle_00/rep_00'))
nb_control_wt = len([d for d in control_path.iterdir() if (d.is_dir() and d.name.startswith('wt'))])

In [None]:
genes_per_type = sc_params["nb_genes"] / len(gene_types) # Doesn't change

## Get the evolutionary statistics

In [None]:
def get_stats(exp_name, gen, params):
    """Load every `stats.csv` of an experiment into a single DataFrame.

    The experiment directory is expected to be laid out as
    `wt*/shuffle*/rep*/stats.csv`. Directories are visited in sorted order and
    their rank at each level is stored in the 'WT', 'Shuffle' and 'Replicate'
    columns, which are inserted in front of the columns read from the file.

    Args:
        exp_name: `pathlib.Path` of the experiment directory.
        gen: last generation to keep; rows with 'Gen' > gen are dropped.
        params: run parameters; 'intergene_poisson_lam' and
            'basal_sc_mutation_prob' decide whether the 'Genome size' and
            'Basal SC' columns are read.

    Returns:
        A DataFrame with one row per (run, generation), plus a 'Log Fitness'
        column holding the natural log of 'Fitness'. The per-file row index is
        kept as-is (it is not reset across files).

    Raises:
        FileNotFoundError: if no run directory is found under `exp_name`.
    """
    data_cols = ['Gen', 'Fitness', 'ABon_A', 'ABon_B', 'Aon_A', 'Aon_B', 'Bon_A', 'Bon_B']

    # These columns are only requested when the matching parameter is non-zero
    if params['intergene_poisson_lam'] != 0.0:
        data_cols += ['Genome size']

    if params['basal_sc_mutation_prob'] != 0.0:
        data_cols += ['Basal SC']

    def _subdirs(parent, prefix):
        # Sorted sub-directories of `parent` whose name starts with `prefix`
        return sorted(d for d in parent.iterdir() if d.is_dir() and d.name.startswith(prefix))

    # Collect the per-run frames and concatenate once at the end: calling
    # pd.concat inside the loop copies the accumulated data at every iteration.
    frames = []

    for i_wt, wt_dir in enumerate(_subdirs(exp_name, 'wt')):
        for i_shuffle, shuffle_dir in enumerate(_subdirs(wt_dir, 'shuffle')):
            for i_rep, rep_dir in enumerate(_subdirs(shuffle_dir, 'rep')):
                res_dir = pd.read_csv(rep_dir.joinpath('stats.csv'), usecols=data_cols)

                res_dir.insert(0, 'WT', i_wt)
                res_dir.insert(1, 'Shuffle', i_shuffle)
                res_dir.insert(2, 'Replicate', i_rep)

                frames.append(res_dir)

    if not frames:
        raise FileNotFoundError(f'No wt*/shuffle*/rep* run directory found in {exp_name}')

    res = pd.concat(frames)
    res = res[res['Gen'] <= gen].copy()

    res['Log Fitness'] = np.log(res['Fitness'])

    return res

In [None]:
sc_stats = get_stats(sc_path, gen, sc_params)

In [None]:
only_sc_stats = get_stats(only_sc_path, gen, only_sc_params)

In [None]:
control_stats = get_stats(control_path, gen, control_params)

### Get the original individuals

In [None]:
def get_orig_indivs(exp_path, params):
    """Unpickle and evaluate the individuals stored in the `best_indivs*` directory.

    The first entry of `exp_path` whose name starts with 'best_indivs' is used;
    every regular file directly inside it is unpickled (in sorted path order)
    and evaluated with `params['sigma_A']` and `params['sigma_B']`.

    Returns the list of evaluated individuals.
    """
    candidates = [entry for entry in exp_path.iterdir()
                  if entry.name.startswith('best_indivs')]
    best_dir = candidates[0]

    def _load_and_evaluate(pickle_path):
        # NOTE(review): pickle.load executes arbitrary code; only use on trusted files
        with open(pickle_path, 'rb') as handle:
            loaded = pickle.load(handle)
        loaded.evaluate(params['sigma_A'], params['sigma_B'])
        return loaded

    pickle_paths = sorted([entry for entry in best_dir.iterdir() if entry.is_file()])

    return [_load_and_evaluate(pickle_path) for pickle_path in pickle_paths]

In [None]:
sc_orig_indivs = get_orig_indivs(sc_path, sc_params)

In [None]:
control_orig_indivs = get_orig_indivs(control_path, control_params)

### Add relative fitness (with regard to the original individuals) to the stats

In [None]:
def add_rel_fitness_to_stats(stats, orig_indivs):
    """Return a copy of `stats` with a 'Log Rel Fitness' column added.

    For each value of the 'WT' column, 'Log Rel Fitness' is 'Log Fitness' minus
    the log of the fitness of the matching original individual,
    `orig_indivs[i_wt].fitness`.

    Args:
        stats: DataFrame with at least the 'WT' and 'Log Fitness' columns.
        orig_indivs: original individuals, indexable by the values of 'WT';
            each must expose a `fitness` attribute.

    Returns:
        A new DataFrame, with rows grouped by WT in order of first appearance.
        `stats` itself is not modified. An empty DataFrame is returned when
        `stats` has no rows.
    """
    per_wt = []

    for i_wt in stats['WT'].unique():

        wt_rep = stats[stats['WT'] == i_wt].copy()
        # Use the individuals passed as argument: the previous version read the
        # global `sc_orig_indivs`, whatever experiment `stats` came from.
        wt_rep['Log Rel Fitness'] = wt_rep['Log Fitness'] - np.log(orig_indivs[i_wt].fitness)

        per_wt.append(wt_rep)

    if not per_wt:
        return pd.DataFrame()

    # Single concatenation instead of growing the result inside the loop
    return pd.concat(per_wt)

In [None]:
sc_stats = add_rel_fitness_to_stats(sc_stats, sc_orig_indivs)

In [None]:
only_sc_stats = add_rel_fitness_to_stats(only_sc_stats, sc_orig_indivs)

In [None]:
control_stats = add_rel_fitness_to_stats(control_stats, control_orig_indivs)

### Plot, as an example, the genome of one original individual and of one of its shuffled copies

In [None]:
def plot_orig_indivs(orig_indivs, exp_path, params=None):
    """Plot the genome of the original individual number 1 and print its fitness.

    Only the individual at index 1 is plotted; the others are skipped. The
    figure is written to
    `exp_path / 'init_indiv_wt_01_no_shuffle_env_A.pdf'`.

    Args:
        orig_indivs: list of original individuals (each exposing `fitness`
            and `clone()`).
        exp_path: `pathlib.Path` of the directory receiving the PDF.
        params: run parameters providing 'sigma_A'. When omitted, the
            module-level `control_params` is used, as in the previous version
            of this function.
    """
    for i_wt, orig_indiv in enumerate(orig_indivs):

        if i_wt != 1:
            continue

        # Resolved lazily so that the global is only needed when actually plotting
        sigma_A = (control_params if params is None else params)['sigma_A']

        plot_name = exp_path.joinpath(f'init_indiv_wt_{i_wt:02}_no_shuffle_env_A.pdf')
        print(f'WT {i_wt}: Fitness {orig_indiv.fitness:e}')
        # Plot a clone rather than the stored individual itself
        evotsc_plot.plot_genome_and_tsc(orig_indiv.clone(), sigma=sigma_A,
                                        coloring_type='on-off', show_bar=True, print_ids=True,
                                        id_interval=5, plot_name=plot_name)

In [None]:
plot_orig_indivs(sc_orig_indivs, sc_path)

In [None]:
def plot_initial_shuffled_indivs(params, exp_path):
    """Plot the genome of the first shuffled copy (shuffle 0) of WT number 1.

    The individual is unpickled from the first file whose name starts with
    'best' in the selected `wt*/shuffle*` directory, evaluated with
    `params['sigma_A']` and `params['sigma_B']`, its fitness is printed, and
    the figure is saved to
    `exp_path / 'init_indiv_wt_01_shuffle_00_env_A.pdf'`.

    Args:
        params: run parameters providing 'sigma_A' and 'sigma_B'.
        exp_path: `pathlib.Path` of the experiment directory.
    """
    wt_dirs = sorted([d for d in exp_path.iterdir() if (d.is_dir() and d.name.startswith('wt'))])

    for i_wt, wt_dir in enumerate(wt_dirs):

        if i_wt != 1:
            continue

        # Same filter as get_stats, so that shuffle indices agree between the two
        shuffle_dirs = sorted([d for d in wt_dir.iterdir()
                               if (d.is_dir() and d.name.startswith('shuffle'))])

        for i_shuffle, shuffle_dir in enumerate(shuffle_dirs):

            if i_shuffle != 0:
                continue

            shuffle_indiv_path = [f for f in shuffle_dir.iterdir() if f.name.startswith('best')][0]

            # NOTE(review): pickle.load executes arbitrary code; only use on trusted files
            with open(shuffle_indiv_path, 'rb') as shuffle_file:
                shuffle_indiv = pickle.load(shuffle_file)

            shuffle_indiv.evaluate(params['sigma_A'], params['sigma_B'])
            print(f'wt {i_wt} shuffle {i_shuffle}: Fitness {shuffle_indiv.fitness}')

            plot_name = exp_path.joinpath(f'init_indiv_wt_{i_wt:02}_shuffle_{i_shuffle:02}_env_A.pdf')
            # Use the `params` argument (the previous version read the global
            # `sc_params` here, ignoring the argument)
            evotsc_plot.plot_genome_and_tsc(shuffle_indiv, sigma=params['sigma_A'], show_bar=False,
                                            print_ids=True, coloring_type='on-off', id_interval=5,
                                            plot_name=plot_name)

In [None]:
plot_initial_shuffled_indivs(sc_params, sc_path)

### Get the shuffled individuals at the start of each replica

In [None]:
def get_shuffled_indivs(exp_path):
    """Unpickle the individuals stored one level below the `best_indivs*` directory.

    The first entry of `exp_path` whose name starts with 'best_indivs' is used.
    Each of its sub-directories (in sorted order) yields one inner list holding
    the objects unpickled from its regular files (in sorted order).

    Returns a list of lists of unpickled objects; they are not evaluated here.
    """
    matching = [entry for entry in exp_path.iterdir()
                if entry.name.startswith('best_indivs')]
    best_dir = matching[0]

    def _unpickle(pickle_path):
        # NOTE(review): pickle.load executes arbitrary code; only use on trusted files
        with open(pickle_path, 'rb') as handle:
            return pickle.load(handle)

    grouped_indivs = []
    for group_dir in sorted([entry for entry in best_dir.iterdir() if entry.is_dir()]):
        pickle_paths = sorted([entry for entry in group_dir.iterdir() if entry.is_file()])
        grouped_indivs.append([_unpickle(pickle_path) for pickle_path in pickle_paths])

    return grouped_indivs

In [None]:
# Load the shuffled individuals of the SC experiment (one inner list per
# sub-directory of `best_indivs*`), then evaluate each of them with the SC
# parameters, as done for the original individuals in get_orig_indivs.
sc_shuffled_indivs = get_shuffled_indivs(sc_path)
for wt_shuffled_indivs in sc_shuffled_indivs:
    for indiv in wt_shuffled_indivs:
        indiv.evaluate(sc_params['sigma_A'], sc_params['sigma_B'])

In [None]:
control_shuffled_indivs = get_shuffled_indivs(control_path)
for wt_shuffled_indivs in control_shuffled_indivs:
    for indiv in wt_shuffled_indivs:
        indiv.evaluate(control_params['sigma_A'], control_params['sigma_B'])

# Plot the number of active genes of each type over evolutionary time

In [None]:
def plot_gene_activity_all(full_stats, params, exp_path, legend_title, var_type='quantile'):
    """Plot, for each environment, the mean number of activated genes of each type.

    One figure is produced per environment ('A' and 'B') and saved as
    `{exp_path}/gene_activity_env_{env}.pdf`. For every gene type in the global
    `gene_types`, the mean of the `'{type}on_{env}'` column over all rows
    sharing the same 'Gen' is drawn, together with a lighter variability band.

    Args:
        full_stats: DataFrame with a 'Gen' column and the `'{type}on_{env}'` columns.
        params: run parameters; 'nb_genes' sets the y-axis range.
        exp_path: directory receiving the PDF files.
        legend_title: title of the figure legend.
        var_type: which band to draw around the mean:
            'sigma' (mean +/- 2 standard deviations), 'quantile' (first and
            last deciles) or 'minmax' (minimum and maximum). Any other value
            draws the mean only.
    """
    grouped = full_stats.groupby('Gen')
    mean_data = grouped.mean().reset_index()

    # Lower/upper band data, or None when var_type is not recognized
    lower_data = upper_data = None
    if var_type == 'sigma':
        # The groupby result is indexed by 'Gen' while mean_data has a fresh
        # RangeIndex: drop the 'Gen' index so that the arithmetic below is done
        # row by row instead of being aligned on mismatched labels.
        std_data = grouped.std().reset_index(drop=True)
        mean_values = mean_data[std_data.columns]
        lower_data = mean_values - 2 * std_data
        upper_data = mean_values + 2 * std_data
    elif var_type == 'quantile':
        lower_data = grouped.quantile(0.1)
        upper_data = grouped.quantile(0.9)
    elif var_type == 'minmax':
        lower_data = grouped.min()
        upper_data = grouped.max()

    # Upper y bound: number of genes per type
    genes_per_gene_type = params["nb_genes"] / len(gene_types)

    for env in ["A", "B"]:

        fig, ax1 = plt.subplots(figsize=(9, 4), dpi=dpi)
        delta_y = genes_per_gene_type * 0.05  # 5% margin below and above
        ax1.set_ylim(-delta_y, genes_per_gene_type + delta_y)
        ax1.set_ylabel('Activated genes', fontsize=label_fontsize)
        ax1.set_xlabel('Generation', fontsize=label_fontsize)
        ax1.set_xscale('log')
        ax1.grid(linestyle=':')
        ax1.grid(visible=True, which="minor", axis='x', linestyle=':')

        for i_gene_type, gene_type in enumerate(gene_types):
            column = f"{gene_type}on_{env}"
            color = gene_type_color[i_gene_type]

            ax1.plot(mean_data['Gen'], mean_data[column],
                     color=color,
                     linewidth=2,
                     label=gene_type)

            if lower_data is not None:
                for band in (lower_data, upper_data):
                    ax1.plot(mean_data['Gen'], band[column],
                             color=color,
                             alpha=0.3)

        ax1.tick_params(axis='both', which='major', labelsize=tick_fontsize)

        fig.legend(bbox_to_anchor=(0, 0),
                   bbox_transform=ax1.transAxes,
                   loc='lower left',
                   title=legend_title,
                   title_fontsize=legend_fontsize - 2,
                   fontsize=legend_fontsize)

        plt.savefig(f'{exp_path}/gene_activity_env_{env}.pdf', dpi=dpi, bbox_inches='tight')

        plt.show()
        # Release the figure explicitly (one figure is created per environment)
        plt.close(fig)

In [None]:
plot_gene_activity_all(sc_stats, sc_params, sc_path, 'SC mutations')

In [None]:
plot_gene_activity_all(control_stats, control_params, control_path, 'No SC mutations')

In [None]:
plot_gene_activity_all(only_sc_stats, only_sc_params, only_sc_path, 'Only SC mutations')

# Plot fitness, genome size, and basal supercoiling over evolutionary time

In [None]:
def plot_fitness_per_wt(exp_path, full_stats):
    """Plot one fitness curve per WT and save it as `{exp_path}/fitness_per_wt.pdf`.

    For every WT index, the thick line is the exponential of the mean
    'Log Fitness' per generation, and the two light lines are the minimum and
    maximum 'Fitness' per generation. Generation 0 is excluded because both
    axes are logarithmic. Only five colors are defined.
    """
    palette = met_brewer.met_brew(name='Hokusai3', n=6, brew_type='continuous')
    wt_colors = [palette[idx] for idx in (5, 4, 3, 2, 1)]

    plt.figure(figsize=(9, 4), dpi=dpi)

    after_start = full_stats["Gen"] > 0

    for wt_id in range(full_stats['WT'].nunique()):
        by_gen = full_stats[after_start & (full_stats['WT'] == wt_id)].groupby('Gen')

        # Central curve: exp of the mean log-fitness
        mean_data = by_gen.mean().reset_index()
        plt.plot(mean_data['Gen'],
                 np.exp(mean_data['Log Fitness']),
                 color=wt_colors[wt_id],
                 label=f'WT {wt_id}',
                 linewidth=2)

        # Envelope: min then max of the raw fitness
        for envelope in (by_gen.min(), by_gen.max()):
            plt.plot(mean_data['Gen'],
                     envelope['Fitness'],
                     color=wt_colors[wt_id],
                     alpha=0.3)

    plt.xscale('log')
    plt.yscale('log')
    plt.grid(linestyle=':')
    plt.grid(visible=True, which="minor", axis='x', linestyle=':')

    plt.xlabel('Generation', fontsize=label_fontsize)
    plt.ylabel('Fitness', fontsize=label_fontsize)

    plt.legend()

    plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    plt.savefig(f'{exp_path}/fitness_per_wt.pdf', dpi=dpi, bbox_inches='tight')

In [None]:
#plot_fitness_per_wt(sc_path, sc_stats)

In [None]:
#plot_fitness_per_wt(only_sc_path, only_sc_stats)

In [None]:
def plot_fitness_grouped(sc_stats, control_stats, exp_path, only_sc_stats=None):
    """Plot the fitness of the control, SC and SC-only runs on a single figure.

    For each experiment, the thick line is the exponential of the mean
    'Log Fitness' per generation and the two light lines are the first and
    last deciles of 'Fitness'. Generation 0 is excluded because both axes are
    logarithmic. The figure is saved as `{exp_path}/fitness_grouped.pdf`.

    Args:
        sc_stats: statistics of the supercoiling runs.
        control_stats: statistics of the control runs.
        exp_path: directory receiving the PDF file.
        only_sc_stats: statistics of the SC-only runs. When omitted, the
            module-level `only_sc_stats` is used, which is what the previous
            version of this function always did implicitly.
    """
    if only_sc_stats is None:
        # Backward-compatible fallback on the notebook-level variable
        only_sc_stats = globals()['only_sc_stats']

    stats = [control_stats[control_stats['Gen'] > 0].copy(),
             sc_stats[sc_stats['Gen'] > 0].copy(),
             only_sc_stats[only_sc_stats['Gen'] > 0].copy()]

    name = ['Control runs', 'Supercoiling runs', 'SC only runs']

    all_colors = met_brewer.met_brew(name='Hokusai3', n=6, brew_type='continuous')
    colors = [all_colors[4], all_colors[3], all_colors[2]]

    plt.figure(figsize=(9, 4), dpi=dpi)

    for exp_stats, exp_name, exp_color in zip(stats, name, colors):
        by_gen = exp_stats.groupby('Gen')

        mean_data = by_gen.mean().reset_index()
        plt.plot(mean_data['Gen'],
                 np.exp(mean_data['Log Fitness']),
                 color=exp_color,
                 linewidth=2,
                 label=exp_name)

        # 5*5 = 25 replicates so let's use quantiles
        for decile in (0.1, 0.9):
            plt.plot(mean_data['Gen'],
                     by_gen.quantile(decile)['Fitness'],
                     color=exp_color,
                     alpha=0.3)

    plt.xscale('log')
    plt.yscale('log')
    plt.grid(linestyle=':')
    plt.grid(visible=True, which="minor", axis='x', linestyle=':')

    plt.xlabel('Generation', fontsize=label_fontsize)
    plt.ylabel('Fitness', fontsize=label_fontsize)

    plt.legend(fontsize=legend_fontsize, loc='upper left')

    plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    plt.savefig(f'{exp_path}/fitness_grouped.pdf', dpi=dpi, bbox_inches='tight')

In [None]:
plot_fitness_grouped(sc_stats, control_stats, sc_path)

In [None]:
def plot_relative_fitness_grouped(sc_stats, control_stats, exp_path, sc_only=False):
    """Plot the relative fitness of two sets of runs on a single figure.

    `control_stats` is drawn first and `sc_stats` second. For each, the thick
    line is the exponential of the mean 'Log Rel Fitness' per generation and
    the light lines are the first and last deciles of exp('Log Rel Fitness').
    Generation 0 is excluded because both axes are logarithmic. The figure is
    saved as `{exp_path}/relative_fitness_grouped.pdf`.

    With `sc_only=True` the legend entries become 'SC mutations' (first set)
    and 'SC mutations only' (second set).
    """
    datasets = []
    for frame in (control_stats, sc_stats):
        positive_gens = frame[frame["Gen"] > 0].copy()
        positive_gens['Rel Fitness'] = np.exp(positive_gens['Log Rel Fitness'])
        datasets.append(positive_gens)

    if sc_only:
        labels = ['SC mutations', 'SC mutations only']
    else:
        labels = ['No SC mutations', 'SC mutations']

    palette = met_brewer.met_brew(name='Hokusai3', n=6, brew_type='continuous')
    curve_colors = [palette[5], palette[3]]

    plt.figure(figsize=(9, 4), dpi=dpi)

    for data, label, color in zip(datasets, labels, curve_colors):
        by_gen = data.groupby('Gen')

        mean_data = by_gen.mean().reset_index()
        plt.plot(mean_data['Gen'],
                 np.exp(mean_data['Log Rel Fitness']),
                 color=color,
                 linewidth=2,
                 label=label)

        # First and last deciles of the relative fitness
        for decile in (0.1, 0.9):
            plt.plot(mean_data['Gen'],
                     by_gen.quantile(decile)['Rel Fitness'],
                     color=color,
                     alpha=0.3)

    plt.xscale('log')
    plt.yscale('log')
    plt.grid(linestyle=':')
    plt.grid(visible=True, which="minor", axis='x', linestyle=':')

    plt.xlabel('Generation', fontsize=label_fontsize)
    plt.ylabel('Relative fitness', fontsize=label_fontsize)

    plt.legend(fontsize=legend_fontsize)

    plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    plt.savefig(f'{exp_path}/relative_fitness_grouped.pdf', dpi=dpi, bbox_inches='tight')

In [None]:
plot_relative_fitness_grouped(sc_stats, control_stats, sc_path)

In [None]:
# Two-sample t-test (scipy.stats.ttest_ind) comparing the relative fitness,
# exp('Log Rel Fitness'), of all SC runs against all control runs at
# generation `gen`.
stats.ttest_ind(np.exp(sc_stats[sc_stats['Gen'] == gen]['Log Rel Fitness']),
                np.exp(control_stats[control_stats['Gen'] == gen]['Log Rel Fitness']))

In [None]:
plot_relative_fitness_grouped(only_sc_stats, sc_stats, only_sc_path, sc_only=True)

In [None]:
def plot_relative_fitness_per_wt(full_stats, exp_path):
    """Plot one relative-fitness curve per WT.

    Each curve is the exponential of the mean 'Log Rel Fitness' per
    generation, generation 0 excluded (both axes are logarithmic). The figure
    is saved as `{exp_path}/all_relative_fitness_per_wt_2.pdf`. Only five
    colors are defined.
    """
    palette = met_brewer.met_brew(name='Hokusai3', n=6, brew_type='continuous')
    wt_colors = [palette[idx] for idx in (5, 4, 3, 2, 1)]

    plt.figure(figsize=(9, 4), dpi=dpi)

    after_start = full_stats["Gen"] > 0

    for wt_id in range(full_stats['WT'].nunique()):
        wt_rows = full_stats[after_start & (full_stats['WT'] == wt_id)]

        mean_data = wt_rows.groupby('Gen').mean().reset_index()
        plt.plot(mean_data['Gen'],
                 np.exp(mean_data['Log Rel Fitness']),
                 color=wt_colors[wt_id],
                 label=f'WT {wt_id}',
                 linewidth=2)

    plt.xscale('log')
    plt.yscale('log')
    plt.grid(linestyle=':')
    plt.grid(visible=True, which="minor", axis='x', linestyle=':')

    plt.xlabel('Generation', fontsize=label_fontsize)
    plt.ylabel('Relative fitness', fontsize=label_fontsize)

    plt.legend()

    plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    plt.savefig(f'{exp_path}/all_relative_fitness_per_wt_2.pdf', dpi=dpi, bbox_inches='tight')

In [None]:
#plot_relative_fitness_per_wt(sc_stats, sc_path)

In [None]:
#plot_relative_fitness_per_wt(control_stats, control_path)

In [None]:
def plot_basal_sc_grouped(full_stats, exp_path):
    """Plot the mean basal supercoiling over all runs, with its first and last deciles.

    Rows are restricted to 0 < 'Gen' <= the smallest, over replicate indices,
    of the largest generation reached. A horizontal line at the global
    `sc_params['sigma_basal']`, drawn from 1 to the global `gen`, is labelled
    'No SC mutations'. The figure is saved as `{exp_path}/basal_sc_all.pdf`.
    """
    # Largest generation shared by every replicate index
    last_common_gen = full_stats.groupby('Replicate').max()['Gen'].min()

    in_range = (full_stats["Gen"] > 0) & (full_stats['Gen'] <= last_common_gen)
    sc_by_gen = full_stats[in_range][['Gen', 'Basal SC']].copy().groupby('Gen')

    mean_data = sc_by_gen.mean().reset_index()

    # Note: for the fitness, the mean can be above the quantile
    deciles = [sc_by_gen.quantile(0.1), sc_by_gen.quantile(0.9)]

    palette = met_brewer.met_brew(name='Hokusai3', n=6, brew_type='continuous')
    reference_color, sc_curve_color = palette[5], palette[3]

    plt.figure(figsize=(9,4), dpi=dpi)

    plt.xscale('log')
    plt.grid(linestyle=':')
    plt.grid(visible=True, which="minor", axis='x', linestyle=':')

    plt.xlabel('Generation', fontsize=label_fontsize)
    plt.ylabel('Basal supercoiling', fontsize=label_fontsize)

    plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    # Reference level: the initial basal supercoiling of the SC parameters
    plt.hlines(sc_params['sigma_basal'], 1e0, gen, linewidth=2, color=reference_color,
               zorder=10, label='No SC mutations')

    # Mean over all runs
    plt.plot(mean_data['Gen'],
             mean_data['Basal SC'],
             color=sc_curve_color,
             linewidth=2,
             zorder=10,
             label='SC mutations')

    # First and last deciles
    for decile_data in deciles:
        plt.plot(mean_data['Gen'],
                 decile_data['Basal SC'],
                 color=sc_curve_color,
                 alpha=0.3)

    plt.legend(fontsize=legend_fontsize)

    plt.ylim(-0.069, -0.059)

    plt.savefig(f'{exp_path}/basal_sc_all.pdf', dpi=dpi, bbox_inches='tight')

In [None]:
plot_basal_sc_grouped(sc_stats, sc_path)

In [None]:
def plot_basal_sc_per_wt(full_stats, exp_path):
    """Plot the basal supercoiling of each WT and save it as `{exp_path}/sc_per_wt.pdf`.

    For every WT index, the thick line is the mean 'Basal SC' per generation
    and the two light lines are its first and last deciles. Generation 0 is
    excluded because the x-axis is logarithmic. Only five colors are defined.
    """
    palette = met_brewer.met_brew(name='Hokusai3', n=6, brew_type='continuous')
    wt_colors = [palette[idx] for idx in (5, 4, 3, 2, 1)]

    plt.figure(figsize=(9, 4), dpi=dpi)

    after_start = full_stats["Gen"] > 0

    for wt_id in range(full_stats['WT'].nunique()):
        by_gen = full_stats[after_start & (full_stats['WT'] == wt_id)].groupby('Gen')

        mean_data = by_gen.mean().reset_index()
        plt.plot(mean_data['Gen'],
                 mean_data['Basal SC'],
                 color=wt_colors[wt_id],
                 label=f'WT {wt_id}',
                 linewidth=2)

        # First and last deciles over the runs of this WT
        for decile in (0.1, 0.9):
            plt.plot(mean_data['Gen'],
                     by_gen.quantile(decile)['Basal SC'],
                     color=wt_colors[wt_id],
                     alpha=0.3)

    plt.xscale('log')
    plt.grid(linestyle=':')
    plt.grid(visible=True, which="minor", axis='x', linestyle=':')

    plt.xlabel('Generation', fontsize=label_fontsize)
    plt.ylabel('Basal SC', fontsize=label_fontsize)

    plt.legend(loc='lower left')

    plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    plt.savefig(f'{exp_path}/sc_per_wt.pdf', dpi=dpi, bbox_inches='tight')

In [None]:
plot_basal_sc_per_wt(sc_stats, sc_path)

In [None]:
plot_basal_sc_per_wt(only_sc_stats, only_sc_path)

In [None]:
def plot_relative_basal_sc_per_wt(full_stats, orig_indivs, exp_path):
    """Plot, for each WT, the basal supercoiling divided by that of its original individual.

    For every WT index, the thick line is the mean 'Basal SC' per generation
    divided by `orig_indivs[i_wt].sigma_basal`; the two light lines are the
    minimum and maximum, scaled the same way. Generation 0 is excluded. The
    figure is saved as `{exp_path}/sc_relative_per_wt.pdf`. Only five colors
    are defined.

    Args:
        full_stats: DataFrame with 'Gen', 'WT' and 'Basal SC' columns.
        orig_indivs: original individuals, indexed by WT, exposing `sigma_basal`.
        exp_path: directory receiving the PDF file.
    """
    nb_wt = full_stats['WT'].nunique()

    all_colors = met_brewer.met_brew(name='Hokusai3', n=6, brew_type='continuous')
    colors = [all_colors[5], all_colors[4], all_colors[3], all_colors[2], all_colors[1]]

    plt.figure(figsize=(9, 4), dpi=dpi)

    for i_wt in range(nb_wt):
        wt_stats = full_stats[(full_stats["Gen"] > 0) & (full_stats['WT'] == i_wt)]
        by_gen = wt_stats.groupby('Gen')
        orig_sc = orig_indivs[i_wt].sigma_basal

        mean_data = by_gen.mean().reset_index()
        plt.plot(mean_data['Gen'],
                 mean_data['Basal SC'] / orig_sc,
                 color=colors[i_wt],
                 label=f'WT {i_wt}',
                 linewidth=2)

        # Min and max (5 is not enough for quantiles)
        for envelope in (by_gen.min(), by_gen.max()):
            plt.plot(mean_data['Gen'],
                     envelope['Basal SC'] / orig_sc,
                     color=colors[i_wt],
                     alpha=0.3)

    plt.grid(linestyle=':')
    plt.grid(visible=True, which="minor", axis='x', linestyle=':')

    plt.xlabel('Generation', fontsize=label_fontsize)
    # The plotted quantity is a ratio to the original individual's basal SC,
    # not the raw value, so label it accordingly
    plt.ylabel('Relative basal SC', fontsize=label_fontsize)

    plt.legend(loc='lower left')

    plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    plt.savefig(f'{exp_path}/sc_relative_per_wt.pdf', dpi=dpi, bbox_inches='tight')

In [None]:
#plot_relative_basal_sc_per_wt(sc_stats, sc_orig_indivs, sc_path)

In [None]:
def plot_basal_sc_per_shuffle(full_stats, orig_indivs, exp_path):
    """Plot the basal supercoiling of every run of WT number 1, colored by shuffle.

    Only the WT at index 1 is plotted. Each replicate of each shuffle is drawn
    as its own line; all replicates of a shuffle share one color and a single
    legend entry ('Shock {i_shuffle}'). Generation 0 is excluded because the
    x-axis is logarithmic. The figure is saved as
    `exp_path / 'sc_wt_01_all_shuffles.pdf'`, shown and closed. Only five
    colors are defined.

    Args:
        full_stats: DataFrame with 'Gen', 'WT', 'Shuffle', 'Replicate' and
            'Basal SC' columns.
        orig_indivs: currently unused; kept so that existing calls still work.
        exp_path: `pathlib.Path` of the directory receiving the PDF file.
    """
    full_stats = full_stats[full_stats['Gen'] > 0] # For the log x-axis
    nb_wt = full_stats['WT'].nunique()

    all_colors = met_brewer.met_brew(name='Hokusai3', n=6, brew_type='continuous')
    colors = [all_colors[4], all_colors[3], all_colors[2], all_colors[1], all_colors[0]]

    for i_wt in range(nb_wt):

        if i_wt != 1:
            continue

        wt_stats = full_stats[full_stats['WT'] == i_wt]
        nb_shuffle = wt_stats['Shuffle'].nunique()

        plt.figure(figsize=(9, 4), dpi=dpi)

        plt.xscale('log')

        for i_shuffle in range(nb_shuffle):

            shuffle_stats = wt_stats[wt_stats['Shuffle'] == i_shuffle]

            # Plot the reps of this shuffle in the same color. Iterate over the
            # replicates that actually exist for this shuffle, and label the
            # first one drawn so that the legend entry never goes missing.
            for rep_rank, i_rep in enumerate(shuffle_stats['Replicate'].unique()):
                rep_stats = shuffle_stats[shuffle_stats['Replicate'] == i_rep]

                label = f'Shock {i_shuffle}' if rep_rank == 0 else None

                plt.plot(rep_stats['Gen'],
                         rep_stats['Basal SC'],
                         color=colors[i_shuffle],
                         label=label,
                         linewidth=2)

        plt.grid(linestyle=':')
        plt.grid(visible=True, which="minor", axis='x', linestyle=':')

        plt.xlabel('Generation', fontsize=label_fontsize)
        plt.ylabel('Basal SC', fontsize=label_fontsize)

        plt.legend(loc='lower left')

        plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

        plt.savefig(exp_path.joinpath(f'sc_wt_{i_wt:02}_all_shuffles.pdf'), dpi=dpi, bbox_inches='tight')
        plt.show()
        plt.close()

In [None]:
#plot_basal_sc_per_shuffle(sc_stats, sc_orig_indivs, sc_path)

In [None]:
plot_basal_sc_per_shuffle(only_sc_stats, sc_orig_indivs, only_sc_path)

## Interesting stats: fitness relative to the ancestor at the last generation

In [None]:
def compute_rel_fitness_at(full_stats, orig_indivs, gen):
    """Compute, per WT, the mean fitness at generation `gen` relative to the original individual.

    Args:
        full_stats: DataFrame with 'WT', 'Replicate', 'Gen' and 'Fitness' columns.
        orig_indivs: original individuals, indexable by the values of 'WT';
            each must expose a `fitness` attribute.
        gen: generation at which the fitness is read.

    Returns:
        A DataFrame indexed by 'WT' with the per-WT means of 'Replicate',
        'Gen' and 'Fitness' at generation `gen`, plus 'Orig Fitness' (fitness
        of the original individual) and 'Rel Fitness' (= 'Fitness' / 'Orig Fitness').
    """
    at_gen = full_stats[full_stats['Gen'] == gen]
    full_df = at_gen.groupby('WT').mean()[['Replicate', 'Gen', 'Fitness']].copy()

    # Look up each WT's original individual from the WT index itself. The
    # previous version sized this table from the number of *replicates*, which
    # only worked when there were exactly as many replicates as WTs.
    full_df['Orig Fitness'] = [orig_indivs[i_wt].fitness for i_wt in full_df.index]

    full_df['Rel Fitness'] = full_df['Fitness'] / full_df['Orig Fitness']

    return full_df

In [None]:
sc_rel_data = compute_rel_fitness_at(sc_stats, sc_orig_indivs, gen)

In [None]:
sc_rel_data

In [None]:
control_rel_data = compute_rel_fitness_at(control_stats, control_orig_indivs, gen)

In [None]:
stats.ttest_ind(sc_rel_data['Rel Fitness'], control_rel_data['Rel Fitness'])

In [None]:
control_giga_mean = control_stats.groupby('Gen').mean()

In [None]:
def _plot_mean_rel_fitness_per_wt(ax, full_stats, orig_indivs, label, color):
    """Draw one mean relative-fitness curve per WT on `ax`; return the last value of each curve."""
    last_fitness = []
    for i_wt in full_stats['WT'].unique():
        wt_rep = full_stats[(full_stats['WT'] == i_wt) & (full_stats['Gen'] > 0)].copy()
        wt_rep['Rel Fitness'] = wt_rep['Fitness'] / orig_indivs[i_wt].fitness
        mean_data = wt_rep.groupby(['Gen']).mean()
        last_fitness.append(mean_data.iloc[-1]['Rel Fitness'])
        # Only the first curve carries the legend entry; the Series index
        # ('Gen') is used as x coordinate
        ax.plot(mean_data['Rel Fitness'], label=label if i_wt == 0 else None,
                lw=2, color=color)
    return last_fitness


def plot_rel_fitness_agrege(sc_stats, sc_orig_indivs, control_stats, control_orig_indivs, exp_path):
    """Plot the per-WT mean relative fitness of the SC and control runs.

    The left panel shows, for each WT of each experiment, the mean over runs
    of 'Fitness' divided by the fitness of the WT's original individual, as a
    function of the generation (log-log axes, generation 0 excluded). The
    right panel shows the last value of each of these curves as one point per
    WT. The figure is saved as `exp_path / 'rel_fitness_sc_control.pdf'` and
    shown.

    Args:
        sc_stats, control_stats: DataFrames with 'WT', 'Gen' and 'Fitness' columns.
        sc_orig_indivs, control_orig_indivs: original individuals, indexable
            by the values of 'WT', exposing a `fitness` attribute.
        exp_path: `pathlib.Path` of the directory receiving the PDF file.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [5, 1]},
                                   figsize=(9, 4), dpi=dpi)

    # Left figure: over time
    ax1.set_yscale('log')
    ax1.set_xscale('log')
    ax1.grid(linestyle=':', which='both')
    ax1.set_xlabel('Generation', fontsize=15)
    ax1.set_ylabel('Relative fitness', fontsize=15)
    ax1.tick_params(axis='both', which='major', labelsize=12)

    sc_last_fitness = _plot_mean_rel_fitness_per_wt(
        ax1, sc_stats, sc_orig_indivs, "Supercoiling runs", sc_color)
    control_last_fitness = _plot_mean_rel_fitness_per_wt(
        ax1, control_stats, control_orig_indivs, "Control runs", control_color)

    ax1.legend(fontsize=12)

    # Right figure: last time point, one marker per WT. The x positions are
    # sized from the data instead of assuming exactly 5 WTs.
    ax2.plot(np.zeros(len(sc_last_fitness)), sc_last_fitness,
             linestyle=' ', marker='o', color=sc_color)
    ax2.plot(np.ones(len(control_last_fitness)), control_last_fitness,
             linestyle=' ', marker='o', color=control_color)

    ax2.yaxis.tick_right()
    ax2.yaxis.set_label_position("right")
    ax2.set_ylabel('Final relative fitness', fontsize=15)
    ax2.set_xlim(-0.5, 1.5)
    ax2.set_xticks(ticks=[0, 1])
    ax2.set_xticklabels(labels=['SC', 'Control'])
    ax2.tick_params(axis='both', which='major', labelsize=12)
    ax2.grid(linestyle=':', axis='y', which='both')

    # Save and show
    plt.tight_layout()

    plt.savefig(exp_path.joinpath('rel_fitness_sc_control.pdf'), dpi=dpi, bbox_inches='tight')

    plt.show()

In [None]:
#plot_rel_fitness_agrege(sc_stats, sc_orig_indivs, control_stats, control_orig_indivs, sc_path)

# Plot fitness landscapes

In [None]:
# Grid of basal supercoiling values on which the fitness landscapes are
# computed: nb_sigmas evenly spaced points in a window of +/- 0.015 around the
# basal supercoiling of the SC parameters.
sigma_basal = sc_params['sigma_basal']

nb_sigmas = 1000
sigma_min = sigma_basal - 0.015
sigma_max = sigma_basal + 0.015

sigmas = np.linspace(sigma_min, sigma_max, nb_sigmas)

In [None]:
def compute_fitness_landscape(base_indiv, params, sigmas):
    """Compute the fitness of an individual for each basal supercoiling value in `sigmas`.

    The computation is done on a clone of `base_indiv`, which is therefore
    left untouched. For each value, the clone's `sigma_basal` is overwritten,
    its expression levels are recomputed with `params['sigma_A']` and
    `params['sigma_B']`, and its fitness is recomputed.

    Args:
        base_indiv: individual exposing `clone()`; the clone must expose
            `compute_inter_matrix()`, `run_system(sigma)` and `compute_fitness()`.
        params: run parameters providing 'sigma_A' and 'sigma_B'.
        sigmas: sequence of basal supercoiling values to evaluate.

    Returns:
        A numpy array of the same length as `sigmas` holding the fitness
        obtained for each value.
    """
    # Size the result from the argument, not from the global `nb_sigmas`
    fitnesses = np.zeros(len(sigmas))

    calc_indiv = base_indiv.clone()
    # Computed once, before the loop, and reused for every sigma
    calc_indiv.inter_matrix = calc_indiv.compute_inter_matrix()
    for i_sigma, sigma in enumerate(sigmas):
        calc_indiv.sigma_basal = sigma

        calc_indiv.expr_levels = (calc_indiv.run_system(params['sigma_A']),
                                  calc_indiv.run_system(params['sigma_B']))

        fitnesses[i_sigma] = calc_indiv.compute_fitness()

    return fitnesses

In [None]:
# Plot fitness as a function of the basal supercoiling (fitness landscapes)
def plot_sigma_fitness_landscape(all_fitnesses, base_indivs, indiv_type, plot_name=None):
    """Plot the fitness landscape of several individuals on one log-scale figure.

    One curve is drawn per individual, using the global `sigmas` grid as x
    coordinates, and a marker is added at the individual's own
    (`sigma_basal`, `fitness`) point: a circle when `indiv_type` is 'Shock', a
    star otherwise. Only five colors are defined.

    Args:
        all_fitnesses: one sequence of fitness values per individual, each
            matching the global `sigmas` grid.
        base_indivs: the individuals, exposing `sigma_basal` and `fitness`.
        indiv_type: prefix of the legend entries (e.g. 'WT' or 'Shock').
        plot_name: if given, path to which the figure is saved.
    """
    fig, ax = plt.subplots(figsize=(7, 4), dpi=dpi)

    # Raw string: '\s' is not a valid escape sequence in a regular string literal
    plt.xlabel(r'Basal supercoiling $\sigma_{basal}$')
    plt.ylabel('Fitness')
    plt.yscale('log')
    plt.xlim(sigmas[0], sigmas[-1])
    plt.ylim(1e-45, 1e1)
    plt.grid(linestyle=':')

    all_colors = met_brewer.met_brew(name='Hokusai3', n=6, brew_type='continuous')
    colors = [all_colors[4], all_colors[3], all_colors[2], all_colors[1], all_colors[0]]

    # Marker style of the individual's own position on its landscape
    if indiv_type == 'Shock':
        marker_style = {'marker': 'o', 'markersize': 5}
    else:
        marker_style = {'marker': '*', 'markersize': 8}

    for i_indiv, indiv in enumerate(base_indivs):
        # Plot fitness
        plt.plot(sigmas, all_fitnesses[i_indiv], color=colors[i_indiv], linewidth=2,
                 label=f'{indiv_type} {i_indiv}')

        # Add the original SC
        plt.plot(indiv.sigma_basal, indiv.fitness, color=colors[i_indiv],
                 markeredgecolor='black', markeredgewidth=1, **marker_style)

    plt.legend(loc='upper left')

    # Wrap up
    if plot_name:
        plt.savefig(plot_name, dpi=dpi, bbox_inches='tight')

    plt.show()
    plt.close()

## Initial fitness landscapes of the WT individuals

In [None]:
# One fitness landscape per SC wild-type individual, evaluated over the global `sigmas` range
sc_fitnesses = [compute_fitness_landscape(indiv, sc_params, sigmas) for indiv in sc_orig_indivs]

In [None]:
# Plot the wild-type landscapes of the SC runs and save the figure under sc_path
plot_sigma_fitness_landscape(sc_fitnesses, base_indivs=sc_orig_indivs, indiv_type='WT',
                             plot_name=sc_path.joinpath('fitness_landscapes_wt.pdf'))

In [None]:
# One fitness landscape per control wild-type individual, evaluated over the global `sigmas` range
control_fitnesses = [compute_fitness_landscape(indiv, control_params, sigmas) for indiv in control_orig_indivs]

In [None]:
# Plot the wild-type landscapes of the control runs and save the figure under control_path
plot_sigma_fitness_landscape(control_fitnesses, base_indivs=control_orig_indivs, indiv_type='WT',
                             plot_name=control_path.joinpath('fitness_landscapes_wt.pdf'))

## Fitness landscapes of the shuffled WT individuals

In [None]:
def plot_shuffled_fitness_landscape(shuffled_indivs, params, exp_path):
    """Plot the fitness landscapes of the shuffled individuals of wild-type 1.

    Parameters
    ----------
    shuffled_indivs : sequence of sequences
        ``shuffled_indivs[i_wt]`` holds the shuffled individuals of wild-type
        ``i_wt``. Only index 1 is plotted; the other entries are skipped.
    params : mapping
        Run parameters passed to ``compute_fitness_landscape`` for every
        individual.
    exp_path : pathlib.Path
        Directory in which ``fitness_landscapes_shuffled_wt_XX.pdf`` is saved.
    """
    for i_wt, wt_shuffled_indivs in enumerate(shuffled_indivs):

        # Only the landscapes of wild-type 1 are plotted
        if i_wt != 1:
            continue

        # Use the parameters given by the caller (not the global sc_params), so
        # that the control runs are evaluated with their own parameters.
        fitnesses = [compute_fitness_landscape(indiv, params, sigmas)
                     for indiv in wt_shuffled_indivs]
        plot_name = exp_path.joinpath(f'fitness_landscapes_shuffled_wt_{i_wt:02}.pdf')
        plot_sigma_fitness_landscape(fitnesses, wt_shuffled_indivs, indiv_type="Shock",
                                     plot_name=plot_name)

In [None]:
# Landscapes of the shuffled individuals of the SC runs, saved under sc_path
plot_shuffled_fitness_landscape(sc_shuffled_indivs, sc_params, sc_path)

In [None]:
# Landscapes of the shuffled individuals of the control runs, saved under control_path
plot_shuffled_fitness_landscape(control_shuffled_indivs, control_params, control_path)

## Fitness landscapes of the evolved individuals

In [None]:
def plot_evolved_fitness_landscapes(exp_path, params=None):
    """Plot the fitness landscapes of the best evolved individuals of one shuffle.

    Walks the ``wt*/<shuffle>/<replicate>`` directory tree under ``exp_path``
    (directories sorted by name) and, for the wild-type at index 1 and its
    shuffle at index 0 only, loads the best individual of each replicate at the
    notebook-level generation ``gen`` and plots all their landscapes together.

    Parameters
    ----------
    exp_path : pathlib.Path
        Root directory of the experiment; the figure is saved there as
        ``fitness_landscapes_evolved_wt_XX_shuffle_YY.pdf``.
    params : mapping, optional
        Run parameters passed to ``compute_fitness_landscape``. Defaults to the
        notebook-level ``sc_params``, which is what this function always used
        before the parameter was added.
    """
    if params is None:
        params = sc_params

    wt_dirs = sorted(d for d in exp_path.iterdir()
                     if d.is_dir() and d.name.startswith('wt'))

    for i_wt, wt_dir in enumerate(wt_dirs):

        # Only wild-type 1 is plotted
        if i_wt != 1:
            continue

        shuffle_dirs = sorted(d for d in wt_dir.iterdir() if d.is_dir())

        for i_shuffle, shuffle_dir in enumerate(shuffle_dirs):

            # Only the first shuffle is plotted
            if i_shuffle != 0:
                continue

            rep_dirs = sorted(d for d in shuffle_dir.iterdir() if d.is_dir())

            best_indivs = [evotsc_lib.get_best_indiv(rep_dir, gen) for rep_dir in rep_dirs]
            fitnesses = [compute_fitness_landscape(indiv, params, sigmas)
                         for indiv in best_indivs]

            plot_name = exp_path.joinpath(f'fitness_landscapes_evolved_wt_{i_wt:02}_shuffle_{i_shuffle:02}.pdf')
            plot_sigma_fitness_landscape(fitnesses, best_indivs, indiv_type="Rep",
                                         plot_name=plot_name)

In [None]:
# Landscapes of the best evolved individuals of the SC runs
plot_evolved_fitness_landscapes(sc_path)

In [None]:
# Landscapes of the best evolved individuals of the control runs
# NOTE(review): as called here, the landscapes are computed with sc_params, not
# control_params - confirm that this is intended.
plot_evolved_fitness_landscapes(control_path)

In [None]:
#plot_evolved_fitness_landscapes(only_sc_path)

In [None]:
def plot_sigma_fitness_landscape_sc_only(sigmas, all_fitnesses, shuffled_indivs, final_stats, plot_name=None):
    """Plot shuffled-individual landscapes with their initial and evolved positions.

    For each shuffled individual, draws its fitness landscape (log-scale y axis),
    a circle at its own ``(sigma_basal, fitness)`` and a star for each replicate
    found in ``final_stats`` for that shuffle.

    Parameters
    ----------
    sigmas : sequence of float
        Supercoiling values used as the x axis for every landscape.
    all_fitnesses : sequence of array-like
        ``all_fitnesses[i]`` is the landscape of ``shuffled_indivs[i]``.
    shuffled_indivs : sequence
        Individuals whose ``sigma_basal`` and ``fitness`` attributes are read.
    final_stats : pandas.DataFrame
        Must contain the columns ``'Shuffle'``, ``'Replicate'``, ``'Basal SC'``
        and ``'Fitness'``; rows are selected by shuffle index and replicate.
    plot_name : path-like, optional
        If truthy, the figure is saved to this path before being shown.
    """
    fig, ax = plt.subplots(figsize=(7, 4), dpi=dpi)

    # Raw string: '\s' is not a valid escape sequence in a regular string literal
    ax.set_xlabel(r'Background supercoiling $\sigma_{basal}$')
    ax.set_ylabel('Fitness')
    ax.set_yscale('log')
    ax.set_xlim(sigmas[0], sigmas[-1])
    ax.grid(linestyle=':')

    palette = met_brewer.met_brew(name='Hokusai3', n=6, brew_type='continuous')
    # Five of the six palette entries, in reverse order (the last one is unused)
    colors = [palette[i] for i in (4, 3, 2, 1, 0)]

    for i_shuffle, indiv in enumerate(shuffled_indivs):
        # Cycle through the colors so that more than five shuffles can be plotted
        color = colors[i_shuffle % len(colors)]

        ax.plot(sigmas, all_fitnesses[i_shuffle], color=color, linewidth=2,
                label=f'Shock {i_shuffle}')

        # Add the original SC
        ax.plot(indiv.sigma_basal, indiv.fitness, color=color,
                marker='o', markersize=5, markeredgecolor='black', markeredgewidth=1,
                zorder=10)

        # Add the evolved SC of each replicate of this shuffle
        for i_rep in final_stats['Replicate'].unique():
            rep_stats = final_stats[(final_stats['Shuffle'] == i_shuffle) &
                                    (final_stats['Replicate'] == i_rep)]

            ax.plot(rep_stats['Basal SC'], rep_stats['Fitness'], color=color,
                    marker='*', markersize=8, markeredgecolor='black', markeredgewidth=1,
                    zorder=10)

    ax.legend(loc='upper left')

    # Wrap up
    if plot_name:
        fig.savefig(plot_name, dpi=dpi, bbox_inches='tight')

    plt.show()
    plt.close(fig)

In [None]:
def plot_fitness_landscape_with_evolved():
    """Plot zoomed-in landscapes of the shuffled SC individuals of wild-type 1.

    The landscapes are evaluated on ``nb_sigmas`` points between -0.071 and
    -0.055, and drawn together with the rows of ``only_sc_stats`` recorded at
    generation ``gen`` for that wild-type. The figure is saved under ``sc_path``.
    All inputs are read from notebook-level variables.
    """
    # Locally zoomed supercoiling range (local name, the global `sigmas` is untouched)
    zoom_lo, zoom_hi = -0.071, -0.055
    sigmas = np.linspace(zoom_lo, zoom_hi, nb_sigmas)

    for i_wt, shuffled_indivs_wt in enumerate(sc_shuffled_indivs):

        # Only wild-type 1 is plotted
        if i_wt == 1:
            all_fitnesses = []
            for indiv in shuffled_indivs_wt:
                all_fitnesses.append(compute_fitness_landscape(indiv, sc_params, sigmas))

            plot_name = sc_path.joinpath(f'fitness_landscapes_wt_{i_wt:02}_with_evolved.pdf')

            # Final-generation statistics of this wild-type
            is_final_gen = only_sc_stats['Gen'] == gen
            is_this_wt = only_sc_stats['WT'] == i_wt
            final_stats = only_sc_stats[is_final_gen & is_this_wt]

            plot_sigma_fitness_landscape_sc_only(sigmas, all_fitnesses, shuffled_indivs_wt,
                                                 final_stats, plot_name=plot_name)

In [None]:
# Zoomed-in landscapes of the shuffled SC individuals, with the only-SC evolved replicates overlaid
plot_fitness_landscape_with_evolved()