In [None]:
import pathlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import pickle
import statsmodels.api as sm

In [None]:
from scipy import stats

In [None]:
# Re-import the local `evotsc_plot` plotting helpers.
# NOTE(review): `autoreload` is used here as a plain module exposing
# `reload()`, not as the IPython `%autoreload` magic -- confirm that this
# module is available in the environment running the notebook.
import autoreload
import evotsc_plot
autoreload.reload(evotsc_plot)

In [None]:
# Styling constants shared by the figures of this notebook
label_fontsize = 20   # axis labels and figure titles
tick_fontsize = 15    # tick labels
legend_fontsize = 15  # legend text
dpi = 300             # figure resolution

In [None]:
# Directory of the reference experiment, and the generation that is analysed
exp_path = pathlib.Path('/Users/theotime/Desktop/evotsc/alife-journal/')
gen = 250_000

# Name of each gene type, and the color associated with it (AB, A, B)
gene_types = ['AB', 'A', 'B']
gene_type_color = ['tab:blue', 'tab:red', 'tab:green']

# Name of each gene orientation
orient_name = ['leading', 'lagging']

# Labels for the relative orientation of genes
rel_orient = ['conv', 'div', 'upstr', 'downstr']

In [None]:
def get_params(exp_path):
    """Read the run parameters of an experiment from its first replicate.

    The parameters are parsed from the ``params.txt`` file found in the first
    (in sorted order) ``rep*`` sub-directory of ``exp_path``. Every non-blank
    line is expected to have the form ``name: value``.

    Args:
        exp_path: ``pathlib.Path`` of the experiment directory.

    Returns:
        dict mapping each parameter name to its value. Every value is
        converted to ``float``, except ``commit`` which is kept as a
        whitespace-stripped string.

    Raises:
        IndexError: if ``exp_path`` contains no ``rep*`` directory.
        ValueError: if a non-blank line has no ``:`` separator, or if a
            value other than ``commit`` cannot be converted to ``float``.
    """
    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    params = {}
    with open(rep_dirs[0].joinpath('params.txt'), 'r') as params_file:
        for line in params_file:
            # Blank lines (e.g. a trailing newline) carry no parameter
            if not line.strip():
                continue

            # Split on the first colon only, so a value may itself contain ':'
            param_name, _, raw_val = line.partition(':')
            param_name = param_name.strip()

            if param_name == 'commit':
                params[param_name] = raw_val.strip()
            else:
                params[param_name] = float(raw_val)

    return params

In [None]:
# Parameters of the reference experiment, used throughout the notebook
params = get_params(exp_path=exp_path)

In [None]:
# Show the parsed parameters as the output of this cell
params

In [None]:
# Number of genes per type: the total gene count divided by 3
# (assumes the genes are split evenly between the AB, A and B types -- TODO confirm)
nb_genes_type = int(params["nb_genes"] / 3) # Number of genes per type

In [None]:
def get_stats(exp_name):
    """Load the statistics of every replicate of an experiment.

    Each ``rep*`` sub-directory of ``exp_name`` is expected to contain a
    ``stats.csv`` file. The files are read in sorted directory order and
    stacked into a single frame.

    Args:
        exp_name: path (``str`` or ``pathlib.Path``) of the experiment
            directory.

    Returns:
        ``pandas.DataFrame`` holding the rows of every ``stats.csv``, with an
        extra first column ``Replicate`` giving the 0-based rank of the
        replicate directory. The row index of each file is kept as-is (it is
        not reset), so index values repeat across replicates. An empty frame
        is returned when there is no ``rep*`` directory.
    """
    exp_name = pathlib.Path(exp_name)

    rep_dirs = sorted(d for d in exp_name.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    # Collect the per-replicate frames and concatenate once: growing a
    # DataFrame with `pd.concat` inside the loop copies the data at every
    # iteration, and concatenating with an empty frame warns on recent pandas.
    rep_frames = []
    for i_rep, rep_dir in enumerate(rep_dirs):
        rep_stats = pd.read_csv(rep_dir.joinpath('stats.csv'))
        rep_stats.insert(0, 'Replicate', i_rep)
        rep_frames.append(rep_stats)

    if not rep_frames:
        # `pd.concat` refuses an empty list; keep returning an empty frame
        return pd.DataFrame()

    return pd.concat(rep_frames)

# Plot the number of active genes of each type over evolutionary time

In [None]:
def plot_gene_activity(base_path, gen, plot_sc=False, plot_genome_size=False):
    """Plot activated genes and fitness over generations, per replicate.

    For every ``rep*`` directory of ``base_path`` whose ``stats.csv`` is not
    empty, and for each environment (A and B), one figure is drawn with:

    * the number of activated genes of each type (AB, A, B) on the left axis;
    * the fitness on a log-scaled right axis;
    * optionally, the genome size on a third axis.

    Each figure is saved as ``<base_path>/rep_<num>_env_<env>.pdf``.

    Args:
        base_path: path (``str`` or ``pathlib.Path``) of the experiment
            directory holding the ``rep*`` directories; figures are written
            there too.
        gen: last generation to include in the plots.
        plot_sc: currently unused; kept for backward compatibility.
        plot_genome_size: if True, add a third axis showing the
            ``Genome size`` column.

    Returns:
        None. All open matplotlib figures are closed after each save.
    """
    base_path = pathlib.Path(base_path)

    # List the replicates of `base_path` itself rather than of the
    # notebook-level `exp_path`, so the data that is read matches the
    # directory the figures are saved in.
    rep_dirs = sorted(d for d in base_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    # (column prefix and legend label, line color) of each gene type
    gene_type_styles = (("AB", "tab:blue"), ("A", "tab:red"), ("B", "tab:green"))

    for rep_dir in rep_dirs:
        stats_path = rep_dir.joinpath('stats.csv')
        if stats_path.stat().st_size == 0:  # empty file: nothing to plot
            continue

        data = pd.read_csv(stats_path)
        # Generation 0 is dropped (the x axis is log-scaled), as are
        # generations after `gen`
        data = data[(data['Gen'] > 0) & (data['Gen'] <= gen)]

        for env in ["A", "B"]:

            ## 1st axis: number of activated genes of each type
            fig, ax1 = plt.subplots(figsize=(9, 4), dpi=dpi)
            ax1.set_ylim(-1, 21)
            ax1.set_ylabel('Activated genes', fontsize=label_fontsize)
            ax1.set_xlabel('Generation', fontsize=label_fontsize)
            ax1.set_xscale('log')
            ax1.grid()

            for gene_type, color in gene_type_styles:
                ax1.plot(data['Gen'], data[f"{gene_type}on_{env}"],
                         color=color,
                         linewidth=2,
                         label=gene_type)
            ax1.tick_params(axis='both', which='major', labelsize=tick_fontsize)

            ## 2nd axis: fitness
            ax2 = ax1.twinx()
            ax2.set_yscale('log')
            ax2.set_ylim(1e-25, 1e0)
            ax2.set_ylabel('Fitness', fontsize=label_fontsize, color='tab:cyan')
            ax2.plot(data['Gen'],
                     data["Fitness"],
                     color="tab:cyan",
                     linewidth=2)
            ax2.tick_params(axis='both', which='major', labelsize=tick_fontsize,
                            colors='tab:cyan')

            ## 3rd axis: genome size
            if plot_genome_size:
                ax3 = ax1.twinx()

                # Shift the extra spine outwards so it does not overlap
                # the fitness axis
                ax3.spines['right'].set_position(('outward', 80))
                ax3.set_ylim(0.7e4, 6.3e4)
                ax3.set_ylabel('Genome Size', fontsize=label_fontsize, color='tab:olive')
                ax3.plot(data['Gen'],
                         data["Genome size"],
                         color="tab:olive",
                         linewidth=2)
                ax3.tick_params(axis='both', which='major', labelsize=tick_fontsize)

            fig.legend(bbox_to_anchor=(0, 1),
                       bbox_transform=ax1.transAxes,
                       loc="upper left",
                       fontsize=legend_fontsize)

            # Add a `_` between the `rep` prefix and the replicate number
            rep_num = rep_dir.name[3:]
            fig.savefig(f'{base_path}/rep_{rep_num}_env_{env}.pdf', dpi=dpi,
                        bbox_inches='tight')

            plt.close('all')


In [None]:
# One figure per replicate and per environment for the reference experiment
plot_gene_activity(base_path=exp_path, gen=gen)

# Plot the mean and stddev of activated genes of each type in each environment over all replicas

In [None]:
# Statistics of every replicate of the reference experiment, in one frame
full_stats = get_stats(exp_name=exp_path)

In [None]:
# Compute error bars with a binomial proportion confidence interval
# https://www.statsmodels.org/stable/generated/statsmodels.stats.proportion.proportion_confint.html
def compute_error(count, mean, nobs, alpha, method):
    """Return the lower and upper error-bar lengths around `mean`.

    The confidence interval is obtained from
    `sm.stats.proportion_confint(count, nobs, alpha, method)`; the result is
    the pair `(mean - lower bound, upper bound - mean)`.
    """
    ci_low, ci_high = sm.stats.proportion_confint(count, nobs, alpha, method)
    err_below = mean - ci_low
    err_above = ci_high - mean
    return (err_below, err_above)

In [None]:
def plot_mean_std(exp_path, full_stats, gen, plot_boxplot=True,
                  show_epsilon=False, show_inter_coef=False, show_sigma=False):
    """Bar plot of the mean fraction of activated genes, per type and environment.

    The rows of `full_stats` at generation `gen` are divided by the
    notebook-level `nb_genes_type`, and their mean over replicates is drawn
    as one bar per gene type (AB, A, B) and per environment (A, B). The
    per-replicate values are overlaid either as boxplots or as individual
    points. The figure is saved as
    ``<exp_path>/mean_activation<suffix>.pdf``.

    Args:
        exp_path: ``pathlib.Path`` of the experiment directory; its
            parameters are read with `get_params` and the figure is saved
            there.
        full_stats: frame with (at least) the columns ``Gen``,
            ``Replicate``, and ``{AB,A,B}on_{A,B}``.
        gen: generation whose rows are plotted.
        plot_boxplot: if True, overlay a boxplot of the replicates on each
            bar; otherwise draw every replicate as an open circle.
        show_epsilon: if True, put the ``epsilon`` value of the experiment
            in the title.
        show_inter_coef: if True (and `show_epsilon` is False), put the
            ``interaction_coef`` value in the title.
        show_sigma: if True (and neither of the previous flags is set), put
            the ``sigma_A`` and ``sigma_B`` values in the title.

    Returns:
        None.
    """
    # NOTE: the division applies to every column of the frame (including
    # "Gen" and "Replicate"); only the gene-count columns are read as
    # fractions below, and "Replicate" is only used to count distinct values.
    last_gen_stats = full_stats[full_stats["Gen"] == gen] / nb_genes_type
    mean_stats = last_gen_stats.mean()
    fig, ax = plt.subplots(figsize=(9, 4), dpi=dpi)

    x = np.arange(3)  # 3 types of genes
    width = 0.35  # the width of the bars

    # Columns holding the activated genes of each type in each environment
    env_A_cols = ["ABon_A", "Aon_A", "Bon_A"]
    env_B_cols = ["ABon_B", "Aon_B", "Bon_B"]

    env_A_means = np.array([mean_stats[col] for col in env_A_cols])
    env_B_means = np.array([mean_stats[col] for col in env_B_cols])

    ax.bar(x - width/2,
           env_A_means,
           width=width,
           label='Environment A',
           color='#008fd5')

    ax.bar(x + width/2,
           env_B_means,
           width=width,
           label='Environment B',
           color='#fc4f30')

    if plot_boxplot: # If we have many replicates, draw a boxplot over the means
        ax.boxplot([last_gen_stats[col] for col in env_A_cols],
                   positions=x - width/2, manage_ticks=False, medianprops={'color':'black'})
        ax.boxplot([last_gen_stats[col] for col in env_B_cols],
                   positions=x + width/2, manage_ticks=False, medianprops={'color':'black'})

    else: # Else, just draw every replicate.
        nb_reps = last_gen_stats["Replicate"].nunique()
        # One row per gene type; replicates are spread horizontally over
        # the middle half of each bar
        x_plot = np.tile(x, (nb_reps, 1)).T + np.tile(np.linspace(-width/4, width/4, nb_reps), (len(x), 1))

        ax.plot(x_plot - width/2, [last_gen_stats[col] for col in env_A_cols],
                marker='o', linestyle='', markeredgecolor='black', markerfacecolor='none')
        ax.plot(x_plot + width/2, [last_gen_stats[col] for col in env_B_cols],
                marker='o', linestyle='', markeredgecolor='black', markerfacecolor='none')

    ax.set_ylabel('Fraction of activated genes', fontsize=legend_fontsize)
    ax.set_xticks(x)
    ax.set_xticklabels(["AB genes on", "A genes on", "B genes on"])

    plt.grid(linestyle=':', axis='y')

    plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    # Title when changing parameter values.
    # The file-name suffix is only added when the plotted experiment has the
    # same value as the notebook-level `params` (presumably so that the
    # reference experiment gets one distinct file per explored parameter).
    # Raw f-strings keep the LaTeX backslashes without relying on invalid
    # escape sequences such as `\e` or `\s`.
    exp_params = get_params(exp_path)
    extra_name = ''
    if show_epsilon:
        plt.title(rf'$\epsilon$ = {exp_params["epsilon"]}', fontsize=label_fontsize)
        if params["epsilon"] == exp_params["epsilon"]:
            extra_name = "_epsilon"

    elif show_inter_coef:
        plt.title(f'$c$ = {exp_params["interaction_coef"]}', fontsize=label_fontsize)
        if params["interaction_coef"] == exp_params["interaction_coef"]:
            extra_name = "_inter_coef"

    elif show_sigma:
        plt.title(rf'$\sigma_A$ = {exp_params["sigma_A"]}, $\sigma_B$ = {exp_params["sigma_B"]}',
                  fontsize=label_fontsize)
        if params["sigma_A"] == exp_params["sigma_A"]:
            extra_name = "_sigma"

    plt.legend(fontsize=legend_fontsize, loc='upper left', bbox_to_anchor=(0.46, 1))

    plt.savefig(f'{exp_path}/mean_activation{extra_name}.pdf', bbox_inches='tight')

In [None]:
# Mean activation figure for the reference experiment (boxplot overlay)
plot_mean_std(exp_path=exp_path, full_stats=full_stats, gen=gen)

## Same figure for the different parameter values explored to evaluate model robustness

In [None]:
# For each explored parameter: the directories of the runs with alternative
# values, followed by the reference experiment.
epsilon_exp_names = [f'epsilon-{eps}' for eps in [0.003, 0.01, 0.1]]
epsilon_exp_paths = [
    pathlib.Path('/Users/theotime/Desktop/evotsc/alife-revision-200') / name
    for name in epsilon_exp_names
] + [exp_path]

inter_coef_exp_names = [f'inter-coef-{c}' for c in [0.1, 1.0, 3.0]]
inter_coef_exp_paths = [
    pathlib.Path('/Users/theotime/Desktop/evotsc/alife-revision-200') / name
    for name in inter_coef_exp_names
] + [exp_path]

sigma_exp_names = [f'sigma-{s}' for s in [0.01, 0.05, 0.2]]
sigma_exp_paths = [
    pathlib.Path('/Users/theotime/Desktop/evotsc/alife-revision-200') / name
    for name in sigma_exp_names
] + [exp_path]

In [None]:
# One mean-activation figure per explored value of epsilon
for param_exp_path in epsilon_exp_paths:
    param_full_stats = get_stats(exp_name=param_exp_path)
    plot_mean_std(exp_path=param_exp_path, full_stats=param_full_stats, gen=gen,
                  plot_boxplot=False, show_epsilon=True)

In [None]:
# One mean-activation figure per explored value of the interaction coefficient
for param_exp_path in inter_coef_exp_paths:
    param_full_stats = get_stats(exp_name=param_exp_path)
    plot_mean_std(exp_path=param_exp_path, full_stats=param_full_stats, gen=gen,
                  plot_boxplot=False, show_inter_coef=True)

In [None]:
# One mean-activation figure per explored value of sigma
for param_exp_path in sigma_exp_paths:
    param_full_stats = get_stats(exp_name=param_exp_path)
    plot_mean_std(exp_path=param_exp_path, full_stats=param_full_stats, gen=gen,
                  plot_boxplot=False, show_sigma=True)

## Statistical significance tests for the above figure

In [None]:
def stats_tests(full_stats, gen):
    """Print paired t-tests comparing environments A and B for each gene type.

    The rows of `full_stats` at generation `gen` are divided by the
    notebook-level `nb_genes_type`; for each gene type, the `*on_A` column is
    then compared with the `*on_B` column using `scipy.stats.ttest_rel`, and
    the three results are printed.
    """
    final_fractions = full_stats[full_stats["Gen"] == gen] / nb_genes_type

    # (printed label, column prefix) of each gene type
    comparisons = (('AB genes', 'AB'), ('A genes on', 'A'), ('B genes on', 'B'))

    # Run every test before printing anything
    outcomes = [
        (label, stats.ttest_rel(final_fractions[f"{prefix}on_A"],
                                final_fractions[f"{prefix}on_B"]))
        for label, prefix in comparisons
    ]

    for label, outcome in outcomes:
        print(f'{label}: {outcome}')

In [None]:
# Significance tests for the reference experiment
stats_tests(full_stats=full_stats, gen=gen)

# Plot fitness, genome size, and basal supercoiling over evolutionary time

In [None]:
def plot_fitness(full_stats, gen, out_dir=None):
    """Plot the fitness of every replicate over generations on log-log axes.

    One line is drawn per distinct value of the ``Replicate`` column, with
    colors sampled from the ``viridis`` colormap. The figure is saved as
    ``<out_dir>/all_fitness.pdf``.

    Args:
        full_stats: frame with (at least) the columns ``Replicate``, ``Gen``
            and ``Fitness``.
        gen: last generation to include in the plot.
        out_dir: directory in which the figure is saved. Defaults to the
            notebook-level `exp_path` when None.

    Returns:
        None.
    """
    # Replicate ids are collected before filtering on the generation, so
    # the color of a replicate does not depend on `gen`. Using the ids
    # themselves (rather than `range(nb_rep)`) also supports ids that are
    # not exactly 0..nb_rep-1.
    rep_ids = sorted(full_stats["Replicate"].unique())
    nb_rep = len(rep_ids)

    # Generation 0 is dropped (the x axis is log-scaled), as are
    # generations after `gen`
    full_stats = full_stats[(full_stats["Gen"] > 0) & (full_stats["Gen"] <= gen)]

    # `mpl.cm.get_cmap` is deprecated and removed in recent matplotlib
    # releases; `plt.get_cmap` takes the same arguments and remains available.
    colors = plt.get_cmap('viridis', nb_rep)(range(nb_rep))

    plt.figure(figsize=(9,4), dpi=dpi)

    plt.xscale('log')
    plt.yscale('log')
    plt.grid(linestyle=':')

    plt.xlabel('Generation', fontsize=label_fontsize)
    plt.ylabel('Fitness', fontsize=label_fontsize)

    plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    for i_rep, rep in enumerate(rep_ids):
        stats_rep = full_stats[full_stats["Replicate"] == rep]
        plt.plot(stats_rep['Gen'],
                 stats_rep["Fitness"],
                 linewidth=2,
                 color=colors[i_rep])

    if out_dir is None:
        out_dir = exp_path

    plt.savefig(f'{out_dir}/all_fitness.pdf', dpi=dpi, bbox_inches='tight')

In [None]:
# Fitness trajectories of all replicates of the reference experiment
plot_fitness(full_stats=full_stats, gen=gen)

# Plot gene activities over the lifecycle of the best individual in each replica

In [None]:
def plot_expr(indiv, sigma, plot_title, plot_name):
    """Plot the expression level of every gene of `indiv` over one evaluation.

    The individual is re-evaluated with `sigma` passed as both arguments of
    `indiv.evaluate`, and one line per gene is drawn: the color follows the
    gene type (AB: blue, A: red, B: green) and the line is solid when
    `gene.orientation == 0`, dashed otherwise. The figure is saved to
    `plot_name`, shown, then closed.

    Args:
        indiv: individual to evaluate and plot.
        sigma: value passed twice to `indiv.evaluate`.
        plot_title: currently unused.
        plot_name: path of the saved figure.
    """
    # Clear the flag before calling `evaluate`
    # (presumably to force a fresh evaluation rather than a cached one)
    indiv.already_evaluated = False
    (expr_levels, _), _fitness = indiv.evaluate(sigma, sigma)

    type_colors = ['tab:blue', 'tab:red', 'tab:green'] # AB: blue, A: red, B: green

    fig, ax = plt.subplots(figsize=(9, 5), dpi=dpi)
    ax.set_ylim(-0.05, 1.05)

    for gene_idx, gene in enumerate(indiv.genes):
        if gene.orientation == 0:
            gene_linestyle = 'solid'
        else:
            gene_linestyle = 'dashed'

        ax.plot(expr_levels[gene_idx, :],
                linestyle=gene_linestyle,
                linewidth=2,
                color=type_colors[gene.gene_type],
                label=f'Gene {gene.id}')

    ax.grid(linestyle=':')
    ax.set_xlabel('Iteration steps', fontsize=label_fontsize)
    ax.set_ylabel('Expression level', fontsize=label_fontsize)
    ax.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    ## Final stuff
    plt.tight_layout()
    plt.savefig(plot_name, dpi=dpi, bbox_inches='tight')

    plt.show()
    plt.close()

In [None]:
def get_best_indiv(rep_path, gen):
    """Return the fittest individual of the population saved at `gen`.

    The population is unpickled from ``<rep_path>/pop_gen_<gen>.evotsc``
    (generation zero-padded to 6 digits) and evaluated. The first individual
    whose fitness is strictly greater than every previous one (starting from
    0) is returned; the first individual of the population is returned when
    no individual has a strictly positive fitness.

    Args:
        rep_path: ``pathlib.Path`` of the replicate directory.
        gen: generation of the save file to load.

    Returns:
        The selected individual.
    """
    save_path = rep_path.joinpath(f'pop_gen_{gen:06}.evotsc')

    # NOTE: unpickling runs arbitrary code -- only load trusted save files
    with open(save_path, 'rb') as save_file:
        population = pickle.load(save_file)

    population.evaluate()

    champion = population.individuals[0]
    champion_fitness = 0

    try:
        for candidate in population.individuals:
            if candidate.fitness > champion_fitness:
                champion_fitness, champion = candidate.fitness, candidate
    except AttributeError:
        # In the neutral control, individuals are not evaluated so there is
        # no fitness field: keep the best individual found so far
        pass

    return champion

In [None]:
def plot_best(gen):
    """Plot the gene expression of the best individual of every replicate.

    For each ``rep*`` directory of the notebook-level `exp_path`, the best
    individual at generation `gen` is plotted once with ``sigma_A`` and once
    with ``sigma_B`` (both read from the notebook-level `params`). Figures
    are saved as ``<exp_path>/best_rep<i>_env_<env>.pdf``.
    """
    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    for rep, rep_dir in enumerate(rep_dirs):
        best_rep = get_best_indiv(rep_dir, gen)

        # Environment A first, then environment B
        for env in ('A', 'B'):
            plot_expr(best_rep,
                      sigma=params[f'sigma_{env}'],
                      plot_title='',
                      plot_name=f'{exp_path}/best_rep{rep}_env_{env}.pdf')
        #evotsc_plot.plot_genome(best_rep, name=f'{exp_path}/genome_rep{rep}')

In [None]:
# Expression plots of the best individual of each replicate
plot_best(gen=gen)

# Plot the genomes and SC values of the best individual in each run

In [None]:
def plot_best_genome_and_tsc(exp_path, gen):
    """Draw the genome and supercoiling plot of the best individual of each replicate.

    For every ``rep*`` directory of `exp_path`, the best individual at
    generation `gen` is passed to `evotsc_plot.plot_genome_and_tsc`, once
    with ``sigma_A`` and once with ``sigma_B`` (both read from the
    notebook-level `params`). Figures are saved as
    ``<exp_path>/genome_and_tsc_rep<ii>_env_<env>.pdf``.

    Args:
        exp_path: ``pathlib.Path`` of the experiment directory.
        gen: generation of the saved populations to load.
    """
    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    for rep, rep_dir in enumerate(rep_dirs):
        best_indiv = get_best_indiv(rep_dir, gen)

        # Environment A first, then environment B
        for env in ('A', 'B'):
            figure_path = exp_path.joinpath(f'genome_and_tsc_rep{rep:02}_env_{env}.pdf')
            evotsc_plot.plot_genome_and_tsc(best_indiv, params[f'sigma_{env}'],
                                            show_bar=True,
                                            name=figure_path,
                                            print_ids=True)

In [None]:
# Genome and supercoiling figures for the reference experiment
plot_best_genome_and_tsc(exp_path=exp_path, gen=gen)