In [None]:
import pathlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl

In [None]:
from scipy import stats

In [None]:
import pickle

In [None]:
# Import the project's plotting helpers, then reload the module.
# NOTE(review): `autoreload` is presumably IPython's autoreload extension module,
# and `autoreload.reload` presumably re-imports `evotsc_plot` so that edits made
# to it on disk are picked up without restarting the kernel -- confirm.
import autoreload
import evotsc_plot
autoreload.reload(evotsc_plot)

In [None]:
# Divisor applied to the last-generation statistics in `plot_mean_std` and `stats_tests`.
# NOTE(review): presumably the number of genes of each type (AB, A, B), which would
# turn the "on" gene counts into proportions -- confirm against the simulation parameters.
nb_genes_type = 20

In [None]:
# Figure styling shared (as globals) by the plotting functions of this notebook.
# Font size of the axis labels.
label_fontsize=20
# Font size of the tick labels.
tick_fontsize=15
# Font size of the legend entries.
legend_fontsize=15
# Resolution used both when creating figures and when saving them.
dpi=300

In [None]:
# Root directory of the experiment to analyse: its `rep*` sub-directories are read
# by the functions below, and most figures are saved directly into it.
# NOTE(review): absolute, machine-specific path -- edit it before running elsewhere.
exp_path = pathlib.Path('/Users/theotime/Desktop/evotsc/change_inter_coef/inter_0.01')

In [None]:
def get_params(exp_path):
    """Read the run parameters of an experiment from its first replicate.

    The parameters are parsed from the ``params.txt`` file found in the first
    (in sorted order) sub-directory of ``exp_path`` whose name starts with
    ``rep``. Every non-blank line is expected to look like ``name: value``.

    Args:
        exp_path: experiment directory, as a ``pathlib.Path`` or a ``str``.

    Returns:
        dict mapping each parameter name to its value: a ``float`` for every
        parameter except ``commit``, which is kept as a stripped string.

    Raises:
        IndexError: if ``exp_path`` contains no ``rep*`` directory.
        ValueError: if the value of a parameter other than ``commit`` is
            missing or cannot be converted to ``float``.
    """
    # Accept plain strings as well as Path objects, like `get_stats` does.
    exp_path = pathlib.Path(exp_path)

    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    with open(rep_dirs[0].joinpath('params.txt'), 'r') as params_file:
        param_lines = params_file.readlines()

    params = {}
    for line in param_lines:
        if not line.strip():
            continue  # tolerate blank lines, e.g. at the end of the file

        # Split on the first ':' only, so that a value which itself contains
        # a ':' is kept whole instead of being truncated.
        param_name, _, raw_val = line.partition(':')
        param_name = param_name.strip()

        if param_name == 'commit':
            # The commit identifier is not a number: keep it as text.
            param_val = raw_val.strip()
        else:
            param_val = float(raw_val)

        params[param_name] = param_val

    return params

In [None]:
# Parse the parameters of the experiment once; `plot_best` reads them as a global.
exp_params = get_params(exp_path)

In [None]:
# Show the parsed parameters as the output of this cell.
exp_params

In [None]:
def get_stats(exp_name):
    """Load the statistics of every replicate of an experiment into one frame.

    Each sub-directory of ``exp_name`` whose name starts with ``rep`` must
    contain a ``stats.csv`` file. The files are read in sorted directory order
    and stacked, with a leading ``Replicate`` column holding the 0-based
    position of the replicate in that order.

    Args:
        exp_name: experiment directory, as a ``pathlib.Path`` or a ``str``.

    Returns:
        pandas.DataFrame with the rows of all replicates (each replicate keeps
        the row index it had in its own file), or an empty DataFrame when the
        directory contains no replicate.
    """
    exp_name = pathlib.Path(exp_name)

    rep_dirs = sorted(d for d in exp_name.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    # Collect the per-replicate frames and concatenate them once at the end:
    # growing a DataFrame with `pd.concat` inside the loop copies all the
    # previous rows at every iteration.
    rep_frames = []
    for i_rep, rep_dir in enumerate(rep_dirs):
        rep_stats = pd.read_csv(rep_dir.joinpath('stats.csv'))
        rep_stats.insert(0, 'Replicate', i_rep)
        rep_frames.append(rep_stats)

    if not rep_frames:
        # `pd.concat` refuses an empty list; keep returning an empty frame.
        return pd.DataFrame()

    return pd.concat(rep_frames)

In [None]:
def plot_gene_activity(base_path, plot_sc=False, plot_genome_size=False):
    """Plot, for each replicate and environment, gene activation through time.

    For every ``rep*`` sub-directory of ``base_path`` with a non-empty
    ``stats.csv``, one figure is drawn per environment ("A" and "B"). It shows
    the proportion of activated genes of each type (AB, A, B) against the
    generation on a log-scaled x axis, together with the fitness on a second,
    log-scaled y axis. Each figure is saved as
    ``{base_path}/rep_{rep_num}_env_{env}`` and then closed.

    Relies on the notebook globals ``dpi``, ``label_fontsize``,
    ``tick_fontsize`` and ``legend_fontsize``.

    Args:
        base_path: experiment directory (``pathlib.Path`` or ``str``); it is
            both where the replicates are read from and where the figures are
            written.
        plot_sc: currently unused; kept so that existing calls keep working.
        plot_genome_size: when true, add a third y axis showing the
            "Genome size" column.
    """
    base_path = pathlib.Path(base_path)

    # Look for the replicates under the directory that was passed in; the
    # global `exp_path` must not be used here, or the argument is ignored.
    rep_dirs = sorted(d for d in base_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    # (column prefix, line colour, legend label) of each gene type.
    gene_curves = (("AB", "tab:blue", "AB genes on"),
                   ("A", "tab:red", "A genes on"),
                   ("B", "tab:green", "B genes on"))

    for rep_dir in rep_dirs:
        stats_path = rep_dir.joinpath('stats.csv')
        if stats_path.stat().st_size == 0:
            continue  # empty file: nothing to plot for this replicate

        data = pd.read_csv(stats_path)
        # Generation 0 cannot be placed on the log-scaled x axis.
        data = data[data['Gen'] > 0]

        # Text following the `rep` prefix of the directory name; the output
        # file name puts a `_` between `rep` and it.
        rep_num = rep_dir.name[3:]

        for env in ["A", "B"]:

            fig, ax1 = plt.subplots(figsize=(9, 4), dpi=dpi)
            ax1.set_ylim(-0.05, 1.05)
            ax1.set_ylabel('Proportion of activated genes', fontsize=label_fontsize)
            ax1.set_xlabel('Generation', fontsize=label_fontsize)
            ax1.set_xscale('log')
            ax1.grid()

            ## 1st axis: proportion on / (on + off) of each gene type
            for gene_type, color, label in gene_curves:
                nb_on = data[f"{gene_type}on_{env}"]
                nb_off = data[f"{gene_type}off_{env}"]
                ax1.plot(data['Gen'], nb_on / (nb_on + nb_off),
                         color=color,
                         linewidth=2,
                         label=label)
            ax1.tick_params(axis='both', which='major', labelsize=tick_fontsize)

            ## 2nd axis: fitness
            ax2 = ax1.twinx()
            ax2.set_yscale('log')
            ax2.set_ylim(1e-25, 1e0)
            ax2.set_ylabel('Fitness', fontsize=label_fontsize, color='tab:cyan')
            ax2.plot(data['Gen'],
                     data["Fitness"],
                     color="tab:cyan",
                     linewidth=2)
            ax2.tick_params(axis='both', which='major', labelsize=tick_fontsize)

            ## 3rd axis: genome size
            if plot_genome_size:
                ax3 = ax1.twinx()

                # Shift the extra spine outward so it does not overlap the fitness axis.
                ax3.spines['right'].set_position(('outward', 80))
                ax3.set_ylim(0.7e4, 6.3e4)
                ax3.set_ylabel('Genome Size', fontsize=label_fontsize, color='tab:olive')
                ax3.plot(data['Gen'],
                         data["Genome size"],
                         color="tab:olive",
                         linewidth=2)
                ax3.tick_params(axis='both', which='major', labelsize=tick_fontsize)

            # Figure-level legend, anchored to the lower-left corner of the main axes.
            fig.legend(bbox_to_anchor=(0, 0),
                       bbox_transform=ax1.transAxes,
                       loc="lower left",
                       fontsize=legend_fontsize)

            fig.savefig(f'{base_path}/rep_{rep_num}_env_{env}', dpi=dpi, bbox_inches='tight')

            # Close only the figure created here, leaving any other open figure alone.
            plt.close(fig)


In [None]:
# Draw and save the gene-activity figures of the experiment (fitness axis only,
# since `plot_genome_size` keeps its default of False).
plot_gene_activity(exp_path)

In [None]:
# Statistics of all replicates in a single DataFrame, with a `Replicate` column;
# this frame is the input of the plotting and testing functions below.
full_stats = get_stats(exp_path)

In [None]:
def plot_mean_std(full_stats):
    """Bar chart of the activated genes at the last common generation.

    The statistics are taken at the last generation reached by every
    replicate (the smallest of the per-replicate maximum generations) and
    divided by the global ``nb_genes_type``. For each gene type (AB, A, B),
    the mean over replicates is drawn as one bar per environment, with the
    standard deviation over replicates as error bar. The figure is saved as
    ``{exp_path}/mean_activation``.

    Relies on the notebook globals ``nb_genes_type``, ``exp_path``, ``dpi``,
    ``label_fontsize``, ``tick_fontsize`` and ``legend_fontsize``.

    Args:
        full_stats: DataFrame with at least the ``Replicate`` and ``Gen``
            columns and the ``{AB,A,B}on_{A,B}`` columns.
    """
    # Last generation that is present in every replicate.
    final_gen = np.min(full_stats.groupby('Replicate').max()['Gen'])
    at_final_gen = full_stats[full_stats['Gen'] == final_gen] / nb_genes_type

    means = at_final_gen.mean()
    stds = at_final_gen.std()

    fig, ax = plt.subplots(figsize=(9, 4), dpi=dpi)

    gene_prefixes = ["ABon", "Aon", "Bon"]  # 3 types of genes
    positions = np.arange(len(gene_prefixes))
    bar_width = 0.35

    # One group of bars per environment: A on the left of each tick, B on the right.
    for env, offset in (("A", -bar_width / 2), ("B", bar_width / 2)):
        columns = [f"{prefix}_{env}" for prefix in gene_prefixes]
        ax.bar(positions + offset,
               np.array([means[col] for col in columns]),
               width=bar_width,
               label=f'Environment {env}',
               yerr=np.array([stds[col] for col in columns]),
               capsize=5)

    ax.set_ylabel('Activated genes', fontsize=label_fontsize)
    ax.set_xticks(positions)
    ax.set_xticklabels(["AB genes on", "A genes on", "B genes on"])

    ax.grid(linestyle=':', axis='y')
    ax.tick_params(axis='both', which='major', labelsize=tick_fontsize)
    ax.legend(fontsize=legend_fontsize, loc='upper center')

    plt.savefig(f'{exp_path}/mean_activation', bbox_inches='tight')


In [None]:
# Save the bar chart of mean activated genes per gene type and environment.
plot_mean_std(full_stats)

In [None]:
def stats_tests(full_stats):
    """Compare gene activation between environments A and B with paired t-tests.

    The statistics are taken at the last generation reached by every
    replicate and divided by the global ``nb_genes_type``. For each gene type
    (AB, A, B), ``scipy.stats.ttest_rel`` is run between the ``*on_A`` and
    ``*on_B`` columns, and the three results are printed.

    Args:
        full_stats: DataFrame with at least the ``Replicate`` and ``Gen``
            columns and the ``{AB,A,B}on_{A,B}`` columns.
    """
    final_gen = np.min(full_stats.groupby('Replicate').max()['Gen'])
    final_stats = full_stats[full_stats["Gen"] == final_gen] / nb_genes_type

    # (printed label, column prefix) of each gene type.
    comparisons = (('AB genes', 'ABon'),
                   ('A genes on', 'Aon'),
                   ('B genes on', 'Bon'))

    # Run every test before printing anything.
    results = [
        (label, stats.ttest_rel(final_stats[f"{prefix}_A"], final_stats[f"{prefix}_B"]))
        for label, prefix in comparisons
    ]

    for label, result in results:
        print(f'{label}: {result}')

In [None]:
# Print the paired t-test results comparing environments A and B.
stats_tests(full_stats)

In [None]:
def plot_expr_AB(indiv, sigma_A, sigma_B, plot_title, plot_name):
    """Plot the expression of every gene of an individual in both environments.

    The individual is evaluated with ``indiv.evaluate(sigma_A, sigma_B)`` and
    the two returned expression arrays are drawn in two stacked subplots
    (environment A on top, environment B below), one line per gene. Lines are
    coloured by gene type and are solid when the gene's ``orientation`` is 0,
    dashed otherwise. The figure is saved to ``plot_name`` and closed.

    Relies on the notebook globals ``dpi``, ``label_fontsize`` and
    ``tick_fontsize``.

    Args:
        indiv: individual exposing ``evaluate``, ``nb_genes`` and ``genes``.
        sigma_A: first argument forwarded to ``indiv.evaluate``.
        sigma_B: second argument forwarded to ``indiv.evaluate``.
        plot_title: accepted but not used (no title is drawn).
        plot_name: path of the file the figure is saved to.
    """
    (temporal_expr_A, temporal_expr_B), _fitness = indiv.evaluate(sigma_A, sigma_B)

    # Indexed by gene type -- AB: blue, A: red, B: green
    type_colors = ['tab:blue', 'tab:red', 'tab:green']

    plt.figure(figsize=(9, 8), dpi=dpi)

    # (subplot row, expression array, whether to label the x axis):
    # only the bottom subplot carries the 'Time' label.
    panels = ((1, temporal_expr_A, False),
              (2, temporal_expr_B, True))

    for row, temporal_expr, with_xlabel in panels:
        plt.subplot(2, 1, row)
        plt.ylim(-0.05, 1.05)

        for i_gene in range(indiv.nb_genes):
            gene = indiv.genes[i_gene]
            if gene.orientation == 0:
                linestyle = 'solid'
            else:
                linestyle = 'dashed'
            plt.plot(temporal_expr[gene.id, :],
                     linestyle=linestyle,
                     linewidth=2,
                     color=type_colors[gene.gene_type],
                     label=f'Gene {gene.id}')

        plt.grid(linestyle=':')
        if with_xlabel:
            plt.xlabel('Time', fontsize=label_fontsize)
        plt.ylabel('Expression level', fontsize=label_fontsize)

        plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    ## Lay out, write to disk and release the figure
    plt.tight_layout()
    plt.savefig(plot_name, dpi=dpi, bbox_inches='tight')
    plt.close()


In [None]:
def plot_fitness(full_stats):
    """Plot the fitness of every replicate against the generation (log-log).

    One line is drawn per distinct value of the ``Replicate`` column, each
    with its own colour from the 'viridis' colormap. Rows of generation 0 are
    left out, as they cannot be placed on the log-scaled x axis. The figure
    is saved as ``{exp_path}/all_fitness``.

    Relies on the notebook globals ``exp_path``, ``dpi``, ``label_fontsize``
    and ``tick_fontsize``.

    Args:
        full_stats: DataFrame with the ``Replicate``, ``Gen`` and ``Fitness``
            columns.
    """
    # Use the replicate ids actually present, instead of assuming they are
    # exactly 0..n-1.
    rep_ids = sorted(full_stats["Replicate"].dropna().unique())
    nb_rep = len(rep_ids)

    # `matplotlib.cm.get_cmap` is deprecated and absent from recent matplotlib
    # releases; `plt.get_cmap` takes the same (name, lut) arguments.
    colors = plt.get_cmap('viridis', nb_rep)(range(nb_rep))

    plt.figure(figsize=(9,4), dpi=dpi)

    plt.xscale('log')
    plt.yscale('log')
    plt.grid(linestyle=':')

    plt.xlabel('Generation', fontsize=label_fontsize)
    plt.ylabel('Fitness', fontsize=label_fontsize)

    plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    for color, rep in zip(colors, rep_ids):
        stats_rep = full_stats[(full_stats["Replicate"] == rep) & (full_stats["Gen"] > 0)]
        plt.plot(stats_rep['Gen'],
                 stats_rep["Fitness"],
                 linewidth=2,
                 color=color)

    plt.savefig(f'{exp_path}/all_fitness', dpi=dpi, bbox_inches='tight')

In [None]:
# Save the fitness curves of all replicates in a single figure.
plot_fitness(full_stats)

In [None]:
def plot_sigma(full_stats):
    """Plot the ``basal_sc`` column of every replicate against the generation.

    Does nothing when ``full_stats`` has no ``basal_sc`` column. Otherwise one
    line is drawn per distinct value of the ``Replicate`` column (rows of
    generation 0 left out), each with its own colour from the 'viridis'
    colormap, on linear axes. The figure is saved as
    ``{exp_path}/all_basal_sc``.

    Relies on the notebook globals ``exp_path``, ``dpi``, ``label_fontsize``
    and ``tick_fontsize``.

    Args:
        full_stats: DataFrame with the ``Replicate`` and ``Gen`` columns and,
            optionally, the ``basal_sc`` column.
    """
    if 'basal_sc' not in full_stats.columns:
        return

    # Use the replicate ids actually present, instead of assuming they are
    # exactly 0..n-1.
    rep_ids = sorted(full_stats["Replicate"].dropna().unique())
    nb_rep = len(rep_ids)

    # `matplotlib.cm.get_cmap` is deprecated and absent from recent matplotlib
    # releases; `plt.get_cmap` takes the same (name, lut) arguments.
    colors = plt.get_cmap('viridis', nb_rep)(range(nb_rep))

    fig, ax1 = plt.subplots(figsize=(9,4), dpi=dpi)

    plt.grid(linestyle=':')

    ax1.set_xlabel('Generation', fontsize=label_fontsize)
    ax1.set_ylabel('Basal SC', fontsize=label_fontsize)

    plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    for color, rep in zip(colors, rep_ids):
        stats_rep = full_stats[(full_stats["Replicate"] == rep) & (full_stats["Gen"] > 0)]
        ax1.plot(stats_rep["Gen"],
                 stats_rep["basal_sc"],
                 linewidth=2,
                 color=color)

    plt.savefig(f'{exp_path}/all_basal_sc', dpi=dpi, bbox_inches='tight')

In [None]:
# Save the `basal_sc` curves of all replicates (no-op when the column is absent).
plot_sigma(full_stats)

In [None]:
def plot_genome_size(full_stats):
    """Plot the genome size of every replicate against the generation.

    Does nothing when ``full_stats`` has no ``Genome size`` column. Otherwise
    one line is drawn per distinct value of the ``Replicate`` column (rows of
    generation 0 left out), each with its own colour from the 'viridis'
    colormap, on linear axes with the y axis fixed to [0, 63000]. The figure
    is saved as ``{exp_path}/all_genome_size.png``.

    Relies on the notebook globals ``exp_path``, ``dpi``, ``label_fontsize``
    and ``tick_fontsize``.

    Args:
        full_stats: DataFrame with the ``Replicate`` and ``Gen`` columns and,
            optionally, the ``Genome size`` column.
    """
    if 'Genome size' not in full_stats.columns:
        return

    # Use the replicate ids actually present, instead of assuming they are
    # exactly 0..n-1.
    rep_ids = sorted(full_stats["Replicate"].dropna().unique())
    nb_rep = len(rep_ids)

    # `matplotlib.cm.get_cmap` is deprecated and absent from recent matplotlib
    # releases; `plt.get_cmap` takes the same (name, lut) arguments.
    colors = plt.get_cmap('viridis', nb_rep)(range(nb_rep))

    fig, ax1 = plt.subplots(figsize=(9,4), dpi=dpi)

    plt.grid(linestyle=':')

    ax1.set_xlabel('Generation', fontsize=label_fontsize)
    ax1.set_ylabel('Genome Size', fontsize=label_fontsize)

    ax1.set_ylim(0, 63000)

    plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    for color, rep in zip(colors, rep_ids):
        stats_rep = full_stats[(full_stats["Replicate"] == rep) & (full_stats["Gen"] > 0)]
        ax1.plot(stats_rep["Gen"],
                 stats_rep["Genome size"],
                 linewidth=2,
                 color=color)

    plt.savefig(f'{exp_path}/all_genome_size.png', dpi=dpi, bbox_inches='tight')

In [None]:
# Save the genome-size curves of all replicates (no-op when the column is absent).
plot_genome_size(full_stats)

In [None]:
def plot_best(gen):
    """Plot the gene expression of each replicate's saved best individual.

    For every ``rep*`` sub-directory of the global ``exp_path`` (in sorted
    order), the file ``best_gen_{gen:06}.evotsc`` is unpickled and passed to
    ``plot_expr_AB`` with the ``sigma_A`` and ``sigma_B`` values of the global
    ``exp_params``. The figure of the replicate at position ``rep`` is saved
    as ``{exp_path}/best_rep{rep}.png``.

    Args:
        gen: generation number of the save files to load (zero-padded to 6
            digits in the file name).
    """
    rep_dirs = [d for d in exp_path.iterdir()
                if d.is_dir() and d.name.startswith("rep")]
    rep_dirs.sort()

    for rep, rep_dir in enumerate(rep_dirs):
        save_path = rep_dir.joinpath(f'best_gen_{gen:06}.evotsc')

        # NOTE: unpickling can run arbitrary code -- only load save files
        # that come from a trusted source.
        with open(save_path, 'rb') as save_file:
            best_rep = pickle.load(save_file)

        plot_expr_AB(best_rep,
                     sigma_A=exp_params['sigma_A'],
                     sigma_B=exp_params['sigma_B'],
                     plot_title='',
                     plot_name=f'{exp_path}/best_rep{rep}.png')

In [None]:
# Plot the individuals saved in the `best_gen_020000.evotsc` file of each replicate.
plot_best(20_000)