In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import datetime
import pickle
import pathlib

In [None]:
# Re-import the project modules so that edits made to their source files are
# picked up without restarting the kernel.
# NOTE(review): `autoreload.reload` is presumably `importlib.reload` re-exported
# by IPython's autoreload extension module — confirm it is importable in the
# target environment.
import autoreload
import evotsc
import evotsc_plot
autoreload.reload(evotsc)
autoreload.reload(evotsc_plot)

In [None]:
# Experiment directories and shared lookup tables used by every cell below.
# NOTE(review): these are absolute, machine-specific paths; the notebook only
# runs as-is on a machine where they exist.
exp_path = pathlib.Path('/Users/theotime/Desktop/evotsc/vanilla_200k/')
neutral_exp_path = pathlib.Path('/Users/theotime/Desktop/evotsc/neutral_100k/')
# Generation whose saved population is analysed for `exp_path`.
gen=200_000
# The three lists below are indexed by the integer codes stored on genes
# (`gene.gene_type`, `gene.orientation`).
gene_types = ['AB', 'A', 'B'] # Name of each gene type
gene_type_color = ['tab:blue', 'tab:red', 'tab:green'] #AB, A, B
orient_name = ['leading', 'lagging'] # Name of each gene orientation
# Relative orientations of two neighbouring genes, in the order used on plots.
rel_orient = ['conv', 'div', 'upstr', 'downstr']

In [None]:
# Shared figure styling: font sizes (points) and resolution for figures.
label_fontsize=20
tick_fontsize=15
legend_fontsize=15
dpi=300

In [None]:
def get_params(exp_path):
    """Read the run parameters of an experiment.

    The parameters are taken from the `params.txt` file of the first
    (alphabetically) `rep*` sub-directory of `exp_path`. Each non-blank line
    is expected to look like `name: value`.

    Args:
        exp_path: `pathlib.Path` of the experiment directory.

    Returns:
        dict mapping parameter names to values. Values are converted to
        `float` when possible; the `commit` entry and any value that is not a
        valid number are kept as stripped strings.

    Raises:
        FileNotFoundError: if `exp_path` contains no `rep*` directory.
        ValueError: if a non-blank line contains no ':' separator.
    """
    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))
    if not rep_dirs:
        raise FileNotFoundError(f'No "rep*" directory found in {exp_path}')

    params = {}
    with open(rep_dirs[0].joinpath('params.txt'), 'r') as params_file:
        for line in params_file:
            if not line.strip():
                continue  # Tolerate blank / trailing lines

            # Split on the first ':' only, so values may themselves contain ':'
            param_name, separator, raw_val = line.partition(':')
            if not separator:
                raise ValueError(f'Malformed parameter line: {line!r}')

            param_name = param_name.strip()
            raw_val = raw_val.strip()

            if param_name == 'commit':
                # A commit hash can look like a number (e.g. '1e5'): keep it as text
                param_val = raw_val
            else:
                try:
                    param_val = float(raw_val)
                except ValueError:
                    param_val = raw_val

            params[param_name] = param_val

    return params

In [None]:
# Run parameters of the main experiment, read from its first replicate directory.
params = get_params(exp_path)

In [None]:
# Display the loaded parameters.
params

In [None]:
def get_best_indiv(rep_path, gen):
    """Load a saved population and return its fittest individual.

    Args:
        rep_path: `pathlib.Path` of a replicate directory containing
            `pop_gen_<gen>.evotsc` save files.
        gen: generation number of the save file to load.

    Returns:
        The individual with the highest `fitness` (the first one in case of a
        tie). If no individual carries a `fitness` attribute — as in the
        neutral control, where individuals are not evaluated — the first
        individual of the population is returned.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only open save
    # files produced by a trusted evotsc run.
    with open(rep_path.joinpath(f'pop_gen_{gen}.evotsc'), 'rb') as save_file:
        pop_rep = pickle.load(save_file)

    pop_rep.evaluate()

    evaluated = [indiv for indiv in pop_rep.individuals if hasattr(indiv, 'fitness')]
    if not evaluated:
        return pop_rep.individuals[0]

    # No artificial floor on the fitness: the best individual is returned even
    # when every fitness value is zero or negative.
    return max(evaluated, key=lambda indiv: indiv.fitness)

# Plot genomes

In [None]:
def plot_best_genome(exp_path, gen):
    """Draw the genome of the best individual of every replicate.

    For each `rep*` sub-directory of `exp_path` (in sorted order), the best
    individual at generation `gen` is passed to `evotsc_plot.plot_genome`,
    with `genome_rep<NN>.png` inside `exp_path` as the output name.
    """
    rep_dirs = [entry for entry in exp_path.iterdir()
                if entry.is_dir() and entry.name.startswith("rep")]
    rep_dirs.sort()

    for rep, rep_dir in enumerate(rep_dirs):
        fittest = get_best_indiv(rep_dir, gen)
        out_file = exp_path.joinpath(f'genome_rep{rep:02}.png')
        evotsc_plot.plot_genome(fittest, name=out_file)

In [None]:
# Draw the genome of the best individual of each replicate at generation `gen`.
plot_best_genome(exp_path, gen)

# Plot gene pairs

In [None]:
def get_gene_stats(indiv):
    """Describe every pair of neighbouring genes of an individual.

    The gene list is treated as circular. Each gene contributes two rows: one
    for the pair formed with the previous gene and one for the pair formed
    with the next gene. Each row holds the id, type name and orientation name
    of the focal gene and of its neighbour, their relative orientation
    ('conv', 'div', 'upstr' or 'downstr', seen from the focal gene) and the
    intergenic distance stored on the left-hand gene of the pair.

    Returns:
        A pandas DataFrame with columns gene_id, gene_type, gene_orient,
        other_id, other_type, other_orient, rel_orient and distance.
    """
    columns = ('gene_id', 'gene_type', 'gene_orient',
               'other_id', 'other_type', 'other_orient',
               'rel_orient', 'distance')
    table = {column: [] for column in columns}

    def add_pair(focal, neighbour, focal_is_right):
        # `left` precedes `right` along the genome
        if focal_is_right:
            left, right = neighbour, focal
        else:
            left, right = focal, neighbour

        if left.orientation == 0 and right.orientation == 1:      # ---> <---
            label = 'conv'
        elif left.orientation == 0 and right.orientation == 0:    # ---> --->
            label = 'downstr' if focal_is_right else 'upstr'
        elif left.orientation == 1 and right.orientation == 0:    # <--- --->
            label = 'div'
        else:                                                     # <--- <---
            label = 'upstr' if focal_is_right else 'downstr'

        table['gene_id'].append(focal.id)
        table['gene_type'].append(gene_types[focal.gene_type])
        table['gene_orient'].append(orient_name[focal.orientation])
        table['other_id'].append(neighbour.id)
        table['other_type'].append(gene_types[neighbour.gene_type])
        table['other_orient'].append(orient_name[neighbour.orientation])
        table['rel_orient'].append(label)
        # The distance between the two genes is stored on the left-hand one
        table['distance'].append(left.intergene)

    genes = indiv.genes
    for position, gene in enumerate(genes):
        # Pair with the previous gene (index -1 wraps to the last gene) ...
        add_pair(gene, genes[position - 1], focal_is_right=True)
        # ... then with the next gene
        add_pair(gene, genes[(position + 1) % indiv.nb_genes], focal_is_right=False)

    return pd.DataFrame(table)

In [None]:
def get_full_stats(exp_path, gen):
    """Gather the gene-pair statistics of the best individual of each replicate.

    Every `rep*` sub-directory of `exp_path` is visited in sorted order; the
    table returned by `get_gene_stats` for its best individual at generation
    `gen` gets a leading 'Replicate' column (the replicate's rank) and all
    tables are stacked into one DataFrame.
    """
    rep_dirs = [entry for entry in exp_path.iterdir()
                if entry.is_dir() and entry.name.startswith("rep")]
    rep_dirs.sort()

    # Start from an empty frame so that an experiment without replicates still
    # yields an (empty) DataFrame
    tables = [pd.DataFrame()]

    for rep, rep_dir in enumerate(rep_dirs):
        rep_table = get_gene_stats(get_best_indiv(rep_dir, gen))
        rep_table.insert(0, 'Replicate', rep)
        tables.append(rep_table)

    return pd.concat(tables)

In [None]:
# Gene-pair statistics of the best individuals of the main experiment.
evol_stats = get_full_stats(exp_path, gen)

In [None]:
# Same statistics for the neutral control.
# NOTE(review): the generation (100000) is hard-coded here rather than named.
neutral_stats = get_full_stats(neutral_exp_path, 100000)

In [None]:
def plot_gene_stats(stats, plot_name, count_bars):
    """Plot a grid of bar charts describing pairs of neighbouring genes.

    One panel is drawn per (focal gene type, neighbour gene type) pair, with
    one bar per relative orientation, in the order of the notebook-level
    `rel_orient` list.

    Args:
        stats: DataFrame with (at least) the columns 'Replicate', 'gene_type',
            'other_type', 'rel_orient' and 'distance'.
        plot_name: file name or path handed to `savefig`.
        count_bars: if true, the bars show the number of pairs of each kind per
            replicate and are annotated with the mean distance; if false, the
            bars show the mean distance and are annotated with the total
            number of pairs of that kind.
    """
    group_cols = ['gene_type', 'other_type', 'rel_orient']
    grouped = stats.groupby(group_cols)

    pair_counts = grouped.size()
    # Only 'distance' is averaged: the other columns hold strings, on which a
    # mean is undefined (and an error in recent pandas versions)
    mean_dists = grouped['distance'].mean()

    nb_reps = stats['Replicate'].nunique()
    nb_types = len(gene_types)

    colors = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red']

    x = np.arange(len(rel_orient))

    fig, axs = plt.subplots(nb_types, nb_types, sharey='all', figsize=(12, 12),
                            dpi=300, squeeze=False)

    for i_gene_type, gene_type in enumerate(gene_types):
        for i_other_type, other_type in enumerate(gene_types):

            # Select the values of this panel in the same order as the tick
            # labels (`rel_orient`), filling absent pair kinds with 0
            panel_idx = pd.MultiIndex.from_product(
                [[gene_type], [other_type], rel_orient], names=group_cols)
            panel_counts = pair_counts.reindex(panel_idx, fill_value=0).to_numpy()
            panel_dists = mean_dists.reindex(panel_idx, fill_value=0).to_numpy()

            if count_bars:
                bar_data = panel_counts / nb_reps
                text_data = panel_dists
            else:
                # The mean distance is already an average over all pairs, so it
                # must not be divided by the number of replicates
                bar_data = panel_dists
                text_data = panel_counts

            ax = axs[i_gene_type][i_other_type]

            rects = ax.bar(x, bar_data, color=colors)

            if count_bars:
                ax.set_ylim(0, 20)

            ax.set_xticks(x)
            ax.set_xticklabels(labels=rel_orient, fontsize="large")

            # Annotate each bar with the complementary statistic
            for rect, text_val in zip(rects, text_data):
                ax.annotate(f"{text_val:.1f}",
                            xy=(rect.get_x() + rect.get_width()/2, 0),
                            xytext=(0, 3),
                            ha='center',
                            textcoords="offset points",
                            color='black')

            ax.grid(True, axis='y', linestyle=':')

            if i_gene_type == 0: # First line
                ax.set_title(other_type, fontsize='x-large')
            if i_other_type == 0:
                ax.set_ylabel(gene_type, rotation='horizontal', ha='right', fontsize='x-large')

    fig.savefig(plot_name, dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# Main experiment: bars show pair counts, annotations show mean distances.
plot_gene_stats(evol_stats, count_bars=True, plot_name=exp_path.joinpath('gene_pair_counts.png'))

In [None]:
# Main experiment: bars show distances, annotations show pair counts.
plot_gene_stats(evol_stats, count_bars=False, plot_name=exp_path.joinpath('gene_pair_distances.png'))

In [None]:
# Neutral control: bars show pair counts, annotations show mean distances.
plot_gene_stats(neutral_stats, count_bars=True, plot_name=neutral_exp_path.joinpath('gene_pair_counts.png'))

In [None]:
#plot_gene_stats(neutral_stats, count_bars=False, plot_name=neutral_exp_path.joinpath('gene_pair_distances.png'))

# Plot intergene distribution

In [None]:
def get_intergene_distances(exp_path, gen):
    """Collect the intergenic distances of the best individual of each replicate.

    Returns a DataFrame with one row per gene and two columns: 'Replicate'
    (rank of the replicate directory in sorted order) and 'Intergene' (the
    gene's `intergene` value, stored as an integer).
    """
    rep_dirs = [entry for entry in exp_path.iterdir()
                if entry.is_dir() and entry.name.startswith("rep")]
    rep_dirs.sort()

    # Seeded with an empty frame so that no replicate still gives a DataFrame
    tables = [pd.DataFrame()]

    for rep, rep_dir in enumerate(rep_dirs):
        fittest = get_best_indiv(rep_dir, gen)

        distances = np.zeros(fittest.nb_genes)
        for position, gene in enumerate(fittest.genes):
            distances[position] = gene.intergene

        rep_table = pd.DataFrame(data={'Intergene': distances}, dtype=int)
        rep_table.insert(0, 'Replicate', rep)
        tables.append(rep_table)

    return pd.concat(tables)

In [None]:
def plot_intergenes(exp_path, neutral_exp_path, gen, cutoff, plot_name, neutral_gen=100_000):
    """Plot side-by-side histograms of intergenic distances (log10 scale).

    The left panel shows the best individuals of `exp_path` at generation
    `gen`, the right panel those of `neutral_exp_path` at generation
    `neutral_gen`. A dashed vertical line marks `cutoff` on both panels.

    Args:
        exp_path: directory of the experiment with selection.
        neutral_exp_path: directory of the neutral control.
        gen: generation analysed in `exp_path`.
        cutoff: distance at which the vertical line is drawn.
        plot_name: file name or path handed to `savefig`.
        neutral_gen: generation analysed in `neutral_exp_path` (defaults to
            the previously hard-coded value, 100000).
    """
    datasets = [('With selection', get_intergene_distances(exp_path, gen)),
                ('Without selection', get_intergene_distances(neutral_exp_path, neutral_gen))]

    bins = np.linspace(0.0, 4.0, 40)

    fig, axes = plt.subplots(1, 2, figsize=(9, 4), dpi=300)

    for ax, (title, table) in zip(axes, datasets):
        distances = table['Intergene'].to_numpy()
        # log10 is undefined for distances <= 0; such values fall outside the
        # histogram bins anyway, so dropping them only avoids runtime warnings
        distances = distances[distances > 0]

        # Plot intergene distances
        ax.hist(np.log10(distances), bins=bins, density=True)
        # Plot cutoff line
        ax.vlines(np.log10(cutoff), 0, 1, linestyle='--', linewidth=1,
                  color='tab:red', label='Cutoff distance')

        ax.set_ylim(0, 1)
        ax.set_xlim(-0.2, 4.2)
        ax.set_xlabel('Distance (log)')
        ax.set_ylabel('Density')
        ax.grid(linestyle=':')
        ax.set_title(title)
        ax.legend(loc='upper left')

    fig.savefig(plot_name, dpi=300)
    plt.show()

In [None]:
# Compare intergenic distances with and without selection; the vertical line
# is drawn at the 'interaction_dist' parameter.
plot_intergenes(exp_path, neutral_exp_path, gen=gen, cutoff=params['interaction_dist'],
                plot_name=exp_path.joinpath('intergene_distr.png'))

# Plot local sigma

In [None]:
def get_sigma_tsc(indiv, sigma_env: float):
    """Compute the supercoiling each gene receives from the other genes over time.

    Expression levels start at each gene's `basal_expression`. At every later
    step, the local supercoiling is the product of `indiv.inter_matrix` with
    the previous expression levels, and the new expression levels are a
    sigmoid of the total supercoiling (basal + local + environment) centred on
    `indiv.sigma_opt` with scale `indiv.epsilon`.

    Returns:
        Array of shape (nb_genes, nb_eval_steps) holding the local
        supercoiling at each step; column 0 is left at zero.
    """
    shape = (indiv.nb_genes, indiv.nb_eval_steps)
    expression = np.zeros(shape)
    local_sc = np.zeros(shape)

    # Expression levels at t = 0
    expression[:, 0] = np.array([gene.basal_expression for gene in indiv.genes])

    for step in range(1, indiv.nb_eval_steps):
        interaction = indiv.inter_matrix @ expression[:, step - 1]
        local_sc[:, step] = interaction

        total_sc = indiv.sigma_basal + interaction + sigma_env
        exponent = (total_sc - indiv.sigma_opt)/indiv.epsilon
        expression[:, step] = 1.0 / (1.0 + np.exp(exponent))

    return local_sc

In [None]:
def plot_sigma_tsc(indiv, sigma_env, all_sigma=False, name=None):
    """Plot the supercoiling of every gene of an individual over time.

    Args:
        indiv: individual to simulate.
        sigma_env: supercoiling level imposed by the environment.
        all_sigma: if true, plot the total supercoiling (local + basal +
            environment); otherwise only the local, interaction-generated part.
        name: optional output file; the figure is saved only when it is given.

    One curve is drawn per gene, coloured by gene type. The dashed line marks
    `indiv.sigma_opt`, the midpoint of the expression sigmoid used by the
    simulation.
    """
    colors = ['tab:blue', 'tab:red', 'tab:green'] #AB, A, B

    sigma_tsc = get_sigma_tsc(indiv, sigma_env=sigma_env)

    fig, ax = plt.subplots(figsize=(9, 6), dpi=300)
    for gene, tsc_val in zip(indiv.genes, sigma_tsc):
        if all_sigma:
            # Build a new array rather than modifying `sigma_tsc` through a view
            tsc_val = tsc_val + (indiv.sigma_basal + sigma_env)
        ax.plot(tsc_val, color=colors[gene.gene_type])

    # Add horizontal activation line
    ax.axhline(indiv.sigma_opt, linestyle='--', linewidth=1,
               color='tab:red', label='Half activation threshold')

    # Inverted y axis: more negative supercoiling is drawn higher up
    ax.set_ylim(1, -1)

    ax.set_xlabel('Time')
    if all_sigma:
        ax.set_ylabel('Total supercoiling')
    else:
        ax.set_ylabel('TSC supercoiling')
    ax.set_title(f'Environment SC: {sigma_env:.2}')

    ax.grid(linestyle=':')

    if name:
        fig.savefig(name, dpi=300, bbox_inches='tight')

    plt.show()

In [None]:
def plot_avg_sigma_tsc(indiv, sigma_env, all_sigma=False, title=None, name=None):
    """Plot the mean supercoiling of each gene type of an individual over time.

    Args:
        indiv: individual to simulate.
        sigma_env: supercoiling level imposed by the environment.
        all_sigma: if true, plot the total supercoiling (local + basal +
            environment); otherwise only the local part.
        title: optional plot title.
        name: optional output file; the figure is saved only when it is given.

    The mean of each type is taken over the genes that actually have that
    type, so the gene types need not be equally represented. A type without
    any gene yields a NaN (invisible) curve.
    """
    colors = ['tab:blue', 'tab:red', 'tab:green'] #AB, A, B

    sigma_tsc = get_sigma_tsc(indiv, sigma_env=sigma_env)

    nb_types = len(gene_types)
    type_sums = np.zeros((nb_types, indiv.nb_eval_steps))
    type_counts = np.zeros(nb_types)
    for gene, gene_tsc in zip(indiv.genes, sigma_tsc):
        type_sums[gene.gene_type] += gene_tsc
        type_counts[gene.gene_type] += 1

    with np.errstate(divide='ignore', invalid='ignore'):
        avg_sigma_tsc = type_sums / type_counts[:, np.newaxis]

    if all_sigma:
        avg_sigma_tsc += indiv.sigma_basal + sigma_env

    fig, ax = plt.subplots(figsize=(9, 4), dpi=300)

    for i_gene_type, gene_type in enumerate(gene_types):
        ax.plot(avg_sigma_tsc[i_gene_type], color=colors[i_gene_type], label=f'{gene_type} genes')

    ax.set_xlabel('Time')

    # Add horizontal activation line, at the midpoint of the expression
    # sigmoid used by the simulation
    ax.axhline(indiv.sigma_opt, linestyle='--', linewidth=1,
               color='tab:red', label='Half activation threshold')

    if not all_sigma:
        ax.set_ylabel('TSC supercoiling')
    else:
        ax.set_ylabel('Total supercoiling')

    # Inverted y axis: more negative supercoiling is drawn higher up
    ax.set_ylim(0.25, -0.85)

    ax.grid(linestyle=':')
    ax.legend(loc='upper left')

    if title:
        ax.set_title(title)

    if name:
        fig.savefig(name, dpi=300, bbox_inches='tight')

    plt.show()

In [None]:
def plot_med_sigma_tsc(indiv, sigma_env, all_sigma=False, title=None, name=None):
    """Plot the median supercoiling of each gene type of an individual over time.

    Args:
        indiv: individual to simulate.
        sigma_env: supercoiling level imposed by the environment.
        all_sigma: if true, plot the total supercoiling (local + basal +
            environment); otherwise only the local part.
        title: optional plot title.
        name: optional output file; the figure is saved only when it is given.

    The dashed line marks `indiv.sigma_opt`, the midpoint of the expression
    sigmoid used by the simulation.
    """
    sigma_tsc = get_sigma_tsc(indiv, sigma_env=sigma_env)

    fig, ax = plt.subplots(figsize=(9, 4), dpi=300)

    for i_gene_type, gene_type in enumerate(gene_types):
        # One row per gene of this type, one column per time step
        type_tsc = pd.DataFrame([sigma_tsc[i_gene, :] for i_gene in range(indiv.nb_genes)
                                 if indiv.genes[i_gene].gene_type == i_gene_type])

        if all_sigma:
            type_tsc += indiv.sigma_basal + sigma_env

        ax.plot(type_tsc.median(), color=gene_type_color[i_gene_type], label=f'{gene_type} genes')

    ax.set_xlabel('Time')

    # Add horizontal activation line
    ax.axhline(indiv.sigma_opt, linestyle='--', linewidth=1,
               color='tab:red', label='Half activation threshold')

    if not all_sigma:
        ax.set_ylabel('TSC supercoiling')
    else:
        ax.set_ylabel('Total supercoiling')

    # Inverted y axis: more negative supercoiling is drawn higher up
    ax.set_ylim(0.25, -0.85)

    ax.grid(linestyle=':')
    ax.legend(loc='upper left')

    if title:
        ax.set_title(title)

    if name:
        fig.savefig(name, dpi=300, bbox_inches='tight')

    plt.show()

In [None]:
def plot_violin_sigma_tsc(indiv, sigma_env, wanted_type, all_sigma=False, title=None, name=None):
    """Plot the distribution of supercoiling over the genes of one type.

    For every time step, a violin shows the distribution of the supercoiling
    of the genes of type `wanted_type`; the median is overlaid as a line.

    Args:
        indiv: individual to simulate.
        sigma_env: supercoiling level imposed by the environment.
        wanted_type: name of the gene type to plot (an entry of `gene_types`).
        all_sigma: if true, plot the total supercoiling (local + basal +
            environment); otherwise only the local part.
        title: optional plot title.
        name: optional output file; the figure is saved only when it is given.

    Raises:
        KeyError: if `wanted_type` is not a known gene type (raised before any
            figure is created).
    """
    if wanted_type not in gene_types:
        raise KeyError(wanted_type)
    i_wanted_type = gene_types.index(wanted_type)

    sigma_tsc = get_sigma_tsc(indiv, sigma_env=sigma_env)

    # Only the genes of type `wanted_type` are plotted:
    # one row per gene, one column per time step
    type_tsc = pd.DataFrame([sigma_tsc[i_gene, :] for i_gene in range(indiv.nb_genes)
                             if indiv.genes[i_gene].gene_type == i_wanted_type])

    if all_sigma:
        type_tsc += indiv.sigma_basal + sigma_env

    fig, ax = plt.subplots(figsize=(9, 4), dpi=300)

    ax.plot(type_tsc.median(),
            color=gene_type_color[i_wanted_type], label=f'{wanted_type} genes')

    ax.violinplot(type_tsc, positions=np.arange(indiv.nb_eval_steps),
                  showmedians=True)

    ax.set_xlabel('Time')

    # Add horizontal activation line
    ax.axhline(indiv.sigma_opt, linestyle='--', linewidth=1,
               color='tab:red', label='Half activation threshold')

    if not all_sigma:
        ax.set_ylabel('TSC supercoiling')
    else:
        ax.set_ylabel('Total supercoiling')

    # Inverted y axis: more negative supercoiling is drawn higher up
    ax.set_ylim(0.85, -0.85)

    if title:
        ax.set_title(title)
    ax.grid(linestyle=':')
    ax.legend(loc='upper left')

    if name:
        fig.savefig(name, dpi=300, bbox_inches='tight')

    plt.show()

In [None]:
def plot_best_sigma_tsc(exp_path, gen):
    """Plot the supercoiling of the best individual of every replicate.

    For each `rep*` sub-directory of `exp_path`, the best individual at
    generation `gen` is simulated in environment A and four figures are saved
    in `exp_path`: one violin plot of the total supercoiling per gene type
    (A, B, then AB) and one plot of the per-type medians.

    The environment level is read from the parameters of `exp_path` itself, so
    the function gives consistent results for any experiment directory.
    The other plot variants (plot_sigma_tsc, plot_avg_sigma_tsc, environment
    B) are not called here.
    """
    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    sigma_A = get_params(exp_path)['sigma_A']

    for rep, rep_dir in enumerate(rep_dirs):

        best_indiv = get_best_indiv(rep_dir, gen)

        for wanted_type in ('A', 'B', 'AB'):
            plot_violin_sigma_tsc(best_indiv, sigma_env=sigma_A, wanted_type=wanted_type,
                                  all_sigma=True,
                                  title=f'Environment A: {wanted_type} genes',
                                  name=exp_path.joinpath(
                                      f'violin_sc_rep{rep:02}_env_A_genes_{wanted_type}.png'))

        plot_med_sigma_tsc(best_indiv, sigma_env=sigma_A, all_sigma=True,
                           title='Environment A: median SC',
                           name=exp_path.joinpath(f'sigma_tsc_all_best_rep{rep:02}_env_A_med.png'))


In [None]:
# Supercoiling plots for the best individual of each replicate of the main experiment.
plot_best_sigma_tsc(exp_path, gen)

# Detailed plot of every gene of a type in an individual (for debugging)

In [None]:
def plot_every_gene(indiv, sigma_env, wanted_type, base_name, extra):
    """Save one detailed figure per gene of a given type (debugging aid).

    Each figure shows, over time, the total supercoiling at the gene (left
    axis) and its expression level (right axis), with their respective
    half-activation lines. The title tells whether the gene is activated or
    inhibited, based on its mean expression over the last 5 steps.

    Args:
        indiv: individual to simulate.
        sigma_env: supercoiling level imposed by the environment.
        wanted_type: name of the gene type to plot (an entry of `gene_types`).
        base_name: `pathlib.Path` of the output directory.
        extra: suffix appended to every output file name.
    """
    sigma_tsc = get_sigma_tsc(indiv, sigma_env=sigma_env)
    sigma_total = sigma_tsc + indiv.sigma_basal + sigma_env
    temporal_expr = indiv.run_system(sigma_env)

    nb_steps = 5  # number of final steps used to classify the gene

    for i_gene, gene in enumerate(indiv.genes):

        if gene_types[gene.gene_type] != wanted_type:
            continue

        print(f'Gene {i_gene} type {gene_types[gene.gene_type]}: '
              f'mean activation {np.sum(temporal_expr[i_gene, :])/indiv.nb_eval_steps:.2}')

        fig, ax1 = plt.subplots(figsize=(9, 4), dpi=300)

        ## Axis 1: Total supercoiling (inverted y axis)
        ax1.set_ylim(0.65, -0.85)
        ax1.set_ylabel('Total supercoiling', color='tab:red')
        ax1.set_xlabel('Time')
        ax1.grid(linestyle=':')

        ax1.plot(sigma_total[i_gene, :], color='tab:red', label='SC level')

        # Midpoint of the expression sigmoid used by the simulation
        ax1.axhline(indiv.sigma_opt, linestyle='--', linewidth=1,
                    color='tab:red', label='Half activation (SC)')

        ## Axis 2: Expression level
        ax2 = ax1.twinx()
        ax2.set_ylim(-0.02, 1.02)
        ax2.set_ylabel('Expression level', color='tab:green')

        ax2.plot(temporal_expr[i_gene, :], linestyle='--',
                 color='tab:green', label='Expression level')

        ax2.axhline(0.5, linestyle='--', linewidth=1,
                    color='tab:green', label='Half activation (expr)')

        mean_activ = np.sum(temporal_expr[i_gene, indiv.nb_eval_steps-nb_steps:]) / nb_steps
        if mean_activ > 0.5:
            ax2.set_title(f'Gene {i_gene} type {wanted_type}: activated')
        else:
            ax2.set_title(f'Gene {i_gene} type {wanted_type}: inhibited')

        fig.legend(loc="upper left", bbox_to_anchor=(0,1), bbox_transform=ax1.transAxes)

        fig.savefig(base_name.joinpath(f'plot_all_genes_{wanted_type}_gene_{i_gene}_{extra}.png'), dpi=300)

        # Close each figure: this function can create many of them
        plt.close(fig)

In [None]:
#plot_every_gene(focal_indiv, sigma_env=params['sigma_A'], wanted_type='B',
#                base_name=exp_path, extra='before')

# Plot average supercoiling for each type of gene over replicates (_TODO: the means should be replaced by medians_)

In [None]:
def get_avg_sigma(exp_path, gen):
    """Compute the final mean supercoiling per gene type for every replicate.

    For the best individual of each replicate of `exp_path` at generation
    `gen`, the system is simulated in environments A and B (levels read from
    the parameters of `exp_path` itself). The local supercoiling of each gene
    is averaged over the last 5 time steps, then over the genes of each type,
    and the basal and environmental supercoiling are added.

    Returns:
        DataFrame indexed by 'Replicate', with one column per gene type and
        environment, named '<type>sc_A' and '<type>sc_B'. A gene type without
        any gene in an individual yields NaN.
    """
    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    # Use the parameters of the experiment being analysed, not those of
    # whichever experiment happens to be loaded at notebook level
    exp_params = get_params(exp_path)

    time_steps = 5  # number of final steps over which the temporal mean is taken

    nb_types = len(gene_types)
    col_names = ([f'{gene_type}sc_A' for gene_type in gene_types]
                 + [f'{gene_type}sc_B' for gene_type in gene_types])

    rows = []

    for rep, rep_dir in enumerate(rep_dirs):
        best_indiv = get_best_indiv(rep_dir, gen)

        type_ids = np.array([gene.gene_type for gene in best_indiv.genes], dtype=int)
        # Actual number of genes of each type (they need not be equally represented)
        type_counts = np.bincount(type_ids, minlength=nb_types)

        env_means = []
        for env_name in ('A', 'B'):
            sigma_env = exp_params[f'sigma_{env_name}']
            sigma_tsc = get_sigma_tsc(best_indiv, sigma_env=sigma_env)

            # Temporal average over the last `time_steps` steps, per gene
            gene_means = np.sum(sigma_tsc[:, best_indiv.nb_eval_steps-time_steps:], axis=1) / time_steps

            # Average over the genes of each type
            type_sums = np.bincount(type_ids, weights=gene_means, minlength=nb_types)
            with np.errstate(divide='ignore', invalid='ignore'):
                type_means = type_sums / type_counts

            # Add other supercoiling sources
            env_means.append(type_means + best_indiv.sigma_basal + sigma_env)

        rows.append([rep, *np.concatenate(env_means)])

    full_data = pd.DataFrame(rows, columns=['Replicate'] + col_names)

    return full_data.set_index('Replicate')

In [None]:
def plot_mean_sigma(exp_path, gen, title=None):
    """Plot the final supercoiling level of each gene type in both environments.

    Uses the per-replicate values returned by `get_avg_sigma`: dots with error
    bars show their mean and standard deviation over replicates, crosses their
    median. The figure is saved as `mean_supercoiling.png` in `exp_path`.

    Args:
        exp_path: directory of the experiment to analyse.
        gen: generation to analyse.
        title: optional plot title.
    """
    full_data = get_avg_sigma(exp_path, gen)

    # Threshold of the experiment being plotted, not of the notebook-level one
    sigma_opt = get_params(exp_path)['sigma_opt']

    ## Actual plotting
    mean_stats = full_data.mean()
    std_stats = full_data.std()
    med_stats = full_data.median()

    fig, ax = plt.subplots(figsize=(9, 4), dpi=300)

    x = np.arange(len(gene_types))  # one position per type of gene
    width = 0.1  # horizontal offset between the two environments

    env_styles = [('A', -width/2, 'tab:blue'), ('B', width/2, 'tab:orange')]

    # Plot mean and std
    for env_name, offset, color in env_styles:
        env_cols = [f'{gene_type}sc_{env_name}' for gene_type in gene_types]
        ax.errorbar(x + offset,
                    mean_stats[env_cols].to_numpy(),
                    label=f'Environment {env_name}',
                    marker='o',
                    linestyle='',
                    color=color,
                    yerr=std_stats[env_cols].to_numpy(),
                    capsize=5)

    # Add median values.
    # NOTE: this is the median over replicates of per-replicate *means*,
    # whereas per-replicate *medians* are what is actually wanted.
    for env_name, offset, color in env_styles:
        env_cols = [f'{gene_type}sc_{env_name}' for gene_type in gene_types]
        ax.plot(x + offset,
                med_stats[env_cols].to_numpy(),
                marker='x',
                linestyle='',
                color=color)

    # Add half-activation threshold (sigma_opt)
    ax.axhline(sigma_opt, linestyle='--', linewidth=1,
               color='tab:red', label='Half activation threshold')

    ax.set_ylabel('SC level', fontsize=label_fontsize)
    # Inverted y axis: more negative supercoiling is drawn higher up
    ax.set_ylim(0.25, -0.85)
    ax.set_xticks(x)
    ax.set_xticklabels([f'{gene_type} genes' for gene_type in gene_types])

    ax.grid(linestyle=':', axis='y')

    ax.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    ax.legend(fontsize=legend_fontsize, loc='upper center')

    if title:
        ax.set_title(title, fontsize=label_fontsize)

    fig.savefig(f'{exp_path}/mean_supercoiling.png', bbox_inches='tight')
    

In [None]:
# Supercoiling summary for the main experiment.
plot_mean_sigma(exp_path, gen, title="Evolve basal SC")

In [None]:
# Same summary for another experiment directory, at generation 100000.
plot_mean_sigma(pathlib.Path('/Users/theotime/Desktop/evotsc/new_100k/'), 100_000, title="Vanilla")

In [None]:
# Same summary for another experiment directory, at generation 750000.
plot_mean_sigma(pathlib.Path('/Users/theotime/Desktop/evotsc/unbiased_gen_size_750k/'), 750_000, title="Evolve genome size")

# Plot gene expression levels

In [None]:
def plot_avg_expr(indiv, sigma_env, plot_title, plot_name):
    """Plot the mean expression level of each gene type of an individual over time.

    Args:
        indiv: individual to simulate (through `indiv.run_system`).
        sigma_env: supercoiling level imposed by the environment.
        plot_title: title of the figure.
        plot_name: output file. It is joined to the notebook-level `exp_path`,
            so a relative name lands in that directory while an absolute path
            is used as is.

    The mean of each type is taken over the genes that actually have that
    type; a type without any gene yields a NaN (invisible) curve.
    """
    temporal_expr = indiv.run_system(sigma_env)

    nb_genes, nb_steps = temporal_expr.shape

    nb_types = len(gene_types)
    type_expr = np.zeros((nb_types, nb_steps))
    type_counts = np.zeros(nb_types)

    for i_gene, gene in enumerate(indiv.genes):
        type_expr[gene.gene_type] += temporal_expr[i_gene, :]
        type_counts[gene.gene_type] += 1

    with np.errstate(divide='ignore', invalid='ignore'):
        type_expr = type_expr / type_counts[:, np.newaxis]

    fig, ax = plt.subplots(figsize=(9, 4), dpi=dpi)

    ax.set_ylim(-0.05, 1.05)

    for i_gene_type, gene_type in enumerate(gene_types):
        ax.plot(type_expr[i_gene_type, :],
                color=gene_type_color[i_gene_type],
                label=f'{gene_type} genes')

    ax.grid(linestyle=':')
    ax.set_xlabel('Time', fontsize='large')
    ax.set_ylabel('Expression level', fontsize='large')

    ax.axhline(0.5, linestyle='--', linewidth=1,
               color='tab:red', label='Half activation threshold')

    ax.legend(loc='lower left')
    ax.set_title(plot_title)

    fig.savefig(exp_path.joinpath(plot_name), dpi=300, bbox_inches='tight')

    plt.show()

    plt.close(fig)


In [None]:
def plot_avg_expr_all(exp_path, gen):
    """Plot the mean expression per gene type for the best individual of each replicate.

    For every `rep*` sub-directory of `exp_path`, the best individual at
    generation `gen` is simulated in environment A (level read from the
    parameters of `exp_path` itself) and the figure is saved in `exp_path` as
    `avg_expr_rep<NN>_env_A.png`. Environment B is not plotted.
    """
    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    sigma_A = get_params(exp_path)['sigma_A']

    # plot_avg_expr joins the name it receives to the notebook-level directory;
    # passing an absolute path makes the figure land in *this* experiment's
    # directory regardless (joining an absolute path discards the left side).
    out_dir = exp_path.resolve()

    for rep, rep_dir in enumerate(rep_dirs):

        indiv = get_best_indiv(rep_dir, gen)

        plot_avg_expr(indiv, sigma_A, 'Environment A',
                      out_dir.joinpath(f'avg_expr_rep{rep:02}_env_A.png'))
        

In [None]:
# Mean expression plots for the best individual of each replicate of the main experiment.
plot_avg_expr_all(exp_path, gen)