# Figures for the Plos CB paper that use data from evolutionary runs

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pathlib
import pickle
import itertools

In [None]:
import importlib
import evotsc
import evotsc_run
import evotsc_plot
importlib.reload(evotsc)
importlib.reload(evotsc_run)
importlib.reload(evotsc_plot)

In [None]:
# Root directory of the experiment whose replicates are analysed below.
exp_path = pathlib.Path('/Users/theotime/Desktop/evotsc/pci/main/')
# Generation of the saved populations to load (used in the save file name).
gen = 1_000_000

In [None]:
# Replicate directories ("rep*") found directly under the experiment root,
# in name order.
rep_dirs = sorted(d for d in exp_path.iterdir()
                  if d.is_dir() and d.name.startswith("rep"))
# Parameters are read from the first replicate only
# (presumably all replicates share the same parameter set -- TODO confirm).
params = evotsc_run.read_params(rep_dirs[0])
params['m'] = 2.5 # Temporary fix because the parameter wasn't saved

In [None]:
# Shared NumPy random generator with a fixed seed; it is handed to evotsc
# whenever a random individual is built in this notebook.
rng = np.random.default_rng(seed=123456)

In [None]:
# The position in this list is the integer gene type used to index per-type
# arrays and counts below (0: 'AB', 1: 'A', 2: 'B').
gene_types = ['AB', 'A', 'B'] # Name of each gene type
# Plot color of each gene type, in the same order as gene_types.
gene_type_color = ['tab:blue', 'tab:red', 'tab:green'] #AB, A, B
# Figure resolution in dots per inch.
dpi = 300

## Helper functions

In [None]:
def get_best_indiv(rep_path, gen):
    """Load a saved population and return its fittest individual.

    Args:
        rep_path: Directory (path object) of one replicate; it must contain
            the file ``pop_gen_<gen, zero-padded to 6 digits>.evotsc``.
        gen: Generation number of the save file to load.

    Returns:
        The first individual with the highest strictly positive fitness. The
        first individual of the population is returned when no fitness
        exceeds 0; if an individual has no ``fitness`` attribute, the search
        stops there and the best one found so far is returned.

    Note:
        The population is unpickled, which can execute arbitrary code: only
        load save files from a trusted source.
    """
    save_path = rep_path.joinpath(f'pop_gen_{gen:06}.evotsc')
    with open(save_path, 'rb') as save_file:
        population = pickle.load(save_file)

    # Make sure fitness values are up to date before comparing them
    population.evaluate()

    champion = population.individuals[0]
    champion_fitness = 0

    try:
        for candidate in population.individuals:
            if candidate.fitness > champion_fitness:
                champion_fitness = candidate.fitness
                champion = candidate
    except AttributeError:
        # In the neutral control, individuals are not evaluated,
        # so they have no fitness field: keep what we have so far.
        pass

    return champion

In [None]:
def make_random_indiv(default_basal_expression=None, nb_mut=0):
    """Create a random individual from the experiment parameters.

    The genome and the individual are built with the module-level ``params``
    and ``rng``; the individual is then mutated ``nb_mut`` times.

    Args:
        default_basal_expression: Forwarded unchanged to
            ``evotsc.Gene.generate``.
        nb_mut: Number of successive calls to ``indiv.mutate``.

    Returns:
        The (possibly mutated) ``evotsc.Individual``.
    """
    random_genes = evotsc.Gene.generate(
        intergene=int(params['intergene']),
        length=int(params['gene_length']),
        nb_genes=int(params['nb_genes']),
        default_basal_expression=default_basal_expression,
        rng=rng,
    )

    # Parameters that are passed to the individual without conversion
    passthrough_keys = ('interaction_coef', 'sigma_basal', 'sigma_opt',
                        'epsilon', 'm', 'selection_coef')
    passthrough = {key: params[key] for key in passthrough_keys}

    indiv = evotsc.Individual(
        genes=random_genes,
        interaction_dist=int(params['interaction_dist']),
        rng=rng,
        **passthrough,
    )

    mutation = evotsc.Mutation(inversion_poisson_lam=params['inversion_poisson_lam'])

    for _ in range(nb_mut):
        indiv.mutate(mutation)

    return indiv

## Evolved individual: influence of env. supercoiling on final gene expression levels

In [None]:
def compute_activity_sigma_per_type(indiv, sigmas):
    """Compute the average final expression level of each gene type.

    Args:
        indiv: Individual to simulate; it must provide ``evaluate``,
            ``run_system``, ``genes`` (each with an integer ``gene_type``)
            and ``nb_genes``.
        sigmas: Sequence of environmental supercoiling values.

    Returns:
        Array of shape ``(3, len(sigmas))``: for each gene type and each
        sigma, the sum of the last-time-step expression levels of the genes
        of that type, divided by ``indiv.nb_genes / 3`` (i.e. assuming the
        three types have the same number of genes).
    """
    # Initialize the individual
    indiv.evaluate(0.0, 0.0)

    type_totals = np.zeros((3, len(sigmas)))

    for col, sigma_env in enumerate(sigmas):
        # Only the last time step of the simulation is of interest
        final_levels = indiv.run_system(sigma_env)[-1]

        for gene_idx, gene in enumerate(indiv.genes):
            type_totals[gene.gene_type, col] += final_levels[gene_idx]

    genes_per_type = indiv.nb_genes / 3

    return type_totals / genes_per_type

In [None]:
# See how gene activity levels depend on environmental supercoiling
def plot_activity_sigma_per_type(activ, sigmas, plot_title=None, plot_name=None):
    """Plot the average gene activity of each gene type against sigma_env.

    One curve is drawn per entry of the module-level ``gene_types``, together
    with vertical dashed lines at ``params['sigma_A']`` and
    ``params['sigma_B']`` and a horizontal dotted line at the activation
    threshold ``(1 + exp(-m)) / 2``.

    Args:
        activ: 2-D array indexed as ``activ[i_gene_type, i_sigma]``.
        sigmas: Supercoiling values of the x axis; the axis spans
            ``sigmas[0]`` to ``sigmas[-1]``.
        plot_title: Optional title of the figure.
        plot_name: Optional path; when given, the figure is saved there.
    """
    plt.figure(figsize=(6, 4), dpi=dpi)
    # Raw strings: the LaTeX backslashes must not be read as (invalid)
    # Python escape sequences.
    plt.xlabel(r'Environment supercoiling $\sigma_{env}$')
    plt.ylabel('Average gene activity by type')
    plt.ylim(-0.05, 1.05)
    plt.xlim(sigmas[0], sigmas[-1])
    plt.grid(linestyle=':')

    if plot_title:
        plt.title(plot_title)

    for i_gene_type, gene_type in enumerate(gene_types):
        plt.plot(sigmas, activ[i_gene_type, :],
                 color=gene_type_color[i_gene_type],
                 linewidth=2,
                 label=gene_type)

    # Add sigma_A and sigma_B
    y_min, y_max = plt.ylim()
    for sigma_key, sigma_label in (('sigma_A', r'$\sigma_A$'),
                                   ('sigma_B', r'$\sigma_B$')):
        plt.vlines(params[sigma_key], y_min, y_max,
                   linestyle='--', linewidth=2, color='black')
    # To make math bold, use \mathbf{}
    for sigma_key, sigma_label in (('sigma_A', r'$\sigma_A$'),
                                   ('sigma_B', r'$\sigma_B$')):
        plt.text(params[sigma_key], y_min, sigma_label,
                 va='top', fontsize='large')
    # Restore the limits read above, in case drawing the lines moved them
    plt.ylim(y_min, y_max)

    # Add 1/2 expression level
    half_expr = (1 + np.exp(- params['m'])) / 2
    plt.hlines(half_expr, sigmas[0], sigmas[-1], linestyle=':', linewidth=2,
               color='tab:pink', label='Activation threshold')

    plt.legend()

    plt.tight_layout()

    if plot_name:
        plt.savefig(plot_name, dpi=dpi, bbox_inches='tight')

    plt.show()
    plt.close()

In [None]:
def plot_best_activ_by_sigma():
    """Plot, for each replicate, the per-type activity of its best individual.

    The number of replicates is the number of ``rep*`` directories under the
    module-level ``exp_path``; replicate ``i`` is loaded from
    ``exp_path/rep<i, zero-padded to 2 digits>`` at generation ``gen``. Each
    figure is saved as ``activity_sigmas_best_rep<i>.pdf`` in ``exp_path``.
    """
    nb_reps = sum(1 for d in exp_path.iterdir()
                  if d.is_dir() and d.name.startswith("rep"))

    # Environmental supercoiling values at which the individuals are evaluated
    sigmas = np.linspace(-0.05, 0.05, 250)

    for i_rep in range(nb_reps):
        rep_path = exp_path.joinpath(f'rep{i_rep:02}')
        best = get_best_indiv(rep_path, gen=gen)

        plot_activity_sigma_per_type(
            compute_activity_sigma_per_type(best, sigmas),
            sigmas,
            plot_title=f'Best replicate {i_rep}',
            plot_name=exp_path.joinpath(f'activity_sigmas_best_rep{i_rep}.pdf'),
        )

In [None]:
#plot_best_activ_by_sigma()

In [None]:
def plot_avg_best_activ_by_sigma():
    """Plot the per-type activity averaged over the best individual of each replicate.

    Every ``rep*`` directory under the module-level ``exp_path`` is loaded at
    generation ``gen``; the activity curves of the best individuals are
    averaged and the figure is saved as ``activity_sigmas_avg.pdf`` in
    ``exp_path``.

    Raises:
        FileNotFoundError: If ``exp_path`` contains no ``rep*`` directory
            (averaging over zero replicates would only produce NaNs).
    """
    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))
    if not rep_dirs:
        raise FileNotFoundError(f'No "rep*" directory found in {exp_path}')

    # Environmental supercoiling values at which the individuals are evaluated
    sigmas = np.linspace(-0.05, 0.05, 250)
    activ = np.zeros((3, len(sigmas)))

    # Iterate over the directories that actually exist rather than rebuilding
    # their names from an index, so gaps in the numbering are tolerated.
    for rep_dir in rep_dirs:
        best = get_best_indiv(rep_dir, gen=gen)
        activ += compute_activity_sigma_per_type(best, sigmas)

    activ /= len(rep_dirs)

    plot_activity_sigma_per_type(activ, sigmas,
                                 plot_name=exp_path.joinpath('activity_sigmas_avg.pdf'))

In [None]:
# Produce the replicate-averaged activity figure for the experiment in exp_path.
plot_avg_best_activ_by_sigma()

## See how supercoiling / gene activity changes locally after switching the orientation of each gene

In [None]:
def count_genes_affected_by_reversal(indiv, sigma):
    """Count the activation changes caused by flipping each gene in turn.

    A gene is considered active when its expression level at the last time
    step of ``run_system(sigma)`` is above ``(1 + exp(-indiv.m)) / 2``. Each
    gene is flipped (orientation ``o`` becomes ``1 - o``) in a fresh clone of
    ``indiv``, and the number of genes whose active/inactive state differs
    from the original individual is counted.

    Args:
        indiv: Individual to analyse. Its ``inter_matrix`` is computed and
            stored on it if it is ``None``; its genes are not modified.
        sigma: Environmental supercoiling at which the systems are run.

    Returns:
        A list of 3 integers: total number of state changes, indexed by the
        gene type of the *flipped* gene.
    """
    if indiv.inter_matrix is None:
        indiv.inter_matrix = indiv.compute_inter_matrix()

    threshold = (1 + np.exp(- indiv.m)) / 2
    ref_active = indiv.run_system(sigma)[-1, :] > threshold

    flips_per_type = [0, 0, 0] # Count per gene type

    for i_switched in range(indiv.nb_genes):
        mutant = indiv.clone()
        flipped_gene = mutant.genes[i_switched]
        flipped_gene.orientation = 1 - flipped_gene.orientation
        # The interactions depend on the orientations: recompute them
        mutant.inter_matrix = mutant.compute_inter_matrix()
        # (For debugging, evotsc_plot.plot_genome_and_tsc(indiv=mutant,
        #  sigma=sigma, show_bar=True, ...) can be called here.)

        mutant_active = mutant.run_system(sigma)[-1, :] > threshold

        # Count by class of the switched gene
        nb_changed = int(np.count_nonzero(ref_active != mutant_active))
        flips_per_type[indiv.genes[i_switched].gene_type] += nb_changed

    return flips_per_type

In [None]:
def count_genes_affected_by_reversal_all(exp_path, gen, sigma):
    """Run ``count_genes_affected_by_reversal`` on the best individual of every replicate.

    Args:
        exp_path: Experiment directory (path object) containing the ``rep*``
            replicate directories.
        gen: Generation of the saved populations to load.
        sigma: Environmental supercoiling at which the individuals are run.

    Returns:
        A ``pandas.DataFrame`` with one row per replicate (in directory name
        order) and one column per entry of ``gene_types``, holding the counts
        divided by ``params["nb_genes"] / len(gene_types)``.
    """
    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    # One 3-element list of counts (one per gene type) per replicate.
    # The existing directories are used directly instead of rebuilding their
    # names from an index, so gaps in the numbering are tolerated.
    count = [count_genes_affected_by_reversal(get_best_indiv(rep_dir, gen=gen), sigma)
             for rep_dir in rep_dirs]

    nb_genes_per_type = params["nb_genes"] / len(gene_types)

    return pd.DataFrame(count, columns=gene_types) / nb_genes_per_type

In [None]:
def count_genes_affected_by_reversal_random(nb_indiv, sigma):
    """Run ``count_genes_affected_by_reversal`` on freshly generated random individuals.

    Args:
        nb_indiv: Number of random individuals to generate; each one is
            created with ``make_random_indiv(nb_mut=100)``.
        sigma: Environmental supercoiling at which the individuals are run.

    Returns:
        A ``pandas.DataFrame`` with one row per individual and one column per
        entry of ``gene_types``, holding the counts divided by
        ``params["nb_genes"] / len(gene_types)``.
    """
    per_indiv_counts = [
        count_genes_affected_by_reversal(make_random_indiv(nb_mut=100), sigma)
        for _ in range(nb_indiv)
    ]

    genes_per_type = params["nb_genes"] / len(gene_types)
    raw_counts = pd.DataFrame(per_indiv_counts, columns=gene_types)

    return raw_counts / genes_per_type

In [None]:
def plot_single_inversion_effect(exp_path, gen):
    """Plot how many genes change state when a single gene is inverted.

    Evolved individuals (best of each replicate in ``exp_path``) are compared
    with 100 random individuals, in environments A and B
    (``params['sigma_A']`` / ``params['sigma_B']``). For each gene type, a bar
    shows the mean and a box plot the distribution; the number of individuals
    in each group is written under the bars. The figure is saved as
    ``inversion_switches.pdf`` in ``exp_path``.

    Args:
        exp_path: Experiment directory (path object) containing the ``rep*``
            replicate directories.
        gen: Generation of the saved populations to load.
    """
    # The random individuals draw from the shared rng, so the order of these
    # four calls determines which random individuals each group gets.
    exp_env_A = count_genes_affected_by_reversal_all(exp_path, gen, sigma=params['sigma_A'])
    exp_env_B = count_genes_affected_by_reversal_all(exp_path, gen, sigma=params['sigma_B'])
    rand_env_A = count_genes_affected_by_reversal_random(100, params['sigma_A'])
    rand_env_B = count_genes_affected_by_reversal_random(100, params['sigma_B'])

    # Groups in left-to-right plotting order, with their tick labels
    all_data = [exp_env_A, rand_env_A, exp_env_B, rand_env_B]
    group_labels = ['Evolved env. A', 'Random env. A', 'Evolved env. B', 'Random env. B']

    # Create the figure only once the data is available, so that a failure
    # above does not leave an empty figure open.
    plt.figure(figsize=(8, 5), dpi=dpi)

    width = 0.2
    x_pos = np.array([0, 4*width, 9*width, 13*width])
    delta = np.array([-width, 0, width])  # Offset of each gene type within a group

    rects = {}

    for i_gene_type, gene_type in enumerate(gene_types):
        positions = x_pos + delta[i_gene_type]
        rects[gene_type] = plt.bar(positions,
                                   [data[gene_type].mean() for data in all_data],
                                   width=width, color=gene_type_color[i_gene_type])
        plt.boxplot([data[gene_type] for data in all_data], positions=positions,
                    manage_ticks=False, widths=0.1, medianprops={'color': 'black'})

    # Annotate the group sizes on the middle bar of each group
    for rect, data in zip(rects['A'], all_data):
        plt.annotate(f"n = {len(data['A'])}",
                     xy=(rect.get_x() + rect.get_width()/2, 0),
                     xytext=(0, 3),
                     ha='center',
                     textcoords="offset points",
                     color='black')

    plt.xticks(ticks=x_pos, labels=group_labels)

    plt.ylabel('Avg. # of genes switched by inverting a gene of type')
    plt.grid(axis='y', linestyle=':')

    patches = [mpl.patches.Patch(facecolor=color, edgecolor='black', label=label)
               for color, label in zip(gene_type_color, gene_types)]
    plt.legend(handles=patches, title='Gene type')

    plt.savefig(exp_path.joinpath('inversion_switches.pdf'), bbox_inches='tight')
    plt.show()
    # Release the figure, as the other plotting function of this file does
    plt.close()

In [None]:
# Produce the single-gene-inversion figure for the experiment in exp_path.
plot_single_inversion_effect(exp_path, gen)