In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pathlib
import pickle
import itertools

In [None]:
# Project-local modules used throughout this notebook.
import autoreload
import evotsc
import evotsc_plot
# Presumably re-imports the project modules so that edits made on disk are
# picked up without restarting the kernel.
# NOTE(review): `autoreload.reload` is not a standard-library API (the stdlib
# equivalent is `importlib.reload`) — confirm which `autoreload` module this is.
autoreload.reload(evotsc)
autoreload.reload(evotsc_plot)

In [None]:
## Constants
# Population
# NOTE(review): `nb_indivs` is not referenced in the part of the notebook reviewed here.
nb_indivs = 100
# Default number of genes of the individuals built by `make_random_indiv`
nb_genes = 60

# Genome
# `intergene`, `gene_length` and `default_basal_expression` are forwarded to
# `evotsc.Gene.generate` by `make_random_indiv`; `interaction_dist`,
# `interaction_coef`, `sigma_basal`, `sigma_opt` and `epsilon` are forwarded
# to `evotsc.Individual`.
intergene = 2000
gene_length = 1000
interaction_dist = 5000
interaction_coef = 0.3
sigma_basal = -0.06
sigma_opt = -0.06
epsilon = 0.03
default_basal_expression = 0.5
# NOTE(review): `nb_eval_steps` is not referenced in the part of the notebook reviewed here.
nb_eval_steps = 51

# Environment
# Supercoiling values passed as `sigma` to `plot_activity_mutation` in the robustness cells
sigma_A = 0.1
sigma_B = -0.1

# Mutations
# Forwarded to `evotsc.Mutation` by `make_random_indiv`. The trailing comments
# look like previously used non-zero values — TODO confirm.
inversion_poisson_lam = 2.0
intergene_poisson_lam = 0.0 #2.0
intergene_mutation_var = 0.0 #1e1
basal_sc_mutation_prob = 0.0 #1e-1
basal_sc_mutation_var = 0.0 #1e-4

In [None]:
# Plotting constants shared by the functions below.
# `gene_types[i]` and `gene_type_color[i]` are the label and the color used for
# genes whose `gene_type` is `i` (both lists are indexed by `gene.gene_type`).
gene_types = ['AB', 'A', 'B'] # Name of each gene type
gene_type_color = ['tab:blue', 'tab:red', 'tab:green'] #AB, A, B
# Resolution used when creating figures
dpi = 300

In [None]:
# Directory of the experiment analysed below; its `rep*` sub-directories are
# read by `get_params` and passed to `get_best_indiv`.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
exp_path = pathlib.Path('/Users/theotime/Desktop/evotsc/inter-5k/')

In [None]:
# Seeded random generator; `make_random_indiv` passes it to
# `evotsc.Gene.generate` and `evotsc.Individual`.
rng = np.random.default_rng(seed=123456)

# Helper functions

In [None]:
def get_params(exp_path):
    """Read the run parameters of an experiment from its first replicate.

    The parameters are parsed from the ``params.txt`` file of the first (in
    sorted order) sub-directory of `exp_path` whose name starts with ``rep``.
    Every non-blank line is expected to have the form ``name: value``.

    Args:
        exp_path: `pathlib.Path` of the experiment directory.

    Returns:
        A dict mapping each parameter name to its value. Values are converted
        to `float`, except for the ``commit`` parameter which is kept as a
        stripped string.

    Raises:
        FileNotFoundError: if `exp_path` has no ``rep*`` sub-directory, or if
            the replicate has no ``params.txt``.
        ValueError: if a non-blank line has no ``:`` separator, or if a value
            other than ``commit`` is not a valid float.
    """
    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))
    if not rep_dirs:
        raise FileNotFoundError(f"No 'rep*' directory found in {exp_path}")

    params = {}
    with open(rep_dirs[0].joinpath('params.txt'), 'r') as params_file:
        for line in params_file:
            # Blank lines (e.g. a trailing newline) carry no parameter
            if not line.strip():
                continue

            # Split on the first ':' only, so that the value may contain ':'
            raw_name, separator, raw_val = line.partition(':')
            if not separator:
                raise ValueError(f"Malformed parameter line: {line!r}")

            param_name = raw_name.strip()
            if param_name == 'commit':
                # The commit identifier is not a number
                params[param_name] = raw_val.strip()
            else:
                params[param_name] = float(raw_val)

    return params

In [None]:
# Run parameters of the experiment; `plot_activity_sigma_per_type` reads
# `params['sigma_A']` and `params['sigma_B']` from this dict.
params = get_params(exp_path)

In [None]:
def get_best_indiv(rep_path, gen):
    """Load a saved population and return its fittest individual.

    The population is unpickled from ``pop_gen_<gen, 6 digits>.evotsc`` inside
    `rep_path` and evaluated. Only load save files from a trusted source,
    since unpickling can execute arbitrary code.

    Args:
        rep_path: `pathlib.Path` of the replicate directory.
        gen: generation number of the save file to load.

    Returns:
        The first individual with the highest strictly positive fitness. The
        first individual of the population is returned when no fitness exceeds
        0, or when individuals have no `fitness` attribute.
    """
    save_path = rep_path.joinpath(f'pop_gen_{gen:06}.evotsc')
    with open(save_path, 'rb') as save_file:
        population = pickle.load(save_file)

    population.evaluate()

    champion = population.individuals[0]
    champion_fitness = 0

    try:
        for candidate in population.individuals:
            if candidate.fitness > champion_fitness:
                champion_fitness = candidate.fitness
                champion = candidate
    except AttributeError:
        # In the neutral control, individuals are not evaluated, so they have
        # no fitness field: keep the best individual found so far
        pass

    return champion

In [None]:
def plot_expr(indiv, sigma_env, plot_title=None, plot_name=None):
    """Plot the expression level of every gene of `indiv` over time.

    Genes whose `orientation` is 0 are drawn with solid lines, the others with
    dashed lines. The figure is shown, then closed.

    Args:
        indiv: individual to plot. It is evaluated first (with `sigma_env` for
            both environments) if its `already_evaluated` flag is false.
        sigma_env: environmental supercoiling passed to `indiv.run_system`.
        plot_title: optional figure title.
        plot_name: optional base name of the output file; ``.pdf`` is appended.
    """
    if not indiv.already_evaluated:
        indiv.evaluate(sigma_env, sigma_env)

    # Expression levels over time in the single environment `sigma_env`
    temporal_expr = indiv.run_system(sigma_env)

    # Named `n_genes` so as not to shadow the notebook-level `nb_genes` constant
    n_genes, _ = temporal_expr.shape

    # `plt.get_cmap` is used because `matplotlib.cm.get_cmap` was deprecated in
    # Matplotlib 3.7 and removed in 3.9
    colors = plt.get_cmap('viridis', n_genes)(range(n_genes))

    plt.figure(figsize=(6, 4), dpi=dpi)

    plt.ylim(-0.05, 1.05)

    for i_gene in range(n_genes):
        gene = indiv.genes[i_gene]
        linestyle = 'solid' if gene.orientation == 0 else 'dashed'
        # NOTE(review): rows of `temporal_expr` are indexed by `gene.id` here,
        # but by position in `indiv.genes` elsewhere in this notebook — confirm
        # which one `run_system` uses.
        plt.plot(temporal_expr[gene.id, :],
                 linestyle=linestyle,
                 linewidth=2,
                 color=colors[gene.id],
                 label=f'Gene {gene.id}')

    plt.grid(linestyle=':')
    plt.xlabel('Time', fontsize='large')
    plt.ylabel('Expression level', fontsize='large')

    if plot_title:
        plt.title(plot_title)

    plt.tight_layout()
    if plot_name:
        plt.savefig(plot_name + '.pdf', dpi=dpi, bbox_inches='tight')
    plt.show()
    plt.close()


# Plot the genome of an individual, with the final supercoiling level at each position

In [None]:
# Compute the final supercoiling level at positions `positions` under external supercoiling `sigma`
def compute_final_sc_at(indiv, sigma, positions) -> np.ndarray:
    """Sum, at each position, the supercoiling contributions of nearby genes.

    Each gene located within `indiv.interaction_dist` of a position (measured
    along the circular genome) contributes
    ``sign * (1 - distance / interaction_dist) * final expression level``,
    where the sign depends on the gene orientation and on which side of the
    position the gene lies.

    Args:
        indiv: individual to analyse. It is evaluated first (with `sigma` for
            both environments) if its `already_evaluated` flag is false.
        sigma: external supercoiling passed to `indiv.run_system`.
        positions: sequence of genome positions at which to compute the level.

    Returns:
        A 1-D array with one value per entry of `positions`.
    """
    gene_positions, genome_size = indiv.compute_gene_positions()

    # Run the individual
    if not indiv.already_evaluated:
        indiv.evaluate(sigma, sigma)

    # Expression level of every gene at the last time step
    final_expr = indiv.run_system(sigma)[:, -1]

    sc_levels = np.zeros(len(positions))

    for i_pos, x in enumerate(positions):

        local_sc = 0.0

        for i_gene, gene in enumerate(indiv.genes):

            # Influence of gene i at position x
            gene_minus_x = gene_positions[i_gene] - x
            x_minus_gene = - gene_minus_x

            # We want to know whether the gene lies before or after position x
            # on the circular genome, taking the shortest path:
            # Before: -------g--x-------- or -x---------------g-
            # After:  -------x--g-------- or -g---------------x-
            if gene_minus_x < 0:
                # The gene has the smaller coordinate
                wrapped = genome_size + gene_minus_x
                gene_is_before_x = x_minus_gene < wrapped
                distance = x_minus_gene if gene_is_before_x else wrapped
            else:
                # The gene has the larger (or the same) coordinate
                wrapped = genome_size + x_minus_gene
                gene_is_before_x = not (gene_minus_x < wrapped)
                distance = wrapped if gene_is_before_x else gene_minus_x

            # Genes that are too far away have no influence
            if distance > indiv.interaction_dist:
                continue

            # Before x, orientation 1 (lagging) counts positively; after x,
            # orientation 0 (leading) counts positively
            positive_orientation = 1 if gene_is_before_x else 0
            sign_on_x = +1 if gene.orientation == positive_orientation else -1

            # Here, we know that distance <= indiv.interaction_dist
            strength = 1.0 - distance/indiv.interaction_dist

            # Supercoiling variations are additive
            local_sc += sign_on_x * strength * final_expr[i_gene]

        sc_levels[i_pos] = local_sc

    return sc_levels

In [None]:
def plot_genome_and_tsc(indiv, sigma, show_types=True, print_ids=False, name=None):
    """Draw the circular genome of `indiv` with its local supercoiling level.

    Genes are drawn as rectangles on a unit circle, each with a bar and an
    arrow showing its orientation. An inner ring shows the value returned by
    `compute_final_sc_at` at 1000 positions along the genome. The figure is
    shown, then closed.

    Args:
        indiv: individual to plot.
        sigma: external supercoiling passed to `compute_final_sc_at`.
        show_types: if true, color genes by `gene_type` (using the
            notebook-level `gene_type_color`) and draw a legend; otherwise
            color them by their position in `indiv.genes`.
        print_ids: if true, print the index of every fifth gene.
        name: optional path of the file in which to save the figure.
    """

    # Compute gene positions
    gene_pos, genome_length = indiv.compute_gene_positions()

    # Plot
    pos_rect = [0, 0, 1, 1]
    fig = plt.figure(figsize=(9,9), dpi=dpi)
    ax = fig.add_axes(pos_rect)

    rect_width = 0.04
    rect_height = 0.1

    ax.set_xlim(-1.2, 1.2)
    ax.set_ylim(-1.2, 1.2)
    circle = plt.Circle(xy=(0, 0), radius=1, linestyle='-', fill=False)
    ax.add_patch(circle)
    ax.set_axis_off()

    # `plt.get_cmap` is used because `matplotlib.cm.get_cmap` was deprecated in
    # Matplotlib 3.7 and removed in 3.9
    seq_colors = plt.get_cmap('viridis', indiv.nb_genes)(range(indiv.nb_genes))

    ## Plot the genes themselves
    for i_gene, gene in enumerate(indiv.genes):
        # Angles are measured clockwise from the top of the circle
        pos_angle = 360 * gene_pos[i_gene] / genome_length
        orient_angle = 360 - pos_angle
        pos_rad = np.radians(pos_angle)

        ## Plot the gene rectangle

        x0 = (1.0 - rect_height / 2.0) * np.sin(pos_rad)
        y0 = (1.0 - rect_height / 2.0) * np.cos(pos_rad)

        # The sign of the width makes the rectangle extend on one side or the
        # other of its anchor point, depending on the gene orientation
        if gene.orientation == 0:
            final_width = rect_width
        else:
            final_width = -rect_width

        if show_types:
            gene_color = gene_type_color[gene.gene_type]
        else:
            gene_color = seq_colors[i_gene]

        rect = plt.Rectangle(xy=(x0, y0),
                             width=final_width,
                             height=rect_height,
                             angle=orient_angle, #in degrees anti-clockwise about xy.
                             facecolor=gene_color,
                             edgecolor='black',
                             label=f'Gene {i_gene}')

        ax.add_patch(rect)

        ## Plot the orientation bar and arrow

        # Bar
        x_lin = (1.0 + (np.array([0.5, 1.0])) * rect_height) * np.sin(pos_rad)
        y_lin = (1.0 + (np.array([0.5, 1.0])) * rect_height) * np.cos(pos_rad)

        ax.plot(x_lin, y_lin, color='black', linewidth=1)

        # Arrow
        dx_arr = rect_width * np.cos(pos_rad) / 3.0
        dy_arr = - rect_width * np.sin(pos_rad) / 3.0

        if gene.orientation == 1: # Reverse
            dx_arr, dy_arr = -dx_arr, -dy_arr

        ax.arrow(x_lin[1], y_lin[1], dx_arr, dy_arr, head_width=0.02, color='black')

        ## Print gene ID
        if print_ids and (i_gene % 5 == 0):
            ha = 'left'
            if gene.orientation == 1:
                ha = 'right'
            ax.text(x=0.92*x0, y=0.92*y0, s=f'{i_gene}',
                    rotation=orient_angle, ha=ha, va='bottom', rotation_mode='anchor',
                    fontsize=15)

    ## Plot local supercoiling along the genome, at the end of the individual's lifecycle
    sc_ax = fig.add_axes(pos_rect, projection='polar', frameon=False)
    sc_ax.set_ylim(0, 1)

    n = 1000  # the number of data points

    # theta values (see
    # https://matplotlib.org/devdocs/gallery/images_contours_and_fields/pcolormesh_grids.html)
    # To have the crisp version: put n+1 in theta and [data] as the 3rd argument of pcolormesh()
    # To have the blurry version: put n in theta and [data, data] ----------------------------
    theta = np.linspace(0, 2 * np.pi, n)
    radius = np.linspace(.6, .72, 2)

    positions = np.linspace(0, genome_length, n, dtype=int)
    data = compute_final_sc_at(indiv, sigma, positions)

    norm = mpl.colors.Normalize(-2.0, 2.0) # Extremum values for the SC level

    data = -data # Reverse data to get blue = positive and red = negative SC
    sc_ax.pcolormesh(theta, radius, [data, data], shading='gouraud',
                     norm=norm, cmap=plt.get_cmap('seismic'))
    sc_ax.set_yticklabels([])
    sc_ax.set_xticklabels([])
    # Same angular convention as the gene drawing: zero at the top, clockwise
    sc_ax.set_theta_zero_location('N')
    sc_ax.set_theta_direction('clockwise')

    ## Plot the legend
    if show_types:
        patches = [mpl.patches.Patch(facecolor=color, edgecolor='black', label=label)
                   for color, label in zip(gene_type_color, gene_types)]
        ax.legend(handles=patches, title='Gene type', loc='center',
                  fontsize=15, title_fontsize=15)

    # Scale bar for the interaction distance, placed below the legend if any
    line_len = np.pi*indiv.interaction_dist/genome_length
    if show_types:
        line_y = -0.3
    else:
        line_y = -0.1
    ax.plot([-line_len, line_len], [line_y, line_y],
             color='black',
             linewidth=1)
    ax.text(0, line_y - 0.07, 'Gene interaction distance', ha='center', fontsize=15)

    if name:
        plt.savefig(name, dpi=dpi, bbox_inches='tight')

    plt.show()

    plt.close()

In [None]:
def make_random_indiv(nb_genes=nb_genes, nb_mut=0):
    """Build a random individual from the notebook constants, then mutate it.

    Args:
        nb_genes: number of genes to generate. The default is the value of the
            notebook-level `nb_genes` at the time this function is defined.
        nb_mut: number of times `mutate` is called on the new individual.

    Returns:
        The new `evotsc.Individual`.
    """
    individual = evotsc.Individual(
        genes=evotsc.Gene.generate(intergene=intergene,
                                   length=gene_length,
                                   nb_genes=nb_genes,
                                   default_basal_expression=default_basal_expression,
                                   rng=rng),
        interaction_dist=interaction_dist,
        interaction_coef=interaction_coef,
        sigma_basal=sigma_basal,
        sigma_opt=sigma_opt,
        epsilon=epsilon,
        rng=rng,
    )

    # Mutation settings taken from the notebook constants
    mutation_settings = evotsc.Mutation(
        basal_sc_mutation_prob=basal_sc_mutation_prob,
        basal_sc_mutation_var=basal_sc_mutation_var,
        intergene_poisson_lam=intergene_poisson_lam,
        intergene_mutation_var=intergene_mutation_var,
        inversion_poisson_lam=inversion_poisson_lam,
    )

    for _ in range(nb_mut):
        individual.mutate(mutation_settings)

    return individual

# Random individual: influence of environmental supercoiling on final gene expression levels

In [None]:
def compute_extended_phenotype(indiv, sigmas):
    """Compute the final expression level of each gene for several sigmas.

    Args:
        indiv: individual to run; `indiv.evaluate(0, 0)` is called on it first.
        sigmas: sequence of environmental supercoiling values, each passed to
            `indiv.run_system`.

    Returns:
        An array of shape ``(indiv.nb_genes, len(sigmas))`` whose entry
        ``[i, j]`` is the last-time-step value of row ``i`` of the result of
        `indiv.run_system(sigmas[j])`.
    """
    # Initialize the individual (compute the inter matrix)
    indiv.evaluate(0, 0)

    ext_phen = np.zeros((indiv.nb_genes, len(sigmas)))

    for col, sigma_env in enumerate(sigmas):
        # Keep only the last time step of the run
        final_levels = indiv.run_system(sigma_env)[:, -1]
        for row in range(len(indiv.genes)):
            ext_phen[row, col] = final_levels[row]

    return ext_phen

In [None]:
# Random individual with 20 genes, with no mutation applied after generation
init_indiv = make_random_indiv(nb_genes=20, nb_mut=0)

In [None]:
# Expression levels over time of the random individual, with an environmental
# supercoiling of 0; the figure is saved as 'random_gene_expr.pdf'
plot_expr(init_indiv, sigma_env=0, plot_title='Random individual', plot_name='random_gene_expr')

In [None]:
# NOTE(review): this calls the `evotsc_plot` version of `plot_genome_and_tsc`,
# not the function of the same name defined in this notebook — confirm this is intended.
evotsc_plot.plot_genome_and_tsc(init_indiv, sigma=0, name='random_genome_and_tsc.pdf')

In [None]:
# See how gene activity levels depend on environmental supercoiling
def plot_activity_sigma_all_genes(indiv, plot_title=None, plot_name=None):
    """Plot the final activity of each gene against environmental supercoiling.

    One curve is drawn per gene (solid if its `orientation` is 0, dashed
    otherwise) over 100 supercoiling values between -0.25 and 0.15. A red
    curve shows the sigmoid response
    ``1 / (1 + exp((sigma_basal + sigma - sigma_opt) / epsilon))`` for
    comparison. The figure is left open (it is neither shown nor closed here).

    Args:
        indiv: individual to plot; it is re-initialised by
            `compute_extended_phenotype`.
        plot_title: optional figure title.
        plot_name: optional base name of the output file; ``.pdf`` is appended.
    """

    # `plt.get_cmap` is used because `matplotlib.cm.get_cmap` was deprecated in
    # Matplotlib 3.7 and removed in 3.9
    colors = plt.get_cmap('viridis', indiv.nb_genes)(range(indiv.nb_genes))

    nb_sigmas = 100

    plt.figure(figsize=(6, 4), dpi=dpi)
    plt.xlabel('Environment supercoiling')
    plt.ylabel('Final gene activity')
    plt.ylim(-0.05, 1.05)
    plt.grid(linestyle=':')
    if plot_title:
        plt.title(plot_title)

    sigmas = np.linspace(-0.25, 0.15, nb_sigmas)
    ext_phen = compute_extended_phenotype(indiv, sigmas)

    for i_gene in range(indiv.nb_genes):
        linestyle = 'solid' if indiv.genes[i_gene].orientation == 0 else 'dashed'
        plt.plot(sigmas, ext_phen[i_gene, :],
                 linestyle=linestyle,
                 color=colors[i_gene],
                 label=f'Gene {i_gene}')

    # Plot what an isolated gene looks like
    final_exprs = 1.0 / (1.0 + np.exp((indiv.sigma_basal + sigmas - indiv.sigma_opt)/indiv.epsilon))

    plt.plot(sigmas, final_exprs, color='tab:red')

    if plot_name:
        plt.savefig(plot_name + '.pdf', dpi=dpi, bbox_inches='tight')

In [None]:
# Activity of every gene of the random individual as a function of the
# environmental supercoiling; the figure is saved as 'activity_random.pdf'
plot_activity_sigma_all_genes(init_indiv, plot_title='Random individual', plot_name='activity_random')

In [None]:
def plot_all_single_switches(indiv):
    """Plot gene activity for every single-gene orientation switch of `indiv`.

    For each gene in turn, a clone of `indiv` is made, the orientation of that
    gene is flipped (0 <-> 1) in the clone, and the clone is plotted with
    `plot_activity_sigma_all_genes`. `indiv` itself is only cloned.

    Args:
        indiv: individual from which the switched clones are derived.
    """
    for switched in range(indiv.nb_genes):
        mutant = indiv.clone()
        # Force the clone to be re-evaluated with its new orientation
        mutant.already_evaluated = False

        flipped_gene = mutant.genes[switched]
        flipped_gene.orientation = 1 - flipped_gene.orientation

        plot_activity_sigma_all_genes(
            mutant,
            plot_title=f'Random individual switched gene {switched}',
            plot_name=f'activity_random_switch_{switched}',
        )

In [None]:
# One activity figure per gene of the random individual, with that gene's orientation flipped
plot_all_single_switches(init_indiv)

In [None]:
def plot_phenotypic_distance(indiv):
    """Violin-plot the phenotypic effect of switching 1, 2 or 3 genes.

    For every combination of 1 to 3 genes, a clone of `indiv` with the
    orientation of those genes flipped is compared to `indiv`: the distance is
    the square root of the summed squared differences between their extended
    phenotypes (see `compute_extended_phenotype`), computed over 50
    supercoiling values between -0.25 and 0.15. The plot is drawn on the
    current figure.

    Args:
        indiv: reference individual.
    """
    max_switches = 3
    sigmas = np.linspace(-0.25, 0.15, 50)

    reference_phen = compute_extended_phenotype(indiv, sigmas)

    def distance_after_switching(gene_ids):
        # Distance between `indiv` and a clone with the genes `gene_ids` flipped
        mutant = indiv.clone()
        mutant.already_evaluated = False
        for i_gene in gene_ids:
            mutant.genes[i_gene].orientation = 1 - mutant.genes[i_gene].orientation
        mutant_phen = compute_extended_phenotype(mutant, sigmas)
        return np.sqrt(np.sum(np.square(reference_phen - mutant_phen)))

    # One list of distances per number of switched genes
    distances_by_nb_switches = [
        [distance_after_switching(gene_ids)
         for gene_ids in itertools.combinations(range(indiv.nb_genes), nb_switches)]
        for nb_switches in range(1, max_switches + 1)
    ]

    plt.xlabel('Number of switches')
    plt.xticks(range(1, max_switches+1))
    plt.ylabel('L2 distance')

    plt.violinplot(distances_by_nb_switches, showmeans=True)


In [None]:
# Distribution of phenotypic distances between the random individual and its 1- to 3-gene switches
plot_phenotypic_distance(init_indiv)

# Evolved individual: influence of env. supercoiling on final gene expression levels

In [None]:
# See how gene activity levels depend on environmental supercoiling
def plot_activity_sigma_per_type(indiv, plot_all_genes=False, plot_title=None, plot_name=None):
    """Plot the average final gene activity of each gene type against sigma.

    The individual is run for 250 environmental supercoiling values between
    -0.2 and 0.2; for each value, the final activity of the genes is averaged
    per gene type. Two dashed vertical lines mark `params['sigma_A']` and
    `params['sigma_B']` (read from the notebook-level `params`). The figure is
    left open (it is neither shown nor closed here).

    Args:
        indiv: individual to plot; `indiv.evaluate(0.0, 0.0)` is called on it.
        plot_all_genes: currently unused; kept for backward compatibility.
        plot_title: optional figure title.
        plot_name: optional path of the output file, used as is.
    """

    # Initialize the individual
    indiv.evaluate(0.0, 0.0)

    nb_sigmas = 250
    nb_types = len(gene_types)

    activ = np.zeros((nb_types, nb_sigmas)) # Compute activity for each gene type
    sigmas = np.linspace(-0.2, 0.2, nb_sigmas)

    # Number of genes of each type, used to turn the sums below into averages
    type_counts = np.zeros(nb_types)
    for gene in indiv.genes:
        type_counts[gene.gene_type] += 1

    for i_sigma, sigma_env in enumerate(sigmas):
        # Evaluate the individual in the environment with sigma
        temporal_expr = indiv.run_system(sigma_env)

        # Compute total gene activation levels
        for i_gene, gene in enumerate(indiv.genes):
            activ[gene.gene_type][i_sigma] += temporal_expr[i_gene, -1]

    # Average over the genes actually present in each type; a type with no
    # gene keeps an activity of 0 instead of dividing by zero
    activ /= np.maximum(type_counts, 1)[:, np.newaxis]

    plt.figure(figsize=(6, 4), dpi=dpi)
    # Raw string: '\s' is not a valid escape sequence in a normal string literal
    plt.xlabel(r'Environment supercoiling $\sigma_{env}$')
    plt.ylabel('Average gene activity by type')
    plt.ylim(-0.05, 1.05)
    plt.grid(linestyle=':')

    if plot_title:
        plt.title(plot_title)

    for i_gene_type, gene_type in enumerate(gene_types):
        plt.plot(sigmas, activ[i_gene_type, :],
                 color=gene_type_color[i_gene_type],
                 linewidth=2,
                 label=gene_type)

    # Add sigma_A and sigma_B
    y_min, y_max = plt.ylim()
    plt.vlines(params['sigma_A'], y_min, y_max, linestyle='--', linewidth=2, color='black')
    plt.vlines(params['sigma_B'], y_min, y_max, linestyle='--', linewidth=2, color='black')
    # Restore the limits, which drawing the vertical lines may have changed
    plt.ylim(y_min, y_max)

    plt.legend()

    plt.tight_layout()

    if plot_name:
        plt.savefig(plot_name, dpi=dpi, bbox_inches='tight')

In [None]:
# For each of the 15 replicates `rep00` ... `rep14`, load the best individual
# of generation 200,000 and save its per-type activity figure inside the
# experiment directory.
for i_rep in range(15):
    indiv = get_best_indiv(exp_path.joinpath(f'rep{i_rep:02}'), gen=200_000)
    plot_activity_sigma_per_type(indiv, plot_title=f'Best replicate {i_rep}', plot_name=exp_path.joinpath(f'activity_best_rep{i_rep}.pdf'))

# Dispersion of gene activity levels with mutations

In [None]:
# Generate N mutants from an individual to see dispersion of gene activity levels
def plot_activity_mutation(indiv, sigma, mutation=None, plot_name=None, nb_mut=1000, rng=None):
    """Plot the distribution of gene activity over random inversion mutants.

    Each mutant is a clone of `indiv` on which one inversion between two
    uniformly drawn positions is performed. The activity of an individual is
    the mean, over its genes, of the squared final expression level. A
    histogram of the mutant activities is drawn, with a dashed red line at the
    activity of `indiv` itself. The figure is left open (it is neither shown
    nor closed here).

    Args:
        indiv: original individual; it is evaluated but only its clones are
            mutated.
        sigma: supercoiling passed (for both environments) to `evaluate`.
        mutation: currently unused; kept for backward compatibility.
        plot_name: optional base name of the output file; ``.pdf`` is appended.
        nb_mut: number of mutants to generate.
        rng: optional `numpy.random.Generator` used to draw the inversion
            boundaries; pass a seeded generator for reproducible figures.
            A fresh unseeded generator is created when omitted.
    """
    if rng is None:
        rng = np.random.default_rng()

    activ = np.zeros(nb_mut)

    _, genome_size = indiv.compute_gene_positions(include_coding=False)

    for i_mut in range(nb_mut):

        # Generate a new mutant and evaluate it
        mut_indiv = indiv.clone()

        start_pos = rng.integers(0, genome_size)
        end_pos = rng.integers(0, genome_size)

        if end_pos < start_pos:
            start_pos, end_pos = end_pos, start_pos

        mut_indiv.perform_inversion(start_pos, end_pos)
        # Force the mutant to be re-evaluated after the inversion
        mut_indiv.already_evaluated = False

        (temporal_expr, _), _ = mut_indiv.evaluate(sigma, sigma)

        # Compute total gene activation levels
        activ[i_mut] = np.sum(np.square(temporal_expr[:, -1])) /  mut_indiv.nb_genes

    # Plot setup
    plt.figure(figsize=(9, 3), dpi=dpi)
    plt.xlabel('Average squared gene activity')
    plt.ylabel('Number of mutants')
    plt.grid(linestyle=':')

    # Plot the histogram
    plt.hist(activ)

    # Plot the original activity, computed from the expression levels of the
    # original individual (not from those of the last mutant)
    # NOTE(review): if `evaluate` returns cached results when `indiv` was
    # already evaluated with other sigmas, this value may not correspond to
    # `sigma` — confirm against `evotsc.Individual.evaluate`.
    (orig_expr, _), _ = indiv.evaluate(sigma, sigma)
    orig_activ = np.sum(np.square(orig_expr[:, -1])) / indiv.nb_genes
    y_min, y_max = plt.ylim()
    plt.vlines(orig_activ, y_min, y_max, linestyle='--', linewidth=1,
                   color='tab:red', label='Original activity level')
    # Restore the limits, which drawing the vertical line may have changed
    plt.ylim(y_min, y_max)

    plt.legend()

    plt.tight_layout()

    if plot_name:
        plt.savefig(plot_name + '.pdf', dpi=dpi, bbox_inches='tight')

In [None]:
# Mutation settings passed to `plot_activity_mutation` in this cell and the
# following ones. They are defined here because no notebook-level `mutation`
# exists otherwise (the one built in `make_random_indiv` is local to it), so
# these cells would raise a NameError on a fresh kernel.
mutation = evotsc.Mutation(basal_sc_mutation_prob=basal_sc_mutation_prob,
                           basal_sc_mutation_var=basal_sc_mutation_var,
                           intergene_poisson_lam=intergene_poisson_lam,
                           intergene_mutation_var=intergene_mutation_var,
                           inversion_poisson_lam=inversion_poisson_lam)

# Robustness of 5 random individuals (each pre-mutated 100 times) under sigma_A
for i in range(5):
    plot_activity_mutation(make_random_indiv(nb_mut=100), sigma=sigma_A, mutation=mutation, plot_name=f'robustness_random_{i}')

In [None]:
# Activity dispersion over inversion mutants of the best individual of replicate
# 0 at generation 200,000, under sigma_A. `mutation` is read from the notebook namespace.
# NOTE(review): the directory is named 'rep0' here, while the per-replicate loop
# of this notebook uses zero-padded names ('rep00') — confirm which one exists on disk.
plot_activity_mutation(get_best_indiv(exp_path.joinpath('rep0'), gen=200_000), sigma=sigma_A, mutation=mutation)

In [None]:
# Same analysis for the best individual of replicate 0, under sigma_B.
# `mutation` is read from the notebook namespace.
plot_activity_mutation(get_best_indiv(exp_path.joinpath('rep0'), gen=200_000), sigma=sigma_B, mutation=mutation)

In [None]:
# Same call as the previous cell (sigma_B). The mutants are drawn at random by
# `plot_activity_mutation`, so this produces another sample of the distribution.
# NOTE(review): confirm that this repetition is intended.
plot_activity_mutation(get_best_indiv(exp_path.joinpath('rep0'), gen=200_000), sigma=sigma_B, mutation=mutation)