In [None]:
import sys
import pathlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import pickle
import itertools
import networkx as nx

In [None]:
from numba import jit

In [None]:
import met_brewer

In [None]:
import importlib
import evotsc
import evotsc_lib
import evotsc_plot
importlib.reload(evotsc)
importlib.reload(evotsc_lib)
importlib.reload(evotsc_plot)

In [None]:
# Shared figure styling, read as globals by the plotting functions in this notebook
label_fontsize=20   # Font size of axis labels
tick_fontsize=15    # Font size of tick labels
legend_fontsize=15  # Font size of legend entries
dpi=300             # Resolution passed to figure creation and to plt.savefig

In [None]:
# NOTE(review): absolute, machine-specific paths -- adjust before running elsewhere.
# exp_path is both the input directory of the experiment and the directory where figures are saved;
# judging by the path and the plot labels it is the 25 kb interaction-distance run.
exp_path = pathlib.Path('/Users/theotime/Desktop/evotsc/phd/param-explor/interaction-25k/')
# Reference run that the experiment is compared against (labelled '5 kb (main)' in the plots)
main_path = pathlib.Path('/Users/theotime/Desktop/evotsc/phd/param-explor/main/')
gen = 250_000 # Stats are cropped at this generation and best individuals are loaded from it
gene_types = ['AB', 'A', 'B'] # Name of each gene type, indexed by gene.gene_type
gene_type_color = ['tab:blue', 'tab:red', 'tab:green'] # Plot color of each gene type, same order: AB, A, B
orient_name = ['leading', 'lagging'] # Name of each gene orientation
rel_orients = ['conv', 'div', 'downstr', 'upstr'] # In alphabetical order
envs = ['A', 'B'] # Environment names

In [None]:
# Seeded generator, read as a global by make_random_indivs when building random genomes.
# Results are only reproducible if the cells are run in the same order from a fresh kernel.
rng = np.random.default_rng(seed=123456)

In [None]:
# Replicate directories of the experiment: sub-directories of exp_path whose name starts with "rep", sorted
exp_rep_dirs = sorted([d for d in exp_path.iterdir() if (d.is_dir() and d.name.startswith("rep"))])
nb_exp_reps = len(exp_rep_dirs)
# Parameters are read from the first replicate only.
# NOTE(review): presumably all replicates share the same parameters -- confirm.
# This line raises IndexError if no "rep*" directory exists.
params = evotsc_lib.read_params(exp_rep_dirs[0])
# NOTE(review): this assignment is unconditional, so it also overrides a value of 'm' that was saved.
params['m'] = 2.5 # Temporary fix because the parameter wasn't saved

In [None]:
# Display the number of replicate directories found for the experiment
nb_exp_reps

In [None]:
# Display the parameters in use (as read from the first replicate, with 'm' overridden)
params

In [None]:
# Number of genes per gene type, assuming genes are split evenly across gene_types (true division).
# NOTE(review): not referenced by the other cells visible here, which recompute nb_genes / 3 inline.
genes_per_type = params["nb_genes"] / len(gene_types)

# Make random individuals (reused throughout)

In [None]:
def make_random_indivs(nb_indiv, params):
    """Build a list of `nb_indiv` random individuals from the given parameters.

    Each individual is created by `evotsc_lib.make_random_indiv` with
    `nb_mutations=100`, using the notebook-level `rng` and a single shared
    `evotsc.Mutation` object. Its interaction matrix is computed and stored
    on `indiv.inter_matrix` before it is returned.

    Parameters
    ----------
    nb_indiv : int
        Number of individuals to create.
    params : mapping
        Simulation parameters; the keys read are those listed in the call below.

    Returns
    -------
    list
        The `nb_indiv` individuals, in creation order.
    """
    mutation = evotsc.Mutation(inversion_poisson_lam=params['inversion_poisson_lam'])

    def _new_random_indiv():
        # One random genome, with its interaction matrix pre-computed
        indiv = evotsc_lib.make_random_indiv(
            intergene=int(params['intergene']),
            gene_length=int(params['gene_length']),
            nb_genes=int(params['nb_genes']),
            default_basal_expression=params['default_basal_expression'],
            interaction_dist=params['interaction_dist'],
            interaction_coef=params['interaction_coef'],
            sigma_basal=params['sigma_basal'],
            sigma_opt=params['sigma_opt'],
            epsilon=params['epsilon'],
            m=params['m'],
            selection_coef=params['selection_coef'],
            mutation=mutation,
            rng=rng,
            nb_mutations=100,
        )
        indiv.inter_matrix = indiv.compute_inter_matrix()
        return indiv

    return [_new_random_indiv() for _ in range(nb_indiv)]

In [None]:
# Pool of 100 random individuals built with the experiment's parameters (reused further down)
rand_indivs = make_random_indivs(nb_indiv=100, params=params)

# Evolutionary stats

In [None]:
def get_stats(exp_name, gen):
    """Load the per-generation statistics of every replicate of an experiment.

    Reads `stats.csv` from each sub-directory of `exp_name` whose name starts
    with "rep" (in sorted order), tags the rows with a 0-based `Replicate`
    number, stacks everything into one DataFrame, adds a `Log Fitness` column
    and keeps only the rows with `Gen <= gen`.

    Parameters
    ----------
    exp_name : str or path-like
        Directory of the experiment.
    gen : int
        Last generation to keep (inclusive).

    Returns
    -------
    pandas.DataFrame
        Columns: `Replicate`, the columns read from the CSV files, and
        `Log Fitness`. The row index is not reset, so it repeats across
        replicates.

    Raises
    ------
    FileNotFoundError
        If `exp_name` contains no "rep*" directory.
    """
    exp_name = pathlib.Path(exp_name)

    rep_dirs = sorted(d for d in exp_name.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    if not rep_dirs:
        # Fail early with a clear message instead of an obscure KeyError on 'Fitness'
        raise FileNotFoundError(f"No 'rep*' directory found in {exp_name}")

    stat_columns = ['Gen', 'Fitness',
                    'ABon_A', 'ABon_B', 'Aon_A', 'Aon_B', 'Bon_A', 'Bon_B']

    # Collect the per-replicate frames and concatenate once: growing a
    # DataFrame inside the loop copies all previous rows at every iteration.
    rep_frames = []
    for i_rep, rep_dir in enumerate(rep_dirs):
        rep_stats = pd.read_csv(rep_dir.joinpath('stats.csv'), usecols=stat_columns)
        rep_stats.insert(0, 'Replicate', i_rep)
        rep_frames.append(rep_stats)

    res = pd.concat(rep_frames)

    res['Log Fitness'] = np.log(res['Fitness'])

    # Crop at generation `gen`
    res = res[res['Gen'] <= gen]

    return res

In [None]:
# Per-generation stats of all replicates of the experiment, cropped at generation `gen`
exp_stats = get_stats(exp_path, gen)

In [None]:
# Same stats for the main (reference) run, cropped at the same generation
main_stats = get_stats(main_path, gen)

## Plot fitness over evolutionary time

In [None]:
def plot_fitness(exp_stats, main_stats, exp_path):
    """Plot fitness over generations for the main run and the experiment.

    For each of the two runs, draws on log-log axes:
    - a thick line: the exponential of the mean log-fitness over replicates
      (i.e. the geometric mean of the fitness), per generation;
    - two faint lines: the first and last deciles of the fitness over
      replicates, per generation.

    Generation 0 is dropped because of the logarithmic x axis. The figure is
    saved as `fitness_all_with_main.pdf` inside `exp_path`.

    Parameters
    ----------
    exp_stats, main_stats : pandas.DataFrame
        Frames with at least the `Gen`, `Fitness` and `Log Fitness` columns.
    exp_path : path-like
        Directory in which the figure is written.
    """
    columns = ['Gen', 'Log Fitness', 'Fitness']
    all_stats = [main_stats[main_stats["Gen"] > 0][columns].copy(),
                 exp_stats[exp_stats["Gen"] > 0][columns].copy()]

    name = ['5 kb (main)', '25 kb']

    # Main vs experiment colors, picked from a 6-step palette
    all_colors = met_brewer.met_brew(name='Hokusai3', n=6, brew_type='continuous')
    colors = [all_colors[5], all_colors[3]]

    plt.figure(figsize=(9, 4), dpi=dpi)

    plt.xscale('log')
    plt.yscale('log')
    plt.grid(linestyle=':')
    plt.grid(visible=True, which="minor", axis='x', linestyle=':')

    plt.xlabel('Generation', fontsize=label_fontsize)
    plt.ylabel('Fitness', fontsize=label_fontsize)

    plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    for stats, color, run_name in zip(all_stats, colors, name):

        # Group once and reuse for the mean and both deciles
        by_gen = stats.groupby('Gen')
        mean_data = by_gen.mean().reset_index()
        first_dec = by_gen.quantile(0.1)
        last_dec = by_gen.quantile(0.9)

        # Geometric mean of the fitness (exp of the mean log-fitness)
        plt.plot(mean_data['Gen'],
                 np.exp(mean_data['Log Fitness']),
                 color=color,
                 linewidth=2,
                 zorder=10,
                 label='$d_{max} = $' + run_name)

        # First and last deciles; rows are in the same generation order as mean_data
        plt.plot(mean_data['Gen'],
                 first_dec['Fitness'],
                 color=color,
                 alpha=0.3)

        plt.plot(mean_data['Gen'],
                 last_dec['Fitness'],
                 color=color,
                 alpha=0.3)

    plt.legend(loc='lower right', fontsize=legend_fontsize)

    plt.savefig(f'{exp_path}/fitness_all_with_main.pdf', dpi=dpi, bbox_inches='tight')

In [None]:
# Draw the fitness comparison; this also writes fitness_all_with_main.pdf into exp_path
plot_fitness(exp_stats, main_stats, exp_path)

# Plot the number of active genes of each type over evolutionary time

In [None]:
def plot_gene_activity_all(full_stats, exp_path, var_type='quantile'):
    """Plot the number of activated genes of each type over generations.

    One figure is produced per environment ("A" and "B"). Each figure shows,
    for every gene type, the mean over replicates of the `<type>on_<env>`
    column (thick line) plus a variability band (two faint lines). Each figure
    is saved as `gene_activity_env_<env>.pdf` in `exp_path`, shown, and closed.

    Parameters
    ----------
    full_stats : pandas.DataFrame
        Frame with a `Gen` column and the `<type>on_<env>` columns.
    exp_path : path-like
        Directory in which the figures are written.
    var_type : str
        Kind of band: 'sigma' (mean +/- 2 standard deviations), 'quantile'
        (first and last deciles) or 'minmax' (minimum and maximum). Any other
        value draws the means only.
    """
    by_gen = full_stats.groupby('Gen')
    mean_data = by_gen.mean().reset_index()

    # Build a function returning the (lower, upper) band of a column as plain
    # arrays. Arrays are used on purpose: mean_data has a fresh 0..n-1 index
    # while the other aggregates are indexed by generation, so Series
    # arithmetic between them would align on mismatched labels.
    if var_type == 'sigma':
        std_data = by_gen.std()

        def band(col):
            centre = mean_data[col].to_numpy()
            spread = 2 * std_data[col].to_numpy()
            return centre - spread, centre + spread
    elif var_type == 'quantile':
        first_dec = by_gen.quantile(0.1)
        last_dec = by_gen.quantile(0.9)

        def band(col):
            return first_dec[col].to_numpy(), last_dec[col].to_numpy()
    elif var_type == 'minmax':
        min_data = by_gen.min()
        max_data = by_gen.max()

        def band(col):
            return min_data[col].to_numpy(), max_data[col].to_numpy()
    else:
        band = None  # Unknown var_type: only the means are drawn

    for env in ["A", "B"]:

        fig, ax1 = plt.subplots(figsize=(9, 4), dpi=dpi)
        # Leave a 5% margin below 0 and above the number of genes per type
        delta_y = params["nb_genes"] / 3 * 0.05
        ax1.set_ylim(-delta_y, params["nb_genes"] / 3 + delta_y)
        ax1.set_ylabel('Activated genes', fontsize=label_fontsize)
        ax1.set_xlabel('Generation', fontsize=label_fontsize)
        ax1.set_xscale('log')
        ax1.grid(linestyle=':')
        ax1.grid(visible=True, which="minor", axis='x', linestyle=':')

        for i_gene_type, gene_type in enumerate(gene_types):
            col = f"{gene_type}on_{env}"
            color = gene_type_color[i_gene_type]

            ax1.plot(mean_data['Gen'], mean_data[col],
                     color=color,
                     linewidth=2,
                     label=gene_type)

            if band is not None:
                lower, upper = band(col)
                ax1.plot(mean_data['Gen'], lower, color=color, alpha=0.3)
                ax1.plot(mean_data['Gen'], upper, color=color, alpha=0.3)

        ax1.tick_params(axis='both', which='major', labelsize=tick_fontsize)

        fig.legend(bbox_to_anchor=(0, 0),
                   bbox_transform=ax1.transAxes,
                   loc='lower left',
                   title='$d_{max}$ = 25 kb',
                   title_fontsize=legend_fontsize - 2,
                   fontsize=legend_fontsize)

        plt.savefig(f'{exp_path}/gene_activity_env_{env}.pdf', dpi=dpi, bbox_inches='tight')

        plt.show()
        plt.close(fig)  # Release the figure once it has been saved and shown

In [None]:
# One figure per environment, with decile bands (default var_type); PDFs are written into exp_path
plot_gene_activity_all(exp_stats, exp_path)

# Influence of environmental supercoiling on final gene expression levels

In [None]:
# Grid of environmental supercoiling shifts: np.linspace(sigma_min, sigma_max, nb_sigmas)
nb_sigmas = 250     # Number of grid points
sigma_min = -0.061  # Smallest environmental shift explored
sigma_max = 0.061   # Largest environmental shift explored

In [None]:
def compute_activity_sigma_per_type(indiv, sigmas):
    """Average final expression level of each gene type, for each supercoiling shift.

    The individual is first evaluated once with `indiv.evaluate(0.0, 0.0)`,
    then `indiv.run_system` is called for every value in `sigmas` and the last
    row of the returned expression array is accumulated per gene type.

    Parameters
    ----------
    indiv : object
        Individual exposing `evaluate`, `run_system`, `genes` (each gene with
        an integer `gene_type` in 0..2) and `nb_genes`.
    sigmas : sequence of float
        Environmental supercoiling shifts to test.

    Returns
    -------
    numpy.ndarray
        Array of shape (3, len(sigmas)): summed final expression of each gene
        type divided by `indiv.nb_genes / 3`.
    """
    # Initialize the individual
    indiv.evaluate(0.0, 0.0)

    totals = np.zeros((3, len(sigmas)))

    for col, sigma_env in enumerate(sigmas):
        # Expression levels at the end of the run in this environment
        final_expr = indiv.run_system(sigma_env)[-1]

        for i_gene, gene in enumerate(indiv.genes):
            totals[gene.gene_type][col] += final_expr[i_gene]

    # Turn the per-type sums into per-type averages (equal type sizes assumed)
    totals /= (indiv.nb_genes / 3)

    return totals

In [None]:
def compute_avg_best_activ_by_sigma():
    """Average, over replicates, the per-type activity of the best individuals.

    For every replicate directory in the notebook-level `exp_rep_dirs`, loads
    the best individual at generation `gen` and computes its per-type
    activity over the grid `np.linspace(sigma_min, sigma_max, nb_sigmas)`.

    Returns
    -------
    numpy.ndarray
        Array of shape (3, nb_sigmas): mean over replicates of
        `compute_activity_sigma_per_type`.

    Raises
    ------
    ValueError
        If `exp_rep_dirs` is empty.
    """
    if not exp_rep_dirs:
        raise ValueError("No replicate directory to average over")

    sigmas = np.linspace(sigma_min, sigma_max, nb_sigmas)
    activ = np.zeros((3, len(sigmas)))

    # Iterate over the directories that were actually found rather than
    # rebuilding 'repNN' names, which breaks when names are not contiguous
    # or not zero-padded on two digits.
    for rep_dir in exp_rep_dirs:
        indiv = evotsc_lib.get_best_indiv(rep_dir, gen=gen)

        activ += compute_activity_sigma_per_type(indiv, sigmas)

    return activ / len(exp_rep_dirs)

In [None]:
# Replicate-averaged activity of the best individuals: one row per gene type, one column per sigma
activ = compute_avg_best_activ_by_sigma()

In [None]:
def generate_rand_activ_by_sigma(rand_indivs):
    """Mean activity of a set of random individuals for each supercoiling shift.

    For every individual, the per-type activity is averaged over the gene
    types (so all genes are pooled), then the result is averaged over the
    individuals.

    Parameters
    ----------
    rand_indivs : sequence
        Individuals accepted by `compute_activity_sigma_per_type`.

    Returns
    -------
    numpy.ndarray
        1-D array of length `nb_sigmas`, over the grid
        `np.linspace(sigma_min, sigma_max, nb_sigmas)`.
    """
    sigmas = np.linspace(sigma_min, sigma_max, nb_sigmas)
    total = np.zeros(len(sigmas))  # Average over all genes, not gene types

    for rand_indiv in rand_indivs:
        per_type = compute_activity_sigma_per_type(rand_indiv, sigmas)
        total += np.mean(per_type, axis=0)

    return total / len(rand_indivs)

In [None]:
# Baseline: mean activity of the random individuals over the same sigma grid
rand_activ = generate_rand_activ_by_sigma(rand_indivs)

In [None]:
# See how gene activity levels depend on environmental supercoiling
def plot_activity_sigma_per_type(activ, rand_activ, plot_title=None, plot_name=None):
    """Plot the average expression of each gene type against background supercoiling.

    The x axis is `sigma_basal + delta_sigma_env`; a twin axis on top shows
    the environmental shift alone. Besides one curve per gene type, the plot
    shows the half-expression level, the positions of `sigma_A` and `sigma_B`,
    the curve of random genomes and the curve of an isolated gene.

    Parameters
    ----------
    activ : numpy.ndarray
        Indexed as `activ[i_gene_type, i_sigma]`, over the grid
        `np.linspace(sigma_min, sigma_max, nb_sigmas)`.
    rand_activ : numpy.ndarray
        1-D array over the same grid (random genomes).
    plot_title : str, optional
        Accepted for call compatibility; currently not used.
    plot_name : path-like, optional
        If given, the figure is saved to this path.
    """
    sigma_basal = params['sigma_basal']
    sigma_opt = params['sigma_opt']

    sigmas_env = np.linspace(sigma_min, sigma_max, nb_sigmas)
    sigmas_total = sigmas_env + sigma_basal

    fig, ax = plt.subplots(figsize=(7, 4), dpi=dpi)

    # Raw strings: '\s' and '\d' are not valid Python escape sequences
    plt.xlabel(r'Background supercoiling ($\sigma_{basal} + \delta\sigma_{env}$)')
    plt.ylabel('Average gene expression by type')
    plt.ylim(-0.05, 1.10)
    plt.xlim(sigmas_total[0], sigmas_total[-1])
    plt.grid(linestyle=':')

    # Add 1/2 expression level (midpoint between exp(-m) and 1)
    half_expr = (1 + np.exp(- params['m'])) / 2
    plt.hlines(half_expr, sigmas_total[0], sigmas_total[-1],
               linestyle=':', linewidth=1.5, color='tab:pink')

    # Add average expression per gene type
    for i_gene_type, gene_type in enumerate(gene_types):
        plt.plot(sigmas_total, activ[i_gene_type, :],
                 color=gene_type_color[i_gene_type],
                 linewidth=2,
                 label=gene_type)

    # Add sigma_A and sigma_B
    y_min, y_max = plt.ylim()
    for env_key, env_label in (('sigma_A', r'$\sigma_A$'), ('sigma_B', r'$\sigma_B$')):
        plt.vlines(params[env_key] + sigma_basal, y_min, y_max,
                   linestyle='--', linewidth=1, color='black')
    for env_key, env_label in (('sigma_A', r'$\sigma_A$'), ('sigma_B', r'$\sigma_B$')):
        plt.text(params[env_key] + sigma_basal, y_max + 0.005, env_label,
                 va='bottom', ha='center', fontsize='large') # Use \mathbf{} for bold
    plt.ylim(y_min, y_max)  # Restore the limits, which the vertical lines may have changed

    # Add expression for a random genome
    plt.plot(sigmas_total, rand_activ,
             linewidth=2, color='tab:cyan', zorder=0, linestyle=(0, (3, 1, 1, 1)), label='Random')

    # Add expression for an isolated gene (no contribution from neighbouring genes)
    activities = 1.0 / (1.0 + np.exp((sigmas_total - sigma_opt)/ params['epsilon']))
    plt.plot(sigmas_total, np.exp(params['m'] * (activities - 1)),
             linewidth=2, color='tab:cyan', zorder=0, linestyle='--', label='Isolated gene')

    plt.legend(loc='lower left')

    # Add a second x axis showing the environmental shift alone
    ax2 = ax.twiny()
    xmin, xmax = ax.get_xlim()
    ax2.set_xlim(xmin - params['sigma_basal'], xmax - params['sigma_basal'])
    ax2.set_xlabel(r'Environmental shift in supercoiling ($\delta\sigma_{env}$)')

    # Wrap up
    if plot_name:
        plt.savefig(plot_name, dpi=dpi, bbox_inches='tight')

    plt.show()
    plt.close()

In [None]:
# Plot evolved vs random activity; the figure is saved as activity_sigmas_avg.pdf in exp_path
plot_activity_sigma_per_type(activ, rand_activ,
                             plot_name=exp_path.joinpath('activity_sigmas_avg.pdf'))

## See variation of random genome activity with different intergenic distances

In [None]:
# Interaction distances to compare; each one replaces params['interaction_dist'] in turn.
# Presumably in base pairs: the plot labels divide these values by 1000 and append "kb".
inter_dists = np.array([1000, 2500, 5000, 10000, 25000, 50000])

In [None]:
def generate_rand_activs_by_dist(inter_dists):
    """Mean activity of random genomes for several interaction distances.

    For each distance, 250 random individuals are created from a copy of the
    notebook-level `params` in which `interaction_dist` is replaced. All
    individuals are created first; their activity curves are computed
    afterwards.

    Parameters
    ----------
    inter_dists : iterable
        Interaction distances to test.

    Returns
    -------
    list of numpy.ndarray
        One result of `generate_rand_activ_by_sigma` per distance, in the
        order of `inter_dists`.
    """
    def _indivs_for(dist):
        # Random individuals sharing every parameter except the interaction distance
        dist_params = params.copy()
        dist_params['interaction_dist'] = dist
        return make_random_indivs(nb_indiv=250, params=dist_params)

    # Phase 1: build every pool of individuals
    pools = [_indivs_for(dist) for dist in inter_dists]

    # Phase 2: compute one activity curve per pool
    return [generate_rand_activ_by_sigma(pool) for pool in pools]

In [None]:
# One random-genome activity curve per interaction distance (250 random individuals each)
rand_activs_dist = generate_rand_activs_by_dist(inter_dists)

In [None]:
# See how gene activity levels depend on interaction distance
def plot_random_activity_sigma_per_type(inter_dists, rand_activs, params, plot_title=None, plot_name=None):
    """Plot the average expression of random genomes for several interaction distances.

    The x axis is `sigma_basal + delta_sigma_env`; a twin axis on top shows
    the environmental shift alone. The plot also shows the half-expression
    level, the curve of an isolated gene and the positions of `sigma_A` and
    `sigma_B`.

    Parameters
    ----------
    inter_dists : sequence of numbers
        Interaction distances; used for the palette size and the curve labels.
    rand_activs : sequence of numpy.ndarray
        One 1-D activity curve per distance, over the grid
        `np.linspace(sigma_min, sigma_max, nb_sigmas)`.
    params : mapping
        Simulation parameters; `sigma_basal`, `sigma_opt`, `m`, `epsilon`,
        `sigma_A` and `sigma_B` are read.
    plot_title : str, optional
        Accepted for call compatibility; currently not used.
    plot_name : path-like, optional
        If given, the figure is saved to this path.
    """
    sigma_basal = params['sigma_basal']
    sigma_opt = params['sigma_opt']

    sigmas_env = np.linspace(sigma_min, sigma_max, nb_sigmas)
    sigmas_total = sigmas_env + sigma_basal

    # One color per distance, palette order reversed
    all_colors = met_brewer.met_brew(name='Hokusai3', n=len(inter_dists), brew_type='continuous')
    colors = list(reversed(all_colors))

    fig, ax = plt.subplots(figsize=(7, 4), dpi=dpi)

    # Raw strings: '\s' and '\d' are not valid Python escape sequences
    plt.xlabel(r'Background supercoiling ($\sigma_{basal} + \delta\sigma_{env}$)')
    plt.ylabel('Average gene expression')
    plt.ylim(-0.05, 1.10)
    plt.xlim(sigmas_total[0], sigmas_total[-1])
    plt.grid(linestyle=':')

    # Add 1/2 expression level (midpoint between exp(-m) and 1)
    half_expr = (1 + np.exp(- params['m'])) / 2
    plt.hlines(half_expr, sigmas_total[0], sigmas_total[-1],
               linestyle=':', linewidth=1.5, color='tab:pink')

    # Add one curve per interaction distance
    labels = [f'{d / 1000} kb' for d in inter_dists]
    for i_expr, rand_activ in enumerate(rand_activs):
        plt.plot(sigmas_total, rand_activ,
                 color=colors[i_expr],
                 linewidth=2,
                 label='$d_{max} = $' + labels[i_expr])

    # Add expression for an isolated gene (no contribution from neighbouring genes)
    activities = 1.0 / (1.0 + np.exp((sigmas_total - sigma_opt)/ params['epsilon']))
    plt.plot(sigmas_total, np.exp(params['m'] * (activities - 1)),
             linewidth=2, color='tab:cyan', zorder=0, linestyle='--', label='Isolated gene')

    plt.legend(loc='lower left')

    # Add sigma_A and sigma_B
    y_min, y_max = plt.ylim()
    for env_key in ('sigma_A', 'sigma_B'):
        plt.vlines(params[env_key] + sigma_basal, y_min, y_max,
                   linestyle='--', linewidth=1, color='black')
    for env_key, env_label in (('sigma_A', r'$\sigma_A$'), ('sigma_B', r'$\sigma_B$')):
        plt.text(params[env_key] + sigma_basal, y_max + 0.005, env_label,
                 va='bottom', ha='center', fontsize='large') # Use \mathbf{} for bold
    plt.ylim(y_min, y_max)  # Restore the limits, which the vertical lines may have changed

    # Add a second x axis showing the environmental shift alone
    ax2 = ax.twiny()
    xmin, xmax = ax.get_xlim()
    ax2.set_xlim(xmin - params['sigma_basal'], xmax - params['sigma_basal'])
    ax2.set_xlabel(r'Environmental shift in supercoiling ($\delta\sigma_{env}$)')

    # Wrap up
    if plot_name:
        plt.savefig(plot_name, dpi=dpi, bbox_inches='tight')

    plt.show()
    plt.close()

In [None]:
# Compare random-genome activity across interaction distances; saved as random_activ_per_sigma.pdf in exp_path
plot_random_activity_sigma_per_type(inter_dists, rand_activs_dist, params,
                                    plot_name=exp_path.joinpath('random_activ_per_sigma.pdf'))

# Gene knockouts

## Arrange Individual and run_system() to accommodate a knocked-out gene

In [None]:
@jit(nopython=True)
def run_system_numba_ko(nb_genes: int,
                        init_expr: np.ndarray,
                        inter_matrix: np.ndarray,
                        sigma_basal: float,
                        sigma_opt: float,
                        epsilon: float,
                        m: float,
                        sigma_env: float,
                        id_ko: int) -> np.ndarray:
    """Iterate the gene expression system with gene `id_ko` held at zero expression.

    Starting from `init_expr` (with entry `id_ko` set to 0), each step computes
    the local supercoiling as `inter_matrix @ expression`, turns the total
    supercoiling into a promoter activity with a sigmoid centred on `sigma_opt`
    with width `epsilon`, maps it to an expression level in [exp(-m), 1], and
    moves the current expression halfway towards that level. The knocked-out
    gene is reset to 0 after every step.

    Iteration stops when the mean absolute change per gene drops below 1e-7 or
    after 200 steps.

    Returns
    -------
    np.ndarray
        Array of shape (nb_steps + 1, nb_genes): row 0 is the initial state and
        the last row is the final state.
    """

    step_size = 0.5  # Weight of the newly computed expression in each update
    stop_dist = 1e-7  # Convergence threshold on the mean absolute change per gene
    max_eval_steps = 200  # Hard cap on the number of iterations

    temporal_expr = np.zeros((max_eval_steps+1, nb_genes))

    # Initial values at t = 0
    temporal_expr[0, :] = init_expr
    temporal_expr[0, id_ko] = 0.0


    # Iterate the system
    it = 1
    cont = True
    while cont:
        prev_expr = temporal_expr[it-1, :]
        # Supercoiling generated on each gene by the expression of the others
        sigma_local = inter_matrix @ prev_expr
        sigma_total = sigma_basal + sigma_local + sigma_env

        promoter_activity = 1.0 / (1.0 + np.exp((sigma_total - sigma_opt)/epsilon))

        # We subtract 1 to rescale between exp(-m) and 1
        iter_expr = np.exp(m * (promoter_activity - 1.0))

        # Relaxation step: blend the target expression with the previous one
        nouv_expr = step_size * iter_expr + (1 - step_size) * prev_expr

        # Knockout
        nouv_expr[id_ko] = 0

        temporal_expr[it, :] = nouv_expr

        # Check if we're done
        dist = np.abs(nouv_expr - prev_expr).sum() / nb_genes

        # Redundant: prev_expr is reassigned from temporal_expr at the top of the loop
        prev_expr = nouv_expr

        if dist < stop_dist:
            cont = False

        if it == max_eval_steps:
            cont = False
        it += 1

    # Keep only the rows that were actually filled
    temporal_expr = temporal_expr[:it, :]

    return temporal_expr

In [None]:
class KoIndividual(evotsc.Individual):
    """Individual built from another one, with a single gene knocked out.

    The genes are obtained with `gene.clone()` on the original's genes and all
    other constructor arguments are taken from the original individual.
    `run_system` is overridden so that gene `id_ko` stays at zero expression.
    """

    def __init__(self,
                 orig_indiv: evotsc.Individual,
                 id_ko: int) -> None:
        """Copy `orig_indiv` and remember the index `id_ko` of the gene to knock out."""
        cloned_genes = [gene.clone() for gene in orig_indiv.genes]

        super().__init__(cloned_genes,
                         orig_indiv.interaction_dist,
                         orig_indiv.interaction_coef,
                         orig_indiv.sigma_basal,
                         orig_indiv.sigma_opt,
                         orig_indiv.epsilon,
                         orig_indiv.m,
                         orig_indiv.selection_coef,
                         orig_indiv.rng)

        self.id_ko = id_ko

    def run_system(self, sigma_env):
        """Run the expression dynamics at shift `sigma_env` with gene `id_ko` silenced.

        The interaction matrix is recomputed and stored on `self.inter_matrix`
        at every call. Returns the array produced by `run_system_numba_ko`.
        """
        # Every gene starts from its basal expression level
        basal_levels = np.array([gene.basal_expression for gene in self.genes])

        self.inter_matrix = self.compute_inter_matrix()

        return run_system_numba_ko(self.nb_genes,
                                   basal_levels,
                                   self.inter_matrix,
                                   self.sigma_basal,
                                   self.sigma_opt,
                                   self.epsilon,
                                   self.m,
                                   sigma_env,
                                   self.id_ko)

## Compute the effective graph: genes switched on or off by a KO

In [None]:
def get_effective_graph(best_indiv, sigma):
    """Build the knockout interaction graph of an individual in one environment.

    A gene counts as active when its final expression is above the midpoint
    between exp(-m) and 1. Each gene is knocked out in turn; whenever this
    flips the activation state of another gene, an edge is added from the
    knocked-out gene to the affected gene.

    Parameters
    ----------
    best_indiv : individual
        Individual to analyse.
    sigma : float
        Environmental supercoiling shift passed to `run_system`.

    Returns
    -------
    tuple
        `(best_indiv, graph)`, where `graph` is a `networkx.DiGraph` with one
        node per gene index (node attribute `gene`) and edges carrying
        `kind='activ'` (the target was on and is switched off by the
        knockout) or `kind='inhib'` (the target was off and is switched on).
    """
    # Result unused; call kept so the function's behavior stays unchanged
    _, genome_length = best_indiv.compute_gene_positions(include_coding=True)

    # Activation state of every gene without any knockout
    wild_type_expr = best_indiv.run_system(sigma)[-1, :]
    threshold = (1 + np.exp(-best_indiv.m)) / 2
    wild_type_on = wild_type_expr > threshold

    inter_graph = nx.DiGraph()

    # Nodes: one per gene, keyed by its index
    for i_gene, gene in enumerate(best_indiv.genes):
        inter_graph.add_node(i_gene, gene=gene)

    # Edges: if KOing gene A switches gene B on or off, add an A -> B arrow
    nb_genes = best_indiv.nb_genes
    for i_ko in range(nb_genes):

        ko_indiv = KoIndividual(orig_indiv=best_indiv, id_ko=i_ko)
        ko_on = ko_indiv.run_system(sigma)[-1, :] > threshold

        for i_other in range(nb_genes):
            if i_other == i_ko:
                continue
            if wild_type_on[i_other] == ko_on[i_other]:
                continue

            # Target on without the KO means i_ko activates it, otherwise i_ko inhibits it
            edge_kind = 'activ' if wild_type_on[i_other] else 'inhib'
            inter_graph.add_edge(i_ko, i_other, kind=edge_kind)

    return best_indiv, inter_graph

In [None]:
def get_all_effective_graphs(exp_path, gen):
    """Compute the knockout graphs of the best individual of every replicate.

    Parameters
    ----------
    exp_path : pathlib.Path
        Experiment directory containing the "rep*" sub-directories.
    gen : int
        Generation at which the best individual is loaded.

    Returns
    -------
    dict
        `graphs[rep][env]` is the `(individual, graph)` pair returned by
        `get_effective_graph`, with `rep` the 0-based replicate number (sorted
        directory order) and `env` in 'A', 'B'. The supercoiling shift of each
        environment is read from the notebook-level `params`.
    """
    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    graphs = {}
    for rep, rep_dir in enumerate(rep_dirs):
        best_indiv = evotsc_lib.get_best_indiv(rep_path=rep_dir, gen=gen)
        graphs[rep] = {env: get_effective_graph(best_indiv, params[f'sigma_{env}'])
                       for env in ('A', 'B')}

    return graphs

In [None]:
# Knockout graphs of the experiment: one (individual, graph) pair per replicate and environment
exp_effective_graphs = get_all_effective_graphs(exp_path, gen)

In [None]:
# Same knockout graphs for the main (reference) run
main_effective_graphs = get_all_effective_graphs(main_path, gen)

## Stats on the effective graphs: out degree (number of switches)

In [None]:
def dist_in_genes(nb_genes, i, j):
    """Distance, in number of genes, between genes `i` and `j` on a circular genome.

    The genome holds `nb_genes` genes arranged in a circle, so two genes can be
    joined either directly or by wrapping around the origin:

        direct:   -------i--j--------
        wrapped:  -i---------------j-

    The shorter of the two paths is returned.
    """
    direct = abs(i - j)
    wrapped = nb_genes - direct
    return min(direct, wrapped)

In [None]:
def get_combined_graph_stats(effective_graphs):
    """Per-gene degree and distance statistics on the combined knockout graphs.

    For every replicate, the graphs of environments 'A' and 'B' are merged
    with `nx.compose`, and one row is produced per gene.

    Parameters
    ----------
    effective_graphs : dict
        `effective_graphs[rep][env]` is an `(individual, graph)` pair.

    Returns
    -------
    pandas.DataFrame
        Columns: `Replicate`, `gene_type` (name taken from `gene_types`),
        `out_deg`, `in_deg`, and `avg_dist` / `max_dist`, the mean and maximum
        of `dist_in_genes` between the gene and the targets of its outgoing
        edges (both 0 when the gene has no outgoing edge).
    """
    columns = ('Replicate', 'gene_type', 'out_deg', 'in_deg', 'avg_dist', 'max_dist')
    res_dict = {column: [] for column in columns}

    for rep, graphs_by_env in effective_graphs.items():

        indiv, graph_A = graphs_by_env['A']
        indiv, graph_B = graphs_by_env['B']
        combined_graph = nx.compose(graph_A, graph_B)

        for i_gene, gene in enumerate(indiv.genes):
            out_deg = combined_graph.out_degree[i_gene]

            if out_deg > 0:
                # Distances, in genes, to every gene switched by this knockout
                dists = np.array([dist_in_genes(indiv.nb_genes, i_gene, neighbor)
                                  for neighbor in combined_graph[i_gene].keys()],
                                 dtype=int)
                avg_dist, max_dist = np.mean(dists), np.max(dists)
            else:
                avg_dist, max_dist = 0, 0

            res_dict['Replicate'].append(rep)
            res_dict['gene_type'].append(gene_types[gene.gene_type])
            res_dict['out_deg'].append(out_deg)
            res_dict['in_deg'].append(combined_graph.in_degree[i_gene])
            res_dict['avg_dist'].append(avg_dist)
            res_dict['max_dist'].append(max_dist)

    return pd.DataFrame.from_dict(res_dict)

In [None]:
# One row per gene and replicate for the experiment (degrees and knockout distances)
exp_combined_stats = get_combined_graph_stats(exp_effective_graphs)

In [None]:
# Same per-gene statistics for the main (reference) run
main_combined_stats = get_combined_graph_stats(main_effective_graphs)

## Plot the average in- and out-degree of nodes in the network

In [None]:
def plot_degree_by_env_combined(graph_stats, rand_graph_stats, deg_type):
    """Bar plot of the mean node degree per gene type, for two sets of graphs.

    For each gene type, a bar shows the mean of the `<deg_type>_deg` column
    and a box plot shows the distribution of the per-replicate means. The
    first group of bars (tick label '25 kb') comes from `graph_stats`, the
    second (tick label '5 kb (main)') from `rand_graph_stats`. The figure is
    saved as `effective_graph_combined_<deg_type>_degree.pdf` in the
    notebook-level `exp_path`.

    Parameters
    ----------
    graph_stats, rand_graph_stats : pandas.DataFrame
        Frames with `gene_type`, `Replicate` and `<deg_type>_deg` columns.
    deg_type : str
        'in' or 'out'.
    """
    fig, ax = plt.subplots(figsize=(8, 5), dpi=dpi)

    deg_col = f'{deg_type}_deg'

    def _degree_means(stats):
        # Mean degree per gene type, overall and within each replicate
        overall = stats.groupby(['gene_type']).mean()[deg_col]
        per_replicate = stats.groupby(['gene_type', 'Replicate']).mean()[deg_col]
        return overall, per_replicate

    mean_stats, rep_stats = _degree_means(graph_stats)
    rand_mean_stats, rand_rep_stats = _degree_means(rand_graph_stats)

    bar_width = 0.2
    x_pos = np.array([1, 2])
    offsets = np.array([-bar_width, 0, bar_width])  # One slot per gene type around each tick

    for i_gene_type, gene_type in enumerate(gene_types):
        bar_pos = x_pos + offsets[i_gene_type]

        plt.bar(bar_pos,
                [mean_stats.loc[gene_type], rand_mean_stats.loc[gene_type]],
                width=bar_width, color=gene_type_color[i_gene_type])

        plt.boxplot([rep_stats.loc[gene_type], rand_rep_stats.loc[gene_type]],
                    positions=bar_pos,
                    widths=0.1, medianprops={'color':'black'})

    # Set the ticks after the box plots, which manage the x ticks themselves
    plt.xticks(ticks=x_pos, labels=['25 kb', '5 kb (main)'])
    plt.tick_params(axis='both', which='major', labelsize=tick_fontsize)

    plt.ylabel(f'{deg_type.capitalize()}-degree', fontsize=20)
    plt.ylim(0, 45)
    plt.grid(axis='y', linestyle=':')

    # Legend built by hand: one colored patch per gene type
    legend_patches = [mpl.patches.Patch(facecolor=color, edgecolor='black', label=label)
                      for color, label in zip(gene_type_color, gene_types)]
    plt.legend(handles=legend_patches, title='Gene type', title_fontsize=15, fontsize=15)

    plt.savefig(exp_path.joinpath(f'effective_graph_combined_{deg_type}_degree.pdf'), bbox_inches='tight', dpi=dpi)

In [None]:
# In-degree: how many knockouts switch a gene of each type; experiment vs main run
plot_degree_by_env_combined(exp_combined_stats, main_combined_stats, 'in')

In [None]:
# Out-degree: how many genes are switched by knocking out a gene of each type; experiment vs main run
plot_degree_by_env_combined(exp_combined_stats, main_combined_stats, 'out')