In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pathlib

In [None]:
# Import the project modules, then reload each of them so that the kernel
# re-executes their current source instead of reusing an already-imported copy.
import importlib
import evotsc
import evotsc_lib
import evotsc_plot
# Reloads are issued in the same order as the imports above.
importlib.reload(evotsc)
importlib.reload(evotsc_lib)
importlib.reload(evotsc_plot)

In [None]:
# --- Experiment location and generation to analyse ---
exp_path = pathlib.Path('/Users/theotime/Desktop/evotsc/pci/main/')
gen = 1_000_000

# --- Lookup tables, indexed by the integer codes stored on each gene ---
# Gene type code -> display name, and the plot color used for that type
# (both lists are in the same order: AB, A, B).
gene_types = ['AB', 'A', 'B']
gene_type_color = ['tab:blue', 'tab:red', 'tab:green']
# Gene orientation code -> display name.
orient_name = ['leading', 'lagging']

# --- Resolution used when creating figures ---
dpi = 300

In [None]:
# Collect the replicate directories (sub-directories of the experiment folder
# whose name starts with "rep"), in sorted order.
rep_dirs = sorted(d for d in exp_path.iterdir() if d.is_dir() and d.name.startswith("rep"))
nb_rep = len(rep_dirs)
if nb_rep == 0:
    # Fail early with an explicit message instead of an IndexError on rep_dirs[0]
    raise FileNotFoundError(f"No 'rep*' directory found in {exp_path}")

# Parameters are read from the first replicate only
# (presumably they are shared by all replicates of the experiment -- TODO confirm).
params = evotsc_lib.read_params(rep_dirs[0])
params['m'] = 2.5 # Temporary fix because the parameter wasn't saved

In [None]:
# Random generator with a fixed seed, so that runs are repeatable.
rng = np.random.default_rng(123456)

In [None]:
# Load an individual from the first replicate at generation `gen`
# (presumably the fittest one, going by the helper's name -- see evotsc_lib).
indiv = evotsc_lib.get_best_indiv(rep_dirs[0], gen)

In [None]:
# Plot the loaded individual once per environment, using that environment's
# sigma parameter. The figures are saved in the 'sub_rep00' folder of the
# experiment, which is created here so that saving also works on an
# experiment directory that does not contain it yet.
init_plot_dir = exp_path.joinpath('sub_rep00')
init_plot_dir.mkdir(exist_ok=True)

for env in ['A', 'B']:
    evotsc_plot.plot_genome_and_tsc(indiv, sigma=params[f'sigma_{env}'], coloring_type='on-off', id_interval=1,
                                    print_ids=True, naming_type='pos',
                                    plot_name=init_plot_dir.joinpath(f'init_env_{env}.pdf'))

In [None]:
def extract_subnetwork(indiv, i_start, size, keep_ids=False):
    """Build a new individual made of `size` consecutive genes of `indiv`.

    Genes are taken from `indiv.genes` starting at index `i_start`; indices
    are taken modulo `indiv.nb_genes`, so the selection wraps around the end
    of the gene list. Every selected gene is cloned, so the genes of `indiv`
    are not modified.

    Args:
        indiv: Source individual. Its model parameters (interaction distance
            and coefficient, sigma values, epsilon, m, selection coefficient)
            and its `rng` are passed on to the new individual.
        i_start: Index in `indiv.genes` of the first gene to keep.
        size: Number of consecutive genes to keep; must be at least 1.
        keep_ids: If falsy, the `id` of each cloned gene is overwritten with
            the index of that gene in `indiv.genes`. If truthy, the `id`
            produced by `clone()` is left as is.

    Returns:
        A new `evotsc.Individual` holding the cloned genes. The `intergene`
        of its last gene is increased by the difference between the total
        lengths reported by `compute_gene_positions(include_coding=True)` for
        `indiv` and for the new individual.

    Raises:
        ValueError: If `size` is smaller than 1.
    """
    if size < 1:
        raise ValueError(f'size must be at least 1, got {size}')

    _, indiv_len = indiv.compute_gene_positions(include_coding=True)

    new_genes = []
    for i_gene in range(i_start, i_start + size):
        src_index = i_gene % indiv.nb_genes  # Wrap around the end of the gene list
        new_gene = indiv.genes[src_index].clone()
        if not keep_ids:
            new_gene.id = src_index
        new_genes.append(new_gene)

    clone = evotsc.Individual(genes=new_genes,
                              interaction_dist=indiv.interaction_dist,
                              interaction_coef=indiv.interaction_coef,
                              sigma_basal=indiv.sigma_basal,
                              sigma_opt=indiv.sigma_opt,
                              epsilon=indiv.epsilon,
                              m=indiv.m,
                              selection_coef=indiv.selection_coef,
                              rng=indiv.rng)

    _, clone_len = clone.compute_gene_positions(include_coding=True)

    # Pad the space after the last gene by the length that was removed.
    # NOTE(review): this relies on the new individual holding these same gene
    # objects (not copies of them) -- confirm against evotsc.Individual.
    new_genes[-1].intergene += indiv_len - clone_len

    return clone

In [None]:
def plot_subnetworks(indiv, segment_sizes, rep=0):
    """Plot every subnetwork of `indiv` for each requested size, in both environments.

    For each size in `segment_sizes` and each possible starting gene, a
    subnetwork is extracted with `extract_subnetwork` and plotted with
    `evotsc_plot.plot_genome_and_tsc`, once with `params['sigma_A']` and once
    with `params['sigma_B']`. Figures are written to
    `<exp_path>/sub_rep<rep>/sub_<size>_genes_<start>_env_<env>.pdf` and are
    not shown inline.

    Args:
        indiv: Individual whose subnetworks are plotted.
        segment_sizes: Iterable of subnetwork sizes (number of genes).
        rep: Replicate number, used only to name the output folder.
    """
    orig_pos, _ = indiv.compute_gene_positions(include_coding=True)

    sub_path = exp_path.joinpath(f'sub_rep{rep:02}')
    sub_path.mkdir(exist_ok=True)

    for segment_size in segment_sizes:
        for i_start in range(indiv.nb_genes):
            clone = extract_subnetwork(indiv, i_start, segment_size)

            # For plotting at the same position as the original individual
            shift = orig_pos[i_start]
            if indiv.genes[i_start].orientation == 1: # Lagging
                shift -= (indiv.genes[i_start].length - 1)

            for env in ['A', 'B']:
                sigma = params[f'sigma_{env}']
                name = sub_path.joinpath(f'sub_{segment_size}_genes_{i_start:02}_env_{env}.pdf')

                evotsc_plot.plot_genome_and_tsc(clone, sigma=sigma, coloring_type='on-off',
                                                id_interval=1, print_ids=True, naming_type='id',
                                                shift=-shift, plot_name=name, show_plot=False)

In [None]:
#plot_subnetworks(indiv, segment_sizes=[6])

In [None]:
def compute_subnetwork_stats(exp_path, gen, max_network_size):
    """Evaluate every subnetwork of every replicate and tabulate per-gene expression.

    For each replicate directory ("rep*") in `exp_path`, the individual
    returned by `evotsc_lib.get_best_indiv` at generation `gen` is loaded.
    For every size from 1 to `max_network_size` and every starting gene, a
    subnetwork is extracted (keeping the original gene ids) and evaluated
    with the module-level `params['sigma_A']` and `params['sigma_B']`.

    Args:
        exp_path: `pathlib.Path` of the experiment folder.
        gen: Generation at which the individuals are loaded.
        max_network_size: Largest subnetwork size (inclusive).

    Returns:
        A `pandas.DataFrame` with one row per gene of each evaluated
        subnetwork and the columns 'Replicate', 'network_size', 'gene_id',
        'gene_type', 'gene_orient', 'final_expr_A' and 'final_expr_B'. The
        expression values are read from the last row of the arrays returned
        by `evaluate` (presumably the final time step -- TODO confirm).
        The frame has a unique 0..n-1 index and is empty (but keeps its
        columns) when no replicate directory is found.
    """
    rep_dirs = sorted(d for d in exp_path.iterdir() if d.is_dir() and d.name.startswith("rep"))

    # A single set of columns is filled for all replicates, so that the table
    # is built once instead of being re-concatenated for every replicate.
    result_dict = {'Replicate': [],
                   'network_size': [],
                   'gene_id': [],
                   'gene_type': [],
                   'gene_orient': [],
                   'final_expr_A': [],
                   'final_expr_B': []}

    for rep, rep_dir in enumerate(rep_dirs):

        indiv = evotsc_lib.get_best_indiv(rep_dir, gen)

        for size in range(1, max_network_size + 1):

            for i_start in range(indiv.nb_genes):
                sub_indiv = extract_subnetwork(indiv, i_start, size, keep_ids=True)

                # The second value returned by evaluate (fitness) is not needed here
                (expr_A, expr_B), _ = sub_indiv.evaluate(params['sigma_A'], params['sigma_B'])

                for i_gene, gene in enumerate(sub_indiv.genes):
                    result_dict['Replicate'].append(rep)
                    result_dict['network_size'].append(size)
                    result_dict['gene_id'].append(gene.id)
                    result_dict['gene_type'].append(gene_types[gene.gene_type])
                    result_dict['gene_orient'].append(orient_name[gene.orientation])
                    result_dict['final_expr_A'].append(expr_A[-1, i_gene])
                    result_dict['final_expr_B'].append(expr_B[-1, i_gene])

    return pd.DataFrame.from_dict(result_dict)

In [None]:
# Build the per-gene expression table for all subnetworks of 1 to 60 genes,
# over every replicate of the experiment.
stats = compute_subnetwork_stats(exp_path, gen, max_network_size=60)

In [None]:
# Display the resulting table as the cell output.
stats

In [None]:
def plot_expr_by_network_size(stats, plot_name=None):
    """Plot median and mean final expression against subnetwork size.

    Draws one panel per environment (A on top, B below). In each panel, and
    for each gene type, the median (solid line) and the mean (dotted line)
    of the 'final_expr_<env>' column are plotted as a function of
    'network_size', using the color of that gene type.

    Args:
        stats: DataFrame with at least the columns 'gene_type',
            'network_size', 'final_expr_A' and 'final_expr_B'.
        plot_name: Optional path; when given, the figure is also saved there.
    """
    # Aggregate only the expression columns: the table also holds text columns
    # (e.g. 'gene_orient'), which cannot be averaged.
    expr_cols = ['final_expr_A', 'final_expr_B']
    grouped = stats.groupby(['gene_type', 'network_size'])[expr_cols]
    mean_stats = grouped.mean()
    med_stats = grouped.median()

    plt.figure(figsize=(9, 8), dpi=dpi)

    # Black proxy lines, so the legend describes line styles rather than colors
    custom_lines = [mpl.lines.Line2D([0], [0], color='k', linewidth=2),
                    mpl.lines.Line2D([0], [0], color='k', linestyle=':', linewidth=2)]

    for i_env, env in enumerate(['A', 'B']):
        plt.subplot(2, 1, i_env+1)
        plt.grid(linestyle=':')
        plt.ylim(-0.05, 1.05)
        for i_gene_type, gene_type in enumerate(gene_types):
            # Selecting one gene type leaves a series indexed by network size
            plt.plot(med_stats.loc[gene_type][f'final_expr_{env}'],
                     color=gene_type_color[i_gene_type], linewidth=2)
            plt.plot(mean_stats.loc[gene_type][f'final_expr_{env}'],
                     color=gene_type_color[i_gene_type], linewidth=2, linestyle=':')

        plt.ylabel('Expression level')

    # These apply to the last (bottom) panel only
    plt.xlabel('Subnetwork size')
    plt.legend(custom_lines, ['Medians', 'Means'])

    if plot_name:
        plt.savefig(plot_name, bbox_inches='tight')
    plt.show()

In [None]:
# Expression curves pooled over all replicates, saved in the experiment folder.
plot_expr_by_network_size(stats, plot_name=exp_path.joinpath('med_expr_by_network_size_all.pdf'))

In [None]:
#for i_rep in range(nb_rep):
#    plot_expr_by_network_size(stats[stats['Replicate'] == i_rep], 
#                               plot_name=exp_path.joinpath(f'med_expr_by_network_size_rep{i_rep}.pdf'))

In [None]:
def plot_distrib_by_network_size(stats, network_size):
    """Plot the distribution of final expression per gene type for one subnetwork size.

    For each environment (A, then B), a figure with one violin per gene type
    is drawn from the rows of `stats` whose 'network_size' equals
    `network_size`. The median and the mean of each gene type are overlaid
    as 'o' and 'x' markers. Each figure is shown and saved in the current
    working directory as 'distrib_expr_size_env_<env>_<network_size>.pdf'.

    Args:
        stats: DataFrame with at least the columns 'gene_type',
            'network_size', 'final_expr_A' and 'final_expr_B'.
        network_size: Subnetwork size whose rows are plotted.
    """
    # Data: the selected rows are the same for both environments
    size_stats = stats[stats['network_size'] == network_size]

    # 'tab20' stores each hue as a (dark, light) pair of consecutive entries;
    # the indices below pick blue, red and green to match the gene type colors.
    colors = plt.get_cmap('tab20').colors
    light_type_color = [colors[1], colors[7], colors[5]]
    dark_type_color = [colors[0], colors[6], colors[4]]

    x = range(1, 4)

    for env in ['A', 'B']:
        expr_col = f'final_expr_{env}'

        # Aggregate the expression column only: the table also holds text
        # columns (e.g. 'gene_orient'), which cannot be averaged.
        expr_by_type = size_stats.groupby('gene_type')[expr_col]
        median_stats = expr_by_type.median()
        mean_stats = expr_by_type.mean()

        # Plot
        fig, ax = plt.subplots(dpi=300)

        violins = []
        for i_gene_type, gene_type in enumerate(gene_types):
            type_stats = size_stats[size_stats['gene_type'] == gene_type][expr_col]
            violins.append(ax.violinplot(type_stats, positions=[x[i_gene_type]],
                                         showmeans=True, showmedians=True))

            # Label only the first pair of markers, so that the legend has a
            # single 'Medians' and a single 'Means' entry.
            is_first = (i_gene_type == 0)
            ax.scatter([x[i_gene_type]], median_stats.loc[gene_type], marker='o',
                       color=dark_type_color[i_gene_type],
                       label='Medians' if is_first else None)
            ax.scatter([x[i_gene_type]], mean_stats.loc[gene_type], marker='x',
                       color=dark_type_color[i_gene_type],
                       label='Means' if is_first else None)

        # Recolor each violin with the shades of its gene type
        for i_violin, violin in enumerate(violins):
            for pc in violin['bodies']:
                pc.set_facecolor(light_type_color[i_violin])

            for partname in ['cbars','cmins','cmaxes','cmeans','cmedians']:
                violin[partname].set_edgecolor(dark_type_color[i_violin])

        ax.set_ylim(-0.05, 1.05)
        ax.set_ylabel('Expression level')
        ax.set_xticks(x)
        ax.set_xticklabels(gene_types)
        ax.set_xlabel('Gene type')
        ax.grid(axis='y', linestyle=':')

        ax.legend(loc='lower left')

        fig.savefig(f'distrib_expr_size_env_{env}_{network_size}.pdf', bbox_inches='tight')

        plt.show()
        plt.close(fig)

In [None]:
# Expression distributions for subnetworks of 59 genes
# (presumably one gene short of the full genome -- TODO confirm the genome size).
plot_distrib_by_network_size(stats, network_size=59)

In [None]:
def plot_nb_activ_by_network_size(stats, plot_name=None):
    """Plot the proportion of activated genes against subnetwork size.

    A gene counts as activated in an environment when its final expression
    is strictly above `(1 + exp(-m)) / 2`, the midpoint between `exp(-m)`
    and 1, with `m` read from the module-level `params`. One panel is drawn
    per environment (A on top, B below), with one curve per gene type.

    Args:
        stats: DataFrame with at least the columns 'gene_type',
            'network_size', 'final_expr_A' and 'final_expr_B'. It is not
            modified; the function works on a copy.
        plot_name: Optional path; when given, the figure is also saved there.
    """
    stats = stats.copy()

    half_expr = (1 + np.exp(- params['m'])) / 2
    activ_cols = []
    for env in ['A', 'B']:
        stats[f'activ_{env}'] = stats[f'final_expr_{env}'] > half_expr
        activ_cols.append(f'activ_{env}')

    # Average the boolean activation flags only (their mean is a proportion):
    # the table also holds text columns, which cannot be averaged.
    mean_stats = stats.groupby(['gene_type', 'network_size'])[activ_cols].mean()

    plt.figure(figsize=(9, 8), dpi=dpi)

    for i_env, env in enumerate(['A', 'B']):
        plt.subplot(2, 1, i_env+1)
        plt.grid(linestyle=':')
        plt.ylim(-0.05, 1.05)
        for i_gene_type, gene_type in enumerate(gene_types):
            # Selecting one gene type leaves a series indexed by network size
            plt.plot(mean_stats.loc[gene_type][f'activ_{env}'],
                     color=gene_type_color[i_gene_type], linewidth=2)

        plt.ylabel('Proportion of activated genes')
    # Applies to the last (bottom) panel only
    plt.xlabel('Subnetwork size')

    if plot_name:
        plt.savefig(plot_name, bbox_inches='tight')
    plt.show()

In [None]:
# Activation curves pooled over all replicates, saved in the experiment folder.
plot_nb_activ_by_network_size(stats, plot_name=exp_path.joinpath('med_activ_by_network_size_all.pdf'))