In [None]:
import pathlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import pickle

In [None]:
# Import the project modules and immediately reload them.
# NOTE(review): a bare `import autoreload` only works if a module with that name is
# reachable on sys.path (presumably IPython's bundled extension) — confirm, or
# consider `importlib.reload` / `%autoreload 2` instead.
import autoreload
import evotsc_plot
autoreload.reload(evotsc_plot)  # presumably picks up local edits without a kernel restart — confirm
import evotsc
autoreload.reload(evotsc)  # same for the core evotsc module

In [None]:
# Notebook-wide configuration.
# NOTE(review): base_path is an absolute, machine-specific location; the notebook will
# only run as-is on a machine where this directory exists.
base_path = pathlib.Path('/Users/theotime/Desktop/evotsc/')
# Experiment directories, relative to base_path, that the driver cells iterate over.
exp_paths = ['inter-0.5k', 'inter-1k', 'inter-2k', 'inter-4k',
             'interaction-10k/inter-2k', 'interaction-10k/inter-5k', 'interaction-10k/inter-9k']
# Generation of the saved population to load (used to build the 'pop_gen_<gen>.evotsc' file name).
gen = 100_000
# The four constants below are not referenced by the cells visible in this notebook section.
gene_types = ['AB', 'A', 'B'] # Name of each gene type
gene_type_color = ['tab:blue', 'tab:red', 'tab:green'] #AB, A, B
orient_name = ['leading', 'lagging'] # Name of each gene orientation
rel_orient = ['conv', 'div', 'upstr', 'downstr']

In [None]:
def get_params(exp_path):
    """Read the run parameters of an experiment from its first replicate.

    The parameters are read from the ``params.txt`` file found in the first
    (in sorted order) sub-directory of ``exp_path`` whose name starts with
    ``rep``. Every useful line is expected to look like ``name: value``.

    Args:
        exp_path: ``pathlib.Path`` of the experiment directory.

    Returns:
        A dict mapping each parameter name to its parsed value: the raw
        string for ``commit``, a bool for ``neutral``, and a float for every
        other parameter.

    Raises:
        IndexError: if ``exp_path`` contains no ``rep*`` directory.
        ValueError: if a numeric parameter cannot be parsed as a float.
    """
    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    with open(rep_dirs[0].joinpath('params.txt'), 'r') as params_file:
        param_lines = params_file.readlines()

    params = {}
    for line in param_lines:
        # Split on the first ':' only, so values that themselves contain a
        # colon are kept whole.
        param_name, separator, raw_val = line.partition(':')
        if not separator:
            # Blank (or otherwise colon-less) line: nothing to parse.
            continue

        param_name = param_name.strip()
        raw_val = raw_val.strip()

        if param_name == 'commit':
            param_val = raw_val
        elif param_name == 'neutral':
            # The value is text read from the file, so it has to be compared
            # with its textual form; comparing the string to the bool `True`
            # is always False.
            param_val = raw_val.lower() in ('true', '1')
        else:
            param_val = float(raw_val)

        params[param_name] = param_val

    return params

In [None]:
def get_best_indiv(rep_path, gen):
    """Load a saved population and return its fittest individual.

    The population is unpickled from ``pop_gen_<gen>.evotsc`` (generation
    number zero-padded to six digits) inside ``rep_path`` and evaluated.
    The individual with the highest strictly positive ``fitness`` is returned;
    ties go to the earliest one. If no individual beats a fitness of 0, the
    first individual of the population is returned.

    Args:
        rep_path: ``pathlib.Path`` of the replicate directory.
        gen: generation number of the save file to load.

    Returns:
        The selected individual of the loaded population.
    """
    save_path = rep_path.joinpath(f'pop_gen_{gen:06}.evotsc')
    # Unpickling can run arbitrary code: only load save files you trust.
    with open(save_path, 'rb') as save_file:
        population = pickle.load(save_file)

    population.evaluate()

    champion = population.individuals[0]
    top_fitness = 0

    try:
        for candidate in population.individuals:
            if candidate.fitness > top_fitness:
                champion, top_fitness = candidate, candidate.fitness
    except AttributeError:
        # Individuals of the neutral control are never evaluated and carry no
        # fitness attribute: stop scanning and keep the current choice.
        pass

    return champion

In [None]:
def plot_mutant_fitnesses(exp_path, nb_mutants):
    """Plot the fitness of non-deleterious mutants of each replicate's best individual.

    For every ``rep*`` directory of ``exp_path``, the best individual of the
    saved population at generation ``gen`` (module-level setting) is loaded,
    ``nb_mutants`` independent mutants are generated from clones of it, and a
    cumulative histogram of the log10 fitness of the parent and of the mutants
    that are at least as fit as the parent is shown. The share of strictly
    fitter mutants and the best fitness reached are shown in the title and
    printed.

    Args:
        exp_path: ``pathlib.Path`` of the experiment directory.
        nb_mutants: number of mutants to generate per replicate (must be > 0).

    Raises:
        ValueError: if ``nb_mutants`` is not strictly positive.
    """
    if nb_mutants <= 0:
        raise ValueError('nb_mutants must be a strictly positive integer')

    params = get_params(exp_path)

    mutation = evotsc.Mutation(inversion_poisson_lam=params['inversion_poisson_lam'])

    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    # The display name only depends on the experiment, not on the replicate.
    # `parent.name` is the second-to-last path component, and is simply empty
    # (instead of raising) for a single-component path.
    if exp_path.parent.name == 'interaction-10k':
        exp_name = exp_path.parent.name + '_' + exp_path.name
    else:
        exp_name = exp_path.name

    for rep, rep_dir in enumerate(rep_dirs):

        best_indiv = get_best_indiv(rep_dir, gen)

        _, orig_fitness = best_indiv.evaluate(params['sigma_A'], params['sigma_B'])

        fitnesses = [orig_fitness]  # Parent first, then every non-deleterious mutant
        best_fitness = orig_fitness
        improvements = 0  # Number of mutants strictly fitter than the parent

        for _mutant_idx in range(nb_mutants):

            # Every mutant starts from a fresh copy of the parent
            new_indiv = best_indiv.clone()
            new_indiv.mutate(mutation)
            _, mutant_fitness = new_indiv.evaluate(params['sigma_A'], params['sigma_B'])

            if mutant_fitness >= orig_fitness:
                fitnesses.append(mutant_fitness)

            if mutant_fitness > orig_fitness:
                improvements += 1

            best_fitness = max(mutant_fitness, best_fitness)

        fitnesses = np.array(fitnesses)
        # Only strictly better mutants count as improvements: the histogram
        # data also holds the parent and the neutral mutants.
        improvement_pct = improvements / nb_mutants * 100

        log_fitnesses = np.log10(fitnesses)

        plt.hist(log_fitnesses, cumulative=True)
        ymin, ymax = plt.ylim()
        plt.vlines(np.log10(orig_fitness), ymin, ymax, linestyle='--', linewidth=1,
            color='tab:red', label='Parent fitness')
        plt.ylim(ymin, ymax)  # Keep the y-range computed before drawing the line

        plt.legend(loc='upper left')
        x_low, x_high = np.min(log_fitnesses), np.max(log_fitnesses)
        if x_low < x_high:
            # Identical limits (no mutant beat the parent) are left to matplotlib
            plt.xlim(x_low, x_high)

        plt.title(f'{exp_name} rep {rep}, improvements: {improvement_pct:.3f}%')
        #plt.savefig(f'{exp_name}_rep{rep:02}.pdf')
        plt.show()
        print(f'{exp_name} rep {rep}, improvements: {improvement_pct:.3f}%')
        # Report against the parent's fitness, not the last mutant's
        print(f'  Orig fitness: {orig_fitness:.3}, best fitness: {best_fitness:.3}, ({100*(best_fitness/orig_fitness-1):.3}% improvement)')

In [None]:
# Plot the mutant fitness distribution of every configured experiment,
# generating 100 mutants per replicate.
for path in exp_paths:
    total_path = base_path.joinpath(path)  # absolute location of the experiment directory
    plot_mutant_fitnesses(total_path, 100)

In [None]:
def neutral_mutations(exp_path, nb_mutations):
    """Print the share of neutral random inversions for each replicate's best individual.

    For every ``rep*`` directory of ``exp_path``, the best individual of the
    saved population at generation ``gen`` (module-level setting) is loaded and
    ``nb_mutations`` random inversions are tried, each on a fresh clone of that
    individual so that every trial starts from the same genome. An inversion
    is counted as neutral when ``perform_inversion`` returns a falsy value.

    Args:
        exp_path: ``pathlib.Path`` of the experiment directory.
        nb_mutations: number of inversions to try per replicate (must be > 0).

    Raises:
        ValueError: if ``nb_mutations`` is not strictly positive.
    """
    if nb_mutations <= 0:
        raise ValueError('nb_mutations must be a strictly positive integer')

    rep_dirs = sorted(d for d in exp_path.iterdir()
                      if d.is_dir() and d.name.startswith("rep"))

    # The display name only depends on the experiment, not on the replicate.
    # `parent.name` is the second-to-last path component, and is simply empty
    # (instead of raising) for a single-component path.
    if exp_path.parent.name == 'interaction-10k':
        exp_name = exp_path.parent.name + '_' + exp_path.name
    else:
        exp_name = exp_path.name

    for rep, rep_dir in enumerate(rep_dirs):

        best_indiv = get_best_indiv(rep_dir, gen)

        nb_mutants = 0

        for _trial in range(nb_mutations):
            # Work on a copy: applying the inversions to best_indiv itself
            # would make each trial start from the genome left by the
            # previous ones instead of from the best individual.
            mutant = best_indiv.clone()

            # Here, we are only looking at intergenic distances, and do not care
            # about gene lengths, so we only count non-coding bases.
            _, noncoding_size = mutant.compute_gene_positions(include_coding=False)

            # Draw from the parent's generator so that successive trials get
            # different positions however clone() treats the generator.
            start_pos = best_indiv.rng.integers(0, noncoding_size)
            end_pos = best_indiv.rng.integers(0, noncoding_size)

            # Inverting between start and end or between end and start is equivalent
            if end_pos < start_pos:
                start_pos, end_pos = end_pos, start_pos

            if mutant.perform_inversion(start_pos, end_pos):
                nb_mutants += 1

        nb_neutral = nb_mutations - nb_mutants

        print(f'{exp_name} rep {rep}, neutral: {nb_neutral / nb_mutations * 100:.3f}%')

In [None]:
# Report the share of neutral inversions for every configured experiment,
# trying 1000 inversions per replicate.
for path in exp_paths:
    total_path = base_path.joinpath(path)  # absolute location of the experiment directory
    neutral_mutations(total_path, 1000)