# Variables

In [1]:
ga_input = '/grid/home/nbourgeois/data/test_sequences/hidua/hidua.pep.fas' #sequence pep.fas Input
tissue = 'Brain_Cerebellum' # Tissue type for CoCoPuts
filename = 'test_git'

# Imports


## PyGAD

In [2]:
import os
cwd = os.getcwd()
pygad_loc = '/grid/home/nbourgeois/hmi_bifx_tools/codon_optimization'
os.chdir(pygad_loc)
import pygad
import numpy
import os
import sys
import Bio
import pandas as pd
from general_functions import *
from metrics import *
import hashlib
import subprocess
import time
os.chdir(cwd)


Using TensorFlow backend.


In [3]:
import inspect

print(inspect.getsource(get_cai))

def get_cai(seq, weight_dict):
    if weight_dict is str:
        weight_dict = get_codon_weights(weight_dict)
    weights = [weight_dict[seq[i:i+3]] for i in range(0, len(seq), 3)] #convert to codon_list -> use weight dictionary
    cai = geo_mean(weights)
    return(cai)



# Start

## Parameters

### Metrics

In [3]:
cai_on = True
bai_on = True
sps_on = False
cpg_on = True
pas_on = False


### Weights

In [4]:
cai_w = 1 *cai_on
bai_w = 1 *bai_on
sps_w = 1 *sps_on
cpg_w = 1 *cpg_on
pas_w = 1 *pas_on

In [5]:
global total_weight
total_weight = sum([cai_w,bai_w,sps_w,cpg_w,pas_w])

### Locations

In [6]:
cai_index_loc = os.path.join(pygad_loc,'CoCoPUTs_codon_usage/codon_usage/'+tissue+'.codon.txt')
bai_index_loc = os.path.join(pygad_loc,'CoCoPUTs_codon_usage/bicodon_usage/'+tissue+'.bicodon.txt')

codon_usage_table_loc = os.path.join(pygad_loc,'codon_usage.getex.txt')

# Algorithm

### Read in File

In [7]:
(keys, seqs) = readFasta(ga_input)
if len(seqs) == 1:
    aa_seq=str(seqs[0])

### Initialize Parameters
- gene_space
- codon_to_int dictionary

In [8]:
global codon_to_int

codon_to_int, gene_space = init_parameters(codon_usage_table_loc, aa_seq)
    
gene_space_int = [[codon_to_int[x] for x in y] for y in gene_space]

# Fitness Function

In [9]:
def fitness_func(solution, solution_idx):
    
    if not type(solution) is str:
        seq_aa = ''.join([codon_to_int[x] for x in solution])
    else:
        seq_aa = solution
#     print(solution_idx)

    tmp_dict = {}
    
    #Check for redundancy
    if seq_aa in all_sols.keys():
        fitness = all_sols[seq_aa]['fitness']

    else:
        fitness = 0
        
        if cai_on:
            cai = get_cai(seq_aa, cai_index_loc)
            fitness += cai*cai_w
            tmp_dict['cai'] = cai
        
        if bai_on:
            bai = get_bai(seq_aa, bai_index_loc)
            fitness += bai*bai_w
            tmp_dict['bai'] = bai
            
        if cpg_on:
            cpg = get_cpg(seq_aa)
            fitness += cpg*cpg_w
            tmp_dict['cpg'] = cpg

        if sps_on:
            sps = get_sps(seq_aa)
            print('SPS retuned.')

            fitness += sps*sps_w
            tmp_dict['sps'] = sps

        if pas_on:
            pas = get_pas(seq_aa)
            fitness += pas*pas_w
            tmp_dict['pas'] = pas

        fitness = fitness/total_weight
        tmp_dict['fitness'] = fitness
        all_sols[seq_aa] = tmp_dict
        
    
    return fitness

    

# Genetic Algorithm

In [10]:
"""
Given the following function:
    y = f(w1:w6) = w1x1 + w2x2 + w3x3 + w4x4 + w5x5 + 6wx6
    where (x1,x2,x3,x4,x5,x6)=(4,-2,3.5,5,-11,-4.7) and y=44
What are the best values for the 6 weights (w1 to w6)? We are going to use the genetic algorithm to optimize this function.
"""

# def convert_aa_int():
    
desired_output = 1 # Function output.
all_sols = {}

fitness_function = fitness_func

num_generations = 100 # Number of generations.
num_parents_mating = 5 # Number of solutions to be selected as parents in the mating pool.

sol_per_pop = 20 

parent_selection_type = "sss" # Type of parent selection.
keep_parents = 5 # Number of parents to keep in the next population. -1 means keep all parents and 0 means keep nothing.

crossover_type = "two_points" # Type of the crossover operator.

# Parameters of the mutation operation.
mutation_type = "random" # Type of the mutation operator.
mutation_percent_genes = 5 # Percentage of genes to mutate. This parameter has no action if the parameter mutation_num_genes exists or when mutation_type is None.

last_fitness = 0

def callback_generation(ga_instance):
    global last_fitness
    print("Generation = {generation}".format(generation=ga_instance.generations_completed))
    print("Fitness    = {fitness}".format(fitness=ga_instance.best_solution()[1]))
    print("Change     = {change}".format(change=ga_instance.best_solution()[1] - last_fitness))
    last_fitness = ga_instance.best_solution()[1]

# Creating an instance of the GA class inside the ga module. Some parameters are initialized within the constructor.

num_genes = len(gene_space)
ga_instance = pygad.GA(num_generations=num_generations,
                       num_parents_mating=num_parents_mating, 
                       fitness_func=fitness_function,
                       sol_per_pop=sol_per_pop, 
                       num_genes=num_genes,
                       parent_selection_type=parent_selection_type,
                       keep_parents=keep_parents,
                       crossover_type=crossover_type,
                       mutation_type=mutation_type,
                       mutation_percent_genes=mutation_percent_genes,
                       on_generation=callback_generation,
                       gene_type=int,
                       gene_space=gene_space_int,
)


In [None]:
%%time

# Running the GA to optimize the parameters of the function.
ga_instance.run()

# After the generations complete, some plots are showed that summarize the how the outputs/fitenss values evolve over generations.
ga_instance.plot_result()

# Returning the details of the best solution.
solution, solution_fitness, solution_idx = ga_instance.best_solution()
print("Parameters of the best solution : {solution}".format(solution=solution))
seq_aa = ''.join([codon_to_int[x] for x in solution])
print("NT Sequence of the best solution : {seq_aa}".format(seq_aa=seq_aa))
print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness))
print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx))

# # prediction = numpy.sum(numpy.array(function_inputs)*solution) Replace
# print("Predicted output based on the best solution : {prediction}".format(prediction=prediction))

if ga_instance.best_solution_generation != -1:
    print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation))

# Saving the GA instance.
 # The filename to which the instance is saved. The name is without extension.
ga_instance.save(filename=filename)

# Loading the saved GA instance.
loaded_ga_instance = pygad.load(filename=filename)
loaded_ga_instance.plot_result()

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/data/software/anaconda3/envs/bfx-gpu/lib/python3.7/site-packages/IPython/core/magics/execution.py", line 1321, in time
    exec(code, glob, local_ns)
  File "<timed exec>", line 2, in <module>
  File "/data/software/anaconda3/envs/bfx-gpu/lib/python3.7/site-packages/pygad/pygad.py", line 1170, in run
    self.last_generation_fitness = self.cal_pop_fitness()
  File "/data/software/anaconda3/envs/bfx-gpu/lib/python3.7/site-packages/pygad/pygad.py", line 1148, in cal_pop_fitness
    fitness = self.fitness_func(sol, sol_idx)
  File "<ipython-input-9-696bcbf0aad5>", line 19, in fitness_func
    cai = get_cai(seq_aa, cai_index_loc)
  File "/grid/home/nbourgeois/wip/codon_optimization/metrics.py", line 149, in get_cai
    weights = [weight_dict[seq[i:i+3]] for i in range(0, len(seq), 3)] #convert to codon_list -> use weight dictionary
  File "/grid/home/nbourgeois/wip/codon_optimization/metrics.py", line 149, in <listcomp>
    weights = [weight_dict