# Variables

In [1]:
# User-editable inputs for this run.
filename = "test"            # base name; in the visible cells only the commented-out save/load calls refer to it
tissue = "Brain_Cerebellum"  # tissue whose CoCoPUTs codon/bicodon usage tables are loaded further down
ga_input = "/grid/home/nbourgeois/data/test_proteins/hidua/hidua.pep.fas"  # FASTA file holding the input sequence

# Imports


## PyGAD

In [2]:
import os
# Remember the current working directory so it can be restored once the imports are done.
cwd = os.getcwd()
# Directory of the codon-optimization project; also the base for the data paths built later.
pygad_loc = '/grid/home/nbourgeois/wip/codon_optimization'
# Import from inside the project directory - presumably so that the project-local modules
# (general_functions, metrics) resolve; TODO confirm, and consider extending sys.path instead.
os.chdir(pygad_loc)
import pygad
import numpy
import os
import sys
import Bio
import pandas as pd
# NOTE(review): the star imports presumably supply the helpers used later in the notebook
# (e.g. readFasta, get_codon_weights, get_cai); which module defines which is not visible here.
from general_functions import *
from metrics import *
import hashlib
import subprocess
import time
# Switch back to the directory that was current before the imports.
os.chdir(cwd)
import importlib


Using TensorFlow backend.


# Start

## Parameters

### Metrics

In [3]:
# Metric switches: a True flag means the metric is included in the fitness score.
cai_on, bai_on, cpg_on = True, True, True
sps_on, pas_on = False, False


### Weights

In [4]:
# Per-metric weights as (base weight, on/off flag) pairs. Multiplying by the flag
# zeroes the weight of every metric that is switched off.
cai_w, bai_w, sps_w, cpg_w, pas_w = (
    base_weight * enabled
    for base_weight, enabled in (
        (1, cai_on),
        (1, bai_on),
        (1, sps_on),
        (1, cpg_on),
        (1, pas_on),
    )
)

In [5]:
# Sum of the active metric weights; fitness_func divides the weighted score by this value.
# (A `global` statement is not needed here: names assigned at the top level of a cell
# are already module-level.)
total_weight = sum((cai_w, bai_w, sps_w, cpg_w, pas_w))
if total_weight == 0:
    # Fail here with a clear message instead of a ZeroDivisionError deep inside the GA run.
    raise ValueError(
        'total_weight is 0: enable at least one metric (*_on flag) with a non-zero weight.'
    )

### Locations

In [6]:
# Tissue-specific CoCoPUTs codon usage table and the weights derived from it.
cai_index_loc = os.path.join(
    pygad_loc,
    ''.join(['CoCoPUTs_codon_usage/codon_usage/', tissue, '.codon.txt']),
)
cai_weight_dict = get_codon_weights(cai_index_loc)

# Same for the bicodon usage table.
bai_index_loc = os.path.join(
    pygad_loc,
    ''.join(['CoCoPUTs_codon_usage/bicodon_usage/', tissue, '.bicodon.txt']),
)
bai_weight_dict = get_bicodon_weights(bai_index_loc)

# Reference codon usage table; handed to init_parameters further down.
codon_usage_table_loc = os.path.join(pygad_loc, 'references', 'codon_usage.getex.txt')

# Algorithm

### Read in File

In [7]:
# Load the input FASTA; the rest of the notebook works on exactly one sequence.
(keys, seqs) = readFasta(ga_input)
if len(seqs) != 1:
    # Without this check the cell passed silently and left aa_seq undefined (or stale
    # from an earlier run of the cell), so the failure surfaced later and far from its cause.
    raise ValueError(
        'Expected exactly one sequence in {path}, found {count}.'.format(
            path=ga_input, count=len(seqs)
        )
    )
aa_seq = str(seqs[0])

### Initialize Parameters
- gene_space
- codon_to_int dictionary

In [8]:
# init_parameters yields the lookup table and the gene space (one list of allowed
# values per gene) for the input sequence.
codon_to_int, gene_space = init_parameters(codon_usage_table_loc, aa_seq)

# Translate every entry of the gene space through the lookup table; this translated
# form is what gets passed to PyGAD as its gene_space.
gene_space_int = [
    [codon_to_int[option] for option in gene_options]
    for gene_options in gene_space
]

# Fitness Function

In [9]:
def fitness_func(solution, solution_idx):
    """Score one candidate solution; used as PyGAD's fitness callback.

    Parameters
    ----------
    solution : iterable or str
        Either a chromosome whose genes are looked up in the module-level
        ``codon_to_int`` table and concatenated into one string, or an
        already-decoded sequence string (used as is).
    solution_idx : int
        Index of the solution in the population. Unused; present because the
        callback signature requires it.

    Returns
    -------
    number
        Sum of ``metric * weight`` over the enabled metrics, divided by
        ``total_weight``.

    Raises
    ------
    ValueError
        If ``total_weight`` is zero and the sequence has not been scored before
        (instead of failing with a ZeroDivisionError).

    Notes
    -----
    Every newly scored sequence is stored in the module-level ``all_sols`` dict
    (sequence -> per-metric scores plus ``'fitness'``); a sequence found there is
    not re-evaluated.
    """
    # Accept either an encoded chromosome or an already-decoded string.
    if isinstance(solution, str):
        sequence = solution
    else:
        sequence = ''.join(codon_to_int[gene] for gene in solution)

    # A sequence that was scored before is served from the cache.
    if sequence in all_sols:
        return all_sols[sequence]['fitness']

    if not total_weight:
        raise ValueError(
            'total_weight is 0: enable at least one metric before running the GA.'
        )

    scores = {}
    fitness = 0

    if cai_on:
        cai = get_cai(sequence, cai_weight_dict)
        fitness += cai * cai_w
        scores['cai'] = cai

    if bai_on:
        bai = get_bai(sequence, bai_weight_dict)
        fitness += bai * bai_w
        scores['bai'] = bai

    if cpg_on:
        cpg = get_cpg(sequence)
        fitness += cpg * cpg_w
        scores['cpg'] = cpg

    if sps_on:
        sps = get_sps(sequence)
        print('SPS retuned.')
        fitness += sps * sps_w
        scores['sps'] = sps

    if pas_on:
        pas = get_pas(sequence)
        fitness += pas * pas_w
        scores['pas'] = pas

    # Normalise by the total weight of the enabled metrics and remember the result.
    fitness = fitness / total_weight
    scores['fitness'] = fitness
    all_sols[sequence] = scores

    return fitness

    

# Genetic Algorithm

In [10]:
"""
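Genetic algorithm configuration. This cell appears to be adapted from the PyGAD tutorial example,
which optimized the six weights of the function:
    y = f(w1:w6) = w1x1 + w2x2 + w3x3 + w4x4 + w5x5 + w6x6
    where (x1,x2,x3,x4,x5,x6)=(4,-2,3.5,5,-11,-4.7) and y=44
In this notebook the genes are not such weights: each gene is an integer taken from gene_space_int,
and fitness_func scores the sequence decoded from those integers with the enabled metrics.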
"""

# def convert_aa_int():
    
# --- State shared with fitness_func and callback_generation ---
all_sols = {}        # filled by fitness_func: sequence -> its metric scores and fitness
last_fitness = 0     # best fitness reported for the previous generation
desired_output = 1   # Function output.

# --- Settings handed to pygad.GA ---
fitness_function = fitness_func

num_generations = 1000        # Number of generations.
sol_per_pop = 20              # Number of solutions in the population.

num_parents_mating = 5        # Number of solutions to be selected as parents in the mating pool.
parent_selection_type = "sss" # Type of parent selection.
keep_parents = 5              # Parents kept in the next population: -1 keeps all, 0 keeps none.

crossover_type = "two_points" # Type of the crossover operator.

mutation_type = "random"      # Type of the mutation operator.
# Percentage of genes to mutate. Has no effect if mutation_num_genes is given
# or when mutation_type is None.
mutation_percent_genes = 5

def callback_generation(ga_instance):
    """Print progress for the generation that just finished.

    Passed to ``pygad.GA`` as ``on_generation``. Prints the number of completed
    generations, the best fitness of the current population, and its change
    relative to the module-level ``last_fitness``, which is then updated.

    Parameters
    ----------
    ga_instance : pygad.GA
        The running GA instance; only ``generations_completed`` and
        ``best_solution()`` are used.
    """
    global last_fitness
    print("Generation = {generation}".format(generation=ga_instance.generations_completed))
    # Query the best solution once and reuse it: every best_solution() call can
    # re-evaluate the fitness of the whole population.
    best_fitness = ga_instance.best_solution()[1]
    print("Fitness    = {fitness}".format(fitness=best_fitness))
    print("Change     = {change}".format(change=best_fitness - last_fitness))
    last_fitness = best_fitness

# Build the PyGAD optimiser from the settings defined earlier in this cell.
# The chromosome length equals the number of entries in gene_space.
num_genes = len(gene_space)
ga_instance = pygad.GA(
    # Chromosome layout: integer genes, with gene_space_int as the allowed values.
    num_genes=num_genes,
    gene_type=int,
    gene_space=gene_space_int,
    # Scoring and per-generation progress reporting.
    fitness_func=fitness_function,
    on_generation=callback_generation,
    # Population size and run length.
    sol_per_pop=sol_per_pop,
    num_generations=num_generations,
    # Parent selection.
    num_parents_mating=num_parents_mating,
    parent_selection_type=parent_selection_type,
    keep_parents=keep_parents,
    # Variation operators.
    crossover_type=crossover_type,
    mutation_type=mutation_type,
    mutation_percent_genes=mutation_percent_genes,
)


In [11]:
%%time

# Run the genetic algorithm with the settings above; callback_generation (registered as
# on_generation) prints the progress lines shown in this cell's output.
ga_instance.run()

# After the generations complete, some plots are shown that summarize how the outputs/fitness values evolve over generations.
# ga_instance.plot_result()

# Returning the details of the best solution.
# solution, solution_fitness, solution_idx = ga_instance.best_solution()
# print("Parameters of the best solution : {solution}".format(solution=solution))
# seq_aa = ''.join([codon_to_int[x] for x in solution])
# print("NT Sequence of the best solution : {seq_aa}".format(seq_aa=seq_aa))
# print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness))
# print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx))

# # # prediction = numpy.sum(numpy.array(function_inputs)*solution) Replace
# # print("Predicted output based on the best solution : {prediction}".format(prediction=prediction))

# if ga_instance.best_solution_generation != -1:
#     print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation))

# Saving the GA instance.
 # The filename to which the instance is saved. The name is without extension.
# ga_instance.save(filename=filename)

# # Loading the saved GA instance.
# loaded_ga_instance = pygad.load(filename=filename)
# loaded_ga_instance.plot_result()

Generation = 1
Fitness    = 0.5904401888360155
Change     = 0.5904401888360155
Generation = 2
Fitness    = 0.593370091896865
Change     = 0.0029299030608495036
Generation = 3
Fitness    = 0.5976500558274983
Change     = 0.004279963930633346
Generation = 4
Fitness    = 0.6012940531287914
Change     = 0.003643997301293056
Generation = 5
Fitness    = 0.6034356800130655
Change     = 0.002141626884274106
Generation = 6
Fitness    = 0.6069126191855252
Change     = 0.0034769391724597254
Generation = 7
Fitness    = 0.6098085785322079
Change     = 0.0028959593466827416
Generation = 8
Fitness    = 0.6110585875175357
Change     = 0.0012500089853277885
Generation = 9
Fitness    = 0.6134586448049447
Change     = 0.0024000572874089965
Generation = 10
Fitness    = 0.6134586448049447
Change     = 0.0
Generation = 11
Fitness    = 0.6143157042465411
Change     = 0.0008570594415964194
Generation = 12
Fitness    = 0.6169801079150926
Change     = 0.002664403668551474
Generation = 13
Fitness    = 0.62185606

Generation = 121
Fitness    = 0.682313971855223
Change     = 5.313814519136706e-05
Generation = 122
Fitness    = 0.682313971855223
Change     = 0.0
Generation = 123
Fitness    = 0.6829596451410032
Change     = 0.000645673285780135
Generation = 124
Fitness    = 0.6829596451410032
Change     = 0.0
Generation = 125
Fitness    = 0.6829596451410032
Change     = 0.0
Generation = 126
Fitness    = 0.6848833731457474
Change     = 0.001923728004744274
Generation = 127
Fitness    = 0.6848833731457474
Change     = 0.0
Generation = 128
Fitness    = 0.6848833731457474
Change     = 0.0
Generation = 129
Fitness    = 0.6848833731457474
Change     = 0.0
Generation = 130
Fitness    = 0.6848833731457474
Change     = 0.0
Generation = 131
Fitness    = 0.6848833731457474
Change     = 0.0
Generation = 132
Fitness    = 0.6848833731457474
Change     = 0.0
Generation = 133
Fitness    = 0.6848833731457474
Change     = 0.0
Generation = 134
Fitness    = 0.6848833731457474
Change     = 0.0
Generation = 135
Fitness  

Generation = 251
Fitness    = 0.6970742141957715
Change     = 0.0
Generation = 252
Fitness    = 0.6970742141957715
Change     = 0.0
Generation = 253
Fitness    = 0.6970742141957715
Change     = 0.0
Generation = 254
Fitness    = 0.6970742141957715
Change     = 0.0
Generation = 255
Fitness    = 0.6970742141957715
Change     = 0.0
Generation = 256
Fitness    = 0.6974709118333791
Change     = 0.00039669763760752375
Generation = 257
Fitness    = 0.7000144252906336
Change     = 0.002543513457254565
Generation = 258
Fitness    = 0.7000144252906336
Change     = 0.0
Generation = 259
Fitness    = 0.7000144252906336
Change     = 0.0
Generation = 260
Fitness    = 0.7000144252906336
Change     = 0.0
Generation = 261
Fitness    = 0.7000144252906336
Change     = 0.0
Generation = 262
Fitness    = 0.7000144252906336
Change     = 0.0
Generation = 263
Fitness    = 0.7000144252906336
Change     = 0.0
Generation = 264
Fitness    = 0.7000144252906336
Change     = 0.0
Generation = 265
Fitness    = 0.70001442

Generation = 381
Fitness    = 0.7094989897442598
Change     = 0.0
Generation = 382
Fitness    = 0.7094989897442598
Change     = 0.0
Generation = 383
Fitness    = 0.7094989897442598
Change     = 0.0
Generation = 384
Fitness    = 0.709678427155196
Change     = 0.0001794374109361785
Generation = 385
Fitness    = 0.709678427155196
Change     = 0.0
Generation = 386
Fitness    = 0.709678427155196
Change     = 0.0
Generation = 387
Fitness    = 0.709678427155196
Change     = 0.0
Generation = 388
Fitness    = 0.709678427155196
Change     = 0.0
Generation = 389
Fitness    = 0.709678427155196
Change     = 0.0
Generation = 390
Fitness    = 0.709678427155196
Change     = 0.0
Generation = 391
Fitness    = 0.709678427155196
Change     = 0.0
Generation = 392
Fitness    = 0.709678427155196
Change     = 0.0
Generation = 393
Fitness    = 0.709678427155196
Change     = 0.0
Generation = 394
Fitness    = 0.709678427155196
Change     = 0.0
Generation = 395
Fitness    = 0.709678427155196
Change     = 0.0
Gene

Generation = 512
Fitness    = 0.7153358506840924
Change     = 0.0
Generation = 513
Fitness    = 0.7153358506840924
Change     = 0.0
Generation = 514
Fitness    = 0.7153358506840924
Change     = 0.0
Generation = 515
Fitness    = 0.7153358506840924
Change     = 0.0
Generation = 516
Fitness    = 0.7153358506840924
Change     = 0.0
Generation = 517
Fitness    = 0.7153358506840924
Change     = 0.0
Generation = 518
Fitness    = 0.7153358506840924
Change     = 0.0
Generation = 519
Fitness    = 0.7153358506840924
Change     = 0.0
Generation = 520
Fitness    = 0.7153358506840924
Change     = 0.0
Generation = 521
Fitness    = 0.7153358506840924
Change     = 0.0
Generation = 522
Fitness    = 0.7153358506840924
Change     = 0.0
Generation = 523
Fitness    = 0.7153358506840924
Change     = 0.0
Generation = 524
Fitness    = 0.7153358506840924
Change     = 0.0
Generation = 525
Fitness    = 0.7153358506840924
Change     = 0.0
Generation = 526
Fitness    = 0.7153358506840924
Change     = 0.0
Generation

Generation = 636
Fitness    = 0.7187528878365629
Change     = 0.0
Generation = 637
Fitness    = 0.7187528878365629
Change     = 0.0
Generation = 638
Fitness    = 0.7187528878365629
Change     = 0.0
Generation = 639
Fitness    = 0.7187528878365629
Change     = 0.0
Generation = 640
Fitness    = 0.7187528878365629
Change     = 0.0
Generation = 641
Fitness    = 0.7187528878365629
Change     = 0.0
Generation = 642
Fitness    = 0.7187528878365629
Change     = 0.0
Generation = 643
Fitness    = 0.7187528878365629
Change     = 0.0
Generation = 644
Fitness    = 0.7187528878365629
Change     = 0.0
Generation = 645
Fitness    = 0.7187528878365629
Change     = 0.0
Generation = 646
Fitness    = 0.7187528878365629
Change     = 0.0
Generation = 647
Fitness    = 0.7187528878365629
Change     = 0.0
Generation = 648
Fitness    = 0.7187528878365629
Change     = 0.0
Generation = 649
Fitness    = 0.7187528878365629
Change     = 0.0
Generation = 650
Fitness    = 0.7187528878365629
Change     = 0.0
Generation

Generation = 767
Fitness    = 0.7200917320395388
Change     = 0.0
Generation = 768
Fitness    = 0.7200917320395388
Change     = 0.0
Generation = 769
Fitness    = 0.7200917320395388
Change     = 0.0
Generation = 770
Fitness    = 0.7200917320395388
Change     = 0.0
Generation = 771
Fitness    = 0.7203713624142946
Change     = 0.00027963037475575714
Generation = 772
Fitness    = 0.7203713624142946
Change     = 0.0
Generation = 773
Fitness    = 0.7203713624142946
Change     = 0.0
Generation = 774
Fitness    = 0.7203713624142946
Change     = 0.0
Generation = 775
Fitness    = 0.7203713624142946
Change     = 0.0
Generation = 776
Fitness    = 0.7203713624142946
Change     = 0.0
Generation = 777
Fitness    = 0.7203713624142946
Change     = 0.0
Generation = 778
Fitness    = 0.7203713624142946
Change     = 0.0
Generation = 779
Fitness    = 0.7203713624142946
Change     = 0.0
Generation = 780
Fitness    = 0.7203713624142946
Change     = 0.0
Generation = 781
Fitness    = 0.7203713624142946
Change  

Generation = 896
Fitness    = 0.7245129769890628
Change     = 0.0
Generation = 897
Fitness    = 0.7245129769890628
Change     = 0.0
Generation = 898
Fitness    = 0.7245129769890628
Change     = 0.0
Generation = 899
Fitness    = 0.7245129769890628
Change     = 0.0
Generation = 900
Fitness    = 0.7245129769890628
Change     = 0.0
Generation = 901
Fitness    = 0.7245129769890628
Change     = 0.0
Generation = 902
Fitness    = 0.7245129769890628
Change     = 0.0
Generation = 903
Fitness    = 0.7245129769890628
Change     = 0.0
Generation = 904
Fitness    = 0.7245129769890628
Change     = 0.0
Generation = 905
Fitness    = 0.7245129769890628
Change     = 0.0
Generation = 906
Fitness    = 0.7245129769890628
Change     = 0.0
Generation = 907
Fitness    = 0.7245129769890628
Change     = 0.0
Generation = 908
Fitness    = 0.7245129769890628
Change     = 0.0
Generation = 909
Fitness    = 0.7245129769890628
Change     = 0.0
Generation = 910
Fitness    = 0.7245129769890628
Change     = 0.0
Generation