In [1]:
import random
from utils import *
from deap import base, creator, tools

In [2]:
# Fitness with four objectives; every weight is -1.0, so DEAP treats each
# objective as something to minimise.
creator.create("FitnessMulti", base.Fitness, weights=(-1.0,) * 4)

# An individual is a plain list that carries a FitnessMulti instance.
creator.create("Individual", list, fitness=creator.FitnessMulti)

# Registry for the evaluation and variation operators used in later cells.
toolbox = base.Toolbox()

In [3]:
def evaluate(individual):
    """Score one individual on the four objectives of ``FitnessMulti``.

    Args:
        individual: Iterable of single-character strings; they are joined
            into one strand string before scoring.

    Returns:
        Tuple ``(lcs_value, stability, secondary_structures,
        cross_hybridization)``.  ``stability`` is the result of
        ``compute_stability`` converted to ``float``;
        ``secondary_structures`` is returned exactly as
        ``check_secondary_structures`` produced it.  The first and last
        entries are constant placeholders.
    """
    sequence = ''.join(individual)

    stability_score = float(compute_stability(sequence))
    structure_score = check_secondary_structures(sequence)

    # Placeholders until the corresponding metrics exist.
    # TODO: establish graph relationships (lcs not yet created).
    lcs_score = 0
    # TODO: establish graph relationships (c_h method not yet created).
    cross_hybridization_score = 0

    return (lcs_score, stability_score, structure_score, cross_hybridization_score)

# Make the fitness function available as toolbox.evaluate.
toolbox.register("evaluate", evaluate)

In [4]:
# Requested strand length -> number of strands of that length.
# TODO: take actual input from flask.
desired_lengths_quantities = {10: 25, 15: 30, 20: 20, 25: 25}

population = []
for length, quantity in desired_lengths_quantities.items():
    factory_name = "individual_{}".format(length)
    # The default argument pins the current length for this factory, so the
    # lambda does not see later values of the loop variable.
    toolbox.register(
        factory_name,
        tools.initIterate,
        creator.Individual,
        lambda l=length: initialize_sequence(l),
    )
    make_individual = getattr(toolbox, factory_name)
    population.extend([make_individual() for _ in range(quantity)])

In [5]:
def variable_length_crossover(parent1, parent2):
    """One-point crossover for two sequences that may differ in length.

    The parents are aligned at their tails: a crossover point is drawn inside
    the shorter parent and the matching point in the longer parent is shifted
    by the length difference.  The segments after those points are exchanged,
    so each parent keeps its original length.

    The exchange is applied in place, so callers that ignore the return value
    still see the recombined sequences, and the objects keep their type and
    attributes (e.g. ``fitness``).

    Args:
        parent1: Mutable sequence (e.g. a DEAP individual); modified in place.
        parent2: Mutable sequence (e.g. a DEAP individual); modified in place.

    Returns:
        Tuple ``(parent1, parent2)``: the same two objects, now holding the
        offspring sequences.
    """
    if len(parent1) < len(parent2):
        shorter, longer = parent1, parent2
    else:
        shorter, longer = parent2, parent1

    # With an empty parent there is nothing to exchange, and
    # random.randint(0, -1) would raise ValueError.
    if not shorter:
        return parent1, parent2

    # Offset that lines up the tails of the two parents.
    offset = len(longer) - len(shorter)
    # Crossover point within the range of the shorter sequence.
    crossover_point = random.randint(0, len(shorter) - 1)

    # Build both children before touching either parent; the slices below
    # read from both of them.
    new_longer = list(longer[:offset + crossover_point]) + list(shorter[crossover_point:])
    new_shorter = list(shorter[:crossover_point]) + list(longer[offset + crossover_point:])

    longer[:] = new_longer
    shorter[:] = new_shorter
    return parent1, parent2
    
# Make the crossover operator available as toolbox.mate.
toolbox.register("mate", variable_length_crossover)

In [6]:

def mutate_sequence(individual, bases=("A", "C", "G", "T")):
    """Replace one randomly chosen position with a different base, in place.

    Args:
        individual: Mutable sequence of single-character base strings.
        bases: Ordered alphabet the replacement is drawn from.  The base
            currently at the chosen position is excluded.

    Returns:
        One-element tuple holding the same ``individual`` object.  An empty
        individual is returned unchanged.
    """
    if not individual:
        # Nothing to mutate; random.randint(0, -1) would raise ValueError.
        return individual,

    mutation_point = random.randint(0, len(individual) - 1)
    current_base = individual[mutation_point]
    # Use an ordered list rather than a set: the iteration order of a set of
    # strings depends on hash randomisation, which would make the chosen
    # base differ between interpreter runs even with a seeded RNG.
    candidates = [b for b in bases if b != current_base]
    individual[mutation_point] = random.choice(candidates)
    return individual,

# Make the point mutation available as toolbox.mutate and use DEAP's NSGA-II
# selection as toolbox.select.
toolbox.register("mutate", mutate_sequence)
toolbox.register("select", tools.selNSGA2)





In [7]:
# Score every individual of the starting population first, then attach each
# score tuple to its individual.
fitnesses = [toolbox.evaluate(candidate) for candidate in population]
for candidate, score in zip(population, fitnesses):
    candidate.fitness.values = score


In [13]:
n_generations = 5  # You can adjust this value as per your requirements
CROSSOVER_PROB = 0.7  # probability that a pair of offspring is recombined
MUTATION_PROB = 0.2   # probability that a single offspring is mutated

for gen in range(n_generations):
    # Order the current population with the registered selection operator.
    # NOTE(review): selNSGA2 asked for len(population) individuals keeps every
    # individual (it only reorders them), so this step applies no selection
    # pressure. Selecting from population + offspring would add it, but would
    # also change how many strands of each length survive. Confirm intent.
    offspring = toolbox.select(population, len(population))

    # Work on copies so the parents are not modified by the operators.
    offspring = [toolbox.clone(ind) for ind in offspring]

    # Recombine neighbouring pairs.
    for child1, child2 in zip(offspring[::2], offspring[1::2]):
        if random.random() < CROSSOVER_PROB:
            # Store the operator's result back into the children; this works
            # both for operators that return new sequences and for operators
            # that modify their arguments in place and return them.
            child1[:], child2[:] = toolbox.mate(child1, child2)
            del child1.fitness.values
            del child2.fitness.values

    # Mutate individual offspring (the operator works in place).
    for mutant in offspring:
        if random.random() < MUTATION_PROB:
            toolbox.mutate(mutant)
            del mutant.fitness.values

    # Only individuals whose fitness was invalidated above need scoring;
    # untouched clones still carry the fitness copied from their parent.
    invalid_individuals = [ind for ind in offspring if not ind.fitness.valid]
    fitnesses = list(map(toolbox.evaluate, invalid_individuals))
    for ind, fit in zip(invalid_individuals, fitnesses):
        ind.fitness.values = fit

    # Replace the old population with the offspring
    population[:] = offspring

gen	nevals
0  	100   
1  	100   
2  	100   


In [10]:
# Take the 10 individuals that tools.selBest ranks highest; change the second
# argument to report more or fewer.
best_individuals = tools.selBest(population, 10)
for bi in best_individuals:
    # One line per individual: the strand followed by its fitness tuple.
    strand_text = ''.join(bi)
    print(strand_text, bi.fitness.values)


TCGGTGCTCACATTAGAGCACGGGG (0.0, -59161.94201, -6.398709297180176, 0.0)
TCCAACTCGCGCTGGCGCGTCACAG (0.0, -9193.68347, -5.48976993560791, 0.0)
AAGACCAGGAAAATCCTGCGGGGAG (0.0, -1495.545132, -3.329612970352173, 0.0)
TTCTGGCTACGGACCTTGCAAAGGG (0.0, -451.4961369, -3.333465814590454, 0.0)
TGCTCGTTACCCGGTCGATCGACCT (0.0, -400.5372283, -3.4408445358276367, 0.0)
GCATATGATGCGCTATCCGACTCCG (0.0, -163.5892956, -1.7635512351989746, 0.0)
TCCCGACACTATTACAAGTGAGATC (0.0, -157.5697607, -2.892761707305908, 0.0)
TAACCCGTAGTTCAAGTAATTCTTC (0.0, -147.914882, -2.033112049102783, 0.0)
AGGTGAACACCCCTGGTACT (0.0, -104.3906411, -2.1645398139953613, 0.0)
CTCTTTCCGACAATATCGAGGACGT (0.0, -103.8415258, -2.5554447174072266, 0.0)
