# To be, or not to be

This is the "hello world" of genetic algorithms. The implementation below is object oriented: each individual is an instance of the `Individual` class, and the whole population is a list of individuals managed by the `Population` class.

The code is clearly not written for a notebook. The imports and global variable declarations should be in the same file as `main`; the classes can live in separate files.

In [1]:
import sys
from typing import Optional

import numpy as np

In [7]:
"""main.py
Parameters:
    GENES (str): possible genes, given as one string of characters
    TARGET (str): target of evolution
    TARGET_LENGTH (int): length of the target
    POPULATION_SIZE (int): Number of individuals per population
    MUTATION_RATE (float): Chance of a gene mutating during 
        creation of offspring
"""

__author__ = "Tim de Klijn"

# Number of individuals in every generation.
POPULATION_SIZE = 300
# Per-gene probability that an offspring gene is replaced by a random gene
# instead of being inherited from one of the parents.
MUTATION_RATE = 0.01

# Alphabet from which genes are drawn; it contains every character of TARGET.
GENES = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890,.!? "

# The string the population evolves towards, and its length (the DNA length).
TARGET = "To be, or not to be."
TARGET_LENGTH = len(TARGET)

In [8]:
class Individual():
    """
    A single candidate solution in the population.

    When no dna is supplied, a random sequence of ``target_length`` genes
    is drawn from ``genes``; otherwise the supplied dna is used as is. The
    fitness score is computed once, at construction time, and stored in
    ``self.fitness``.

    Parameters:
        genes (str): str with options to generate dna.
        target (str): str with target of evolution
        target_length (int): length of target
        dna (str, optional): dna sequence of individual; a random
            sequence is generated when this is None
    """

    def __init__(self,
                 genes: str,
                 target: str,
                 target_length: int,
                 dna: Optional[str] = None) -> None:

        self.genes = genes

        self.target = target
        self.target_length = target_length

        if dna is None:
            # No dna given: draw target_length genes (with replacement).
            dna = "".join(
                np.random.choice(list(self.genes), self.target_length))
        self.dna = dna

        self.fitness = self.calc_fitness()

    def calc_fitness(self) -> float:
        """
        Return the fraction of the first ``target_length`` positions at
        which the dna equals the target (0.0 = no match, 1.0 = identical).

        Raises:
            IndexError: if the dna or the target is shorter than
                ``target_length``.
            ZeroDivisionError: if ``target_length`` is 0.
        """

        # Indexing (rather than zip) keeps a too-short dna a loud error
        # instead of silently scoring a truncated comparison.
        matches = sum(
            self.target[i] == self.dna[i]
            for i in range(self.target_length))
        return matches / self.target_length

    def mix_dna(self, dna_2: str, mutation_rate: float) -> str:
        """
        Given a second DNA sequence, mix the two sequences and return the
        offspring DNA.

        For every position, with probability ``mutation_rate`` a random
        gene from ``self.genes`` is used; otherwise the gene is inherited
        from this individual or from ``dna_2`` with equal chance.

        Parameters:
            dna_2 (str): dna of the second parent, at least
                ``target_length`` long
            mutation_rate (float): per-gene mutation probability

        Returns:
            str: offspring dna of length ``target_length``
        """

        # Loop invariant: build the list of candidate genes only once.
        gene_pool = list(self.genes)

        new_genes = []
        for i in range(self.target_length):
            if np.random.uniform() < mutation_rate:
                new_gene = np.random.choice(gene_pool)
            else:
                new_gene = np.random.choice([self.dna[i], dna_2[i]])
            new_genes.append(new_gene)

        # join() always yields a plain str, whatever numpy string scalar
        # type np.random.choice handed back.
        return "".join(new_genes)

In [9]:
from typing import List

class Population():
    """
    Control the population while running the genetic algorithm. Has
    functions for creating a population, producing offspring and
    finding the max fitness in the population.

    Parameters:
        genes (str): options to create dna from
        population_size (int): amount of individuals per generation
        target (str): target of evolution
        target_length (int): length of target
        mutation_rate (float): determines mutation during reproduction
    """

    def __init__(self,
                 genes: str,
                 population_size: int,
                 target: str,
                 target_length: int,
                 mutation_rate: float) -> None:

        self.genes = genes

        self.population: List[Individual] = []
        self.population_size: int = population_size

        self.target: str = target
        self.target_length: int = target_length

        self.mutation_rate: float = mutation_rate

        # Best fitness seen in the current generation; set by calc_fitness.
        self.max_fitness: float = 0.0

    def init_population(self) -> None:
        """
        Create n random individuals, where n is the population size, and
        append them to self.population.
        """

        self.population.extend(
            Individual(self.genes, self.target, self.target_length)
            for _ in range(self.population_size))

    def _create_p_list(self) -> np.ndarray:
        """
        Return the selection probability of every individual: the fitness
        scores scaled to have a sum of 1.

        When every individual has fitness 0 the scores cannot be scaled;
        a uniform distribution is returned instead so that selection
        still works.
        """

        fitness_scores = np.array(
            [ind.fitness for ind in self.population], dtype=float)
        total = fitness_scores.sum()
        if total > 0:
            return fitness_scores / total

        # All scores are 0 (or the population is empty). Dividing by the
        # sum would produce NaN probabilities, which np.random.choice
        # rejects, so give every individual the same chance.
        return np.full(
            fitness_scores.shape, 1.0 / max(fitness_scores.size, 1))

    def create_offspring(self) -> None:
        """
        For population size, pick two individuals from the population
        based on the fitness score and create offspring; the offspring
        become the new population.
        """

        p_list = self._create_p_list()
        n_candidates = len(self.population)

        new_population = []
        for _ in range(self.population_size):
            # Select two parents (with replacement), chance is scaled to
            # the fitness score. Indices are sampled so numpy never has to
            # convert the list of Individual objects into an array.
            first, second = np.random.choice(n_candidates, 2, p=p_list)
            # Create new dna from the parents
            new_dna = self.population[first].mix_dna(
                self.population[second].dna, self.mutation_rate)
            new_population.append(Individual(
                self.genes,
                self.target,
                self.target_length,
                new_dna))
        self.population = new_population

    def calc_fitness(self) -> None:
        """
        Sort the population by fitness, best first, and store the maximal
        fitness of the population in self.max_fitness.

        Raises:
            IndexError: if the population is empty.
        """
        self.population = sorted(
            self.population,
            key=lambda x: x.fitness,
            reverse=True)
        self.max_fitness = self.population[0].fitness

In [10]:
def main() -> None:
    """
    Control the genetic algorithm:

    First create a population. The population is passed all global
    variables. Then initiate the population and calculate the fitness.
    Start the loop: print the current iteration, best fitness and best
    dna sequence; when the max fitness is reached, break out of the loop,
    otherwise breed the next generation and repeat.
    """

    print(f"""
Target: {TARGET}
Population Size: {POPULATION_SIZE}
Mutation_rate: {MUTATION_RATE}
""")

    population = Population(GENES,
                            POPULATION_SIZE,
                            TARGET,
                            TARGET_LENGTH,
                            MUTATION_RATE)
    population.init_population()
    population.calc_fitness()

    iteration = 0

    while True:
        iteration += 1
        max_fitness = population.max_fitness

        # The leading carriage return moves the cursor back to the start
        # of the line, so every report overwrites the previous one.
        sys.stdout.write(
            f"\r{iteration:>5} : {max_fitness:.2f} : {population.population[0].dna}")
        sys.stdout.flush()

        # Escape loop. Checked before breeding so that no generation is
        # bred (and the perfect individual possibly lost) after the
        # target has been reached.
        if max_fitness == 1.0:
            break

        population.create_offspring()
        population.calc_fitness()

    print("\n\nFinished\n")

In [11]:
# Run the algorithm only when executed as a script (or notebook cell),
# not when this module is imported.
if __name__ == "__main__":
    main()


Target: To be, or not to be.
Population Size: 300
Mutation_rate: 0.01

  145 : 1.00 : To be, or not to be.

Finished

