## Single Mutant Walker
Author: Michael Meier

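This notebook introduces an `SMW` (Single Mutant Walker) class that generates new DNA sequences through single-nucleotide mutations and scores them with a user-supplied fitness function.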

In [1]:
#imports
import numpy as np
import pandas as pd

In [11]:
class SMW:
    """Single Mutant Walker: explores sequences one point mutation at a time.

    Parameters
    ----------
    nucleotides : sequence of str
        Alphabet that replacement characters are drawn from,
        e.g. ``['A', 'C', 'G', 'T']``.
    rng : numpy.random.Generator, optional
        Source of randomness for this walker. When omitted, NumPy's global
        random state (``np.random``) is used, so ``np.random.seed`` still
        controls the results.
    """

    def __init__(self, nucleotides, rng=None):
        self.nucleotides = nucleotides
        self.n = len(nucleotides)  # alphabet size
        self.rng = rng

    def _random_position(self, length):
        """Draw a uniformly random index in ``[0, length)``."""
        if self.rng is None:
            return np.random.randint(0, length)
        return int(self.rng.integers(0, length))

    def _random_choice(self, options):
        """Pick one element of ``options`` uniformly at random."""
        if self.rng is None:
            return np.random.choice(options)
        return self.rng.choice(options)

    def mutate_sequence(self, sequence, pos=None):
        """Return a copy of ``sequence`` with exactly one position mutated.

        Parameters
        ----------
        sequence : str
            Sequence to mutate.
        pos : int, optional
            Index to mutate. When ``None`` a position is drawn at random.

        Returns
        -------
        str
            The mutated sequence; the character at ``pos`` is guaranteed to
            differ from the original one.

        Raises
        ------
        ValueError
            If ``sequence`` is empty and no ``pos`` is given, or if the
            alphabet offers no character different from the current one.
        """
        residues = list(sequence)
        if pos is None:
            if not residues:
                raise ValueError("cannot mutate an empty sequence")
            pos = self._random_position(len(residues))

        # Exclude the current character so the result is always a real mutant.
        current_nucleotide = residues[pos]
        possible_mutations = [nuc for nuc in self.nucleotides
                              if nuc != current_nucleotide]
        if not possible_mutations:
            raise ValueError(
                "no alternative nucleotide available for "
                f"{current_nucleotide!r} in alphabet {list(self.nucleotides)!r}"
            )

        residues[pos] = self._random_choice(possible_mutations)
        return ''.join(residues)

    def generate_all_mutations(self, sequence):
        """Return every single-point mutant of ``sequence``.

        Mutants are ordered by position first, then by the order of
        ``self.nucleotides``.
        """
        return [
            sequence[:pos] + nuc + sequence[pos + 1:]
            for pos, current_nucleotide in enumerate(sequence)
            for nuc in self.nucleotides
            if nuc != current_nucleotide
        ]

    def walk(self, start_sequence, n_steps, fitness_function):
        """Run a greedy random walk of ``n_steps`` single mutations.

        At each step one random mutant of the current sequence is scored; the
        walker moves to it only if its fitness is strictly higher.

        Parameters
        ----------
        start_sequence : str
            Sequence the walk starts from.
        n_steps : int
            Number of mutation attempts.
        fitness_function : callable
            Maps a sequence to a comparable score (higher is better).

        Returns
        -------
        list of tuple
            ``(sequence, fitness)`` for the start state followed by the state
            after each step, i.e. ``n_steps + 1`` entries.
        """
        current_seq = start_sequence
        current_fitness = fitness_function(current_seq)
        history = [(current_seq, current_fitness)]

        for _ in range(n_steps):
            mutant = self.mutate_sequence(current_seq, None)
            mutant_fitness = fitness_function(mutant)

            # Accept only strict improvements; ties keep the current sequence.
            if mutant_fitness > current_fitness:
                current_seq = mutant
                current_fitness = mutant_fitness

            history.append((current_seq, current_fitness))

        return history


In [14]:
# Test the Walker class

def fitness_function(sequence):
    """Toy fitness: the number of 'G' characters in ``sequence``."""
    g_total = sum(1 for base in sequence if base == 'G')
    return g_total


# Seed NumPy's global RNG so the stochastic walk below gives the same result
# on every Restart & Run All (the walker draws its random numbers from np.random).
np.random.seed(0)

walker = SMW(nucleotides=['A', 'C', 'G', 'T'])
start_seq = 'AAAAAAAA'
history = walker.walk(start_seq, n_steps = 10, fitness_function=fitness_function)

# Sanity checks: the start state plus one entry per step, beginning at the
# start sequence, with fitness that never decreases (only improvements are accepted).
assert len(history) == 10 + 1
assert history[0] == (start_seq, fitness_function(start_seq))
assert all(later[1] >= earlier[1] for earlier, later in zip(history, history[1:]))

print(f'Generated history of length {len(history)}:')
print(f'Final sequence: {history[-1][0]} with fitness {history[-1][1]}')
print('Full history:')
for seq, fit in history:
    print(f'Sequence: {seq}, Fitness: {fit}')

Generated history of length 11:
Final sequence: GAAAGGAG with fitness 4
Full history:
Sequence: AAAAAAAA, Fitness: 0
Sequence: AAAAAAAA, Fitness: 0
Sequence: AAAAAAAA, Fitness: 0
Sequence: AAAAAAAG, Fitness: 1
Sequence: AAAAAAAG, Fitness: 1
Sequence: AAAAAGAG, Fitness: 2
Sequence: AAAAGGAG, Fitness: 3
Sequence: AAAAGGAG, Fitness: 3
Sequence: GAAAGGAG, Fitness: 4
Sequence: GAAAGGAG, Fitness: 4
Sequence: GAAAGGAG, Fitness: 4
