In [9]:
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [10, 10]

In [10]:
deletion_score = -1
gap_score = -1
mismatch_score = -1
match_score = +1

In [11]:
def align(template, sequence, cache = None):
    if cache is None:
        cache = dict()
    
    if (template, sequence) in cache:
        return cache[(template, sequence)]

    #print("E:" + template, "Q:" + sequence)
    
    if not template:
        return len(sequence) * deletion_score, ""
    
    if not sequence:
        return len(template) * gap_score, template
    
    template_head, template_tail = template[0], template[1:]
    sequence_head, sequence_tail = sequence[0], sequence[1:]
    
    if template_head == sequence_head:
        match_mismatch_score = match_score
    else:
        match_mismatch_score = mismatch_score
    
    s, seq = align(template_tail, sequence_tail, cache)
    match_result = s + match_mismatch_score, template_head + seq
    cache[(template_tail, sequence_tail)] = match_result
    
    s, seq = align(template, sequence_tail, cache)
    delete_result = s + deletion_score, seq
    cache[(template, sequence_tail)] = delete_result
    
    s, seq = align(template_tail, sequence, cache)
    gap_result = s + gap_score, template_head + seq
    cache[(template, sequence)] = gap_result
    
    results = [match_result, delete_result, gap_result]
    
    results.sort()
    
    return results[-1]

In [12]:
align("CAT", "CA")

(1, 'CAT')

In [13]:
def produce_sequence_and_align(sequence, outer_index, inner_index, pattern, template = "", cache = None):
    print(template, outer_index, inner_index, pattern)
    if cache is None:
        cache = dict()
    if len(template) > len(sequence):
        return align(template, sequence, cache)
    else:
        alignments = []
        current_alignment = align(template, sequence, cache)
        alignments.append(current_alignment)
        last_group = outer_index == len(pattern) - 1
        current_pattern = pattern[outer_index]
        last_element = inner_index == len(current_pattern) - 1
        if last_element:
            # deal with looping to the beginning of the pattern.
            next_inner_loop = 0
            next_template = template + pattern[outer_index][next_inner_loop]
            loop_alignment = produce_sequence_and_align(sequence, outer_index, next_inner_loop, pattern, next_template, cache)
            alignments.append(loop_alignment)
            # deal with jumping to the next patter
            if not last_group:
                next_inner_forward = 0
                next_outer = outer_index + 1
                next_template = template + pattern[next_outer][next_inner_forward]
                forward_alignment = produce_sequence_and_align(sequence, next_outer, next_inner_forward, pattern, next_template, cache)
                alignments.append(forward_alignment)
        else:
            next_inner_step = inner_index + 1
            next_template = template + pattern[outer_index][next_inner_step] 
            step_alignment = produce_sequence_and_align(sequence, outer_index, next_inner_step, pattern, next_template, cache)
            alignments.append(step_alignment)
        alignments.sort()
        return alignments[-1]
    

In [1]:
produce_sequence_and_align("CTTGCTGCCGCTGCCGCTCGCTGCTGCTTG", 0, 0, ["CTG", "CCGCTG", "CTG"])

NameError: name 'produce_sequence_and_align' is not defined

In [16]:
produce_sequence_and_align("CTTGCTGCCGCTGCCGCTCGCTGCTGCTTG", 0, 0, ["CC", "AA", "TT"], template="C")

C 0 0 ['CC', 'AA', 'TT']
CC 0 1 ['CC', 'AA', 'TT']
CCC 0 0 ['CC', 'AA', 'TT']
CCCC 0 1 ['CC', 'AA', 'TT']
CCCCC 0 0 ['CC', 'AA', 'TT']
CCCCCC 0 1 ['CC', 'AA', 'TT']
CCCCCCC 0 0 ['CC', 'AA', 'TT']
CCCCCCCC 0 1 ['CC', 'AA', 'TT']
CCCCCCCCC 0 0 ['CC', 'AA', 'TT']
CCCCCCCCCC 0 1 ['CC', 'AA', 'TT']
CCCCCCCCCCC 0 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCC 0 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCC 0 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCCC 0 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCCCC 0 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCCCCC 0 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCCCCCC 0 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCCCCCCC 0 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCCCCCCCC 0 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCCCCCCCCC 0 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCCCCCCCCCC 0 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCCCCCCCCCCC 0 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCCCCCCCCCCCC 0 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCCCCCCCCCCCCC 0 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCCCCCCCCCCCCCC 0 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCCCCCCCCCCCCCCC 0 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCCCCCCCCCCCCCC

CCCCCCCCCCCCAAAATTTTTTTTTT 2 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAAAATTTTTTTTTTT 2 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAAAATTTTTTTTTTTT 2 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAAAATTTTTTTTTTTTT 2 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAAAATTTTTTTTTTTTTT 2 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAAAATTTTTTTTTTTTTTT 2 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAAT 2 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAATT 2 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAATTT 2 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAATTTT 2 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAATTTTT 2 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAATTTTTT 2 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAATTTTTTT 2 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAATTTTTTTT 2 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAATTTTTTTTT 2 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAATTTTTTTTTT 2 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAATTTTTTTTTTT 2 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAATTTTTTTTTTTT 2 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAATTTTTTTTTTTTT 2 0 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAATTTTTTTTTTTTTT 2 1 ['CC', 'AA', 'TT']
CCCCCCCCCCCCAATTTTTTTTTTTTTTT 2 0 ['CC', 'AA', '

CCCCAAAAAAAAAAAAAAAAAAT 2 0 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAAAATT 2 1 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAAAATTT 2 0 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAAAATTTT 2 1 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAAAATTTTT 2 0 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAAAATTTTTT 2 1 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAAAATTTTTTT 2 0 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAAAATTTTTTTT 2 1 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAAAATTTTTTTTT 2 0 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAAT 2 0 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAATT 2 1 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAATTT 2 0 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAATTTT 2 1 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAATTTTT 2 0 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAATTTTTT 2 1 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAATTTTTTT 2 0 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAATTTTTTTT 2 1 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAATTTTTTTTT 2 0 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAATTTTTTTTTT 2 1 ['CC', 'AA', 'TT']
CCCCAAAAAAAAAAAAAAAATTTTTTTTTTT 2 0 ['CC', 'AA

(-7, 'CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCA')

In [14]:
align("CTTGCTGCCGCTGCCGCTCGCTGCTGCTTG", "CTTGCTGCCGCTGCCGCTCGCTGCTGCTTG")

(30, 'CTTGCTGCCGCTGCCGCTCGCTGCTGCTTG')

In [21]:
align("CTTGCTGCCGCTGCCGCTCGCTGCTGCTTG", "CTGCTGCCTGCTGCCTGCTGCTGCTGCTGCT")

(7, 'CTTGCTGCCGCTGCCGCTCCGCTGCTGCTTG')