In [1]:
from enum import Enum
from queue import LifoQueue
from typing import TypeVar, Callable, Sequence

X = TypeVar('X')

In [None]:
class Operation(Enum):
    """Kind of edit step stored in an edit-table cell and emitted in edit scripts."""
    # Remove the current element of the source sequence.
    DELETE = 0
    # Replace the current source element with the current target element.
    CHANGE = 1
    # Insert the current target element in front of the current source element.
    INSERT = 2
    # Source and target elements compare as equal (zero delta); nothing to do.
    # This is also the value a freshly constructed EditTableCell starts with.
    NO_CHANGE = 3


class EditTableCell:
    """A single cell of an edit table, linked to the cell it was derived from.

    Attributes:
        weight: Cost accumulated up to this cell.
        parent: The cell this one was reached from, or ``None`` for a cell
            without a predecessor.
        comparator_weight: Scratch value initialised to ``weight``; callers may
            overwrite it while comparing candidate predecessor cells.
        operation: The ``Operation`` that leads from ``parent`` to this cell
            (``Operation.NO_CHANGE`` until a caller sets it).
    """

    def __init__(self, weight: int, parent: 'EditTableCell' = None) -> None:
        super().__init__()
        self.weight = weight
        self.parent = parent
        self.comparator_weight = weight
        self.operation: Operation = Operation.NO_CHANGE

    def edit_sequence(self, queue=None):
        """Collect the operations along the parent chain, starting at this cell.

        The operation of this cell is pushed first and the operation of the
        root cell (the one whose ``parent`` is ``None``) last, so popping from
        the returned LIFO queue yields the operations from the root towards
        this cell.

        Args:
            queue: Optional queue to push onto; a new ``LifoQueue`` is created
                when omitted.

        Returns:
            The queue that received the operations (the one passed in, if any).
        """
        if queue is None:
            queue = LifoQueue()
        # Walk the back-pointers iteratively: one stack frame per ancestor
        # would hit the interpreter's recursion limit on long chains.
        cell = self
        while cell is not None:
            queue.put(cell.operation)
            cell = cell.parent
        return queue


def get_edit_sequence(x: Sequence[X], y: Sequence[X], delta: Callable[[X, X], int] = lambda a, b: 0 if a == b else 1):
    """Build a minimum-cost edit script that turns ``x`` into ``y``.

    A table of ``(len(x) + 1) x (len(y) + 1)`` cells is filled so that cell
    ``[r][c]`` holds the cheapest cost of turning ``x[:r]`` into ``y[:c]``.
    Deleting or inserting an element costs 1; aligning ``x[r - 1]`` with
    ``y[c - 1]`` costs ``delta(x[r - 1], y[c - 1])``.

    Args:
        x: Source sequence.
        y: Target sequence.
        delta: Cost of aligning one element of ``x`` with one element of ``y``.
            A cost of 0 is recorded as ``Operation.NO_CHANGE``, anything else
            as ``Operation.CHANGE``. Called exactly once per table cell.

    Returns:
        A tuple ``(queue, weight)``: ``queue`` is a ``LifoQueue`` whose
        successive ``get()`` calls yield the operations in the order they are
        applied (first edit first), and ``weight`` is the total cost.
    """
    rows = len(x) + 1
    cols = len(y) + 1
    edit_table = [[EditTableCell(0) for _ in range(cols)] for _ in range(rows)]

    # First column: the only way to reach an empty target is to delete everything.
    for r in range(1, rows):
        cell = edit_table[r][0]
        cell.parent = edit_table[r - 1][0]
        cell.weight = r
        cell.operation = Operation.DELETE

    # First row: the only way to leave an empty source is to insert everything.
    for c in range(1, cols):
        cell = edit_table[0][c]
        cell.parent = edit_table[0][c - 1]
        cell.weight = c
        cell.operation = Operation.INSERT

    for r in range(1, rows):
        for c in range(1, cols):
            # Evaluate delta once so the cost and the recorded label always agree.
            align_cost = delta(x[r - 1], y[c - 1])
            above = edit_table[r - 1][c]
            left = edit_table[r][c - 1]
            diagonal = edit_table[r - 1][c - 1]
            align_op = Operation.NO_CHANGE if align_cost == 0 else Operation.CHANGE
            # Order matters: min() keeps the first of equally cheap candidates,
            # so ties prefer delete, then insert, then the diagonal step.
            candidates = (
                (above.weight + 1, above, Operation.DELETE),
                (left.weight + 1, left, Operation.INSERT),
                (diagonal.weight + align_cost, diagonal, align_op),
            )
            cost, parent, operation = min(candidates, key=lambda candidate: candidate[0])
            cell = edit_table[r][c]
            cell.weight = cost
            cell.parent = parent
            cell.operation = operation

    final_cell = edit_table[len(x)][len(y)]
    q = final_cell.edit_sequence()
    # The top of the stack is the origin cell's placeholder operation, which
    # does not correspond to any edit; discard it.
    q.get()
    return q, final_cell.weight

# edit distance

In [5]:
def edit_distance(x: Sequence[X], y: Sequence[X], delta: Callable[[X, X], int] = lambda a, b: 0 if a == b else 1):
    """Return the edit distance between ``x`` and ``y`` and print every step.

    The edit script from ``get_edit_sequence`` is replayed on a copy of ``x``;
    the starting value and the value after each delete, insert or change are
    printed, one per line.

    Args:
        x: Source sequence. Must support slicing and ``+`` concatenation with
            slices of ``y`` (e.g. both ``str``, both ``list`` or both ``tuple``).
        y: Target sequence.
        delta: Element alignment cost, forwarded to ``get_edit_sequence``.

    Returns:
        The total cost of the edit script.
    """
    q, weight = get_edit_sequence(x, y, delta)
    result = x
    print(result)
    x_i = 0  # cursor into the partially edited result
    y_i = 0  # cursor into the target sequence
    while not q.empty():
        operation = q.get()
        if operation == Operation.DELETE:
            result = result[:x_i] + result[x_i + 1:]
            print(result)
            # The next element has slid into position x_i and no target
            # element was consumed, so neither cursor moves.
            continue
        if operation == Operation.INSERT:
            # Slice rather than index so the piece has the same type as the
            # sequence (a one-element list/tuple, or a one-character string).
            result = result[:x_i] + y[y_i:y_i + 1] + result[x_i:]
            print(result)
        elif operation == Operation.CHANGE:
            result = result[:x_i] + y[y_i:y_i + 1] + result[x_i + 1:]
            print(result)
        # INSERT, CHANGE and NO_CHANGE each leave one more target element in place.
        y_i += 1
        x_i += 1
    return weight

In [6]:
# 'los' -> 'klos' -> 'kloc': one insertion plus one substitution, so the distance is 2.
edit_distance('los', 'kloc')

los
klos
kloc


2

In [7]:
# Accented letters compare as different characters, so each of the three is substituted.
edit_distance('Łódź', 'Lodz')

Łódź
Lódź
Lodź
Lodz


3

In [8]:
# Mixed script: three substitutions, one insertion and one deletion (distance 5).
edit_distance('kwintesencja', 'quintessence')

kwintesencja
qwintesencja
quintesencja
quintessencja
quintessencea
quintessence


5

In [9]:
# Two 18-character sequences over the letters A, C, G and T.
edit_distance('ATGAATCTTACCGCCTCG', 'ATGAGGCTCTGGCCCCTG')

ATGAATCTTACCGCCTCG
ATGAGTCTTACCGCCTCG
ATGAGGCTTACCGCCTCG
ATGAGGCTCTACCGCCTCG
ATGAGGCTCTGCCGCCTCG
ATGAGGCTCTGGCCGCCTCG
ATGAGGCTCTGGCCCCTCG
ATGAGGCTCTGGCCCCTG


7

# longest common subsequence

In [10]:
def lcs(x: Sequence[X], y: Sequence[X]):
    """Return a longest common subsequence of ``x`` and ``y`` as a list.

    The subsequence is read off an edit script from ``get_edit_sequence``:
    every ``NO_CHANGE`` step contributes the matched element of ``x``.

    Args:
        x: First sequence.
        y: Second sequence.

    Returns:
        A list with the elements of one longest common subsequence, in order.
    """
    # A substitution must cost as much as a delete plus an insert (2).
    # With the default cost of 1 the cheapest script may substitute instead of
    # keeping a match (e.g. 'abc' vs 'xya' becomes three substitutions),
    # and the result would not be a *longest* common subsequence.
    q, _ = get_edit_sequence(x, y, lambda a, b: 0 if a == b else 2)
    result = []
    x_i = 0
    while not q.empty():
        operation = q.get()
        if operation == Operation.INSERT:
            # Insertions consume only an element of y.
            continue
        if operation == Operation.NO_CHANGE:
            result.append(x[x_i])
        # NO_CHANGE, CHANGE and DELETE each consume one element of x.
        x_i += 1
    return result

# diff

In [None]:
def diff(a: Sequence[X], b: Sequence[X]):
    """Render a two-line, character-aligned diff between ``a`` and ``b``.

    The first line lists every element of both inputs, interleaved around the
    common subsequence returned by ``lcs``. The second line carries one marker
    per element: ``'-'`` for elements found only in ``a``, ``'+'`` for elements
    found only in ``b`` and ``' '`` for shared elements. Elements are joined
    with ``''.join``, so they have to be strings.

    Returns:
        The two lines, each terminated by a newline, as a single string.
    """
    marked = []  # (element, marker) pairs in output order
    pos_a = 0
    pos_b = 0
    for shared in lcs(a, b):
        # Whatever precedes the next shared element in `a` was removed ...
        while a[pos_a] != shared:
            marked.append((a[pos_a], '-'))
            pos_a += 1
        # ... and whatever precedes it in `b` was added.
        while b[pos_b] != shared:
            marked.append((b[pos_b], '+'))
            pos_b += 1
        marked.append((shared, ' '))
        pos_a += 1
        pos_b += 1
    # Leftovers after the last shared element.
    marked.extend((a[k], '-') for k in range(pos_a, len(a)))
    marked.extend((b[k], '+') for k in range(pos_b, len(b)))
    elements_line = ''.join(element for element, _ in marked)
    markers_line = ''.join(marker for _, marker in marked)
    return elements_line + '\n' + markers_line + '\n'