In [None]:
from enum import Enum
from queue import LifoQueue
from typing import TypeVar, Callable, Sequence
from bisect import bisect

# Generic element type used in the Sequence[X] annotations of the helpers in this notebook.
X = TypeVar('X')

# edit distance

In [None]:
class Operation(Enum):
    """Kind of edit step stored in a cell of the edit-distance table."""

    # Remove the current element of the source sequence.
    DELETE = 0
    # Replace the current source element with the current target element.
    CHANGE = 1
    # Insert the current target element in front of the current source position.
    INSERT = 2
    # Keep the current element; also the initial value of every new table cell.
    NO_CHANGE = 3


class EditTableCell:
    """One cell of the dynamic-programming table built by ``edit_distance``.

    Instance fields:
        weight: cost stored for this cell.
        parent: cell this one was derived from, or ``None`` for the start of a chain.
        comparator_weight: scratch value, initialised to ``weight``.
        operation: ``Operation`` attached to this cell (``NO_CHANGE`` by default).
    """

    def __init__(self, weight: int, parent: 'EditTableCell' = None) -> None:
        super().__init__()
        self.weight = weight
        self.parent = parent
        self.comparator_weight = weight
        self.operation: Operation = Operation.NO_CHANGE

    def edit_sequence(self, queue = None):
        """Collect the operations along the parent chain ending at this cell.

        Operations are pushed starting with this cell and finishing with the
        root of the chain, so reading the returned LIFO queue yields them
        root-first.

        Args:
            queue: optional queue to push into; a new ``LifoQueue`` is created
                when omitted.

        Returns:
            The queue that received the operations.
        """
        if queue is None:
            queue = LifoQueue()
        # Walk the chain with a loop rather than recursion: one stack frame per
        # ancestor would hit the interpreter's recursion limit on long inputs.
        cell = self
        while cell is not None:
            queue.put(cell.operation)
            cell = cell.parent
        return queue

def _splice_item(sequence, start, stop, item):
    """Return ``sequence`` with ``sequence[start:stop]`` replaced by the single ``item``."""
    if isinstance(sequence, str):
        # Strings are spliced by concatenation so the printed trace stays a string.
        return sequence[:start] + item + sequence[stop:]
    # Any other sequence is rebuilt as a list; ``seq + element`` is not defined for lists/tuples.
    return list(sequence[:start]) + [item] + list(sequence[stop:])


def edit_distance(x: Sequence[X], y: Sequence[X], delta: Callable[[X, X], int] = lambda a, b: 0 if a == b else 1, verbose: bool = True):
    """Compute the edit distance between ``x`` and ``y``.

    Insertions and deletions cost 1; replacing ``x[i]`` with ``y[j]`` costs
    ``delta(x[i], y[j])``. When several predecessors tie, deletion is preferred
    over insertion, and insertion over the diagonal step.

    Args:
        x: source sequence.
        y: target sequence.
        delta: substitution cost; the default is 0 for equal elements, else 1.
        verbose: when true (the default), print ``x`` followed by the
            intermediate sequence after every delete/insert/change step.
            A string ``x`` is traced as a string; other sequences become lists
            once an element has been inserted or changed.

    Returns:
        The weight stored in the bottom-right cell of the edit table.
    """
    rows, cols = len(x) + 1, len(y) + 1
    edit_table = [[EditTableCell(0) for _ in range(cols)] for _ in range(rows)]

    # First column: turn a prefix of x into the empty sequence by deletions only.
    for i in range(1, rows):
        cell = edit_table[i][0]
        cell.parent = edit_table[i - 1][0]
        cell.weight = i
        cell.operation = Operation.DELETE
    # First row: build a prefix of y from the empty sequence by insertions only.
    for j in range(1, cols):
        cell = edit_table[0][j]
        cell.parent = edit_table[0][j - 1]
        cell.weight = j
        cell.operation = Operation.INSERT

    for i in range(1, rows):
        for j in range(1, cols):
            substitution_cost = delta(x[i - 1], y[j - 1])
            diagonal_operation = Operation.NO_CHANGE if substitution_cost == 0 else Operation.CHANGE
            above, left, diagonal = edit_table[i - 1][j], edit_table[i][j - 1], edit_table[i - 1][j - 1]
            # min() keeps the first minimal candidate, which fixes the tie-break order.
            candidates = (
                (above.weight + 1, Operation.DELETE, above),
                (left.weight + 1, Operation.INSERT, left),
                (diagonal.weight + substitution_cost, diagonal_operation, diagonal),
            )
            weight, operation, parent = min(candidates, key=lambda candidate: candidate[0])
            cell = edit_table[i][j]
            cell.weight = weight
            cell.operation = operation
            cell.parent = parent

    final_cell = edit_table[len(x)][len(y)]
    if verbose:
        operations = final_cell.edit_sequence()
        # The first entry belongs to the origin cell and describes no edit.
        operations.get()
        result = x
        print(result)
        x_i = 0
        y_i = 0
        while not operations.empty():
            operation = operations.get()
            if operation is Operation.DELETE:
                # The element at x_i disappears, so the cursor stays in place.
                result = result[:x_i] + result[x_i + 1:]
                print(result)
                continue
            if operation is Operation.INSERT:
                result = _splice_item(result, x_i, x_i, y[y_i])
                print(result)
            elif operation is Operation.CHANGE:
                result = _splice_item(result, x_i, x_i + 1, y[y_i])
                print(result)
            # INSERT, CHANGE and NO_CHANGE all consume one element of y and
            # move the cursor in the partially edited sequence.
            y_i += 1
            x_i += 1
    return final_cell.weight

In [None]:
# Demo: the target is one character longer than the source; the call prints the
# intermediate strings and the cell shows the returned distance.
edit_distance('los', 'kloc')

In [None]:
# Demo: same-length strings; the default delta compares characters with ==, so
# 'Ł'/'L', 'ó'/'o' and 'ź'/'z' each count as a mismatch.
edit_distance('Łódź', 'Lodz')

In [None]:
# Demo: two 12-character words.
edit_distance('kwintesencja', 'quintessence')

In [None]:
# Demo: two 18-character strings over the letters A, C, G and T.
edit_distance('ATGAATCTTACCGCCTCG', 'ATGAGGCTCTGGCCCCTG')

# longest common subsequence

In [None]:
def lcs(x: Sequence[X], y: Sequence[X]):
    """Return a longest common subsequence of ``x`` and ``y``.

    For every element of ``x`` the matching positions in ``y`` are visited in
    descending order and used to lower a sorted list of thresholds (found with
    ``bisect``). Each threshold carries a back-link chain, so the subsequence
    itself can be read back once all of ``x`` has been processed.

    Args:
        x: first sequence.
        y: second sequence.

    Returns:
        The common subsequence as a ``str`` when every selected element is a
        string (including the empty result), otherwise as a ``list``. When
        several subsequences share the maximal length, one of them is returned.
    """
    # thresholds[k]: smallest index in y at which a common subsequence of
    # length k + 1 found so far ends. The list is strictly increasing.
    thresholds = []
    # chains[k]: (element, previous chain) linked list spelling that subsequence backwards.
    chains = []
    for item in x:
        matches = [j for j, candidate in enumerate(y) if candidate == item]
        # Descending order guarantees chains[k - 1] still describes a subsequence
        # that ends before the current element of x.
        for position in reversed(matches):
            # Number of thresholds strictly below `position`.
            k = bisect(thresholds, position - 1)
            if k < len(thresholds) and thresholds[k] == position:
                continue  # this end position is already recorded for length k + 1
            link = (item, chains[k - 1] if k else None)
            if k == len(thresholds):
                thresholds.append(position)
                chains.append(link)
            else:
                thresholds[k] = position
                chains[k] = link

    elements = []
    link = chains[-1] if chains else None
    while link is not None:
        element, link = link
        elements.append(element)
    elements.reverse()
    if all(isinstance(element, str) for element in elements):
        return ''.join(elements)
    return elements


def diff(a: Sequence[X], b: Sequence[X]):
    """Render a two-line diff of ``a`` and ``b`` based on ``lcs(a, b)``.

    The first line holds every element in merged order. The second line marks
    each one: ``'-'`` for elements taken only from ``a``, ``'+'`` for elements
    taken only from ``b`` and ``' '`` for elements of the common subsequence.
    Both lines end with a newline.
    """
    merged = []
    markers = []

    def emit(element, marker):
        merged.append(element)
        markers.append(marker)

    pos_a = 0
    pos_b = 0
    for common in lcs(a, b):
        # Everything in `a` before the next shared element was removed...
        while a[pos_a] != common:
            emit(a[pos_a], '-')
            pos_a += 1
        # ...and everything in `b` before it was added.
        while b[pos_b] != common:
            emit(b[pos_b], '+')
            pos_b += 1
        emit(common, ' ')
        pos_a += 1
        pos_b += 1

    # Tails left over after the last shared element.
    for index in range(pos_a, len(a)):
        emit(a[index], '-')
    for index in range(pos_b, len(b)):
        emit(b[index], '+')

    return '\n'.join((''.join(merged), ''.join(markers))) + '\n'