# Edit Distance

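In this problem we want to find the edit distance between two strings: the minimum number of edits needed to turn one string into the other, where an edit is either a single-character substitution (a "swap") or a gap (inserting or deleting one character), and each edit costs 1.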

In [2]:
# Changing "habits" into "hobbit" takes 3 edits: one substitution (a -> o)
# and two gaps (shown as "-" in the alignment printed below).
print("hab-its", "hobbit-", sep="\n")

hab-its
hobbit-


In [7]:
def combinatorial_distance(s, t):
    """Return the edit distance between s and t by exhaustive recursion.

    Every way of aligning the final characters is explored without caching
    any intermediate result, so the running time is exponential in the
    lengths of the inputs.
    """
    # Base cases: an empty string can only be lined up against gaps,
    # one per remaining character of the other string.
    if not s:
        return len(t)
    if not t:
        return len(s)

    s_rest, t_rest = s[:-1], t[:-1]

    # Pairing the two final characters is free when they already agree.
    pairing_cost = 0 if s[-1] == t[-1] else 1

    # Three ways to end the alignment:
    #   - pair the last character of s with the last character of t,
    #   - pair the last character of s with a gap,
    #   - pair the last character of t with a gap.
    return min(
        pairing_cost + combinatorial_distance(s_rest, t_rest),
        1 + combinatorial_distance(s_rest, t),
        1 + combinatorial_distance(s, t_rest),
    )

In [9]:
# Sanity check of the exhaustive version: "habits" -> "hobbit" should print 3.
print(combinatorial_distance("habits", "hobbit"))

3


In [16]:
def dynamic_distance(s, t):
    """Return the edit distance between s and t using a bottom-up table.

    The table has (len(s) + 1) * (len(t) + 1) entries and each one is filled
    in constant time, so the algorithm is O(nk) where n = len(s) and
    k = len(t).
    """
    rows, cols = len(s) + 1, len(t) + 1

    # table[i][j] will hold the edit distance between s[:i] and t[:j].
    table = [[0] * cols for _ in range(rows)]

    # Base cases: against an empty prefix, every character needs a gap.
    for i in range(rows):
        table[i][0] = i
    for j in range(cols):
        table[0][j] = j

    for i, s_char in enumerate(s, start=1):
        for j, t_char in enumerate(t, start=1):
            # Pairing the two final characters is free when they agree.
            pairing_cost = 0 if s_char == t_char else 1

            # Same three choices as the recursive formulation, but each
            # smaller subproblem is looked up instead of recomputed.
            table[i][j] = min(
                pairing_cost + table[i - 1][j - 1],  # pair s_char with t_char
                1 + table[i - 1][j],                 # pair s_char with a gap
                1 + table[i][j - 1],                 # pair t_char with a gap
            )

    # The last cell covers the two complete strings.
    return table[len(s)][len(t)]

In [17]:
# Testing: the table-based version should print 3 for "habits" -> "hobbit",
# the same answer the exhaustive version gives.
print(dynamic_distance("habits", "hobbit"))

3
