## Levenshtein Edit Distance
The Levenshtein distance is the minimum number of single-character edits (insertions, deletions or substitutions) required to turn the source string into the target string.

In [1]:
import pandas as pd

In [2]:
def lev_distance(source='', target=''):
    """Compute the Levenshtein distance between two strings.

    Builds the full dynamic-programming matrix with the characters of
    ``source`` along the columns and the characters of ``target`` along the
    rows. Cell ``[r][c]`` holds the number of insertions, deletions or
    substitutions needed to turn the first ``c`` characters of ``source``
    into the first ``r`` characters of ``target``.

    Parameters
    ----------
    source : str
        String to start from (labels the columns of the matrix).
    target : str
        String to arrive at (labels the rows of the matrix).

    Returns
    -------
    tuple
        ``(distance, frame)`` where ``distance`` is the bottom-right cell of
        the matrix and ``frame`` is the whole matrix as a ``pandas.DataFrame``
        whose columns are the characters of ``"." + source`` and whose index
        is the characters of ``"." + target`` (``"."`` stands for the empty
        prefix).
    """
    n1, n2 = len(source), len(target)

    # (n2 + 1) rows x (n1 + 1) columns; the extra row/column is the empty prefix
    matrix = [[0] * (n1 + 1) for _ in range(n2 + 1)]

    # first row: turning a source prefix into the empty string costs its length
    matrix[0] = list(range(n1 + 1))

    # first column: building a target prefix from the empty string costs its length
    for i2 in range(1, n2 + 1):
        matrix[i2][0] = i2

    # fill the remaining cells row by row
    for i2 in range(1, n2 + 1):
        for i1 in range(1, n1 + 1):
            if source[i1 - 1] == target[i2 - 1]:
                # same letter: no extra edit, carry the diagonal value over
                value = matrix[i2 - 1][i1 - 1]
            else:
                value = 1 + min(matrix[i2 - 1][i1],       # cell above    (insertion)
                                matrix[i2][i1 - 1],       # cell to left  (deletion)
                                matrix[i2 - 1][i1 - 1])   # diagonal cell (substitution)

            matrix[i2][i1] = value

    # label the matrix so it can be displayed as a table
    frame = pd.DataFrame(matrix, columns=list("." + source), index=list("." + target))

    # the bottom-right cell is the distance between the complete strings
    return matrix[-1][-1], frame

In [6]:
# lev_distance returns (distance, DataFrame); keep only the distance here
d, _ = lev_distance('prospective', 'perspective')
d

2

In [5]:
# same pair with source and target swapped
d, _ = lev_distance('perspective', 'prospective')
d

2

In [3]:
# keep the matrix this time: 'sparking' labels the columns, 'parking' the rows
_, m = lev_distance('sparking', 'parking')
m

Unnamed: 0,.,s,p,a,r,k,i,n,g
.,0,1,2,3,4,5,6,7,8
p,1,1,1,2,3,4,5,6,7
a,2,2,2,1,2,3,4,5,6
r,3,3,3,2,1,2,3,4,5
k,4,4,4,3,2,1,2,3,4
i,5,5,5,4,3,2,1,2,3
n,6,6,6,5,4,3,2,1,2
g,7,7,7,6,5,4,3,2,1


In [4]:
# arguments swapped: 'parking' now labels the columns, 'sparking' the rows
_, m = lev_distance('parking', 'sparking')
m

Unnamed: 0,.,p,a,r,k,i,n,g
.,0,1,2,3,4,5,6,7
s,1,1,2,3,4,5,6,7
p,2,1,2,3,4,5,6,7
a,3,2,1,2,3,4,5,6
r,4,3,2,1,2,3,4,5
k,5,4,3,2,1,2,3,4
i,6,5,4,3,2,1,2,3
n,7,6,5,4,3,2,1,2
g,8,7,6,5,4,3,2,1


In [8]:
# no unpacking: the cell displays the whole (distance, DataFrame) tuple.
# 'cat' -> 'cta' swaps two adjacent letters; lev_distance has no transposition
# step, only insert/delete/substitute, so the swap is counted as 2 edits
lev_distance('cat', 'cta')

(2,
    .  c  a  t
 .  0  1  2  3
 c  1  0  1  2
 t  2  1  1  1
 a  3  2  1  2)

In [11]:
# strings of different lengths (7 and 11 characters): the matrix is 12 rows x 8 columns
lev_distance('Damerau', 'Levenshtein')


(10,
     .   D   a   m   e   r   a   u
 .   0   1   2   3   4   5   6   7
 L   1   1   2   3   4   5   6   7
 e   2   2   2   3   3   4   5   6
 v   3   3   3   3   4   4   5   6
 e   4   4   4   4   3   4   5   6
 n   5   5   5   5   4   4   5   6
 s   6   6   6   6   5   5   5   6
 h   7   7   7   7   6   6   6   6
 t   8   8   8   8   7   7   7   7
 e   9   9   9   9   8   8   8   8
 i  10  10  10  10   9   9   9   9
 n  11  11  11  11  10  10  10  10)

In [9]:
# source contains repeated letters ('o', 'u'), so the frame gets duplicate column labels;
# d is assigned but only the matrix m is displayed
d, m = lev_distance('courageous', 'courageus')
m

Unnamed: 0,.,c,o,u,r,a,g,e,o.1,u.1,s
.,0,1,2,3,4,5,6,7,8,9,10
c,1,0,1,2,3,4,5,6,7,8,9
o,2,1,0,1,2,3,4,5,6,7,8
u,3,2,1,0,1,2,3,4,5,6,7
r,4,3,2,1,0,1,2,3,4,5,6
a,5,4,3,2,1,0,1,2,3,4,5
g,6,5,4,3,2,1,0,1,2,3,4
e,7,6,5,4,3,2,1,0,1,2,3
u,8,7,6,5,4,3,2,1,1,1,2
s,9,8,7,6,5,4,3,2,2,2,1


## Levenshtein distance in the NLTK library

In [10]:
# import library
from nltk.metrics.distance import edit_distance

In [4]:
# called with default arguments; the adjacent 'l'/'e' swap is counted as 2 edits
edit_distance("apple", "appel")

2

## Damerau-Levenshtein Distance
The Damerau-Levenshtein distance extends the Levenshtein distance by also counting a transposition (swapping two adjacent letters) as a single edit.

In [3]:
# transpositions=True makes the adjacent 'l'/'e' swap count as a single edit
edit_distance("apple", "appel", transpositions=True)

1