Skip to content

Commit

Permalink
Levenshtein minimum edit distance algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
thammegowda committed Sep 14, 2015
1 parent c35c760 commit 0dd1930
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 0 deletions.
1 change: 1 addition & 0 deletions __init__.py
@@ -0,0 +1 @@
__author__ = 'tg'
1 change: 1 addition & 0 deletions text/__init__.py
@@ -0,0 +1 @@
__author__ = 'tg'
56 changes: 56 additions & 0 deletions text/edit_distance.py
@@ -0,0 +1,56 @@

__author__ = 'tg'

from util.arrays import format_2d_array
import sys



def min_lavenshtein_distance(s1, s2):
    '''
    Computes the minimum (Levenshtein-style) edit distance between two strings.

    Insertions and deletions cost 1 each; a substitution costs 2 (i.e. it is
    priced as one deletion plus one insertion); matching characters cost 0.
    As a side effect the filled dynamic-programming table is printed, with
    the characters of s2 as column headers and the characters of s1 as row
    labels (each prefixed by the '#' base-case marker).

    :param s1: string 1 (indexes the rows of the table)
    :param s2: string 2 (indexes the columns of the table)
    :return: minimum edit distance
    '''
    # A sentinel character is prepended so that row 0 / column 0 can hold the
    # base cases (distance from/to the empty prefix).
    s1 = '#' + s1
    s2 = '#' + s2
    n = len(s1)
    m = len(s2)

    # matrix[i][j] = distance between the first i chars of the original s1
    # and the first j chars of the original s2.
    matrix = [[0] * m for _ in range(n)]

    for i in range(n):
        for j in range(m):
            if i == 0:  # base case 1: build s2's prefix from nothing
                distance = j
            elif j == 0:  # base case 2: delete all of s1's prefix
                distance = i
            else:  # general case: cheapest of the three possible moves
                # Diagonal move is free when the characters already match,
                # otherwise it is a substitution.
                substitution_cost = 0 if s1[i] == s2[j] else 2
                distance = min(
                    matrix[i - 1][j] + 1,  # vertical move: drop a char of s1
                    matrix[i][j - 1] + 1,  # horizontal move: add a char of s2
                    matrix[i - 1][j - 1] + substitution_cost,
                )
            matrix[i][j] = distance

    # Rows correspond to s1 and columns to s2, so s2 supplies the column
    # headers and s1 the row labels.
    print(format_2d_array(matrix, s2, s1))

    return matrix[n - 1][m - 1]

if __name__ == '__main__':
    # Small demo run on a fixed word pair; the call itself also prints the
    # distance table before the final distance is printed here.
    source_word = 'intention'
    target_word = 'execution'
    print(min_lavenshtein_distance(source_word, target_word))
1 change: 1 addition & 0 deletions util/__init__.py
@@ -0,0 +1 @@
__author__ = 'tg'
28 changes: 28 additions & 0 deletions util/arrays.py
@@ -0,0 +1,28 @@
__author__ = 'tg'

def format_2d_array(arr, col_head, row_head):
    '''
    Formats a 2d array (list of lists) into a tab-separated rows x columns table.

    The first line is '*' followed by every column header. Each following
    line is a row label followed by the cells of the matching row of arr.
    Every line, including the last, ends with a newline. Headers and cells
    are converted with str(), so they need not be strings.

    One line is produced per entry of row_head: extra rows of arr are left
    out, and an IndexError is raised if row_head has more entries than arr
    has rows.

    :param arr: The array to be formatted
    :param col_head: the header for each column
    :param row_head: the header for each row
    :return: the string created by formatting the contents of array with headers
    '''
    # Header line: corner marker followed by the column labels.
    lines = ['\t'.join(['*'] + [str(head) for head in col_head])]

    # Index arr by position (rather than zipping) so that a row_head longer
    # than arr still fails loudly instead of being silently shortened.
    for row_idx, head in enumerate(row_head):
        cells = [str(head)] + [str(cell) for cell in arr[row_idx]]
        lines.append('\t'.join(cells))

    return '\n'.join(lines) + '\n'

if __name__ == '__main__':
    # Demo: a 5 x 4 grid whose every row is 0..3. Only four row labels are
    # supplied, so the formatter emits four of the five rows.
    sample_grid = [list(range(4)) for _ in range(5)]
    print(format_2d_array(sample_grid, 'abcd', 'xyzf'))


0 comments on commit 0dd1930

Please sign in to comment.