Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Levenshtein minimum edit distance algorithm
- Loading branch information
1 parent
c35c760
commit 0dd1930
Showing
5 changed files
with
87 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__author__ = 'tg' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__author__ = 'tg' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
|
||
__author__ = 'tg' | ||
|
||
from util.arrays import format_2d_array | ||
import sys | ||
|
||
|
||
|
||
def min_lavenshtein_distance(s1, s2, show_matrix=True, substitution_cost=2):
    '''
    Computes the minimum (Levenshtein) edit distance between two strings.

    Insertions and deletions cost 1. A substitution costs
    ``substitution_cost``; the default of 2 treats a substitution as one
    deletion plus one insertion. Pass 1 for the classic Levenshtein distance.

    :param s1: string 1 (source); indexes the rows of the distance matrix
    :param s2: string 2 (target); indexes the columns of the distance matrix
    :param show_matrix: when true (the default), print the filled distance
        matrix via ``format_2d_array`` before returning
    :param substitution_cost: cost charged when s1[i] and s2[j] differ
    :return: minimum edit distance
    '''
    # A sentinel character is prepended so that row 0 / column 0 can hold the
    # base cases (distance between an empty prefix and every other prefix).
    s1 = '#' + s1
    s2 = '#' + s2
    n = len(s1)
    m = len(s2)

    # n rows (one per s1 character) by m columns (one per s2 character).
    matrix = [[0] * m for _ in range(n)]

    for i in range(n):
        for j in range(m):
            if i == 0:  # base case 1: build s2[:j] from nothing -> j insertions
                distance = j
            elif j == 0:  # base case 2: reduce s1[:i] to nothing -> i deletions
                distance = i
            else:  # general case: cheapest of the three possible last moves
                deletion = matrix[i - 1][j] + 1  # drop s1[i]
                insertion = matrix[i][j - 1] + 1  # add s2[j]
                # Diagonal move: free when the characters match, otherwise a
                # substitution.
                mismatch = 0 if s1[i] == s2[j] else substitution_cost
                substitution = matrix[i - 1][j - 1] + mismatch
                distance = min(deletion, insertion, substitution)
            matrix[i][j] = distance

    if show_matrix:
        # Columns are labelled by s2 and rows by s1, matching how the matrix
        # is indexed above.
        print(format_2d_array(matrix, s2, s1))

    return matrix[n - 1][m - 1]
|
||
if __name__ == '__main__':
    # Demo: edit distance between two sample words.
    s1 = 'intention'
    s2 = 'execution'
    min_edit_distance = min_lavenshtein_distance(s1, s2)
    # Parenthesised single-argument print runs on both Python 2 and Python 3.
    print(min_edit_distance)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__author__ = 'tg' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
__author__ = 'tg' | ||
|
||
def format_2d_array(arr, col_head, row_head):
    '''
    Formats a 2d array (list of lists) into a tab-separated rows x columns
    table.

    The first line is ``*`` followed by every column header. Each later line
    is a row header followed by the cells of the matching row of ``arr``.
    One line is produced per entry of ``row_head``; rows of ``arr`` beyond
    ``len(row_head)`` are not included.

    :param arr: The array to be formatted
    :param col_head: the header for each column (any iterable of labels)
    :param row_head: the header for each row (any sequence of labels)
    :return: the string created by formatting the contents of array with
        headers; every line, including the last, ends with a newline
    :raises IndexError: if ``row_head`` has more entries than ``arr`` has rows
    '''
    lines = ['\t'.join(['*'] + [str(label) for label in col_head])]

    for row_idx, label in enumerate(row_head):
        # str() on the row label mirrors the column headers, so non-string
        # labels (e.g. ints) work for both.
        cells = [str(label)] + [str(cell) for cell in arr[row_idx]]
        lines.append('\t'.join(cells))

    return '\n'.join(lines) + '\n'
|
||
if __name__ == '__main__':
    # Demo: a 5 x 4 grid. Only four row headers are supplied, so the table
    # shows the first four rows.
    arr = [[i for i in range(4)] for j in range(5)]
    res = format_2d_array(arr, 'abcd', 'xyzf')
    # Parenthesised single-argument print runs on both Python 2 and Python 3.
    print(res)
|
||
|