### Hamming Distance

In [1]:
from scipy.spatial import distance

In [2]:
# Example 1: the two words to compare.
string1, string2 = 'TIME', 'MINE'

In [3]:
# Normalized Hamming distance between the character lists of the two strings
# (scipy reports the proportion of positions that disagree).
Normalized_HD = distance.hamming(list(string1), list(string2))

print(
    'The Normalized Hamming Distance between {} and {} is {}.'.format(
        string1, string2, Normalized_HD
    )
)

The Normalized Hamming Distance between TIME and MINE is 0.5.


In [4]:
# Original (unnormalized) Hamming distance: scale the normalized value back up
# by the length of the strings.
print(
    'The Hamming Distance between {} and {} is {}'.format(
        string1, string2, Normalized_HD * len(string1)
    )
)

The Hamming Distance between TIME and MINE is 2.0


In [5]:
# Example 2: two words of different lengths.
word1, word2 = 'MAN', 'WOMAN'

In [6]:
# Normalized Hamming Distance
#
# distance.hamming requires both inputs to have the same length; word1 and
# word2 do not, so the call raises ValueError. That error is the point of this
# example, so it is caught and displayed instead of aborting a full
# "Restart Kernel -> Run All".
try:
    Normalized_HD_ = distance.hamming(list(word1), list(word2))
except ValueError as err:
    # Keep the name defined so that later references do not raise NameError.
    Normalized_HD_ = None
    print('ValueError: {}'.format(err))
else:
    print('The Normalized Hamming Distance between {} and {} is {}.'.format(word1, word2, Normalized_HD_))

ValueError: The 1d arrays must have equal lengths.

In [None]:
# Original Hamming Distance
#
# The Hamming distance counts the positions at which two strings differ, so it
# only exists when both strings have the same length. It is computed directly
# from word1 and word2 here rather than from Normalized_HD, which holds the
# result for Example 1 (string1 / string2) and would give a meaningless value.
if len(word1) == len(word2):
    hamming_count = sum(char_a != char_b for char_a, char_b in zip(word1, word2))
    print('The Hamming Distance between {} and {} is {}'.format(word1, word2, hamming_count))
else:
    print(
        'The Hamming Distance between {} and {} is undefined: '
        'the strings have different lengths ({} and {}).'.format(
            word1, word2, len(word1), len(word2)
        )
    )

### Levenshtein Distance

In [7]:
import numpy as np

In [11]:
# Example 1: the two strings used to build the Levenshtein matrix step by step.
string_1, string_2 = 'CLOCK', 'CLONE'

In [22]:
# Step 1: Create a zero-filled matrix with one more row than string_1 has
# characters and one more column than string_2 has characters.
size_a, size_b = len(string_1) + 1, len(string_2) + 1
matrix = np.zeros(shape=(size_a, size_b))

# show the matrix
matrix

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [25]:
# Step 2: Initialise the borders of the matrix.

# First column: row i receives the value i (0 .. len(string_1)).
matrix[:, 0] = np.arange(size_a)

# First row: column j receives the value j (0 .. len(string_2)).
matrix[0, :] = np.arange(size_b)

# print the matrix
matrix

array([[0., 1., 2., 3., 4., 5.],
       [1., 0., 0., 0., 0., 0.],
       [2., 0., 0., 0., 0., 0.],
       [3., 0., 0., 0., 0., 0.],
       [4., 0., 0., 0., 0., 0.],
       [5., 0., 0., 0., 0., 0.]])

In [26]:
# Step 3: Fill the remaining cells. Each cell is the cheapest of its three
# already-computed neighbours plus the cost of the step taken from it.

# For each row (index i walks over the characters of string_1):
for i in range(1, size_a):

    # For each column (index j walks over the characters of string_2):
    for j in range(1, size_b):

        # The diagonal step is free when the two characters match and costs 1
        # otherwise; the steps from above and from the left always cost 1.
        diagonal_cost = 0 if string_1[i-1] == string_2[j-1] else 1

        matrix[i, j] = min(
            matrix[i-1, j] + 1,                # from the cell above
            matrix[i, j-1] + 1,                # from the cell to the left
            matrix[i-1, j-1] + diagonal_cost,  # from the diagonal cell
        )

# printing the final matrix: output
matrix

array([[0., 1., 2., 3., 4., 5.],
       [1., 0., 1., 2., 3., 4.],
       [2., 1., 0., 1., 2., 3.],
       [3., 2., 1., 0., 1., 2.],
       [4., 3., 2., 1., 1., 2.],
       [5., 4., 3., 2., 2., 2.]])

In [13]:
# NOTE: levenshtein_distance is defined in a later cell of this notebook. That
# cell has to be executed before this one; on a fresh kernel run top to bottom
# this call raises NameError.
levenshtein_distance(string_1, string_2)

[[0. 1. 2. 3. 4. 5.]
 [1. 0. 1. 2. 3. 4.]
 [2. 1. 0. 1. 2. 3.]
 [3. 2. 1. 0. 1. 2.]
 [4. 3. 2. 1. 1. 2.]
 [5. 4. 3. 2. 2. 2.]]


2.0

In [51]:
# Consolidating all the above steps in the function:

# Define function to compute levenshtein distance between two strings:

def levenshtein_distance(s1, s2):
    """Compute the Levenshtein distance between two strings.

    Builds a ``(len(s1) + 1) x (len(s2) + 1)`` matrix, prints the filled
    matrix followed by a one-line summary, and returns the distance so that
    callers can use the result programmatically.

    Parameters
    ----------
    s1 : str
        First string; its characters correspond to the matrix rows.
    s2 : str
        Second string; its characters correspond to the matrix columns.
        Either string may be empty.

    Returns
    -------
    float
        The value of the bottom-right cell of the matrix, i.e. the
        Levenshtein distance between ``s1`` and ``s2``.
    """
    # Step 1: Create a zero-filled matrix with one extra row and column.
    size_x = len(s1) + 1
    size_y = len(s2) + 1
    matrix = np.zeros((size_x, size_y))

    # Step 2: Initialise the borders.
    # First column: row x receives the value x.
    matrix[:, 0] = np.arange(size_x)
    # First row: column y receives the value y.
    matrix[0, :] = np.arange(size_y)

    # Step 3: Fill the remaining cells row by row.
    for x in range(1, size_x):
        for y in range(1, size_y):
            # The diagonal step is free when the characters match and costs 1
            # otherwise; steps from above and from the left always cost 1.
            diagonal_cost = 0 if s1[x-1] == s2[y-1] else 1

            matrix[x, y] = min(
                matrix[x-1, y] + 1,                # from the cell above
                matrix[x, y-1] + 1,                # from the cell to the left
                matrix[x-1, y-1] + diagonal_cost,  # from the diagonal cell
            )

    # The bottom-right cell holds the distance between the full strings.
    lev_distance = matrix[size_x - 1, size_y - 1]

    # print the final matrix
    print(matrix)
    print('')

    # print the levenshtein distance between the strings:
    print('The Levenshtein Distance between {} and {} is {}.'.format(s1, s2, lev_distance))

    return float(lev_distance)

In [53]:
# Example 1: run the consolidated function on string_1 and string_2.
levenshtein_distance(string_1, string_2)

[[0. 1. 2. 3. 4. 5.]
 [1. 0. 1. 2. 3. 4.]
 [2. 1. 0. 1. 2. 3.]
 [3. 2. 1. 0. 1. 2.]
 [4. 3. 2. 1. 1. 2.]
 [5. 4. 3. 2. 2. 2.]]

The Levenshtein Distance between CLOCK and CLONE is 2.0.


In [54]:
# Example 2: word1 and word2 have different lengths; the function builds a
# non-square matrix for them.
levenshtein_distance(word1, word2)

[[0. 1. 2. 3. 4. 5.]
 [1. 1. 2. 2. 3. 4.]
 [2. 2. 2. 3. 2. 3.]
 [3. 3. 3. 3. 3. 2.]]

The Levenshtein Distance between MAN and WOMAN is 2.0.


-----