In [1]:
# 1. (DNA) - pair of nucleotide sequences bound to X and Y
X, Y = (
    'ACTACTAGATTACTTACGGATCAGGTACTTTAGAGGCTTGCAACCA',
    'TACTAGCTTACTTACCCATCAGGTTTTAGAGATGGCAACCA',
)

In [2]:
# 2. (Proteins) - pair of amino-acid sequences.
# Note: this rebinds X and Y, so cells that run after this one operate on
# the protein pair rather than on the DNA pair assigned in the cell above.
X, Y = (
    'AASRPRSGVPAQSDSDPCQNLAATPIPSRPPSSQSCQKCRADARQGRWGP',
    'SGAPGQRGEPGPQGHAGAPGPPGPPGSDG',
)

In [3]:
# Standard library: wall-clock time and a monotonic high-resolution timer
from time import perf_counter, time

# Third-party: numpy
import numpy as np

In [4]:
# Using matrix dynamic programming, define a function that, given string 1 and string 2, calculates the edit distance between them

def edit_distance(string1, string2):
    """Return the unit-cost edit distance between string1 and string2.

    A table with len(string1) + 1 rows and len(string2) + 1 columns is filled
    so that cell [i, j] holds the cheapest way of turning the first i
    characters of string1 into the first j characters of string2, where a
    substitution, a deletion and an insertion each cost 1 and a matching
    character costs 0.

    Side effect: the completed table is printed before returning.

    Parameters
    ----------
    string1 : str
        Source sequence (one table row per character, plus the empty prefix).
    string2 : str
        Target sequence (one table column per character, plus the empty prefix).

    Returns
    -------
    int
        The value in the bottom-right cell of the table.
    """
    rows = len(string1) + 1
    cols = len(string2) + 1

    # Borders: reaching / leaving the empty prefix costs one edit per character.
    table = np.zeros((rows, cols))
    table[:, 0] = np.arange(rows)
    table[0, :] = np.arange(cols)

    # Interior: each cell takes the cheapest of its three predecessors.
    for i, char1 in enumerate(string1, start=1):
        for j, char2 in enumerate(string2, start=1):
            if char1 != char2:
                substitution = table[i - 1, j - 1] + 1
            else:
                substitution = table[i - 1, j - 1]  # characters match: free
            deletion = table[i - 1, j] + 1
            insertion = table[i, j - 1] + 1
            table[i, j] = min(substitution, deletion, insertion)

    # Show the constructed table, then hand back the final distance.
    print(table)
    return int(table[rows - 1, cols - 1])

In [5]:
# Compute and print the edit distance of the current X / Y pair.
# perf_counter() is used instead of time(): it is a monotonic,
# high-resolution clock intended for measuring short durations, so the
# millisecond-scale result is not affected by system clock adjustments.
start = perf_counter()
print('Edit distance: ', edit_distance(X, Y))

# Print the elapsed time (this includes the time spent printing the matrix
# inside edit_distance and the result line above).
print('Time: %0.5f seconds' % (perf_counter() - start))

[[ 0.  1.  2. ... 27. 28. 29.]
 [ 1.  1.  2. ... 26. 27. 28.]
 [ 2.  2.  2. ... 25. 26. 27.]
 ...
 [48. 47. 46. ... 37. 36. 35.]
 [49. 48. 47. ... 38. 37. 36.]
 [50. 49. 48. ... 39. 38. 37.]]
Edit distance:  37
Time: 0.00658 seconds


In [6]:
# Import numpy  
import numpy as np

# Using matrix dynamic programming, define a function that, given string 1, string 2,
# the substitution cost, the deletion cost and the insertion cost, calculates the edit distance

def edit_distance_penalty(string1, string2, substitution_cost=1, deletion_cost=1, insertion_cost=1):
    """Return the edit distance between two strings under custom penalties.

    A table with len(string1) + 1 rows and len(string2) + 1 columns is filled
    so that cell [i, j] holds the cheapest way of turning the first i
    characters of string1 into the first j characters of string2. Matching
    characters cost 0; every other operation costs the given penalty.

    Side effect: the completed table is printed before returning.

    Parameters
    ----------
    string1 : str
        Source sequence.
    string2 : str
        Target sequence.
    substitution_cost : int or float, default 1
        Penalty for replacing a character of string1 with a different one.
    deletion_cost : int or float, default 1
        Penalty for dropping a character of string1.
    insertion_cost : int or float, default 1
        Penalty for adding a character of string2.

    Returns
    -------
    int or float
        The value in the bottom-right cell of the table. Whole-number totals
        are returned as ``int``; fractional totals (possible with
        non-integer penalties) are returned as ``float`` rather than being
        truncated.
    """
    m = len(string1) + 1  # table rows: prefixes of string1, including the empty one
    n = len(string2) + 1  # table columns: prefixes of string2, including the empty one

    # Borders: turning a prefix into the empty string is all deletions,
    # building a prefix from the empty string is all insertions.
    matrix = np.zeros((m, n))
    matrix[:, 0] = np.arange(m) * deletion_cost
    matrix[0, :] = np.arange(n) * insertion_cost

    # Interior: each cell takes the cheapest of its three predecessors.
    for x in range(1, m):
        for y in range(1, n):
            subst_cost = substitution_cost if string1[x - 1] != string2[y - 1] else 0
            matrix[x, y] = min(
                matrix[x - 1, y - 1] + subst_cost,   # substitute (or free match)
                matrix[x - 1, y] + deletion_cost,    # delete from string1
                matrix[x, y - 1] + insertion_cost,   # insert from string2
            )

    # Show the constructed table.
    print(matrix)

    # Do not blindly cast to int: with fractional penalties that would
    # truncate the result (e.g. 0.5 -> 0). Keep int for whole-number totals
    # so results for integer penalties are unchanged.
    distance = float(matrix[m - 1, n - 1])
    return int(distance) if distance.is_integer() else distance

In [7]:
# Compute and print the penalty-based edit distance of the current X / Y
# pair, using the default penalties.
# perf_counter() is used instead of time(): it is a monotonic,
# high-resolution clock intended for measuring short durations, so the
# millisecond-scale result is not affected by system clock adjustments.
start = perf_counter()
print('Edit distance: ', edit_distance_penalty(X, Y))

# Print the elapsed time (this includes the time spent printing the matrix
# inside edit_distance_penalty and the result line above).
print('Time: %0.5f seconds' % (perf_counter() - start))

[[ 0.  1.  2. ... 27. 28. 29.]
 [ 1.  1.  2. ... 26. 27. 28.]
 [ 2.  2.  2. ... 25. 26. 27.]
 ...
 [48. 47. 46. ... 37. 36. 35.]
 [49. 48. 47. ... 38. 37. 36.]
 [50. 49. 48. ... 39. 38. 37.]]
Edit distance:  37
Time: 0.00487 seconds
