<a href="https://colab.research.google.com/github/rz-pb/Bioinformatics-Codes/blob/main/SemiGlobal_Pairwise_Sequence_Alignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Semi-Global Pairwise Sequence Alignment

In [None]:
import numpy as np
np.set_printoptions(linewidth=200)

# Bottom-Up Approach

In [None]:
# First example: the pair of sequences to align.
V = "TACGGGTAT"
W = "GGACGTACG"

# Linear gap penalty (the alignment subtracts it once per gap position).
d = 1

# Substitution scores. Rows and columns both follow the residue order
# A, C, G, T used by Score(): +1 for a match, -1 for any mismatch.
Scoring_Matrix = np.array([
    [ 1, -1, -1, -1],   # A
    [-1,  1, -1, -1],   # C
    [-1, -1,  1, -1],   # G
    [-1, -1, -1,  1],   # T
])


# Second example: to try it, call SGPSA_DP_BU with the data below.
X = "CTCTGCCTCTG"
Y = "CACTCCTGATG"

# Gap penalty for the second example.
Another_d = 2

# Same layout as Scoring_Matrix, except that A<->T substitutions score 0.
Another_Scoring_Matrix = np.array([
    [ 1, -1, -1,  0],   # A
    [-1,  1, -1, -1],   # C
    [-1, -1,  1, -1],   # G
    [ 0, -1, -1,  1],   # T
])

In [None]:
def Score(v, u, Score_Matrix):
    """Return the substitution score for aligning residue v with residue u.

    v selects the row and u the column of Score_Matrix; both axes are
    indexed in the residue order A, C, G, T.  Any other character raises
    KeyError.
    """
    position = {residue: k for k, residue in enumerate("ACGT")}
    row = position[v]
    column = position[u]
    return Score_Matrix[row, column]

In [None]:
def DP_tables(X, Y):
    """Allocate the two blank tables used by the bottom-up alignment.

    Both tables have shape (len(Y) + 1, len(X) + 1): one row per prefix of
    Y and one column per prefix of X, including the empty prefix.

    Returns a tuple (scores, operations):
      * scores     -- integer array with every cell set to -1
      * operations -- one-character string array with every cell set to "E"
    """
    shape = (len(Y) + 1, len(X) + 1)

    blank_scores = np.full(shape, -1)
    blank_operations = np.full(shape, "E", dtype="U1")

    return (blank_scores, blank_operations)

In [None]:
def SGPSA_DP_BU(X, Y, S, d):
    """Fill the semi-global alignment tables for X and Y bottom-up.

    Parameters
    ----------
    X, Y : sequences of residues (characters accepted by Score()).
        Columns of the tables follow X, rows follow Y.
    S : substitution matrix passed through to Score().
    d : gap penalty, subtracted for every gap position.

    Returns
    -------
    (score_table, operations_table, best_score)
        score_table      -- array of shape (len(Y) + 1, len(X) + 1); row 0
                            and column 0 are 0 (leading gaps are free).
        operations_table -- same shape, one code per cell:
                            "N" origin, "S" substitution (diagonal move),
                            "D" delete (move from the left neighbour),
                            "I" insert (move from the upper neighbour).
                            When several moves tie, "I" takes precedence
                            over "D", which takes precedence over "S".
        best_score       -- the largest value in the last row of score_table.

    The score table is promoted to a dtype that can hold the entries of S
    and d, so a fractional score or gap penalty is stored exactly instead of
    being truncated into the integer table that DP_tables allocates.  For
    integer inputs the table stays integer.

    NOTE(review): only the last row is scanned for best_score, so an
    unaligned tail of X is free while an unaligned tail of Y is not.  If
    trailing gaps are meant to be free for both sequences, the last column
    would have to be considered as well -- confirm the intended convention.
    """
    score_table, operations_table = DP_tables(X, Y)

    # Widen the table if S or d are not integers; a no-op for integer input.
    score_table = score_table.astype(
        np.result_type(score_table.dtype, np.asarray(S).dtype, np.asarray(d).dtype),
        copy=False,
    )

    # Boundary cells: free leading gaps in both sequences.
    score_table[0, :] = 0
    score_table[:, 0] = 0
    operations_table[0, 1:] = "D"   # DELETE
    operations_table[1:, 0] = "I"   # INSERT
    operations_table[0, 0] = "N"

    for i in range(1, len(Y) + 1):
        for j in range(1, len(X) + 1):
            diagonal = score_table[i - 1, j - 1] + Score(X[j - 1], Y[i - 1], S)
            from_left = score_table[i, j - 1] - d
            from_above = score_table[i - 1, j] - d

            best = max(diagonal, from_left, from_above)
            score_table[i, j] = best

            # Tie precedence: insert, then delete, then substitution.
            if best == from_above:
                operations_table[i, j] = "I"   # INSERT
            elif best == from_left:
                operations_table[i, j] = "D"   # DELETE
            else:
                operations_table[i, j] = "S"   # SUBSTITUTION

    return (score_table, operations_table, score_table[-1].max())

In [None]:
# Align the first example pair (V along the columns, W along the rows) and
# keep the score table, the operations table and the best score.
(
    optimal_value_table,
    optimal_solution_table,
    global_optimal_value,
) = SGPSA_DP_BU(V, W, Scoring_Matrix, d)

In [None]:
# Score table for V vs. W: rows correspond to prefixes of W and columns to
# prefixes of V (row 0 and column 0 are the empty prefixes).
optimal_value_table

array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0, -1, -1, -1,  1,  1,  1,  0, -1, -1],
       [ 0, -1, -2, -2,  0,  2,  2,  1,  0, -1],
       [ 0, -1,  0, -1, -1,  1,  1,  1,  2,  1],
       [ 0, -1, -1,  1,  0,  0,  0,  0,  1,  1],
       [ 0, -1, -2,  0,  2,  1,  1,  0,  0,  0],
       [ 0,  1,  0, -1,  1,  1,  0,  2,  1,  1],
       [ 0,  0,  2,  1,  0,  0,  0,  1,  3,  2],
       [ 0, -1,  1,  3,  2,  1,  0,  0,  2,  2],
       [ 0, -1,  0,  2,  4,  3,  2,  1,  1,  1]])

In [None]:
# Operation recorded for each cell of the score table:
#   N = origin cell (0, 0)
#   S = substitution (score came from the diagonal neighbour)
#   D = delete (row 0, or score came from the left neighbour)
#   I = insert (column 0, or score came from the upper neighbour)
optimal_solution_table

array([['N', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D'],
       ['I', 'I', 'I', 'I', 'S', 'S', 'S', 'D', 'I', 'I'],
       ['I', 'D', 'I', 'I', 'I', 'S', 'S', 'D', 'D', 'D'],
       ['I', 'D', 'S', 'D', 'I', 'I', 'I', 'S', 'S', 'D'],
       ['I', 'D', 'I', 'S', 'D', 'I', 'I', 'I', 'I', 'S'],
       ['I', 'D', 'I', 'I', 'S', 'D', 'S', 'D', 'I', 'I'],
       ['I', 'S', 'D', 'I', 'I', 'S', 'I', 'S', 'D', 'S'],
       ['I', 'I', 'S', 'D', 'I', 'I', 'S', 'I', 'S', 'D'],
       ['I', 'I', 'I', 'S', 'D', 'D', 'D', 'I', 'I', 'S'],
       ['I', 'D', 'I', 'I', 'S', 'D', 'D', 'D', 'I', 'I']], dtype='<U1')

In [None]:
# Largest score found in the last row of the score table.
global_optimal_value

4

In [None]:
# A smaller example; the returned tuple (score table, operations table,
# best last-row score) is displayed directly as the cell output.
sgv, sgw = "ACAGATA", "AGT"

SGPSA_DP_BU(sgv, sgw, Scoring_Matrix, d)

(array([[ 0,  0,  0,  0,  0,  0,  0,  0],
        [ 0,  1,  0,  1,  0,  1,  0,  1],
        [ 0,  0,  0,  0,  2,  1,  0,  0],
        [ 0, -1, -1, -1,  1,  1,  2,  1]]),
 array([['N', 'D', 'D', 'D', 'D', 'D', 'D', 'D'],
        ['I', 'S', 'D', 'S', 'D', 'S', 'D', 'S'],
        ['I', 'I', 'S', 'I', 'S', 'D', 'D', 'I'],
        ['I', 'I', 'I', 'I', 'I', 'S', 'S', 'D']], dtype='<U1'),
 2)