In [9]:
from Bio.pairwise2 import format_alignment
from Bio.SubsMat import MatrixInfo 
from Bio import pairwise2

## Biopython Pairwise Sequence Alignment

- <code>Bio.pairwise2</code> has implementations of Dynamic Programming (DP) algorithms for __Pairwise Sequence Alignment__.
- The alignment type is chosen with <code>pairwise2.align.X</code>, where <code>.X</code> is replaced by the type: <code>.global</code> or <code>.local</code>.
- After <code>.local</code> or <code>.global</code>, two letters are appended to indicate which parameters the function takes, e.g. <code>.localXX</code>:

  - The first letter indicates how matches/mismatches are scored: __a substitution matrix__ or __explicit match/mismatch parameters__
  - The second letter indicates the parameters for the gap penalty.
  
  - Some more specifics (first letter):
  
    If __x__ is provided, a match scores 1 and a mismatch scores 0. <br>
    If __m__ is provided, the function will allow us to define a match/mismatch score using appropriate parameters. <br>
    If __d__ is provided, we can pass on a dictionary to the function, defining a full substitution matrix. <br>
    
  - Some more specifics (second letter):
    
    If __x__ is provided, no gap penalties are imposed (g=0) <br>
    If __s__ is provided, we can define an affine gap penalty model (gap-open and gap-extension penalties, the same for both sequences) <br>
    

In [2]:
''' Global Alignment of two DNA sequences'''

# DNA sequences to align; the local DNA alignment cell reuses them.
seq1 = 'ATGGCAGATAGA'
seq2 = 'ATAGAGAATAG'

# globalxx = global alignment with the simplest scoring scheme:
#   first  "x": match scores 1, mismatch scores 0
#   second "x": no gap penalties
GDNA = pairwise2.align.globalxx(seq1, seq2)
print(f'# alternative optimal alignments: {len(GDNA)}')

# Each entry of GDNA is one co-optimal alignment; pretty-print all of them.
for alignment in GDNA:
    print(format_alignment(*alignment))

# alternative optimal alignments: 7
ATG-GCAGA-TAGA
||  | ||| ||| 
AT-AG-AGAATAG-
  Score=9

ATGGCAGA-TAGA
||.| ||| ||| 
ATAG-AGAATAG-
  Score=9

AT-GGCAGA-TAGA
|| |  ||| ||| 
ATAG--AGAATAG-
  Score=9

ATG-GCAG-ATAGA
||  | || |||| 
AT-AG-AGAATAG-
  Score=9

ATGGCAG-ATAGA
||.| || |||| 
ATAG-AGAATAG-
  Score=9

AT-GGCAG-ATAGA
|| |  || |||| 
ATAG--AGAATAG-
  Score=9

AT-G-GCAGATAGA
|| | | | |||| 
ATAGAG-A-ATAG-
  Score=9



In [10]:
''' Global Alignment of two protein sequences '''
# Global alignment of two short protein sequences.
# Scoring: BLOSUM62 substitution matrix, gap-open penalty -4, gap-extension penalty -1.

pseq1 = "EVSAW"
pseq2 = "KEVLA"

# BLOSUM62 substitution matrix; `sm` is also used by the local protein alignment cell.
sm = MatrixInfo.blosum62

# globalds: "d" = residue pairs are scored with the substitution-matrix dictionary `sm`,
#           "s" = gap-open / gap-extension penalties given explicitly (-4 / -1).
# NOTE: despite the `l` prefix, `lPRT` holds the *global* alignment result.
lPRT = pairwise2.align.globalds(pseq1, pseq2, sm, -4, -1)
for alignment in lPRT:
    print(format_alignment(*alignment))

-EVSAW
 ||.| 
KEVLA-
  Score=3



In [4]:
''' Local Alignment of two DNA sequences ''' 
# Local alignment of the DNA sequences seq1 / seq2 defined in the global DNA alignment cell.
# localms takes, after the two sequences: match score, mismatch score, gap-open, gap-extend.
# Here: match = 3, mismatch = -2, gap-open = -3, gap-extend = -3
# (open and extend are equal, so the same penalty of -3 is passed twice).

lDNA = pairwise2.align.localms(seq1, seq2, 3, -2, -3, -3)
for alignment in lDNA:
    print(format_alignment(*alignment))

1 ATGGCAGA-TAG
  ||.| ||| |||
1 ATAG-AGAATAG
  Score=19

1 ATGGCAG-ATAG
  ||.| || ||||
1 ATAG-AGAATAG
  Score=19



In [5]:
''' Local Alignment of Protein Sequences '''
# Local alignment of the protein sequences pseq1 / pseq2 (defined in the global protein alignment cell).
# Scoring: BLOSUM62 substitution matrix (`sm`, loaded from MatrixInfo.blosum62),
# gap-open penalty -4, gap-extension penalty -1.

# localds: "d" = substitution-matrix dictionary, "s" = explicit gap-open / gap-extension penalties.
pPROT = pairwise2.align.localds(pseq1, pseq2, sm, -4, -1)
for alignment in pPROT:
    print(format_alignment(*alignment))

1 EVSA
  ||.|
2 EVLA
  Score=11

