# Protein Alignment Analysis
A comparison of protein alignment algorithms: BLAST using BLOSUM ranking, PAM, Smith-Waterman, and Needleman-Wunsch.

In [12]:
import numpy as np
import pandas as pd

## BLOSUM62

In [13]:
blosum62 = {
    ('W', 'F'): 1, ('L', 'R'): -2, ('S', 'P'): -1, ('V', 'T'): 0,
    ('Q', 'Q'): 5, ('N', 'A'): -2, ('Z', 'Y'): -2, ('W', 'R'): -3,
    ('Q', 'A'): -1, ('S', 'D'): 0, ('H', 'H'): 8, ('S', 'H'): -1,
    ('H', 'D'): -1, ('L', 'N'): -3, ('W', 'A'): -3, ('Y', 'M'): -1,
    ('G', 'R'): -2, ('Y', 'I'): -1, ('Y', 'E'): -2, ('B', 'Y'): -3,
    ('Y', 'A'): -2, ('V', 'D'): -3, ('B', 'S'): 0, ('Y', 'Y'): 7,
    ('G', 'N'): 0, ('E', 'C'): -4, ('Y', 'Q'): -1, ('Z', 'Z'): 4,
    ('V', 'A'): 0, ('C', 'C'): 9, ('M', 'R'): -1, ('V', 'E'): -2,
    ('T', 'N'): 0, ('P', 'P'): 7, ('V', 'I'): 3, ('V', 'S'): -2,
    ('Z', 'P'): -1, ('V', 'M'): 1, ('T', 'F'): -2, ('V', 'Q'): -2,
    ('K', 'K'): 5, ('P', 'D'): -1, ('I', 'H'): -3, ('I', 'D'): -3,
    ('T', 'R'): -1, ('P', 'L'): -3, ('K', 'G'): -2, ('M', 'N'): -2,
    ('P', 'H'): -2, ('F', 'Q'): -3, ('Z', 'G'): -2, ('X', 'L'): -1,
    ('T', 'M'): -1, ('Z', 'C'): -3, ('X', 'H'): -1, ('D', 'R'): -2,
    ('B', 'W'): -4, ('X', 'D'): -1, ('Z', 'K'): 1, ('F', 'A'): -2,
    ('Z', 'W'): -3, ('F', 'E'): -3, ('D', 'N'): 1, ('B', 'K'): 0,
    ('X', 'X'): -1, ('F', 'I'): 0, ('B', 'G'): -1, ('X', 'T'): 0,
    ('F', 'M'): 0, ('B', 'C'): -3, ('Z', 'I'): -3, ('Z', 'V'): -2,
    ('S', 'S'): 4, ('L', 'Q'): -2, ('W', 'E'): -3, ('Q', 'R'): 1,
    ('N', 'N'): 6, ('W', 'M'): -1, ('Q', 'C'): -3, ('W', 'I'): -3,
    ('S', 'C'): -1, ('L', 'A'): -1, ('S', 'G'): 0, ('L', 'E'): -3,
    ('W', 'Q'): -2, ('H', 'G'): -2, ('S', 'K'): 0, ('Q', 'N'): 0,
    ('N', 'R'): 0, ('H', 'C'): -3, ('Y', 'N'): -2, ('G', 'Q'): -2,
    ('Y', 'F'): 3, ('C', 'A'): 0, ('V', 'L'): 1, ('G', 'E'): -2,
    ('G', 'A'): 0, ('K', 'R'): 2, ('E', 'D'): 2, ('Y', 'R'): -2,
    ('M', 'Q'): 0, ('T', 'I'): -1, ('C', 'D'): -3, ('V', 'F'): -1,
    ('T', 'A'): 0, ('T', 'P'): -1, ('B', 'P'): -2, ('T', 'E'): -1,
    ('V', 'N'): -3, ('P', 'G'): -2, ('M', 'A'): -1, ('K', 'H'): -1,
    ('V', 'R'): -3, ('P', 'C'): -3, ('M', 'E'): -2, ('K', 'L'): -2,
    ('V', 'V'): 4, ('M', 'I'): 1, ('T', 'Q'): -1, ('I', 'G'): -4,
    ('P', 'K'): -1, ('M', 'M'): 5, ('K', 'D'): -1, ('I', 'C'): -1,
    ('Z', 'D'): 1, ('F', 'R'): -3, ('X', 'K'): -1, ('Q', 'D'): 0,
    ('X', 'G'): -1, ('Z', 'L'): -3, ('X', 'C'): -2, ('Z', 'H'): 0,
    ('B', 'L'): -4, ('B', 'H'): 0, ('F', 'F'): 6, ('X', 'W'): -2,
    ('B', 'D'): 4, ('D', 'A'): -2, ('S', 'L'): -2, ('X', 'S'): 0,
    ('F', 'N'): -3, ('S', 'R'): -1, ('W', 'D'): -4, ('V', 'Y'): -1,
    ('W', 'L'): -2, ('H', 'R'): 0, ('W', 'H'): -2, ('H', 'N'): 1,
    ('W', 'T'): -2, ('T', 'T'): 5, ('S', 'F'): -2, ('W', 'P'): -4,
    ('L', 'D'): -4, ('B', 'I'): -3, ('L', 'H'): -3, ('S', 'N'): 1,
    ('B', 'T'): -1, ('L', 'L'): 4, ('Y', 'K'): -2, ('E', 'Q'): 2,
    ('Y', 'G'): -3, ('Z', 'S'): 0, ('Y', 'C'): -2, ('G', 'D'): -1,
    ('B', 'V'): -3, ('E', 'A'): -1, ('Y', 'W'): 2, ('E', 'E'): 5,
    ('Y', 'S'): -2, ('C', 'N'): -3, ('V', 'C'): -1, ('T', 'H'): -2,
    ('P', 'R'): -2, ('V', 'G'): -3, ('T', 'L'): -1, ('V', 'K'): -2,
    ('K', 'Q'): 1, ('R', 'A'): -1, ('I', 'R'): -3, ('T', 'D'): -1,
    ('P', 'F'): -4, ('I', 'N'): -3, ('K', 'I'): -3, ('M', 'D'): -3,
    ('V', 'W'): -3, ('W', 'W'): 11, ('M', 'H'): -2, ('P', 'N'): -2,
    ('K', 'A'): -1, ('M', 'L'): 2, ('K', 'E'): 1, ('Z', 'E'): 4,
    ('X', 'N'): -1, ('Z', 'A'): -1, ('Z', 'M'): -1, ('X', 'F'): -1,
    ('K', 'C'): -3, ('B', 'Q'): 0, ('X', 'B'): -1, ('B', 'M'): -3,
    ('F', 'C'): -2, ('Z', 'Q'): 3, ('X', 'Z'): -1, ('F', 'G'): -3,
    ('B', 'E'): 1, ('X', 'V'): -1, ('F', 'K'): -3, ('B', 'A'): -2,
    ('X', 'R'): -1, ('D', 'D'): 6, ('W', 'G'): -2, ('Z', 'F'): -3,
    ('S', 'Q'): 0, ('W', 'C'): -2, ('W', 'K'): -3, ('H', 'Q'): 0,
    ('L', 'C'): -1, ('W', 'N'): -4, ('S', 'A'): 1, ('L', 'G'): -4,
    ('W', 'S'): -3, ('S', 'E'): 0, ('H', 'E'): 0, ('S', 'I'): -2,
    ('H', 'A'): -2, ('S', 'M'): -1, ('Y', 'L'): -1, ('Y', 'H'): 2,
    ('Y', 'D'): -3, ('E', 'R'): 0, ('X', 'P'): -2, ('G', 'G'): 6,
    ('G', 'C'): -3, ('E', 'N'): 0, ('Y', 'T'): -2, ('Y', 'P'): -3,
    ('T', 'K'): -1, ('A', 'A'): 4, ('P', 'Q'): -1, ('T', 'C'): -1,
    ('V', 'H'): -3, ('T', 'G'): -2, ('I', 'Q'): -3, ('Z', 'T'): -1,
    ('C', 'R'): -3, ('V', 'P'): -2, ('P', 'E'): -1, ('M', 'C'): -1,
    ('K', 'N'): 0, ('I', 'I'): 4, ('P', 'A'): -1, ('M', 'G'): -3,
    ('T', 'S'): 1, ('I', 'E'): -3, ('P', 'M'): -2, ('M', 'K'): -1,
    ('I', 'A'): -1, ('P', 'I'): -3, ('R', 'R'): 5, ('X', 'M'): -1,
    ('L', 'I'): 2, ('X', 'I'): -1, ('Z', 'B'): 1, ('X', 'E'): -1,
    ('Z', 'N'): 0, ('X', 'A'): 0, ('B', 'R'): -1, ('B', 'N'): 3,
    ('F', 'D'): -3, ('X', 'Y'): -1, ('Z', 'R'): 0, ('F', 'H'): -1,
    ('B', 'F'): -3, ('F', 'L'): 0, ('X', 'Q'): -1, ('B', 'B'): 4
}

In [14]:
GAP = -5    # gap penalty

def blosum_score(a, b):
    """Return BLOSUM62 score for characters a and b."""
    if (a, b) in blosum62:
        return blosum62[(a, b)]
    if (b, a) in blosum62:
        return blosum62[(b, a)]
    raise KeyError(f"No score for pair({a}, {b})")

def blosum_read_fasta(path):
    """
    Read a FASTA file containing one or more sequences.
    Returns a list of (header, sequence) tuples.
    """
    sequences = []
    header = None
    seq_lines = []

    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            if line.startswith(">"):
                # save previous sequence
                if header is not None:
                    sequences.append((header, "".join(seq_lines).upper()))
                    seq_lines = []
                header = line[1:]  # drop ">"
            else:
                seq_lines.append(line)

        # last sequence
        if header is not None:
            sequences.append((header, "".join(seq_lines).upper()))

    return sequences

def blosum_align(seq1, seq2):
    """Needleman-Wunsch global alignment using BLOSUM62."""
    n = len(seq1)
    m = len(seq2)

    dp = [[0] * (m+1) for _ in range(n+1)]
    ptr = [[None] * (m+1) for _ in range(n+1)]

    for i in range(1, n+1):
        dp[i][0] = i * GAP
        ptr[i][0] = 'U'
    for j in range(1, m+1):
        dp[0][j] = j * GAP
        ptr[0][j] = 'L'

    for i in range(1, n+1):
        for j in range(1, m+1):
            match = dp[i-1][j-1] + blosum_score(seq1[i-1], seq2[j-1])
            delete = dp[i-1][j] + GAP
            insert = dp[i][j-1] + GAP

            dp[i][j] = max(match, delete, insert)

            if dp[i][j] == match:
                ptr[i][j] = 'D'
            elif dp[i][j] == delete:
                ptr[i][j] = 'U'
            else:
                ptr[i][j] = 'L'

    aligned1 = []
    aligned2 = []
    i, j = n, m

    while i > 0 or j > 0:
        move = ptr[i][j]
        if move == 'D':
            aligned1.append(seq1[i-1])
            aligned2.append(seq2[j-1])
            i -= 1
            j -= 1
        elif move == 'U':
            aligned1.append(seq1[i-1])
            aligned2.append('-')
            i -= 1
        elif move == 'L':
            aligned1.append('-')
            aligned2.append(seq2[j-1])
            j -= 1

    return dp[n][m], "".join(reversed(aligned1)), "".join(reversed(aligned2))

def blosum_align_fasta(file1, file2):
    """
    Align the FIRST sequence in each FASTA file.
    Returns score, aligned_seq1, aligned_seq2.
    """
    seqs1 = blosum_read_fasta(file1)
    seqs2 = blosum_read_fasta(file2)

    if not seqs1 or not seqs2:
        raise ValueError("One of the FASTA files contains no sequences.")

    # Use first entry by default
    seq1 = seqs1[0][1]
    seq2 = seqs2[0][1]

    return blosum_align(seq1, seq2)

## PAM

In [15]:
# PAM Algorithm
# Author: Yashesha Kothari

#PAM250 Scoring Matrix - copied from source online
PAM250 = {
    ('A','A'):2,('A','R'):-2,('A','N'):0,('A','D'):0,('A','C'):-2,('A','Q'):0,('A','E'):0,('A','G'):1,('A','H'):-1,('A','I'):-1,
    ('A','L'):-2,('A','K'):-1,('A','M'):-1,('A','F'):-3,('A','P'):1,('A','S'):1,('A','T'):1,('A','W'):-6,('A','Y'):-3,('A','V'):0,
    ('R','A'):-2,('R','R'):6,('R','N'):0,('R','D'):-1,('R','C'):-4,('R','Q'):1,('R','E'):-1,('R','G'):-3,('R','H'):2,('R','I'):-2,
    ('R','L'):-3,('R','K'):3,('R','M'):0,('R','F'):-4,('R','P'):0,('R','S'):0,('R','T'):-1,('R','W'):2,('R','Y'):-4,('R','V'):-2,
    ('N','A'):0,('N','R'):0,('N','N'):2,('N','D'):2,('N','C'):-4,('N','Q'):1,('N','E'):1,('N','G'):0,('N','H'):2,('N','I'):-2,
    ('N','L'):-3,('N','K'):1,('N','M'):-2,('N','F'):-3,('N','P'):0,('N','S'):1,('N','T'):0,('N','W'):-4,('N','Y'):-2,('N','V'):-2,
    ('D','A'):0,('D','R'):-1,('D','N'):2,('D','D'):4,('D','C'):-5,('D','Q'):2,('D','E'):3,('D','G'):1,('D','H'):1,('D','I'):-2,
    ('D','L'):-4,('D','K'):0,('D','M'):-3,('D','F'):-6,('D','P'):-1,('D','S'):0,('D','T'):0,('D','W'):-7,('D','Y'):-4,('D','V'):-2,
    ('C','A'):-2,('C','R'):-4,('C','N'):-4,('C','D'):-5,('C','C'):12,('C','Q'):-5,('C','E'):-5,('C','G'):-3,('C','H'):-3,('C','I'):-2,
    ('C','L'):-6,('C','K'):-5,('C','M'):-5,('C','F'):-4,('C','P'):-3,('C','S'):0,('C','T'):-2,('C','W'):-8,('C','Y'):0,('C','V'):-2,
    ('Q','A'):0,('Q','R'):1,('Q','N'):1,('Q','D'):2,('Q','C'):-5,('Q','Q'):4,('Q','E'):2,('Q','G'):-1,('Q','H'):3,('Q','I'):-2,
    ('Q','L'):-2,('Q','K'):1,('Q','M'):-1,('Q','F'):-5,('Q','P'):0,('Q','S'):-1,('Q','T'):-1,('Q','W'):-5,('Q','Y'):-4,('Q','V'):-2,
    ('E','A'):0,('E','R'):-1,('E','N'):1,('E','D'):3,('E','C'):-5,('E','Q'):2,('E','E'):4,('E','G'):0,('E','H'):1,('E','I'):-2,
    ('E','L'):-3,('E','K'):0,('E','M'):-2,('E','F'):-5,('E','P'):-1,('E','S'):0,('E','T'):0,('E','W'):-7,('E','Y'):-4,('E','V'):-2
}

#PAM Scoring Function
def pam_score(seq1, seq2):
    score = 0
    length = min(len(seq1), len(seq2))

    for i in range(length):
        a = seq1[i]
        b = seq2[i]
        score += PAM250.get((a,b), -1)

    return score

## Smith Waterman

In [16]:
class SmithWaterman:
    """
    Performs the Smith-Waterman local alignment algorithm.

    Important Attributes:
    score (int) : the score for the matching algorithm
    alignment (tuple (str, str)) : the alignments for sequence 1 and 2

    Important Methods:
    display_matrix() : displays the scored matrix as a DataFrame
                       where cyan is the traceback method.
    """

    def __init__(self, seq1, seq2, match_score=1, mismatch_penalty=-1, gap_penalty=2):

        """
        Initializing our variables to perform Smith Waterman alignment
        """

        self.seq1 = seq1
        self.seq2 = seq2
        self.match_score = match_score
        self.mismatch_penalty = mismatch_penalty
        self.gap_penalty = gap_penalty

        self.rows = len(seq1) + 1
        self.cols = len(seq2) + 1

        self.max_score = 0
        self.max_pos = (0, 0)

        self.fill_matrix()

        self.score = self.find_score()
        self.alignment = self.traceback()
    
    def fill_matrix(self):

        # Initialize the scoring matrix with zeros in the first row and column
        self.matrix = np.zeros((self.rows, self.cols), dtype = int)

        # Fill the scoring matrix
        for i in range(1, self.rows):
            for j in range(1, self.cols):
                score_diag = self.matrix[i - 1, j - 1] + (self.match_score if self.seq1[i - 1] == self.seq2[j - 1] else self.mismatch_penalty)
                score_up = self.matrix[i - 1, j] - self.gap_penalty
                score_left = self.matrix[i, j - 1] - self.gap_penalty
            
                # Current cell score is the maximum of possible scores, or 0 if negative
                self.matrix[i, j] = max(0, score_diag, score_up, score_left)

    def find_score(self):
        for i in range(1, self.rows):
            for j in range(1, self.cols):
                if self.matrix[i, j] > self.max_score:
                    self.max_score = self.matrix[i, j]
                    self.max_pos = (i, j)

        return self.max_score

    def traceback(self):
        """
        Traceback will begin at the highest score and stop when we reach a 0
        """
        i, j = self.max_pos
        aligned_seq1, aligned_seq2 = "", ""
        traceback_coords = []
    
        #Traceback and create the alignment
        while self.matrix[i][j] != 0:
            traceback_coords.append((i,j))
            current_score = self.matrix[i][j]
            if current_score == self.matrix[i-1][j-1] + (self.match_score if self.seq1[i-1] == self.seq2[j-1] else self.mismatch_penalty):
                aligned_seq1 = self.seq1[i-1] + aligned_seq1
                aligned_seq2 = self.seq2[j-1] + aligned_seq2
                i -= 1
                j -= 1
            elif current_score == self.matrix[i-1][j] + self.gap_penalty:
                aligned_seq1 = self.seq1[i-1] + aligned_seq1
                aligned_seq2 = "-" + aligned_seq2
                i -= 1
            else:
                aligned_seq1 = "-" + aligned_seq1
                aligned_seq2 = self.seq2[j-1] + aligned_seq2
                j -= 1
                
        self.traceback_coords = traceback_coords

        #Fill with gaps to align
        while i > 0:
            aligned_seq1 = self.seq1[i - 1] + aligned_seq1
            aligned_seq2 = "-" + aligned_seq2
            i -= 1

        while j > 0:
            aligned_seq1 = "-" + aligned_seq1
            aligned_seq2 = self.seq2[j - 1] + aligned_seq2
            j -= 1
        
        return aligned_seq1, aligned_seq2
    
    def display_matrix(self):
        """
        Returns the scoring matrix with the traceback labeled in cyan
        """
        df = pd.DataFrame(self.matrix) 

        styled_df = df.style.apply( lambda row: [
            "background-color: cyan" 
            if (row.name, col_idx) in self.traceback_coords else ""
            for col_idx in range(df.shape[1]) ], axis=1 )
        

        return styled_df

In [17]:
example_sw = SmithWaterman("AGGTA", "ACGT")
print(f"Smith Waterman Score: {example_sw.score}\n")
print(f"Smith Waterman Alignment:\n{example_sw.alignment[0]}\n{example_sw.alignment[1]}")

example_sw.display_matrix()

Smith Waterman Score: 2

Smith Waterman Alignment:
--AGGT
AC--GT


Unnamed: 0,0,1,2,3,4
0,0,0,0,0,0
1,0,1,0,0,0
2,0,0,0,1,0
3,0,0,0,1,0
4,0,0,0,0,2
5,0,1,0,0,0


## Needleman-Wunsch Algorithm

This algorithm, developed by Saul B. Needleman and Christian D. Wunsch, is utilized for global sequence alignment of amino acids and nucleotides.

In [18]:
class NeedlemanWunsch:
    '''
    Contains the Needleman-Wunsch score & matrix for two sequences.

    Important Attributes:
    score (int) : the score for the matching algorithm
    alignment (tuple (str, str)) : the alignments for sequence 1 and 2

    Important Methods:
    display_matrix() : displays the scored matrix as a DataFrame
                       where red is the traceback method.
    '''
    def __init__(self, seq1, seq2, match = 1, mismatch = -1, gap = -1):
        self.seq1 = seq1
        self.seq2 = seq2
        self.match = match
        self.mismatch = mismatch
        self.gap = gap
    
        self.rows = len(seq1) + 1
        self.columns = len(seq2) + 1

        self.fill_matrix()

        self.score = self.find_score()
        self.alignment = self.traceback()

    def init_matrix(self):
        self.matrix = np.zeros((self.rows, self.columns), dtype = int)
        self.matrix[0, :] = np.arange(0, (self.columns) * self.gap, self.gap)
        self.matrix[:, 0] = np.arange(0, (self.rows) * self.gap, self.gap)

    def fill_matrix(self):

        self.init_matrix()

        for i in range(1, self.rows):                   #iterate through each cell of the matrix
            for j in range(1, self.columns):

                if self.seq1[i - 1] == self.seq2[j - 1]: #if the same, it's a match
                    match_score = self.match
                else:                                    #otherwise, penalize as a mismatch
                    match_score = self.mismatch
            
                up = self.matrix[i - 1, j] + self.gap
                left = self.matrix[i, j - 1] + self.gap
                diagonal = self.matrix[i - 1, j - 1] + match_score

                self.matrix[i, j] = max(diagonal, up, left)     #save the best score

    def find_score(self):
        #score is the bottom right cell of the matrix
        return self.matrix[self.rows - 1 , self.columns - 1]
    
    def traceback(self):
        i = self.rows  - 1
        j = self.columns - 1

        align1, align2 = "", ""
        traceback_coords = []

        while i > 0 and j > 0:
            traceback_coords.append((i, j))
            current = self.matrix[i, j]
            x1 = self.seq1[i - 1]
            x2 = self.seq2[j - 1]


            if x1 == x2: #determine score
                match_score = self.match
            else:
                match_score = self.mismatch

            #diagonal
            if current == self.matrix[i - 1, j - 1] + match_score:
                align1 = x1 + align1
                align2 = x2 + align2
                i -= 1
                j -= 1

            #up
            elif current == self.matrix[i - 1, j] + self.gap:
                align1 = x1 + align1
                align2 = "-" + align2
                i -= 1

            #left
            else:
                align1 = "-" + align1
                align2 = x2 + align2
                j -= 1

        #fill rest with gaps if one side already aligned
        while i > 0:
            traceback_coords.append((i, j))
            align1 = self.seq1[i - 1] + align1
            align2 = "-" + align2
            i -= 1

        while j > 0:
            traceback_coords.append((i, j))
            align1 = "-" + align1
            align2 = self.seq2[j - 1] + align2
            j -= 1

        self.traceback_coords = traceback_coords
        return align1, align2

    def display_matrix(self):
        '''
        Returns the scoring matrix with the traceback labeled in red
        '''
        df = pd.DataFrame(self.matrix) 

        styled_df = df.style.apply( lambda row: [
            "background-color: crimson" 
            if (row.name, col_idx) in self.traceback_coords else ""
            for col_idx in range(df.shape[1]) ], axis=1 )
        

        return styled_df


In [19]:

example_nw = NeedlemanWunsch("AGGTA", "ACGT")
print(f"Needleman-Wunsch Score: {example_nw.score}\n")
print(f"Needleman-Wunsch Alignment:\n{example_nw.alignment[0]}\n{example_nw.alignment[1]}")

example_nw.display_matrix()

Needleman-Wunsch Score: 1

Needleman-Wunsch Alignment:
AGGTA
ACGT-


Unnamed: 0,0,1,2,3,4
0,0,-1,-2,-3,-4
1,-1,1,0,-1,-2
2,-2,0,0,1,0
3,-3,-1,-1,1,0
4,-4,-2,-2,0,2
5,-5,-3,-3,-1,1


# Real world examples

In [20]:
def read_fasta(filename):
    with open(filename, 'r') as f:
        sequence = ""
        for line in f:
            line = line.strip()
            if line.startswith(">"): #ignore first line with label
                pass
            else:
                sequence += line
    return sequence

In [21]:
human_hbb = read_fasta("data/HBB_HUMAN.fasta")
mouse_hbb = read_fasta("data/HBB1_MOUSE.fasta")
pigeon_hbb = read_fasta("data/HBB_COLLI.fasta")

human_hbb_bl = blosum_read_fasta("data/HBB_HUMAN.fasta")
mouse_hbb_bl = blosum_read_fasta("data/HBB1_MOUSE.fasta")
pigeon_hbb_bl = blosum_read_fasta("data/HBB_COLLI.fasta")

In [None]:
#Human Hemoglobin VS Mouse Hemoglobin
print("Human vs Mouse\n")

humanVmouse_pam_score = pam_score(human_hbb, mouse_hbb)
print("PAM Score:", humanVmouse_pam_score)

#humanVmouse_bl_score = blosum_score(human_hbb_bl, mouse_hbb_bl)
humanVmouse_bl_align = blosum_align_fasta("data/HBB_HUMAN.fasta", "data/HBB1_MOUSE.fasta")
print(f"BLOSUM62 Score: {humanVmouse_bl_align[0]}\n")
print(f"BLOSUM62 Alignment:\n{humanVmouse_bl_align[1]}\n{humanVmouse_bl_align[2]}\n")

humanVmouse_sw = SmithWaterman(human_hbb, mouse_hbb)
print(f"Smith Waterman Score: {humanVmouse_sw.score}\n")
print(f"Smith Waterman Alignment:\n{humanVmouse_sw.alignment[0]}\n{humanVmouse_sw.alignment[1]}\n")

humanVmouse_nw = NeedlemanWunsch(human_hbb, mouse_hbb)
print(f"Needleman-Wunsch Score: {humanVmouse_nw.score}\n")
print(f"Needleman-Wunsch Alignment:\n{humanVmouse_nw.alignment[0]}\n{humanVmouse_nw.alignment[1]}")

Human vs Mouse

PAM Score: 19
BLOSUM62 Alignment:
638
MVHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH

Smith Waterman Score: 89

Smith Waterman Alignment:
MVHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH
MVHLTDAEKAAVSCLWGKVNSDEVGGEALGRLLVVYPWTQRYFDSFGDLSSASAIMGNAKVKAHGKKVITAFNDGLNHLDSLKGTFASLSELHCDKLHVDPENFRLLGNMIVIVLGHHLGKDFTPAAQAAFQKVVAGVATALAHKYH

Needleman-Wunsch Score: 89

Needleman-Wunsch Alignment:
MVHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH
MVHLTDAEKAAVSCLWGKVNSDEVGGEALGRLLVVYPWTQRYFDSFGDLSSASAIMGNAKVKAHGKKVITAFNDGLNHLDSLKGTFASLSELHCDKLHVDPENFRLLGNMIVIVLGHHLGKDFTPAAQAAFQKVVAGVATALAHKYH


In [None]:
#Human Hemoglobin VS Rock Dove Hemoglobin
print("Human vs Pigeon \n")

humanVpigeon_pam_score = pam_score(human_hbb, mouse_hbb)
print("PAM Score:", humanVpigeon_pam_score)

#humanVpigeon_bl_score = blosum_score(human_hbb_bl, pigeon_hbb_bl)
humanVpigeon_bl_align = blosum_align_fasta("data/HBB_HUMAN.fasta", "data/HBB_COLLI.fasta")
print(f"BLOSUM62 Score: {humanVpigeon_bl_align[0]}\n")
print(f"BLOSUM62 Alignment:\n{humanVpigeon_bl_align[1]}\n{humanVpigeon_bl_align[2]}\n")

humanVpigeon_sw = SmithWaterman(human_hbb, pigeon_hbb)
print(f"Smith Waterman Score: {humanVpigeon_sw.score}\n")
print(f"Smith Waterman Alignment:\n{humanVpigeon_sw.alignment[0]}\n{humanVpigeon_sw.alignment[1]}\n")

humanVpigeon_nw = NeedlemanWunsch(human_hbb, pigeon_hbb)
print(f"Needleman-Wunsch Score: {humanVpigeon_nw.score}\n")
print(f"Needleman-Wunsch Alignment:\n{humanVpigeon_nw.alignment[0]}\n{humanVpigeon_nw.alignment[1]}")

Human vs Pigeon 

PAM Score: 19
BLOSUM62 Alignment:
545
MVHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH

Smith Waterman Score: 58

Smith Waterman Alignment:
--------------MVHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH
VHWSAEEKQLITSI---------------WGKVNVADCGAEALARLLIVYPWTQRFFSSFGNLSSATAISGNPNVKAHGKKVLTSFGDAVKNLDNIKGTFAQLSELHCDKLHVDPENFRLLGDILVIILAAHFGKDFTPECQAAWQKLVRVVAHALARKYH

Needleman-Wunsch Score: 56

Needleman-Wunsch Alignment:
MVHLTPEEKSAVTALWGKVNV-DEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH
-VHWSAEEKQLITSIWGKVNVAD-CGAEALARLLIVYPWTQRFFSSFGNLSSATAISGNPNVKAHGKKVLTSFGDAVKNLDNIKGTFAQLSELHCDKLHVDPENFRLLGDILVIILAAHFGKDFTPECQAAWQKLVRVVAHALARKYH
