In [1]:
#!/usr/bin/env python3

########################################################################
# File: problem19.ipynb

# Author: Nicholas Chan
# History: 11/25/2021 Created
########################################################################

# Assignment 6: Problem 19
<br>
In this assignment problem, I was required to find the probability <br>
of a given path of hidden states occurring. This is done by taking the product <br>
of the probabilities of all transitions between consecutive states in the hidden <br>
state path given on input, multiplied by the probability of the starting state. <br>
The transition probabilities are given on input in the form of a transition table.<br>

# HiddenPathProb Class
<br>
The HiddenPathProb Class creates objects that can store a path of hidden states, <br>
list of possible hidden states, table for counting transition occurrences, and <br>
a transition table for some HMM. With this, HiddenPathProb objects can call the <br>
computeProb method to compute the probability of a hidden path occurring. <br>

In [2]:
import numpy as np
class HiddenPathProb:
    '''
    The HiddenPathProb class takes in a hidden path, hidden states, and a
    transition table created from parsed input to compute the probability
    of a sequence of states (the path) occurring.

    Transitions are looked up by slicing two characters out of the path
    string, so every state is expected to be a single character and every
    table key is the concatenation "{fromState}{toState}".
    '''
    def __init__(self, hiddenPath, hiddenStates, countTable, transitionTable):
        '''
        Initializes a HiddenPathProb object with the hidden path, hidden states,
        count table, and transition table from parsed input.

        hiddenPath      -- string of states, one character per state
        hiddenStates    -- list of the possible hidden states
        countTable      -- dict keyed by "{fromState}{toState}"; its values are
                           overwritten by computeProb with the transition counts
                           of hiddenPath
        transitionTable -- dict keyed by "{fromState}{toState}" holding the
                           transition probabilities
        '''
        self.hS = hiddenStates # Stores list of hidden states
        self.hp = hiddenPath # stores path of states as a string
        self.tT = transitionTable # stores transition table parsed from input
        self.cT = countTable # stores count table created from hidden states parsed on input

    def computeProb(self):
        '''
        Method computes the probability of the hidden path: the probability of
        the starting state (all states are taken to be equally likely, i.e.
        1/len(hidden states)) times the product of the transition
        probabilities along the path.

        Returns the starting-state probability alone when the path contains
        no transition (fewer than two states).

        Raises ValueError if the list of hidden states is empty, and KeyError
        if the path contains a transition that is missing from the count or
        transition table.

        Side effect: the count table is reset and refilled with the number of
        occurrences of each transition in the path, so the method can be
        called repeatedly and always returns the same value.
        '''
        if not self.hS:
            raise ValueError("hidden states list is empty")

        # Uniform starting distribution; equals the former constant 0.5
        # for a two-state HMM.
        initialProb = 1.0 / len(self.hS)

        if len(self.hp) < 2: # No transition to account for
            return initialProb

        # Start from a clean slate: without this, a second call would add
        # the same transitions on top of the previous counts.
        for transition in self.cT:
            self.cT[transition] = 0
        for i in range(1, len(self.hp)):
            self.cT[self.hp[i-1:i+1]] += 1

        # probability ** count is 1 for transitions that never occur, so
        # they do not affect the product.
        productList = [initialProb]
        for stateL in self.hS:
            for stateK in self.hS:
                productList.append(self.tT[stateL+stateK]**self.cT[stateL+stateK])
        return np.prod(productList)

# Main Function
<br>
Parses an input text file into the hidden path (a string), the list of possible hidden states, <br>
and the transition table that the HiddenPathProb class requires as input, then prints the <br>
probability of the hidden path.

In [34]:
def main(infile, outfile='', inCL=None):
    '''
    Parses input to populate the fields of the HiddenPathProb object.
    Parses information on the hidden path, hidden states, transition table,
    and transition count table. Initializes a HiddenPathProb object and
    prints the probability of a hidden path occurring to stdout.

    infile  -- path of the input text file. Expected layout: the hidden path,
               a separator line, the tab-separated hidden states, a separator
               line, the table header row, then one tab-separated row per
               state (row label followed by the transition probabilities).
    outfile -- accepted for interface compatibility; currently unused.
    inCL    -- accepted for interface compatibility; currently unused.
    '''
    import numpy as np
    # use long double floating point type for all computation
    # particularly, initialize the transition table to this

    with open(infile,'r') as myfile:
        hPath = myfile.readline().rstrip() # STRING OF HIDDEN PATH
        myfile.readline() # Clears over string of dashes
        hStates = myfile.readline().rstrip().split('\t') # LIST OF HIDDEN STATES
        myfile.readline() # Clears over string of dashes
        myfile.readline() # Clears over top row of the table which reiterates the states

        # TRANSITION TABLE PARSING
        trRows = []
        while True:
            trInRow = myfile.readline().rstrip()
            # Stop at end of file, a blank line, or a separator line. Only a
            # leading dash counts as a separator, so values written like
            # "1e-05" do not cut the table short.
            if len(trInRow) == 0 or trInRow.startswith('-'):
                break
            # Drop the row label and keep the probabilities as long doubles
            trRows.append([np.longdouble(value) for value in trInRow.split('\t')[1:]])

    trTable = dict() # Transition probabilities keyed by "{state1}{state2}"
    countTable = dict() # Transition counts keyed the same way, all starting at zero
    # Rows and columns are matched to states by position, i.e. the table is
    # assumed to list the states in the same order as the hidden states line.
    for stateIdx1, state1 in enumerate(hStates):
        for stateIdx2, state2 in enumerate(hStates):
            trTable[state1+state2] = trRows[stateIdx1][stateIdx2]
            countTable[state1+state2] = np.longdouble(0)

    myHPP = HiddenPathProb(hPath, hStates, countTable, trTable)
    pathProb = myHPP.computeProb()
    print(np.format_float_scientific(pathProb, 11))
        
if __name__ == "__main__":
    # Runs main on one input file; the commented-out calls are alternative
    # input files that can be swapped in.
#     main("testCases/test cases/19.txt")
    main("data/rosalind_ba10a.txt")
#     main("data/p19-simple-input.txt")

1.79454527550e-33


In [4]:
# INSPECTION

# INSPECTION TEAM
# Jodi Lee
# Nabil Mohammed

# RESPONSES
# - Work on implementing main function
# - Write more markdown comments
# - Clean up code
# - Add more docstrings and inline comments

# CORRECTIONS
# - Finished main function
# - Wrote markdown comments
# - Cleaned code
# - Added more docstrings and inline comments