# Project 2: Hidden Markov Model

#### Nivas Hegde, Tanya Lenora Rego  

Method to read data - 

In [8]:
import pandas as pd
import numpy as np
from IPython.display import display, HTML

def loadData(filename):
    """Read the weather/umbrella log into a two-column DataFrame.

    The input is parsed by ``pandas.read_csv`` with explicit column names, so
    the first line of the file is treated as data, not as a header.

    Parameters
    ----------
    filename : str or file-like
        Anything ``pandas.read_csv`` accepts as its first argument.

    Returns
    -------
    pandas.DataFrame
        Frame whose columns are labelled ``'state'`` and ``'umbrella'``,
        in that order.
    """
    columnLabels = ['state', 'umbrella']
    observations = pd.read_csv(filename, names=columnLabels)
    return observations



Method to generate emission probability - 

In [9]:
def createEmitionProbs(dataset):
    """Estimate the emission matrix P(umbrella | state) from labelled data.

    For each weather state the rows carrying that state are selected and the
    fraction of them with umbrella ``'yes'`` / ``'no'`` is computed.  The
    matrix is also rendered as an HTML table in the notebook.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide a ``'state'`` column and an ``'umbrella'`` column.

    Returns
    -------
    numpy.ndarray
        Array of shape (3, 2).  Rows are ordered sunny, rainy, foggy; columns
        are ordered yes, no.  A state that never occurs in ``dataset`` keeps
        an all-zero row (previously this raised ``ZeroDivisionError``).
    """
    states = ['sunny', 'rainy', 'foggy']
    answers = ['yes', 'no']

    emissionMatrix = np.zeros((len(states), len(answers)))
    for row, state in enumerate(states):
        stateRows = dataset[dataset['state'].isin([state])]
        total = len(stateRows)
        if total == 0:
            # No samples for this state: the conditional distribution is
            # undefined, so leave the row at zero rather than divide by zero.
            continue
        for col, answer in enumerate(answers):
            matching = stateRows['umbrella'].eq(answer).sum()
            emissionMatrix[row, col] = matching / total

    print('\n\nEmission matrix - ')
    em = pd.DataFrame(data=emissionMatrix, columns=answers, index=states)
    display(HTML(em.to_html()))
    return emissionMatrix



Method to generate transition probability - 

In [10]:
def createTransitionProbs(dataset):
    """Estimate the state transition matrix from consecutive rows.

    Every pair of adjacent rows (i, i + 1) in ``dataset['state']`` is counted
    as one transition; each row of counts is then normalised by its total.
    The matrix is also rendered as an HTML table in the notebook.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide a ``'state'`` column whose values are ``'sunny'``,
        ``'rainy'`` or ``'foggy'``.

    Returns
    -------
    numpy.ndarray
        Array of shape (3, 3); entry [i, j] is the fraction of transitions
        leaving state i that go to state j, with states ordered sunny, rainy,
        foggy.  A state with no outgoing transitions keeps an all-zero row
        (previously this raised ``ZeroDivisionError``).

    Raises
    ------
    KeyError
        If a state label other than the three known ones is encountered.
    """
    matrixState = ['sunny', 'rainy', 'foggy']
    stateIndex = {name: position for position, name in enumerate(matrixState)}

    # Extract the column once; calling .iloc for every row inside the loop
    # is needlessly slow on long logs.
    sequence = dataset['state'].tolist()

    counts = np.zeros((len(matrixState), len(matrixState)))
    for current, following in zip(sequence, sequence[1:]):
        counts[stateIndex[current], stateIndex[following]] += 1

    # Normalise each row, skipping rows with no observed transitions so that
    # they stay at zero instead of triggering a division by zero.
    rowTotals = counts.sum(axis=1, keepdims=True)
    transitionMatrix = np.divide(
        counts, rowTotals, out=np.zeros_like(counts), where=rowTotals > 0
    )

    print('\n\nTransition matrix - ')
    tr = pd.DataFrame(data=transitionMatrix, columns=matrixState, index=matrixState)
    display(HTML(tr.to_html()))

    return transitionMatrix



Method to calculate initial probability - 

In [11]:
def calcInitialProb(dataset):
    """Compute the relative frequency of each weather state in the data.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide a ``'state'`` column.

    Returns
    -------
    list of float
        ``[sunny, rainy, foggy]`` — for each state, the number of rows with
        that state divided by the total number of rows.
    """
    totalRows = len(dataset)
    shares = []
    for label in ('sunny', 'rainy', 'foggy'):
        matching = dataset[dataset['state'].isin([label])]
        shares.append(len(matching) / totalRows)
    return shares



Method to create alpha matrix - 

In [12]:
def createAlpha(emissionMatrix, transitionMatrix, inputSequence, initialProbablity):
    """Build the forward (alpha) matrix for an observation sequence.

    Row 0 is the starting distribution and row t (t >= 1) holds, for each
    state j, ``sum_k alpha[t-1, k] * transition[k, j]`` multiplied by the
    emission probability of the t-th observation in state j.  The matrix and
    a final scalar are printed/rendered in the notebook as a side effect.

    The matrix is sized from ``len(inputSequence)``; it used to be fixed at
    11 rows, which broke on longer sequences and left unused zero rows (and a
    zero final probability) on shorter ones.

    Parameters
    ----------
    emissionMatrix : array-like, shape (3, 2)
        Column 0 is used for a ``'yes'`` observation, column 1 otherwise.
    transitionMatrix : array-like, shape (3, 3)
        Entry [k, j] is the probability of moving from state k to state j.
    inputSequence : sequence of str
        Observations; any value other than ``'yes'`` is treated like ``'no'``.
    initialProbablity : sequence of float
        Three weights applied to the last alpha row for the printed final
        probability.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(inputSequence) + 1, 3)`` with columns ordered
        sunny, rainy, foggy.
    """
    # Global numpy print setting: show small floats without scientific notation.
    np.set_printoptions(suppress=True)

    emission = np.asarray(emissionMatrix, dtype=float)
    transition = np.asarray(transitionMatrix, dtype=float)

    numSteps = len(inputSequence)
    alphaMatrix = np.zeros((numSteps + 1, 3))

    # NOTE(review): the chain is started in state 0 (sunny) with probability 1;
    # initialProbablity is not used to seed row 0 — confirm this is intended.
    alphaMatrix[0, 0] = 1

    for step, observation in enumerate(inputSequence, start=1):
        column = 0 if observation == 'yes' else 1
        # Propagate the previous row through the transitions, then weight by
        # how likely each state is to emit the current observation.
        alphaMatrix[step] = (alphaMatrix[step - 1] @ transition) * emission[:, column]

    print('\n\nAlpha matrix - ')
    ap = pd.DataFrame(data=alphaMatrix, columns=['sunny', 'rainy', 'foggy'])
    display(HTML(ap.to_html()))
    print('Last row (index %d) - final probablity used to calculate ω0' % numSteps)

    # NOTE(review): the last alpha row is weighted by initialProbablity here;
    # presumably these stand in for the transition into a final state ω0 —
    # verify against the project specification.
    finalProb = (alphaMatrix[numSteps, 0] * initialProbablity[0]
                 + alphaMatrix[numSteps, 1] * initialProbablity[1]
                 + alphaMatrix[numSteps, 2] * initialProbablity[2])
    print('\n\nFinal state probablity ω0 - ', finalProb)

    return alphaMatrix



Method to generate sequence using alpha matrix - 

In [13]:
def runViterbi(forwardProb):
    """Label every row of the alpha matrix with its most probable state.

    Despite the name, this does not perform Viterbi decoding: it simply takes
    the arg-max of each row independently.

    Exactly one label is produced per row.  When several states share the
    maximum value the first one in the order sunny, rainy, foggy wins
    (previously every tied state was appended, so the output could be longer
    than the input and no longer line up with time steps).

    Parameters
    ----------
    forwardProb : iterable of indexable
        Each item must support ``row[0]``, ``row[1]`` and ``row[2]``
        (sunny, rainy, foggy).  Every row is labelled, including the first.

    Returns
    -------
    list of str
        One of ``'sunny'``, ``'rainy'``, ``'foggy'`` per input row.
    """
    stateNames = ('sunny', 'rainy', 'foggy')

    predictedStates = []
    for row in forwardProb:
        best = 0
        # Strict '>' keeps the earliest state on ties.
        for candidate in (1, 2):
            if row[candidate] > row[best]:
                best = candidate
        predictedStates.append(stateNames[best])

    return predictedStates

Main function - 

In [14]:
def main(filename='Project2Data.txt', inputSequence=None):
    """Run the whole pipeline: load data, fit the model, label a sequence.

    Loads the dataset, derives the emission matrix, transition matrix and
    state frequencies from it, builds the alpha matrix for the observation
    sequence and prints the predicted states.

    Parameters
    ----------
    filename : str, optional
        Data file handed to ``loadData``.  Defaults to ``'Project2Data.txt'``.
    inputSequence : list of str, optional
        Umbrella observations (``'yes'`` / ``'no'``).  When omitted, the
        ten-observation sequence originally hard-coded here is used.

    Returns
    -------
    None
        Results are printed / displayed only.
    """
    if inputSequence is None:
        # Default observation sequence for the project.
        inputSequence = ['no', 'no', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'yes']

    dataset = loadData(filename)

    emissionMatrix = createEmitionProbs(dataset)
    transitionMatrix = createTransitionProbs(dataset)
    initialProbablity = calcInitialProb(dataset)

    forwardProb = createAlpha(emissionMatrix, transitionMatrix, inputSequence, initialProbablity)
    predictedStates = runViterbi(forwardProb)

    print('\n\nPredicted states - ')
    print(predictedStates)
      
# Execute the pipeline when this cell runs.
main()





Emission matrix - 


Unnamed: 0,yes,no
sunny,0.08502,0.91498
rainy,0.812245,0.187755
foggy,0.302682,0.697318




Transition matrix - 


Unnamed: 0,sunny,rainy,foggy
sunny,0.795132,0.054767,0.150101
rainy,0.195918,0.579592,0.22449
foggy,0.206897,0.291188,0.501916




Alpha matrix - 


Unnamed: 0,sunny,rainy,foggy
0,1.0,0.0,0.0
1,0.72753,0.010283,0.104668
2,0.550957,0.014322,0.114392
3,0.42506,0.013478,0.099947
4,0.030718,0.048892,0.035412
5,0.037816,0.007572,0.023263
6,0.033274,0.002485,0.013285
7,0.002524,0.005792,0.003699
8,0.000332,0.003714,0.00107
9,0.00111,0.000466,0.000991


10th row - final probablity used to calculate ω0


Final state probablity ω0 -  0.00023348764811321137


Predicted states - 
['sunny', 'sunny', 'sunny', 'sunny', 'rainy', 'sunny', 'sunny', 'rainy', 'rainy', 'sunny', 'rainy']
