# HMM model from scratch

Implementation of a simple HMM model from scratch for a discrete dataset.

In [1]:
import numpy as np
import pandas as pd
import csv

In [2]:
def LoadData():
    df = pd.read_csv('Project2Data.txt', sep="," ,names=['Weather','Outcome'])
    return df

In [3]:
def SplitDataByWeather(dataframe):
    data = dataframe.groupby('Weather')
    TS = data.get_group('sunny')
    TR = data.get_group('rainy')
    TF = data.get_group('foggy')
    return TS,TR,TF

In [4]:
def CreateTransitionProbs(data, len_of_TS, len_of_TR, len_of_TF):
    w, h = 3, 3;
    transprob = [[0 for x in range(w)] for y in range(h)]  #Allocate space for Transition Probability Matrix
   
    sumss=sumrr=sumff=sumsr=sumsf=sumrs=sumrf=sumfs=sumfr=0
    
    for i in range(len(data)-1): 
        j = i + 1
        if data[i]==data[j]=='sunny':
            sumss  += 1
        elif data[i]==data[j]=='rainy':
            sumrr += 1
        elif data[i]==data[j]=='foggy':
            sumff += 1
        elif data[i]=='sunny' and data[j]=='rainy':
            sumsr += 1 
        elif data[i]=='sunny' and data[j]=='foggy':
            sumsf += 1
        elif data[i]=='rainy' and data[j]=='sunny':
            sumrs += 1
        elif data[i]=='rainy' and data[j]=='foggy':
            sumrf += 1
        elif data[i]=='foggy' and data[j]=='sunny':
            sumfs += 1
        else:
            sumfr += 1
    
    transprob[0][0] = round(sumss/len_of_TS, 2)
    transprob[0][1] = round(sumrs/len_of_TR, 2)
    transprob[0][2] = round(sumfs/len_of_TF, 2)
    transprob[1][0] = round(sumsr/len_of_TS, 2)
    transprob[1][1] = round(sumrr/len_of_TR, 2)
    transprob[1][2] = round(sumfr/len_of_TF, 2)
    transprob[2][0] = round(sumsf/len_of_TS, 2)
    transprob[2][1] = round(sumrf/len_of_TR, 2)
    transprob[2][2] = round(sumff/len_of_TF, 2)
    
    DisplayTransitionProbs(transprob, w, h)
    return transprob

In [5]:
def DisplayTransitionProbs(transprob, w, h):
    print ("a) Transition Probabilities (𝑎𝑖𝑗 matrix):")
    print ("         Sunny |  Rainy |  Foggy")
    print ("       ------------------------")
    
    for i in range(w):
        if i == 0:
            print ("Sunny | ", end = " ")
        elif i == 1:
            print ("Rainy | ", end = " ")
        elif i == 2:
            print ("Foggy | ", end = " ")
        for j in range(h):
            print (format(transprob[i][j], '.2f'), end = "  |  ")
        print("\n")

In [6]:
def CreateEmitionProbs(TS, TR, TF):
    
    w, h = 2, 3;  #Here the first column is taking the value.
    emmisionprob = [[0 for x in range(w)] for y in range(h)] 
    
    emmisionprob [0][0] = SY = round(Count(TS, 'yes')/len(TS), 2)
    emmisionprob [0][1] = SN = round(Count(TS, 'no')/len(TS), 2)
    emmisionprob [1][0] = RY = round(Count(TR, 'yes')/len(TR), 2)
    emmisionprob [1][1] = RN = round(Count(TR, 'no')/len(TR), 2)
    emmisionprob [2][0] = FY = round(Count(TF, 'yes')/len(TF), 2)
    emmisionprob [2][1] = FN = round(Count(TF, 'no')/len(TF), 2)
   
    DisplayEmitionProbs(emmisionprob, w, h)
    return emmisionprob

In [7]:
def DisplayEmitionProbs(emmisionprob, w, h):
    
    print ("\nb) Emmision Probabilities (𝑏𝑗𝑘 matrix): ")
    print ("         Yes   |   No  ")
    print ("       ----------------")
    for i in range(h):
        if i == 0:
            print ("Sunny | ", end = " ")
        elif i == 1:
            print ("Rainy | ", end = " ")
        elif i == 2:
            print ("Foggy | ", end = " ")
        for j in range(w):
             print (format(emmisionprob [i][j],'.2f'), end = "  |  ")
        print("\n")

In [8]:
def Count(data, strsrch):
    
    data = data.groupby('Outcome')    
    n = len(data.get_group(strsrch))
    return n

In [9]:
def CreateAlphas(startstate, obs, transition_prob_matrix, emmision_prob_matrix, inprobsunny, inprobrainy, inprobfoggy):
    
    finalprob = 0
    k=0
    w, h = (len(obs)+1), 3 # Here 3 is getting assigned to h.
    fwdprob = [[0 for x in range(w)] for y in range(h)]
    
    if(startstate == 'sunny'):
        fwdprob [0][0]= 1
        fwdprob [1][0]= 0
        fwdprob [2][0]= 0
    elif(startstate == 'rainy'):
        fwdprob [0][0]= 0
        fwdprob [1][0]= 1
        fwdprob [2][0]= 0
    else:
        fwdprob [0][0]= 0
        fwdprob [1][0]= 0
        fwdprob [2][0]= 1

    for j in range(1,w):
        for i in range(h):
            fwdprob[i][j] = Probability(i, j, h, fwdprob, transition_prob_matrix, emmision_prob_matrix, obs, k)
        k+=1
        
    finalprob+=fwdprob[0][w-1] * inprobsunny * 1 #emmision prob of V0 at w0 is 1
    finalprob+=fwdprob[1][w-1] * inprobrainy * 1
    finalprob+=fwdprob[2][w-1] * inprobfoggy * 1
    
    print ("\nc) Probability for the sequence to occur in the given model is: ",finalprob)
    print()
    return (fwdprob,w)

In [10]:
def Probability(geni, genj, h, fwdprob, transition_prob_matrix, emmision_prob_matrix, obs,k):
    
    probab = 0
    i=geni
    for j in range(h):
        probab+=fwdprob[j][genj-1] * transition_prob_matrix[i][j] * emmision_prob_matrix[i][obs[k]]
        probab = round(probab,5)
    return probab

In [11]:
def DisplayAlphas(fwdmatrix, w, h, obs1):
    print("      |    Φ     ", end = " ")
    for k in range(w-1):
        print (obs1[k], end = "       ")
    print()    
    print("-----------------------------------------------------------------------------------------------------------")
    for i in range(h):
        if i == 0:
            print ("Sunny | ", end = " ")
        elif i == 1:
            print ("Rainy | ", end = " ")
        elif i == 2:
            print ("Foggy | ", end = " ")
        for j in range(w):
            print(format(fwdmatrix[i][j],'.4f'), end = " | ")
        print("\n")

In [12]:
def RunViterbi(fwdmatrix, w, h):
    
    print ("\nd) Decoding Problem: Solved by Viterbi algorithm ")
    print ("\nSequence of the hidden states is given below :")
    for j in range(1,w):
        
        a = fwdmatrix[0][j]
        b = fwdmatrix[1][j]
        c = fwdmatrix[2][j]
        
        if (a>b and a>c):
            print ("sunny", end=", ")
        elif (b>a and b>c):
            print ("rainy", end=", ")
        else:
            print ("foggy", end=", ")            

In [13]:
def main():
    
    df = LoadData()
    dflength = len(df)
    obs1 = np.array(['no', 'no', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'yes'])
    startstate = 'sunny'
    
    obs_for_alphas = np.array(obs1)
    obs1[obs1=='yes'] = 0
    obs1[obs1=='no'] = 1
    obs1=obs1.astype(int)
    number_of_observations = len(obs1)
     
    TS,TR,TF = SplitDataByWeather(df) 
    column1 = df['Weather'].tolist()
    
    print("Project 2 | Output:\n")
    
    transition_prob_matrix = CreateTransitionProbs(column1, len(TS), len(TR), len(TF))
    emmision_prob_matrix = CreateEmitionProbs(TS, TR, TF)
    
    inprobsunny = len(TS)/dflength
    inprobrainy = len(TR)/dflength
    inprobfoggy = len(TF)/dflength
    
    fwdmatrix, w= CreateAlphas(startstate, obs1, transition_prob_matrix, emmision_prob_matrix, inprobsunny, inprobrainy, inprobfoggy)
    DisplayAlphas(fwdmatrix, w, 3, obs_for_alphas)
    
    RunViterbi(fwdmatrix, w, 3)

main()

Project 2 | Output:

a) Transition Probabilities (𝑎𝑖𝑗 matrix):
         Sunny |  Rainy |  Foggy
       ------------------------
Sunny |  0.79  |  0.20  |  0.21  |  

Rainy |  0.05  |  0.58  |  0.29  |  

Foggy |  0.15  |  0.22  |  0.50  |  


b) Emmision Probabilities (𝑏𝑗𝑘 matrix): 
         Yes   |   No  
       ----------------
Sunny |  0.09  |  0.91  |  

Rainy |  0.81  |  0.19  |  

Foggy |  0.30  |  0.70  |  


c) Probability for the sequence to occur in the given model is:  0.00022691

      |    Φ      no       no       no       yes       no       no       yes       yes       no       yes       
-----------------------------------------------------------------------------------------------------------
Sunny |  1.0000 | 0.7189 | 0.5386 | 0.4114 | 0.0313 | 0.0374 | 0.0325 | 0.0026 | 0.0003 | 0.0011 | 0.0001 | 

Rainy |  0.0000 | 0.0095 | 0.0137 | 0.0129 | 0.0459 | 0.0072 | 0.0024 | 0.0054 | 0.0035 | 0.0004 | 0.0005 | 

Foggy |  0.0000 | 0.1050 | 0.1137 | 0.0985 | 0.0341 | 0.0223 |