In [2]:
from Q1F import Viterbi
from Q1F import Forward
import sys

class MTrain:
    '''Trains a Hidden Markov Model with a single M-step (frequency counting).

    The training file holds one "<state> <observation>" pair per line.
    States and observations are mapped to consecutive integer ids in order
    of first appearance; the transition and emission matrices are then the
    relative frequencies observed in that labelled sequence.
    '''

    def __init__(self, filename):
        '''Load the labelled sequence from `filename` and print both id maps.'''
        # Number of distinct observations / hidden states seen so far.
        self.num_observations = 0
        self.num_states = 0
        # The sequence as [obs_id, state_id] pairs, and as two parallel lists.
        self.pair_list = []
        self.state_list = []
        self.obs_list = []
        # Name -> integer id lookups.
        self.observation_dict = {}
        self.state_dict = {}
        # Filled in by make_trans_mat() / make_emiss_mat().
        self.trans_mat = []
        self.emiss_mat = []
        # Handles on the loaded sequence lists, keyed by attribute name.
        self.list_dict = {}

        self.load_file(filename)
        print(self.state_dict)
        print(self.observation_dict)

    def load_file(self, filename):
        '''Loads the data file the TAs give us.

        Every non-blank line must split into exactly two whitespace-separated
        fields, "<state> <observation>". Blank lines are skipped.

        Raises:
            ValueError: if a non-blank line does not have exactly two fields.
            IOError: if the file cannot be opened.
        '''
        # The with-block guarantees the handle is closed even if a line is
        # malformed and the unpacking below raises.
        with open(filename) as file_obj:
            for line in file_obj:
                fields = line.split()
                if not fields:
                    # A trailing or stray empty line carries no data.
                    continue
                state, observation = fields
                if observation not in self.observation_dict:
                    self.observation_dict[observation] = self.num_observations
                    self.num_observations += 1
                if state not in self.state_dict:
                    self.state_dict[state] = self.num_states
                    self.num_states += 1
                obs_num = self.observation_dict[observation]
                state_num = self.state_dict[state]
                self.pair_list.append([obs_num, state_num])
                self.state_list.append(state_num)
                self.obs_list.append(obs_num)
        # Save original data for later use (cross validation). These entries
        # refer to the same list objects as the attributes, not to copies.
        self.list_dict['pair_list'] = self.pair_list
        self.list_dict['state_list'] = self.state_list
        self.list_dict['obs_list'] = self.obs_list

    def run(self):
        '''Makes the transition and emission matrices, and prints them out.'''
        print('Running')
        self.make_trans_mat()
        self.make_emiss_mat()

    @staticmethod
    def _normalize_rows(counts):
        '''Return a copy of `counts` with every row divided by its row sum.

        A row whose sum is zero is returned as all 0.0 rather than dividing
        by zero; this happens for a state that was never seen as the source
        of a transition (e.g. it only occurs as the final sequence element).
        '''
        normalized = []
        for row in counts:
            total = sum(row)
            if total:
                # float() keeps true division under Python 2 as well.
                normalized.append([cell / float(total) for cell in row])
            else:
                normalized.append([0.0] * len(row))
        return normalized

    def make_trans_mat(self):
        '''Makes the transition matrix and prints it.

        The rows represent initial hidden states (moods) and the columns
        represent subsequent hidden states (moods). So as an example,
        self.trans_mat[i][j] is the probability that Ron transitions from
        mood i to mood j: the number of times j directly follows i in the
        state list, divided by the number of times i appears anywhere but
        the last position (nothing follows the last element).
        '''
        size = self.num_states
        counts = [[0] * size for _ in range(size)]
        # One pass over adjacent pairs; the final state has no successor and
        # therefore never appears as `current`.
        for current, following in zip(self.state_list, self.state_list[1:]):
            counts[current][following] += 1
        self.trans_mat = self._normalize_rows(counts)
        self.print_mat('Transition Matrix', self.trans_mat, True)

    def make_emiss_mat(self):
        '''Makes the emission matrix and prints it.

        The rows represent hidden states (moods) and the columns represent
        observations (music genres). self.emiss_mat[i][j] is the probability
        of observing genre j given that Ron is in mood i: the number of
        (state i, observation j) pairs divided by the number of pairs whose
        state is i.
        '''
        counts = [[0] * self.num_observations for _ in range(self.num_states)]
        # pair_list entries are stored as [observation id, state id].
        for obs_num, state_num in self.pair_list:
            counts[state_num][obs_num] += 1
        self.emiss_mat = self._normalize_rows(counts)
        self.print_mat('Emission Matrix', self.emiss_mat, True)

    def print_mat(self, label, mat, newline):
        '''Print `label` as a banner and again on its own line, then each row
        of `mat` followed by blank lines; add extra blank lines at the end
        when `newline` is true.'''
        # Single-argument print(...) produces identical output under the
        # Python 2 print statement and the Python 3 print function.
        print('===== {0} ====='.format(label))
        print(label)
        for row in mat:
            print(row)
            print('\n')
        if newline:
            print('\n')

if __name__ == '__main__':
    # Script entry point: load the labelled sequence from the data file,
    # then build and print the transition and emission matrices.
    mtrain = MTrain(
        '/Users/LeiyaMa/Desktop/ron.txt'
    )
    mtrain.run()


{'mellow': 1, 'sad': 2, 'angry': 3, 'happy': 0}
{'classical': 9, 'house': 2, 'jazz': 7, 'metal': 3, 'pop': 1, 'dubstep': 6, 'rap': 8, 'rock': 0, 'blues': 5, 'folk': 4}
Running
===== Transition Matrix =====
Transition Matrix
[0.2830188679245283, 0.4716981132075472, 0.12971698113207547, 0.11556603773584906]


[0.2335907335907336, 0.3803088803088803, 0.29343629343629346, 0.09266409266409266]


[0.1035031847133758, 0.09713375796178345, 0.3678343949044586, 0.4315286624203822]


[0.18870967741935485, 0.09838709677419355, 0.3064516129032258, 0.4064516129032258]




===== Emission Matrix =====
Emission Matrix
[0.14858490566037735, 0.22877358490566038, 0.15330188679245282, 0.1179245283018868, 0.04716981132075472, 0.05188679245283019, 0.02830188679245283, 0.12971698113207547, 0.09198113207547169, 0.0023584905660377358]


[0.10597302504816955, 0.009633911368015413, 0.019267822736030827, 0.030828516377649325, 0.16955684007707128, 0.046242774566473986, 0.14065510597302505, 0.23892100192678228, 0.13