# In [None]:
# naive_bayes.py
# ---------------
# Licensing Information:  You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to the University of Illinois at Urbana-Champaign
#
# Created by Justin Lizama (jlizama2@illinois.edu) on 09/28/2018

"""
This is the main entry point for MP4. You should only modify code
within this file -- the unrevised staff files will be used for all other
files and classes when code is run, so be careful to not modify anything else.
"""
import nltk
import numpy as np
from math import log

def _class_log_probabilities(documents, smoothing_parameter):
    """Estimate smoothed token log-probabilities for a single class.

    documents - iterable of token lists that all belong to the same class
    smoothing_parameter - non-negative additive (Laplace) smoothing constant

    Returns a pair ``(token_log_probs, unseen_log_prob)``: a dict mapping every
    token observed in ``documents`` to its log-probability, and the
    log-probability assigned to any token that was never observed.
    """
    from collections import Counter

    counts = Counter(token for document in documents for token in document)

    # Every observed token gets `smoothing_parameter` added to its count, and
    # one extra smoothing slot is reserved for tokens never seen in this class.
    total = sum(counts.values()) + smoothing_parameter * (len(counts) + 1)
    if total <= 0:
        # No training tokens and no smoothing mass: nothing can be estimated.
        return {}, float("-inf")

    token_log_probs = {
        token: log((count + smoothing_parameter) / total)
        for token, count in counts.items()
    }
    # log(0) is undefined; an unsmoothed unseen token has probability zero.
    if smoothing_parameter > 0:
        unseen_log_prob = log(smoothing_parameter / total)
    else:
        unseen_log_prob = float("-inf")
    return token_log_probs, unseen_log_prob


def naiveBayes(train_set, train_labels, dev_set, smoothing_parameter, pos_prior):
    """
    Classify each example of dev_set with a two-class naive Bayes model.

    train_set - List of lists of tokens, one inner list per training example
    example: for two reviews 'like this movie' and 'i fall asleep',
    train_set := [['like','this','movie'], ['i','fall','asleep']]

    train_labels - List of labels corresponding with train_set; label 1 marks
    the positive (first) class and any other value marks the second class
    example: if the first example is positive and the second is negative,
    train_labels := [1, 0]

    dev_set - List of lists of tokens to classify, in the same format as train_set

    smoothing_parameter - The additive smoothing constant (must be >= 0)

    pos_prior - Prior probability of the positive class (must be within [0, 1])

    Returns a list with one entry per dev_set example: 1 when the positive
    class has the strictly larger posterior score, 0 otherwise (ties give 0).

    Raises ValueError when train_set and train_labels differ in length, when
    smoothing_parameter is negative, or when pos_prior is outside [0, 1].
    """
    if len(train_set) != len(train_labels):
        raise ValueError("train_set and train_labels must have the same length")
    if smoothing_parameter < 0:
        raise ValueError("smoothing_parameter must be non-negative")
    if not 0.0 <= pos_prior <= 1.0:
        raise ValueError("pos_prior must be within [0, 1]")

    # Split the training examples by class.
    positive_docs = []
    negative_docs = []
    for document, label in zip(train_set, train_labels):
        if label == 1:
            positive_docs.append(document)
        else:
            negative_docs.append(document)

    pos_log_probs, pos_unseen = _class_log_probabilities(
        positive_docs, smoothing_parameter)
    neg_log_probs, neg_unseen = _class_log_probabilities(
        negative_docs, smoothing_parameter)

    # A prior of exactly 0 or 1 makes one class impossible; avoid log(0).
    log_pos_prior = log(pos_prior) if pos_prior > 0 else float("-inf")
    log_neg_prior = log(1.0 - pos_prior) if pos_prior < 1 else float("-inf")

    predictions = []
    for document in dev_set:
        # Work in log space so long documents do not underflow to zero.
        pos_score = sum(
            (pos_log_probs.get(token, pos_unseen) for token in document),
            log_pos_prior,
        )
        neg_score = sum(
            (neg_log_probs.get(token, neg_unseen) for token in document),
            log_neg_prior,
        )
        predictions.append(1 if pos_score > neg_score else 0)
    return predictions

