In [1]:
def harmonic_in_instrument(harmonic, instrument):
    """Return 1 if `harmonic` lies close to any harmonic known to `instrument`.

    harmonic - numeric scalar or sequence describing one harmonic.
    instrument - dict whose keys are known harmonics (numeric scalars or
                 sequences). String keys, such as the 'unknown sound'
                 placeholder, are ignored because they carry no coordinates.

    Returns 1 when the Euclidean distance between `harmonic` and at least one
    key of the same length is below the threshold, otherwise 0.
    """
    threshold = 50        # this constant requires further modification
    target = np.atleast_1d(np.asarray(harmonic, dtype=float)).ravel()
    for key in instrument:
        if isinstance(key, str):
            # Placeholder entries are not points in harmonic space.
            continue
        candidate = np.atleast_1d(np.asarray(key, dtype=float)).ravel()
        if candidate.shape != target.shape:
            # Vectors of different length cannot be compared element-wise.
            continue
        # Distance between the two harmonics, not norm(key, ord=harmonic).
        if np.linalg.norm(candidate - target) < threshold:
            return 1
    return 0


In [2]:
# naive_bayes.py
# ---------------
# Licensing Information:  You are free to use or extend this projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to the University of Illinois at Urbana-Champaign
#
# Created by Justin Lizama (jlizama2@illinois.edu) on 09/28/2018

"""
This is the main entry point for MP4. You should only modify code
within this file -- the unrevised staff files will be used for all other
files and classes when code is run, so be careful to not modify anything else.
"""
import nltk
import numpy as np
from math import log

def naiveBayes(train_set, train_labels, dev_set, smoothing_parameter, pos_prior):
    """
    Classify each development sample as one of two instruments with Naive Bayes.

    train_set - Sequence of training harmonics, one per sample. Each entry is a
    numeric scalar or sequence; it is flattened into a tuple of floats so that
    it can be counted as a dictionary key.
    example: train_set := [[100.0, 200.0], [1000.0, 2000.0]]

    train_labels - Sequence of labels (instruments) aligned with train_set.
    A label equal to the first-instrument name (currently the placeholder "")
    is counted for the first instrument; any other label is counted for the
    second instrument.

    dev_set - Sequence of samples to classify. Each sample is an iterable of
              harmonics in the same numeric format as the entries of train_set.

    smoothing_parameter - Kept for interface compatibility. The value passed in
                          is overridden by the hard-coded 0.005 below.

    pos_prior - Kept for interface compatibility. It is not used; both
                instruments start from a uniform prior of 1/2.

    Returns a list with one label per entry of dev_set: 1 when the first
    instrument scores strictly higher, otherwise 0.
    """
    # NOTE(review): the caller's smoothing value is replaced on purpose by the
    # original author; confirm before honouring the argument instead.
    smoothing_parameter = 0.005
    # NOTE(review): placeholder for the first instrument's name; with the
    # current value only samples labelled "" are counted for instrument 1.
    first_instrument_label = ""
    unknown_key = 'unknown sound'
    # Same cut-off as harmonic_in_instrument; requires further tuning.
    match_threshold = 50

    # Count how often each harmonic (which harmonics a given tone has) occurs
    # for each of the two instruments.
    counts_1 = dict()
    counts_2 = dict()
    for sample, label in zip(train_set, train_labels):
        key = _harmonic_key(sample)
        bucket = counts_1 if label == first_instrument_label else counts_2
        bucket[key] = bucket.get(key, 0) + 1

    # Reserve probability mass for harmonics that match nothing seen in training.
    counts_1[unknown_key] = 0
    counts_2[unknown_key] = 0

    # Each table is normalised over its OWN keys; the two instruments do not
    # have to share the same set of harmonics.
    probability_1 = _smoothed_probabilities(counts_1, smoothing_parameter)
    probability_2 = _smoothed_probabilities(counts_2, smoothing_parameter)

    ret = list()
    for sample in dev_set:
        log_instrument1 = log(1./2.)
        log_instrument2 = log(1./2.)

        for harmonic in sample:
            log_instrument1 += log(_matched_probability(
                harmonic, probability_1, unknown_key, match_threshold))
            log_instrument2 += log(_matched_probability(
                harmonic, probability_2, unknown_key, match_threshold))

        ret.append(1 if log_instrument1 > log_instrument2 else 0)
    return ret


def _harmonic_key(harmonic):
    """Convert a numeric harmonic (scalar, list or array) to a hashable tuple."""
    flat = np.atleast_1d(np.asarray(harmonic, dtype=float)).ravel()
    return tuple(flat.tolist())


def _smoothed_probabilities(counts, smoothing):
    """Add `smoothing` to every count and normalise the result to sum to 1."""
    smoothed = {key: count + smoothing for key, count in counts.items()}
    total = sum(smoothed.values())
    return {key: value / total for key, value in smoothed.items()}


def _matched_probability(harmonic, probabilities, unknown_key, threshold):
    """Return the probability of the nearest known harmonic within `threshold`.

    Falls back to the probability stored under `unknown_key` when no known
    harmonic of the same length is closer than `threshold`.
    """
    target = np.atleast_1d(np.asarray(harmonic, dtype=float)).ravel()
    best_key = unknown_key
    best_distance = threshold
    for key in probabilities:
        if isinstance(key, str):
            # The placeholder entry has no coordinates to compare against.
            continue
        candidate = np.asarray(key, dtype=float)
        if candidate.shape != target.shape:
            continue
        distance = np.linalg.norm(candidate - target)
        if distance < best_distance:
            best_key = key
            best_distance = distance
    return probabilities[best_key]


In [None]:
# mp3.py
# ---------------
# Licensing Information:  You are free to use or extend this projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to the University of Illinois at Urbana-Champaign
#
# Created by Justin Lizama (jlizama2@illinois.edu) on 09/28/2018
import sys
import argparse
import configparser
import copy
import numpy as np

import reader
import naive_bayes as nb

"""
This file contains the main application that is run for this MP.
"""

def compute_accuracies(predicted_labels, dev_set, dev_labels):
    """Compute accuracy, F1, precision and recall for binary predictions.

    predicted_labels - sequence of predicted labels; 1 is the positive class
                       and 0 the negative class.
    dev_set - unused; kept so existing callers keep working.
    dev_labels - sequence of true labels aligned with predicted_labels.

    Returns the tuple (accuracy, f1, precision, recall) as floats. A metric
    whose denominator is zero (no positive predictions, no positive hits, or
    empty input) is reported as 0.0 instead of nan.

    Raises ValueError when the two label sequences differ in shape.
    """
    # Convert up front so that `==` is element-wise even for plain lists;
    # comparing two lists directly yields a single bool.
    yhats = np.asarray(predicted_labels)
    truth = np.asarray(dev_labels)
    if yhats.shape != truth.shape:
        raise ValueError(
            "predicted_labels and dev_labels must have the same shape, got "
            f"{yhats.shape} and {truth.shape}"
        )

    matches = yhats == truth
    accuracy = float(np.mean(matches)) if matches.size else 0.0

    tp = int(np.sum(matches & (yhats == 1)))
    predicted_positive = int(np.sum(yhats == 1))
    # Misclassified samples that were predicted as the negative class.
    fn = int(np.sum(~matches & (yhats == 0)))

    precision = tp / predicted_positive if predicted_positive else 0.0
    recall = tp / (fn + tp) if (fn + tp) else 0.0
    if precision + recall:
        f1 = 2 * (precision * recall) / (precision + recall)
    else:
        f1 = 0.0
    return accuracy, f1, precision, recall


def main(args):
    """Load the data, run the classifier and print its evaluation metrics.

    args - parsed command-line namespace; this function reads training_dir,
           development_dir, stemming, lower_case, laplace and pos_prior.
    """
    # Training data comes from files named "train_set" and "labels", resolved
    # by np.load relative to the current working directory.
    training_samples = np.load("train_set")
    training_targets = np.load("labels")

    # NOTE(review): assumes reader.load_dataset returns exactly two values
    # (development samples and their labels) - confirm against reader.py.
    dev_samples, dev_targets = reader.load_dataset(
        args.training_dir,
        args.development_dir,
        args.stemming,
        args.lower_case,
    )

    guesses = nb.naiveBayes(
        training_samples, training_targets, dev_samples, args.laplace, args.pos_prior
    )

    accuracy, f1, precision, recall = compute_accuracies(guesses, dev_samples, dev_targets)
    report = (
        ("Accuracy:", accuracy),
        ("F1-Score:", f1),
        ("Precision:", precision),
        ("Recall:", recall),
    )
    for caption, score in report:
        print(caption, score)

if __name__ == "__main__":
    def _parse_bool(text):
        """Turn a command-line value such as 'True' or 'false' into a bool.

        argparse's type=bool would call bool() on the raw string, so every
        non-empty value (including 'False') would be treated as True.
        """
        lowered = text.strip().lower()
        if lowered in ("1", "true", "t", "yes", "y", "on"):
            return True
        if lowered in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value (true/false), got %r" % text
        )

    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(description='CS440 MP4 Naive Bayes')

    parser.add_argument('--training', dest='training_dir', type=str, default = '../data/movies_review/train',
                        help='the directory of the training data')
    parser.add_argument('--development', dest='development_dir', type=str, default = '../data/movies_review/dev',
                        help='the directory of the development data')
    parser.add_argument('--stemming',dest="stemming", type=_parse_bool, default=False,
                        help='Use porter stemmer')
    parser.add_argument('--lower_case',dest="lower_case", type=_parse_bool, default=False,
                        help='Convert all word to lower case')
    parser.add_argument('--laplace',dest="laplace", type=float, default = 1.0,
                        help='Laplace smoothing parameter - default 1.0')
    parser.add_argument('--pos_prior',dest="pos_prior", type=float, default = 0.8,
                        help='Positive prior, i.e. Num_positive_comments / Num_comments')
    args = parser.parse_args()
    main(args)
