In [9]:
import numpy as np
import random

def load_monks(data_set_number):
    """Load the MONK's data.

    Args:
        data_set_number: which MONK's data set to use (1, 2, or 3)
    Returns:
        a tuple of numpy matrices, the first in the tuple is the training 
        data, second is test data. Each matrix row represents a data point 
        as a row vector: the first element of the row vector corresponds to 
        the label and the following elements correspond to attributes.
    """

    if data_set_number not in [1, 2, 3]:
        print("Invalid argument")
        return

    s = 'data/monks-'+str(data_set_number)+'.train'
    f = open(s, 'r')
    training = None
    for line in f.readlines():
        data = line.split(' ')
        vector = [int(i) for i in data[1:-1]]
        vector = np.array(vector)
        if training is None:
            training = vector
        else:
            training = np.vstack((training, vector))

    s = 'data/monks-'+str(data_set_number)+'.test'
    f = open(s, 'r')
    test = None
    for line in f.readlines():
        data = line.split(' ')
        vector = [int(i) for i in data[1:-1]]
        vector = np.array(vector)
        if test is None:
            test = vector
        else:
            test = np.vstack((training, vector))
    
    return (training, test)
(training, test) = load_monks(1)


In [11]:
def step_one(training):
    class0 = 0
    for col in training[:,0]:
        if col == 0:
            class0 += 1
    return [class0, training.shape[0] - class0, class0/training.shape[0], (training.shape[0]-class0)/training.shape[0]]

def step_two(training, feature):
    p1class0 = 0
    p2class0 = 0
    p3class0 = 0
    p4class0 = 0
    p1class1 = 0
    p2class1 = 0
    p3class1 = 0
    p4class1 = 0
    [class0, class1, pclass0, pclass1] = step_one(training)
    for row in training:
        if row[0] == 0:
            if row[feature] == 1:
                p1class0 += 1
            elif row[feature] == 2:
                p2class0 += 1
            elif row[feature] == 3:
                p3class0 += 1
            else:
                p4class0 += 1
        else:
            if row[feature] == 1:
                p1class1 += 1
            elif row[feature] == 2:
                p2class1 += 1
            elif row[feature] == 3:
                p3class1 += 1
            else:
                p4class1 += 1
    answer = [p1class0/class0, p2class0/class0, p3class0/class0, p4class0/class0, 
              p1class1/class1, p2class1/class1, p3class1/class1, p4class1/class1]
    return answer

def get_prob_matrix(training):
    matrix = []
    for feature in range(1, len(training[0])):
            row = step_two(training, feature)
            matrix.append(row)
    matrix = np.asarray(matrix)
    return matrix



def step_three(training, matrix, test):
    [class0, class1, pclass0, pclass1] = step_one(training)
    matrix = get_prob_matrix(training)
    answer=[]
    for row in test:
        p0 = 1
        p1 = 1
        for feature in range(0, len(row)-1):
            if row[feature+1] == 1:
                p0 = p0 * matrix[feature][0]
                p1 = p1 * matrix[feature][4]
            elif row[feature+1] == 2:
                p0 = p0 * matrix[feature][1]
                p1 = p1 * matrix[feature][5]
            elif row[feature+1] == 3:
                p0 = p0 * matrix[feature][2]
                p1 = p1 * matrix[feature][6]
            else:
                p0 = p0 * matrix[feature][3]
                p1 = p1 * matrix[feature][7]
        p0 = p0 * pclass0
        p1 = p1 * pclass1
        if p0 > p1:
            answer.append(0)
        else: 
            answer.append(1)
    return answer

In [13]:
def accuracy():
    acc = []
    for n in range(1, 4):
        (training, test) = load_monks(n)
        correct = 0
        index = 0
        prediction = step_three(training, get_prob_matrix(training), test)
        for row in test[:,0]:
            if row == prediction[index]:
                correct +=1
            index +=1
        acc.append(correct/test.shape[0])
    return acc
print("The accuracy is ", accuracy())

The accuracy is  [0.792, 0.6411764705882353, 0.9349593495934959]
