In [3]:
import numpy as np
import random

def load_monks(data_set_number):
    """Load the MONK's data.

    Args:
        data_set_number: which MONK's data set to use (1, 2, or 3)
    Returns:
        a tuple of numpy matrices, the first in the tuple is the training 
        data, second is test data. Each matrix row represents a data point 
        as a row vector: the first element of the row vector corresponds to 
        the label and the following elements correspond to attributes.
    """

    if data_set_number not in [1, 2, 3]:
        print("Invalid argument")
        return

    s = 'data/monks-'+str(data_set_number)+'.train'
    f = open(s, 'r')
    training = None
    for line in f.readlines():
        data = line.split(' ')
        vector = [int(i) for i in data[1:-1]]
        vector = np.array(vector)
        if training is None:
            training = vector
        else:
            training = np.vstack((training, vector))

    s = 'data/monks-'+str(data_set_number)+'.test'
    f = open(s, 'r')
    test = None
    for line in f.readlines():
        data = line.split(' ')
        vector = [int(i) for i in data[1:-1]]
        vector = np.array(vector)
        if test is None:
            test = vector
        else:
            test = np.vstack((training, vector))
    
    return (training, test)
(training, test) = load_monks(1)


In [5]:
# S is a numpy array
# this function returns sigmoid function of each element of S
def sigmoid(S):
    return 1/(1+ np.exp(-S))

# X is a 2 dimenstions numpy array with N datapoints and d features
# w is a 1 dimension numpy array of shape d
# this function returns sigmoid function of the dot product between X and w
def prob(w, X):
    return sigmoid(np.dot(X, w))


def loss(w, X, y):
    a = prob(w, X)
    return -np.mean(y*np.log(a) + (1-y)*np.log(1-a))

(124, 7)


In [13]:
def accuracy():
    acc = []
    for n in range(1, 4):
        (training, test) = load_monks(n)
        correct = 0
        index = 0
        prediction = step_three(training, get_prob_matrix(training), test)
        for row in test[:,0]:
            if row == prediction[index]:
                correct +=1
            index +=1
        acc.append(correct/test.shape[0])
    return acc
print("The accuracy is ", accuracy())

The accuracy is  [0.792, 0.6411764705882353, 0.9349593495934959]
