In [3]:
import numpy as np
import random

In [4]:
def load_congress_data(training_ratio, path='data/house-votes-84.data', seed=1):
    """Load the congress data and split it into training and test sets.

    Note that missing values (denoted '?', where a voter abstained) are
    instead treated as a third type of attribute. Therefore every feature
    has 3 possible attributes (0 = 'n', 1 = 'y', 2 = '?'). Labels are
    encoded as 0 = 'republican', 1 = 'democrat'.

    Args:
        training_ratio: the ratio of examples that go into the training set
        path: location of the comma-separated data file
        seed: seed for the shuffle, so the same split is produced every time
    Returns:
        a tuple (training, test). Each element is a 2-D float numpy array
        whose rows are data points: the first element of a row is the label
        and the following elements are the attributes. A partition that
        receives no examples is returned as None.
    Raises:
        KeyError: if the file contains a token that is not one of
            'republican', 'democrat', 'n', 'y' or '?'.
    """
    label_conversions = {'republican': 0, 'democrat': 1,
                         'n': 0, 'y': 1, '?': 2}

    # The context manager guarantees the file is closed, even on error.
    with open(path, 'r') as f:
        # Blank lines (e.g. a trailing newline at EOF) carry no record and
        # would otherwise fail the label lookup with KeyError('').
        lines = [line for line in f if line.strip()]

    train_index = int(len(lines) * training_ratio)

    # Seeds the module-level generator (as the notebook always did) so the
    # shuffle, and therefore the split, is reproducible.
    random.seed(seed)
    random.shuffle(lines)

    # strip() rather than rstrip('\n') so CRLF files and stray spaces parse.
    rows = [[float(label_conversions[token.strip()])
             for token in line.split(',')]
            for line in lines]

    def as_matrix(chunk):
        # Build each matrix in one shot; always 2-D, even for a single row.
        if not chunk:
            return None
        return np.array(chunk, dtype=float)

    return (as_matrix(rows[:train_index]), as_matrix(rows[train_index:]))
# Build the split once at notebook level: 60% training, 40% test.
training, test = load_congress_data(0.6)

AttributeError: module 'numpy' has no attribute 'array'

In [None]:
def step_one(training):
    """Count the examples of each class and derive the class priors.

    Args:
        training: 2-D numpy array whose first column holds the label
            (0 is counted as republican, anything else as democrat).
    Returns:
        a list [n_rep, n_dem, p_rep, p_dem]: the two class counts followed
        by each count divided by the number of rows.
    """
    total = training.shape[0]
    republicans = sum(1 for label in training[:, 0] if label == 0)
    democrats = total - republicans
    return [republicans, democrats, republicans / total, democrats / total]

In [None]:
def step_two(training, feature):
    """Estimate P(feature value | class) for one feature column.

    The estimates are plain relative frequencies (no smoothing is applied).

    Args:
        training: 2-D numpy array; column 0 is the label (0 = republican,
            anything else = democrat), the other columns are attributes.
        feature: index of the attribute column to summarise.
    Returns:
        a list of six floats
        [P(0|rep), P(1|rep), P(other|rep), P(0|dem), P(1|dem), P(other|dem)],
        where "other" covers every value that is neither 0 nor 1 (the '?'
        code 2 in the congress data). If a class has no training examples
        its three entries are 0.0 instead of raising ZeroDivisionError.
    """
    training = np.atleast_2d(training)
    labels = training[:, 0]
    values = training[:, feature]

    answer = []
    # Republicans first, then democrats, matching the documented layout.
    for in_class in (labels == 0, labels != 0):
        column = values[in_class]
        total = int(column.shape[0])
        zeros = int(np.count_nonzero(column == 0))
        ones = int(np.count_nonzero(column == 1))
        others = total - zeros - ones
        if total == 0:
            # Class absent from the training data: nothing to estimate.
            answer.extend([0.0, 0.0, 0.0])
        else:
            answer.extend([zeros / total, ones / total, others / total])
    return answer

def get_prob_matrix(training):
    """Stack the per-feature conditional probabilities into one array.

    Args:
        training: training data whose rows start with the label followed by
            the attribute values.
    Returns:
        a numpy array with one row per attribute column (columns 1 onwards
        of `training`); each row is the list returned by `step_two` for
        that column.
    """
    n_columns = len(training[0])
    # Column 0 is the label, so the attributes start at index 1.
    per_feature = [step_two(training, column) for column in range(1, n_columns)]
    return np.asarray(per_feature)

def step_three(training, matrix, test):
    """Classify every test row with the naive Bayes decision rule.

    Args:
        training: 2-D numpy array of training rows (label in column 0);
            used here only for the class counts.
        matrix: conditional-probability table with one row per attribute,
            laid out as
            [P(0|rep), P(1|rep), P(other|rep), P(0|dem), P(1|dem), P(other|dem)].
            Pass None to have it computed from `training`.
        test: 2-D numpy array of rows to classify (label in column 0 is
            ignored). A single 1-D row is accepted; None yields [].
    Returns:
        a list with one prediction per test row: 0 (republican) when the
        republican score is strictly larger, otherwise 1 (democrat).
    """
    if test is None:
        return []

    # Honour the table the caller supplied; previously it was silently
    # discarded and recomputed from `training` on every call.
    if matrix is None:
        matrix = get_prob_matrix(training)
    matrix = np.asarray(matrix)

    # Class counts stand in for the priors: both priors share the same
    # denominator, so the comparison below is unaffected.
    labels = np.atleast_2d(training)[:, 0]
    n_rep = int(np.count_nonzero(labels == 0))
    n_dem = int(labels.shape[0]) - n_rep

    answer = []
    for row in np.atleast_2d(test):
        score_rep = 1
        score_dem = 1
        for index, value in enumerate(row[1:]):
            # Map the attribute value to its column in the table; anything
            # that is neither 0 nor 1 uses the third ("other") slot.
            if value == 0:
                slot = 0
            elif value == 1:
                slot = 1
            else:
                slot = 2
            score_rep = score_rep * matrix[index][slot]
            score_dem = score_dem * matrix[index][slot + 3]
        score_rep = score_rep * n_rep
        score_dem = score_dem * n_dem
        answer.append(0 if score_rep > score_dem else 1)
    return answer

In [None]:
def accuracy():
    """Train on 60% of the congress data and score the remaining rows.

    Returns:
        the fraction of test rows whose predicted label equals the label
        stored in column 0 of the test data.
    """
    training, test = load_congress_data(0.6)
    predicted = step_three(training, get_prob_matrix(training), test)
    # Pair each true label with its prediction and count the matches.
    hits = sum(1 for actual, guess in zip(test[:, 0], predicted)
               if actual == guess)
    return hits / test.shape[0]
# Run the full pipeline (load, fit, predict) and report the test accuracy.
print("The accuracy is ", accuracy())