In [30]:
import arff
import numpy as np

from sklearn.decomposition import PCA

train = arff.load(open('train.arff', 'rb'))
test = arff.load(open('test.arff', 'rb'))

train = train['data']
test = test['data']

train_num = len(train)
test_num = len(test)
traindata=np.zeros((train_num, 20))
trainlabel = np.zeros(train_num)
testdata = np.zeros((len(test), 20))
testlabel = np.zeros(test_num)

for d in range(0, train_num):
    data = train[d]
    traindata[:d] = np.asarray(data[:-1])
    trainlabel[d] = data[-1]
    
for d in range(0, test_num):
    data = test[d]
    testdata[:d] = np.asarray(data[:-1])
    testlabel[d] = data[-1]

total = np.vstack((traindata, testdata))
pca = PCA(n_components=2).fit_transform(total)
traindata = pca[:train_num]
testdata = pca[:test_num]         

clf = svm.SVC(kernel='linear', probability=True)
clf.fit(traindata, trainlabel)
print clf.score(testdata, testlabel)
    
    

0.506963788301


In [55]:
from util import *
import sys


def InitNN(num_inputs, num_hiddens, num_outputs):
    """Initializes NN parameters."""
    W1 = 0.01 * np.random.randn(num_inputs, num_hiddens)
    W2 = 0.01 * np.random.randn(num_hiddens, num_outputs)
    b1 = np.zeros((num_hiddens, 1))
    b2 = np.zeros((num_outputs, 1))
    return W1, W2, b1, b2

def TrainNN(num_hiddens, eps, momentum, num_epochs):
    
    
    """Trains a single hidden layer NN.
    
    Inputs:
    num_hiddens: NUmber of hidden units.
    eps: Learning rate.
    momentum: Momentum.
    num_epochs: Number of epochs to run training for.

    Returns:
    W1: First layer weights.
    W2: Second layer weights.
    b1: Hidden layer bias.
    b2: Output layer bias.
    train_error: Training error at at epoch.
    valid_error: Validation error at at epoch.
    """
        
    #read data
    train = arff.load(open('train.arff', 'rb'))
    test = arff.load(open('test.arff', 'rb'))

    train = train['data']
    test = test['data']

    train_num = len(train)
    test_num = len(test)
    traindata=np.zeros((train_num, 20))
    trainlabel = np.zeros(train_num)
    testdata = np.zeros((len(test), 20))
    testlabel = np.zeros(test_num)

    for d in range(0, train_num):
        data = train[d]
        traindata[:d] = np.asarray(data[:-1])
        trainlabel[d] = data[-1]

    for d in range(0, test_num):
        data = test[d]
        testdata[:d] = np.asarray(data[:-1])
        testlabel[d] = data[-1]
    
    inputs_train = traindata.T
    target_train = (trainlabel.reshape(1,trainlabel.shape[0]))/4.0
    inputs_test = testdata.T
    target_test = (testlabel.reshape(1,testlabel.shape[0]))/4.0
    
    print inputs_train.shape, target_train.shape, inputs_test.shape, target_test.shape
    
    W1, W2, b1, b2 = InitNN(inputs_train.shape[0], num_hiddens, target_train.shape[0])
    dW1 = np.zeros(W1.shape)
    dW2 = np.zeros(W2.shape)
    db1 = np.zeros(b1.shape)
    db2 = np.zeros(b2.shape)
    train_error = []
    valid_error = []
    num_train_cases = inputs_train.shape[1]
    for epoch in xrange(num_epochs):
        # Forward prop
        h_input = np.dot(W1.T, inputs_train) + b1  # Input to hidden layer.
        h_output = 1 / (1 + np.exp(-h_input))  # Output of hidden layer.
        logit = np.dot(W2.T, h_output) + b2  # Input to output layer.
        prediction = 1 / (1 + np.exp(-logit))  # Output prediction.  #probability if >0.5 correct
        
        print prediction.shape
        # Compute cross entropy
        train_CE = -np.mean(target_train * np.log(prediction) + (1 - target_train) * np.log(1 - prediction))
        print train_CE
        # Compute deriv
        dEbydlogit = prediction - target_train

        # Backprop
        dEbydh_output = np.dot(W2, dEbydlogit)
        dEbydh_input = dEbydh_output * h_output * (1 - h_output)

        # Gradients for weights and biases.
        dEbydW2 = np.dot(h_output, dEbydlogit.T)
        dEbydb2 = np.sum(dEbydlogit, axis=1).reshape(-1, 1)
        dEbydW1 = np.dot(inputs_train, dEbydh_input.T)
        dEbydb1 = np.sum(dEbydh_input, axis=1).reshape(-1, 1)

        #%%%% Update the weights at the end of the epoch %%%%%%
        dW1 = momentum * dW1 - (eps / num_train_cases) * dEbydW1
        dW2 = momentum * dW2 - (eps / num_train_cases) * dEbydW2
        db1 = momentum * db1 - (eps / num_train_cases) * dEbydb1
        db2 = momentum * db2 - (eps / num_train_cases) * dEbydb2

        W1 = W1 + dW1
        W2 = W2 + dW2
        b1 = b1 + db1
        b2 = b2 + db2

        valid_CE = Evaluate(inputs_test, target_test, W1, W2, b1, b2)

        train_error.append(train_CE)
        valid_error.append(valid_CE)
        sys.stdout.write('\rStep %d Train CE %.5f Validation CE %.5f' % (epoch, train_CE, valid_CE))
        sys.stdout.flush()
        if (epoch % 100 == 0):
            sys.stdout.write('\n')

    sys.stdout.write('\n')
    final_train_error = Evaluate(inputs_train, target_train, W1, W2, b1, b2)
    #final_valid_error = Evaluate(inputs_valid, target_valid, W1, W2, b1, b2)
    #final_test_error = Evaluate(inputs_test, target_test, W1, W2, b1, b2)
    #print 'Error: Train %.5f Validation %.5f Test %.5f' % (final_train_error, final_valid_error, final_test_error)
    return W1, W2, b1, b2, train_error, valid_error
  

def Evaluate(inputs, target, W1, W2, b1, b2):
    """Evaluates the model on inputs and target."""
    h_input = np.dot(W1.T, inputs) + b1  # Input to hidden layer.
    h_output = 1 / (1 + np.exp(-h_input))  # Output of hidden layer.
    logit = np.dot(W2.T, h_output) + b2  # Input to output layer.
    prediction = 1 / (1 + np.exp(-logit))  # Output prediction.
    #CE = -np.mean(target * np.log(prediction) + (1 - target) * np.log(1 - prediction))
    CE = np.mean(np.round(abs(target-prediction), 0)) #correct rate
    return CE


def main():
    num_hiddens = 50
    eps = 0.02
    momentum = 0.5
    num_epochs = 2000
    W1, W2, b1, b2, train_error, valid_error = TrainNN(num_hiddens, eps, momentum, num_epochs)
    DisplayErrorPlot(train_error, valid_error) 
    # If you wish to save the model for future use :
    # outputfile = 'model.npz'
    # SaveModel(outputfile, W1, W2, b1, b2, train_error, valid_error)

if __name__ == '__main__':
    main()


(20, 11000) (1, 11000) (20, 359) (1, 359)
(1, 11000)
0.61095078625
Step 0 Train CE 0.61095 Validation CE 0.02973
(1, 11000)
0.0374566842881
Step 1 Train CE 0.03746 Validation CE -0.75484(1, 11000)
-0.727707185239
Step 2 Train CE -0.72771 Validation CE -1.57872(1, 11000)
-1.51468012986
Step 3 Train CE -1.51468 Validation CE -2.41544(1, 11000)
-2.28753783266
Step 4 Train CE -2.28754 Validation CE -3.29223(1, 11000)
-3.0643046004
Step 5 Train CE -3.06430 Validation CE -4.24906(1, 11000)
-3.8766328905
Step 6 Train CE -3.87663 Validation CE -5.32287(1, 11000)
-4.75625228005
Step 7 Train CE -4.75625 Validation CE -6.54045(1, 11000)
-5.73100217836
Step 8 Train CE -5.73100 Validation CE -7.91369(1, 11000)
-6.82295958719
Step 9 Train CE -6.82296 Validation CE -9.43731(1, 11000)
-8.04673271496
Step 10 Train CE -8.04673 Validation CE -11.09054(1, 11000)
-9.40801887583
Step 11 Train CE -9.40802 Validation CE -12.84314(1, 11000)
-10.9031157862
Step 12 Train CE -10.90312 Validation CE -14.66360(1, 1



KeyboardInterrupt: 