In [280]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import scipy.io as sio
from sklearn import preprocessing as pre
import string
from scipy.special import expit as sigmoid
from __future__ import division

In [73]:
def print_letter(arr, label):
    label = label-1
    mnist_image = ""
    for x in range(28 * 28 - 1):
        if x % 28 == 0:
            mnist_image += '\n'
        if (arr[x] == 0):
            mnist_image += ' '
        else:
            mnist_image += '*'
    print(mnist_image)
    print list(string.ascii_lowercase)[label], label

In [578]:
data = sio.loadmat('hw6_data_dist/letters_data.mat')
train_x = data['train_x']
train_y = np.ndarray.flatten(data['train_y'])

order = np.arange(train_x.shape[0])
np.random.shuffle(order)
shuffled_data = np.take(train_x, order, 0)
shuffled_labels = np.take(train_y, order, 0)

VALID_SPLIT = int(0.8 * train_x.shape[0])
train_data, valid_data = shuffled_data[:VALID_SPLIT], shuffled_data[VALID_SPLIT:]
train_labels, valid_labels = shuffled_labels[:VALID_SPLIT], shuffled_labels[VALID_SPLIT:]

test_data = data['test_x']

train_data = pre.normalize(train_data, axis=0)
pre.scale(train_data, axis=0, copy=False)
train_data = np.append(train_data, np.ones((len(train_data), 1), dtype='int'), axis=1)

valid_data = pre.normalize(valid_data, axis=0)
pre.scale(train_data, axis=0, copy=False)
valid_data = np.append(valid_data, np.ones((len(valid_data), 1), dtype='int'), axis=1)

test_data = pre.normalize(test_data, axis=0)
pre.scale(test_data, axis=0, copy=False)
normalX = np.append(test_data, np.ones((len(test_data), 1), dtype='int'), axis=1)

In [129]:
num_samples = 124800
d, b, k = 784, 200, 26

In [572]:
class NeuralNet:
    """ Initializes NeuralNet
    :param layers: list layer sizes
    :param learning_rate: learning rate for gradient descent
    :param decay: scale learning_rate by decay at the end of each epoch
    :param epochs: max number of epochs
    """
    def __init__(self, layers=[200, 784, 26], learning_rate=0.01, decay=0.9, epochs=10):
        if len(layers) < 2:
            raise ValueError("Must have at least 2 layers")
            
        self.learning_rate = learning_rate
        self.decay = decay
        self.epochs = epochs
        
        self.weights = []
        w = np.random.rand(num_inputs, hidden_layers[1])
        for i in range(len(hidden_layers)-1):
            w = np.random.rand(layers[i], layers[i+1]) - 0.5
            w = np.append(w, np.ones((len(w), 1), dtype='int'), axis=1)
        self.V = np.random.rand(b, d) - 0.5
        self.W = np.random.rand(k, b) - 0.5
        self.V = np.append(self.V, np.ones((b, 1), dtype='int'), axis=1)
        self.W = np.append(self.W, np.ones((k, 1), dtype='int'), axis=1)
        
        self.originalV = self.V
        self.originalW = self.W
        
        self.iterations = []
        self.errors = []
        
    def train(self, images, labels):
        for c in range(self.max_iterations):
            print c
            for x, y in zip(images, labels):
                z, h = self.forward_pass(x)
                y = self.hot_encode(y)
                gradwL, gradvL = self.backward_pass(x, h, z, y)
                self.V = self.V - self.learning_rate * gradvL
                self.W = self.W - self.learning_rate * gradwL
            self.learning_rate *= 0.9
                    
            order = np.arange(images.shape[0])
            np.random.shuffle(order)
            images = np.take(images, order, 0)
            labels = np.take(labels, order, 0)
    
    def predict(self, images):
        predictions = []
        for x in images:
            z, _ = self.forward_pass(x)
            predictions.append(np.argmax(z) + 1)
        return predictions
    
    def validate(self, images, labels):
        predictions = self.predict(images)
        incorrect = 0
        if len(labels) != len(images):
            print 'Mismatch of labels ({}) and images ({})'.format(labels.shape, images.shape)
            
        for i in range(len(labels)):
            if predictions[i] != labels[i]:
                incorrect += 1
                
        print incorrect / len(labels)
        
        return incorrect, predictions
            
    def hot_encode(self, y):
        label = np.zeros((k, 1), dtype=int)
        label[y-1] = 1
        return label
    
    def cross_entropy_loss(self, z, y):
        loss, lnz, inv_lnz = 0, np.log2(z), np.log2(1 - z)
        for i in range(k):
            loss += y[i] * lnz[i] + (1 - y[i]) * inv_lnz[i]
        return -loss[0]
    
    def forward_pass(self, x):
        Vx = self.V.dot(x)
        h = np.append(np.tanh(Vx), [1], axis=0)
        z = sigmoid(self.W.dot(h))
        return z, h
    
    def backward_pass(self, x, h, z, y):
        x = np.reshape(x, (x.shape[0], 1))
        h = np.reshape(h, (h.shape[0], 1))
        z = np.reshape(z, (z.shape[0], 1))
        
        gradhL = self.W.T.dot(z-y)
        gradvh = 1 - np.square(h)
        gradvL = np.outer(gradhL * gradvh, x)
        gradvL = np.delete(gradvL, -1, axis=0)
        gradwL = np.outer(z-y, h)
        return gradwL, gradvL

In [573]:
neuralnet = NeuralNet(0.01, 1)
neuralnet.train(train_data, train_labels)

0


In [575]:
incorrect = neuralnet.validate(train_data, train_labels)

0.225090144231


In [576]:
incorrect, predictions = neuralnet.validate(valid_data, valid_labels)
# for p, label in zip(predictions, valid_labels):
#     print p, label
# print incorrect / len(valid_labels), incorrect, len(valid_labels)

0.243629807692


In [564]:
np.argmax(neuralnet.predict([valid_data[3122]])) + 1, valid_labels[1]

(1, 15)

In [None]:
if c % 10 == 0:
    loss = self.cross_entropy_loss(z, y)
    if not np.isnan(loss) and not np.isinf(loss):
        self.iterations.append(c)
        self.errors.append(loss)