In [280]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import scipy.io as sio
from sklearn import preprocessing as pre
import string
from scipy.special import expit as sigmoid
from __future__ import division

In [73]:
def print_letter(arr, label):
    """Print a 28x28 image as ASCII art, followed by its letter and index.

    Each zero pixel is drawn as a space and every other value as ``*``.
    The image is printed with a leading blank line, one text row per
    28-pixel image row.

    Parameters
    ----------
    arr : sequence of 784 numbers
        Flattened image, read row by row.
    label : int
        1-based class label (1 -> 'a', 2 -> 'b', ...).

    The final line shows the letter and the 0-based index (``label - 1``).
    """
    side = 28
    index = label - 1

    # Walk all 28 rows; the previous loop stopped at 28 * 28 - 1 and so
    # never drew the very last pixel of the image.
    rows = []
    for row_number in range(side):
        pixels = arr[row_number * side:(row_number + 1) * side]
        rows.append(''.join(' ' if value == 0 else '*' for value in pixels))

    print('\n' + '\n'.join(rows))
    # A single pre-formatted argument keeps the output identical whether
    # print is a statement or a function.
    print('{} {}'.format(string.ascii_lowercase[index], index))

In [213]:
# Load the letters data set and pull out the training features / labels.
data = sio.loadmat('hw6_data_dist/letters_data.mat')
train_x = data['train_x']
train_y = np.ndarray.flatten(data['train_y'])

# Append a constant bias feature as the last column.  The row count is
# taken from the data itself so this cell does not depend on a
# `num_samples` constant that is only defined in a later cell.
X = np.append(train_x, np.ones((train_x.shape[0], 1), dtype='int'), axis=1)

# Normalise, then standardise every column.  Keep the returned array:
# copy=False only *tries* to work in place.
normalX = pre.normalize(X, axis=0)
normalX = pre.scale(normalX, axis=0, copy=False)
# Standardising a constant column zero-centres it, which would silently
# remove the bias input; restore it to 1 after scaling.
normalX[:, -1] = 1

# Shuffle samples and labels with the same permutation.
order = np.arange(normalX.shape[0])
np.random.shuffle(order)
shuffled_data = np.take(normalX, order, 0)
shuffled_labels = np.take(train_y, order, 0)

# First 80% of the shuffled samples train the model, the rest validate it.
VALID_SPLIT = int(0.8 * train_x.shape[0])
train_data, valid_data = shuffled_data[:VALID_SPLIT], shuffled_data[VALID_SPLIT:]
train_labels, valid_labels = shuffled_labels[:VALID_SPLIT], shuffled_labels[VALID_SPLIT:]

# NOTE(review): the test features are kept raw here (no bias column and no
# normalisation) -- confirm they are preprocessed before being predicted on.
test = data['test_x']

In [129]:
# Data-set size and network layer widths shared by the cells in this notebook.
num_samples = 124800  # rows in the training matrix
d = 784               # input features per image (28 x 28 pixels)
b = 200               # hidden-layer units
k = 26                # output classes

In [418]:
class NeuralNet(object):
    """Two-layer fully connected classifier trained with per-sample SGD.

    Architecture: input (with a trailing bias feature) -> tanh hidden layer
    -> bias unit appended -> sigmoid output layer.  ``V`` holds the hidden
    weights with shape (n_hidden, n_in + 1) and ``W`` the output weights
    with shape (n_out, n_hidden + 1); the last column of each is the bias.

    Parameters
    ----------
    learning_rate : float
        Step size of every gradient update.
    max_iterations : int
        Number of single-sample updates performed by ``train``.
    n_in, n_hidden, n_out : int, optional
        Layer widths.  When omitted they fall back to the notebook-level
        constants ``d``, ``b`` and ``k`` respectively.

    Attributes
    ----------
    originalV, originalW : snapshots of the initial weights.
    iterations, errors : iteration numbers and the finite losses recorded
        at them during ``train``.
    """

    def __init__(self, learning_rate, max_iterations,
                 n_in=None, n_hidden=None, n_out=None):
        self.learning_rate = learning_rate
        self.max_iterations = max_iterations

        self.n_in = d if n_in is None else n_in
        self.n_hidden = b if n_hidden is None else n_hidden
        self.n_out = k if n_out is None else n_out

        self.V = self._init_weights(self.n_hidden, self.n_in)
        self.W = self._init_weights(self.n_out, self.n_hidden)

        # Independent copies, so the snapshot survives in-place edits.
        self.originalV = self.V.copy()
        self.originalW = self.W.copy()

        self.iterations = []
        self.errors = []

    @staticmethod
    def _init_weights(rows, cols):
        """Return a (rows, cols + 1) weight matrix whose last column is bias.

        Weights are zero-mean Gaussian with standard deviation
        1 / sqrt(cols).  Uniform [0, 1) weights summed over hundreds of
        inputs drive tanh and sigmoid into saturation, where the
        gradients vanish and the loss becomes inf/nan.
        """
        weights = np.random.normal(0.0, 1.0 / np.sqrt(cols), size=(rows, cols))
        return np.append(weights, np.ones((rows, 1)), axis=1)

    def train(self, images, labels):
        """Run ``max_iterations`` SGD steps on randomly drawn samples.

        ``images`` is a 2-D array with one sample per row (bias feature
        included) and ``labels`` holds the matching 1-based class labels.
        Every 10th iteration the loss is recorded in ``iterations`` /
        ``errors`` if it is finite; progress is printed roughly ten times.
        """
        # At least 1, so runs shorter than 10 iterations do not divide by 0.
        report_every = max(1, int(self.max_iterations) // 10)
        for c in range(self.max_iterations):
            r = np.random.randint(0, images.shape[0])
            x = images[r].T
            y = self.hot_encode(labels[r])
            _, z, h = self.forward_pass(x)
            if c % 10 == 0:
                loss = self.cross_entropy_loss(z, y)
                if np.isfinite(loss):
                    self.iterations.append(c)
                    self.errors.append(loss)
            gradwL, gradvL = self.backward_pass(x, h, z, y)
            self.V = self.V - self.learning_rate * gradvL
            self.W = self.W - self.learning_rate * gradwL
            if c % report_every == 0:
                print('{} {}'.format(c // report_every, c))

    def predict(self, images):
        """Return a list with the one-hot (n_out, 1) prediction per image."""
        return [self.forward_pass(x)[0] for x in images]

    def validate(self, images, labels):
        """Print the error rate on ``images`` and return the error count."""
        predictions = self.predict(images)
        if len(labels) != len(images):
            print('Mismatch of labels ({}) and images ({})'.format(labels.shape, images.shape))
        incorrect = 0
        for i in range(len(labels)):
            if not np.array_equal(predictions[i], self.hot_encode(labels[i])):
                incorrect += 1
        # Explicit float division; an empty label set reports a rate of 0.
        total = len(labels)
        print(incorrect / float(total) if total else 0.0)
        return incorrect

    def hot_encode(self, y):
        """Return the (n_out, 1) one-hot column for the 1-based label ``y``."""
        label = np.zeros((self.n_out, 1), dtype=int)
        label[y - 1] = 1
        return label

    def cross_entropy_loss(self, z, y):
        """Return the summed binary cross-entropy of ``z`` against ``y``.

        The loss is measured in bits (base-2 logarithm).  Outputs that are
        exactly 0 or 1 yield inf/nan, which ``train`` skips when logging.
        """
        # Flatten both so a (k,) output and a (k, 1) target pair up
        # element-wise instead of broadcasting to a (k, k) matrix.
        z = np.reshape(z, -1)
        y = np.reshape(y, -1)
        per_class = y * np.log2(z) + (1 - y) * np.log2(1 - z)
        return -np.sum(per_class)

    def forward_pass(self, x):
        """Propagate one sample through the network.

        Returns ``(label, z, h)``: the one-hot (n_out, 1) prediction, the
        sigmoid outputs, and the tanh hidden activations with the bias
        unit (1) appended.
        """
        h = np.append(np.tanh(self.V.dot(x)), [1], axis=0)
        z = sigmoid(self.W.dot(h))

        # One-hot encode the most activated output unit.
        label = np.zeros((self.n_out, 1), dtype=int)
        label[np.argmax(z)] = 1

        return label, z, h

    def backward_pass(self, x, h, z, y):
        """Return ``(gradwL, gradvL)``, the loss gradients w.r.t. W and V.

        The gradients are those of the natural-log cross-entropy (the
        base-2 value from ``cross_entropy_loss`` multiplied by ln 2).
        """
        x = np.reshape(x, (self.n_in + 1, 1))
        h = np.reshape(h, (self.n_hidden + 1, 1))
        z = np.reshape(z, (self.n_out, 1))
        y = np.reshape(y, (self.n_out, 1))

        # Sigmoid + cross-entropy: gradient w.r.t. the output
        # pre-activation is simply (z - y).
        delta_out = z - y
        gradwL = delta_out.dot(h.T)

        # Back-propagate through W without its bias column, since the
        # bias unit has no incoming weights.  The same (z - y) sign must
        # be used here as for gradwL, otherwise V is updated uphill.
        gradhL = np.delete(self.W, -1, axis=1).T.dot(delta_out)
        dtanh = 1 - np.square(h[:-1])
        gradvL = (gradhL * dtanh).dot(x.T)
        return gradwL, gradvL

In [None]:
# Build the network with a step size of 0.5 and a budget of one million
# single-sample updates, then fit it on the training split.
neuralnet = NeuralNet(learning_rate=0.5, max_iterations=int(10e5))
neuralnet.train(images=train_data, labels=train_labels)

0 0




In [None]:
# Error count on the training split, then: misclassified, total, error rate.
incorrect = neuralnet.validate(images=train_data, labels=train_labels)
print('{} {} {}'.format(incorrect, len(train_labels), incorrect / len(train_labels)))

In [None]:
# Error count on the held-out validation split (validate prints the rate).
incorrect = neuralnet.validate(images=valid_data, labels=valid_labels)

In [170]:
# Scratch cell: sanity-check the array shapes used by the network.

# Hidden weights with a trailing bias column.
V = np.random.rand(b, d)
V = np.append(V, np.ones((b, 1), dtype='int'), axis=1)

# Hidden pre-activations for one sample, plus the bias unit.
x = train_data[3].T
h = np.append(V.dot(x), [1], axis=0)
print(h.shape)

# Output weights with a trailing bias column, applied to a dummy hidden vector.
W = np.random.rand(k, b)
W = np.append(W, np.ones((k, 1), dtype='int'), axis=1)
print(W.dot(np.ones((b + 1, 1))).shape)

# Hidden activations for a random sample.  tanh is applied to the
# pre-activations only and the bias unit is appended afterwards, the same
# order NeuralNet.forward_pass uses (previously the bias went through tanh).
r = np.random.randint(0, train_data.shape[0])
x = train_data[r].T
Vx = V.dot(x)
h = np.append(np.tanh(Vx), [1], axis=0)
print(h.shape)

# Labels are already one-dimensional, so flattening leaves the shape as is.
print(train_labels.shape)
print(np.ndarray.flatten(train_labels).shape)

(201,)
(26, 1)
(201,)
(99840,)
(99840,)

(785,)
(785,)
(785,)
(785,)
(785,)
(785,)
(785,)
