In [30]:
import cv2
import numpy as np
import os

from load_dataset import get_dataset

import matplotlib.pyplot as plt
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [32]:
# Load the dataset via get_dataset (imported from the local load_dataset module)
# and print the shape of each returned array as a quick sanity check.
# In the recorded run X was (39209, 4096) and Y was (39209,).
# NOTE(review): presumably X holds one flattened image per row and Y the class
# label per row -- confirm against load_dataset.
X, Y = get_dataset()
print(X.shape)
print(Y.shape)

39209
(39209, 4096)
(39209,)


In [27]:
def forward_and_backprop(W1, b1, W2, b2, X, y=None, reg=0):
    """
    Forward (and optionally backward) pass of a two-layer network:
    affine -> ReLU -> affine -> softmax, fully vectorized.

    Inputs:
    - W1: first-layer weights, applied as X.dot(W1).
    - b1: first-layer bias, broadcast-added to X.dot(W1).
    - W2: second-layer weights, applied to the ReLU output.
    - b2: second-layer bias, broadcast-added to the class scores.
    - X: input data, one example per row.
    - y: integer class index for each row of X, or None to run only the
      forward pass.
    - reg: L2 regularization strength applied to W1 and W2 (not the biases).

    Returns:
    - If y is None: the softmax class probabilities, one row per example.
    - Otherwise a tuple (loss, grads) where loss is the mean cross-entropy
      plus reg * (sum(W1**2) + sum(W2**2)), and grads is a dict with keys
      "W1", "b1", "W2", "b2" holding the gradient of loss with respect to
      the parameter of the same name.
    """
    num_train = X.shape[0]

    # Forward: hidden pre-activations, then ReLU.
    H = X.dot(W1) + b1
    H_relu = np.maximum(H, 0)

    scores = H_relu.dot(W2) + b2
    # Subtracting the row-wise max leaves the softmax unchanged but keeps
    # np.exp from overflowing.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exp_shifted = np.exp(shifted)
    exp_sums = np.sum(exp_shifted, axis=1, keepdims=True)
    probs = exp_shifted / exp_sums

    # Without labels there is no loss to compute; this check must come before
    # any indexing with y, because a None index would be read as np.newaxis.
    if y is None:
        return probs

    rows = np.arange(num_train)

    # Cross-entropy written as log-sum-exp minus the correct-class score, so a
    # probability that underflows to 0 does not turn the loss into inf.
    loss = np.sum(np.log(exp_sums[:, 0]) - shifted[rows, y]) / num_train
    loss += reg * (np.sum(W1 * W1) + np.sum(W2 * W2))

    # Backward: gradient of the mean cross-entropy w.r.t. the class scores.
    dscores = probs.copy()
    dscores[rows, y] -= 1
    dscores /= num_train

    dW2 = H_relu.T.dot(dscores) + 2 * reg * W2
    db2 = np.sum(dscores, axis=0)

    dH_relu = dscores.dot(W2.T)
    # ReLU gate; units whose pre-activation is exactly 0 are treated as active.
    dH = (H >= 0) * dH_relu

    dW1 = X.T.dot(dH) + 2 * reg * W1
    db1 = np.sum(dH, axis=0)

    grads = {
        "W2": dW2,
        "b2": db2,
        "W1": dW1,
        "b1": db1,
    }

    return loss, grads

In [33]:
# Small uniform-random starting parameters for a 4096 -> 64 -> 43 network,
# drawn in the order W1, b1, W2, b2 and scaled down by 0.001.
W1 = 0.001 * np.random.rand(4096, 64)
b1 = 0.001 * np.random.rand(64)
W2 = 0.001 * np.random.rand(64, 43)
b2 = 0.001 * np.random.rand(43)

# One forward/backward pass over the whole dataset with the untrained weights.
loss, grads = forward_and_backprop(W1, b1, W2, b2, X, y=Y)

In [29]:
def train_data(W1, b1, W2, b2, data_x, data_y, lr=1e-6, batch_size=128, num_epoch=10):
    """
    Train the two-layer network with mini-batch stochastic gradient descent.

    Each epoch shuffles the example order and walks through it in consecutive
    slices of at most batch_size examples, so every example is used once per
    epoch. Gradients come from forward_and_backprop, defined in this notebook.

    Inputs:
    - W1, b1, W2, b2: initial parameters; they are copied, not modified.
    - data_x: training inputs, one example per row.
    - data_y: label for each row of data_x.
      NOTE(review): assumed to support NumPy integer-array indexing -- confirm.
    - lr: learning rate (step size) of each update.
    - batch_size: maximum number of examples per update; must be >= 1.
    - num_epoch: number of passes over the data.

    Returns:
    - A tuple (params, loss_history): params is a dict with keys "W1", "b1",
      "W2", "b2" holding the trained parameters, and loss_history is a list
      with the mini-batch loss of every update, in order.

    Raises:
    - ValueError: if batch_size is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Train float copies so the caller's arrays are left untouched.
    params = {
        "W1": np.array(W1, dtype=float),
        "b1": np.array(b1, dtype=float),
        "W2": np.array(W2, dtype=float),
        "b2": np.array(b2, dtype=float),
    }
    loss_history = []

    num_train = data_x.shape[0]
    # Ceiling division: no extra, empty batch when num_train is an exact
    # multiple of batch_size.
    iterate_per_epoch = (num_train + batch_size - 1) // batch_size

    for i in range(num_epoch):
        order = np.random.permutation(num_train)
        for j in range(iterate_per_epoch):
            indices = order[j * batch_size:(j + 1) * batch_size]
            loss, grads = forward_and_backprop(
                params["W1"], params["b1"], params["W2"], params["b2"],
                data_x[indices], data_y[indices])
            # Plain gradient-descent step on every parameter.
            for key in params:
                params[key] -= lr * grads[key]
            loss_history.append(loss)

    return params, loss_history

4.58940648465
{'b1': array([  8.05426341e-05,  -4.18433140e-05,   1.92464754e-05,
        -7.03338717e-07,   2.38945794e-05,   2.30595226e-05,
         3.71371723e-05,   1.10810525e-04,   1.39167067e-04,
         1.80137385e-04,   1.47810418e-04,   1.13109601e-04,
        -5.18818787e-05,   1.65533231e-05,   1.12067036e-05,
         5.28565018e-06,  -8.31273936e-05,   1.12586564e-04,
         1.27886035e-04,  -5.19944394e-05,   2.26758140e-05,
         1.34584281e-04,   3.09668588e-05,  -9.61174793e-07,
         6.60720922e-05,   9.80169765e-05,  -5.57116356e-05,
        -1.43088306e-05,   1.22362691e-04,   2.93043535e-05,
        -1.20224948e-04,  -5.18436157e-05,   8.48347803e-06,
         9.83921248e-05,   6.28317287e-05,   2.17668177e-05,
        -7.70435600e-05,  -3.63415425e-05,   3.90055239e-05,
         7.53093774e-05,   8.82911448e-06,   2.45227203e-06,
         2.48733005e-05,   1.33642405e-05,   1.11840243e-04,
         1.00492736e-04,   1.26297589e-04,  -3.74029490e-05,
   