In [1]:
#!/usr/bin/env python

import numpy as np
import random

from q1_softmax import softmax
from q2_sigmoid import sigmoid, sigmoid_grad
from q2_gradcheck import gradcheck_naive


def forward_backward_prop(X, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    the backward propagation for the gradients for all parameters.

    Notice the gradients computed here are different from the gradients in
    the assignment sheet: they are w.r.t. weights, not inputs.

    Arguments:
    X -- M x Dx matrix, where each row is a training example x.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
              Flat vector laid out as W1 (Dx*H), b1 (H), W2 (H*Dy), b2 (Dy).
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension

    Returns:
    cost -- cross entropy cost, summed (not averaged) over all entries.
    grad -- flat vector of gradients, concatenated in the same order as
            params: W1, b1, W2, b2.
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Note: compute cost based on `sum` not `mean`.
    ### YOUR CODE HERE: forward propagation
    z1 = np.matmul(X, W1) + b1       # hidden pre-activation
    h = sigmoid(z1)                  # hidden activation
    z2 = np.matmul(h, W2) + b2       # output pre-activation
    y = softmax(z2)                  # presumably row-wise class probabilities
    targets = np.asarray(labels)
    # Only entries with a non-zero target contribute to the cross entropy.
    # Without the mask, a predicted value of exactly 0 at a zero-target
    # position gives 0 * log(0) = nan and turns the whole cost into NaN.
    cost = -np.sum(np.where(targets != 0, targets * np.log(y), 0.0))
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    # Gradient of the softmax + cross entropy cost w.r.t. z2
    # (assumes every label row sums to 1, e.g. one-hot rows).
    dz2 = y - targets
    gradW2 = np.matmul(h.T, dz2)
    gradb2 = np.sum(dz2, axis=0)     # bias gradient: sum over examples
    dh = np.matmul(dz2, W2.T)
    # NOTE(review): sigmoid_grad receives the pre-activation z1 here. If
    # q2_sigmoid.sigmoid_grad expects the sigmoid *output* instead, this
    # should be sigmoid_grad(h) -- confirm against q2_sigmoid.
    dz1 = dh * sigmoid_grad(z1)      # computed once, reused for W1 and b1
    gradW1 = np.matmul(X.T, dz1)
    gradb1 = np.sum(dz1, axis=0)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
        gradW2.flatten(), gradb2.flatten()))

    return cost, grad


def sanity_check():
    """
    Build a small random dataset and parameter vector, then run
    gradcheck_naive on forward_backward_prop with them.
    """
    num_examples = 20
    dimensions = [10, 5, 10]
    n_in, n_hidden, n_out = dimensions

    # One random example per row.
    data = np.random.randn(num_examples, n_in)

    # One-hot targets: pick a random class index for every row.
    labels = np.zeros((num_examples, n_out))
    for row in labels:
        row[random.randint(0, n_out - 1)] = 1

    # Parameter count covers W1 and b1, then W2 and b2.
    n_params = (n_in + 1) * n_hidden + (n_hidden + 1) * n_out
    params = np.random.randn(n_params)

    def cost_and_grad(theta):
        # Data, labels and dimensions stay fixed; only the parameters vary.
        return forward_backward_prop(data, labels, theta, dimensions)

    gradcheck_naive(cost_and_grad, params)





# Run the gradient sanity check only when this file is executed as a script.
if __name__ == "__main__":
    sanity_check()



65.6002403515
[  2.63463728e-01   7.82702540e-01   2.33682113e-01   3.02061713e-01
  -2.72727759e-01   1.57216041e+00   9.94718656e-01   2.96322644e-01
   1.10181670e+00  -6.82591382e-01  -8.20187107e-01   3.04686074e-01
   9.60101781e-01   2.59920766e-01   1.79525823e-02  -7.61878498e-01
   7.83749801e-03  -9.16297229e-02   2.00762585e-02   8.86259372e-03
   5.49182274e-01  -8.93315938e-02   4.57433717e-01  -2.02110085e-01
   4.91699126e-01  -1.14463970e+00   2.41341562e-01   3.60738837e-01
  -1.06402071e+00  -2.25449641e-01  -9.57838795e-01  -6.27858062e-02
   7.18216223e-02  -1.14558285e+00  -4.50673348e-02   1.13783912e+00
  -2.74193533e-03  -5.62187336e-01   6.37535701e-01  -4.23327207e-01
  -8.89341941e-01  -1.61814336e-01  -3.28392796e-01  -1.59329396e+00
  -3.13485303e-02   9.87271565e-03  -2.27334976e-01   2.41508821e-01
   1.73990096e-01   9.45874416e-01  -9.47257715e-01   5.85657485e-01
   2.82436989e-01   1.16742037e+00  -1.36740851e+00   1.67284877e-01
   9.35316282e-01   