# Assignment 1

## Question 1: Softmax

In [4]:
import numpy as np

In [60]:
def softmax(x):
    """Compute the softmax function for each row of the input x.

    It is crucial that this function is optimized for speed because
    it will be used frequently in later code. You might find numpy
    functions np.exp, np.sum, np.reshape, np.max, and numpy
    broadcasting useful for this task.

    Numpy broadcasting documentation:
    http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html

    You should also make sure that your code works for a single
    D-dimensional vector (treat the vector as a single row) and
    for N x D matrices. This may be useful for testing later. Also,
    make sure that the dimensions of the output match the input.

    You must implement the optimization in problem 1(a) of the
    written assignment!

    Arguments:
    x -- A D dimensional vector or N x D dimensional numpy matrix.

    Return:
    x -- You are allowed to modify x in-place
    """
    orig_shape = x.shape

    if len(x.shape) > 1:
        # Matrix
        ### YOUR CODE HERE
        normalized_x = x - np.amax(x, axis=1)[:, np.newaxis]
        exp_x = np.exp(normalized_x)
        np.sum(exp_x, axis=1)[:, np.newaxis]
        x = np.divide(exp_x, s_sum_rows)
        #raise NotImplementedError
        ### END YOUR CODE
    else:
        # Vector
        ### YOUR CODE HERE
        normalized_x = x - np.max(x)
        exp_x = np.exp(normalized_x)
        x = exp_x / np.sum(exp_x)
        #raise NotImplementedError
        ### END YOUR CODE

    assert x.shape == orig_shape
    return x


In [61]:
def test_softmax_basic():
    """
    Some simple tests to get you started.
    Warning: these are not exhaustive.
    """
    print ("Running basic tests...")
    test1 = softmax(np.array([1,2]))
    print (test1)
    ans1 = np.array([0.26894142,  0.73105858])
    assert np.allclose(test1, ans1, rtol=1e-05, atol=1e-06)

    test2 = softmax(np.array([[1001,1002],[3,4]]))
    print (test2)
    ans2 = np.array([
        [0.26894142, 0.73105858],
        [0.26894142, 0.73105858]])
    assert np.allclose(test2, ans2, rtol=1e-05, atol=1e-06)

    test3 = softmax(np.array([[-1001,-1002]]))
    print (test3)
    ans3 = np.array([0.73105858, 0.26894142])
    assert np.allclose(test3, ans3, rtol=1e-05, atol=1e-06)

    print ("You should be able to verify these results by hand!\n")


In [62]:
def test_softmax():
    """
    Use this space to test your softmax implementation by running:
        python q1_softmax.py
    This function will not be called by the autograder, nor will
    your tests be graded.
    """
    print ("Running your tests...")
    ### YOUR CODE HERE
    raise NotImplementedError
    ### END YOUR CODE

In [63]:
test_softmax_basic()
test_softmax()

Running basic tests...
[0.26894142 0.73105858]
[[0.26894142 0.73105858]
 [0.26894142 0.73105858]]


AxisError: axis 1 is out of bounds for array of dimension 1

----------------------

In [33]:
softmax(np.array([[1,1],[1,1]]))

array([[0.5, 0.5],
       [0.5, 0.5]])

In [34]:
test1 = softmax(np.array([1,2]))

In [35]:
ans1 = np.array([0.26894142,  0.73105858])
assert np.allclose(test1, ans1, rtol=1e-05, atol=1e-06) 

### Test 2

In [36]:
test2 = softmax(np.array([[1001,1002],[3,4]]))
print (test2)
ans2 = np.array([
        [0.26894142, 0.73105858],
        [0.26894142, 0.73105858]])
assert np.allclose(test2, ans2, rtol=1e-05, atol=1e-06)


[[0.5 0.5]
 [0.5 0.5]]


AssertionError: 

In [37]:
x = np.array([[1001,1002],[3,4]])

In [38]:
np.amax(x, axis=1)

array([1002,    4])

In [39]:
normalized_x = x - np.amax(x, axis=1)[:, np.newaxis]
normalized_x

array([[-1,  0],
       [-1,  0]])

In [41]:
exp_x = np.exp(normalized_x)
exp_x

array([[0.36787944, 1.        ],
       [0.36787944, 1.        ]])

In [43]:
s_sum_rows = np.sum(exp_x, axis=1)[:, np.newaxis]
s_sum_rows

array([[1.36787944],
       [1.36787944]])

In [44]:
x = np.divide(exp_x, s_sum_rows)
x

array([[0.26894142, 0.73105858],
       [0.26894142, 0.73105858]])

### Test 3

In [50]:
x = np.array([[-1001,-1002]])

In [64]:
x.shape

(1, 2)

In [51]:
np.amax(x, axis=1)

array([-1001])

In [52]:
normalized_x = x - np.amax(x, axis=1)[:, np.newaxis]
normalized_x

array([[ 0, -1]])

In [53]:
exp_x = np.exp(normalized_x)
exp_x

array([[1.        , 0.36787944]])

In [54]:
s_sum_rows = np.sum(exp_x, axis=1)[:, np.newaxis]
s_sum_rows

array([[1.36787944]])

In [55]:
x = np.divide(exp_x, s_sum_rows)
x

array([[0.73105858, 0.26894142]])

## Question 2: Sigmoid

In [11]:
def sigmoid(x):
    """
    Compute the sigmoid function for the input here.

    Arguments:
    x -- A scalar or numpy array.

    Return:
    s -- sigmoid(x)
    """

    ### YOUR CODE HERE
    s = 1/(1+np.exp(-x))
    #raise NotImplementedError
    ### END YOUR CODE

    return s

In [12]:
def sigmoid_grad(s):
    """
    Compute the gradient for the sigmoid function here. Note that
    for this implementation, the input s should be the sigmoid
    function value of your original input x.

    Arguments:
    s -- A scalar or numpy array.

    Return:
    ds -- Your computed gradient.
    """

    ### YOUR CODE HERE
    ds = s*(1-s)
    #raise NotImplementedError
    ### END YOUR CODE

    return ds

In [13]:
def test_sigmoid_basic():
    """
    Some simple tests to get you started.
    Warning: these are not exhaustive.
    """
    print ("Running basic tests...")
    x = np.array([[1, 2], [-1, -2]])
    f = sigmoid(x)
    g = sigmoid_grad(f)
    print (f)
    f_ans = np.array([
        [0.73105858, 0.88079708],
        [0.26894142, 0.11920292]])
    assert np.allclose(f, f_ans, rtol=1e-05, atol=1e-06)
    print (g)
    g_ans = np.array([
        [0.19661193, 0.10499359],
        [0.19661193, 0.10499359]])
    assert np.allclose(g, g_ans, rtol=1e-05, atol=1e-06)
    print ("You should verify these results by hand!\n")

In [14]:
x = np.array([[1, 2], [-1, -2]])
f = sigmoid(x)
g = sigmoid_grad(f)

In [15]:
f

array([[0.73105858, 0.88079708],
       [0.26894142, 0.11920292]])

In [16]:
g

array([[0.19661193, 0.10499359],
       [0.19661193, 0.10499359]])