In [56]:
import numpy as np

STUDENT={'name': 'Shahar Siegman',
         'ID': '011862141'}

def softmax(x):
    """
    Compute the softmax vector.
    x: a n-dim vector (numpy array)
    returns: an n-dim vector (numpy array) of softmax values
    """
    x0 = x-x.mean()
    ret = np.exp(x0)
    return ret / ret.sum()
    # YOUR CODE HERE
    # Your code should be fast, so use a vectorized implementation using numpy,
    # don't use any loops.
    # With a vectorized implementation, the code should be no more than 2 lines.
    #
    # For numeric stability, use the identify you proved in Ex 2 Q1.
    # return x
    ### why does the original code say return x?

def classifier_output(x, params):
    """
    Return the output layer (class probabilities) 
    of a log-linear classifier with given params on input x.
    """
    W,b = params
    # we use Z = xW + b, where x and b are row vectors
    f_at_x = np.dot(x,W) + b
    probs = softmax(f_at_x)
    return probs

def predict(x, params):
    """
    Returnss the prediction (highest scoring class id) of a
    a log-linear classifier with given parameters on input x.

    params: a list of the form [(W, b)]
    W: matrix
    b: vector
    """
    return np.argmax(classifier_output(x, params))

def loss_and_gradients(x, y, params):
    """
    Compute the loss and the gradients at point x with given parameters.
    y is a scalar indicating the correct label.

    returns:
        loss,[gW,gb]

    loss: scalar
    gW: matrix, gradients of W
    gb: vector, gradients of b
    """
    W,b = params
    # todo: check the shape of b
    shape_x = x.shape
    if len(shape_x)>1 and shape_x[1] != 1:
        print("x should be a column vector, actual shape: {}".format(shape_x))
        raise AssertionError
    in_dim = shape_x[0]
    shape_W = W.shape
    if shape_W[0] != in_dim:
        print("number of rows of W ({}) mismatches length of X ({})".format(shape_W[0],in_dim))
        raise AssertionError
    out_dim = shape_W[1]
    shape_b = b.shape
    if len(shape_b)>1 and shape_b[1] != 1:
        print("b is not a row vector, actual shape: {}".format(shape_b))
        raise AssertionError
    if shape_b[0] != out_dim:
        print("length of b ({}) mismatches columns of W ({})".format(shape_b[1],out_dim))

    y_hat = classifier_output(x,params)
    print("y_hat: {}".format(y_hat))
    loss = logloss(y, y_hat)
    y_diff = np.matrix(y_hat-y)
    gW = np.dot(np.matrix(x).transpose(),y_diff)
    gb = y_diff 
    if not np.all(gW.shape==W.shape):
        print("problem with calculation of gW, size: {}, expected: {}".format(gW.shape, W.shape))
        print("shape y_diff: {}, shape x: {}".format(y_diff.shape, np.matrix(x).shape))
        raise AssertionError
    return loss,[gW,gb]

def logloss(y, y_hat):
    print("in log loss. shape y: {}, shape y_hat: {}".format(y.shape, y_hat.shape))
    return np.dot(y,np.log(y_hat))

def create_classifier(in_dim, out_dim):
    """
    returns the parameters (W,b) for a log-linear classifier
    with input dimension in_dim and output dimension out_dim.
    """
    W = np.zeros((in_dim, out_dim))
    b = np.zeros(out_dim)
    return [W,b]

def ll_sanity():
#if __name__ == '__main__':
    # Sanity checks for softmax. If these fail, your softmax is definitely wrong.
    # If these pass, it may or may not be correct.
    print("running softmax tests")
    test1 = softmax(np.array([1,2]))
    test2 = softmax(np.array([1001,1002]))
    test3 = softmax(np.array([-1001,-1002])) 
    print ("test1: {}".format(test1))
    assert np.amax(np.fabs(test1 - np.array([0.26894142,  0.73105858]))) <= 1e-6
    print ("test2: {}".format(test2))
    assert np.amax(np.fabs(test2 - np.array( [0.26894142, 0.73105858]))) <= 1e-6
    print ("test3: {}".format(test3))
    assert np.amax(np.fabs(test3 - np.array([0.73105858, 0.26894142]))) <= 1e-6
    print("softmax tests passed")

    # Sanity checks. If these fail, your gradient calculation is definitely wrong.
    # If they pass, it is likely, but not certainly, correct.
    # import sys
    #sys.path.append("C:\Shahar\BarIlan\NLP-courses\89687-DL\Assignment1\code\loglinear.py")
    #print(sys.path)
    #from .grad_check import gradient_check
    global W,b
    W,b = create_classifier(3,6)

    def _loss_and_W_grad(W):
        global b
        loss,grads = loss_and_gradients(np.array([1,2,3]),0,[W,b])
        return loss,grads[0]

    def _loss_and_b_grad(b):
        global W
        loss,grads = loss_and_gradients(np.array([1,2,3]),0,[W,b])
        return loss,grads[1]

    for _ in range(10):
        W = np.random.randn(W.shape[0],W.shape[1])
        b = np.random.randn(b.shape[0])
        gradient_check(_loss_and_b_grad, b)
        gradient_check(_loss_and_W_grad, W)


    


In [49]:
import numpy as np

STUDENT={'name': 'Shahar Siegman',
         'ID': '011862141'}

def gradient_check(f, x):
    """ 
    Gradient check for a function f 
    - f should be a function that takes a single argument and outputs the cost and its gradients
    - x is the point (numpy array) to check the gradient at
    """ 
    fx, grad = f(x) # Evaluate function value at original point
    h = 1e-4

    # Iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        ### modify x[ix] with h defined above to compute the numerical gradient.
        ### if you change x, make sure to return it back to its original state for the next iteration.
        v = x[ix] 
        x[ix] = v + h/2
        f2,_ = f(x)
        x[ix] = v - h/2
        f1,_ = f(x)
        x[ix] = v
        print("f1: {}, f2: {}".format(f1,f2))
        numeric_gradient = (f2-f1)/h
        # Compare gradients
        reldiff = abs(numeric_gradient - grad[ix]) / max(1, abs(numeric_gradient), abs(grad[ix]))
        if reldiff > 1e-5:
            print("Gradient check failed.")
            print("First gradient error found at index %s" % str(ix))
            print("Your gradient: %f \t Numerical gradient: %f" % (grad[ix], numeric_gradient))
            return
    
        it.iternext() # Step to next index

    print("Gradient check passed!")

def sanity_check():
    """
    Some basic sanity checks.
    """
    quad = lambda x: (np.sum(x ** 2), x * 2)

    print("Running sanity checks...")
    gradient_check(quad, np.array(123.456))      # scalar test
    x = np.random.randn(3,)
    gradient_check(quad, x)    # 1-D test
    gradient_check(quad, np.random.randn(4,5))   # 2-D test
    print("")

if __name__ == '__main__':
    # If these fail, your code is definitely wrong.
    sanity_check()    
    

Running sanity checks...
f1: 15241.371590402501, f2: 15241.396281602501
Gradient check passed!
f1: 2.181276407852401, f2: 2.1815020283076203
f1: 2.1814741082695495, f2: 2.181304327890471
f1: 2.181432593192951, f2: 2.1813458429670707
Gradient check passed!
f1: 25.453235254823095, f2: 25.45284148622899
f1: 25.453099750789395, f2: 25.45297699026269
f1: 25.4530307233796, f2: 25.453046017672484
f1: 25.45294919600082, f2: 25.453127545051267
f1: 25.453049852584527, f2: 25.453026888467555
f1: 25.452924291996556, f2: 25.453152449055526
f1: 25.45297237321248, f2: 25.453104367839604
f1: 25.453116360133567, f2: 25.45296038091852
f1: 25.452955910693856, f2: 25.453120830358234
f1: 25.452953854038594, f2: 25.453122887013496
f1: 25.45320094759778, f2: 25.452875793454304
f1: 25.453078591690357, f2: 25.45299814936173
f1: 25.45291198425104, f2: 25.453164756801044
f1: 25.4528399019266, f2: 25.453236839125484
f1: 25.452817822796217, f2: 25.453258918255866
f1: 25.453147025350543, f2: 25.45292971570154
f1: 2

In [50]:
ll_sanity()

running softmax tests
test1: [0.26894142 0.73105858]
test2: [0.26894142 0.73105858]
test3: [0.73105858 0.26894142]
softmax tests passed
y_hat: [8.35714812e-01 1.27598991e-01 4.01129364e-06 3.43675380e-02
 1.46421570e-05 2.30000570e-03]
y_hat: [8.35721677e-01 1.27593659e-01 4.01112602e-06 3.43661020e-02
 1.46415452e-05 2.29990959e-03]
y_hat: [8.35707947e-01 1.27604323e-01 4.01146125e-06 3.43689741e-02
 1.46427689e-05 2.30010180e-03]
f1: [0. 0. 0. 0. 0. 0.], f2: [0. 0. 0. 0. 0. 0.]


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [52]:
global W,b
print(W)
print(b)

[[ 0.18983994 -1.40727523 -0.5841523  -0.27341281  0.49363692 -0.99399498]
 [ 0.40646452  2.27836731 -0.47029934 -0.5459866  -0.67480659  0.53160206]
 [ 0.99333181  0.27874525 -1.27685437  0.4979319  -1.54597973 -0.01091921]]
[ 0.21762629 -1.66469938 -2.69122412  0.88080604 -1.25783171 -1.73143664]


In [53]:
def _loss_and_W_grad(W):
    global b
    loss,grads = loss_and_gradients(np.array([1,2,3]),0,[W,b])
    return loss,grads[0]

def _loss_and_b_grad(b):
    global W
    loss,grads = loss_and_gradients(np.array([1,2,3]),0,[W,b])
    return loss,grads[1]


In [57]:
l, g = _loss_and_W_grad(W)
print(l.shape)
print(g.shape)

y_hat: [8.35714812e-01 1.27598991e-01 4.01129364e-06 3.43675380e-02
 1.46421570e-05 2.30000570e-03]


AttributeError: 'int' object has no attribute 'shape'

In [None]:
np.array([1001, 1002]).mean()

In [None]:
import sys
sys.path