In [1]:
import numpy as np
import random

In [18]:
import ipytest
ipytest.config(rewrite_asserts=True, magics=True)

__file__ = "sandbox.ipynb"

In [29]:
from utils.sanity_checks import dummy
from utils.utils import softmax

In [19]:
inputs = {
    'test_word2vec': {
        'currentCenterWord': "c",
        'windowSize': 3,
        'outsideWords': ["a", "b", "e", "d", "b", "c"]
    },
    'test_naivesoftmax': {
        'centerWordVec': np.array([-0.27323645, 0.12538062, 0.95374082]).astype(float),
        'outsideWordIdx': 3,
        'outsideVectors': np.array([[-0.6831809, -0.04200519, 0.72904007],
                                    [0.18289107, 0.76098587, -0.62245591],
                                    [-0.61517874, 0.5147624, -0.59713884],
                                    [-0.33867074, -0.80966534, -0.47931635],
                                    [-0.52629529, -0.78190408, 0.33412466]]).astype(float)

    },
    'test_sigmoid': {
        'x': np.array([-0.46612273, -0.87671855, 0.54822123, -0.36443576, -0.87671855, 0.33688521
                          , -0.87671855, 0.33688521, -0.36443576, -0.36443576, 0.54822123]).astype(float)
    }
}

outputs = {
    'test_word2vec': {
        'loss': 11.16610900153398,
        'dj_dv': np.array(
            [[0., 0., 0.],
             [0., 0., 0.],
             [-1.26947339, -1.36873189, 2.45158957],
             [0., 0., 0.],
             [0., 0., 0.]]).astype(float),
        'dj_du': np.array(
            [[-0.41045956, 0.18834851, 1.43272264],
             [0.38202831, -0.17530219, -1.33348241],
             [0.07009355, -0.03216399, -0.24466386],
             [0.09472154, -0.04346509, -0.33062865],
             [-0.13638384, 0.06258276, 0.47605228]]).astype(float)

    },
    'test_naivesoftmax': {
        'loss': 2.217424877675181,
        'dj_dvc': np.array([-0.17249875, 0.64873661, 0.67821423]).astype(float),
        'dj_du': np.array([[-0.11394933, 0.05228819, 0.39774391],
                           [-0.02740743, 0.01257651, 0.09566654],
                           [-0.03385715, 0.01553611, 0.11817949],
                           [0.24348396, -0.11172803, -0.84988879],
                           [-0.06827005, 0.03132723, 0.23829885]]).astype(float)
    },
    'test_sigmoid': {
        's': np.array(
            [0.38553435, 0.29385824, 0.63372281, 0.40988622, 0.29385824, 0.5834337, 0.29385824, 0.5834337, 0.40988622,
             0.40988622, 0.63372281]).astype(float),
    }

}

In [3]:
def sigmoid(x):
    """
    Compute the sigmoid function for the input here.
    Arguments:
    x -- A scalar or numpy array.
    Return:
    s -- sigmoid(x)
    """

    s = np.exp(x)/(np.exp(x)+1)

    return s


In [4]:
test_sigmoid_data = {
        'x': np.array([-0.46612273, -0.87671855, 0.54822123, -0.36443576, -0.87671855, 0.33688521
                          , -0.87671855, 0.33688521, -0.36443576, -0.36443576, 0.54822123]).astype(float),
        's': np.array(
            [0.38553435, 0.29385824, 0.63372281, 0.40988622, 0.29385824, 0.5834337, 0.29385824, 0.5834337, 0.40988622,
             0.40988622, 0.63372281]).astype(float),
    }

In [5]:
%%run_pytest[clean] -vv

def test_sigmoid():

    test_sigmoid_data = {
        'x': np.array([-0.46612273, -0.87671855, 0.54822123, -0.36443576, -0.87671855, 0.33688521
                          , -0.87671855, 0.33688521, -0.36443576, -0.36443576, 0.54822123]).astype(float),
        's': np.array(
            [0.38553435, 0.29385824, 0.63372281, 0.40988622, 0.29385824, 0.5834337, 0.29385824, 0.5834337, 0.40988622,
             0.40988622, 0.63372281]).astype(float),
    }
    assert np.allclose(test_sigmoid_data['s'], sigmoid(test_sigmoid_data['x']))

UsageError: Cell magic `%%run_pytest[clean]` not found.


In [46]:
def naiveSoftmaxLossAndGradient(
        centerWordVec,
        outsideWordIdx,
        outsideVectors,
        dataset
):
    """ Naive Softmax loss & gradient function for word2vec models

    Implement the naive softmax loss and gradients between a center word's 
    embedding and an outside word's embedding. This will be the building block
    for our word2vec models.

    Arguments:
    centerWordVec -- numpy ndarray, center word's embedding
                    (v_c in the pdf handout)
    outsideWordIdx -- integer, the index of the outside word
                    (o of u_o in the pdf handout)
    outsideVectors -- outside vectors (rows of matrix) for all words in vocab
                      (U in the pdf handout)
    dataset -- needed for negative sampling, unused here.

    Return:
    loss -- naive softmax loss
    gradCenterVec -- the gradient with respect to the center word vector
                     (dJ / dv_c in the pdf handout)
    gradOutsideVecs -- the gradient with respect to all the outside word vectors
                    (dJ / dU)
                    
    Note:
     we usually use column vector convention (i.e., vectors are in column form) for vectors in matrix U and V (in the handout)
     but for ease of implementation/programming we usually use row vectors (representing vectors in row form).
    """

    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow.

    # Get word probability distribution with respect to v_c
    prob = np.dot(centerWordVec, outsideVectors.T)
    y_hat = softmax(prob)

    # This will also be the change in weights
    delta = y_hat.copy()
    delta[outsideWordIdx] -= 1

    # Get context word and calculate its naive softmax loss
    loss = -np.log(y_hat[outsideWordIdx])
    
    ### Gradients 
    # Center word gradient

    gradCenterVec = np.dot(delta, outsideVectors)
    
    # Outside word gradient
    # Cf. outer product of matrix x 'column' vector 
    gradOutsideVecs = np.dot(delta[:, np.newaxis], centerWordVec[np.newaxis, :])

    ### YOUR CODE HERE
    ### END YOUR CODE

    return loss, gradCenterVec, gradOutsideVecs

In [47]:
%%run_pytest[clean] -vv

dataset, dummy_vectors, dummy_tokens = dummy()

inputs = {
        'test_naivesoftmax': {
        'centerWordVec': np.array([-0.27323645, 0.12538062, 0.95374082]).astype(float),
        'outsideWordIdx': 3,
        'outsideVectors': np.array([[-0.6831809, -0.04200519, 0.72904007],
                                    [0.18289107, 0.76098587, -0.62245591],
                                    [-0.61517874, 0.5147624, -0.59713884],
                                    [-0.33867074, -0.80966534, -0.47931635],
                                    [-0.52629529, -0.78190408, 0.33412466]]).astype(float)
}
}
outputs = {'test_naivesoftmax': {
        'loss': 2.217424877675181,
        'dj_dvc': np.array([-0.17249875, 0.64873661, 0.67821423]).astype(float),
        'dj_du': np.array([[-0.11394933, 0.05228819, 0.39774391],
                           [-0.02740743, 0.01257651, 0.09566654],
                           [-0.03385715, 0.01553611, 0.11817949],
                           [0.24348396, -0.11172803, -0.84988879],
                           [-0.06827005, 0.03132723, 0.23829885]]).astype(float)
}
}


def test_naivesoftmax():
    loss, dj_dv, dj_du = naiveSoftmaxLossAndGradient(
            inputs['test_naivesoftmax']['centerWordVec'],
            inputs['test_naivesoftmax']['outsideWordIdx'],
            inputs['test_naivesoftmax']['outsideVectors'],
            dataset
        )
    assert np.allclose(loss, outputs['test_naivesoftmax']['loss'])
    assert np.allclose(dj_dv, outputs['test_naivesoftmax']['dj_dvc'])
    assert np.allclose(dj_du, outputs['test_naivesoftmax']['dj_du'])


platform linux -- Python 3.7.4, pytest-5.4.1, py-1.8.1, pluggy-0.13.1 -- /home/ray/.pyenv/versions/xcs224n/bin/python
cachedir: .pytest_cache
rootdir: /home/ray/XCS224N/Assignment_2/XCS224N-A2
collecting ...collected 1 item

sandbox.py::test_naivesoftmax <- <ipython-input-47-997c8349667a>FAILED  [100%]

______________________________ test_naivesoftmax _______________________________

    def test_naivesoftmax():
        loss, dj_dv, dj_du = naiveSoftmaxLossAndGradient(
                inputs['test_naivesoftmax']['centerWordVec'],
                inputs['test_naivesoftmax']['outsideWordIdx'],
                inputs['test_naivesoftmax']['outsideVectors'],
                dataset
            )
        assert np.allclose(loss, outputs['test_naivesoftmax']['loss'])
>       assert np.allclose(dj_dv, outputs['test_naivesoftmax']['dj_dvc'])

<ipython-input-47-997c8349667a>:34:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
<__array_function__ internals>:6: in a