In [1]:
import sys
import collections
import numpy as np

In [2]:
# Read the corpus: one review per line, surrounding whitespace removed.
with open('../021 Sentiment Analysis - Trask/reviews.txt','r') as f:
    reviews = [line.strip() for line in f]

In [4]:
# Corpus-wide word frequencies: how many times each word occurs in the WHOLE dataset.
counter_all = collections.Counter()
for review in reviews:
    # Counter.update adds one per occurrence of each whitespace-separated token.
    counter_all.update(review.split())

In [5]:
# Vocabulary: the distinct words seen anywhere in the corpus.
# NOTE: a set has no guaranteed iteration order, and the word ids assigned
# below come from enumerating this set.
review_vocab = set(counter_all.keys())
review_vocab_size = len(review_vocab)
print(review_vocab_size)

74073


In [6]:
# Two-way lookup between words and integer ids.
# Ids follow the iteration order of `review_vocab`.
index2word = dict(enumerate(review_vocab))
word2index = {word: i for i, word in index2word.items()}

In [7]:
def review_to_indices(review):
    """Encode a review as an array of vocabulary ids.

    The review is split on whitespace and every token is looked up in the
    module-level `word2index` mapping; a token missing from the mapping
    raises KeyError.

    Params:
        review - str, whitespace-separated words
    Returns:
        np.ndarray with one id per token, in the original order
    """
    return np.array([word2index[token] for token in review.split()])

In [8]:
# Build (context, target) training pairs: the two words on each side of a
# position form the input row, the word at that position is the target.
context_offsets = np.array([-2, -1, 1, 2])
inputs = []
targets = []
for review in reviews:
    review_as_ints = review_to_indices(review)
    # Only positions with two neighbours on both sides are used.
    for center in range(2, len(review_as_ints) - 2):
        inputs.append(review_as_ints[center + context_offsets])
        targets.append(review_as_ints[center])
inputs = np.array(inputs)
targets = np.expand_dims(targets, -1)  # one target id per row

In [16]:
# Sanity check: pick a word position in a review and confirm that the
# matching row of `inputs` / `targets` holds its neighbours and itself.
n_rev = 10
n_word = 12

# find location
print('chunk:  ', ' '.join(reviews[n_rev].split()[n_word-2:n_word+3]))

# find index in target array
lens = np.array([len(x.split()) for x in reviews])
# A review of n words yields max(n - 4, 0) samples (positions 2 .. n-3), so
# the per-review count is clamped at zero; without the clamp a review with
# fewer than 4 words would shift every later index.
ii = np.sum(np.maximum(lens[0:n_rev] - 4, 0)) + n_word - 2

print('inputs: ', index2word[inputs[ii, 0]],
                 index2word[inputs[ii, 1]],
                 '      ',
                 index2word[inputs[ii, 2]],
                 index2word[inputs[ii, 3]])
print('target:            ', index2word[targets[ii, 0]])

chunk:   the quirky insane robin williams
inputs:  the quirky        robin williams
target:             insane


# Neural Network

In [76]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^-x), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator


def sigmoid_deriv(x):
    """Derivative of the logistic function at x: s(x) * (1 - s(x))."""
    s = sigmoid(x)
    return s * (1 - s)

In [77]:
def forward(xi, yi, Wh, Wo):
    """Forward pass that touches only the indexed rows/columns of the weights.

    Params:
        xi - 1-D array of input indices [1234, 2345, ... ]; selects rows of Wh
        yi - 1-D array of output indices [9876, 8765, ... ] - target words,
             including correct target; selects columns of Wo
        Wh - hidden weights
        Wo - output weights
    Returns:
        (y_hat, z_out, z_hid): sigmoid outputs and pre-activations for the
        selected outputs, and the (linear) hidden vector.
    """
    assert xi.ndim == 1
    assert yi.ndim == 1

    # Hidden layer has no activation: it is the sum of the selected rows of Wh.
    hidden = Wh[xi].sum(axis=0, keepdims=True)
    logits = np.matmul(hidden, Wo[:, yi])
    return sigmoid(logits), logits, hidden

In [150]:
def backward(xi, yi, y, Wh, Wo, lr):
    """Gradients of the loss 0.5 * sum((y - y_hat)**2) for one example.

    Runs `forward` on the selected inputs/outputs and back-propagates the
    error. Neither Wh nor Wo is modified; the gradients are returned as dense
    arrays so they can be compared with a numerical gradient.

    Params:
        xi - 1-D array of input indices [1234, 2345]; rows of Wh that are active
        yi - 1-D array of output indices; columns of Wo that are evaluated
        y  - 2-D array of targets with one column per entry of yi
        Wh - hidden weights
        Wo - output weights
        lr - unused; kept so existing callers (which pass None) keep working
    Returns:
        (del_Wo, del_Wh) - arrays shaped like Wo and Wh, zero everywhere
        except the columns in yi and the rows in xi respectively.
    """
    assert xi.ndim == 1
    assert yi.ndim == 1
    assert y.ndim == 2

    y_hat, z_out, z_hid = forward(xi, yi, Wh, Wo)

    ro_out = -(y - y_hat) * sigmoid_deriv(z_out)  # 1 x len(yi)
    del_Wo_i = np.dot(z_hid.T, ro_out)            # hid_n x len(yi)
    ro_hid = np.dot(ro_out, Wo[:, yi].T)          # 1 x hid_n

    # Accumulate instead of assigning: forward() sums a repeated row of Wh
    # once per occurrence in xi, so its gradient must be added once per
    # occurrence too. Plain fancy-index assignment would keep only one copy.
    del_Wh = np.zeros_like(Wh)
    np.add.at(del_Wh, xi, ro_hid)

    # Same for output columns that appear more than once in yi.
    del_Wo = np.zeros_like(Wo)
    np.add.at(del_Wo, (slice(None), yi), del_Wo_i)

    return del_Wo, del_Wh
    

In [31]:
# 3x3 scratch matrix holding 0..8 in row-major order.
A = np.arange(9).reshape(3, 3)
A

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [32]:
# Three identical rows [5, 6]: a scratch update block with two columns.
dAi = np.tile([5, 6], (3, 1))
dAi

array([[5, 6],
       [5, 6],
       [5, 6]])

In [222]:
# Toy network used to check the index-based passes against dense versions.
N_in = 10
N_hid = 8
N_out = 12
np.random.seed(1)
# Normal weights with std = fan_in ** -0.5; the draw order below fixes the values.
W_hid = np.random.normal(0, N_in ** -0.5, size=[N_in, N_hid])
W_out = np.random.normal(0, N_hid ** -0.5, size=[N_hid, N_out])
xx = np.random.randint(0, 2, size=[1, N_in])  # random 0/1 input row
yy = np.random.normal(0, 1, size=[1, N_out])  # random real-valued targets
xi = np.nonzero(xx)[1]                         # column indices of the active inputs
yi = np.arange(N_out)                          # evaluate every output

In [223]:
# Show the toy input/target arrays next to their index form.
for label, value in (('xx', xx), ('xi', xi), ('yy', yy), ('yi', yi)):
    print(label, value)

xx [[0 1 0 1 1 1 1 1 0 1]]
xi [1 3 4 5 6 7 9]
yy [[ 0.53317198 -0.58513304 -0.56053692  0.14077318  1.76760095 -0.78777374
  -0.06172207  0.23842679  0.07477245  0.33830476  0.27807663  1.22396588]]
yi [ 0  1  2  3  4  5  6  7  8  9 10 11]


In [252]:
# Test forward pass against non-index version
# NOTE(review): forward_vec is defined in a cell that appears further down in
# this notebook; that cell must have been run before this one.
np.random.seed(1)

for i in range(10):
    # Random 0/1 input row, its active column indices, and 5 distinct output columns.
    xx = np.random.randint(0, 2, size=[1, N_in])
    xi = np.nonzero(xx)[1]
    yi = np.random.choice(range(N_out), size=5, replace=False)

    # Dense pass over all outputs vs. indexed pass over the sampled outputs only.
    y_hat, z_out, z_hid = forward_vec(xx, W_hid, W_out)
    y_hati, z_outi, z_hidi = forward(xi, yi, W_hid, W_out)

    # The indexed results must match the corresponding columns of the dense results.
    assert np.allclose(y_hat[:,yi], y_hati)
    assert np.allclose(z_out[:,yi], z_outi)
    assert np.allclose(z_hid, z_hidi)
    
    # Largest absolute difference for y_hat, z_out and z_hid.
    print(np.max(np.abs(y_hat[:,yi]-y_hati)), np.max(np.abs(z_out[:,yi]-z_outi)), np.max(np.abs(z_hid-z_hidi)))

0.0 5.551115123125783e-17 0.0
0.0 5.551115123125783e-17 0.0
5.551115123125783e-17 1.1102230246251565e-16 0.0
5.551115123125783e-17 1.1102230246251565e-16 0.0
0.0 1.1102230246251565e-16 0.0
5.551115123125783e-17 1.6653345369377348e-16 0.0
5.551115123125783e-17 1.1102230246251565e-16 0.0
5.551115123125783e-17 1.1102230246251565e-16 0.0
0.0 1.1102230246251565e-16 0.0
5.551115123125783e-17 1.1102230246251565e-16 0.0


In [None]:
# Compare the analytic gradients from backward() with the numerical
# gradients from ngrad() over all outputs.
# NOTE(review): this uses whatever xx, yy, xi, yi are currently bound.
# backward() subtracts y_hat (one column per entry of yi) from yy, so yi has
# to cover every column of yy here - presumably this is meant to run right
# after the toy-network setup cell; confirm.
ngWo, ngWh = ngrad(xx, yy, W_hid, W_out)
dWo, dWh = backward(xi, yi, yy, W_hid, W_out, None)
assert np.allclose(ngWo, dWo)
assert np.allclose(ngWh, dWh)

In [251]:
# Gradient check with a sampled subset of outputs: compare backward()
# (index-based, analytic) against ngrad() (dense, numerical).
np.random.seed(1)
for i in range(10):

    xx = np.random.randint(0, 2, size=[1, N_in])
    yy = np.random.normal(0, 1, [1, N_out])
    xi = np.nonzero(xx)[1]
    # NOTE(review): yi is not used in this loop (temp_yi is); the draw still
    # advances the random stream.
    yi = np.random.choice(range(N_out), size=5, replace=False)

    # 5 distinct output columns and their targets.
    temp_yi = np.random.choice(range(N_out), size=5, replace=False)
    temp_yy = yy[:,temp_yi]

    dWo, dWh = backward(xi, temp_yi, temp_yy, W_hid, W_out, None)

    # Dense targets for ngrad(): start from the current predictions and
    # overwrite only the sampled columns with the real targets, so the
    # non-sampled outputs have zero error at the unperturbed weights.
    y_hat, z_out, z_hid = forward_vec(xx, W_hid, W_out)
    y_hat[:,temp_yi] = yy[:,temp_yi]
    ngWo, ngWh = ngrad(xx, y_hat, W_hid, W_out)

    assert np.allclose(ngWo, dWo)
    assert np.allclose(ngWh, dWh)
    
    # Largest absolute difference for the hidden and output gradients.
    print(np.max(np.abs(ngWh-dWh)), np.max(np.abs(ngWo-dWo)))

1.9941970297310263e-10 8.94230023362752e-10
9.992788541079989e-11 3.041814022886058e-10
2.3733581766549605e-10 4.022676636239453e-10
4.238850243032388e-11 9.43689570931383e-12
1.7664786300386481e-10 2.0341855977434875e-10
1.0316739129656582e-10 3.1466040883998403e-10
4.986805413054185e-11 3.680737936662126e-10
4.5849102292550015e-11 1.6478229891703222e-10
1.237336344495077e-10 2.739610482915822e-10
1.2854056707922723e-10 1.6746970477043988e-10


In [232]:
ngWh

array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ],
       [ 0.01122393, -0.02214843,  0.0408137 , -0.0321047 ,  0.02919282,
         0.10094825,  0.26497728,  0.09398653],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ],
       [ 0.01122393, -0.02214843,  0.0408137 , -0.0321047 ,  0.02919282,
         0.10094825,  0.26497728,  0.09398653],
       [ 0.01122393, -0.02214843,  0.0408137 , -0.0321047 ,  0.02919282,
         0.10094825,  0.26497728,  0.09398653],
       [ 0.01122393, -0.02214843,  0.0408137 , -0.0321047 ,  0.02919282,
         0.10094825,  0.26497728,  0.09398653],
       [ 0.01122393, -0.02214843,  0.0408137 , -0.0321047 ,  0.02919282,
         0.10094825,  0.26497728,  0.09398653],
       [ 0.01122393, -0.02214843,  0.0408137 , -0.0321047 ,  0.02919282,
         0.10094825,  0.26497728,  0.09398653],
       [ 0.        ,  0.        

In [220]:
dWh

array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ],
       [ 0.01122393, -0.02214843,  0.0408137 , -0.0321047 ,  0.02919282,
         0.10094825,  0.26497728,  0.09398653],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ],
       [ 0.01122393, -0.02214843,  0.0408137 , -0.0321047 ,  0.02919282,
         0.10094825,  0.26497728,  0.09398653],
       [ 0.01122393, -0.02214843,  0.0408137 , -0.0321047 ,  0.02919282,
         0.10094825,  0.26497728,  0.09398653],
       [ 0.01122393, -0.02214843,  0.0408137 , -0.0321047 ,  0.02919282,
         0.10094825,  0.26497728,  0.09398653],
       [ 0.01122393, -0.02214843,  0.0408137 , -0.0321047 ,  0.02919282,
         0.10094825,  0.26497728,  0.09398653],
       [ 0.01122393, -0.02214843,  0.0408137 , -0.0321047 ,  0.02919282,
         0.10094825,  0.26497728,  0.09398653],
       [ 0.        ,  0.        

In [171]:
dWo

array([[ 0.01597556,  0.12691019,  0.12848776,  0.04808367, -0.08283452,
         0.12665826,  0.02648427,  0.02151276,  0.05937474,  0.01601468,
         0.0080473 , -0.09041316],
       [-0.00264196, -0.0209878 , -0.02124869, -0.00795185,  0.01369877,
        -0.02094613, -0.00437984, -0.00355768, -0.00981911, -0.00264843,
        -0.00133082,  0.01495209],
       [ 0.0089963 ,  0.07146683,  0.07235521,  0.02707732, -0.04664653,
         0.07132496,  0.01491407,  0.01211446,  0.03343565,  0.00901834,
         0.00453167, -0.05091429],
       [-0.03069609, -0.24385034, -0.24688156, -0.0923899 ,  0.15916157,
        -0.24336628, -0.05088795, -0.04133548, -0.11408502, -0.03077126,
        -0.01546241,  0.17372349],
       [-0.04223405, -0.3355082 , -0.33967878, -0.12711718,  0.21898683,
        -0.33484218, -0.07001559, -0.05687255, -0.15696701, -0.04233748,
        -0.02127438,  0.23902223],
       [ 0.01614138,  0.12822747,  0.12982143,  0.04858276, -0.08369431,
         0.12797293,  

In [35]:
A

array([[ 0,  6,  8],
       [ 3,  9, 11],
       [ 6, 12, 14]])

In [44]:
def forward_vec(xv, Wh, Wo):
    """Dense reference forward pass using full matrix products.

    Params:
        xv - 2-D sparse input row(s) [[0, 0, 1, 0, 1, ...]]
        Wh - weights hidden
        Wo - weights output
    Returns:
        (y_hat, z_out, z_hid): sigmoid outputs, output pre-activations and
        the (linear) hidden values.
    """
    assert xv.ndim == 2
    hidden = np.matmul(xv, Wh)      # hidden layer has no activation
    logits = np.matmul(hidden, Wo)
    return sigmoid(logits), logits, hidden

In [37]:
def _central_difference(W, rows, loss_of, eps):
    """Central-difference estimate of d loss / d W for the given rows of W."""
    grad = np.zeros_like(W)
    W_work = W.copy()  # single scratch copy; W itself is never modified
    for r in rows:
        for c in range(W.shape[1]):
            original = W_work[r, c]
            W_work[r, c] = original + eps
            l_pls = loss_of(W_work)
            W_work[r, c] = original - eps
            l_min = loss_of(W_work)
            W_work[r, c] = original  # restore before perturbing the next entry
            grad[r, c] = (l_pls - l_min) / (eps * 2)
    return grad


def ngrad(xv, y, Wh, Wo):
    """Numerical gradients of `loss` with respect to Wo and Wh.

    Each weight is perturbed by +/- eps and the gradient is estimated as
    (loss_plus - loss_minus) / (2 * eps).

    Params:
        xv - 2-D sparse input [[0, 0, 1, 0, 1, 0, ...]]
        y  - 2-D targets passed through to `loss`
        Wh - weights hidden
        Wo - weights output
    Returns:
        (ngrad_Wo, ngrad_Wh) - arrays shaped like Wo and Wh. Rows of Wh whose
        input entry is zero are not perturbed and keep a gradient of zero.
    """
    assert xv.ndim == 2
    assert y.ndim == 2

    eps = 1e-4

    # numerical gradient check output
    ngrad_Wo = _central_difference(
        Wo, range(Wo.shape[0]), lambda W: loss(xv, y, Wh, W), eps)

    # numerical gradient check hidden (only rows with a non-zero input)
    _, idx_nonzero = np.nonzero(xv)
    ngrad_Wh = _central_difference(
        Wh, idx_nonzero, lambda W: loss(xv, y, W, Wo), eps)

    return ngrad_Wo, ngrad_Wh

In [164]:
def loss(xv, y, Wh, Wo):
    """Half the summed squared error between y and the dense forward output."""
    y_hat, _, _ = forward_vec(xv, Wh, Wo)
    residual = y - y_hat
    return .5 * np.sum(residual ** 2)

In [None]:
# Row of `targets` for word n_word of review n_rev. A review of n words
# yields max(n - 4, 0) samples, so the per-review count is clamped at zero.
np.sum(np.maximum(lens[0:n_rev] - 4, 0)) + n_word - 2

In [None]:
# List target words 150..179 together with their row number.
for row, word_id in enumerate(targets[150:180, 0], start=150):
    print(row, index2word[word_id])

In [None]:
reviews[0]

In [None]:
inputs[0]

In [None]:
index2word[inputs[0][3]]

In [None]:
# `targets` is 2-D (one id per row), so take the scalar id: a whole row is an
# array, which is not hashable and cannot be used as a dictionary key.
index2word[targets[0, 0]]

In [None]:
review_as_ints = review_to_indices(reviews[0])

In [None]:
review_as_ints = np.array(review_as_ints)

In [None]:
A = np.array([0, 1, 2, 3, 4, 5, 6, 7])

In [None]:
# Print the two neighbours on each side of every interior position of A.
window_offsets = np.array([-2, -1, 1, 2])
for center in range(2, len(A) - 2):
    print(A[center + window_offsets])

In [None]:
# Split every review on single spaces (consecutive spaces give empty tokens).
tokens = [review_text.split(" ") for review_text in reviews]

In [None]:
len(tokens[0])

In [None]:
# Tally every token, then keep the distinct words as the vocabulary.
# Counts go down by one per occurrence; only the keys are used for `vocab`.
wordcnt = collections.Counter()
for sent in tokens:
    wordcnt.subtract(sent)
vocab = list(set(word for word, _ in wordcnt.most_common()))

In [None]:
# Tokenise the raw corpus, build the vocabulary and encode every review.
# The corpus is read here and Counter is taken from the `collections` module
# imported at the top of the notebook, so this cell does not depend on names
# that are only bound by later cells.
with open('../021 Sentiment Analysis - Trask/reviews.txt') as f:
    raw_reviews = f.readlines()

tokens = list(map(lambda x:(x.split(" ")),raw_reviews))
# Counts go down by one per occurrence; only the distinct words are used.
wordcnt = collections.Counter()
for sent in tokens:
    for word in sent:
        wordcnt[word] -= 1
vocab = list(set(map(lambda x:x[0],wordcnt.most_common())))

# NOTE: this rebinds `word2index`; the `index2word` table built earlier in the
# notebook is not updated here.
word2index = {}
for i,word in enumerate(vocab):
    word2index[word]=i

concatenated = list()      # every word id of the corpus, in order
input_dataset = list()     # one list of word ids per review
for sent in tokens:
    sent_indices = list()
    for word in sent:
        try:
            word_index = word2index[word]
        except KeyError:
            # Best-effort: skip a token that has no vocabulary entry. Only
            # the missing-key case is ignored; other errors propagate.
            continue
        sent_indices.append(word_index)
        concatenated.append(word_index)
    input_dataset.append(sent_indices)
concatenated = np.array(concatenated)
np.random.shuffle(input_dataset)

In [None]:
len(review)

In [None]:
[(np.random.rand(negative)*len(concatenated)).astype('int').tolist()]

In [None]:
concatenated[(np.random.rand(negative)*len(concatenated)).astype('int').tolist()]

In [None]:
import sys,random,math
from collections import Counter
import numpy as np

# Seed both random generators used below and in training.
np.random.seed(1)
random.seed(1)
# The context manager closes the file even if reading fails.
with open('../021 Sentiment Analysis - Trask/reviews.txt') as f:
    raw_reviews = f.readlines()

tokens = list(map(lambda x:(x.split(" ")),raw_reviews))
# Counts go down by one per occurrence; only the distinct words are used.
wordcnt = Counter()
for sent in tokens:
    for word in sent:
        wordcnt[word] -= 1
vocab = list(set(map(lambda x:x[0],wordcnt.most_common())))

# NOTE: this rebinds `word2index`; the `index2word` table built earlier in the
# notebook is not updated here.
word2index = {}
for i,word in enumerate(vocab):
    word2index[word]=i

concatenated = list()      # every word id of the corpus, in order
input_dataset = list()     # one list of word ids per review
for sent in tokens:
    sent_indices = list()
    for word in sent:
        try:
            word_index = word2index[word]
        except KeyError:
            # Best-effort: skip a token that has no vocabulary entry. Only
            # the missing-key case is ignored; other errors propagate.
            continue
        sent_indices.append(word_index)
        concatenated.append(word_index)
    input_dataset.append(sent_indices)
concatenated = np.array(concatenated)
random.shuffle(input_dataset)

In [None]:
%load_ext line_profiler

In [None]:
# Hyper-parameters.
alpha = 0.05       # step size applied to every weight update
iterations = 2     # number of passes over input_dataset
hidden_size = 50   # width of each word vector
window = 2         # context radius around the target word
negative = 5       # number of non-target slots in layer_2_target

# Input vectors start as small uniform values in [-0.1, 0.1).
weights_0_1 = (np.random.rand(len(vocab), hidden_size) - 0.5) * 0.2
# Output vectors start at zero. The random draw is kept because it advances
# the random stream exactly as before.
weights_1_2 = np.random.rand(len(vocab), hidden_size) * 0

# Expected output: 1 for the first slot (the true word), 0 for the others.
layer_2_target = np.zeros(negative + 1)
layer_2_target[0] = 1

def similar(target='beautiful'):
    """Ten words whose rows of `weights_0_1` are closest to `target`'s row.

    Closeness is Euclidean distance; each score is the negated distance, so
    the largest score is the nearest word. The target word itself scores
    -0.0, the largest possible value.

    Params:
        target - word to look up in the module-level `word2index`
    Returns:
        list of up to 10 (word, score) pairs, best first
    """
    anchor = weights_0_1[word2index[target]]

    scores = Counter()
    for word, index in word2index.items():
        offset = weights_0_1[index] - anchor
        scores[word] = -math.sqrt(sum(offset * offset))
    return scores.most_common(10)

def sigmoid(x):
    """Logistic function 1 / (1 + e^-x), applied element-wise.

    Same formula as the `sigmoid` defined earlier in this notebook; running
    this cell rebinds the name.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [None]:
(np.random.rand(negative) * len(concatenated)).astype('int').tolist()

In [None]:
%timeit len(concatenated)

In [None]:
%timeit np.random.randint(0, len(concatenated), size=5)

In [None]:
%timeit (np.random.rand(negative) * len(concatenated)).astype('int').tolist()

In [None]:
# Scratch values for trying out the sampling expression in the next cell.
rand_5 = np.random.randint(0, len(concatenated), size=5)  # 5 random positions in `concatenated`
review = input_dataset[0]  # first entry of input_dataset
target_i = 0               # position of the target word inside `review`

In [None]:
[review[target_i]]+list(concatenated[rand_5])

In [None]:
def train(max_rev_i=100):
    """Train the word vectors with negative sampling, updating them in place.

    For every word position the context vectors are averaged, scored against
    the true word plus `negative` randomly drawn corpus words, and both
    weights_0_1 and weights_1_2 are updated with step size `alpha`.

    Params:
        max_rev_i - index of the last review processed before training stops.
                    Defaults to 100, the cap used for profiling; pass None to
                    run over all of `input_dataset * iterations`.
    """
    for rev_i, review in enumerate(input_dataset * iterations):
        for target_i in range(len(review)):

            # since it's really expensive to predict every vocabulary
            # we're only going to predict a random subset
            # (sample count follows `negative`, so it matches layer_2_target)
            rand_neg = np.random.randint(0, len(concatenated), size=negative)
            target_samples = [review[target_i]] + list(concatenated[rand_neg])

            # Up to `window` words on each side. Slicing clamps at the end of
            # the review, so no explicit min() is needed.
            left_context = review[max(0, target_i - window):target_i]
            right_context = review[target_i + 1:target_i + 1 + window]
            context = left_context + right_context
            if not context:
                # A one-word review has no context. The mean of an empty
                # selection is NaN and would overwrite the output weights.
                continue

            layer_1 = np.mean(weights_0_1[context], axis=0)
            layer_2 = sigmoid(layer_1.dot(weights_1_2[target_samples].T))
            layer_2_delta = layer_2 - layer_2_target
            layer_1_delta = layer_2_delta.dot(weights_1_2[target_samples])

            weights_0_1[context] -= layer_1_delta * alpha
            weights_1_2[target_samples] -= np.outer(layer_2_delta, layer_1) * alpha

        # Early stop, used to keep profiling runs short.
        if max_rev_i is not None and rev_i >= max_rev_i:
            break


In [None]:
%lprun -f train train()

In [None]:
def analogy(positive=['terrible','good'],negative=['bad']):
    """Words closest to sum(positive) - sum(negative) in weight space.

    Note that the `negative` parameter shadows the module-level `negative`
    inside this function.

    NOTE(review): the vectors added into the query are the rows of
    weights_0_1 multiplied by their squared length, not divided by their
    length - confirm this is the intended normalisation.

    Params:
        positive - words whose scaled vectors are added to the query
        negative - words whose scaled vectors are subtracted from the query
    Returns:
        up to 9 (word, score) pairs, score = negated Euclidean distance
        between the word's row of weights_0_1 and the query; the single best
        hit is dropped.
    """
    # Sum of squares of every row, as a column so it broadcasts across the row.
    sq_norms = np.sum(weights_0_1 * weights_0_1, axis=1).reshape(-1, 1)
    scaled_weights = weights_0_1 * sq_norms

    query_vect = np.zeros(len(weights_0_1[0]))
    for word in positive:
        query_vect += scaled_weights[word2index[word]]
    for word in negative:
        query_vect -= scaled_weights[word2index[word]]

    scores = Counter()
    for word, index in word2index.items():
        offset = weights_0_1[index] - query_vect
        scores[word] = -np.sqrt(sum(offset * offset))

    top_hits = scores.most_common(10)
    return top_hits[1:]

In [None]:
analogy(['terrible','good'],['bad'])

In [None]:
analogy(['elizabeth','he'],['she'])