# Word2Vec with NumPy

In [1]:
import numpy as np

In [2]:
# Training corpus: a short English passage that is tokenized into words
# further down to build the vocabulary and the training windows.
data = """The second architecture is similar to CBOW, but instead of predicting the current word based on the
context, it tries to maximize classification of a word based on another word in the same sentence.
More precisely, we use each current word as an input to a log-linear classifier with continuous
projection layer, and predict words within a certain range before and after the current word. We
found that increasing the range improves quality of the resulting word vectors, but it also increases
the computational complexity. Since the more distant words are usually less related to the current
word than those close to it, we give less weight to the distant words by sampling less from those
words in our training examples."""

In [3]:
def tokenize(string):
    """Split ``string`` into a list of alphabetic words.

    A word is a maximal run of characters for which ``str.isalpha()`` is
    true. Every other character (whitespace, punctuation, digits, hyphens)
    acts as a separator and is discarded. Case is preserved.

    Args:
        string: Text to split.

    Returns:
        list: The words in order of appearance; empty when there are none.
    """
    tokens = []
    letters = []

    for ch in string:
        if ch.isalpha():
            letters.append(ch)
        elif letters:
            tokens.append(''.join(letters))
            letters = []

    # Flush the final word: text that ends on a letter has no trailing
    # separator to trigger the append inside the loop.
    if letters:
        tokens.append(''.join(letters))

    return tokens

In [4]:
def generate(tks, ws):
    """Build (context, target) training pairs with a sliding window.

    For every position whose full window fits inside ``tks``, the target is
    the centre token and the context is the ``ws`` tokens before it followed
    by the ``ws`` tokens after it.

    Args:
        tks: Indexable sequence of tokens.
        ws: Number of context tokens taken on each side of the target.

    Returns:
        tuple: ``(tx, ty)`` where ``tx`` is a list of context lists (each of
        length ``2 * ws``) and ``ty`` is the list of matching target tokens.
        Both are empty when ``tks`` is shorter than ``2 * ws + 1``.
    """
    contexts = []
    targets = []

    for start in range(len(tks) - 2 * ws):
        centre = start + ws
        before = [tks[start + off] for off in range(ws)]
        after = [tks[centre + 1 + off] for off in range(ws)]
        targets.append(tks[centre])
        contexts.append(before + after)

    return contexts, targets

In [5]:
def encode(wrds):
    """Map every distinct word in ``wrds`` to a unique integer index.

    Indices run from 0 to ``n_distinct - 1`` and are assigned in order of
    first appearance. This keeps the mapping identical from one interpreter
    run to the next; numbering by iterating a ``set`` of strings does not,
    because string hashing is randomised per process.

    Args:
        wrds: Iterable of hashable tokens (duplicates allowed).

    Returns:
        dict: ``{word: index}`` with one entry per distinct word.
    """
    wrds_dict = {}

    for wrd in wrds:
        # setdefault leaves the index of an already-seen word untouched.
        wrds_dict.setdefault(wrd, len(wrds_dict))

    return wrds_dict

In [6]:
# Split the corpus into words, give each distinct word an integer index, and
# build (context, target) pairs using 3 words on each side of the target.
tokens = tokenize(data)
encoded = encode(tokens)
t_x, t_y = generate(tokens, 3)

In [44]:
def one_hoter(x, y, enc):
    """One-hot encode context windows and target words.

    Args:
        x: Iterable of context windows, each an iterable of words.
        y: Iterable of target words.
        enc: Mapping from word to integer index. Its length sets the width
            of every one-hot vector.

    Returns:
        tuple: ``(enc_tx, enc_ty)``. ``enc_tx`` holds one list per window,
        containing one 1-D one-hot array per context word. ``enc_ty`` holds
        one 1-D one-hot array per target word.

    Raises:
        KeyError: If a word is not present in ``enc``.
        IndexError: If an index stored in ``enc`` is outside the vector
            width ``len(enc)``.
    """
    # Size the vectors from the mapping that was passed in, so the function
    # does not depend on any module-level variable.
    len_onehot = len(enc)

    def _vector(wrd):
        # Single 1.0 at the word's index, zeros elsewhere.
        ar = np.zeros(len_onehot)
        ar[enc[wrd]] = 1
        return ar

    enc_tx = [[_vector(wrd) for wrd in lis] for lis in x]
    # Exactly one vector per target word, independent of the context lists.
    enc_ty = [_vector(wrd) for wrd in y]

    return enc_tx, enc_ty

In [69]:
# One-hot encode the context windows (t_x) and the target words (t_y) using
# the word -> index map.
one_hot_x, one_hot_y = one_hoter(t_x, t_y, encoded)

In [70]:
# Randomly initialised weight matrices for the two layers. Both are sized
# from the vocabulary so they always match the width of the one-hot vectors,
# even if the corpus (and therefore the vocabulary) changes.
vocab_size = len(encoded)  # 79 distinct words for the sample text
embedding_dim = 10
w1 = np.random.random((vocab_size, embedding_dim))  # one-hot -> embedding
w2 = np.random.random((embedding_dim, vocab_size))  # embedding -> word scores

In [89]:
def wrd_emb(inp, w):
    """Project ``inp`` through ``w`` and average the result over axis 0.

    Args:
        inp: Array-like multiplied on the left of ``w`` with ``np.dot``
            (presumably the stacked one-hot context vectors of one window;
            confirm against the caller).
        w: Weight matrix.

    Returns:
        The mean over the first axis of ``np.dot(inp, w)``.
    """
    return np.dot(inp, w).mean(axis=0)


def dense(inp, w):
    """Apply a bias-free linear layer: return ``np.dot(inp, w)``.

    Args:
        inp: Input array-like.
        w: Weight matrix.

    Returns:
        The dot product of ``inp`` and ``w``.
    """
    return np.dot(inp, w)


def softmax(inp):
    """Turn a set of scores into probabilities that sum to 1.

    The normalisation runs over *all* elements of ``inp`` (no axis is
    selected), so pass one score vector at a time.

    Args:
        inp: Array-like of real-valued scores.

    Returns:
        numpy.ndarray: Same shape as ``inp``, with non-negative entries
        summing to 1. An empty input yields an empty array.
    """
    scores = np.asarray(inp)
    if scores.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(scores)

    # Subtracting the maximum does not change the result mathematically but
    # keeps exp() from overflowing to inf (and the ratio from becoming NaN).
    # The largest term becomes exp(0) == 1, so the denominator is at least 1
    # and needs no additive epsilon.
    exps = np.exp(scores - np.max(scores))

    return exps / np.sum(exps)

def forward_pass(x, w1, w2):
    """Run the three-stage forward pass and return the softmax output.

    Stages, in order: ``wrd_emb(x, w1)``, then ``dense(..., w2)``, then
    ``softmax(...)``.

    Args:
        x: Input passed to ``wrd_emb``.
        w1: Weights used by ``wrd_emb``.
        w2: Weights used by ``dense``.

    Returns:
        The output of ``softmax`` applied to the dense-layer scores.
    """
    embedded = wrd_emb(x, w1)
    return softmax(dense(embedded, w2))

def cross_entropy(orig, pred):
    """Return the cross-entropy ``-sum(orig * log(pred))``.

    Args:
        orig: Array-like of target weights (e.g. a one-hot vector).
        pred: Array-like of predicted probabilities, broadcastable against
            ``orig``.

    Returns:
        The scalar loss, summed over all elements.
    """
    # Floor the predictions before taking the log: log(0) is -inf, and
    # 0 * -inf is NaN, which would poison the whole sum even for classes
    # whose target weight is zero. Values below the floor are treated as
    # equal to it.
    eps = 1e-12
    safe_pred = np.maximum(np.asarray(pred), eps)

    loss = -1 * np.sum(np.asarray(orig) * np.log(safe_pred))

    return loss