In [1]:
import numpy as np
import plotly.graph_objects as go
from sklearn.preprocessing import OneHotEncoder
from brown import Brown
import random
from tqdm import tqdm_notebook as tqdm

In [2]:
# Load the Brown corpus restricted to a limited vocabulary (vocab_size=100).
# word2idx maps tokens to integer indices; sentences holds the corpus as
# index sequences (see how both are consumed in the cells below).
word2idx, sentences = Brown().get_limited_vocab(vocab_size=100)

# Report both sizes with a single print call, one figure per line.
print(
    "Vocabulary Size : {}".format(len(word2idx)),
    "Corpus Size : {}".format(len(sentences)),
    sep="\n",
)

Vocabulary Size : 101
Corpus Size : 56610


In [3]:
# Indices of the sentence-boundary tokens; they are used both when counting
# bigrams and when wrapping each sentence for training.
start_idx, end_idx = word2idx["START"], word2idx["END"]

In [4]:
def get_bigram_probs(start_idx, end_idx, V, sentences, smoothning=1.0):
    """
    ---------------------------------------------------------------
    Description :
    Estimates smoothed bigram transition probabilities from a corpus.
    Each non-empty sentence w_1 ... w_n is treated as
    START w_1 ... w_n END and every adjacent pair is counted exactly once:
    (START, w_1), (w_1, w_2), ..., (w_{n-1}, w_n), (w_n, END).
    Empty sentences contribute no counts.
    
    Input:
    start_idx : index of "START" tag
    end_idx : index of "END" tag
    V : vocab size
    sentences : list of sentences, each a sequence of word indices in [0, V)
    smoothning : constant added to every bigram count before normalising
                 (add-k smoothing)
    
    Return :
    bigram_probs : a numpy ndarray of size (V x V); entry [i, j] is the
                   estimated probability that word j follows word i.
                   Each row is divided by its own total, so rows sum to 1
                   whenever that total is non-zero.
    -----------------------------------------------------------------
    """
    # Start every cell at the smoothing constant so unseen bigrams keep
    # a non-zero probability (for smoothning > 0).
    bigram_probs = np.ones((V, V)) * smoothning

    for sentence in sentences:
        if len(sentence) == 0:
            continue

        # START -> first word
        bigram_probs[start_idx, sentence[0]] += 1

        # Every interior transition, including the one that ends on the
        # last word of the sentence.
        for prev_word, next_word in zip(sentence[:-1], sentence[1:]):
            bigram_probs[prev_word, next_word] += 1

        # last word -> END (also applies to one-word sentences)
        bigram_probs[sentence[-1], end_idx] += 1

    # Turn counts into conditional distributions row by row.
    bigram_probs /= bigram_probs.sum(axis=1, keepdims=True)

    return bigram_probs
    

In [5]:
# The vocabulary size fixes the shape (V x V) of every weight matrix below.
V = len(word2idx)

# Count-based bigram probabilities, computed with the default smoothing.
W_bigram = get_bigram_probs(
    start_idx=start_idx,
    end_idx=end_idx,
    V=V,
    sentences=sentences,
)

In [6]:
# Initial (V x V) weights: uniform samples from [0, 1) scaled down by sqrt(V).
W = np.random.random_sample((V, V)) / np.sqrt(V)

In [2]:
def softmax(z):
    """
    ------------------------------------------------
    Description :
    Row-wise softmax of a 2-D array, computed in a numerically stable way.
    
    Input :
    z : a numpy ndarray of size (N x V); each row holds the scores of one sample
    
    Return :
    soft : a numpy ndarray of size (N x V); each row is non-negative and sums to 1
    -------------------------------------------------
    """
    # Subtract each row's own maximum. Softmax is invariant to a per-row
    # shift, and this guarantees that every row contains exp(0) = 1, so the
    # denominator can never underflow to zero (a single global maximum
    # could turn a whole low-valued row into 0 / 0 = nan).
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(shifted)
    soft = exp_z / np.sum(exp_z, axis=1, keepdims=True)
    return soft

In [8]:
def forward(X, W):
    """
    -------------------------------------------------
    Description:
    Forward pass of the single linear layer: multiplies the inputs by the
    weight matrix (no bias, no activation).
    
    Input:
    X : a numpy ndarray of size (N x V), one input row per sample
    W : a numpy ndarray of size (V x V), the layer weights
    
    Return :
    a numpy ndarray of size (N x V) holding the product X . W
    --------------------------------------------------
    """
    return np.dot(X, W)

In [9]:
def backprop(X, P, Y):
    """
    ---------------------------------------------
    Description :
    Computes X^T (P - Y), the weight update direction used by the
    training loop. For a softmax output trained with cross-entropy this is
    the gradient of the summed loss with respect to the weights.
    
    Input : 
    X : a numpy ndarray of size (N x V), the layer inputs
    P : a numpy ndarray of size (N x V), the predicted probabilities
    Y : a numpy ndarray of size (N x V), the targets
    
    Return :
    a numpy ndarray of size (V x V), the gradient with respect to the weights
    ---------------------------------------------
    """
    prediction_error = P - Y
    return np.dot(X.T, prediction_error)

In [10]:
# Log of the count-based bigram probabilities. This should be the ideal
# weight that the model is trying to achieve: softmax of a row of
# log-probabilities gives back that row's probabilities.
# The probabilities are recomputed from the corpus here instead of writing
# W_bigram = np.log(W_bigram), so that re-running this cell gives the same
# result rather than taking the log of values that are already (negative)
# logs, which would produce nan.
W_bigram = np.log(get_bigram_probs(start_idx, end_idx, V, sentences))

In [11]:
epochs = 3
learning_rate = 0.01
losses = []  # per-sentence mean cross-entropy, in training order
for i in range(epochs):
    # Visit the corpus in a new random order each epoch. random.sample
    # returns a shuffled copy, so the shared `sentences` list keeps its
    # original order for any other cell that reads it.
    epoch_sentences = random.sample(sentences, len(sentences))
    epoch_loss = 0
    for sentence in tqdm(epoch_sentences):
        # Wrap the sentence in boundary tokens; position t predicts t + 1.
        complete_sentence = [start_idx] + sentence + [end_idx]
        n = len(complete_sentence)
        rows = np.arange(n - 1)
        targets = complete_sentence[1:]

        # One hot encoding the input and output
        input_sentence = np.zeros((n - 1, V))
        output_sentence = np.zeros((n - 1, V))
        input_sentence[rows, complete_sentence[:n - 1]] = 1
        output_sentence[rows, targets] = 1

        z = forward(input_sentence, W)
        z = softmax(z)
        dW = backprop(input_sentence, z, output_sentence)
        W = W - learning_rate * dW

        # Mean cross-entropy over the n - 1 predictions, using the
        # probabilities computed before this step's update. Only the
        # target probabilities are read, so a zero probability on a
        # non-target word cannot turn the loss into nan via 0 * log(0).
        loss = -np.mean(np.log(z[rows, targets]))
        losses.append(loss)
        epoch_loss += loss

    # Average over the sentences seen this epoch (guarded for an empty corpus).
    epoch_loss /= max(len(epoch_sentences), 1)
    
    print("Epoch : {}, Loss : {}".format(i, epoch_loss))
    


This function will be removed in tqdm==5.0.0
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`



HBox(children=(FloatProgress(value=0.0, max=56610.0), HTML(value='')))


Epoch : 0, Loss : 2.3600890722541243


HBox(children=(FloatProgress(value=0.0, max=56610.0), HTML(value='')))


Epoch : 1, Loss : 2.2251576157852524


HBox(children=(FloatProgress(value=0.0, max=56610.0), HTML(value='')))


Epoch : 2, Loss : 2.206161660817967
