### Import Libraries

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import numpy as np
import nltk
import string 
import re
import random
import time

from nltk.corpus import stopwords
from collections import Counter
from datasets import load_dataset
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
nltk.download('stopwords')
RemoveWords = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/pavasgarg/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Preprocessing Data

In [4]:
def softmax(x):
	e_x = np.exp(x - np.max(x))
	return e_x / e_x.sum()

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [5]:
def preprocess_sentences(corpus, min_word_freq=5, min_sentence_len=7):
    # Remove punctuation and numbers (Roman numerals and others)
    corpus = re.sub(r'\b[MDCLXVI]+\b|\d+', '', corpus)
    corpus = corpus.translate(str.maketrans('', '', string.punctuation))
    
    # Tokenize the corpus
    sentences = [sent.strip().split() for sent in corpus.lower().split('.')]
    
    # Filter out short sentences
    sentences = [sent for sent in sentences if len(sent) >= min_sentence_len]
    
    # Filter out rare words and short sentences
    word_freq = Counter([word for sent in sentences for word in sent])
    sentences = [[word for word in sent if word_freq[word] >= min_word_freq] for sent in sentences]
    
    return [sent for sent in sentences if len(sent) >= min_sentence_len]

### SkipGram With Softmax

In [6]:
class SkipGramModel(object):
    def __init__(self, embedding_dim=20, learning_rate=0.0005):
        self.Neuron = embedding_dim
        self.lr = learning_rate
        self.initial_lr = learning_rate

        self.X_train = []
        self.y_train = []
        self.words = []
        self.word_index = {}
        self.vocab = {}
  
    def InitializeWeights(self, V, data):
        self.V = V
        self.W = np.random.uniform(-0.4, 0.4, (self.V, self.Neuron))
        self.W1 = np.random.uniform(-0.4, 0.4, (self.Neuron, self.V))
          
        self.words = data
        for i in range(len(data)):
            self.word_index[data[i]] = i
            
    def train(self, mytol, maxepochs=20000):
        # Initialize loss
        self.loss = 0
        self.loss1 = 1  # Random number to start with
        itr = 1
        
        while abs(self.loss1 - self.loss) >= mytol and itr <= maxepochs:
            self.loss1 = self.loss
            self.loss = 0
            for j in range(len(self.X_train)):
                # Implementing feedforward 
                self.h = np.dot(self.W.T, self.X_train[j]).reshape(self.Neuron, 1)
                self.u = np.dot(self.W1.T, self.h)
                self.y = softmax(self.u)
                
                # Backpropagation
                error = self.y - np.asarray(self.y_train[j]).reshape(self.V, 1)
                dLdW1 = np.dot(self.h, error.T)
                X = np.array(self.X_train[j]).reshape(self.V, 1)
                dLdW = np.dot(X, np.dot(self.W1, error).T)

                self.W1 = self.W1 - self.lr * dLdW1
                self.W = self.W - self.lr * dLdW

                # Loss function (cross-entropy)
                for m in range(self.V):
                    if self.y_train[j][m]:
                        self.loss += -1 * self.u[m][0]
                self.loss += np.log(np.sum(np.exp(self.u)))

            print(f"Loss at itr {itr}: {self.loss}")

            # Update adaptive learning rate
            self.lr = self.initial_lr / (1 + 0.01 * itr)  # 0.01 is decay rate
            itr = itr + 1
             
    def predict(self, word, number_of_predictions):
        if word in self.words:
            index = self.word_index[word]
            X = np.zeros(self.V)
            X[index] = 1

            self.h = np.dot(self.W.T, X).reshape(self.Neuron, 1)
            self.u = np.dot(self.W1.T, self.h)
            self.y = softmax(self.u)
            prediction = self.y

            output = {}
            for i in range(self.V):
                output[prediction[i][0]] = i
              
            top_context_words = []
            for k in sorted(output, reverse=True):
                top_context_words.append(self.words[output[k]])
                if len(top_context_words) >= number_of_predictions:
                    break
      
            return top_context_words
        else:
            print("Word not found")
    
    def get_embedding_matrix(self):
        return self.W 
    
    def get_word_embedding(self, word):
        if word in self.word_index:
            word_idx = self.word_index[word]
            return self.W[word_idx]  # The embedding for the word
        else:
            print(f"Word '{word}' not in vocabulary.")
            return None


### SkipGram With Negative Sampling

In [7]:
class SkipGramModelNeg(object):
    def __init__(self, vocab_size, embedding_dim=20, negative_samples=5, learning_rate=0.0005):
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.negative_samples = negative_samples
        self.lr = learning_rate
        
        # Initialize weights for input (W) and output (W_out) layers
        self.W = np.random.uniform(-0.5, 0.5, (vocab_size, embedding_dim))  # Input layer weights
        self.W_out = np.random.uniform(-0.5, 0.5, (embedding_dim, vocab_size))  # Output layer weights
        
        # Initialize word index mapping
        self.word_index = {}

    def train(self, sentences, vocab, window_size=2, epochs=1, batch_size=64):
        # Populate word_index
        self.word_index = {word: idx for idx, word in enumerate(vocab)}
        
        for epoch in range(epochs):
            print(f"Epoch {epoch + 1}/{epochs}")
            total_loss = 0

            for sentence in sentences:
                for i, target_word in enumerate(sentence):
                    if target_word not in vocab:
                        continue

                    target_word_idx = self.word_index[target_word]

                    # Context words within the window
                    context_words = []
                    for j in range(max(0, i - window_size), min(len(sentence), i + window_size + 1)):
                        if j != i and sentence[j] in vocab:
                            context_words.append(sentence[j])

                    for context_word in context_words:
                        context_word_idx = self.word_index[context_word]

                        # Negative sampling (random words from vocabulary)
                        negative_samples = random.sample(
                            [idx for idx in range(self.vocab_size) if idx != target_word_idx],
                            self.negative_samples
                        )

                        # Update model using the target-context pair and negative samples
                        loss = self.train_skipgram_neg_sampling(target_word_idx, context_word_idx, negative_samples)
                        total_loss += loss

            print(f"Loss at epoch {epoch + 1}: {total_loss}")

    def train_skipgram_neg_sampling(self, target_word_idx, context_word_idx, negative_samples):
        # Positive sample (target-context)
        h = self.W[target_word_idx] 
        u_pos = np.dot(h, self.W_out[:, context_word_idx]) 
        pos_loss = -np.log(sigmoid(u_pos)) 

        # Negative samples
        neg_loss = 0
        for neg_word_idx in negative_samples:
            u_neg = np.dot(h, self.W_out[:, neg_word_idx])  
            neg_loss += -np.log(sigmoid(-u_neg)) 

        total_loss = pos_loss + neg_loss

        # Gradients for positive context word
        grad_out_pos = (sigmoid(u_pos) - 1) * h
        grad_in_pos = (sigmoid(u_pos) - 1) * self.W_out[:, context_word_idx]

        for neg_word_idx in negative_samples:
            u_neg = np.dot(h, self.W_out[:, neg_word_idx])  # Calculate u_neg for gradient
            grad_out_neg = (sigmoid(-u_neg)) * h
            grad_in_neg = (sigmoid(-u_neg)) * self.W_out[:, neg_word_idx]

            self.W_out[:, neg_word_idx] -= self.lr * grad_out_neg
            self.W[target_word_idx] -= self.lr * grad_in_neg

        self.W_out[:, context_word_idx] -= self.lr * grad_out_pos
        self.W[target_word_idx] -= self.lr * grad_in_pos

        return total_loss

    def get_embedding_matrix(self):
        return self.W

    def get_word_embedding(self, word):
        if word in self.word_index:
            return self.W[self.word_index[word]]
        else:
            print(f"Word '{word}' not in vocabulary.")
            return None

### Importing Corpus

In [8]:
dataset = load_dataset("wikitext", "wikitext-2-raw-v1")
train_data = dataset['train']
val_data = dataset['validation']
test_data = dataset['test']

In [9]:
train_data[3]["text"]

' Senjō no Valkyria 3 : Unrecorded Chronicles ( Japanese : 戦場のヴァルキュリア3 , lit . Valkyria of the Battlefield 3 ) , commonly referred to as Valkyria Chronicles III outside Japan , is a tactical role @-@ playing video game developed by Sega and Media.Vision for the PlayStation Portable . Released in January 2011 in Japan , it is the third game in the Valkyria series . Employing the same fusion of tactical and real @-@ time gameplay as its predecessors , the story runs parallel to the first game and follows the " Nameless " , a penal military unit serving the nation of Gallia during the Second Europan War who perform secret black operations and are pitted against the Imperial unit " Calamaty Raven " . \n'

### Train Corpus

In [10]:
TRAIN_SET_SIZE_SOFTMAX = 1000
TRAIN_SET_SIZE_NEG = 10000

corpus_softmax = ""
for i in range(TRAIN_SET_SIZE_SOFTMAX):
  corpus_softmax = corpus_softmax + "." + train_data[i]["text"]


corpus_neg = ""
for i in range(TRAIN_SET_SIZE_NEG):
  corpus_neg = corpus_neg + "." + train_data[i]["text"]

In [11]:
# corpus

In [12]:
corpus_softmax_set = preprocess_sentences(corpus_softmax)
print("Number of Sentences in softmax corpus set :", len(corpus_softmax_set))


corpus_neg_set = preprocess_sentences(corpus_neg)
print("Number of Sentences in neg corpus set :", len(corpus_neg_set))

Number of Sentences in softmax corpus set : 1
Number of Sentences in neg corpus set : 1


In [13]:
print(len(corpus_softmax_set[0]))

35071


In [14]:
print(len(corpus_neg_set[0]))

418714


### Creating Skipgram Model and Vocabulary Set

In [15]:
def prepare_data_in_batches(sentences, window_size, vocab, batch_size):
    X_batch = []
    y_batch = []
    current_batch_size = 0

    for sentence in sentences:
        for i, target_word in enumerate(sentence):
            if target_word not in vocab:
                continue

            target_word_idx = vocab[target_word]
            X = np.zeros(len(vocab))
            X[target_word_idx] = 1  # One-hot encode the target word

            # Get the context words within the window size
            for j in range(max(0, i - window_size), min(len(sentence), i + window_size + 1)):
                if j != i and sentence[j] in vocab:
                    context_word_idx = vocab[sentence[j]]
                    y = np.zeros(len(vocab))
                    y[context_word_idx] = 1  # One-hot encode the context word

                    X_batch.append(X)
                    y_batch.append(y)
                    current_batch_size += 1

                    # If the current batch size matches the batch_size, yield the batch
                    if current_batch_size == batch_size:
                        yield np.array(X_batch), np.array(y_batch)
                        X_batch = []
                        y_batch = []
                        current_batch_size = 0

    # Yield the final batch if there are any remaining pairs
    if current_batch_size > 0:
        yield np.array(X_batch), np.array(y_batch)

In [16]:
sentences_softmax = corpus_softmax_set
sentences_neg = corpus_neg_set

vocab_softmax = {word: idx for idx, word in enumerate(set([word for sentence in sentences_softmax for word in sentence]))}
vocab_neg = {word: idx for idx, word in enumerate(set([word for sentence in sentences_neg for word in sentence]))}

### Hyper Parameters for Training

In [17]:
window_sizes = [2, 3, 4]
batch_size = 500
num_epochs = 5
tol = 1e-4

### Training SkipGram With Softmax

In [18]:
def train_skipgram_model(model, window_size, maxepochs_per_batch=1):
    model.InitializeWeights(len(vocab_softmax), list(vocab_softmax.keys()))
    model.vocab = vocab_softmax

    start_time = time.time()

    # Training loop
    for epoch in range(num_epochs):
        print(f"Starting epoch {epoch + 1}/{num_epochs}")
        
        # Generate word-context pairs in batches
        batch_generator = prepare_data_in_batches(sentences_softmax, window_size, vocab_softmax, batch_size)
        
        for X_batch, y_batch in batch_generator:
            model.X_train = X_batch
            model.y_train = y_batch
            
            # Perform one epoch of training on this batch
            model.train(mytol=tol, maxepochs=maxepochs_per_batch)

        print(f"Finished epoch {epoch + 1}/{num_epochs}")
    
    total_time = time.time() - start_time
    
    return model, total_time

#### Window Size 2

In [19]:
model_pos_2, time_pos_2 = train_skipgram_model(SkipGramModel(), window_sizes[0])

Starting epoch 1/5
Loss at itr 1: 3655.0950458636903
Loss at itr 1: 3649.7490398070195
Loss at itr 1: 3651.5733129422147
Loss at itr 1: 3648.856418266573
Loss at itr 1: 3640.8618912999045
Loss at itr 1: 3643.5309299755268
Loss at itr 1: 3640.7132842720403
Loss at itr 1: 3647.0474010843186
Loss at itr 1: 3653.185721078931
Loss at itr 1: 3648.220376135062
Loss at itr 1: 3652.5604948776477
Loss at itr 1: 3647.692028560808
Loss at itr 1: 3641.865173909368
Loss at itr 1: 3644.154540481052
Loss at itr 1: 3656.0243991752213
Loss at itr 1: 3654.8417377792302
Loss at itr 1: 3649.482334967776
Loss at itr 1: 3651.4203330480805
Loss at itr 1: 3646.9399123502667
Loss at itr 1: 3654.7506874074174
Loss at itr 1: 3646.8199376888597
Loss at itr 1: 3642.424291051798
Loss at itr 1: 3647.766436392524
Loss at itr 1: 3650.0329405987
Loss at itr 1: 3637.8460256257786
Loss at itr 1: 3648.0899769386365
Loss at itr 1: 3651.2973663906205
Loss at itr 1: 3649.6922353499704
Loss at itr 1: 3634.0984397882094
Loss at

#### Window Size 3

In [20]:
model_pos_3, time_pos_3 = train_skipgram_model(SkipGramModel(), window_sizes[1])

Starting epoch 1/5
Loss at itr 1: 3639.6928203767216
Loss at itr 1: 3646.986156587476
Loss at itr 1: 3646.683552277603
Loss at itr 1: 3639.8808881827013
Loss at itr 1: 3638.746801946856
Loss at itr 1: 3643.4903783774075
Loss at itr 1: 3640.0443470344185
Loss at itr 1: 3646.276718963434
Loss at itr 1: 3639.454959302818
Loss at itr 1: 3644.3718641547816
Loss at itr 1: 3652.89228333989
Loss at itr 1: 3640.0187790750506
Loss at itr 1: 3636.5469524823766
Loss at itr 1: 3649.9571073712323
Loss at itr 1: 3647.797716711637
Loss at itr 1: 3647.5369996974023
Loss at itr 1: 3645.0595801301606
Loss at itr 1: 3655.6713136434273
Loss at itr 1: 3637.7184603143232
Loss at itr 1: 3653.7942465282613
Loss at itr 1: 3646.6841238587517
Loss at itr 1: 3637.7887055456845
Loss at itr 1: 3639.1369631827206
Loss at itr 1: 3641.363514215086
Loss at itr 1: 3641.522156835159
Loss at itr 1: 3634.210296994176
Loss at itr 1: 3637.8391740733045
Loss at itr 1: 3650.541038236503
Loss at itr 1: 3636.569350331898
Loss at 

#### Window Size 4

In [21]:
model_pos_4, time_pos_4 = train_skipgram_model(SkipGramModel(), window_sizes[2])

Starting epoch 1/5
Loss at itr 1: 3650.4516707860107
Loss at itr 1: 3653.4418680046138
Loss at itr 1: 3646.382994741257
Loss at itr 1: 3652.4202409130457
Loss at itr 1: 3647.188251125928
Loss at itr 1: 3639.835431622299
Loss at itr 1: 3639.551434231101
Loss at itr 1: 3642.0527333884424
Loss at itr 1: 3643.2531963445945
Loss at itr 1: 3637.187169708092
Loss at itr 1: 3639.1347293245135
Loss at itr 1: 3650.9189529793916
Loss at itr 1: 3642.792609162294
Loss at itr 1: 3638.674284744974
Loss at itr 1: 3647.729925283507
Loss at itr 1: 3650.2501401379336
Loss at itr 1: 3647.4634725586134
Loss at itr 1: 3636.0628495392443
Loss at itr 1: 3646.7371214633595
Loss at itr 1: 3652.714775613256
Loss at itr 1: 3644.244277877308
Loss at itr 1: 3650.26729948051
Loss at itr 1: 3640.8193457077814
Loss at itr 1: 3640.0449468954967
Loss at itr 1: 3656.51298346382
Loss at itr 1: 3647.612135941376
Loss at itr 1: 3646.6270460769397
Loss at itr 1: 3638.4299525115057
Loss at itr 1: 3642.5354717741166
Loss at it

### Training SkipGram With Negative Sampling

#### Window Size 2

In [None]:
model_neg_2 = SkipGramModelNeg(vocab_size=len(vocab_neg), embedding_dim=100, negative_samples=7)
start_time = time.time()
model_neg_2.train(sentences_neg, vocab_neg, window_size=window_sizes[0], epochs=num_epochs)
time_neg_2 = time.time() - start_time

#### Window Size 3

In [None]:
model_neg_3 = SkipGramModelNeg(vocab_size=len(vocab_neg), embedding_dim=100, negative_samples=7)
start_time = time.time()
model_neg_3.train(sentences_neg, vocab_neg, window_size=window_sizes[1], epochs=num_epochs)
time_neg_3 = time.time() - start_time

#### Window Size 4

In [None]:
model_neg_4 = SkipGramModelNeg(vocab_size=len(vocab_neg), embedding_dim=100, negative_samples=7)
start_time = time.time()
model_neg_4.train(sentences_neg, vocab_neg, window_size=window_sizes[2], epochs=num_epochs)
time_neg_4 = time.time() - start_time

### Mean Reciprocal Rank (MRR)

In [None]:
def compute_similarity(embedding_matrix, target_vector):
    """
    Compute cosine similarity between the target vector and all word embeddings.
    """
    similarities = cosine_similarity(embedding_matrix, target_vector.reshape(1, -1)).flatten()
    return similarities

In [None]:
def get_rank(similarity_scores, true_index):
    """
    Get the rank of the true context word in the similarity list.
    """
    sorted_indices = np.argsort(-similarity_scores)  # Sort in descending order of similarity
    rank = np.where(sorted_indices == true_index)[0][0] + 1  # Get the rank of the true word
    return rank

In [None]:
def calculate_mrr_for_window(context_indices, target_embedding, embedding_matrix):
    """
    Calculate MRR for a single context window.
    """
    mrr = 0.0
    for context_idx in context_indices:
        similarity_scores = compute_similarity(embedding_matrix, target_embedding)
        rank = get_rank(similarity_scores, context_idx)
        mrr += 1 / rank
    mrr /= len(context_indices)  # Average over the context window
    return mrr

In [None]:
def calculate_mrr_for_dataset(test_data, embedding_matrix, word_index):
    """
    Calculate the overall MRR for the entire test dataset.
    """
    total_mrr = 0.0
    for t in test_data:
        print(t)
        target_word, context_words = t[0], t[1]
        if target_word not in word_index:
            continue  # Skip words that are not in the vocabulary
        
        target_idx = word_index[target_word]
        target_embedding = embedding_matrix[target_idx]
        
        # Get indices for all context words
        context_indices = [word_index[word] for word in context_words if word in word_index]
        
        if len(context_indices) > 0:
            mrr_window = calculate_mrr_for_window(context_indices, target_embedding, embedding_matrix)
            total_mrr += mrr_window
    
    avg_mrr = total_mrr / len(test_data)
    return avg_mrr

### Preparing Test Data

In [None]:
# This function create One hot encoding for Input word and the context words
def prepare_test_data(sentences, window_size, vocab):
    """
    Prepare test data from sentences for MRR calculation.

    Args:
    - sentences: List of sentences (each sentence is a string).
    - window_size: The context window size (c).
    - vocab: Set of valid words (usually, your model's vocabulary).

    Returns:
    - test_data: A list of (target_word, [context_words]) tuples.
    """
    test_data = []
    for sentence in sentences:
        
        # Iterate over each word in the sentence
        for i in range(window_size, len(sentence)-window_size):
            target_word = sentence[i]
            if target_word not in vocab:
                continue  # Skip words not in the vocabulary
            
            # Get the context words within the window size
            context_words = []
            for j in range(max(0, i - window_size), min(len(sentence), i + window_size + 1)):
                if j != i and sentence[j] in vocab:  # Exclude the target word itself
                    context_words.append(sentence[j])
            
            if len(context_words) > 0:
                print(target_word, context_words)
                test_data.append([target_word, context_words])
    
    return test_data

In [None]:
corpus_test = test_data["text"][:1000]

In [None]:
print(corpus_test[:10])

In [None]:
corpus_text_tokenized = ""
for i in corpus_test:
  corpus_text_tokenized = corpus_text_tokenized + "." + i

In [None]:
print(len(corpus_text_tokenized[0]))

In [None]:
corpus_text_tokenized = preprocess_sentences(corpus_text_tokenized)

In [None]:
corpus_text_tokenized

### Window Size 2

In [None]:
test_context_size_2 = prepare_test_data(corpus_text_tokenized, 2, vocab)

In [None]:
embedding_matrix_pos_2 = model_pos_2.get_embedding_matrix()
word_index_pos_2 = model_pos_2.word_index  # word to index mapping from the model

avg_mrr_pos_2 = calculate_mrr_for_dataset(test_context_size_2, embedding_matrix_pos_2, word_index_pos_2)
print(f"Mean Reciprocal Rank for test data (Positive, 2): {avg_mrr_pos_2:.4f}")

In [None]:
embedding_matrix_neg_2 = model_neg_2.get_embedding_matrix()
word_index_neg_2 = model_neg_2.word_index  # word to index mapping from the model

avg_mrr_neg_2 = calculate_mrr_for_dataset(test_context_size_2, embedding_matrix_neg_2, word_index_neg_2)
print(f"Mean Reciprocal Rank for test data (Positive, 2): {avg_mrr_neg_2:.4f}")

### Window Size 3

In [None]:
test_context_size_3 = prepare_test_data(corpus_text_tokenized, 3, vocab)

In [None]:
embedding_matrix_pos_3 = model_pos_3.get_embedding_matrix()
word_index_pos_3 = model_pos_3.word_index  # word to index mapping from the model

avg_mrr_pos_3 = calculate_mrr_for_dataset(test_context_size_3, embedding_matrix_pos_3, word_index_pos_3)
print(f"Mean Reciprocal Rank for test data (Positive, 3): {avg_mrr_pos_3:.4f}")

In [None]:
embedding_matrix_neg_3 = model_neg_3.get_embedding_matrix()
word_index_neg_3 = model_neg_3.word_index  # word to index mapping from the model

avg_mrr_neg_3 = calculate_mrr_for_dataset(test_context_size_3, embedding_matrix_neg_3, word_index_neg_3)
print(f"Mean Reciprocal Rank for test data (Positive, 3): {avg_mrr_neg_3:.4f}")

### Window Size 4

In [None]:
test_context_size_4 = prepare_test_data(corpus_text_tokenized, 4, vocab)

In [None]:
embedding_matrix_pos_4 = model_pos_4.get_embedding_matrix()
word_index_pos_4 = model_pos_4.word_index  # word to index mapping from the model

avg_mrr_pos_4 = calculate_mrr_for_dataset(test_context_size_4, embedding_matrix_pos_4, word_index_pos_4)
print(f"Mean Reciprocal Rank for test data (Positive, 4): {avg_mrr_pos_4:.4f}")

In [None]:
embedding_matrix_neg_4 = model_neg_4.get_embedding_matrix()
word_index_neg_4 = model_neg_4.word_index  # word to index mapping from the model

avg_mrr_neg_4 = calculate_mrr_for_dataset(test_context_size_4, embedding_matrix_neg_4, word_index_neg_4)
print(f"Mean Reciprocal Rank for test data (Positive, 4): {avg_mrr_neg_4:.4f}")

In [None]:
# avg mrr for skip-gram with softmax
avg_mrr__pos = (avg_mrr_pos_2 + avg_mrr_pos_3 + avg_mrr_pos_4)/3

In [None]:
# avg mrr for skip-gram with negative samples
avg_mrr_neg = (avg_mrr_neg_2 + avg_mrr_neg_3 + avg_mrr_neg_4)/3

In [None]:
print("MRR for SkipGram With Softmax: ", avg_mrr__pos)
print("MRR for SkipGram With Negative Samples: ", avg_mrr_neg)

In [None]:
# time taken for training
print("Avg time taken for training skip gram with softmax: ",(time_pos_2 + time_pos_3 + time_pos_4)/3)
print("Avg time taken for training skip gram with negative sampling: ",(time_neg_2 + time_neg_3 + time_neg_4)/3)