In [58]:
import csv
import math
import pickle
import random
import sys
from collections import Counter
import numpy as np

# Helpful for computing cosine similarity
from scipy.spatial.distance import cosine

# This will make things go fast when we finally use it
from numba import jit

# Handy command-line argument parsing
import argparse

# Progress bar tracker
from tqdm import tqdm

# Sort of smart tokenization
from nltk.tokenize import RegexpTokenizer

# We'll use this to save our models
from gensim.models import KeyedVectors


In [59]:
# Seed both the standard-library and the NumPy global generators with the
# same value so that subsampling, weight initialisation and negative sampling
# are repeatable from a fresh kernel.
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)

In [60]:
class word2vec:
    def __init__(self, hidden_layer_size=50):

        self.hidden_layer_size = hidden_layer_size
        self.tokenizer = RegexpTokenizer(r'\w+')
        
        # These state variables become populated as the main() function calls
        #
        # 1. load_data()
        # 2. generate_negative_sampling_table()
        # 3. init_weights()
        #
        # See those functions for how the various values get filled in

        self.word_to_index = {} # word to unique-id
        self.index_to_word = [] # unique-id to word

        # How many times each word occurs in our data after filtering
        self.word_counts = Counter()

        # A utility data structure that lets us quickly sample "negative"
        # instances in a context. This table contains unique-ids
        self.negative_sampling_table = []
        
        # The dataset we'll use for training, as a sequence of unqiue word
        # ids. This is the sequence across all documents after tokens have been
        # randomly subsampled by the word2vec preprocessing step
        self.full_token_sequence_as_ids = []

        # These will contain the two weight matrices. W is the embeddings for
        # the center/target word and C are the embeddings for the context
        # words. You might see these called (W, V) or (W1, W2) in various
        # documentation too. These get initalized later in init_weights() once
        # we know the vocabulary size
        self.W = None
        self.C = None
        
    def tokenize(self, text):
        '''
        Tokenize the document and returns a list of the tokens
        '''
        return self.tokenizer.tokenize(text)

        
    def load_data(self, file_name, min_token_freq):
        '''
        Reads the data from the specified file as long long sequence of text
        (ignoring line breaks) and populates the data structures of this
        word2vec object.
        '''

        # Step 1: Read in the file and create a long sequence of tokens
        with open(file_name,"r") as f:   
            original_sequence= f.read()  
        tokens=self.tokenize(original_sequence)
        # Step 2: Count how many tokens we have of each type
        print('Counting token frequencies')
        original_frequency=Counter(tokens)
        

        # Step 3: Replace all tokens below the specified frequency with an <UNK>
        # token
        print("Performing minimum thresholding")
        for i in range(len(tokens)):
            if original_frequency[tokens[i]]<min_token_freq:
                tokens[i]='<UNK>'

        # Step 4: update self.word_counts to be the number of times each word
        # occurs (including <UNK>)
        self.word_counts=Counter(tokens)

        # Step 5: Create the mappings from word to unique integer ID and the
        # reverse mapping.
        #
        # HINT: the id-to-word mapping is easily represented as a list data
        # structure
    
        all_words=list(self.word_counts.keys())
        for i in range(len(all_words)):
            self.word_to_index[all_words[i]]=i
        self.index_to_word=all_words
        
        # Step 6: Compute the probability of keeping any particular token of a
        # word in the training sequence, which we'll use to subsample. This
        # avoids having the training data be filled with many overly common words
        probability={}
        for i in all_words:
            temp_p=self.word_counts[i]/len(tokens)
            #print(temp_p)
            probability[i]=(np.sqrt(temp_p/0.001)+1)*0.001/temp_p
        #print(probability)
        # Step 7: process the list of tokens (after min-freq filtering) to fill
        # a new list self.full_token_sequence_as_ids where (1) we
        # probabilistically choose whether to keep each token based on the
        # subsampling probabilities and (2) all tokens are convered to their
        # unique ids for faster training.
        for i in range(len(tokens)):
            self.full_token_sequence_as_ids.append(self.word_to_index[tokens[i]])
        print(len(tokens))
        # Transform the original input into a sequence of IDs while also
        # performing token-based subsampling based on the probabilities in
        # word_to_sample_prob. This effectively makes the context window larger
        # for some words by removing words that are common from a particular
        # context before the training occurs.
        for i in all_words:
            if probability[i]<1:
                reserve=np.random.binomial(1,probability[i],1)[0]
                if reserve==0:
                    self.full_token_sequence_as_ids=list(filter(lambda x:x!=self.word_to_index[i],self.full_token_sequence_as_ids))

        # self.negative_sampling_table = generate_negative_sampling_table()
        print('Loaded all data from %s; saw %d tokens (%d unique)' \
              % (file_name, len(self.full_token_sequence_as_ids),
                 len(self.word_to_index)))
                
    
    def generate_negative_sampling_table(self, exp_power=0.75, table_size=1e6):
        '''
        Generates a big list data structure that we can quickly randomly index into
        in order to select a negative training example (i.e., a word that was
        *not* present in the context). 
        '''       
        
        # Step 1: Figure out how many instances of each word need to go into the
        # negative sampling table. 
        #
        # HINT: np.power and np.fill might be useful here        
        print("Generating sampling table")
        len_word=[]
        sum_len_word=sum(np.power(list(self.word_counts.values()),exp_power))
        for i in self.index_to_word:
            len_word.append(np.power(self.word_counts[i],exp_power)/sum_len_word)
        len_word=(np.asarray(len_word)*table_size).astype(int)




        print(len(len_word))
        # Step 2: Create the table to the correct size. You'll want this to be a
        # numpy array of type int
        self.negative_sampling_table=np.ones(int(table_size)).astype(int)


        # Step 3: Fill the table so that each word has a number of IDs
        # proportionate to its probability of being sampled.
        #
        # Example: if we have 3 words "a" "b" and "c" with probabilites 0.5,
        # 0.33, 0.16 and a table size of 6 then our table would look like this
        # (before converting the words to IDs):
        #
        # [ "a", "a", "a", "b", "b", "c" ]
        #
        index_temp=0
        sum_temp=0
        #print(len_word)
        for i in range(len(len_word)):
            np.ndarray.fill(self.negative_sampling_table[sum_temp:(sum_temp+len_word[i])],index_temp)
            sum_temp=sum_temp+len_word[i]
            index_temp=index_temp+1


    def generate_negative_samples(self, cur_context_word_id, num_samples):
        '''
        Randomly samples the specified number of negative samples from the lookup
        table and returns this list of IDs as a numpy array. As a performance
        improvement, avoid sampling a negative example that has the same ID as
        the current positive context word.
        '''

        # Step 1: Create a list and sample from the negative_sampling_table to
        # grow the list to num_samples, avoiding adding a negative example that
        # has the same ID as teh current context_word
        results = []
        temp_num_sample=0
        while temp_num_sample<num_samples:
            temp=np.random.choice(self.negative_sampling_table,1)[0]
            if temp!=cur_context_word_id:
                results.append(temp)
                temp_num_sample=temp_num_sample+1
            else:
                pass

        # Step 2: Convert the list of samples to numpy array and return it            
        return np.array(results)
    def save(self, filename):
        '''
        Saves the model to the specified filename as a gensim KeyedVectors in the
        text format so you can load it separately.
        '''

        # Creates an empty KeyedVectors with our embedding size
        kv = KeyedVectors(vector_size=self.hidden_layer_size)        
        vectors = []
        words = []
        # Get the list of words/vectors in a consistent order
        for index, word in enumerate(self.index_to_word): 
            vectors.append(self.W[index].copy())
            words.append(word)
            
        # Fills the KV object with our data in the right order
        kv.add(words, vectors) 
        kv.save_word2vec_format(filename, binary=False)
    def init_weights(self, init_range=0.1):
        '''
        Initializes the weight matrices W (input->hidden) and C (hidden->output)
        by sampling uniformly within a small range around zero.
        '''

        # Step 1: Initialize two numpy arrays (matrices) for W and C by filling
        # their values with a random sample within the speified range.
        #
        # Hint: numpy.random has lots of ways to create matrices for this task
        self.W=np.random.uniform(-init_range,init_range,size=(len(self.word_to_index),self.hidden_layer_size))
        self.C=np.random.uniform(-init_range,init_range,size=(len(self.word_to_index),self.hidden_layer_size))
        
    def train(self, num_epochs=2, window_size=2, num_negative_samples=2,
              learning_rate=0.05, nll_update_iter=10000, max_steps=-1):
        '''
        Trains the word2vec model on the data loaded from load_data for the
        specified number of epochs.
        '''

        # Rather than compute the full negative log-likelihood (NLL), we'll keep
        # a running tally of the nll values for each step and periodically report them
        nll_results = []
        
        # This value keeps track of which step we're on. Since we don't update
        # when the center token is "<UNK>" we may skip over some ids in the
        # inner loop, so we need a separate step count to keep track of how many
        # updates we've done.
        step = 0
        
        # Iterate for the specified number of epochs
        for epoch in range(1, num_epochs+1):
            print("Beginning epoch %d of %d" % (epoch, num_epochs))           
            
            # Step 1: Iterate over each ID in full_token_sequence_as_ids as a center
            # token (skipping those that are <UNK>) and predicting the context
            # word and negative samples
            #
            # Hint: this is a great loop to wrap with a tqdm() call so you can
            # see how long each epoch will take with a progress bar
            overall_length=len(self.full_token_sequence_as_ids)
            for i in range(overall_length):
                center_word=self.full_token_sequence_as_ids[i]
                if self.index_to_word[center_word]=='<UNK>':
                    continue
                if self.index_to_word[center_word] in all_synonyms_words:
                    index_in_synonyms=synonyms_key[self.index_to_word[center_word]]
                try:
                    center_word=self.word_to_index[random.choice(synonyms[index_in_synonyms])]
                   # print('yes')
                except:
                    pass
                window_begin=(i-window_size)
                window_end=(i+1+window_size)
                if window_begin>=0 and window_end<=overall_length:
                    context_word=self.full_token_sequence_as_ids[window_begin:window_begin+2]+self.full_token_sequence_as_ids[window_end-2:window_end]
                elif window_begin<0:
                    context_word=self.full_token_sequence_as_ids[0:window_begin+2]+self.full_token_sequence_as_ids[window_end-2:window_end]
                elif window_end>overall_length:
                    context_word=self.full_token_sequence_as_ids[window_begin:window_begin+2]+self.full_token_sequence_as_ids[window_end-2:overall_length]
                # Periodically print the NLL so we can see how the model is converging
                if nll_update_iter > 0 and step % nll_update_iter == 0 and step > 0 and len(nll_results) > 0:
                    print("Negative log-likelihood (step: %d): %f " % (step, sum(nll_results)))
                    nll_results = []
                                    
                # Step 2: For each word in the window range (before and after)
                # perform an update where we (1) use the current parameters of
                # the model to predict it using the skip-gram task and (2)
                # sample negative instances and predict those. We'll use the
                # values of those predictions (i.e., the output of the sigmoid)
                # to update the W and C matrices using backpropagation.
                #
                # NOTE: this inner loop should call predict_and_backprop() which is
                # defined outside of the class. See note there for why.
                for each_context in context_word:

                    # Step 3: Pick the context word ID
                    

                    # Step 4: Sample negative instances 
                    negative_sample_instances=self.generate_negative_samples(each_context,num_negative_samples)

                    # Step 5: call predict_and_backprop. Don't forget to add the
                    # nll return value to nll_results to keep track of how the
                    # model is learning
                    nll=predict_and_backprop(self.W,self.C,learning_rate,center_word,each_context,negative_sample_instances)
                    nll_results.append(nll)
                    #print(nll_results)

                    
                step += 1
                #print(step)
                if max_steps > 0 and step >= max_steps:
                    break

            if max_steps > 0 and step >= max_steps:
                print('Maximum number of steps reached: %d' % max_steps)
                break
    def get_neighbors(self, target_word):
        """ 
        Finds the top 10 most similar words to a target word
        """
        outputs = []
        for index, word in tqdm(enumerate(self.index_to_word), total=len(self.index_to_word)):
            similarity = self.compute_cosine_similarity(target_word, word)
            result = {"word": word, "score": similarity}
            outputs.append(result)
    
        # Sort by highest scores
        neighbors = sorted(outputs, key=lambda o: o['score'], reverse=True)
        return neighbors[1:11]

    def compute_cosine_similarity(self, word_one, word_two):
        '''
        Computes the cosine similarity between the two words
        '''
        try:
            word_one_index = self.word_to_index[word_one]
            word_two_index = self.word_to_index[word_two]
        except KeyError:
            return 0
    
        embedding_one = self.W[word_one_index]
        embedding_two = self.W[word_two_index]
        similarity = 1 - abs(float(cosine(embedding_one, embedding_two)))
        return similarity

In [61]:
#@jit(nopython=True)
def predict_and_backprop(W, C, learning_rate, center_id, context_id,
                         negative_sample_ids):
    '''
    Using the center token (specified by center_id), makes a forward pass
    through the network to predict the context token (context_id) and the
    negative samples, then backprops the error of those predictions to update
    the network in place.

    Arguments:
        W, C: the weight matrices; ids refer to rows of these matrices (the
            embeddings of the center word and of the context words). Both are
            modified in place.
        learning_rate: step size of the gradient update.
        center_id: row of W used as the hidden layer activation.
        context_id: row of C of the positive context word.
        negative_sample_ids: integer array of rows of C used as negative
            examples. An id that appears several times contributes once per
            occurrence to the loss and to every update.

    Returns:
        The negative log likelihood of the predictions made with the weights
        as they were before this update.
    '''
    # Step 1: Look up the vectors. Copies are taken because W[center_id] and
    # C[context_id] are views; every gradient below must be computed from the
    # *old* values even after rows of W and C start being overwritten.
    center_vec = W[center_id].copy()
    context_vec = C[context_id].copy()

    # Step 2: Vectors of the negative samples, one row per sample
    negative_sample_vecs = C[negative_sample_ids]

    # Step 3: Predictions. The context word should be predicted near 1 and
    # the negative examples near 0.
    context_logit = np.dot(center_vec, context_vec)
    negative_logits = np.dot(negative_sample_vecs, center_vec)
    context_prediction = sigmoid(context_logit)
    negative_predictions = sigmoid(negative_logits)

    # Step 4: Negative log likelihood
    nll = -np.log(context_prediction) - np.sum(np.log(sigmoid(-negative_logits)))

    # Step 5: Gradient for the center word, from the old context vectors
    center_gradient = ((context_prediction - 1) * context_vec
                       + np.dot(negative_predictions, negative_sample_vecs))

    # Step 6: Apply the updates. Negative rows are updated one sample at a
    # time so that a repeated id accumulates one step per occurrence;
    # assigning through C[negative_sample_ids] would keep only one of them.
    C[context_id] -= learning_rate * (context_prediction - 1) * center_vec
    for position, sample_id in enumerate(negative_sample_ids):
        C[sample_id] -= learning_rate * negative_predictions[position] * center_vec
    W[center_id] = center_vec - learning_rate * center_gradient

    return nll
#@jit(nopython=True)
def sigmoid(x):
    '''
    Computes the logistic function 1 / (1 + e^(-x)) of the provided value.
    '''
    denominator = 1 + np.exp(-x)
    return 1.0 / denominator


In [62]:
# Read the whole synonyms file into a single string
with open('synonyms.txt', "r") as f:
    synonyms = f.read()

In [63]:
# Break the file contents into one string per line. This rebinds `synonyms`
# from a str to a list, so the cell cannot be run a second time on its own
# result.
synonyms=synonyms.split('\n')

In [64]:
# Turn every line into the list of whitespace-separated words on it, so that
# each entry of `synonyms` becomes one group of words
for line_number, line_text in enumerate(synonyms):
    synonyms[line_number] = line_text.split()

In [65]:
# Map every word to the index of the group in `synonyms` that contains it.
# A word listed in several groups ends up pointing at the last of them.
synonyms_key = {
    word: group_index
    for group_index, group in enumerate(synonyms)
    for word in group
}
       

In [66]:
# Every word that belongs to some synonym group
all_synonyms_words = list(synonyms_key)

In [67]:
# Create an untrained model with the default hidden layer size (50)
instance=word2vec()


In [68]:
# Build the vocabulary and the training sequence from the corpus file; tokens
# that occur fewer than 5 times are replaced by <UNK>
instance.load_data('wiki-bios.med.txt',5)

Counting token frequencies
Performing minimum thresholding
23015360
Loaded all data from wiki-bios.med.txt; saw 18594088 tokens (108206 unique)


In [69]:
# Build the negative sampling lookup table with the default exponent (0.75)
# and table size (1e6)
instance.generate_negative_sampling_table()

Generating sampling table
108206


In [70]:
# Randomly initialise W and C now that the vocabulary size is known
instance.init_weights()

In [71]:
# Train with the default settings (2 epochs, window size 2, 2 negative
# samples); the accumulated NLL is printed every 10000 steps
instance.train()

Beginning epoch 1 of 2
Negative log-likelihood (step: 10000): 80038.850895 
Negative log-likelihood (step: 20000): 76264.079711 
Negative log-likelihood (step: 30000): 74378.371429 
Negative log-likelihood (step: 40000): 73807.440229 
Negative log-likelihood (step: 50000): 73248.533210 
Negative log-likelihood (step: 60000): 71438.149026 
Negative log-likelihood (step: 70000): 70029.930360 
Negative log-likelihood (step: 80000): 71446.052169 
Negative log-likelihood (step: 90000): 69770.239755 
Negative log-likelihood (step: 100000): 69573.577730 
Negative log-likelihood (step: 110000): 69446.717113 
Negative log-likelihood (step: 120000): 68651.865634 
Negative log-likelihood (step: 130000): 66721.759858 
Negative log-likelihood (step: 140000): 67211.069410 
Negative log-likelihood (step: 150000): 66089.910937 
Negative log-likelihood (step: 160000): 67312.018528 
Negative log-likelihood (step: 170000): 65822.079762 
Negative log-likelihood (step: 180000): 61881.731398 
Negative log-l

Negative log-likelihood (step: 1520000): 58189.291151 
Negative log-likelihood (step: 1530000): 57725.594747 
Negative log-likelihood (step: 1540000): 54444.014629 
Negative log-likelihood (step: 1550000): 54775.299384 
Negative log-likelihood (step: 1560000): 56075.113171 
Negative log-likelihood (step: 1570000): 58203.412569 
Negative log-likelihood (step: 1580000): 53277.816424 
Negative log-likelihood (step: 1590000): 58231.320216 
Negative log-likelihood (step: 1600000): 58236.777404 
Negative log-likelihood (step: 1610000): 57568.770114 
Negative log-likelihood (step: 1620000): 55781.587761 
Negative log-likelihood (step: 1630000): 55009.520753 
Negative log-likelihood (step: 1640000): 58044.596920 
Negative log-likelihood (step: 1650000): 56629.563086 
Negative log-likelihood (step: 1660000): 55164.840159 
Negative log-likelihood (step: 1670000): 58597.953031 
Negative log-likelihood (step: 1680000): 57436.342280 
Negative log-likelihood (step: 1690000): 57161.544507 
Negative l

Negative log-likelihood (step: 3010000): 54990.342937 
Negative log-likelihood (step: 3020000): 51554.630139 
Negative log-likelihood (step: 3030000): 58107.521124 
Negative log-likelihood (step: 3040000): 56623.790422 
Negative log-likelihood (step: 3050000): 54377.418674 
Negative log-likelihood (step: 3060000): 58037.040375 
Negative log-likelihood (step: 3070000): 56024.048400 
Negative log-likelihood (step: 3080000): 52955.355568 
Negative log-likelihood (step: 3090000): 56103.546879 
Negative log-likelihood (step: 3100000): 56946.889492 
Negative log-likelihood (step: 3110000): 56614.470034 
Negative log-likelihood (step: 3120000): 54181.859953 
Negative log-likelihood (step: 3130000): 56699.561874 
Negative log-likelihood (step: 3140000): 55183.680562 
Negative log-likelihood (step: 3150000): 57369.094946 
Negative log-likelihood (step: 3160000): 56609.929937 
Negative log-likelihood (step: 3170000): 53430.091167 
Negative log-likelihood (step: 3180000): 56860.716208 
Negative l

Negative log-likelihood (step: 4500000): 56458.680068 
Negative log-likelihood (step: 4510000): 56308.159539 
Negative log-likelihood (step: 4520000): 57238.399989 
Negative log-likelihood (step: 4530000): 55015.149921 
Negative log-likelihood (step: 4540000): 53515.979131 
Negative log-likelihood (step: 4550000): 54952.653069 
Negative log-likelihood (step: 4560000): 55591.317924 
Negative log-likelihood (step: 4570000): 56158.423374 
Negative log-likelihood (step: 4580000): 55906.038763 
Negative log-likelihood (step: 4590000): 55921.504809 
Negative log-likelihood (step: 4600000): 53901.946146 
Negative log-likelihood (step: 4610000): 53631.350115 
Negative log-likelihood (step: 4620000): 56680.475903 
Negative log-likelihood (step: 4630000): 54989.759180 
Negative log-likelihood (step: 4640000): 55734.671156 
Negative log-likelihood (step: 4650000): 56859.333902 
Negative log-likelihood (step: 4660000): 54402.950893 
Negative log-likelihood (step: 4670000): 54149.927536 
Negative l

Negative log-likelihood (step: 5990000): 55877.351841 
Negative log-likelihood (step: 6000000): 54892.475285 
Negative log-likelihood (step: 6010000): 49392.883623 
Negative log-likelihood (step: 6020000): 55964.056362 
Negative log-likelihood (step: 6030000): 57242.165386 
Negative log-likelihood (step: 6040000): 54458.560304 
Negative log-likelihood (step: 6050000): 51593.978797 
Negative log-likelihood (step: 6060000): 55816.413765 
Negative log-likelihood (step: 6070000): 56735.659137 
Negative log-likelihood (step: 6080000): 57531.041450 
Negative log-likelihood (step: 6090000): 54203.973896 
Negative log-likelihood (step: 6100000): 56105.234188 
Negative log-likelihood (step: 6110000): 54839.815188 
Negative log-likelihood (step: 6120000): 54200.651830 
Negative log-likelihood (step: 6130000): 54557.764827 
Negative log-likelihood (step: 6140000): 43094.666204 
Negative log-likelihood (step: 6150000): 57177.741863 
Negative log-likelihood (step: 6160000): 56479.147071 
Negative l

Negative log-likelihood (step: 7480000): 47931.076043 
Negative log-likelihood (step: 7490000): 55893.030277 
Negative log-likelihood (step: 7500000): 57129.461017 
Negative log-likelihood (step: 7510000): 53862.810192 
Negative log-likelihood (step: 7520000): 55576.460751 
Negative log-likelihood (step: 7530000): 56277.427193 
Negative log-likelihood (step: 7540000): 55916.204831 
Negative log-likelihood (step: 7550000): 56130.242330 
Negative log-likelihood (step: 7560000): 55202.160962 
Negative log-likelihood (step: 7570000): 54917.314074 
Negative log-likelihood (step: 7580000): 57043.935198 
Negative log-likelihood (step: 7590000): 56207.789378 
Negative log-likelihood (step: 7600000): 57257.696004 
Negative log-likelihood (step: 7610000): 56211.931806 
Negative log-likelihood (step: 7620000): 52023.169518 
Negative log-likelihood (step: 7630000): 55142.501871 
Negative log-likelihood (step: 7640000): 55991.968826 
Negative log-likelihood (step: 7650000): 55206.753698 
Negative l

Negative log-likelihood (step: 8970000): 49075.475742 
Negative log-likelihood (step: 8980000): 56756.543282 
Negative log-likelihood (step: 8990000): 53969.726050 
Negative log-likelihood (step: 9000000): 56990.075401 
Negative log-likelihood (step: 9010000): 56429.173828 
Negative log-likelihood (step: 9020000): 55722.391143 
Negative log-likelihood (step: 9030000): 51849.267421 
Negative log-likelihood (step: 9040000): 55703.457499 
Negative log-likelihood (step: 9050000): 57948.707078 
Negative log-likelihood (step: 9060000): 55886.557456 
Negative log-likelihood (step: 9070000): 55094.757765 
Negative log-likelihood (step: 9080000): 55739.981712 
Negative log-likelihood (step: 9090000): 56912.240782 
Negative log-likelihood (step: 9100000): 56688.032216 
Negative log-likelihood (step: 9110000): 57373.383967 
Negative log-likelihood (step: 9120000): 55761.638674 
Negative log-likelihood (step: 9130000): 53443.822910 
Negative log-likelihood (step: 9140000): 55838.406363 
Negative l

Negative log-likelihood (step: 10460000): 56949.909895 
Negative log-likelihood (step: 10470000): 49825.101016 
Negative log-likelihood (step: 10480000): 55119.676890 
Negative log-likelihood (step: 10490000): 53126.770652 
Negative log-likelihood (step: 10500000): 54538.759523 
Negative log-likelihood (step: 10510000): 56891.898467 
Negative log-likelihood (step: 10520000): 56423.166026 
Negative log-likelihood (step: 10530000): 55716.004525 
Negative log-likelihood (step: 10540000): 55864.301764 
Negative log-likelihood (step: 10550000): 54532.457267 
Negative log-likelihood (step: 10560000): 57098.400478 
Negative log-likelihood (step: 10570000): 55676.674382 
Negative log-likelihood (step: 10580000): 57212.865684 
Negative log-likelihood (step: 10590000): 57204.152497 
Negative log-likelihood (step: 10600000): 49715.839231 
Negative log-likelihood (step: 10610000): 55749.233915 
Negative log-likelihood (step: 10620000): 54654.396741 
Negative log-likelihood (step: 10630000): 53549.

Negative log-likelihood (step: 11930000): 54760.396988 
Negative log-likelihood (step: 11940000): 57427.634960 
Negative log-likelihood (step: 11950000): 54796.968234 
Negative log-likelihood (step: 11960000): 55140.976477 
Negative log-likelihood (step: 11970000): 55501.148495 
Negative log-likelihood (step: 11980000): 57286.173310 
Negative log-likelihood (step: 11990000): 56675.501314 
Negative log-likelihood (step: 12000000): 54520.968950 
Negative log-likelihood (step: 12010000): 54384.867972 
Negative log-likelihood (step: 12020000): 56091.384253 
Negative log-likelihood (step: 12030000): 56456.703469 
Negative log-likelihood (step: 12040000): 54440.088255 
Negative log-likelihood (step: 12050000): 55317.094998 
Negative log-likelihood (step: 12060000): 55256.886259 
Negative log-likelihood (step: 12070000): 56002.083950 
Negative log-likelihood (step: 12080000): 50998.680939 
Negative log-likelihood (step: 12090000): 51963.193518 
Negative log-likelihood (step: 12100000): 54969.

Negative log-likelihood (step: 13400000): 54634.802450 
Negative log-likelihood (step: 13410000): 53196.854037 
Negative log-likelihood (step: 13420000): 54965.123431 
Negative log-likelihood (step: 13430000): 56247.984597 
Negative log-likelihood (step: 13440000): 51803.990315 
Negative log-likelihood (step: 13450000): 56648.101347 
Negative log-likelihood (step: 13460000): 52788.285793 
Negative log-likelihood (step: 13470000): 55946.794365 
Negative log-likelihood (step: 13480000): 54623.415088 
Negative log-likelihood (step: 13490000): 56326.706932 
Negative log-likelihood (step: 13500000): 55931.803509 
Negative log-likelihood (step: 13510000): 55215.525762 
Negative log-likelihood (step: 13520000): 55815.974055 
Negative log-likelihood (step: 13530000): 56924.193032 
Negative log-likelihood (step: 13540000): 55683.106337 
Negative log-likelihood (step: 13550000): 56267.403834 
Negative log-likelihood (step: 13560000): 55135.254415 
Negative log-likelihood (step: 13570000): 55513.

Negative log-likelihood (step: 14870000): 55642.996334 
Negative log-likelihood (step: 14880000): 55751.770603 
Negative log-likelihood (step: 14890000): 56261.964559 
Negative log-likelihood (step: 14900000): 55560.422346 
Negative log-likelihood (step: 14910000): 54141.302884 
Negative log-likelihood (step: 14920000): 55523.274756 
Negative log-likelihood (step: 14930000): 55344.640719 
Negative log-likelihood (step: 14940000): 55740.819768 
Negative log-likelihood (step: 14950000): 56007.809648 
Negative log-likelihood (step: 14960000): 56523.700322 
Negative log-likelihood (step: 14970000): 54050.009683 
Negative log-likelihood (step: 14980000): 57322.886035 
Negative log-likelihood (step: 14990000): 54951.480853 
Negative log-likelihood (step: 15000000): 55828.982969 
Negative log-likelihood (step: 15010000): 55597.015940 
Negative log-likelihood (step: 15020000): 56999.958951 
Negative log-likelihood (step: 15030000): 57085.535644 
Negative log-likelihood (step: 15040000): 54904.

Negative log-likelihood (step: 16340000): 57402.771807 
Negative log-likelihood (step: 16350000): 54923.091922 
Negative log-likelihood (step: 16360000): 54869.596042 
Negative log-likelihood (step: 16370000): 55554.581017 
Negative log-likelihood (step: 16380000): 56437.143869 
Negative log-likelihood (step: 16390000): 55547.035233 
Negative log-likelihood (step: 16400000): 57628.793898 
Negative log-likelihood (step: 16410000): 56105.271141 
Negative log-likelihood (step: 16420000): 56481.577126 
Negative log-likelihood (step: 16430000): 54557.011121 
Negative log-likelihood (step: 16440000): 56340.342901 
Negative log-likelihood (step: 16450000): 55110.885327 
Negative log-likelihood (step: 16460000): 53701.902993 
Negative log-likelihood (step: 16470000): 53129.351967 
Negative log-likelihood (step: 16480000): 57101.168481 
Negative log-likelihood (step: 16490000): 57136.966269 
Negative log-likelihood (step: 16500000): 56071.829875 
Negative log-likelihood (step: 16510000): 56748.

Negative log-likelihood (step: 17810000): 56725.298665 
Negative log-likelihood (step: 17820000): 53458.442691 
Negative log-likelihood (step: 17830000): 56107.396162 
Negative log-likelihood (step: 17840000): 54716.652866 
Negative log-likelihood (step: 17850000): 57306.028148 
Negative log-likelihood (step: 17860000): 56253.698320 
Negative log-likelihood (step: 17870000): 54110.151660 
Negative log-likelihood (step: 17880000): 56308.362101 
Negative log-likelihood (step: 17890000): 54905.732997 
Negative log-likelihood (step: 17900000): 55473.150601 
Negative log-likelihood (step: 17910000): 55813.253248 
Negative log-likelihood (step: 17920000): 55569.016131 
Negative log-likelihood (step: 17930000): 57635.310838 
Negative log-likelihood (step: 17940000): 57150.115553 
Negative log-likelihood (step: 17950000): 55875.094428 
Negative log-likelihood (step: 17960000): 56175.374066 
Negative log-likelihood (step: 17970000): 56195.385292 
Negative log-likelihood (step: 17980000): 53872.

Negative log-likelihood (step: 19270000): 55157.119386 
Negative log-likelihood (step: 19280000): 55547.865550 
Negative log-likelihood (step: 19290000): 54089.884935 
Negative log-likelihood (step: 19300000): 54900.400619 
Negative log-likelihood (step: 19310000): 51157.909579 
Negative log-likelihood (step: 19320000): 56518.759593 
Negative log-likelihood (step: 19330000): 56706.601645 
Negative log-likelihood (step: 19340000): 56114.780331 
Negative log-likelihood (step: 19350000): 56092.785381 
Negative log-likelihood (step: 19360000): 55174.991538 
Negative log-likelihood (step: 19370000): 53638.329300 
Negative log-likelihood (step: 19380000): 55594.597479 
Negative log-likelihood (step: 19390000): 52533.996928 
Negative log-likelihood (step: 19400000): 55635.817341 
Negative log-likelihood (step: 19410000): 54351.132433 
Negative log-likelihood (step: 19420000): 54645.773885 
Negative log-likelihood (step: 19430000): 57198.399979 
Negative log-likelihood (step: 19440000): 55142.

Negative log-likelihood (step: 20740000): 55788.525196 
Negative log-likelihood (step: 20750000): 54736.057159 
Negative log-likelihood (step: 20760000): 54552.048292 
Negative log-likelihood (step: 20770000): 55319.088254 
Negative log-likelihood (step: 20780000): 54823.974271 
Negative log-likelihood (step: 20790000): 53907.789459 
Negative log-likelihood (step: 20800000): 56515.380673 
Negative log-likelihood (step: 20810000): 55224.657314 
Negative log-likelihood (step: 20820000): 55818.558653 
Negative log-likelihood (step: 20830000): 55796.217650 
Negative log-likelihood (step: 20840000): 55655.559248 
Negative log-likelihood (step: 20850000): 54756.697269 
Negative log-likelihood (step: 20860000): 53170.285694 
Negative log-likelihood (step: 20870000): 54131.989597 
Negative log-likelihood (step: 20880000): 54095.760375 
Negative log-likelihood (step: 20890000): 54796.964004 
Negative log-likelihood (step: 20900000): 54563.879007 
Negative log-likelihood (step: 20910000): 55159.

Negative log-likelihood (step: 22210000): 53681.374101 
Negative log-likelihood (step: 22220000): 55630.877234 
Negative log-likelihood (step: 22230000): 55508.383401 
Negative log-likelihood (step: 22240000): 50800.550321 
Negative log-likelihood (step: 22250000): 53635.310299 
Negative log-likelihood (step: 22260000): 56404.803743 
Negative log-likelihood (step: 22270000): 55792.409668 
Negative log-likelihood (step: 22280000): 55049.422927 
Negative log-likelihood (step: 22290000): 54463.284576 
Negative log-likelihood (step: 22300000): 53905.037806 
Negative log-likelihood (step: 22310000): 55356.012921 
Negative log-likelihood (step: 22320000): 53505.865325 
Negative log-likelihood (step: 22330000): 55913.131519 
Negative log-likelihood (step: 22340000): 54088.265949 
Negative log-likelihood (step: 22350000): 51779.493139 
Negative log-likelihood (step: 22360000): 56614.840620 
Negative log-likelihood (step: 22370000): 55320.099603 
Negative log-likelihood (step: 22380000): 56665.

Negative log-likelihood (step: 23680000): 55386.351924 
Negative log-likelihood (step: 23690000): 50967.452623 
Negative log-likelihood (step: 23700000): 55977.192708 
Negative log-likelihood (step: 23710000): 55653.816378 
Negative log-likelihood (step: 23720000): 54307.931050 
Negative log-likelihood (step: 23730000): 55141.031785 
Negative log-likelihood (step: 23740000): 51836.280962 
Negative log-likelihood (step: 23750000): 53989.926133 
Negative log-likelihood (step: 23760000): 54313.776597 
Negative log-likelihood (step: 23770000): 56245.061851 
Negative log-likelihood (step: 23780000): 53387.356890 
Negative log-likelihood (step: 23790000): 55185.249369 
Negative log-likelihood (step: 23800000): 54949.795533 
Negative log-likelihood (step: 23810000): 54353.549094 
Negative log-likelihood (step: 23820000): 52799.303024 
Negative log-likelihood (step: 23830000): 54701.930694 
Negative log-likelihood (step: 23840000): 53623.979142 
Negative log-likelihood (step: 23850000): 56427.

Negative log-likelihood (step: 25150000): 53128.105770 
Negative log-likelihood (step: 25160000): 56591.087603 
Negative log-likelihood (step: 25170000): 54831.320946 
Negative log-likelihood (step: 25180000): 55680.686800 
Negative log-likelihood (step: 25190000): 53817.207343 
Negative log-likelihood (step: 25200000): 52876.275584 
Negative log-likelihood (step: 25210000): 55799.653691 
Negative log-likelihood (step: 25220000): 53912.170963 
Negative log-likelihood (step: 25230000): 54186.406096 
Negative log-likelihood (step: 25240000): 55041.656003 
Negative log-likelihood (step: 25250000): 56746.824373 
Negative log-likelihood (step: 25260000): 53740.613106 
Negative log-likelihood (step: 25270000): 53994.490591 
Negative log-likelihood (step: 25280000): 54868.722656 
Negative log-likelihood (step: 25290000): 53721.622632 
Negative log-likelihood (step: 25300000): 55345.444750 
Negative log-likelihood (step: 25310000): 53994.309833 
Negative log-likelihood (step: 25320000): 54631.

Negative log-likelihood (step: 26620000): 52999.743570 
Negative log-likelihood (step: 26630000): 54163.532278 
Negative log-likelihood (step: 26640000): 56282.712521 
Negative log-likelihood (step: 26650000): 56430.874923 
Negative log-likelihood (step: 26660000): 56624.757765 
Negative log-likelihood (step: 26670000): 55562.832930 
Negative log-likelihood (step: 26680000): 55943.666764 
Negative log-likelihood (step: 26690000): 52685.551291 
Negative log-likelihood (step: 26700000): 55315.125285 
Negative log-likelihood (step: 26710000): 54024.636050 
Negative log-likelihood (step: 26720000): 54557.091627 
Negative log-likelihood (step: 26730000): 55476.441404 
Negative log-likelihood (step: 26740000): 56527.968762 
Negative log-likelihood (step: 26750000): 54654.179472 
Negative log-likelihood (step: 26760000): 55849.428104 
Negative log-likelihood (step: 26770000): 56010.696769 
Negative log-likelihood (step: 26780000): 56481.973685 
Negative log-likelihood (step: 26790000): 53696.

Negative log-likelihood (step: 28090000): 54361.377796 
Negative log-likelihood (step: 28100000): 54238.238596 
Negative log-likelihood (step: 28110000): 55616.978941 
Negative log-likelihood (step: 28120000): 56504.385466 
Negative log-likelihood (step: 28130000): 53220.689988 
Negative log-likelihood (step: 28140000): 55403.672867 
Negative log-likelihood (step: 28150000): 55271.441224 
Negative log-likelihood (step: 28160000): 55112.783610 
Negative log-likelihood (step: 28170000): 55822.590041 
Negative log-likelihood (step: 28180000): 53267.258562 
Negative log-likelihood (step: 28190000): 55985.982694 
Negative log-likelihood (step: 28200000): 53860.456825 
Negative log-likelihood (step: 28210000): 54852.264748 
Negative log-likelihood (step: 28220000): 55841.662366 
Negative log-likelihood (step: 28230000): 55664.902602 
Negative log-likelihood (step: 28240000): 54444.859191 
Negative log-likelihood (step: 28250000): 55608.768378 
Negative log-likelihood (step: 28260000): 54148.

Negative log-likelihood (step: 29560000): 55652.445995 
Negative log-likelihood (step: 29570000): 56139.605557 
Negative log-likelihood (step: 29580000): 56375.788817 
Negative log-likelihood (step: 29590000): 56438.171152 
Negative log-likelihood (step: 29600000): 54820.416944 
Negative log-likelihood (step: 29610000): 52625.605387 
Negative log-likelihood (step: 29620000): 54958.953396 
Negative log-likelihood (step: 29630000): 54614.571059 
Negative log-likelihood (step: 29640000): 54428.555126 
Negative log-likelihood (step: 29650000): 54610.605982 
Negative log-likelihood (step: 29660000): 55128.072418 
Negative log-likelihood (step: 29670000): 54849.400804 
Negative log-likelihood (step: 29680000): 54017.448556 
Negative log-likelihood (step: 29690000): 55817.920104 
Negative log-likelihood (step: 29700000): 54896.632590 
Negative log-likelihood (step: 29710000): 54673.414825 
Negative log-likelihood (step: 29720000): 55514.296081 
Negative log-likelihood (step: 29730000): 54462.

Negative log-likelihood (step: 31030000): 49722.643033 
Negative log-likelihood (step: 31040000): 54959.319120 
Negative log-likelihood (step: 31050000): 56060.023195 
Negative log-likelihood (step: 31060000): 52620.363641 
Negative log-likelihood (step: 31070000): 56007.890864 
Negative log-likelihood (step: 31080000): 56031.267118 
Negative log-likelihood (step: 31090000): 55361.323703 
Negative log-likelihood (step: 31100000): 54482.468373 
Negative log-likelihood (step: 31110000): 54481.286533 
Negative log-likelihood (step: 31120000): 56734.733549 
Negative log-likelihood (step: 31130000): 55877.499876 
Negative log-likelihood (step: 31140000): 54686.053841 
Negative log-likelihood (step: 31150000): 56071.210171 
Negative log-likelihood (step: 31160000): 54339.764033 
Negative log-likelihood (step: 31170000): 55190.748704 
Negative log-likelihood (step: 31180000): 54339.066123 
Negative log-likelihood (step: 31190000): 55604.692091 
Negative log-likelihood (step: 31200000): 53776.

Negative log-likelihood (step: 32500000): 54322.223218 
Negative log-likelihood (step: 32510000): 53279.131113 
Negative log-likelihood (step: 32520000): 54693.102623 
Negative log-likelihood (step: 32530000): 55086.332036 
Negative log-likelihood (step: 32540000): 55678.498486 
Negative log-likelihood (step: 32550000): 54173.145239 
Negative log-likelihood (step: 32560000): 55678.419720 
Negative log-likelihood (step: 32570000): 55359.390477 
Negative log-likelihood (step: 32580000): 53691.045616 
Negative log-likelihood (step: 32590000): 54304.176582 
Negative log-likelihood (step: 32600000): 55177.893984 
Negative log-likelihood (step: 32610000): 54133.837855 
Negative log-likelihood (step: 32620000): 53591.226431 
Negative log-likelihood (step: 32630000): 54500.032239 
Negative log-likelihood (step: 32640000): 54157.051729 
Negative log-likelihood (step: 32650000): 53920.255040 
Negative log-likelihood (step: 32660000): 56665.868804 
Negative log-likelihood (step: 32670000): 53953.

Negative log-likelihood (step: 33970000): 53261.677391 
Negative log-likelihood (step: 33980000): 56225.698197 
Negative log-likelihood (step: 33990000): 52330.542462 
Negative log-likelihood (step: 34000000): 55793.249485 
Negative log-likelihood (step: 34010000): 54914.833558 
Negative log-likelihood (step: 34020000): 55726.385629 
Negative log-likelihood (step: 34030000): 55109.645167 
Negative log-likelihood (step: 34040000): 54477.925149 
Negative log-likelihood (step: 34050000): 51873.905023 
Negative log-likelihood (step: 34060000): 53054.993425 
Negative log-likelihood (step: 34070000): 54848.240878 
Negative log-likelihood (step: 34080000): 53480.862939 
Negative log-likelihood (step: 34090000): 55509.180196 
Negative log-likelihood (step: 34100000): 51489.262200 
Negative log-likelihood (step: 34110000): 54161.968757 
Negative log-likelihood (step: 34120000): 54302.200738 
Negative log-likelihood (step: 34130000): 56977.693115 
Negative log-likelihood (step: 34140000): 56464.

Negative log-likelihood (step: 35440000): 54133.375900 
Negative log-likelihood (step: 35450000): 54087.486092 
Negative log-likelihood (step: 35460000): 52982.873337 
Negative log-likelihood (step: 35470000): 54762.204087 
Negative log-likelihood (step: 35480000): 54282.405887 
Negative log-likelihood (step: 35490000): 53485.145215 
Negative log-likelihood (step: 35500000): 54084.365772 
Negative log-likelihood (step: 35510000): 56202.776008 
Negative log-likelihood (step: 35520000): 53529.719781 
Negative log-likelihood (step: 35530000): 56374.695663 
Negative log-likelihood (step: 35540000): 54994.376038 
Negative log-likelihood (step: 35550000): 54558.154865 
Negative log-likelihood (step: 35560000): 54848.789706 
Negative log-likelihood (step: 35570000): 54794.500124 
Negative log-likelihood (step: 35580000): 54433.566267 
Negative log-likelihood (step: 35590000): 54373.632496 
Negative log-likelihood (step: 35600000): 54786.031394 
Negative log-likelihood (step: 35610000): 54049.

In [72]:
# Persist the model by calling save() with the output filename
# 'trained_vector_synonym.txt'.
# NOTE(review): `instance` and its save() method are defined in earlier cells
# that are not visible here; presumably `instance` is the word2vec object
# trained above and save() writes the learned word vectors (the notebook
# imports gensim's KeyedVectors "to save our models") -- confirm the on-disk
# format against save() before loading this file elsewhere.
instance.save('trained_vector_synonym.txt')