# Model Training to suggest Alternative Queries for Bib Search Engine

## Read Data
Files are in gzipped JSON files and abstracts should be extracted. The following task are performed:

1. Import necessary libraries and modules such as os, gzip, json, pandas, re, nltk, gensim, and numpy.
2. Download stopwords and WordNet data from the NLTK library.
3. Define a function called extract_abstract_fields that takes a JSON data object as input and extracts the values of the "abstract" fields recursively from the JSON structure.
4. Define a function called read_gz_json that reads a gzipped JSON file, extracts the abstract fields using the extract_abstract_fields function, and returns a list of abstracts.
5. Specify the directory path where the JSON files are located.
6. Create an empty list called df_list.
7. Iterate over the files in the specified directory and if a file has the extension ".json.gz", read the file using the read_gz_json function and store the abstract fields in a DataFrame called abstract_df.
8. Append the abstract_df to the df_list.
9. Concatenate all the DataFrames in df_list into a single DataFrame called all_abstracts_df using the pd.concat function, ignoring the index.


In [9]:
import os
import gzip
import json
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from gensim.parsing.preprocessing import remove_stopwords
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import preprocess_string
import numpy as np


nltk.download('stopwords')
nltk.download('wordnet')


def extract_abstract_fields(json_data):
    abstract_fields = []

    def search_fields(data):
        if isinstance(data, dict):
            if "abstract" in data:
                abstract_fields.append(data["abstract"])
            for value in data.values():
                search_fields(value)
        elif isinstance(data, list):
            for item in data:
                search_fields(item)

    search_fields(json_data)
    return abstract_fields


def read_gz_json(file_name):
    with gzip.GzipFile(file_name, 'r') as fin:
        json_bytes = fin.read()
    json_str = json_bytes.decode('utf-8')
    data = json.loads(json_str)

    abstract_fields = extract_abstract_fields(data)

    return abstract_fields


dir_name = 'data/April22_CrossRef/'

df_list = []

for file in os.listdir(dir_name):
    if file.endswith('.json.gz'):
        file_path = os.path.join(dir_name, file)
        abstract_fields = read_gz_json(file_path)

        abstract_df = pd.DataFrame(abstract_fields, columns=['Abstract'])
        df_list.append(abstract_df)

all_abstracts_df = pd.concat(df_list, ignore_index=True)

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/rafaila/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/rafaila/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [10]:
all_abstracts_df.describe()

Unnamed: 0,Abstract
count,123116
unique,121721
top,<jats:p />
freq,351


## Data Preprocessing

These functions can be used to clean and preprocess text data by removing unnecessary characters, stopwords, and performing stemming or lemmatization to prepare the text for further analysis, such as text classification, sentiment analysis, or topic modeling. These are done using the Natural Language Toolkit (NLTK) library in Python.

In [11]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer, PorterStemmer
import re
import string

# Create a set of English stopwords
stop_words = set(stopwords.words('english'))

def clean_text(text):
    # Convert to lower case
    text = text.lower()

    # Remove HTML tags
    text = re.sub(r'<.*?>', '', text)
    
    # Remove specific characters
    text = text.replace('°', '').replace('..', '').replace(',′', '')

    # Remove 'ie', single letters, and numbers
    text = re.sub(r'\bie\b|\b[a-zA-Z]\b|\b[\d]+\b', '', text)

    # Tokenize
    words = word_tokenize(text)

    # Remove punctuation and stopwords, and strip trailing spaces
    words = [word for word in words if word not in string.punctuation and word not in stop_words and word.strip()]

    # Lemmatize
    lemmatizer = WordNetLemmatizer()
    words = [lemmatizer.lemmatize(word) for word in words]

    return words

def preprocess_data(text):
    # Check if the input is a valid string or bytes-like object
    if not isinstance(text, (str, bytes)):
        raise ValueError("Input must be a string or bytes-like object.")

    # Remove punctuation
    text = re.sub(r'[^\w\s]', '', text)
    # Lowercase
    text = text.lower()
    # Tokenize
    words = word_tokenize(text)
    # Remove stopwords and stem
    stop_words = set(stopwords.words('english'))
    ps = PorterStemmer()
    words = [ps.stem(word) for word in words if word not in stop_words]
    return words

In [12]:
all_abstracts_df['cleaned_abstract'] = all_abstracts_df['Abstract'].apply(clean_text)

In [13]:
# Example usage
all_abstracts_df['cleaned_abstract'] = all_abstracts_df['cleaned_abstract'].apply(lambda x: preprocess_data(' '.join(x)))


In [14]:
all_abstracts_df.head()

Unnamed: 0,Abstract,cleaned_abstract
0,<jats:p> The use of reactive solvents offers a...,"[use, reactiv, solvent, offer, interest, flexi..."
1,<jats:p> Aromatic copolyimides were prepared u...,"[aromat, copolyimid, prepar, use, biphenyltetr..."
2,<jats:p> Phase separation in unreinforced reac...,"[phase, separ, unreinforc, reactiv, blend, stu..."
3,<jats:p> A new impregnating epoxide resin has ...,"[new, impregn, epoxid, resin, develop, optim, ..."
4,<jats:p>Marchuk's model of a general immune re...,"[marchuk, model, gener, immun, reaction, prese..."


## Text vectorization and KNN

The code you provided demonstrates a process of using the scikit-learn library in Python to perform text vectorization and nearest neighbors search. This code is commonly used for tasks such as document similarity or recommendation systems. Let's break down the code step by step:

#### 1. Processing the sentences:

The process_sentences function is called with input_df as the argument, and the result is assigned to the sentences variable.
This step cleans and tokenizes the sentences in the 'Abstract' column of the dataframe, storing the processed sentences in a list.

#### 2. Vectorizing sentences using TF-IDF:

The TfidfVectorizer class is instantiated as vectorizer.
The fit_transform method of the vectorizer is called with sentences as the argument.
This step converts the processed sentences into a matrix of TF-IDF features, where each row corresponds to a sentence, and each column represents a unique word or term in the sentences.

#### 3. Fitting a NearestNeighbors model:

The NearestNeighbors class is instantiated as model.
The fit method of the model is called with the TF-IDF matrix X as the argument.
This step fits the nearest neighbors model to the TF-IDF matrix.
Saving the model and vectorizer:

The pickle module is imported to save the model and vectorizer objects for later use.
The model object is saved to a file named 'knn_model.pkl'.
The vectorizer object is saved to a file named 'vectorizer.pkl'.
By following these steps, the code preprocesses text data, vectorizes it using TF-IDF, fits a nearest neighbors model to the vectorized data, and saves the model and vectorizer for future use.

In [18]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from spacy.lang.en.stop_words import STOP_WORDS

def tokenize(words):
    return [word for word in words if word not in STOP_WORDS]


def get_sentences(input_df):
    for index, row in input_df.iterrows():
        yield simple_preprocess(row['Abstract'])
        
def process_sentences(input_df):
    sentences = []
    for sentence in get_sentences(input_df):
        cleaned_sentence = clean_text(' '.join(sentence))
        tokenized_sentence = tokenize(cleaned_sentence)
        sentences.append(' '.join(tokenized_sentence))
    return sentences

input_df = all_abstracts_df
sentences = process_sentences(input_df)

# Convert sentences to a matrix of TF-IDF features
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(sentences)

# Fit a NearestNeighbors model to the data
model = NearestNeighbors(n_neighbors=5)
model.fit(X)

# Save the model and vectorizer for later use
import pickle
with open('knn_model.pkl', 'wb') as f:
    pickle.dump(model, f)
with open('vectorizer.pkl', 'wb') as f:
    pickle.dump(vectorizer, f)


Use a trained nearest neighbors model and vectorizer to propose alternatives for a given query

In [19]:
def propose_alternatives(query, model, vectorizer):
    query_vector = vectorizer.transform([query])
    distances, indices = model.kneighbors(query_vector)
    return indices[0]

# Load the model and vectorizer
with open('knn_model.pkl', 'rb') as f:
    model = pickle.load(f)
with open('vectorizer.pkl', 'rb') as f:
    vectorizer = pickle.load(f)

query = 'Machine learning for software engineering'
alternatives = propose_alternatives(query, model, vectorizer)

for i in alternatives:
    print(sentences[i])


jat software testing method executing program intension finding error process complex time intensive costly automation software testing process turn workable solution software engineering purpose machine learning algorithm chose primary research study tried explain machine learning based approach software testing overview input available data given model learning helping giving prediction discovered machine learning method employed test case development refining evaluation machine learning evaluate production test oracle predict cost testing procedure jat
jat order create software application necessary understand concept software engineering paper describe software engineering followed deliver software application paper architecture software discussed category architecture software pattern category architecture advantage challenge implementation software jat





## Word2Vec 

#### Help function 

The purpose of this callback is to track and display the training loss after each epoch, providing a measure of how the model is learning and improving over time. This information can be helpful for monitoring the training progress and assessing the model's performance.

In [14]:
import time
from gensim.models.callbacks import CallbackAny2Vec

class callback(CallbackAny2Vec):
    '''Callback to print loss after each epoch.'''

    def __init__(self):
        self.epoch = 0

    def on_epoch_end(self, model):
        loss = model.get_latest_training_loss()

        if self.epoch == 0:
            print('Loss after epoch {}: {}'.format(self.epoch, loss))
        else:
            print('Loss after epoch {}: {}'.format(self.epoch, loss - self.loss_previous_step))

        self.epoch += 1
        self.loss_previous_step = loss


### Τaining a Word2Vec model using Gensim in Python

#### Importing necessary modules:

* The logging module is imported to enable logging functionality.
* The gensim.models module is imported to access the Word2Vec model class.
* The gensim.models.phrases module is imported to work with multi-word expressions using the Phrases and Phraser classes.

#### Training the Phrases model:

* The Phrases model is trained using the Phrases class on the 'cleaned_abstract' column of the all_abstracts_df dataframe.
* The min_count parameter sets the minimum frequency of a phrase to be considered.
* The threshold parameter sets the threshold score for forming a phrase.
* This step is used to connect multi-word expressions in the text data.

#### Training the Word2Vec model:

The sentences variable is assigned the transformed sentences obtained from applying the bigram model (Phrases model) on the 'cleaned_abstract' column of the all_abstracts_df dataframe.

The Word2Vec model is instantiated as model_simple with the desired parameters:
* sentences is the preprocessed sentences on which the model will be trained.
* vector_size is the dimensionality of the word vectors.
* window is the maximum distance between the target word and its context words.
* min_count is the minimum frequency threshold for a word to be included in the vocabulary.
* workers is the number of worker threads to train the model in parallel.
* compute_loss=True enables the computation of the training loss during the training process.
* callbacks=[callback()] specifies the callback function to track and print the loss after each epoch.

The train method of the model_simple object is called to train the model further.
The sentences are provided as input data.
total_examples is set to model_simple.corpus_count, which indicates the total number of examples (sentences) in the corpus.
epochs specifies the number of training iterations.
compute_loss=True enables the computation of the training loss during the training process.
callbacks=[callback()] specifies the callback function to track and print the loss after each epoch.


In [17]:
import logging 
from gensim.models import Word2Vec, Phrases
from gensim.models.phrases import Phraser

# Training the Phrases model to connect multi-word expressions
bigram = Phrases(all_abstracts_df['cleaned_abstract'].tolist(), min_count=2, threshold=5)

# Using the Phrases model for training word vectors
sentences = bigram[all_abstracts_df['cleaned_abstract'].tolist()] 

model_simple = Word2Vec(
    sentences,
    vector_size=170,
    window=9,
    min_count=10,
    workers=12,
    compute_loss=True,
    callbacks=[callback()]
)

start_time = time.time()
model_simple.train(
    sentences,
    total_examples=model_simple.corpus_count,
    epochs=100,
    compute_loss=True,
    callbacks=[callback()]
)
# Calculate and print the training time
end_time = time.time()
print("Training time: {}s".format(end_time - start_time))

# Save the model for later use
model_simple.save("word2vec1.model")


Loss after epoch 0: 2013547.125
Loss after epoch 1: 1704544.875
Loss after epoch 2: 1616776.0
Loss after epoch 3: 1428977.5
Loss after epoch 4: 1296910.5
Loss after epoch 0: 1610334.625
Loss after epoch 1: 1360190.625
Loss after epoch 2: 1534656.75
Loss after epoch 3: 1264835.0
Loss after epoch 4: 1159422.0
Loss after epoch 5: 1192438.0
Loss after epoch 6: 1054586.0
Loss after epoch 7: 1164941.0
Loss after epoch 8: 1057960.0
Loss after epoch 9: 1101458.0
Loss after epoch 10: 1007575.0
Loss after epoch 11: 1078382.0
Loss after epoch 12: 1009076.0
Loss after epoch 13: 1151640.0
Loss after epoch 14: 847713.0
Loss after epoch 15: 877336.0
Loss after epoch 16: 923292.0
Loss after epoch 17: 803480.0
Loss after epoch 18: 925674.0
Loss after epoch 19: 776512.0
Loss after epoch 20: 914674.0
Loss after epoch 21: 831320.0
Loss after epoch 22: 778494.0
Loss after epoch 23: 869056.0
Loss after epoch 24: 791802.0
Loss after epoch 25: 714756.0
Loss after epoch 26: 809846.0
Loss after epoch 27: 853928

Some basic metrics of our model.
1. Vocabulary Size: It gives the number of unique words in your model's vocabulary.
2. Embedding Size: It gives the dimensionality of the word vectors.
3. Sample Word Vector: It presents the word vector of a sample word, which helps to understand the structure of the word vectors.
4. Most Similar Words: For a given word, you can find the most similar words in the vocabulary. This can help to illustrate the semantic relationships learned by the model.

In [25]:

model_simple = Word2Vec.load("word2vec1.model")
# Get the vocabulary from your model
vocab = list(model_simple.wv.key_to_index)

# Print the vocabulary
print("Vocabulary Size: ", len(vocab))
print("Embedding Size: ", model_simple.vector_size)

import random

random_word = random.choice(model_simple.wv.index_to_key)
print("Random word from the vocabulary:", random_word)
print("Vector representation of '{}':\n{}".format(random_word, model_simple.wv[random_word]))


word = random.choice(model_simple.wv.index_to_key)
similar_words = model_simple.wv.most_similar(positive=[word], topn=5)
print("Words most similar to '{}':".format(word))
for word, similarity in similar_words:
    print("{}: {:.2f}".format(word, similarity))



Vocabulary Size:  92634
Embedding Size:  170
Random word from the vocabulary: euler
Vector representation of 'euler':
[ 1.8463045  -0.06705415  0.2547172  -2.5437639  -0.28381264 -5.532329
  0.56176656  1.065374    1.1329423   1.8228225  -0.5914034   0.55090314
  2.0438173   0.08780122  0.43613076  1.0922183   1.7926271   1.3081071
 -0.4504095   1.1848265   0.8958641   1.0969577  -0.2637198  -2.4744334
  0.22984296  0.08409906 -2.0167239   0.86515456  0.09193444  2.2129707
 -0.29017285 -2.420264    1.0281972   2.2223926   0.74556214 -1.0457335
 -2.1293983   0.9427547   1.1440977  -4.6470985   0.5070479   0.9725183
  3.3835607   2.3239877   2.15208    -2.1644535   1.1496928  -0.66145366
 -1.4161856  -2.6236892   3.5877209   1.4379928  -0.34087843  3.0576997
  1.4140025   1.5161414   1.9283969  -0.6957571  -1.8991587   1.1379238
 -0.91082406  2.388824   -2.7143767  -1.8783319   0.28064957 -1.0657226
  0.85440105  2.1324694  -3.2595408   0.5635338   0.63588566 -0.37891176
 -1.6751862   0.

In [2]:
from nltk.tokenize import word_tokenize
from gensim.models import Word2Vec
from gensim.models.phrases import Phraser

# model_simple = Word2Vec.load("word2vec1.model")
def preprocess_data(input_phrase):
    # Lowercase the phrase and tokenize it
    processed_phrase = word_tokenize(input_phrase.lower())
    # Load phrases model
    phrases_model = Phraser.load('phrases_model.txt')
    # Apply phrases model to the tokenized phrase
    processed_phrase = phrases_model[processed_phrase]
    return processed_phrase

def generate_alternative_sentences(input_phrase, num_sentences=5, topn=5):
    # Preprocess the input_phrase
    processed_input = preprocess_data(input_phrase)

    # Get the most similar words for each word in the input_phrase
    similar_words = {word: [item[0] for item in model_simple.wv.most_similar(word, topn=topn)] for word in processed_input if word in model.wv}

    # Generate new sentences
    new_sentences = set()
    attempts = 0
    max_attempts = num_sentences * 10  # Arbitrary number of maximum attempts

    while len(new_sentences) < num_sentences and attempts < max_attempts:
        for i in range(topn):  # Iterate over topn most similar words
            new_sentence = []
            for word in processed_input:
                # Select the ith most similar word if it exists in the similar_words dict, else use the original word
                new_word = similar_words.get(word, [word])[i % len(similar_words.get(word, [word]))]  # Use modulo to prevent index errors
                new_sentence.append(new_word)
            new_sentences.add(' '.join(new_sentence))
            attempts += 1
            if len(new_sentences) >= num_sentences:
                break  # Exit if we have enough sentences

    return [sentence.replace('_', ' ') for sentence in new_sentences]
print(generate_alternative_sentences("machine learning for software testing"))

['data driven for tools validation', 'deep learning for computer tests', 'prediction for graphical interface evaluation', 'artificial intelligence for hardware test', 'algorithms for simulation software assessment']


In [4]:
model_simple = Word2Vec.load("word2vec1.model")

words = ["machine", "learning", "for", "software", "testing"]
for word in words:
    if word in model_simple.wv:
        print(f"'{word}' is in the model's vocabulary.")
    else:
        print(f"'{word}' is NOT in the model's vocabulary.")


'machine' is NOT in the model's vocabulary.
'learning' is NOT in the model's vocabulary.
'for' is NOT in the model's vocabulary.
'software' is NOT in the model's vocabulary.
'testing' is NOT in the model's vocabulary.


In [7]:
# Get the vocabulary from your model
vocab = list(model.wv.key_to_index)

# Print the vocabulary
print(vocab)




In [39]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.29.2-py3-none-any.whl (7.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting filelock (from transformers)
  Downloading filelock-3.12.0-py3-none-any.whl (10 kB)
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
Collecting pyyaml>=5.1 (from transformers)
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m596.3/596.3 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp37-cp37m-manylinux_2_17

In [5]:
from transformers import T5ForConditionalGeneration, T5Tokenizer, AlbertTokenizer

# initialize the model
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# initialize the tokenizer
tokenizer = T5Tokenizer.from_pretrained('t5-base')

# the sentence to be paraphrased
sentence = "Example sentence."

# encode the sentence using the tokenizer
inputs = tokenizer.encode(sentence, return_tensors='pt')

# generate paraphrases using the model
outputs = model.generate(inputs)

# decode the generated paraphrases
paraphrases = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(paraphrases)


TypeError: 'NoneType' object is not callable

In [24]:
import re
import gensim
from gensim.models.phrases import Phrases, Phraser
from spacy.lang.en.stop_words import STOP_WORDS
import logging

logging.basicConfig(format="%(levelname)s - %(asctime)s: %(message)s", datefmt= '%H:%M:%S', level=logging.INFO)

def read_input(input_df):
    for i, row in input_df.iterrows():
        if i % 10000 == 0:
            print(row)
            logging.info(f"read {i} reviews")
        # do some pre-processing and return a list of words for each review text
        yield gensim.utils.simple_preprocess(row['Abstract'])


read_input(all_abstracts_df)

<generator object read_input at 0x7fab97ff0660>

In [29]:
import re
import logging
from gensim.models import Word2Vec, Phrases
from gensim.models.phrases import Phraser
from gensim.utils import simple_preprocess
from spacy.lang.en.stop_words import STOP_WORDS

def get_sentences(input_df):
    for index, row in input_df.iterrows():
        yield simple_preprocess(row['Abstract'])

def clean_text(text):
    # Convert to lower case
    text = text.lower()
    # Remove HTML tags
    text = re.sub(r'<.*?>', '', text)
    # Remove specific characters
    text = text.replace('°', '').replace('..', '').replace(',′', '')
    # Remove 'ie', single letters, and numbers
    text = re.sub(r'\bie\b|\b[a-zA-Z]\b|\b[\d]+\b', '', text)
    return text

def tokenize(sentence):
    return [token for token in sentence.split() if token not in STOP_WORDS]

def build_phrases(sentences):
    phrases = Phrases(sentences, min_count=5, threshold=7, progress_per=1000)
    return Phraser(phrases)

def sentence_to_bi_grams(phrases_model, sentence):
    return ' '.join(phrases_model[sentence])

def sentences_to_bi_grams(phrases_model, input_df):
    n_grams_sentences = []
    for sentence in get_sentences(input_df):
        cleaned_sentence = clean_text(' '.join(sentence))
        tokenized_sentence = tokenize(cleaned_sentence)
        parsed_sentence = sentence_to_bi_grams(phrases_model, tokenized_sentence)
        n_grams_sentences.append(parsed_sentence.split())
    return n_grams_sentences


input_df = all_abstracts_df
sentences = list(get_sentences(input_df))
phrases_model = build_phrases(sentences)
phrases_model.save('phrases_model.txt')
sentences_with_bi_grams = sentences_to_bi_grams(phrases_model, input_df)

model_advanced = Word2Vec(
    sentences_with_bi_grams,
    vector_size=170,   
    window=9,          
    min_count=10,      
    workers=12,
    compute_loss=True,
    callbacks=[callback()]
)

model_advanced.train(sentences_with_bi_grams, total_examples=model_advanced.corpus_count, epochs=100, compute_loss=True, callbacks=[callback()])

model_advanced.save("word2vec.model")




INFO - 17:19:25: collecting all words and their counts
INFO - 17:19:25: PROGRESS: at sentence #0, processed 0 words and 0 word types
INFO - 17:19:25: PROGRESS: at sentence #1000, processed 186237 words and 132156 word types
INFO - 17:19:25: PROGRESS: at sentence #2000, processed 396700 words and 248478 word types
INFO - 17:19:25: PROGRESS: at sentence #3000, processed 616740 words and 348103 word types
INFO - 17:19:25: PROGRESS: at sentence #4000, processed 822576 words and 440056 word types
INFO - 17:19:25: PROGRESS: at sentence #5000, processed 1022977 words and 524896 word types
INFO - 17:19:25: PROGRESS: at sentence #6000, processed 1215523 words and 602232 word types
INFO - 17:19:26: PROGRESS: at sentence #7000, processed 1408265 words and 675228 word types
INFO - 17:19:26: PROGRESS: at sentence #8000, processed 1595800 words and 745988 word types
INFO - 17:19:26: PROGRESS: at sentence #9000, processed 1785275 words and 815159 word types
INFO - 17:19:26: PROGRESS: at sentence #100

INFO - 17:19:35: PROGRESS: at sentence #87000, processed 15932949 words and 4370116 word types
INFO - 17:19:35: PROGRESS: at sentence #88000, processed 16117085 words and 4408425 word types
INFO - 17:19:36: PROGRESS: at sentence #89000, processed 16327249 words and 4451638 word types
INFO - 17:19:36: PROGRESS: at sentence #90000, processed 16528522 words and 4492031 word types
INFO - 17:19:36: PROGRESS: at sentence #91000, processed 16727177 words and 4536316 word types
INFO - 17:19:36: PROGRESS: at sentence #92000, processed 16921141 words and 4574862 word types
INFO - 17:19:36: PROGRESS: at sentence #93000, processed 17122873 words and 4613586 word types
INFO - 17:19:36: PROGRESS: at sentence #94000, processed 17306035 words and 4648903 word types
INFO - 17:19:36: PROGRESS: at sentence #95000, processed 17487492 words and 4684300 word types
INFO - 17:19:37: PROGRESS: at sentence #96000, processed 17658728 words and 4717235 word types
INFO - 17:19:37: PROGRESS: at sentence #97000, pro

INFO - 17:20:25: EPOCH 0 - PROGRESS: at 19.24% examples, 2152562 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:26: EPOCH 0 - PROGRESS: at 38.62% examples, 2172634 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:27: EPOCH 0 - PROGRESS: at 59.36% examples, 2196558 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:28: EPOCH 0 - PROGRESS: at 80.23% examples, 2217097 words/s, in_qsize 22, out_qsize 1
INFO - 17:20:29: EPOCH 0: training on 12595541 raw words (11171098 effective words) took 5.0s, 2233116 effective words/s


Loss after epoch 0: 2070181.375


INFO - 17:20:30: EPOCH 1 - PROGRESS: at 19.22% examples, 2156670 words/s, in_qsize 22, out_qsize 1
INFO - 17:20:31: EPOCH 1 - PROGRESS: at 39.60% examples, 2225660 words/s, in_qsize 24, out_qsize 0
INFO - 17:20:32: EPOCH 1 - PROGRESS: at 60.28% examples, 2233609 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:33: EPOCH 1 - PROGRESS: at 81.33% examples, 2250586 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:34: EPOCH 1: training on 12595541 raw words (11170764 effective words) took 5.0s, 2252174 effective words/s


Loss after epoch 1: 1439028.125


INFO - 17:20:35: EPOCH 2 - PROGRESS: at 19.55% examples, 2191851 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:36: EPOCH 2 - PROGRESS: at 39.66% examples, 2225750 words/s, in_qsize 24, out_qsize 0
INFO - 17:20:37: EPOCH 2 - PROGRESS: at 57.28% examples, 2112804 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:38: EPOCH 2 - PROGRESS: at 74.01% examples, 2050866 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:39: EPOCH 2 - PROGRESS: at 90.54% examples, 2011932 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:39: EPOCH 2: training on 12595541 raw words (11170717 effective words) took 5.6s, 2003594 effective words/s


Loss after epoch 2: 1310269.0


INFO - 17:20:40: EPOCH 3 - PROGRESS: at 16.14% examples, 1793878 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:41: EPOCH 3 - PROGRESS: at 32.98% examples, 1832085 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:43: EPOCH 3 - PROGRESS: at 50.11% examples, 1851601 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:44: EPOCH 3 - PROGRESS: at 66.80% examples, 1852206 words/s, in_qsize 22, out_qsize 1
INFO - 17:20:45: EPOCH 3 - PROGRESS: at 84.16% examples, 1858628 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:45: EPOCH 3: training on 12595541 raw words (11170638 effective words) took 6.0s, 1866220 effective words/s


Loss after epoch 3: 1159083.5


INFO - 17:20:46: EPOCH 4 - PROGRESS: at 16.08% examples, 1784978 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:47: EPOCH 4 - PROGRESS: at 32.92% examples, 1833465 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:48: EPOCH 4 - PROGRESS: at 49.94% examples, 1850892 words/s, in_qsize 24, out_qsize 0
INFO - 17:20:50: EPOCH 4 - PROGRESS: at 66.81% examples, 1850462 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:51: EPOCH 4 - PROGRESS: at 84.18% examples, 1858733 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:51: EPOCH 4: training on 12595541 raw words (11170812 effective words) took 6.0s, 1864649 effective words/s
INFO - 17:20:51: Word2Vec lifecycle event {'msg': 'training on 62977705 raw words (55854029 effective words) took 27.5s, 2028722 effective words/s', 'datetime': '2023-06-08T17:20:51.965657', 'gensim': '4.2.0', 'python': '3.7.3 (default, Oct 31 2022, 14:04:00) \n[GCC 8.3.0]', 'platform': 'Linux-4.19.0-24-amd64-x86_64-with-debian-10.10', 'event': 'train'}
INFO - 17:20:51: Word2Vec li

Loss after epoch 4: 1210551.0


INFO - 17:20:53: EPOCH 0 - PROGRESS: at 15.87% examples, 1773880 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:54: EPOCH 0 - PROGRESS: at 32.85% examples, 1826758 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:55: EPOCH 0 - PROGRESS: at 50.04% examples, 1849011 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:56: EPOCH 0 - PROGRESS: at 66.81% examples, 1854598 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:57: EPOCH 0 - PROGRESS: at 84.02% examples, 1857303 words/s, in_qsize 23, out_qsize 0
INFO - 17:20:57: EPOCH 0: training on 12595541 raw words (11170131 effective words) took 6.0s, 1861867 effective words/s


Loss after epoch 0: 1349856.75


INFO - 17:20:59: EPOCH 1 - PROGRESS: at 15.75% examples, 1767049 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:00: EPOCH 1 - PROGRESS: at 32.73% examples, 1829662 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:01: EPOCH 1 - PROGRESS: at 49.61% examples, 1834687 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:02: EPOCH 1 - PROGRESS: at 66.57% examples, 1849687 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:03: EPOCH 1 - PROGRESS: at 83.80% examples, 1853769 words/s, in_qsize 22, out_qsize 1
INFO - 17:21:03: EPOCH 1: training on 12595541 raw words (11170644 effective words) took 6.0s, 1865164 effective words/s


Loss after epoch 1: 1550186.0


INFO - 17:21:05: EPOCH 2 - PROGRESS: at 15.87% examples, 1767658 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:06: EPOCH 2 - PROGRESS: at 32.29% examples, 1807585 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:07: EPOCH 2 - PROGRESS: at 49.72% examples, 1840632 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:08: EPOCH 2 - PROGRESS: at 66.57% examples, 1853154 words/s, in_qsize 24, out_qsize 0
INFO - 17:21:09: EPOCH 2 - PROGRESS: at 83.85% examples, 1855794 words/s, in_qsize 24, out_qsize 0
INFO - 17:21:09: EPOCH 2: training on 12595541 raw words (11170538 effective words) took 6.0s, 1860687 effective words/s


Loss after epoch 2: 1266304.25


INFO - 17:21:11: EPOCH 3 - PROGRESS: at 15.75% examples, 1768927 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:12: EPOCH 3 - PROGRESS: at 32.11% examples, 1803670 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:13: EPOCH 3 - PROGRESS: at 49.45% examples, 1828794 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:14: EPOCH 3 - PROGRESS: at 66.22% examples, 1841354 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:15: EPOCH 3 - PROGRESS: at 83.64% examples, 1849828 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:16: EPOCH 3: training on 12595541 raw words (11170475 effective words) took 6.0s, 1859717 effective words/s


Loss after epoch 3: 1091979.5


INFO - 17:21:17: EPOCH 4 - PROGRESS: at 15.94% examples, 1781259 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:18: EPOCH 4 - PROGRESS: at 33.19% examples, 1847255 words/s, in_qsize 24, out_qsize 1
INFO - 17:21:19: EPOCH 4 - PROGRESS: at 50.24% examples, 1858052 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:20: EPOCH 4 - PROGRESS: at 66.81% examples, 1854631 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:21: EPOCH 4 - PROGRESS: at 84.10% examples, 1857615 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:22: EPOCH 4: training on 12595541 raw words (11170694 effective words) took 6.0s, 1859411 effective words/s


Loss after epoch 4: 1311974.0


INFO - 17:21:23: EPOCH 5 - PROGRESS: at 16.15% examples, 1795473 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:24: EPOCH 5 - PROGRESS: at 33.19% examples, 1846762 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:25: EPOCH 5 - PROGRESS: at 50.32% examples, 1846610 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:26: EPOCH 5 - PROGRESS: at 67.33% examples, 1857679 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:27: EPOCH 5 - PROGRESS: at 84.66% examples, 1866617 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:27: EPOCH 5: training on 12595541 raw words (11170285 effective words) took 6.0s, 1873857 effective words/s


Loss after epoch 5: 1063101.5


INFO - 17:21:28: EPOCH 6 - PROGRESS: at 16.21% examples, 1788112 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:30: EPOCH 6 - PROGRESS: at 33.27% examples, 1841517 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:31: EPOCH 6 - PROGRESS: at 50.32% examples, 1859766 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:32: EPOCH 6 - PROGRESS: at 66.80% examples, 1854848 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:33: EPOCH 6 - PROGRESS: at 84.03% examples, 1855728 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:33: EPOCH 6: training on 12595541 raw words (11170607 effective words) took 6.0s, 1867249 effective words/s


Loss after epoch 6: 1159833.0


INFO - 17:21:34: EPOCH 7 - PROGRESS: at 16.15% examples, 1805560 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:35: EPOCH 7 - PROGRESS: at 33.19% examples, 1851056 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:36: EPOCH 7 - PROGRESS: at 50.26% examples, 1863990 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:37: EPOCH 7 - PROGRESS: at 67.51% examples, 1873672 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:38: EPOCH 7 - PROGRESS: at 84.59% examples, 1874642 words/s, in_qsize 22, out_qsize 1
INFO - 17:21:39: EPOCH 7: training on 12595541 raw words (11171010 effective words) took 5.9s, 1883486 effective words/s


Loss after epoch 7: 952297.0


INFO - 17:21:40: EPOCH 8 - PROGRESS: at 16.28% examples, 1808607 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:41: EPOCH 8 - PROGRESS: at 33.32% examples, 1845474 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:42: EPOCH 8 - PROGRESS: at 50.25% examples, 1855008 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:43: EPOCH 8 - PROGRESS: at 67.60% examples, 1870195 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:44: EPOCH 8 - PROGRESS: at 84.90% examples, 1874626 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:45: EPOCH 8: training on 12595541 raw words (11170588 effective words) took 5.9s, 1884406 effective words/s


Loss after epoch 8: 896125.0


INFO - 17:21:46: EPOCH 9 - PROGRESS: at 16.42% examples, 1829343 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:47: EPOCH 9 - PROGRESS: at 33.53% examples, 1864609 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:48: EPOCH 9 - PROGRESS: at 50.79% examples, 1877137 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:49: EPOCH 9 - PROGRESS: at 67.86% examples, 1879762 words/s, in_qsize 22, out_qsize 1
INFO - 17:21:50: EPOCH 9 - PROGRESS: at 84.83% examples, 1878513 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:51: EPOCH 9: training on 12595541 raw words (11170373 effective words) took 5.9s, 1884546 effective words/s


Loss after epoch 9: 917590.0


INFO - 17:21:52: EPOCH 10 - PROGRESS: at 16.28% examples, 1815344 words/s, in_qsize 22, out_qsize 1
INFO - 17:21:53: EPOCH 10 - PROGRESS: at 33.46% examples, 1866827 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:54: EPOCH 10 - PROGRESS: at 50.50% examples, 1872859 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:55: EPOCH 10 - PROGRESS: at 67.49% examples, 1877844 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:56: EPOCH 10 - PROGRESS: at 84.83% examples, 1883401 words/s, in_qsize 23, out_qsize 0
INFO - 17:21:57: EPOCH 10: training on 12595541 raw words (11170621 effective words) took 5.9s, 1892062 effective words/s


Loss after epoch 10: 902021.0


INFO - 17:21:58: EPOCH 11 - PROGRESS: at 16.43% examples, 1826963 words/s, in_qsize 22, out_qsize 1
INFO - 17:21:59: EPOCH 11 - PROGRESS: at 33.34% examples, 1850542 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:00: EPOCH 11 - PROGRESS: at 50.72% examples, 1874107 words/s, in_qsize 22, out_qsize 1
INFO - 17:22:01: EPOCH 11 - PROGRESS: at 67.61% examples, 1874865 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:02: EPOCH 11 - PROGRESS: at 84.73% examples, 1877004 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:03: EPOCH 11: training on 12595541 raw words (11170677 effective words) took 5.9s, 1886905 effective words/s


Loss after epoch 11: 1042522.0


INFO - 17:22:04: EPOCH 12 - PROGRESS: at 16.49% examples, 1841069 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:05: EPOCH 12 - PROGRESS: at 33.61% examples, 1878406 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:06: EPOCH 12 - PROGRESS: at 50.80% examples, 1880532 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:07: EPOCH 12 - PROGRESS: at 68.08% examples, 1888004 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:08: EPOCH 12 - PROGRESS: at 85.24% examples, 1891116 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:09: EPOCH 12: training on 12595541 raw words (11170417 effective words) took 5.9s, 1897558 effective words/s


Loss after epoch 12: 866177.0


INFO - 17:22:10: EPOCH 13 - PROGRESS: at 16.19% examples, 1798164 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:11: EPOCH 13 - PROGRESS: at 33.25% examples, 1851793 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:12: EPOCH 13 - PROGRESS: at 50.50% examples, 1869270 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:13: EPOCH 13 - PROGRESS: at 67.95% examples, 1881761 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:14: EPOCH 13 - PROGRESS: at 85.22% examples, 1887275 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:15: EPOCH 13: training on 12595541 raw words (11171312 effective words) took 5.9s, 1893112 effective words/s


Loss after epoch 13: 870343.0


INFO - 17:22:16: EPOCH 14 - PROGRESS: at 16.15% examples, 1805078 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:17: EPOCH 14 - PROGRESS: at 33.07% examples, 1848768 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:18: EPOCH 14 - PROGRESS: at 50.04% examples, 1858089 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:19: EPOCH 14 - PROGRESS: at 67.35% examples, 1869991 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:20: EPOCH 14 - PROGRESS: at 84.73% examples, 1878186 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:21: EPOCH 14: training on 12595541 raw words (11170385 effective words) took 5.9s, 1884460 effective words/s


Loss after epoch 14: 897422.0


INFO - 17:22:22: EPOCH 15 - PROGRESS: at 16.16% examples, 1800655 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:23: EPOCH 15 - PROGRESS: at 33.18% examples, 1848652 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:24: EPOCH 15 - PROGRESS: at 50.58% examples, 1871754 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:25: EPOCH 15 - PROGRESS: at 67.98% examples, 1883793 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:26: EPOCH 15 - PROGRESS: at 85.22% examples, 1888585 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:27: EPOCH 15: training on 12595541 raw words (11171013 effective words) took 5.9s, 1894098 effective words/s


Loss after epoch 15: 799750.0


INFO - 17:22:28: EPOCH 16 - PROGRESS: at 16.58% examples, 1843531 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:29: EPOCH 16 - PROGRESS: at 33.61% examples, 1874190 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:30: EPOCH 16 - PROGRESS: at 51.14% examples, 1883712 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:31: EPOCH 16 - PROGRESS: at 68.20% examples, 1883782 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:32: EPOCH 16 - PROGRESS: at 85.52% examples, 1892122 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:33: EPOCH 16: training on 12595541 raw words (11170786 effective words) took 5.9s, 1895472 effective words/s


Loss after epoch 16: 710050.0


INFO - 17:22:34: EPOCH 17 - PROGRESS: at 16.50% examples, 1839443 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:35: EPOCH 17 - PROGRESS: at 33.46% examples, 1869601 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:36: EPOCH 17 - PROGRESS: at 51.14% examples, 1880440 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:37: EPOCH 17 - PROGRESS: at 68.71% examples, 1892873 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:38: EPOCH 17 - PROGRESS: at 85.77% examples, 1896212 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:39: EPOCH 17: training on 12595541 raw words (11170894 effective words) took 5.9s, 1900661 effective words/s


Loss after epoch 17: 675676.0


INFO - 17:22:40: EPOCH 18 - PROGRESS: at 16.16% examples, 1795502 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:41: EPOCH 18 - PROGRESS: at 33.32% examples, 1856070 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:42: EPOCH 18 - PROGRESS: at 50.32% examples, 1864457 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:43: EPOCH 18 - PROGRESS: at 67.53% examples, 1875682 words/s, in_qsize 24, out_qsize 1
INFO - 17:22:44: EPOCH 18 - PROGRESS: at 84.83% examples, 1881264 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:44: EPOCH 18: training on 12595541 raw words (11171656 effective words) took 5.9s, 1888691 effective words/s


Loss after epoch 18: 756610.0


INFO - 17:22:45: EPOCH 19 - PROGRESS: at 16.21% examples, 1804836 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:46: EPOCH 19 - PROGRESS: at 32.93% examples, 1836495 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:47: EPOCH 19 - PROGRESS: at 50.25% examples, 1862739 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:48: EPOCH 19 - PROGRESS: at 67.38% examples, 1874698 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:49: EPOCH 19 - PROGRESS: at 84.99% examples, 1885147 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:50: EPOCH 19: training on 12595541 raw words (11170246 effective words) took 5.9s, 1891234 effective words/s


Loss after epoch 19: 701262.0


INFO - 17:22:51: EPOCH 20 - PROGRESS: at 16.58% examples, 1842283 words/s, in_qsize 22, out_qsize 1
INFO - 17:22:52: EPOCH 20 - PROGRESS: at 33.76% examples, 1879563 words/s, in_qsize 24, out_qsize 0
INFO - 17:22:53: EPOCH 20 - PROGRESS: at 51.26% examples, 1892372 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:54: EPOCH 20 - PROGRESS: at 68.30% examples, 1891118 words/s, in_qsize 22, out_qsize 1
INFO - 17:22:55: EPOCH 20 - PROGRESS: at 85.45% examples, 1893475 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:56: EPOCH 20: training on 12595541 raw words (11170610 effective words) took 5.9s, 1899417 effective words/s


Loss after epoch 20: 831940.0


INFO - 17:22:57: EPOCH 21 - PROGRESS: at 16.35% examples, 1817385 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:58: EPOCH 21 - PROGRESS: at 33.26% examples, 1849122 words/s, in_qsize 23, out_qsize 0
INFO - 17:22:59: EPOCH 21 - PROGRESS: at 50.42% examples, 1865669 words/s, in_qsize 22, out_qsize 1
INFO - 17:23:00: EPOCH 21 - PROGRESS: at 67.49% examples, 1872071 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:01: EPOCH 21 - PROGRESS: at 84.87% examples, 1879875 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:02: EPOCH 21: training on 12595541 raw words (11170373 effective words) took 5.9s, 1890293 effective words/s


Loss after epoch 21: 708268.0


INFO - 17:23:03: EPOCH 22 - PROGRESS: at 16.16% examples, 1801715 words/s, in_qsize 24, out_qsize 1
INFO - 17:23:04: EPOCH 22 - PROGRESS: at 33.26% examples, 1853784 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:05: EPOCH 22 - PROGRESS: at 50.52% examples, 1868171 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:06: EPOCH 22 - PROGRESS: at 67.71% examples, 1879178 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:07: EPOCH 22 - PROGRESS: at 85.30% examples, 1889458 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:08: EPOCH 22: training on 12595541 raw words (11171215 effective words) took 5.9s, 1895851 effective words/s


Loss after epoch 22: 751046.0


INFO - 17:23:09: EPOCH 23 - PROGRESS: at 16.57% examples, 1819751 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:10: EPOCH 23 - PROGRESS: at 33.83% examples, 1874722 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:11: EPOCH 23 - PROGRESS: at 51.14% examples, 1879337 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:12: EPOCH 23 - PROGRESS: at 68.51% examples, 1888629 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:13: EPOCH 23 - PROGRESS: at 85.78% examples, 1893497 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:14: EPOCH 23: training on 12595541 raw words (11170301 effective words) took 5.9s, 1893166 effective words/s


Loss after epoch 23: 916810.0


INFO - 17:23:15: EPOCH 24 - PROGRESS: at 16.72% examples, 1860024 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:16: EPOCH 24 - PROGRESS: at 33.76% examples, 1881155 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:17: EPOCH 24 - PROGRESS: at 51.37% examples, 1893239 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:18: EPOCH 24 - PROGRESS: at 69.01% examples, 1902076 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:19: EPOCH 24 - PROGRESS: at 86.24% examples, 1908210 words/s, in_qsize 22, out_qsize 1
INFO - 17:23:20: EPOCH 24: training on 12595541 raw words (11170267 effective words) took 5.8s, 1911317 effective words/s


Loss after epoch 24: 711362.0


INFO - 17:23:21: EPOCH 25 - PROGRESS: at 16.43% examples, 1824882 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:22: EPOCH 25 - PROGRESS: at 33.68% examples, 1877401 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:23: EPOCH 25 - PROGRESS: at 51.15% examples, 1882754 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:24: EPOCH 25 - PROGRESS: at 68.54% examples, 1892037 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:25: EPOCH 25 - PROGRESS: at 85.54% examples, 1893494 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:26: EPOCH 25: training on 12595541 raw words (11170736 effective words) took 5.9s, 1896913 effective words/s


Loss after epoch 25: 685522.0


INFO - 17:23:27: EPOCH 26 - PROGRESS: at 16.50% examples, 1840578 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:28: EPOCH 26 - PROGRESS: at 33.67% examples, 1878107 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:29: EPOCH 26 - PROGRESS: at 51.37% examples, 1895140 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:30: EPOCH 26 - PROGRESS: at 68.84% examples, 1903792 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:31: EPOCH 26 - PROGRESS: at 86.02% examples, 1907415 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:32: EPOCH 26: training on 12595541 raw words (11170732 effective words) took 5.8s, 1911784 effective words/s


Loss after epoch 26: 717006.0


INFO - 17:23:33: EPOCH 27 - PROGRESS: at 16.56% examples, 1847011 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:34: EPOCH 27 - PROGRESS: at 33.84% examples, 1889610 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:35: EPOCH 27 - PROGRESS: at 51.14% examples, 1890882 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:36: EPOCH 27 - PROGRESS: at 68.32% examples, 1893518 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:37: EPOCH 27 - PROGRESS: at 85.45% examples, 1896297 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:37: EPOCH 27: training on 12595541 raw words (11171077 effective words) took 5.9s, 1896062 effective words/s


Loss after epoch 27: 697362.0


INFO - 17:23:38: EPOCH 28 - PROGRESS: at 16.42% examples, 1828284 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:39: EPOCH 28 - PROGRESS: at 33.61% examples, 1873891 words/s, in_qsize 24, out_qsize 1
INFO - 17:23:40: EPOCH 28 - PROGRESS: at 51.03% examples, 1885359 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:41: EPOCH 28 - PROGRESS: at 68.51% examples, 1895248 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:42: EPOCH 28 - PROGRESS: at 85.67% examples, 1898199 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:43: EPOCH 28: training on 12595541 raw words (11170249 effective words) took 5.9s, 1902953 effective words/s


Loss after epoch 28: 819564.0


INFO - 17:23:44: EPOCH 29 - PROGRESS: at 16.71% examples, 1849690 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:45: EPOCH 29 - PROGRESS: at 33.75% examples, 1876480 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:46: EPOCH 29 - PROGRESS: at 51.15% examples, 1886460 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:47: EPOCH 29 - PROGRESS: at 68.64% examples, 1894581 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:48: EPOCH 29 - PROGRESS: at 85.94% examples, 1900667 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:49: EPOCH 29: training on 12595541 raw words (11170826 effective words) took 5.9s, 1903403 effective words/s


Loss after epoch 29: 722930.0


INFO - 17:23:50: EPOCH 30 - PROGRESS: at 16.43% examples, 1823027 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:51: EPOCH 30 - PROGRESS: at 33.53% examples, 1868272 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:52: EPOCH 30 - PROGRESS: at 50.91% examples, 1881975 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:53: EPOCH 30 - PROGRESS: at 68.30% examples, 1891758 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:54: EPOCH 30 - PROGRESS: at 85.59% examples, 1897947 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:55: EPOCH 30: training on 12595541 raw words (11169942 effective words) took 5.9s, 1901615 effective words/s


Loss after epoch 30: 792318.0


INFO - 17:23:56: EPOCH 31 - PROGRESS: at 16.50% examples, 1830846 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:57: EPOCH 31 - PROGRESS: at 33.83% examples, 1885107 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:58: EPOCH 31 - PROGRESS: at 51.49% examples, 1898956 words/s, in_qsize 23, out_qsize 0
INFO - 17:23:59: EPOCH 31 - PROGRESS: at 69.09% examples, 1910354 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:00: EPOCH 31 - PROGRESS: at 86.45% examples, 1915292 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:01: EPOCH 31: training on 12595541 raw words (11171068 effective words) took 5.8s, 1921823 effective words/s


Loss after epoch 31: 881486.0


INFO - 17:24:02: EPOCH 32 - PROGRESS: at 16.42% examples, 1819252 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:03: EPOCH 32 - PROGRESS: at 33.67% examples, 1874634 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:04: EPOCH 32 - PROGRESS: at 51.02% examples, 1885537 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:05: EPOCH 32 - PROGRESS: at 68.76% examples, 1898479 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:06: EPOCH 32 - PROGRESS: at 85.85% examples, 1900879 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:07: EPOCH 32: training on 12595541 raw words (11171154 effective words) took 5.9s, 1905420 effective words/s


Loss after epoch 32: 697342.0


INFO - 17:24:08: EPOCH 33 - PROGRESS: at 16.65% examples, 1853098 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:09: EPOCH 33 - PROGRESS: at 33.82% examples, 1882738 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:10: EPOCH 33 - PROGRESS: at 51.32% examples, 1894556 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:11: EPOCH 33 - PROGRESS: at 68.89% examples, 1905229 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:12: EPOCH 33 - PROGRESS: at 86.08% examples, 1909306 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:13: EPOCH 33: training on 12595541 raw words (11171052 effective words) took 5.8s, 1910138 effective words/s


Loss after epoch 33: 656840.0


INFO - 17:24:14: EPOCH 34 - PROGRESS: at 16.65% examples, 1847820 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:15: EPOCH 34 - PROGRESS: at 33.90% examples, 1890554 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:16: EPOCH 34 - PROGRESS: at 51.77% examples, 1907486 words/s, in_qsize 22, out_qsize 1
INFO - 17:24:17: EPOCH 34 - PROGRESS: at 68.81% examples, 1903214 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:18: EPOCH 34 - PROGRESS: at 86.02% examples, 1904460 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:18: EPOCH 34: training on 12595541 raw words (11170574 effective words) took 5.8s, 1912537 effective words/s


Loss after epoch 34: 853808.0


INFO - 17:24:19: EPOCH 35 - PROGRESS: at 16.42% examples, 1814957 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:20: EPOCH 35 - PROGRESS: at 33.68% examples, 1861264 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:21: EPOCH 35 - PROGRESS: at 51.37% examples, 1883952 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:22: EPOCH 35 - PROGRESS: at 68.72% examples, 1890836 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:23: EPOCH 35 - PROGRESS: at 85.68% examples, 1891545 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:24: EPOCH 35: training on 12595541 raw words (11170467 effective words) took 5.9s, 1904434 effective words/s


Loss after epoch 35: 682786.0


INFO - 17:24:25: EPOCH 36 - PROGRESS: at 16.56% examples, 1845939 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:26: EPOCH 36 - PROGRESS: at 33.90% examples, 1889245 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:27: EPOCH 36 - PROGRESS: at 51.15% examples, 1888858 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:28: EPOCH 36 - PROGRESS: at 68.81% examples, 1902755 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:29: EPOCH 36 - PROGRESS: at 86.01% examples, 1905140 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:30: EPOCH 36: training on 12595541 raw words (11170583 effective words) took 5.8s, 1911170 effective words/s


Loss after epoch 36: 818386.0


INFO - 17:24:31: EPOCH 37 - PROGRESS: at 16.64% examples, 1838391 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:32: EPOCH 37 - PROGRESS: at 33.90% examples, 1882862 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:33: EPOCH 37 - PROGRESS: at 51.37% examples, 1891736 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:34: EPOCH 37 - PROGRESS: at 69.00% examples, 1901413 words/s, in_qsize 22, out_qsize 1
INFO - 17:24:35: EPOCH 37 - PROGRESS: at 86.24% examples, 1907319 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:36: EPOCH 37: training on 12595541 raw words (11171076 effective words) took 5.8s, 1910967 effective words/s


Loss after epoch 37: 847064.0


INFO - 17:24:37: EPOCH 38 - PROGRESS: at 16.43% examples, 1826886 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:38: EPOCH 38 - PROGRESS: at 33.67% examples, 1879945 words/s, in_qsize 24, out_qsize 0
INFO - 17:24:39: EPOCH 38 - PROGRESS: at 51.15% examples, 1886269 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:40: EPOCH 38 - PROGRESS: at 68.61% examples, 1891898 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:41: EPOCH 38 - PROGRESS: at 86.09% examples, 1902215 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:42: EPOCH 38: training on 12595541 raw words (11170540 effective words) took 5.9s, 1907978 effective words/s


Loss after epoch 38: 437616.0


INFO - 17:24:43: EPOCH 39 - PROGRESS: at 16.14% examples, 1799102 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:44: EPOCH 39 - PROGRESS: at 33.33% examples, 1859792 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:45: EPOCH 39 - PROGRESS: at 50.69% examples, 1879622 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:46: EPOCH 39 - PROGRESS: at 68.18% examples, 1889147 words/s, in_qsize 22, out_qsize 1
INFO - 17:24:47: EPOCH 39 - PROGRESS: at 85.38% examples, 1891084 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:48: EPOCH 39: training on 12595541 raw words (11170717 effective words) took 5.9s, 1899312 effective words/s


Loss after epoch 39: 501288.0


INFO - 17:24:49: EPOCH 40 - PROGRESS: at 16.56% examples, 1848062 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:50: EPOCH 40 - PROGRESS: at 33.83% examples, 1890690 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:51: EPOCH 40 - PROGRESS: at 51.38% examples, 1897813 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:52: EPOCH 40 - PROGRESS: at 69.10% examples, 1912512 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:53: EPOCH 40 - PROGRESS: at 86.01% examples, 1909442 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:54: EPOCH 40: training on 12595541 raw words (11170646 effective words) took 5.8s, 1919218 effective words/s


Loss after epoch 40: 545324.0


INFO - 17:24:55: EPOCH 41 - PROGRESS: at 16.58% examples, 1840865 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:56: EPOCH 41 - PROGRESS: at 33.61% examples, 1864090 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:57: EPOCH 41 - PROGRESS: at 51.26% examples, 1884211 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:58: EPOCH 41 - PROGRESS: at 68.73% examples, 1895619 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:59: EPOCH 41 - PROGRESS: at 85.75% examples, 1897229 words/s, in_qsize 23, out_qsize 0
INFO - 17:24:59: EPOCH 41: training on 12595541 raw words (11170943 effective words) took 5.9s, 1900413 effective words/s


Loss after epoch 41: 408424.0


INFO - 17:25:00: EPOCH 42 - PROGRESS: at 16.73% examples, 1856509 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:01: EPOCH 42 - PROGRESS: at 34.05% examples, 1888615 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:02: EPOCH 42 - PROGRESS: at 51.95% examples, 1902706 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:03: EPOCH 42 - PROGRESS: at 69.25% examples, 1906947 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:04: EPOCH 42 - PROGRESS: at 86.23% examples, 1906902 words/s, in_qsize 22, out_qsize 1
INFO - 17:25:05: EPOCH 42: training on 12595541 raw words (11170806 effective words) took 5.8s, 1911951 effective words/s


Loss after epoch 42: 400312.0


INFO - 17:25:06: EPOCH 43 - PROGRESS: at 16.49% examples, 1836196 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:07: EPOCH 43 - PROGRESS: at 33.68% examples, 1878599 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:08: EPOCH 43 - PROGRESS: at 51.37% examples, 1894769 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:09: EPOCH 43 - PROGRESS: at 69.01% examples, 1906861 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:10: EPOCH 43 - PROGRESS: at 86.23% examples, 1908070 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:11: EPOCH 43: training on 12595541 raw words (11170371 effective words) took 5.8s, 1915151 effective words/s


Loss after epoch 43: 455124.0


INFO - 17:25:12: EPOCH 44 - PROGRESS: at 16.50% examples, 1815050 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:13: EPOCH 44 - PROGRESS: at 33.75% examples, 1869031 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:14: EPOCH 44 - PROGRESS: at 51.25% examples, 1881978 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:15: EPOCH 44 - PROGRESS: at 68.84% examples, 1895795 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:16: EPOCH 44 - PROGRESS: at 85.93% examples, 1899995 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:17: EPOCH 44: training on 12595541 raw words (11170702 effective words) took 5.9s, 1909010 effective words/s


Loss after epoch 44: 437960.0


INFO - 17:25:18: EPOCH 45 - PROGRESS: at 16.42% examples, 1826802 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:19: EPOCH 45 - PROGRESS: at 33.54% examples, 1872008 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:20: EPOCH 45 - PROGRESS: at 50.80% examples, 1878203 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:21: EPOCH 45 - PROGRESS: at 67.87% examples, 1880841 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:22: EPOCH 45 - PROGRESS: at 85.29% examples, 1888269 words/s, in_qsize 22, out_qsize 1
INFO - 17:25:23: EPOCH 45: training on 12595541 raw words (11170599 effective words) took 5.9s, 1898162 effective words/s


Loss after epoch 45: 436468.0


INFO - 17:25:24: EPOCH 46 - PROGRESS: at 16.43% examples, 1822437 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:25: EPOCH 46 - PROGRESS: at 33.75% examples, 1878680 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:26: EPOCH 46 - PROGRESS: at 51.38% examples, 1894218 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:27: EPOCH 46 - PROGRESS: at 69.00% examples, 1906879 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:28: EPOCH 46 - PROGRESS: at 86.01% examples, 1906864 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:29: EPOCH 46: training on 12595541 raw words (11170476 effective words) took 5.8s, 1914006 effective words/s


Loss after epoch 46: 503164.0


INFO - 17:25:30: EPOCH 47 - PROGRESS: at 16.28% examples, 1801528 words/s, in_qsize 23, out_qsize 2
INFO - 17:25:31: EPOCH 47 - PROGRESS: at 33.60% examples, 1872334 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:32: EPOCH 47 - PROGRESS: at 50.91% examples, 1881139 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:33: EPOCH 47 - PROGRESS: at 68.51% examples, 1894322 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:34: EPOCH 47 - PROGRESS: at 85.86% examples, 1899880 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:35: EPOCH 47: training on 12595541 raw words (11170432 effective words) took 5.9s, 1908053 effective words/s


Loss after epoch 47: 422688.0


INFO - 17:25:36: EPOCH 48 - PROGRESS: at 16.65% examples, 1835349 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:37: EPOCH 48 - PROGRESS: at 34.12% examples, 1889352 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:38: EPOCH 48 - PROGRESS: at 51.87% examples, 1902644 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:39: EPOCH 48 - PROGRESS: at 69.40% examples, 1912172 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:40: EPOCH 48 - PROGRESS: at 86.53% examples, 1913978 words/s, in_qsize 24, out_qsize 0
INFO - 17:25:40: EPOCH 48: training on 12595541 raw words (11170461 effective words) took 5.8s, 1921936 effective words/s


Loss after epoch 48: 406920.0


INFO - 17:25:41: EPOCH 49 - PROGRESS: at 16.35% examples, 1812970 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:42: EPOCH 49 - PROGRESS: at 33.69% examples, 1874788 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:43: EPOCH 49 - PROGRESS: at 51.37% examples, 1892265 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:44: EPOCH 49 - PROGRESS: at 68.84% examples, 1900009 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:45: EPOCH 49 - PROGRESS: at 86.23% examples, 1906881 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:46: EPOCH 49: training on 12595541 raw words (11170506 effective words) took 5.8s, 1914764 effective words/s


Loss after epoch 49: 411332.0


INFO - 17:25:47: EPOCH 50 - PROGRESS: at 16.71% examples, 1848082 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:48: EPOCH 50 - PROGRESS: at 33.83% examples, 1881947 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:49: EPOCH 50 - PROGRESS: at 51.25% examples, 1885319 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:50: EPOCH 50 - PROGRESS: at 68.93% examples, 1895214 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:51: EPOCH 50 - PROGRESS: at 86.02% examples, 1899432 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:52: EPOCH 50: training on 12595541 raw words (11170580 effective words) took 5.8s, 1909696 effective words/s


Loss after epoch 50: 399820.0


INFO - 17:25:53: EPOCH 51 - PROGRESS: at 16.65% examples, 1852477 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:54: EPOCH 51 - PROGRESS: at 33.67% examples, 1878432 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:55: EPOCH 51 - PROGRESS: at 50.57% examples, 1874166 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:56: EPOCH 51 - PROGRESS: at 68.17% examples, 1889872 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:57: EPOCH 51 - PROGRESS: at 85.30% examples, 1890850 words/s, in_qsize 23, out_qsize 0
INFO - 17:25:58: EPOCH 51: training on 12595541 raw words (11170777 effective words) took 5.9s, 1902646 effective words/s


Loss after epoch 51: 427072.0


INFO - 17:25:59: EPOCH 52 - PROGRESS: at 16.18% examples, 1806307 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:00: EPOCH 52 - PROGRESS: at 33.53% examples, 1870802 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:01: EPOCH 52 - PROGRESS: at 51.14% examples, 1889927 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:02: EPOCH 52 - PROGRESS: at 68.61% examples, 1898529 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:03: EPOCH 52 - PROGRESS: at 85.84% examples, 1902241 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:04: EPOCH 52: training on 12595541 raw words (11170610 effective words) took 5.8s, 1913311 effective words/s


Loss after epoch 52: 418964.0


INFO - 17:26:05: EPOCH 53 - PROGRESS: at 16.35% examples, 1814856 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:06: EPOCH 53 - PROGRESS: at 33.60% examples, 1872744 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:07: EPOCH 53 - PROGRESS: at 50.81% examples, 1880561 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:08: EPOCH 53 - PROGRESS: at 68.20% examples, 1890290 words/s, in_qsize 21, out_qsize 2
INFO - 17:26:09: EPOCH 53 - PROGRESS: at 85.53% examples, 1896780 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:10: EPOCH 53: training on 12595541 raw words (11170397 effective words) took 5.9s, 1902728 effective words/s


Loss after epoch 53: 453508.0


INFO - 17:26:11: EPOCH 54 - PROGRESS: at 16.48% examples, 1826474 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:12: EPOCH 54 - PROGRESS: at 33.83% examples, 1881495 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:13: EPOCH 54 - PROGRESS: at 51.60% examples, 1896853 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:14: EPOCH 54 - PROGRESS: at 69.24% examples, 1907952 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:15: EPOCH 54 - PROGRESS: at 86.30% examples, 1907273 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:16: EPOCH 54: training on 12595541 raw words (11170277 effective words) took 5.8s, 1912351 effective words/s


Loss after epoch 54: 377364.0


INFO - 17:26:17: EPOCH 55 - PROGRESS: at 16.58% examples, 1843560 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:18: EPOCH 55 - PROGRESS: at 33.67% examples, 1879476 words/s, in_qsize 24, out_qsize 0
INFO - 17:26:19: EPOCH 55 - PROGRESS: at 51.15% examples, 1888147 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:20: EPOCH 55 - PROGRESS: at 68.61% examples, 1897859 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:21: EPOCH 55 - PROGRESS: at 85.78% examples, 1901620 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:21: EPOCH 55: training on 12595541 raw words (11171200 effective words) took 5.8s, 1909645 effective words/s


Loss after epoch 55: 375576.0


INFO - 17:26:22: EPOCH 56 - PROGRESS: at 16.50% examples, 1838639 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:23: EPOCH 56 - PROGRESS: at 33.83% examples, 1887148 words/s, in_qsize 23, out_qsize 1
INFO - 17:26:24: EPOCH 56 - PROGRESS: at 51.14% examples, 1889481 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:25: EPOCH 56 - PROGRESS: at 68.20% examples, 1886003 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:26: EPOCH 56 - PROGRESS: at 85.36% examples, 1890936 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:27: EPOCH 56: training on 12595541 raw words (11170467 effective words) took 5.9s, 1898966 effective words/s


Loss after epoch 56: 418516.0


INFO - 17:26:28: EPOCH 57 - PROGRESS: at 16.35% examples, 1820676 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:29: EPOCH 57 - PROGRESS: at 33.61% examples, 1874789 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:30: EPOCH 57 - PROGRESS: at 51.14% examples, 1888963 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:31: EPOCH 57 - PROGRESS: at 68.51% examples, 1895267 words/s, in_qsize 22, out_qsize 1
INFO - 17:26:32: EPOCH 57 - PROGRESS: at 85.52% examples, 1892852 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:33: EPOCH 57: training on 12595541 raw words (11170186 effective words) took 5.9s, 1900507 effective words/s


Loss after epoch 57: 428552.0


INFO - 17:26:34: EPOCH 58 - PROGRESS: at 16.65% examples, 1852840 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:35: EPOCH 58 - PROGRESS: at 34.05% examples, 1900092 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:36: EPOCH 58 - PROGRESS: at 51.84% examples, 1912891 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:37: EPOCH 58 - PROGRESS: at 69.40% examples, 1915650 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:38: EPOCH 58 - PROGRESS: at 86.58% examples, 1918874 words/s, in_qsize 24, out_qsize 0
INFO - 17:26:39: EPOCH 58: training on 12595541 raw words (11170713 effective words) took 5.8s, 1922016 effective words/s


Loss after epoch 58: 368100.0


INFO - 17:26:40: EPOCH 59 - PROGRESS: at 16.87% examples, 1859982 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:41: EPOCH 59 - PROGRESS: at 33.76% examples, 1874114 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:42: EPOCH 59 - PROGRESS: at 51.26% examples, 1885921 words/s, in_qsize 24, out_qsize 1
INFO - 17:26:43: EPOCH 59 - PROGRESS: at 68.71% examples, 1897074 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:44: EPOCH 59 - PROGRESS: at 85.59% examples, 1892525 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:45: EPOCH 59: training on 12595541 raw words (11169823 effective words) took 5.9s, 1897071 effective words/s


Loss after epoch 59: 428120.0


INFO - 17:26:46: EPOCH 60 - PROGRESS: at 16.71% examples, 1847828 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:47: EPOCH 60 - PROGRESS: at 34.12% examples, 1894312 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:48: EPOCH 60 - PROGRESS: at 51.88% examples, 1906222 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:49: EPOCH 60 - PROGRESS: at 69.09% examples, 1905678 words/s, in_qsize 22, out_qsize 1
INFO - 17:26:50: EPOCH 60 - PROGRESS: at 86.09% examples, 1903637 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:51: EPOCH 60: training on 12595541 raw words (11170959 effective words) took 5.9s, 1906373 effective words/s


Loss after epoch 60: 383324.0


INFO - 17:26:52: EPOCH 61 - PROGRESS: at 16.78% examples, 1857070 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:53: EPOCH 61 - PROGRESS: at 33.75% examples, 1877742 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:54: EPOCH 61 - PROGRESS: at 51.26% examples, 1888718 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:55: EPOCH 61 - PROGRESS: at 68.51% examples, 1893209 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:56: EPOCH 61 - PROGRESS: at 85.77% examples, 1898529 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:57: EPOCH 61: training on 12595541 raw words (11170412 effective words) took 5.9s, 1904157 effective words/s


Loss after epoch 61: 507104.0


INFO - 17:26:58: EPOCH 62 - PROGRESS: at 16.35% examples, 1823818 words/s, in_qsize 23, out_qsize 0
INFO - 17:26:59: EPOCH 62 - PROGRESS: at 33.67% examples, 1871479 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:00: EPOCH 62 - PROGRESS: at 51.14% examples, 1884868 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:01: EPOCH 62 - PROGRESS: at 68.82% examples, 1897584 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:02: EPOCH 62 - PROGRESS: at 85.86% examples, 1899043 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:02: EPOCH 62: training on 12595541 raw words (11170647 effective words) took 5.9s, 1902096 effective words/s


Loss after epoch 62: 492296.0


INFO - 17:27:03: EPOCH 63 - PROGRESS: at 16.79% examples, 1867130 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:04: EPOCH 63 - PROGRESS: at 34.05% examples, 1899795 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:05: EPOCH 63 - PROGRESS: at 51.95% examples, 1913773 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:06: EPOCH 63 - PROGRESS: at 69.42% examples, 1916871 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:07: EPOCH 63 - PROGRESS: at 86.45% examples, 1915996 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:08: EPOCH 63: training on 12595541 raw words (11169859 effective words) took 5.8s, 1922596 effective words/s


Loss after epoch 63: 360300.0


INFO - 17:27:09: EPOCH 64 - PROGRESS: at 16.57% examples, 1831834 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:10: EPOCH 64 - PROGRESS: at 33.97% examples, 1891748 words/s, in_qsize 24, out_qsize 0
INFO - 17:27:11: EPOCH 64 - PROGRESS: at 51.38% examples, 1891651 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:12: EPOCH 64 - PROGRESS: at 68.93% examples, 1903445 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:13: EPOCH 64 - PROGRESS: at 86.08% examples, 1906287 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:14: EPOCH 64: training on 12595541 raw words (11170949 effective words) took 5.8s, 1910337 effective words/s


Loss after epoch 64: 386060.0


INFO - 17:27:15: EPOCH 65 - PROGRESS: at 16.35% examples, 1820003 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:16: EPOCH 65 - PROGRESS: at 33.25% examples, 1854400 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:17: EPOCH 65 - PROGRESS: at 50.72% examples, 1879335 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:18: EPOCH 65 - PROGRESS: at 68.19% examples, 1890648 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:19: EPOCH 65 - PROGRESS: at 85.44% examples, 1893960 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:20: EPOCH 65: training on 12595541 raw words (11170845 effective words) took 5.9s, 1903759 effective words/s


Loss after epoch 65: 369168.0


INFO - 17:27:21: EPOCH 66 - PROGRESS: at 16.65% examples, 1840151 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:22: EPOCH 66 - PROGRESS: at 33.91% examples, 1887039 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:23: EPOCH 66 - PROGRESS: at 51.37% examples, 1889212 words/s, in_qsize 22, out_qsize 1
INFO - 17:27:24: EPOCH 66 - PROGRESS: at 68.93% examples, 1901393 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:25: EPOCH 66 - PROGRESS: at 85.84% examples, 1897416 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:26: EPOCH 66: training on 12595541 raw words (11170490 effective words) took 5.9s, 1902152 effective words/s


Loss after epoch 66: 361780.0


INFO - 17:27:27: EPOCH 67 - PROGRESS: at 16.42% examples, 1827049 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:28: EPOCH 67 - PROGRESS: at 33.82% examples, 1889554 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:29: EPOCH 67 - PROGRESS: at 51.37% examples, 1893765 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:30: EPOCH 67 - PROGRESS: at 68.85% examples, 1901434 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:31: EPOCH 67 - PROGRESS: at 86.01% examples, 1905412 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:32: EPOCH 67: training on 12595541 raw words (11170683 effective words) took 5.8s, 1911859 effective words/s


Loss after epoch 67: 350296.0


INFO - 17:27:33: EPOCH 68 - PROGRESS: at 16.43% examples, 1830243 words/s, in_qsize 22, out_qsize 1
INFO - 17:27:34: EPOCH 68 - PROGRESS: at 33.53% examples, 1869884 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:35: EPOCH 68 - PROGRESS: at 51.03% examples, 1884327 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:36: EPOCH 68 - PROGRESS: at 68.17% examples, 1885236 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:37: EPOCH 68 - PROGRESS: at 85.69% examples, 1896845 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:38: EPOCH 68: training on 12595541 raw words (11171179 effective words) took 5.9s, 1903283 effective words/s


Loss after epoch 68: 347112.0


INFO - 17:27:39: EPOCH 69 - PROGRESS: at 16.35% examples, 1814624 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:40: EPOCH 69 - PROGRESS: at 33.46% examples, 1864507 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:41: EPOCH 69 - PROGRESS: at 51.14% examples, 1887757 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:42: EPOCH 69 - PROGRESS: at 68.73% examples, 1898241 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:43: EPOCH 69 - PROGRESS: at 86.09% examples, 1906419 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:43: EPOCH 69: training on 12595541 raw words (11170370 effective words) took 5.8s, 1914683 effective words/s


Loss after epoch 69: 442032.0


INFO - 17:27:44: EPOCH 70 - PROGRESS: at 16.48% examples, 1837170 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:45: EPOCH 70 - PROGRESS: at 33.83% examples, 1889712 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:46: EPOCH 70 - PROGRESS: at 50.92% examples, 1886234 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:47: EPOCH 70 - PROGRESS: at 68.20% examples, 1891582 words/s, in_qsize 22, out_qsize 1
INFO - 17:27:48: EPOCH 70 - PROGRESS: at 85.56% examples, 1898022 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:49: EPOCH 70: training on 12595541 raw words (11171076 effective words) took 5.9s, 1907582 effective words/s


Loss after epoch 70: 409296.0


INFO - 17:27:50: EPOCH 71 - PROGRESS: at 16.58% examples, 1839782 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:51: EPOCH 71 - PROGRESS: at 33.75% examples, 1879833 words/s, in_qsize 24, out_qsize 0
INFO - 17:27:52: EPOCH 71 - PROGRESS: at 51.03% examples, 1879107 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:53: EPOCH 71 - PROGRESS: at 68.64% examples, 1889467 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:54: EPOCH 71 - PROGRESS: at 85.86% examples, 1896229 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:55: EPOCH 71: training on 12595541 raw words (11170537 effective words) took 5.9s, 1903450 effective words/s


Loss after epoch 71: 406024.0


INFO - 17:27:56: EPOCH 72 - PROGRESS: at 16.58% examples, 1843107 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:57: EPOCH 72 - PROGRESS: at 33.75% examples, 1884603 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:58: EPOCH 72 - PROGRESS: at 51.25% examples, 1893909 words/s, in_qsize 23, out_qsize 0
INFO - 17:27:59: EPOCH 72 - PROGRESS: at 68.64% examples, 1900321 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:00: EPOCH 72 - PROGRESS: at 85.94% examples, 1906031 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:01: EPOCH 72: training on 12595541 raw words (11170092 effective words) took 5.8s, 1914224 effective words/s


Loss after epoch 72: 339176.0


INFO - 17:28:02: EPOCH 73 - PROGRESS: at 16.27% examples, 1815267 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:03: EPOCH 73 - PROGRESS: at 33.19% examples, 1852800 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:04: EPOCH 73 - PROGRESS: at 50.68% examples, 1880911 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:05: EPOCH 73 - PROGRESS: at 68.08% examples, 1889422 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:06: EPOCH 73 - PROGRESS: at 85.44% examples, 1893589 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:07: EPOCH 73: training on 12595541 raw words (11171156 effective words) took 5.9s, 1903647 effective words/s


Loss after epoch 73: 335760.0


INFO - 17:28:08: EPOCH 74 - PROGRESS: at 16.50% examples, 1841578 words/s, in_qsize 24, out_qsize 1
INFO - 17:28:09: EPOCH 74 - PROGRESS: at 33.75% examples, 1880570 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:10: EPOCH 74 - PROGRESS: at 51.25% examples, 1891298 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:11: EPOCH 74 - PROGRESS: at 69.01% examples, 1904516 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:12: EPOCH 74 - PROGRESS: at 86.16% examples, 1908620 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:13: EPOCH 74: training on 12595541 raw words (11171105 effective words) took 5.8s, 1913707 effective words/s


Loss after epoch 74: 333332.0


INFO - 17:28:14: EPOCH 75 - PROGRESS: at 16.41% examples, 1828361 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:15: EPOCH 75 - PROGRESS: at 33.69% examples, 1875417 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:16: EPOCH 75 - PROGRESS: at 51.25% examples, 1890453 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:17: EPOCH 75 - PROGRESS: at 68.81% examples, 1900113 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:18: EPOCH 75 - PROGRESS: at 85.94% examples, 1900899 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:19: EPOCH 75: training on 12595541 raw words (11170573 effective words) took 5.9s, 1907840 effective words/s


Loss after epoch 75: 440744.0


INFO - 17:28:20: EPOCH 76 - PROGRESS: at 16.50% examples, 1822052 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:21: EPOCH 76 - PROGRESS: at 33.68% examples, 1871735 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:22: EPOCH 76 - PROGRESS: at 50.92% examples, 1876453 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:23: EPOCH 76 - PROGRESS: at 68.20% examples, 1884021 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:24: EPOCH 76 - PROGRESS: at 85.44% examples, 1889794 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:24: EPOCH 76: training on 12595541 raw words (11169894 effective words) took 5.9s, 1896456 effective words/s


Loss after epoch 76: 339280.0


INFO - 17:28:25: EPOCH 77 - PROGRESS: at 16.28% examples, 1799537 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:26: EPOCH 77 - PROGRESS: at 33.75% examples, 1874144 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:27: EPOCH 77 - PROGRESS: at 50.82% examples, 1875874 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:28: EPOCH 77 - PROGRESS: at 67.74% examples, 1876362 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:29: EPOCH 77 - PROGRESS: at 85.01% examples, 1881097 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:30: EPOCH 77: training on 12595541 raw words (11171191 effective words) took 5.9s, 1895395 effective words/s


Loss after epoch 77: 350116.0


INFO - 17:28:31: EPOCH 78 - PROGRESS: at 16.43% examples, 1823808 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:32: EPOCH 78 - PROGRESS: at 33.61% examples, 1871640 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:33: EPOCH 78 - PROGRESS: at 50.72% examples, 1877399 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:34: EPOCH 78 - PROGRESS: at 67.55% examples, 1874998 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:35: EPOCH 78 - PROGRESS: at 85.08% examples, 1883199 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:36: EPOCH 78: training on 12595541 raw words (11171508 effective words) took 5.9s, 1892056 effective words/s


Loss after epoch 78: 358216.0


INFO - 17:28:37: EPOCH 79 - PROGRESS: at 16.35% examples, 1817587 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:38: EPOCH 79 - PROGRESS: at 33.26% examples, 1854607 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:39: EPOCH 79 - PROGRESS: at 50.42% examples, 1871112 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:40: EPOCH 79 - PROGRESS: at 67.41% examples, 1873692 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:41: EPOCH 79 - PROGRESS: at 84.92% examples, 1883503 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:42: EPOCH 79: training on 12595541 raw words (11170834 effective words) took 5.9s, 1886765 effective words/s


Loss after epoch 79: 337664.0


INFO - 17:28:43: EPOCH 80 - PROGRESS: at 16.64% examples, 1853914 words/s, in_qsize 22, out_qsize 1
INFO - 17:28:44: EPOCH 80 - PROGRESS: at 33.76% examples, 1883192 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:45: EPOCH 80 - PROGRESS: at 51.15% examples, 1891845 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:46: EPOCH 80 - PROGRESS: at 68.64% examples, 1900720 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:47: EPOCH 80 - PROGRESS: at 85.94% examples, 1906172 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:48: EPOCH 80: training on 12595541 raw words (11170736 effective words) took 5.9s, 1908830 effective words/s


Loss after epoch 80: 318632.0


INFO - 17:28:49: EPOCH 81 - PROGRESS: at 16.57% examples, 1843625 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:50: EPOCH 81 - PROGRESS: at 33.76% examples, 1880857 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:51: EPOCH 81 - PROGRESS: at 51.49% examples, 1892490 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:52: EPOCH 81 - PROGRESS: at 69.01% examples, 1900723 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:53: EPOCH 81 - PROGRESS: at 86.09% examples, 1899471 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:54: EPOCH 81: training on 12595541 raw words (11170482 effective words) took 5.9s, 1906300 effective words/s


Loss after epoch 81: 320236.0


INFO - 17:28:55: EPOCH 82 - PROGRESS: at 16.50% examples, 1841765 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:56: EPOCH 82 - PROGRESS: at 33.61% examples, 1865326 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:57: EPOCH 82 - PROGRESS: at 50.69% examples, 1870053 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:58: EPOCH 82 - PROGRESS: at 68.22% examples, 1882171 words/s, in_qsize 23, out_qsize 0
INFO - 17:28:59: EPOCH 82 - PROGRESS: at 85.22% examples, 1883551 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:00: EPOCH 82: training on 12595541 raw words (11170694 effective words) took 5.9s, 1894945 effective words/s


Loss after epoch 82: 317812.0


INFO - 17:29:01: EPOCH 83 - PROGRESS: at 16.19% examples, 1802629 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:02: EPOCH 83 - PROGRESS: at 33.40% examples, 1862414 words/s, in_qsize 22, out_qsize 1
INFO - 17:29:03: EPOCH 83 - PROGRESS: at 50.70% examples, 1879794 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:04: EPOCH 83 - PROGRESS: at 67.96% examples, 1885055 words/s, in_qsize 24, out_qsize 0
INFO - 17:29:05: EPOCH 83 - PROGRESS: at 85.38% examples, 1890918 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:06: EPOCH 83: training on 12595541 raw words (11170688 effective words) took 5.9s, 1898719 effective words/s


Loss after epoch 83: 322604.0


INFO - 17:29:07: EPOCH 84 - PROGRESS: at 16.19% examples, 1795531 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:08: EPOCH 84 - PROGRESS: at 33.34% examples, 1848760 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:09: EPOCH 84 - PROGRESS: at 50.17% examples, 1854541 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:10: EPOCH 84 - PROGRESS: at 67.30% examples, 1868323 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:11: EPOCH 84 - PROGRESS: at 84.90% examples, 1879733 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:12: EPOCH 84: training on 12595541 raw words (11170428 effective words) took 5.9s, 1887709 effective words/s


Loss after epoch 84: 304600.0


INFO - 17:29:13: EPOCH 85 - PROGRESS: at 16.50% examples, 1830971 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:14: EPOCH 85 - PROGRESS: at 33.69% examples, 1870446 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:15: EPOCH 85 - PROGRESS: at 51.37% examples, 1888246 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:16: EPOCH 85 - PROGRESS: at 68.64% examples, 1894181 words/s, in_qsize 21, out_qsize 2
INFO - 17:29:17: EPOCH 85 - PROGRESS: at 85.60% examples, 1894863 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:17: EPOCH 85: training on 12595541 raw words (11170430 effective words) took 5.9s, 1902790 effective words/s


Loss after epoch 85: 337388.0


INFO - 17:29:19: EPOCH 86 - PROGRESS: at 16.35% examples, 1805717 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:20: EPOCH 86 - PROGRESS: at 33.54% examples, 1864953 words/s, in_qsize 22, out_qsize 1
INFO - 17:29:21: EPOCH 86 - PROGRESS: at 50.80% examples, 1877246 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:22: EPOCH 86 - PROGRESS: at 68.07% examples, 1885103 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:23: EPOCH 86 - PROGRESS: at 85.46% examples, 1893837 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:23: EPOCH 86: training on 12595541 raw words (11171166 effective words) took 5.9s, 1898053 effective words/s


Loss after epoch 86: 384492.0


INFO - 17:29:24: EPOCH 87 - PROGRESS: at 16.42% examples, 1828366 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:25: EPOCH 87 - PROGRESS: at 33.34% examples, 1859745 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:26: EPOCH 87 - PROGRESS: at 50.71% examples, 1880232 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:27: EPOCH 87 - PROGRESS: at 67.96% examples, 1887573 words/s, in_qsize 22, out_qsize 1
INFO - 17:29:28: EPOCH 87 - PROGRESS: at 85.14% examples, 1889836 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:29: EPOCH 87: training on 12595541 raw words (11170894 effective words) took 5.9s, 1896999 effective words/s


Loss after epoch 87: 318812.0


INFO - 17:29:30: EPOCH 88 - PROGRESS: at 15.94% examples, 1787619 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:31: EPOCH 88 - PROGRESS: at 33.26% examples, 1854690 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:32: EPOCH 88 - PROGRESS: at 50.70% examples, 1879144 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:33: EPOCH 88 - PROGRESS: at 67.71% examples, 1882744 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:34: EPOCH 88 - PROGRESS: at 85.22% examples, 1892348 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:35: EPOCH 88: training on 12595541 raw words (11170521 effective words) took 5.9s, 1899214 effective words/s


Loss after epoch 88: 300660.0


INFO - 17:29:36: EPOCH 89 - PROGRESS: at 16.48% examples, 1836255 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:37: EPOCH 89 - PROGRESS: at 33.61% examples, 1875248 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:38: EPOCH 89 - PROGRESS: at 50.92% examples, 1885750 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:39: EPOCH 89 - PROGRESS: at 67.93% examples, 1884070 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:40: EPOCH 89 - PROGRESS: at 85.37% examples, 1891741 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:41: EPOCH 89: training on 12595541 raw words (11170579 effective words) took 5.9s, 1900995 effective words/s


Loss after epoch 89: 281872.0


INFO - 17:29:42: EPOCH 90 - PROGRESS: at 16.42% examples, 1822466 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:43: EPOCH 90 - PROGRESS: at 33.68% examples, 1873757 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:44: EPOCH 90 - PROGRESS: at 50.92% examples, 1881642 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:45: EPOCH 90 - PROGRESS: at 68.07% examples, 1878181 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:46: EPOCH 90 - PROGRESS: at 85.30% examples, 1883100 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:47: EPOCH 90: training on 12595541 raw words (11170840 effective words) took 5.9s, 1894354 effective words/s


Loss after epoch 90: 326748.0


INFO - 17:29:48: EPOCH 91 - PROGRESS: at 16.42% examples, 1828882 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:49: EPOCH 91 - PROGRESS: at 33.69% examples, 1871015 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:50: EPOCH 91 - PROGRESS: at 51.37% examples, 1890107 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:51: EPOCH 91 - PROGRESS: at 68.38% examples, 1890750 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:52: EPOCH 91 - PROGRESS: at 85.36% examples, 1890312 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:53: EPOCH 91: training on 12595541 raw words (11171001 effective words) took 5.9s, 1902558 effective words/s


Loss after epoch 91: 278600.0


INFO - 17:29:54: EPOCH 92 - PROGRESS: at 16.28% examples, 1812347 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:55: EPOCH 92 - PROGRESS: at 33.39% examples, 1864399 words/s, in_qsize 21, out_qsize 2
INFO - 17:29:56: EPOCH 92 - PROGRESS: at 51.14% examples, 1892852 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:57: EPOCH 92 - PROGRESS: at 68.32% examples, 1895290 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:58: EPOCH 92 - PROGRESS: at 85.54% examples, 1899214 words/s, in_qsize 23, out_qsize 0
INFO - 17:29:59: EPOCH 92: training on 12595541 raw words (11170639 effective words) took 5.9s, 1902146 effective words/s


Loss after epoch 92: 271496.0


INFO - 17:30:00: EPOCH 93 - PROGRESS: at 16.50% examples, 1831417 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:01: EPOCH 93 - PROGRESS: at 33.41% examples, 1858736 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:02: EPOCH 93 - PROGRESS: at 50.17% examples, 1855111 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:03: EPOCH 93 - PROGRESS: at 66.72% examples, 1853494 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:04: EPOCH 93 - PROGRESS: at 83.73% examples, 1851170 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:05: EPOCH 93: training on 12595541 raw words (11169597 effective words) took 6.0s, 1861379 effective words/s


Loss after epoch 93: 275020.0


INFO - 17:30:06: EPOCH 94 - PROGRESS: at 16.58% examples, 1849576 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:07: EPOCH 94 - PROGRESS: at 33.54% examples, 1874887 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:08: EPOCH 94 - PROGRESS: at 50.70% examples, 1880572 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:09: EPOCH 94 - PROGRESS: at 67.51% examples, 1878025 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:10: EPOCH 94 - PROGRESS: at 84.97% examples, 1880132 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:11: EPOCH 94: training on 12595541 raw words (11170205 effective words) took 5.9s, 1890704 effective words/s


Loss after epoch 94: 337936.0


INFO - 17:30:12: EPOCH 95 - PROGRESS: at 16.50% examples, 1838847 words/s, in_qsize 24, out_qsize 0
INFO - 17:30:13: EPOCH 95 - PROGRESS: at 33.53% examples, 1871114 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:14: EPOCH 95 - PROGRESS: at 50.82% examples, 1882514 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:15: EPOCH 95 - PROGRESS: at 68.20% examples, 1890039 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:16: EPOCH 95 - PROGRESS: at 85.44% examples, 1893123 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:16: EPOCH 95: training on 12595541 raw words (11170330 effective words) took 5.9s, 1898085 effective words/s


Loss after epoch 95: 257484.0


INFO - 17:30:18: EPOCH 96 - PROGRESS: at 16.28% examples, 1808139 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:19: EPOCH 96 - PROGRESS: at 33.05% examples, 1841114 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:20: EPOCH 96 - PROGRESS: at 49.82% examples, 1847251 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:21: EPOCH 96 - PROGRESS: at 66.95% examples, 1864545 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:22: EPOCH 96 - PROGRESS: at 84.42% examples, 1874038 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:22: EPOCH 96: training on 12595541 raw words (11170851 effective words) took 5.9s, 1885427 effective words/s


Loss after epoch 96: 283424.0


INFO - 17:30:23: EPOCH 97 - PROGRESS: at 16.49% examples, 1834745 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:24: EPOCH 97 - PROGRESS: at 33.40% examples, 1862996 words/s, in_qsize 21, out_qsize 2
INFO - 17:30:25: EPOCH 97 - PROGRESS: at 50.61% examples, 1875970 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:26: EPOCH 97 - PROGRESS: at 67.61% examples, 1879726 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:27: EPOCH 97 - PROGRESS: at 85.16% examples, 1890095 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:28: EPOCH 97: training on 12595541 raw words (11169734 effective words) took 5.9s, 1896998 effective words/s


Loss after epoch 97: 254348.0


INFO - 17:30:29: EPOCH 98 - PROGRESS: at 16.15% examples, 1803090 words/s, in_qsize 20, out_qsize 3
INFO - 17:30:30: EPOCH 98 - PROGRESS: at 33.32% examples, 1855763 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:31: EPOCH 98 - PROGRESS: at 50.72% examples, 1875963 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:32: EPOCH 98 - PROGRESS: at 67.51% examples, 1872685 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:33: EPOCH 98 - PROGRESS: at 84.90% examples, 1880195 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:34: EPOCH 98: training on 12595541 raw words (11170505 effective words) took 5.9s, 1880742 effective words/s


Loss after epoch 98: 257840.0


INFO - 17:30:35: EPOCH 99 - PROGRESS: at 16.35% examples, 1822434 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:36: EPOCH 99 - PROGRESS: at 33.27% examples, 1854693 words/s, in_qsize 22, out_qsize 1
INFO - 17:30:37: EPOCH 99 - PROGRESS: at 50.32% examples, 1865852 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:38: EPOCH 99 - PROGRESS: at 67.22% examples, 1869723 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:39: EPOCH 99 - PROGRESS: at 84.58% examples, 1877053 words/s, in_qsize 23, out_qsize 0
INFO - 17:30:40: EPOCH 99: training on 12595541 raw words (11170812 effective words) took 5.9s, 1882808 effective words/s
INFO - 17:30:40: Word2Vec lifecycle event {'msg': 'training on 1259554100 raw words (1117063374 effective words) took 588.7s, 1897507 effective words/s', 'datetime': '2023-06-08T17:30:40.695995', 'gensim': '4.2.0', 'python': '3.7.3 (default, Oct 31 2022, 14:04:00) \n[GCC 8.3.0]', 'platform': 'Linux-4.19.0-24-amd64-x86_64-with-debian-10.10', 'event': 'train'}
INFO - 17:30:40: 

Loss after epoch 99: 300960.0


INFO - 17:30:40: storing np array 'syn1neg' to word2vec.model.syn1neg.npy
INFO - 17:30:41: not storing attribute cum_table
INFO - 17:30:41: saved word2vec.model


In [6]:
import os
import gzip
import json
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from gensim.parsing.preprocessing import remove_stopwords
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import preprocess_string
import random
from nltk.tokenize import word_tokenize
from gensim.models import Word2Vec

import random 
model_advanced = Word2Vec.load("word2vec.model")
def preprocess_data(input_phrase):
    # Lowercase the phrase and tokenize it
    processed_phrase = word_tokenize(input_phrase.lower())
    # Load phrases model
    phrases_model = Phraser.load('phrases_model.txt')
    # Apply phrases model to the tokenized phrase
    processed_phrase = phrases_model[processed_phrase]
    return processed_phrase

def generate_alternative_sentences(input_phrase, num_sentences=5):
    # Preprocess the input_phrase
    processed_input = preprocess_data(input_phrase)

    # Get the most similar words for each word in the input_phrase
    similar_words = {word: [item[0] for item in model_advanced.wv.most_similar(word, topn=5)] for word in processed_input if word in model_advanced.wv}

    # Generate new sentences
    new_sentences = []
    for _ in range(num_sentences):
        new_sentence = []
        for word in processed_input:
            # Randomly select a similar word if it exists in the similar_words dict, else use the original word
            new_word = random.choice(similar_words.get(word, [word]))
            new_sentence.append(new_word)
        new_sentences.append(' '.join(new_sentence))

    return new_sentences

print(generate_alternative_sentences("machine learning"))


['artificial_intelligence', 'deep_learning', 'algorithms', 'prediction', 'deep_learning']
