In [None]:
import gensim
from gensim.models import Word2Vec
import time
import os
import csv

class EpochSaver(gensim.models.callbacks.CallbackAny2Vec):
    """Report per-epoch training loss and save the model when it improves.

    After every epoch the callback prints the loss accumulated during that
    epoch and the wall-clock time it took. Whenever the epoch loss is lower
    than any seen so far, the model is written to ``save_path``.

    Args:
        save_name: Path the best model is saved to.
    """

    def __init__(self, save_name="nn_word2vec.model"):
        self.save_path = save_name
        self.epoch = 0                    # number of epochs completed so far
        self.pre_loss = 0                 # cumulative loss at the end of the previous epoch
        self.best_loss = float("inf")     # lowest per-epoch loss seen so far
        self.since = time.time()          # start time of the epoch currently running

    def on_train_begin(self, model):
        """Reset the per-run bookkeeping at the start of each ``train()`` call.

        The cumulative loss read in ``on_epoch_end`` is expected to restart
        from zero for every ``train()`` call (see the gensim ``Word2Vec.train``
        documentation), so a baseline left over from an earlier run would make
        the first epoch's loss negative and trigger a bogus "better model"
        save. Restarting the timer here also keeps time spent between
        constructing the callback and starting training out of the first
        epoch's report.
        """
        self.pre_loss = 0
        self.since = time.time()

    def on_epoch_end(self, model):
        """Print this epoch's loss and duration; save the model if it is the best."""
        self.epoch += 1
        # The value returned is accumulated since the first epoch, so the
        # loss of this epoch alone is the difference to the previous reading.
        cum_loss = model.get_latest_training_loss()
        epoch_loss = cum_loss - self.pre_loss
        minutes, seconds = divmod(time.time() - self.since, 60)
        print("Epoch %d, loss: %.2f, time: %dmin %ds" % (self.epoch, epoch_loss, minutes, seconds))

        if epoch_loss < self.best_loss:
            self.best_loss = epoch_loss
            print("Better model. Best loss: %.2f" % self.best_loss)
            model.save(self.save_path)
            print("Model %s save done!" % self.save_path)

        # Prepare the baseline and the timer for the next epoch.
        self.pre_loss = cum_loss
        self.since = time.time()

 
# Train 150-dimensional word vectors on the "nn_sentences" corpus file,
# ignoring words that occur fewer than 4 times.
model = Word2Vec(
    size=150,
    window=5,
    min_count=4,
    workers=4,
)

# Scan the corpus once to build the vocabulary before training.
model.build_vocab(corpus_file="nn_sentences")

# The callback is created only after the vocabulary scan, right before training.
epoch_saver = EpochSaver("nn_word2vec.model")

# compute_loss=True is required for the callback's per-epoch loss report;
# report_delay is set to 10 minutes (in seconds).
model.train(
    corpus_file="nn_sentences",
    total_words=model.corpus_total_words,
    epochs=5,
    compute_loss=True,
    report_delay=60 * 10,
    callbacks=[epoch_saver],
)