From 48179e1132efeeddd10216cd547e29cb29e3929c Mon Sep 17 00:00:00 2001 From: Sven Kreiss Date: Mon, 30 Nov 2015 11:04:52 -0500 Subject: [PATCH] load_word2vec_format(): Remove norm_only and init_sims parameters. Remove the call to init_sims(). --- gensim/models/word2vec.py | 4 +--- gensim/test/test_word2vec.py | 22 ++++++++-------------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 64c7deaa51..4aa2d87742 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -1015,7 +1015,7 @@ def save_word2vec_format(self, fname, fvocab=None, binary=False): fout.write(utils.to_utf8("%s %s\n" % (word, ' '.join("%f" % val for val in row)))) @classmethod - def load_word2vec_format(cls, fname, fvocab=None, binary=False, norm_only=True, encoding='utf8', unicode_errors='strict', init_sims=True): + def load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8', unicode_errors='strict'): """ Load the input-hidden weight matrix from the original C word2vec-tool format. @@ -1096,8 +1096,6 @@ def add_word(word, weights): assert (len(result.vocab), result.vector_size) == result.syn0.shape logger.info("loaded %s matrix from %s" % (result.syn0.shape, fname)) - if init_sims: - result.init_sims(norm_only) return result def intersect_word2vec_format(self, fname, binary=False, encoding='utf8', unicode_errors='strict'): diff --git a/gensim/test/test_word2vec.py b/gensim/test/test_word2vec.py index 3549d58a99..b377071b17 100644 --- a/gensim/test/test_word2vec.py +++ b/gensim/test/test_word2vec.py @@ -90,24 +90,16 @@ def testLambdaRule(self): model = word2vec.Word2Vec(sentences, min_count=1, trim_rule=rule) self.assertTrue("human" not in model.vocab) - def testPersistenceWord2VecFormatInitSims(self): - """Test storing/loading the entire model in word2vec format skipping - the init_sims() call.""" - model = word2vec.Word2Vec(sentences, min_count=1) - model.init_sims() - model.save_word2vec_format(testfile(), binary=True) - binary_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=True, norm_only=False, init_sims=False) - self.assertTrue(numpy.allclose(model['human'], binary_model['human'])) - self.assertFalse(hasattr(binary_model, 'syn0norm')) - def testPersistenceWord2VecFormat(self): """Test storing/loading the entire model in word2vec format.""" model = word2vec.Word2Vec(sentences, min_count=1) model.init_sims() model.save_word2vec_format(testfile(), binary=True) - binary_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=True, norm_only=False) + binary_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=True) + binary_model.init_sims(replace=False) self.assertTrue(numpy.allclose(model['human'], binary_model['human'])) - norm_only_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=True, norm_only=True) + norm_only_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=True) + norm_only_model.init_sims(replace=True) self.assertFalse(numpy.allclose(model['human'], norm_only_model['human'])) self.assertTrue(numpy.allclose(model.syn0norm[model.vocab['human'].index], norm_only_model['human'])) @@ -116,9 +108,11 @@ def testPersistenceWord2VecFormatNonBinary(self): model = word2vec.Word2Vec(sentences, min_count=1) model.init_sims() model.save_word2vec_format(testfile(), binary=False) - text_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=False, norm_only=False) + text_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=False) + text_model.init_sims(False) self.assertTrue(numpy.allclose(model['human'], text_model['human'], atol=1e-6)) - norm_only_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=False, norm_only=True) + norm_only_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=False) + norm_only_model.init_sims(True) self.assertFalse(numpy.allclose(model['human'], norm_only_model['human'], atol=1e-6)) self.assertTrue(numpy.allclose(model.syn0norm[model.vocab['human'].index], norm_only_model['human'], atol=1e-4))