Skip to content

Commit

Permalink
load_word2vec_format(): Remove norm_only and init_sims parameters. Remove the call to init_sims().
Browse files Browse the repository at this point in the history
  • Loading branch information
svenkreiss committed Nov 30, 2015
1 parent 602c0c7 commit 48179e1
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 17 deletions.
4 changes: 1 addition & 3 deletions gensim/models/word2vec.py
Expand Up @@ -1015,7 +1015,7 @@ def save_word2vec_format(self, fname, fvocab=None, binary=False):
fout.write(utils.to_utf8("%s %s\n" % (word, ' '.join("%f" % val for val in row))))

@classmethod
def load_word2vec_format(cls, fname, fvocab=None, binary=False, norm_only=True, encoding='utf8', unicode_errors='strict', init_sims=True):
def load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8', unicode_errors='strict'):
"""
Load the input-hidden weight matrix from the original C word2vec-tool format.
Expand Down Expand Up @@ -1096,8 +1096,6 @@ def add_word(word, weights):
assert (len(result.vocab), result.vector_size) == result.syn0.shape

logger.info("loaded %s matrix from %s" % (result.syn0.shape, fname))
if init_sims:
result.init_sims(norm_only)
return result

def intersect_word2vec_format(self, fname, binary=False, encoding='utf8', unicode_errors='strict'):
Expand Down
22 changes: 8 additions & 14 deletions gensim/test/test_word2vec.py
Expand Up @@ -90,24 +90,16 @@ def testLambdaRule(self):
model = word2vec.Word2Vec(sentences, min_count=1, trim_rule=rule)
self.assertTrue("human" not in model.vocab)

def testPersistenceWord2VecFormatInitSims(self):
"""Test storing/loading the entire model in word2vec format skipping
the init_sims() call."""
model = word2vec.Word2Vec(sentences, min_count=1)
model.init_sims()
model.save_word2vec_format(testfile(), binary=True)
binary_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=True, norm_only=False, init_sims=False)
self.assertTrue(numpy.allclose(model['human'], binary_model['human']))
self.assertFalse(hasattr(binary_model, 'syn0norm'))

def testPersistenceWord2VecFormat(self):
"""Test storing/loading the entire model in word2vec format."""
model = word2vec.Word2Vec(sentences, min_count=1)
model.init_sims()
model.save_word2vec_format(testfile(), binary=True)
binary_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=True, norm_only=False)
binary_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=True)
binary_model.init_sims(replace=False)
self.assertTrue(numpy.allclose(model['human'], binary_model['human']))
norm_only_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=True, norm_only=True)
norm_only_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=True)
norm_only_model.init_sims(replace=True)
self.assertFalse(numpy.allclose(model['human'], norm_only_model['human']))
self.assertTrue(numpy.allclose(model.syn0norm[model.vocab['human'].index], norm_only_model['human']))

Expand All @@ -116,9 +108,11 @@ def testPersistenceWord2VecFormatNonBinary(self):
model = word2vec.Word2Vec(sentences, min_count=1)
model.init_sims()
model.save_word2vec_format(testfile(), binary=False)
text_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=False, norm_only=False)
text_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=False)
text_model.init_sims(False)
self.assertTrue(numpy.allclose(model['human'], text_model['human'], atol=1e-6))
norm_only_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=False, norm_only=True)
norm_only_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=False)
norm_only_model.init_sims(True)
self.assertFalse(numpy.allclose(model['human'], norm_only_model['human'], atol=1e-6))

self.assertTrue(numpy.allclose(model.syn0norm[model.vocab['human'].index], norm_only_model['human'], atol=1e-4))
Expand Down

0 comments on commit 48179e1

Please sign in to comment.