Skip to content

Commit

Permalink
more helpful error message when user tries to index an empty corpus
Browse files (browse the repository at this point in the history)
  • Loading branch information
piskvorky committed Oct 30, 2012
1 parent 9477067 commit f1a87e0
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion gensim/similarities/docsim.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -485,7 +485,7 @@ class for description of the other parameters.
"""
if num_features is None:
logger.info("scanning corpus to determine the number of features")
logger.warning("scanning corpus to determine the number of features (consider setting `num_features` explicitly)")
num_features = 1 + utils.get_max_id(corpus)

self.num_features = num_features
Expand All @@ -494,6 +494,8 @@ class for description of the other parameters.
self.chunksize = chunksize

if corpus is not None:
if self.num_features <= 0:
raise ValueError("cannot index a corpus with zero features (you must specify either `num_features` or a non-empty corpus in the constructor)")
logger.info("creating matrix for %s documents and %i features" %
(len(corpus), num_features))
self.index = numpy.empty(shape=(len(corpus), num_features), dtype=dtype)
Expand Down

0 comments on commit f1a87e0

Please sign in to comment.