more helpful error message when user tries to index an empty corpus

commit f1a87e092fef790e7417a779fcb983308af94c54 1 parent 9477067
@piskvorky authored
Showing with 3 additions and 1 deletion.
  1. +3 −1 gensim/similarities/
4 gensim/similarities/
@@ -485,7 +485,7 @@ class for description of the other parameters.
if num_features is None:
- logger.info("scanning corpus to determine the number of features")
+ logger.warning("scanning corpus to determine the number of features (consider setting `num_features` explicitly)")
num_features = 1 + utils.get_max_id(corpus)
self.num_features = num_features
@@ -494,6 +494,8 @@ class for description of the other parameters.
self.chunksize = chunksize
if corpus is not None:
+ if self.num_features <= 0:
+ raise ValueError("cannot index a corpus with zero features (you must specify either `num_features` or a non-empty corpus in the constructor)")
logger.info("creating matrix for %s documents and %i features" %
(len(corpus), num_features))
self.index = numpy.empty(shape=(len(corpus), num_features), dtype=dtype)
