Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added new ValueError in place of assertion error for no model data provided in lsi model #3271

Merged
merged 12 commits into from
Mar 22, 2022
17 changes: 16 additions & 1 deletion gensim/models/lsimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
import logging
import sys
import time
import types

import numpy as np
import scipy.linalg
Expand Down Expand Up @@ -482,14 +483,28 @@ def add_documents(self, corpus, chunksize=None, decay=None):
If the distributed mode is on, each chunk is sent to a different worker/computer.

"""
def is_empty(corpus):
    """Is the corpus (an iterable or a scipy.sparse array) empty?

    The check never consumes documents from the input.

    Parameters
    ----------
    corpus : iterable or scipy.sparse matrix
        The document stream to inspect.

    Returns
    -------
    bool
        True only when the corpus is known to contain no documents.
        False when it holds at least one document, or when emptiness cannot
        be determined without side effects (generators and other one-shot
        iterators, non-iterable objects, or iteration that raises).

    """
    if scipy.sparse.issparse(corpus):
        return corpus.shape[1] == 0  # by convention, scipy.sparse documents are columns
    if isinstance(corpus, types.GeneratorType):
        return False  # don't try to guess emptiness of generators, may lose elements irretrievably
    try:
        doc_iter = iter(corpus)
        if doc_iter is corpus:
            # A one-shot iterator (iter(list), map, file object, ...): peeking with next()
            # would drop its first document for good, so treat it like a generator.
            return False
        next(doc_iter)  # list, numpy array etc; works on a fresh iterator, corpus itself is untouched
    except StopIteration:
        return True
    except Exception:
        # Deliberate best effort: if the corpus cannot be probed, assume it is not empty.
        return False
    return False  # first document exists => not empty

mark-todd marked this conversation as resolved.
Show resolved Hide resolved
logger.info("updating model with new documents")

# get computation parameters; if not specified, use the ones from constructor
if chunksize is None:
chunksize = self.chunksize
if decay is None:
decay = self.decay
if not corpus:
if is_empty(corpus):
logger.warning('LsiModel.add_documents() called but no documents provided, is this intended?')
if not scipy.sparse.issparse(corpus):
if not self.onepass:
Expand Down