Skip to content

Commit

Permalink
Sparse2Corpus: update __getitem__ to work on slices, lists and ellipsis
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Oct 11, 2021
1 parent 3d72896 commit 71729a2
Showing 1 changed file with 10 additions and 9 deletions.
19 changes: 10 additions & 9 deletions gensim/matutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,23 +597,24 @@ def __iter__(self):
def __len__(self):
    """Return the size of the second axis of the wrapped sparse matrix.

    Returns
    -------
    int
        ``self.sparse.shape[1]``; presumably the number of documents, with
        one matrix column per document (confirm against the class docs).

    """
    column_count = self.sparse.shape[1]
    return column_count

def __getitem__(self, key):
    """Retrieve a single document vector, or a subset of the corpus, by `key`.

    Parameters
    ----------
    key : int, list of int, Ellipsis or slice
        Index of the document to retrieve. A slice, a list of indices or
        ``...`` selects several documents at once.

    Returns
    -------
    list of (int, number) or Sparse2Corpus
        Document in BoW format when `key` is an integer; otherwise a new
        `Sparse2Corpus` wrapping the selected columns of the sparse matrix.

    Raises
    ------
    IndexError
        If an integer `key` is outside the range of stored documents.
    TypeError
        If `key` is of an unsupported type.

    """
    # Imported locally because the file's import block is not visible here.
    import numbers

    if isinstance(key, (list, slice, type(...))):
        # Select whole columns; the resulting matrix is wrapped in a new corpus.
        subset = self.sparse[:, key]
        return Sparse2Corpus(subset)

    # numbers.Integral also accepts NumPy integer scalars, which the
    # original integer-only implementation handled.
    if not isinstance(key, numbers.Integral):
        raise TypeError(f"Indexing by type {type(key)} not supported.")

    indptr = self.sparse.indptr
    num_docs = len(indptr) - 1
    # Normalise negative indices explicitly: indexing ``indptr`` with a raw
    # negative value would pair the wrong start/end offsets.
    position = key + num_docs if key < 0 else key
    if not 0 <= position < num_docs:
        raise IndexError(f"Document index {key} out of range for corpus of {num_docs} documents.")

    start, stop = indptr[position], indptr[position + 1]
    return list(zip(self.sparse.indices[start:stop], self.sparse.data[start:stop]))


def veclen(vec):
Expand Down

0 comments on commit 71729a2

Please sign in to comment.