-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
/
basemodel.py
52 lines (40 loc) · 1.52 KB
/
basemodel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
class BaseTopicModel:
    """Base class with shared helpers for printing topics.

    The helpers call `self.show_topic` and `self.show_topics`, which are not
    defined in this class; they have to be provided by the concrete model.

    """

    def print_topic(self, topicno, topn=10):
        """Get a single topic as a formatted string.

        Parameters
        ----------
        topicno : int
            Topic id.
        topn : int, optional
            Number of words from topic that will be used.

        Returns
        -------
        str
            String representation of topic, like '-0.340*"category" + 0.298*"$M$" + 0.183*"algebra" + ...'.
            Each weight is rendered with three decimals; an empty topic gives an empty string.

        """
        # `show_topic` yields (word, weight) pairs; the weight is printed first.
        return ' + '.join('%.3f*"%s"' % (v, k) for k, v in self.show_topic(topicno, topn))

    def print_topics(self, num_topics=20, num_words=10):
        """Get the most significant topics (delegates to `show_topics()` with ``log=True``).

        Parameters
        ----------
        num_topics : int, optional
            The number of topics to be selected, if -1 - all topics will be in result (ordered by significance).
        num_words : int, optional
            The number of words to be included per topics (ordered by significance).

        Returns
        -------
        object
            Whatever `self.show_topics(num_topics=..., num_words=..., log=True)` returns, passed through unchanged.
            NOTE(review): presumably a sequence of (topic_id, topic representation) pairs - the exact form is
            decided by the concrete model's `show_topics`; confirm against the implementation in use.

        """
        return self.show_topics(num_topics=num_topics, num_words=num_words, log=True)

    def get_topics(self):
        """Get the term-topic matrix (to be implemented by the concrete model).

        Returns
        -------
        numpy.ndarray
            The term topic matrix learned during inference, shape (`num_topics`, `vocabulary_size`).
            This base implementation never returns.

        Raises
        ------
        NotImplementedError
            Always, in this base class.

        """
        raise NotImplementedError