In [1]:
%load_ext autoreload
%autoreload 2
from gensim.corpora import MmCorpus, Dictionary
from gensim.models.nmf import Nmf
from gensim.models import LdaModel
import gensim.downloader as api
from gensim.parsing.preprocessing import preprocess_string
from tqdm import tqdm, tqdm_notebook
import json
import itertools

# Enable tqdm's pandas integration.
# NOTE(review): no pandas call is visible in this notebook — confirm this is still needed.
tqdm.pandas()

# Send log records to the notebook output, prefixed with timestamp and level.
# level=DEBUG also lets through low-level records such as the repeated
# "{'uri': 'wiki.mm', ...}" lines visible in the training cells' output.
# NOTE(review): consider logging.INFO if that output is too noisy.
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)

In [2]:
# Fetch the "wiki-english-20171001" dataset through gensim's downloader API.
data = api.load("wiki-english-20171001")

# Sanity check: print every section of the first article only.
for article in itertools.islice(data, 1):
    for section_title, section_text in zip(
        article['section_titles'], article['section_texts']
    ):
        print("Section title: %s" % section_title)
        print("Section text: %s" % section_text)

2018-09-26 17:27:02,079 : DEBUG : {'uri': '/home/anotherbugmaster/gensim-data/wiki-english-20171001/wiki-english-20171001.gz', 'mode': 'rb', 'kw': {'encoding': 'utf-8'}}
2018-09-26 17:27:02,081 : DEBUG : encoding_wrapper: {'fileobj': <gzip _io.BufferedReader name='/home/anotherbugmaster/gensim-data/wiki-english-20171001/wiki-english-20171001.gz' 0x7f418df71ac8>, 'mode': 'r', 'encoding': 'utf-8', 'errors': 'strict'}


Section title: Introduction
Section text: 




'''Anarchism''' is a political philosophy that advocates self-governed societies based on voluntary institutions. These are often described as stateless societies, although several authors have defined them more specifically as institutions based on non-hierarchical free associations. Anarchism holds the state to be undesirable, unnecessary and harmful.

While anti-statism is central, anarchism specifically entails opposing authority or hierarchical organisation in the conduct of all human relations, including—but not limited to—the state system. Anarchism is usually considered a far-left ideology and much of anarchist economics and anarchist legal philosophy reflects anti-authoritarian interpretations of communism, collectivism, syndicalism, mutualism or participatory economics.

Anarchism does not offer a fixed body of doctrine from a single particular world view, instead fluxing and flowing as a philosophy. Many types and traditions of 

In [3]:
def wiki_articles_iterator():
    """Lazily yield the preprocessed form of every article in the global ``data``.

    For each article, every section title is joined with its section text, all
    sections are joined into one string, and that string is passed through
    ``preprocess_string``. Iteration is wrapped in a notebook progress bar.

    Yields
    ------
    The value returned by ``preprocess_string`` for one article.
    """
    for article in tqdm_notebook(data):
        titled_sections = zip(article['section_titles'], article['section_texts'])
        # Each pair is (title, text); flatten the article into one long string.
        article_text = " ".join(" ".join(pair) for pair in titled_sections)
        yield preprocess_string(article_text)

In [4]:
def save_preprocessed_articles(filename, articles):
    """Preprocess ``articles`` and write them to ``filename``, one JSON value per line.

    Parameters
    ----------
    filename
        Target passed to ``open`` in 'w+' mode, so existing content is truncated.
    articles
        Iterable of mappings that provide 'section_titles' and 'section_texts';
        the two are zipped together, so they are expected to be parallel sequences
        of strings.
    """
    with open(filename, 'w+') as writer:
        for article in tqdm_notebook(articles):
            titled_sections = zip(article['section_titles'], article['section_texts'])
            # Each pair is (title, text); flatten the article into one long string.
            article_text = " ".join(" ".join(pair) for pair in titled_sections)
            tokens = preprocess_string(article_text)
            # One JSON document per line keeps the file readable line by line.
            writer.write(json.dumps(tokens) + '\n')

def get_preprocessed_articles(filename):
    """Lazily read back a file with one JSON value per line.

    Parameters
    ----------
    filename
        Target passed to ``open`` in 'r' mode.

    Yields
    ------
    The decoded JSON value of each line, in file order. The file is opened on the
    first iteration step and stays open until the generator is exhausted or closed.
    """
    with open(filename, 'r') as reader:
        # Decode line by line so the whole file never has to fit in memory.
        yield from map(json.loads, tqdm_notebook(reader))

In [5]:
# One-off step: writes 'wiki_articles.jsonlines', the file that the commented-out
# dictionary and corpus cells below read. Uncomment and run when it does not exist yet.
# save_preprocessed_articles('wiki_articles.jsonlines', data)

In [6]:
# One-off step: builds the vocabulary from the preprocessed articles and stores it
# as 'wiki.dict', the file loaded by the next cell. Uncomment to regenerate it.
# dictionary = Dictionary(get_preprocessed_articles('wiki_articles.jsonlines'))

# dictionary.save('wiki.dict')

In [7]:
# Load the previously saved vocabulary and prune it with the default
# filter_extremes() settings. Per the log below, this keeps 100000 tokens that
# occur in no fewer than 5 documents and in no more than 50% of them.
dictionary = Dictionary.load('wiki.dict')
dictionary.filter_extremes()
# NOTE(review): the log shows "rebuilding dictionary, shrinking gaps" twice, which
# suggests filter_extremes() already compacts the ids and this call is redundant — confirm.
dictionary.compactify()

2018-09-26 17:27:02,269 : INFO : loading Dictionary object from wiki.dict
2018-09-26 17:27:02,269 : DEBUG : {'uri': 'wiki.dict', 'mode': 'rb', 'kw': {}}
2018-09-26 17:27:03,110 : INFO : loaded wiki.dict
2018-09-26 17:27:05,264 : INFO : discarding 1910258 tokens: [('abdelrahim', 49), ('abstention', 120), ('anarcha', 101), ('anarchica', 40), ('anarchosyndicalist', 20), ('antimilitar', 68), ('arbet', 194), ('archo', 100), ('arkhē', 5), ('autonomedia', 118)]...
2018-09-26 17:27:05,264 : INFO : keeping 100000 tokens which were in no less than 5 and no more than 2462447 (=50.0%) documents
2018-09-26 17:27:05,513 : DEBUG : rebuilding dictionary, shrinking gaps
2018-09-26 17:27:05,594 : INFO : resulting dictionary: Dictionary(100000 unique tokens: ['abandon', 'abil', 'abl', 'abolit', 'abstent']...)
2018-09-26 17:27:05,669 : DEBUG : rebuilding dictionary, shrinking gaps


In [16]:
import random

class RandomCorpus(MmCorpus):
    """Matrix Market corpus that is iterated in a fixed, seeded random order.

    The permutation of document indices is drawn once in the constructor, so every
    pass over the corpus visits the documents in the same shuffled order.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to ``MmCorpus.__init__``.
    random_state
        Keyword-only seed for the shuffle; any value accepted by ``random.Random``.
    """

    def __init__(self, *args, random_state, **kwargs):
        super().__init__(*args, **kwargs)
        self.random_state = random_state

        # Use a private generator instead of random.seed(): it produces the same
        # permutation for a given seed but leaves the process-wide `random` state
        # untouched, so constructing a corpus no longer reseeds unrelated code.
        rng = random.Random(self.random_state)
        self.shuffled_indices = list(range(self.num_docs))
        rng.shuffle(self.shuffled_indices)

    def __iter__(self):
        """Yield the documents in the shuffled order, fetching each one by index."""
        for doc_id in self.shuffled_indices:
            yield self[doc_id]

In [17]:
# One-off step: converts every preprocessed article to bag-of-words with `dictionary`
# and serializes the result to 'wiki.mm', the file opened by the next cell.
# Uncomment to regenerate it.
# corpus = (
#     dictionary.doc2bow(article)
#     for article
#     in get_preprocessed_articles('wiki_articles.jsonlines')
# )

# RandomCorpus.serialize('wiki.mm', corpus)

In [18]:
# Open the serialized corpus; the fixed seed makes the shuffled document order reproducible.
corpus = RandomCorpus('wiki.mm', random_state=42)

2018-09-26 17:31:16,992 : DEBUG : {'uri': 'wiki.mm.index', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:17,529 : INFO : loaded corpus index from wiki.mm.index
2018-09-26 17:31:17,530 : INFO : initializing cython corpus reader from wiki.mm
2018-09-26 17:31:17,531 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:17,532 : INFO : accepted corpus with 4924894 documents, 100000 features, 683375728 non-zero entries


In [19]:
# Number of full sweeps over the corpus made by the training loops below.
PASSES = 2

# Keyword arguments passed to both the Nmf and the LdaModel constructors.
training_params = {
    'chunksize': 2000,
    'num_topics': 100,
    'id2word': dictionary,
}

In [None]:
# Profile Nmf._setup line by line while a model is constructed on the full corpus.
# NOTE(review): the output below reports that line_profiler was already loaded, so
# this %load_ext may belong in the first cell next to autoreload — confirm.
%load_ext line_profiler

%lprun -f Nmf._setup gensim_nmf = Nmf(**training_params, corpus=corpus, use_r=True, lambda_=200)

2018-09-26 17:31:23,234 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}


The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


2018-09-26 17:31:24,026 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,029 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,030 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,031 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,031 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,032 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,033 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,034 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,035 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,035 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,036 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,235 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,236 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:24,505 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,505 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,506 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,507 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,508 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,509 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,510 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,510 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,511 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,512 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,513 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,514 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,515 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:24,599 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,600 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,600 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,602 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,603 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,603 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,604 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,605 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,605 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,606 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,607 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,607 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,608 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:24,680 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,681 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,682 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,683 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,684 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,684 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,685 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,686 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,686 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,687 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,688 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,689 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,690 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:24,763 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,764 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,764 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,765 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,766 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,766 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,767 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,768 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,769 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,769 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,770 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,771 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,772 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:24,858 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,859 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,859 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,860 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,861 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,861 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,863 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,864 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,865 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,866 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,866 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,867 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,867 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:24,941 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,942 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,943 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,944 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,945 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,945 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,946 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,947 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,948 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,949 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,949 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,950 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:24,951 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:25,043 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,044 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,044 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,045 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,046 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,046 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,047 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,048 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,048 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,049 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,050 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,050 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,051 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:25,125 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,126 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,126 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,127 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,128 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,129 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,129 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,130 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,131 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,132 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,132 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,133 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,134 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:25,231 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,232 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,233 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,233 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,234 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,235 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,236 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,236 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,238 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,239 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,240 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,462 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,464 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:25,551 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,572 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,572 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,573 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,574 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,574 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,575 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,576 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,577 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,577 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,578 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,579 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,580 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:25,653 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,653 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,654 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,655 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,656 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,656 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,657 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,658 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,659 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,659 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,660 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,661 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,662 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:25,750 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,750 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,751 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,752 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,752 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,753 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,754 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,754 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,755 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,756 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,757 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,757 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,758 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:25,826 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,827 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,827 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,828 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,829 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,829 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,830 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,831 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,832 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,832 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,833 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,834 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,834 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:25,930 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,930 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,931 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,932 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,933 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,933 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,934 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,935 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,935 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,936 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,937 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,938 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:25,939 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:26,023 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,024 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,025 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,025 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,026 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,027 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,027 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,028 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,029 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,030 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,031 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,032 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,032 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:26,127 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,128 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,129 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,129 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,130 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,131 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,131 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,132 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,133 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,134 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,135 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,136 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,136 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:26,207 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,208 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,209 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,210 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,210 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,211 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,212 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,212 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,213 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,214 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,215 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,216 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,216 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

2018-09-26 17:31:26,324 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,325 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,325 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,326 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,327 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,328 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,328 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,329 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,330 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,331 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,332 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,333 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}
2018-09-26 17:31:26,333 : DEBUG : {'uri': 'wiki.mm', 'mode': 'rb', 'kw': {}}

In [None]:
## %%time

# Construct the NMF model without a corpus, then train it one pass at a time.
gensim_nmf = Nmf(
    **training_params,
    use_r=True,
    lambda_=200,
)

for pass_ in range(PASSES):
#     gensim_nmf.update(itertools.islice(corpus, 100))
    gensim_nmf.update(corpus)
    # Checkpoint after every pass: 'nmf_0.model', 'nmf_1.model', ...
    gensim_nmf.save('nmf_%s.model' % pass_)

2018-09-25 20:01:01,127 : INFO : Loss (no outliers): 2187.8126676419793	Loss (with outliers): 2187.8126676419793
2018-09-25 20:05:20,635 : INFO : Loss (no outliers): 1827.3584807920479	Loss (with outliers): 1827.3584807920479
2018-09-25 20:09:26,448 : INFO : Loss (no outliers): 2160.9315399905636	Loss (with outliers): 2160.9315399905636
2018-09-25 20:11:43,012 : INFO : Loss (no outliers): 2096.3005293752603	Loss (with outliers): 2096.3005293752603
2018-09-25 20:13:39,924 : INFO : Loss (no outliers): 2219.8595473938913	Loss (with outliers): 2219.8595473938913
2018-09-25 20:15:16,059 : INFO : Loss (no outliers): 2386.337256599918	Loss (with outliers): 2386.337256599918
2018-09-25 20:16:55,507 : INFO : Loss (no outliers): 2174.8242892829526	Loss (with outliers): 2174.8242892829526
2018-09-25 20:18:13,774 : INFO : Loss (no outliers): 2167.327441415781	Loss (with outliers): 2167.327441415781
2018-09-25 20:19:08,062 : INFO : Loss (no outliers): 2302.0876438555892	Loss (with outliers): 2302.0

2018-09-25 20:56:02,823 : INFO : Loss (no outliers): 2427.223231537262	Loss (with outliers): 2427.223231537262
2018-09-25 20:56:31,906 : INFO : Loss (no outliers): 1916.1747272542427	Loss (with outliers): 1916.1747272542427
2018-09-25 20:57:00,958 : INFO : Loss (no outliers): 2160.7246788720004	Loss (with outliers): 2160.7246788720004
2018-09-25 20:57:29,892 : INFO : Loss (no outliers): 2418.632822651408	Loss (with outliers): 2418.632822651408
2018-09-25 20:57:59,123 : INFO : Loss (no outliers): 1839.4987454612694	Loss (with outliers): 1839.4987454612694
2018-09-25 20:58:28,258 : INFO : Loss (no outliers): 2193.5954352867516	Loss (with outliers): 2193.5954352867516
2018-09-25 20:58:57,297 : INFO : Loss (no outliers): 1840.22885077515	Loss (with outliers): 1840.22885077515
2018-09-25 20:59:25,111 : INFO : Loss (no outliers): 2256.5277679640835	Loss (with outliers): 2256.5277679640835
2018-09-25 20:59:54,109 : INFO : Loss (no outliers): 2100.762294874342	Loss (with outliers): 2100.762294

2018-09-25 21:30:36,622 : INFO : Loss (no outliers): 2095.0487477112065	Loss (with outliers): 2095.0487477112065
2018-09-25 21:31:04,338 : INFO : Loss (no outliers): 2099.0227812135545	Loss (with outliers): 2099.0227812135545
2018-09-25 21:31:32,823 : INFO : Loss (no outliers): 2051.6399910837404	Loss (with outliers): 2051.6399910837404
2018-09-25 21:32:00,823 : INFO : Loss (no outliers): 2210.176946629351	Loss (with outliers): 2210.176946629351
2018-09-25 21:32:29,752 : INFO : Loss (no outliers): 1919.1980163329379	Loss (with outliers): 1919.1980163329379
2018-09-25 21:32:58,007 : INFO : Loss (no outliers): 2815.500072368925	Loss (with outliers): 2815.500072368925
2018-09-25 21:33:25,814 : INFO : Loss (no outliers): 2092.2302648232076	Loss (with outliers): 2092.2302648232076
2018-09-25 21:33:52,795 : INFO : Loss (no outliers): 2049.2576301920562	Loss (with outliers): 2049.2576301920562
2018-09-25 21:34:20,888 : INFO : Loss (no outliers): 2040.4636635273798	Loss (with outliers): 2040.4

2018-09-25 22:05:23,333 : INFO : Loss (no outliers): 2045.8914472500164	Loss (with outliers): 2045.8914472500164
2018-09-25 22:05:49,855 : INFO : Loss (no outliers): 2021.2644079653978	Loss (with outliers): 2021.2644079653978
2018-09-25 22:06:20,389 : INFO : Loss (no outliers): 2054.8243581364536	Loss (with outliers): 2054.8243581364536
2018-09-25 22:06:50,099 : INFO : Loss (no outliers): 1978.88264172572	Loss (with outliers): 1978.88264172572
2018-09-25 22:07:18,494 : INFO : Loss (no outliers): 1880.6688041814068	Loss (with outliers): 1880.6688041814068
2018-09-25 22:07:46,152 : INFO : Loss (no outliers): 2008.3882815060638	Loss (with outliers): 2008.3882815060638
2018-09-25 22:08:14,700 : INFO : Loss (no outliers): 2142.5359761925024	Loss (with outliers): 2142.5359761925024
2018-09-25 22:08:43,337 : INFO : Loss (no outliers): 1915.8094403414839	Loss (with outliers): 1915.8094403414839
2018-09-25 22:09:11,780 : INFO : Loss (no outliers): 1801.005269158962	Loss (with outliers): 1801.00

2018-09-25 22:41:01,044 : INFO : Loss (no outliers): 2094.4917084070967	Loss (with outliers): 2094.4917084070967
2018-09-25 22:41:28,486 : INFO : Loss (no outliers): 2154.240691211658	Loss (with outliers): 2154.240691211658
2018-09-25 22:41:53,447 : INFO : Loss (no outliers): 2802.6732906098923	Loss (with outliers): 2802.6732906098923
2018-09-25 22:42:20,289 : INFO : Loss (no outliers): 2613.120421709758	Loss (with outliers): 2613.120421709758
2018-09-25 22:42:47,349 : INFO : Loss (no outliers): 5317.562151779767	Loss (with outliers): 5317.562151779767
2018-09-25 22:43:14,199 : INFO : Loss (no outliers): 1971.8530237998427	Loss (with outliers): 1971.8530237998427
2018-09-25 22:43:41,212 : INFO : Loss (no outliers): 3059.6913116173	Loss (with outliers): 3059.6913116173
2018-09-25 22:44:07,936 : INFO : Loss (no outliers): 2119.064129683836	Loss (with outliers): 2119.064129683836
2018-09-25 22:44:32,600 : INFO : Loss (no outliers): 1972.9436280672116	Loss (with outliers): 1972.94362806721

2018-09-25 23:14:21,401 : INFO : Loss (no outliers): 2176.093877943977	Loss (with outliers): 2176.093877943977
2018-09-25 23:14:52,134 : INFO : Loss (no outliers): 1965.582201308679	Loss (with outliers): 1965.582201308679
2018-09-25 23:15:23,216 : INFO : Loss (no outliers): 2358.571359311288	Loss (with outliers): 2358.571359311288
2018-09-25 23:15:53,292 : INFO : Loss (no outliers): 1979.0309962150184	Loss (with outliers): 1979.0309962150184
2018-09-25 23:16:20,867 : INFO : Loss (no outliers): 2074.951204201203	Loss (with outliers): 2074.951204201203
2018-09-25 23:16:51,763 : INFO : Loss (no outliers): 1955.2326162454233	Loss (with outliers): 1955.2326162454233
2018-09-25 23:17:20,574 : INFO : Loss (no outliers): 2172.5557242886475	Loss (with outliers): 2172.5557242886475
2018-09-25 23:17:48,661 : INFO : Loss (no outliers): 1728.0919560487505	Loss (with outliers): 1728.0919560487505
2018-09-25 23:18:19,099 : INFO : Loss (no outliers): 2104.077330774064	Loss (with outliers): 2104.077330

2018-09-25 23:48:23,705 : INFO : Loss (no outliers): 2080.0181605330813	Loss (with outliers): 2080.0181605330813
2018-09-25 23:48:50,755 : INFO : Loss (no outliers): 1777.5633441823009	Loss (with outliers): 1777.5633441823009
2018-09-25 23:49:18,657 : INFO : Loss (no outliers): 1967.0114183374278	Loss (with outliers): 1967.0114183374278
2018-09-25 23:49:44,242 : INFO : Loss (no outliers): 2280.2841649712095	Loss (with outliers): 2280.2841649712095
2018-09-25 23:50:10,607 : INFO : Loss (no outliers): 2087.404533633996	Loss (with outliers): 2087.404533633996
2018-09-25 23:50:38,403 : INFO : Loss (no outliers): 2437.395526161061	Loss (with outliers): 2437.395526161061
2018-09-25 23:51:07,075 : INFO : Loss (no outliers): 2075.1895737839277	Loss (with outliers): 2075.1895737839277
2018-09-25 23:51:34,039 : INFO : Loss (no outliers): 2004.940901973424	Loss (with outliers): 2004.940901973424
2018-09-25 23:52:02,144 : INFO : Loss (no outliers): 2191.0941805913494	Loss (with outliers): 2191.094

In [7]:
# Restore the checkpoint that the training loop writes after its first pass (pass_ == 0).
# NOTE(review): with PASSES = 2 the last checkpoint is 'nmf_1.model' — confirm which one is intended.
gensim_nmf = Nmf.load('nmf_0.model')

2018-09-26 00:34:11,714 : INFO : loading Nmf object from nmf_0.model
2018-09-26 00:34:23,539 : INFO : loading id2word recursively from nmf_0.model.id2word.* with mmap=r
2018-09-26 00:34:23,572 : INFO : loaded nmf_0.model


In [8]:
# Display the NMF topics; 100 matches num_topics in training_params.
gensim_nmf.show_topics(100)

[(0,
  '0.100*"damag" + 0.084*"tornado" + 0.070*"list" + 0.062*"home" + 0.056*"tree" + 0.041*"report" + 0.038*"build" + 0.033*"storm" + 0.031*"destroi" + 0.022*"caus"'),
 (1,
  '0.220*"order" + 0.117*"regul" + 0.096*"amend" + 0.032*"road" + 0.030*"traffic" + 0.028*"prohibit" + 0.027*"temporari" + 0.027*"trunk" + 0.026*"health" + 0.021*"england"'),
 (2,
  '0.125*"mount" + 0.124*"peak" + 0.123*"lemmon" + 0.122*"kitt" + 0.122*"spacewatch" + 0.062*"survei" + 0.043*"octob" + 0.038*"septemb" + 0.023*"novemb" + 0.023*"catalina"'),
 (3,
  '0.236*"new" + 0.076*"york" + 0.018*"zealand" + 0.016*"jersei" + 0.012*"washington" + 0.011*"chicago" + 0.010*"boston" + 0.010*"broadcast" + 0.010*"channel" + 0.009*"televis"'),
 (4,
  '0.271*"servic" + 0.059*"commun" + 0.030*"royal" + 0.026*"mr" + 0.025*"offic" + 0.023*"late" + 0.020*"director" + 0.019*"educ" + 0.019*"public" + 0.018*"chief"'),
 (5,
  '0.251*"parti" + 0.038*"elect" + 0.038*"vote" + 0.031*"liber" + 0.029*"communist" + 0.029*"seat" + 0.026*"po

In [None]:
%%time

# Construct the LDA model from the shared training_params, then train it one pass
# at a time, saving a checkpoint ('lda_0.model', 'lda_1.model', ...) after every pass.
gensim_lda = LdaModel(**training_params)

for pass_ in range(PASSES):
    gensim_lda.update(corpus)
    gensim_lda.save('lda_%s.model' % pass_)

In [None]:
# Inspect 20 topics of the LDA model trained in the previous cell.
gensim_lda.show_topics(20)