In [1]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display
import logging
import pprint
from os import listdir
from os.path import isfile, join

import pandas as pd
from gensim.parsing.preprocessing import preprocess_string, strip_tags, strip_punctuation, strip_multiple_whitespaces, \
    remove_stopwords, stem_text
from gensim.corpora import MmCorpus, Dictionary
from gensim.models import TfidfModel, LdaModel, HdpModel, LsiModel

logging.basicConfig(format='\r%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

pp = pprint.PrettyPrinter(indent=4)
ENCODING = 'iso-8859-1'



In [2]:
def get_files(path):
    return [f for f in listdir(path) if isfile(join(path, f))]

## Analysis functions
### Text formating

In [3]:
ALLOWED_SHORT_WORDS = ['c', 'c#', 'r', '3d', '2d', '1d', '7z', 'qt']


def strip_short(word_list):    
    return [word for word in word_list if len(word) > 2 or word in ALLOWED_SHORT_WORDS]


def tokenize_text(text):
    processed = preprocess_string(text, filters=[strip_tags, strip_punctuation, 
                                                 strip_multiple_whitespaces, remove_stopwords, stem_text])
    return strip_short(processed)

### Data analysis

In [15]:
def get_real_tags(question_id):
    real_tags = tags_df[tags_df['Id'] == question_id]['Tag'].values
    return [tag for tag in real_tags]


def get_predicted_tags(question_index, model, dictionary):
    question_bow, question_id, text = get_bow_question(question_index, dictionary)
    return model[question_bow], question_id


def compare_real_predicted(question_index, model, dictionary, prob = 0.0):
    predicted_tags, question_id = get_predicted_tags(question_index, model, dictionary)
    predicted_tags = [p_tag for p_tag in predicted_tags if p_tag[1] > prob]
    real_tags = get_real_tags(question_id)
    correct_predictions = 0
    for tag in predicted_tags:
        if dictionary[tag[0]] in ' '.join(str(e) for e in real_tags):
            correct_predictions += 1            
    if len(real_tags) > len(predicted_tags):
        return correct_predictions, len(real_tags)    
    return correct_predictions, len(predicted_tags)


def get_bow_question(index, dictionary):
    tokenized_text = tokenize_text(csv_data['Title'][index])
    question_id = csv_data['Id'][index]
    text = csv_data['Title'][index]
    return dictionary.doc2bow(tokenized_text), question_id, text


def display_results(index, model, dictionary, prob=0.0):
    question_bow, question_id, text = get_bow_question(index, dictionary)
    predicted_tags = model[question_bow]
    tags = get_real_tags(question_id)
    print(f'Question {question_id}:\n{text}\nReal tags:\n{tags}\n\nPredicted tags:')
    [print(f'Tag: {dictionary[tag_id]}, Probability: {p}') for tag_id, p in predicted_tags if p >= prob]

In [16]:
def compare_n_tags(n, model, dictionary, prob=0.0):
    correct_predictions = 0
    all_predictions = 0
    for i in range(n):
        comparison = compare_real_predicted(i, model, dictionary, prob)
        correct_predictions += comparison[0]
        all_predictions += comparison[1]
    return correct_predictions, all_predictions

## Setup
### CSV files

In [6]:
PATH_TAGS = '../data/stacksample/Tags.csv'
tags_df = pd.read_csv(PATH_TAGS, encoding=ENCODING)

In [7]:
COLUMNS_QUESTION = ['Title', 'Body', 'Id']
QUESTIONS_PATH = '../data/stacksample/Questions.csv'
N_ROWS = 10000

csv_data  = pd.read_csv(QUESTIONS_PATH, encoding=ENCODING, usecols=COLUMNS_QUESTION, nrows=N_ROWS)
csv_data['All'] = csv_data['Title'].map(str) + csv_data['Body']

### Widgets

In [24]:
model_file_name = widgets.Dropdown(options=[f for f in get_files('../data/stacksample/models') if 'npy' not in f],
                                   description='Model file: ')
dict_file_name = widgets.Dropdown(options=get_files('../data/stacksample/dictionary'), description='Dictionary file: ')

text_index_slider1 = widgets.IntSlider(description='Text index: ', value=0, min=0, max=N_ROWS)
text_index_slider2 = widgets.IntSlider(description='Text index: ', value=0, min=0, max=N_ROWS)

probability_slider1 = widgets.FloatSlider(description='Tag probability: ', value=0.0, min=0.0, max=1.0, step=0.05)
probability_slider2 = widgets.FloatSlider(description='Tag probability: ', value=0.0, min=0.0, max=1.0, step=0.05)

analyse_btn = widgets.Button(description='Analyse model!', disabled=True)

def get_dictionary(file_name):
    return Dictionary.load(f'../data/stacksample/dictionary/{file_name}')

def get_model(file_name):
    return TfidfModel.load(f'../data/stacksample/models/{file_name}')
   
def test_example(b):
    display_results(text_index.value, model, corpus_dict)    

In [25]:
corpus_dict_widget = interactive(get_dictionary, file_name=dict_file_name)
model_widget = interactive(get_model, file_name=model_file_name)
display(corpus_dict_widget, model_widget)

In [27]:
interact_manual(display_results, index=text_index_slider1, p=probability_slider1,
                model=fixed(model_widget.result), dictionary=fixed(corpus_dict_widget.result))

<function __main__.display_results>

In [28]:
interact_manual(compare_n_tags, n=text_index_slider2, model=fixed(model_widget.result), 
                dictionary=fixed(corpus_dict_widget.result), prob=probability_slider2)

<function __main__.compare_n_tags>

In [19]:
import pyLDAvis.gensim

pyLDAvis.enable_notebook()

In [20]:
CORPUS_FILE = 'titles_body_compact.mm'
corpus = MmCorpus(f'../data/stacksample/bow/{CORPUS_FILE}')

2018-06-05 01:57:10,770 : INFO : loaded corpus index from ../data/stacksample/bow/titles_body_compact.mm.index
2018-06-05 01:57:10,771 : INFO : initializing cython corpus reader from ../data/stacksample/bow/titles_body_compact.mm
2018-06-05 01:57:10,812 : INFO : accepted corpus with 1264216 documents, 2009852 features, 65879940 non-zero entries


In [24]:
pyLDAvis.gensim.prepare(model_widget.result, corpus, corpus_dict_widget.result)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  topic_term_dists = topic_term_dists.ix[topic_order]


JoblibMemoryError: JoblibMemoryError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
d:\users\janvi\anaconda3\lib\runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
d:\users\janvi\anaconda3\lib\runpy.py in _run_code(code=<code object <module> at 0x000001FB5E988DB0, fil...lib\site-packages\ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'd:\users\janvi\anaconda3\lib\site-packages\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'd:\users\janvi\anaconda3\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'd:\\users\\j...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x000001FB5E988DB0, fil...lib\site-packages\ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'd:\users\janvi\anaconda3\lib\site-packages\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'd:\users\janvi\anaconda3\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'd:\\users\\j...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\traitlets\config\application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\ipykernel\kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    481         if self.poller is not None:
    482             self.poller.start()
    483         self.kernel.start()
    484         self.io_loop = ioloop.IOLoop.current()
    485         try:
--> 486             self.io_loop.start()
        self.io_loop.start = <bound method BaseAsyncIOLoop.start of <tornado.platform.asyncio.AsyncIOMainLoop object>>
    487         except KeyboardInterrupt:
    488             pass
    489 
    490 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\tornado\platform\asyncio.py in start(self=<tornado.platform.asyncio.AsyncIOMainLoop object>)
    122         except (RuntimeError, AssertionError):
    123             old_loop = None
    124         try:
    125             self._setup_logging()
    126             asyncio.set_event_loop(self.asyncio_loop)
--> 127             self.asyncio_loop.run_forever()
        self.asyncio_loop.run_forever = <bound method BaseEventLoop.run_forever of <_Win...EventLoop running=True closed=False debug=False>>
    128         finally:
    129             asyncio.set_event_loop(old_loop)
    130 
    131     def stop(self):

...........................................................................
d:\users\janvi\anaconda3\lib\asyncio\base_events.py in run_forever(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
    416             sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook,
    417                                    finalizer=self._asyncgen_finalizer_hook)
    418         try:
    419             events._set_running_loop(self)
    420             while True:
--> 421                 self._run_once()
        self._run_once = <bound method BaseEventLoop._run_once of <_Windo...EventLoop running=True closed=False debug=False>>
    422                 if self._stopping:
    423                     break
    424         finally:
    425             self._stopping = False

...........................................................................
d:\users\janvi\anaconda3\lib\asyncio\base_events.py in _run_once(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
   1421                         logger.warning('Executing %s took %.3f seconds',
   1422                                        _format_handle(handle), dt)
   1423                 finally:
   1424                     self._current_handle = None
   1425             else:
-> 1426                 handle._run()
        handle._run = <bound method Handle._run of <Handle BaseAsyncIOLoop._handle_events(936, 1)>>
   1427         handle = None  # Needed to break cycles when an exception occurs.
   1428 
   1429     def _set_coroutine_wrapper(self, enabled):
   1430         try:

...........................................................................
d:\users\janvi\anaconda3\lib\asyncio\events.py in _run(self=<Handle BaseAsyncIOLoop._handle_events(936, 1)>)
    122             self._callback = None
    123             self._args = None
    124 
    125     def _run(self):
    126         try:
--> 127             self._callback(*self._args)
        self._callback = <bound method BaseAsyncIOLoop._handle_events of <tornado.platform.asyncio.AsyncIOMainLoop object>>
        self._args = (936, 1)
    128         except Exception as exc:
    129             cb = _format_callback_source(self._callback, self._args)
    130             msg = 'Exception in callback {}'.format(cb)
    131             context = {

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\tornado\platform\asyncio.py in _handle_events(self=<tornado.platform.asyncio.AsyncIOMainLoop object>, fd=936, events=1)
    112             self.writers.remove(fd)
    113         del self.handlers[fd]
    114 
    115     def _handle_events(self, fd, events):
    116         fileobj, handler_func = self.handlers[fd]
--> 117         handler_func(fileobj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fileobj = <zmq.sugar.socket.Socket object>
        events = 1
    118 
    119     def start(self):
    120         try:
    121             old_loop = asyncio.get_event_loop()

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    445             return
    446         zmq_events = self.socket.EVENTS
    447         try:
    448             # dispatch events:
    449             if zmq_events & zmq.POLLIN and self.receiving():
--> 450                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    451                 if not self.socket:
    452                     return
    453             if zmq_events & zmq.POLLOUT and self.sending():
    454                 self._handle_send()

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    475             else:
    476                 raise
    477         else:
    478             if self._recv_callback:
    479                 callback = self._recv_callback
--> 480                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    481         
    482 
    483     def _handle_send(self):
    484         """Handle a send event."""

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    427         close our socket."""
    428         try:
    429             # Use a NullContext to ensure that all StackContexts are run
    430             # inside our blanket exception handler rather than outside.
    431             with stack_context.NullContext():
--> 432                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    433         except:
    434             gen_log.error("Uncaught exception in ZMQStream callback",
    435                           exc_info=True)
    436             # Re-raise the exception so that IOLoop.handle_callback_exception

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\ipykernel\kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\ipykernel\kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': 'pyLDAvis.gensim.prepare(model_widget.result, corpus, corpus_dict_widget.result)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 6, 4, 22, 57, 47, 896219, tzinfo=datetime.timezone.utc), 'msg_id': '80d851bf28ee4e17a87208d469345ffa', 'msg_type': 'execute_request', 'session': '3f40e4eb9ed2456f8fbdd0ca7ec7a750', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '80d851bf28ee4e17a87208d469345ffa', 'msg_type': 'execute_request', 'parent_header': {}})
    228             self.log.warn("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'3f40e4eb9ed2456f8fbdd0ca7ec7a750']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': 'pyLDAvis.gensim.prepare(model_widget.result, corpus, corpus_dict_widget.result)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 6, 4, 22, 57, 47, 896219, tzinfo=datetime.timezone.utc), 'msg_id': '80d851bf28ee4e17a87208d469345ffa', 'msg_type': 'execute_request', 'session': '3f40e4eb9ed2456f8fbdd0ca7ec7a750', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '80d851bf28ee4e17a87208d469345ffa', 'msg_type': 'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\ipykernel\kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'3f40e4eb9ed2456f8fbdd0ca7ec7a750'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': 'pyLDAvis.gensim.prepare(model_widget.result, corpus, corpus_dict_widget.result)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 6, 4, 22, 57, 47, 896219, tzinfo=datetime.timezone.utc), 'msg_id': '80d851bf28ee4e17a87208d469345ffa', 'msg_type': 'execute_request', 'session': '3f40e4eb9ed2456f8fbdd0ca7ec7a750', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '80d851bf28ee4e17a87208d469345ffa', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\ipykernel\ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code='pyLDAvis.gensim.prepare(model_widget.result, corpus, corpus_dict_widget.result)', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = 'pyLDAvis.gensim.prepare(model_widget.result, corpus, corpus_dict_widget.result)'
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\ipykernel\zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=('pyLDAvis.gensim.prepare(model_widget.result, corpus, corpus_dict_widget.result)',), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ('pyLDAvis.gensim.prepare(model_widget.result, corpus, corpus_dict_widget.result)',)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='pyLDAvis.gensim.prepare(model_widget.result, corpus, corpus_dict_widget.result)', store_history=True, silent=False, shell_futures=True)
   2700                 self.displayhook.exec_result = result
   2701 
   2702                 # Execute the user code
   2703                 interactivity = "none" if silent else self.ast_node_interactivity
   2704                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2705                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2706                 
   2707                 self.last_execution_succeeded = not has_raised
   2708 
   2709                 # Reset this so later displayed values do not modify the

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Expr object>], cell_name='<ipython-input-24-201142e2dc32>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 1fc3b112710, executio..._before_exec=None error_in_exec=None result=None>)
   2810                     return True
   2811 
   2812             for i, node in enumerate(to_run_interactive):
   2813                 mod = ast.Interactive([node])
   2814                 code = compiler(mod, cell_name, "single")
-> 2815                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x000001FB023EC270, file "<ipython-input-24-201142e2dc32>", line 1>
        result = <ExecutionResult object at 1fc3b112710, executio..._before_exec=None error_in_exec=None result=None>
   2816                     return True
   2817 
   2818             # Flush softspace
   2819             if softspace(sys.stdout, 0):

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x000001FB023EC270, file "<ipython-input-24-201142e2dc32>", line 1>, result=<ExecutionResult object at 1fc3b112710, executio..._before_exec=None error_in_exec=None result=None>)
   2864         outflag = 1  # happens in more places, so it's easier as default
   2865         try:
   2866             try:
   2867                 self.hooks.pre_run_code_hook()
   2868                 #rprint('Running code', repr(code_obj)) # dbg
-> 2869                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x000001FB023EC270, file "<ipython-input-24-201142e2dc32>", line 1>
        self.user_global_ns = {'ALLOWED_SHORT_WORDS': ['c', 'c#', 'r', '3d', '2d', '1d', '7z', 'qt'], 'COLUMNS_QUESTION': ['Title', 'Body', 'Id'], 'CORPUS_FILE': 'titles_body_compact.mm', 'Dictionary': <class 'gensim.corpora.dictionary.Dictionary'>, 'ENCODING': 'iso-8859-1', 'HdpModel': <class 'gensim.models.hdpmodel.HdpModel'>, 'In': ['', "from __future__ import print_function\nfrom ipywi...t.PrettyPrinter(indent=4)\nENCODING = 'iso-8859-1'", 'def get_files(path):\n    return [f for f in listdir(path) if isfile(join(path, f))]', "ALLOWED_SHORT_WORDS = ['c', 'c#', 'r', '3d', '2d...ds, stem_text])\n    return strip_short(processed)", "def get_real_tags(question_id):\n    real_tags = ...}') for tag_id, p in predicted_tags if p >= prob]", 'def compare_n_tags(n, model, dictionary, prob=0....]\n    return correct_predictions, all_predictions', "PATH_TAGS = '../data/stacksample/Tags.csv'\ntags_df = pd.read_csv(PATH_TAGS, encoding=ENCODING)", "COLUMNS_QUESTION = ['Title', 'Body', 'Id']\nQUEST...] = csv_data['Title'].map(str) + csv_data['Body']", 'model_file_name = widgets.Dropdown(options=[f fo...results(text_index.value, model, corpus_dict)    ', 'corpus_dict_widget = interactive(get_dictionary,...e_name)\ndisplay(corpus_dict_widget, model_widget)', 'interact_manual(display_results, index=text_inde...lt), dictionary=fixed(corpus_dict_widget.result))', 'interact_manual(compare_n_tags, n=text_index_sli...us_dict_widget.result), prob=probability_slider2)', 'import pyLDAvis.gensim\n\npyLDAvis.enable_notebook()', "CORPUS_FILE = 'titles_body_compact.mm'\ncorpus = MmCorpus(f'../data/stacksample/bow/{CORPUS_FILE}')", 'pyLDAvis.gensim.prepare(model_widget.result, corpus, corpus_dict_widget.result)', 'len(model_widget.result.get_topics())', 'model_file_name = widgets.Dropdown(options=[f fo...results(text_index.value, model, corpus_dict)    ', 'corpus_dict_widget = interactive(get_dictionary,...e_name)\ndisplay(corpus_dict_widget, model_widget)', 'interact_manual(display_results, index=text_inde...lt), dictionary=fixed(corpus_dict_widget.result))', 'import pyLDAvis.gensim\n\npyLDAvis.enable_notebook()', ...], 'LdaModel': <class 'gensim.models.ldamodel.LdaModel'>, 'MmCorpus': <class 'gensim.corpora.mmcorpus.MmCorpus'>, 'N_ROWS': 10000, ...}
        self.user_ns = {'ALLOWED_SHORT_WORDS': ['c', 'c#', 'r', '3d', '2d', '1d', '7z', 'qt'], 'COLUMNS_QUESTION': ['Title', 'Body', 'Id'], 'CORPUS_FILE': 'titles_body_compact.mm', 'Dictionary': <class 'gensim.corpora.dictionary.Dictionary'>, 'ENCODING': 'iso-8859-1', 'HdpModel': <class 'gensim.models.hdpmodel.HdpModel'>, 'In': ['', "from __future__ import print_function\nfrom ipywi...t.PrettyPrinter(indent=4)\nENCODING = 'iso-8859-1'", 'def get_files(path):\n    return [f for f in listdir(path) if isfile(join(path, f))]', "ALLOWED_SHORT_WORDS = ['c', 'c#', 'r', '3d', '2d...ds, stem_text])\n    return strip_short(processed)", "def get_real_tags(question_id):\n    real_tags = ...}') for tag_id, p in predicted_tags if p >= prob]", 'def compare_n_tags(n, model, dictionary, prob=0....]\n    return correct_predictions, all_predictions', "PATH_TAGS = '../data/stacksample/Tags.csv'\ntags_df = pd.read_csv(PATH_TAGS, encoding=ENCODING)", "COLUMNS_QUESTION = ['Title', 'Body', 'Id']\nQUEST...] = csv_data['Title'].map(str) + csv_data['Body']", 'model_file_name = widgets.Dropdown(options=[f fo...results(text_index.value, model, corpus_dict)    ', 'corpus_dict_widget = interactive(get_dictionary,...e_name)\ndisplay(corpus_dict_widget, model_widget)', 'interact_manual(display_results, index=text_inde...lt), dictionary=fixed(corpus_dict_widget.result))', 'interact_manual(compare_n_tags, n=text_index_sli...us_dict_widget.result), prob=probability_slider2)', 'import pyLDAvis.gensim\n\npyLDAvis.enable_notebook()', "CORPUS_FILE = 'titles_body_compact.mm'\ncorpus = MmCorpus(f'../data/stacksample/bow/{CORPUS_FILE}')", 'pyLDAvis.gensim.prepare(model_widget.result, corpus, corpus_dict_widget.result)', 'len(model_widget.result.get_topics())', 'model_file_name = widgets.Dropdown(options=[f fo...results(text_index.value, model, corpus_dict)    ', 'corpus_dict_widget = interactive(get_dictionary,...e_name)\ndisplay(corpus_dict_widget, model_widget)', 'interact_manual(display_results, index=text_inde...lt), dictionary=fixed(corpus_dict_widget.result))', 'import pyLDAvis.gensim\n\npyLDAvis.enable_notebook()', ...], 'LdaModel': <class 'gensim.models.ldamodel.LdaModel'>, 'MmCorpus': <class 'gensim.corpora.mmcorpus.MmCorpus'>, 'N_ROWS': 10000, ...}
   2870             finally:
   2871                 # Reset our crash handler in place
   2872                 sys.excepthook = old_excepthook
   2873         except SystemExit as e:

...........................................................................
D:\Jan\Kool\Informaatika Magistrantuur\#git\school\Keeleaine\project\notebooks\<ipython-input-24-201142e2dc32> in <module>()
----> 1 pyLDAvis.gensim.prepare(model_widget.result, corpus, corpus_dict_widget.result)

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\pyLDAvis\gensim.py in prepare(topic_model=<gensim.models.ldamulticore.LdaMulticore object>, corpus=<gensim.corpora.mmcorpus.MmCorpus object>, dictionary=<gensim.corpora.dictionary.Dictionary object>, doc_topic_dist=None, **kwargs={})
    107     See
    108     ------
    109     See `pyLDAvis.prepare` for **kwargs.
    110     """
    111     opts = fp.merge(_extract_data(topic_model, corpus, dictionary, doc_topic_dist), kwargs)
--> 112     return vis_prepare(**opts)
        opts = {'doc_lengths': array([160.,  17.,  42., ...,  44., 162., 134.]), 'doc_topic_dists': array([[1.24223574e-04, 1.24223574e-04, 1.242235... 1.48148145e-04, 2.60094225e-01]], dtype=float32), 'term_frequency': array([2.8656e+04, 4.6120e+03, 4.0340e+03, ..., 2.0000e+00, 1.0000e+00,
       2.0000e+00]), 'topic_term_dists': array([[2.8273804e-04, 1.3358187e-08, 1.0527907e...   2.7134344e-09, 2.7134344e-09]], dtype=float32), 'vocab': ['500', 'adob', 'air', 'alert', 'applic', 'applicationdirectori', 'applicationstoragedirectori', 'boolean', 'bytesavail', 'catch', 'categoryid', 'categorynam', 'creat', 'databas', 'date', 'datead', 'dbpath', 'downloadcomplet', 'durat', 'error', ...]}

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\pyLDAvis\_prepare.py in prepare(topic_term_dists=term    0             1             2           ...6     1.460016e-08  

[50 rows x 2009852 columns], doc_topic_dists=topic          49        1         44        34 ...0.000148  0.000148  

[1264216 rows x 50 columns], doc_lengths=0          160.0
1           17.0
2           42...Name: doc_length, Length: 1264216, dtype: float64, vocab=0                                  500
1        ...queri
Name: vocab, Length: 2009852, dtype: object, term_frequency=term
0           29034.585938
1            3025....        41.460430
Length: 2009852, dtype: float32, R=30, lambda_step=0.01, mds=<function js_PCoA>, n_jobs=-1, plot_opts={'xlab': 'PC1', 'ylab': 'PC2'}, sort_topics=True)
    393    ## term frequencies internally, using the topic term distributions and the
    394    ## topic frequencies, rather than using the user-supplied term frequencies.
    395    ## For a detailed discussion, see: https://github.com/cpsievert/LDAvis/pull/41
    396    term_frequency = np.sum(term_topic_freq, axis=0)
    397 
--> 398    topic_info         = _topic_info(topic_term_dists, topic_proportion, term_frequency, term_topic_freq, vocab, lambda_step, R, n_jobs)
        topic_info = undefined
        topic_term_dists = term    0             1             2           ...6     1.460016e-08  

[50 rows x 2009852 columns]
        topic_proportion = topic
49    0.063771
1     0.047897
44    0.0458...8836
42    0.008504
26    0.008400
dtype: float32
        term_frequency = term
0           29034.585938
1            3025....        41.460430
Length: 2009852, dtype: float32
        term_topic_freq = term       0            1            2          ....016971   0.016971  

[50 rows x 2009852 columns]
        vocab = 0                                  500
1        ...queri
Name: vocab, Length: 2009852, dtype: object
        lambda_step = 0.01
        R = 30
        n_jobs = -1
    399    token_table        = _token_table(topic_info, term_topic_freq, vocab, term_frequency)
    400    topic_coordinates = _topic_coordinates(mds, topic_term_dists, topic_proportion)
    401    client_topic_order = [x + 1 for x in topic_order]
    402 

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\pyLDAvis\_prepare.py in _topic_info(topic_term_dists=term    0             1             2           ...6     1.460016e-08  

[50 rows x 2009852 columns], topic_proportion=topic
49    0.063771
1     0.047897
44    0.0458...8836
42    0.008504
26    0.008400
dtype: float32, term_frequency=term
0           29034.585938
1            3025....        41.460430
Length: 2009852, dtype: float32, term_topic_freq=term       0            1            2          ....016971   0.016971  

[50 rows x 2009852 columns], vocab=0                                  500
1        ...queri
Name: vocab, Length: 2009852, dtype: object, lambda_step=0.01, R=30, n_jobs=-1)
    250                            'logprob': log_ttd.loc[original_topic_id, term_ix].round(4), \
    251                            'loglift': log_lift.loc[original_topic_id, term_ix].round(4), \
    252                            'Category': 'Topic%d' % new_topic_id})
    253 
    254    top_terms = pd.concat(Parallel(n_jobs=n_jobs)(delayed(_find_relevance_chunks)(log_ttd, log_lift, R, ls) \
--> 255                                                  for ls in _job_chunks(lambda_seq, n_jobs)))
        lambda_seq = array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06,...0.94, 0.95, 0.96, 0.97, 0.98,
       0.99, 1.  ])
        n_jobs = -1
    256    topic_dfs = map(topic_top_term_df, enumerate(top_terms.T.iterrows(), 1))
    257    return pd.concat([default_term_info] + list(topic_dfs))
    258 
    259 

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object _topic_info.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
MemoryError                                        Tue Jun  5 02:22:37 2018
PID: 63676                Python 3.6.2: d:\users\janvi\anaconda3\python.exe
...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self=<joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _find_relevance_chunks>, (term     0          1          2          3     ...18.042234 -18.042234

[50 rows x 2009852 columns], term    0         1         2         3         ... -2.349903 -3.021520

[50 rows x 2009852 columns], 30, array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05])), {})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _find_relevance_chunks>
        args = (term     0          1          2          3     ...18.042234 -18.042234

[50 rows x 2009852 columns], term    0         1         2         3         ... -2.349903 -3.021520

[50 rows x 2009852 columns], 30, array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05]))
        kwargs = {}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\pyLDAvis\_prepare.py in _find_relevance_chunks(log_ttd=term     0          1          2          3     ...18.042234 -18.042234

[50 rows x 2009852 columns], log_lift=term    0         1         2         3         ... -2.349903 -3.021520

[50 rows x 2009852 columns], R=30, lambda_seq=array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05]))
    208    relevance = lambda_ * log_ttd + (1 - lambda_) * log_lift
    209    return relevance.T.apply(lambda s: s.sort_values(ascending=False).index).head(R)
    210 
    211 
    212 def _find_relevance_chunks(log_ttd, log_lift, R, lambda_seq):
--> 213    return pd.concat([_find_relevance(log_ttd, log_lift, R, l) for l in lambda_seq])
        log_ttd = term     0          1          2          3     ...18.042234 -18.042234

[50 rows x 2009852 columns]
        log_lift = term    0         1         2         3         ... -2.349903 -3.021520

[50 rows x 2009852 columns]
        R = 30
        lambda_seq = array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05])
    214 
    215 
    216 def _topic_info(topic_term_dists, topic_proportion, term_frequency, term_topic_freq, vocab, lambda_step, R, n_jobs):
    217    # marginal distribution over terms (width of blue bars)

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\pyLDAvis\_prepare.py in <listcomp>(.0=<iterator object>)
    208    relevance = lambda_ * log_ttd + (1 - lambda_) * log_lift
    209    return relevance.T.apply(lambda s: s.sort_values(ascending=False).index).head(R)
    210 
    211 
    212 def _find_relevance_chunks(log_ttd, log_lift, R, lambda_seq):
--> 213    return pd.concat([_find_relevance(log_ttd, log_lift, R, l) for l in lambda_seq])
        l = 0.03
    214 
    215 
    216 def _topic_info(topic_term_dists, topic_proportion, term_frequency, term_topic_freq, vocab, lambda_step, R, n_jobs):
    217    # marginal distribution over terms (width of blue bars)

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\pyLDAvis\_prepare.py in _find_relevance(log_ttd=term     0          1          2          3     ...18.042234 -18.042234

[50 rows x 2009852 columns], log_lift=term    0         1         2         3         ... -2.349903 -3.021520

[50 rows x 2009852 columns], R=30, lambda_=0.03)
    203 
    204    return _chunks(l, n_chunks)
    205 
    206 
    207 def _find_relevance(log_ttd, log_lift, R, lambda_):
--> 208    relevance = lambda_ * log_ttd + (1 - lambda_) * log_lift
        relevance = undefined
        lambda_ = 0.03
        log_ttd = term     0          1          2          3     ...18.042234 -18.042234

[50 rows x 2009852 columns]
        log_lift = term    0         1         2         3         ... -2.349903 -3.021520

[50 rows x 2009852 columns]
    209    return relevance.T.apply(lambda s: s.sort_values(ascending=False).index).head(R)
    210 
    211 
    212 def _find_relevance_chunks(log_ttd, log_lift, R, lambda_seq):

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\pandas\core\ops.py in f(self=term    0         1         2         3         ... -0.541267 -0.541267

[50 rows x 2009852 columns], other=term    0         1         2         3         ... -2.279406 -2.930875

[50 rows x 2009852 columns], axis=None, level=None, fill_value=None)
   1515     def f(self, other, axis=default_axis, level=None, fill_value=None):
   1516 
   1517         other = _align_method_FRAME(self, other, axis)
   1518 
   1519         if isinstance(other, ABCDataFrame):  # Another DataFrame
-> 1520             return self._combine_frame(other, na_op, fill_value, level)
        self._combine_frame = <bound method DataFrame._combine_frame of term  ...-0.541267 -0.541267

[50 rows x 2009852 columns]>
        other = term    0         1         2         3         ... -2.279406 -2.930875

[50 rows x 2009852 columns]
        fill_value = None
        level = None
   1521         elif isinstance(other, ABCSeries):
   1522             return _combine_series_frame(self, other, na_op,
   1523                                          fill_value=fill_value, axis=axis,
   1524                                          level=level, try_cast=True)

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\pandas\core\frame.py in _combine_frame(self=term    0         1         2         3         ... -0.541267 -0.541267

[50 rows x 2009852 columns], other=term    0         1         2         3         ... -2.279406 -2.930875

[50 rows x 2009852 columns], func=<function _arith_method_FRAME.<locals>.na_op>, fill_value=None, level=None)
   4746                 result = self._constructor(result, index=new_index, copy=False)
   4747                 result.columns = new_columns
   4748             return result
   4749 
   4750         else:
-> 4751             result = _arith_op(this.values, other.values)
        result = undefined
        other.values = array([[-2.680421  , -1.9575108 ,  0.3039032 , ....       -2.2794056 , -2.9308748 ]], dtype=float32)
   4752 
   4753         return self._constructor(result, index=new_index, columns=new_columns,
   4754                                  copy=False)
   4755 

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\pandas\core\frame.py in _arith_op(left=array([[-0.3369759 , -0.38245544, -0.30995116, ....       -0.54126704, -0.54126704]], dtype=float32), right=array([[-2.680421  , -1.9575108 ,  0.3039032 , ....       -2.2794056 , -2.9308748 ]], dtype=float32))
   4727         def _arith_op(left, right):
   4728             # for the mixed_type case where we iterate over columns,
   4729             # _arith_op(left, right) is equivalent to
   4730             # left._binop(right, func, fill_value=fill_value)
   4731             left, right = ops.fill_binop(left, right, fill_value)
-> 4732             return func(left, right)
        left = array([[-0.3369759 , -0.38245544, -0.30995116, ....       -0.54126704, -0.54126704]], dtype=float32)
        right = array([[-2.680421  , -1.9575108 ,  0.3039032 , ....       -2.2794056 , -2.9308748 ]], dtype=float32)
   4733 
   4734         if this._is_mixed_type or other._is_mixed_type:
   4735             # iterate over columns
   4736             if this.columns.is_unique:

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\pandas\core\ops.py in na_op(x=array([[-0.3369759 , -0.38245544, -0.30995116, ....       -0.54126704, -0.54126704]], dtype=float32), y=array([[-2.680421  , -1.9575108 ,  0.3039032 , ....       -2.2794056 , -2.9308748 ]], dtype=float32))
   1462 
   1463     def na_op(x, y):
   1464         import pandas.core.computation.expressions as expressions
   1465 
   1466         try:
-> 1467             result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
        result = undefined
        expressions.evaluate = <function evaluate>
        x = array([[-0.3369759 , -0.38245544, -0.30995116, ....       -0.54126704, -0.54126704]], dtype=float32)
        y = array([[-2.680421  , -1.9575108 ,  0.3039032 , ....       -2.2794056 , -2.9308748 ]], dtype=float32)
   1468         except TypeError:
   1469             xrav = x.ravel()
   1470             if isinstance(y, (np.ndarray, ABCSeries)):
   1471                 dtype = find_common_type([x.dtype, y.dtype])

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in evaluate(op=<built-in function add>, op_str='+', a=array([[-0.3369759 , -0.38245544, -0.30995116, ....       -0.54126704, -0.54126704]], dtype=float32), b=array([[-2.680421  , -1.9575108 ,  0.3039032 , ....       -2.2794056 , -2.9308748 ]], dtype=float32), use_numexpr=True, **eval_kwargs={})
    200         use_numexpr : whether to try to use numexpr (default True)
    201         """
    202 
    203     use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
    204     if use_numexpr:
--> 205         return _evaluate(op, op_str, a, b, **eval_kwargs)
        op = <built-in function add>
        op_str = '+'
        a = array([[-0.3369759 , -0.38245544, -0.30995116, ....       -0.54126704, -0.54126704]], dtype=float32)
        b = array([[-2.680421  , -1.9575108 ,  0.3039032 , ....       -2.2794056 , -2.9308748 ]], dtype=float32)
        eval_kwargs = {}
    206     return _evaluate_standard(op, op_str, a, b)
    207 
    208 
    209 def where(cond, a, b, use_numexpr=True):

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_numexpr(op=<built-in function add>, op_str='+', a=array([[-0.3369759 , -0.38245544, -0.30995116, ....       -0.54126704, -0.54126704]], dtype=float32), b=array([[-2.680421  , -1.9575108 ,  0.3039032 , ....       -2.2794056 , -2.9308748 ]], dtype=float32), truediv=True, reversed=False, **eval_kwargs={})
    106             b_value = getattr(b, "values", b)
    107             result = ne.evaluate('a_value {op} b_value'.format(op=op_str),
    108                                  local_dict={'a_value': a_value,
    109                                              'b_value': b_value},
    110                                  casting='safe', truediv=truediv,
--> 111                                  **eval_kwargs)
        eval_kwargs = {}
    112         except ValueError as detail:
    113             if 'unknown type object' in str(detail):
    114                 pass
    115 

...........................................................................
d:\users\janvi\anaconda3\lib\site-packages\numexpr\necompiler.py in evaluate(ex='a_value + b_value', local_dict={'a_value': array([[-0.3369759 , -0.38245544, -0.30995116, ....       -0.54126704, -0.54126704]], dtype=float32), 'b_value': array([[-2.680421  , -1.9575108 ,  0.3039032 , ....       -2.2794056 , -2.9308748 ]], dtype=float32)}, global_dict=None, out=None, order='K', casting='safe', **kwargs={'casting': 'safe', 'ex_uses_vml': False, 'order': 'K', 'out': None})
    797                       NumExpr(ex, signature, **context)
    798     kwargs = {'out': out, 'order': order, 'casting': casting,
    799               'ex_uses_vml': ex_uses_vml}
    800     _numexpr_last = dict(ex=compiled_ex, argnames=names, kwargs=kwargs)
    801     with evaluate_lock:
--> 802         return compiled_ex(*arguments, **kwargs)
        compiled_ex = <numexpr.NumExpr object>
        arguments = [array([[-0.3369759 , -0.38245544, -0.30995116, ....       -0.54126704, -0.54126704]], dtype=float32), array([[-2.680421  , -1.9575108 ,  0.3039032 , ....       -2.2794056 , -2.9308748 ]], dtype=float32)]
        kwargs = {'casting': 'safe', 'ex_uses_vml': False, 'order': 'K', 'out': None}
    803 
    804 
    805 def re_evaluate(local_dict=None):
    806     """Re-evaluate the previous executed array expression without any check.

MemoryError: 
___________________________________________________________________________

In [23]:
model_widget.result.show_topics()

[(11,
  '0.156*"android" + 0.051*"layout" + 0.032*"view" + 0.021*"activ" + 0.017*"r" + 0.017*"app" + 0.017*"intent" + 0.016*"com" + 0.015*"content" + 0.014*"parent"'),
 (29,
  '0.100*"valu" + 0.078*"row" + 0.073*"option" + 0.070*"select" + 0.030*"col" + 0.027*"echo" + 0.023*"php" + 0.015*"result" + 0.013*"var" + 0.009*"num"'),
 (20,
  '0.018*"type" + 0.014*"refer" + 0.013*"librari" + 0.012*"dll" + 0.012*"load" + 0.012*"the" + 0.011*"c" + 0.011*"object" + 0.011*"work" + 0.010*"method"'),
 (32,
  '0.060*"c" + 0.025*"build" + 0.023*"error" + 0.021*"command" + 0.021*"run" + 0.019*"compil" + 0.015*"file" + 0.014*"instal" + 0.011*"debug" + 0.010*"project"'),
 (40,
  '0.097*"kei" + 0.036*"properti" + 0.032*"entiti" + 0.024*"set" + 0.023*"valu" + 0.022*"titl" + 0.018*"book" + 0.015*"string" + 0.012*"type" + 0.010*"asset"'),
 (21,
  '0.055*"time" + 0.042*"2016" + 0.024*"textview" + 0.023*"dai" + 0.021*"start" + 0.017*"job" + 0.017*"year" + 0.017*"month" + 0.016*"date" + 0.013*"inflat"'),
 (27,


In [None]:
def get_all_predicted_tags(model, dictionary):
    output_dict = {
        'Id': [],
        'Tags_p': []
    }
    for idx in range(len(csv_data)):
        prediction = get_predicted_tags(idx, model, dictionary)        
        tags = [(dictionary[tag_id], p) for tag_id, p in prediction[0]]
        output_dict['Tags_p'].append(tags)
        output_dict['Id'].append(prediction[1])
    return output_dict