In [11]:
from __future__ import print_function

import logging
import pprint
from os import listdir
from os.path import isfile, join

import ipywidgets as widgets
from IPython.display import display
from gensim.corpora import MmCorpus, Dictionary
from gensim.models import HdpModel, TfidfModel

# Log INFO and above. The leading '\r' in the format moves the cursor back to
# the start of the line before each record is written.
logging.basicConfig(
    level=logging.INFO,
    format='\r%(asctime)s : %(levelname)s : %(message)s',
)

# Shared pretty-printer configured with a 4-space indent.
pp = pprint.PrettyPrinter(indent=4)


In [12]:
def get_files(path):
    """Return the sorted names of the files located directly inside ``path``.

    Only entries for which ``isfile`` is true are kept, so sub-directories
    are skipped and the listing is not recursive. ``listdir`` makes no
    guarantee about ordering, so the names are sorted to keep the result
    (and any widget options built from it) stable between runs.

    Args:
        path: Directory to list.

    Returns:
        A sorted list of bare file names (without the directory prefix).

    Raises:
        OSError: Propagated from ``listdir`` when ``path`` cannot be listed.
    """
    return sorted(f for f in listdir(path) if isfile(join(path, f)))

## Model training
### Widget creation

In [13]:
# Dropdown listing every file found in the dictionary directory.
dict_file = widgets.Dropdown(
    description='Dictionary file:',
    options=get_files('../data/stacksample/dictionary'),
)

# Dropdown listing the bag-of-words files; any file whose name contains
# 'index' is left out (presumably the corpus index companions -- confirm).
bow_file = widgets.Dropdown(
    description='BoW file:',
    options=[name for name in get_files('../data/stacksample/bow') if 'index' not in name],
)

# Button that starts training once a click handler is attached to it.
train_model_btn = widgets.Button(description='Train model!')

# Free-text field holding the file name to save the model under.
model_name = widgets.Text(
    description='Model name:',
    placeholder='Type model name',
    value='model_name.type',
)

# Checkbox controlling whether the trained model is written to disk.
save_model = widgets.Checkbox(description='Save model?', value=False, disabled=False)

### Training methods

In [14]:
def train_model(dict_f, bow_f, model_name='model_name', save_model=False):
    """Train an HDP model from a stored dictionary and bag-of-words corpus.

    Args:
        dict_f: File name of the saved ``Dictionary`` inside
            ``../data/stacksample/dictionary``.
        bow_f: File name of the corpus inside ``../data/stacksample/bow``,
            opened with ``MmCorpus``.
        model_name: File name used under ``../data/stacksample/models`` when
            the model is saved.
        save_model: When true, the trained model is written to disk.

    Returns:
        The trained ``HdpModel``, so it stays usable even when it is not
        saved.

    Raises:
        ValueError: If ``save_model`` is true and ``model_name`` is blank.
    """
    # Function-scope import: the notebook only imports selected names from os.
    from os import makedirs

    models_dir = '../data/stacksample/models'

    if save_model:
        # Check the save target *before* training so that a bad name or a
        # missing directory does not surface only after the model was fitted.
        if not (model_name and str(model_name).strip()):
            raise ValueError('model_name must not be empty when save_model is True')
        makedirs(models_dir, exist_ok=True)

    corpus_dict = Dictionary.load(f'../data/stacksample/dictionary/{dict_f}')
    corpus_bow = MmCorpus(f'../data/stacksample/bow/{bow_f}')

    model = HdpModel(corpus_bow, id2word=corpus_dict)

    if save_model:
        model.save(f'{models_dir}/{model_name}')

    # Hand the model back instead of discarding it at the end of the call.
    return model
    
    
def train_btn_click(b):
    """Click handler that starts training with the current widget values.

    Reads ``.value`` from the ``dict_file``, ``bow_file``, ``model_name`` and
    ``save_model`` widgets and passes them to ``train_model``.

    Args:
        b: Object supplied by the click event; not used here.
    """
    train_model(
        dict_f=dict_file.value,
        bow_f=bow_file.value,
        model_name=model_name.value,
        save_model=save_model.value,
    )

### Run training

In [15]:
# Drop the newline normally appended after each log record. Together with the
# '\r' at the start of the configured log format, successive records are
# written over the same line instead of piling up in the cell output.
logging.StreamHandler.terminator = ''

# Attach the click handler, then render the controls; training only starts
# when the button is pressed.
train_model_btn.on_click(train_btn_click)
display(
    dict_file,
    bow_file,
    model_name,
    save_model,
    train_model_btn,
)

