In [1]:
import sys
import numpy as np
import artm
print artm.version()

from os import path, mkdir
from datetime import datetime
%matplotlib inline
sys.path.insert(0, '..\\modules\\helpers')
from plot_helper import PlotMaker
from config_helper import ConfigPaths
from print_helper import PrintHelper

0.8.1


In [2]:
config = ConfigPaths('config.cfg')
plot_maker = PlotMaker()
printer = PrintHelper()

In [3]:
print config.models_file_name

Q:\\topic_modeling\\csi_science_collections.git\experiments\UCI_filtered_ngramm_trimmed_without_names\02_12_decor_sparse_theta\models.txt


In [17]:
models_file = open(config.models_file_name, 'a')

In [5]:
def create_model(current_dictionary, n_topics, n_doc_passes, seed_value, n_top_tokens, p_mass_threshold):    
    print '[{}] creating model'.format(datetime.now())
    model = artm.ARTM(num_topics=n_topics, dictionary=current_dictionary, cache_theta=True, seed=seed_value, 
                  class_ids={'ngramm': 1.0, 'author_id': 0.0, 'author': 0.0, 
                             'post_tag': 0.0, 'projects': 0.0, 'category': 0.0,
                             'following_users': 0.0})
    model.num_document_passes = n_doc_passes
    add_scores_to_model(model, n_top_tokens=n_top_tokens, p_mass_threshold=p_mass_threshold)
    return model


def add_scores_to_model(artm_model, n_top_tokens, p_mass_threshold):
    print '[{}] adding scores'.format(datetime.now())
    artm_model.scores.add(artm.PerplexityScore(name='perplexity_score',
                                      use_unigram_document_model=False,
                                      dictionary=dictionary))
    artm_model.scores.add(artm.SparsityPhiScore(name='sparsity_phi_score', class_id='ngramm'))
    artm_model.scores.add(artm.SparsityThetaScore(name='sparsity_theta_score'))
    artm_model.scores.add(artm.TopicKernelScore(name='topic_kernel_score', class_id='ngramm', 
                                                probability_mass_threshold=p_mass_threshold))
    artm_model.scores.add(artm.TopTokensScore(name='top_tokens_score', class_id='ngramm', num_tokens=n_top_tokens))

In [6]:
def process_one_model(dictionary, _n_topics, _n_doc_passes, _seed_value, _n_top_tokens, _p_mass_threshold, _n_iterations,
                     _model_name=''):
    print '[{}] processing model'.format(datetime.now())
    model = create_model(current_dictionary=dictionary, n_topics=_n_topics, n_doc_passes=_n_doc_passes, seed_value=_seed_value,
                         n_top_tokens=_n_top_tokens, p_mass_threshold=_p_mass_threshold)
    model = fit_one_model(model, _n_iterations, _model_name)
    return model
    
def fit_one_model(model, _n_iterations, _model_name=''): 
    print '[{}] fitting'.format(datetime.now())
    model.fit_offline(batch_vectorizer=batch_vectorizer, num_collection_passes=_n_iterations)
    print '[{}] outputting'.format(datetime.now())
    printer.print_artm_model(model, _model_name, _n_iterations, output_file=models_file)
    model_pics_file_name =  path.join(config.experiment_path, _model_name)
    plot_maker.make_tm_plots(model, model_pics_file_name)
    model_output_file_name = path.join(config.experiment_path, _model_name + '.txt')
    printer.print_scores(model, _model_name, _n_iterations, model_output_file_name)
    printer.print_top_tokens(model, model_output_file_name)
    return model

In [7]:
def save_model(_model, _model_name): 
    print '[{}] saving model'.format(datetime.now())
    model_output_file_name = path.join(config.models_archive_path, _model_name)
    _model.save(filename=model_output_file_name+'_saved_p_wt', model_name=_model_name+'p_wt')
    _model.save(filename=model_output_file_name+'_saved_n_wt', model_name=_model_name+'n_wt')

In [None]:
batch_vectorizer = artm.BatchVectorizer(data_path=config.dataset_path,
                                        data_format='bow_uci',
                                        collection_name=config.collection_name,
                                        target_folder=config.output_batches_path)
dictionary = artm.Dictionary()
dictionary.gather(data_path=config.output_batches_path,
                  vocab_file_path=config.vocabulary_path)
dictionary.save(dictionary_path=config.dictionary_path)
dictionary.save_text(dictionary_path=config.dictionary_path + '.txt')
dictionary.load_text(dictionary_path=config.dictionary_path + '.txt')

In [8]:
batch_vectorizer = artm.BatchVectorizer(data_path=config.output_batches_path,
                                        data_format='batches')
dictionary = artm.Dictionary()
dictionary.load(dictionary_path=config.dictionary_path + '.dict')

In [None]:
# dictionary.filter(min_tf=5, max_tf=2000, min_df_rate=0.01, max_df_rate=0.9)

In [9]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -1e-1
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_theta_reg_1')
model_theta_reg_1 = tmp_model; tmp_model = None

[2016-12-02 21:09:38.044000] creating model
[2016-12-02 21:09:41.993000] adding scores
[2016-12-02 21:09:42.033000] fitting
[2016-12-02 21:16:56.274000] outputting
name = model_theta_reg_1, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -0.1



In [10]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -1e+1
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_theta_reg_2')
model_theta_reg_2 = tmp_model; tmp_model = None

[2016-12-02 21:17:23.129000] creating model
[2016-12-02 21:17:27.294000] adding scores
[2016-12-02 21:17:27.334000] fitting
[2016-12-02 21:23:33.576000] outputting
name = model_theta_reg_2, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -10.0



In [11]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -1e-2
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_theta_reg_3')
model_theta_reg_3 = tmp_model; tmp_model = None

[2016-12-02 21:23:51.646000] creating model
[2016-12-02 21:23:55.544000] adding scores
[2016-12-02 21:23:55.569000] fitting
[2016-12-02 21:31:32.482000] outputting
name = model_theta_reg_3, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -0.01



In [12]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -1e-3
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_theta_reg_4')
model_theta_reg_4 = tmp_model; tmp_model = None

[2016-12-02 21:32:04.374000] creating model
[2016-12-02 21:32:10.683000] adding scores
[2016-12-02 21:32:10.725000] fitting
[2016-12-02 21:40:28.884000] outputting
name = model_theta_reg_4, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -0.001



In [13]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -0.5
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_theta_reg_5')
model_theta_reg_5 = tmp_model; tmp_model = None

[2016-12-02 21:41:11.662000] creating model
[2016-12-02 21:41:20.883000] adding scores
[2016-12-02 21:41:20.914000] fitting
[2016-12-02 21:48:31.142000] outputting
name = model_theta_reg_5, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -0.5



In [14]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -1
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_theta_reg_6')
model_theta_reg_6 = tmp_model; tmp_model = None

[2016-12-02 21:48:56.587000] creating model
[2016-12-02 21:49:00.369000] adding scores
[2016-12-02 21:49:00.413000] fitting
[2016-12-02 21:56:18.327000] outputting
name = model_theta_reg_6, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -1



In [None]:
# модель + sparse + decor

In [15]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -0.1
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 1e-1
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_1')
model_decor_sparse_t_reg_1 = tmp_model; tmp_model = None

[2016-12-02 21:56:42.877000] creating model
[2016-12-02 21:56:47.094000] adding scores
[2016-12-02 21:56:47.148000] fitting
[2016-12-02 22:05:18.147000] outputting
name = model_decor_sparse_t_reg_1, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -0.1
decorrelator_phi_regularizer, tau = 0.1



In [16]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -1e-1
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 1e-1
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_2')
model_decor_sparse_t_reg_2 = tmp_model; tmp_model = None

[2016-12-02 22:05:46.911000] creating model
[2016-12-02 22:05:51.162000] adding scores
[2016-12-02 22:05:51.220000] fitting
[2016-12-02 22:14:36.426000] outputting
name = model_decor_sparse_t_reg_2, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -0.1
decorrelator_phi_regularizer, tau = 0.1



In [17]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -1e-2
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 1e-1
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_3')
model_decor_sparse_t_reg_3 = tmp_model; tmp_model = None

[2016-12-02 22:15:04.531000] creating model
[2016-12-02 22:15:08.824000] adding scores
[2016-12-02 22:15:08.881000] fitting
[2016-12-02 22:23:44.633000] outputting
name = model_decor_sparse_t_reg_3, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -0.01
decorrelator_phi_regularizer, tau = 0.1



In [18]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -1e-3
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 1e-1
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_3')
model_decor_sparse_t_reg_3 = tmp_model; tmp_model = None

[2016-12-02 22:24:14.983000] creating model
[2016-12-02 22:24:19.228000] adding scores
[2016-12-02 22:24:19.281000] fitting
[2016-12-02 22:33:08.436000] outputting
name = model_decor_sparse_t_reg_3, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -0.001
decorrelator_phi_regularizer, tau = 0.1



In [None]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = 1e-3
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 1e-1
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_4')
model_decor_sparse_t_reg_4 = tmp_model; tmp_model = None

[2016-12-02 22:33:36.593000] creating model
[2016-12-02 22:33:41.061000] adding scores
[2016-12-02 22:33:41.124000] fitting
[2016-12-02 22:42:17.591000] outputting
name = model_decor_sparse_t_reg_4, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = 0.001
decorrelator_phi_regularizer, tau = 0.1



In [None]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = 1e-2
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 1e-1
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_5')
model_decor_sparse_t_reg_5 = tmp_model; tmp_model = None

[2016-12-02 22:42:45.844000] creating model
[2016-12-02 22:42:50.158000] adding scores
[2016-12-02 22:42:50.218000] fitting


In [None]:
# tau decorrelator_phi_regularizer = 100

In [9]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -0.01
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 100
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_21')
model_decor_sparse_t_reg_21 = tmp_model; tmp_model = None

[2016-12-03 14:15:02.098000] creating model
[2016-12-03 14:15:06.048000] adding scores
[2016-12-03 14:15:06.179000] fitting
[2016-12-03 14:22:20.353000] outputting
name = model_decor_sparse_t_reg_21, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -0.01
decorrelator_phi_regularizer, tau = 100



In [10]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -0.1
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 100
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_22')
model_decor_sparse_t_reg_22 = tmp_model; tmp_model = None

[2016-12-03 14:22:46.415000] creating model
[2016-12-03 14:22:50.389000] adding scores
[2016-12-03 14:22:50.438000] fitting
[2016-12-03 14:30:00.214000] outputting
name = model_decor_sparse_t_reg_22, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -0.1
decorrelator_phi_regularizer, tau = 100



In [11]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -0.5
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 100
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_23')
model_decor_sparse_t_reg_23 = tmp_model; tmp_model = None

[2016-12-03 14:30:24.706000] creating model
[2016-12-03 14:30:28.462000] adding scores
[2016-12-03 14:30:28.511000] fitting
[2016-12-03 14:37:16.691000] outputting
name = model_decor_sparse_t_reg_23, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -0.5
decorrelator_phi_regularizer, tau = 100



In [12]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -1
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 100
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_24')
model_decor_sparse_t_reg_24 = tmp_model; tmp_model = None

[2016-12-03 14:37:41.962000] creating model
[2016-12-03 14:37:45.664000] adding scores
[2016-12-03 14:37:45.713000] fitting
[2016-12-03 14:44:19.493000] outputting
name = model_decor_sparse_t_reg_24, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -1
decorrelator_phi_regularizer, tau = 100



In [13]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = 0.01
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 100
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_25')
model_decor_sparse_t_reg_25 = tmp_model; tmp_model = None

[2016-12-03 14:44:41.576000] creating model
[2016-12-03 14:44:45.258000] adding scores
[2016-12-03 14:44:45.307000] fitting
[2016-12-03 14:51:41.478000] outputting
name = model_decor_sparse_t_reg_25, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = 0.01
decorrelator_phi_regularizer, tau = 100



In [14]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = 0.1
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 100
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_26')
model_decor_sparse_t_reg_26 = tmp_model; tmp_model = None

[2016-12-03 14:52:05.225000] creating model
[2016-12-03 14:52:08.889000] adding scores
[2016-12-03 14:52:08.938000] fitting
[2016-12-03 14:59:18.249000] outputting
name = model_decor_sparse_t_reg_26, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = 0.1
decorrelator_phi_regularizer, tau = 100



In [15]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = 0.5
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 100
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_27')
model_decor_sparse_t_reg_27 = tmp_model; tmp_model = None

[2016-12-03 14:59:41.015000] creating model
[2016-12-03 14:59:44.788000] adding scores
[2016-12-03 14:59:44.838000] fitting
[2016-12-03 15:06:55.074000] outputting
name = model_decor_sparse_t_reg_27, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = 0.5
decorrelator_phi_regularizer, tau = 100



In [18]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -0.01
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 10
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_31')
model_decor_sparse_t_reg_31 = tmp_model; tmp_model = None

[2016-12-03 17:11:04.517000] creating model
[2016-12-03 17:11:09.389000] adding scores
[2016-12-03 17:11:09.471000] fitting
[2016-12-03 17:18:05.643000] outputting
name = model_decor_sparse_t_reg_31, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -0.01
decorrelator_phi_regularizer, tau = 10



In [19]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -0.1
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 10
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_32')
model_decor_sparse_t_reg_32 = tmp_model; tmp_model = None

[2016-12-03 17:18:30.546000] creating model
[2016-12-03 17:18:34.534000] adding scores
[2016-12-03 17:18:34.594000] fitting
[2016-12-03 17:25:44.312000] outputting
name = model_decor_sparse_t_reg_32, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -0.1
decorrelator_phi_regularizer, tau = 10



In [20]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -0.5
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 10
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_33')
model_decor_sparse_t_reg_33 = tmp_model; tmp_model = None

[2016-12-03 17:26:09.503000] creating model
[2016-12-03 17:26:13.390000] adding scores
[2016-12-03 17:26:13.448000] fitting
[2016-12-03 17:33:21.962000] outputting
name = model_decor_sparse_t_reg_33, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -0.5
decorrelator_phi_regularizer, tau = 10



In [21]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = -1
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 10
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_34')
model_decor_sparse_t_reg_34 = tmp_model; tmp_model = None

[2016-12-03 17:33:46.812000] creating model
[2016-12-03 17:33:50.620000] adding scores
[2016-12-03 17:33:50.669000] fitting
[2016-12-03 17:41:00.351000] outputting
name = model_decor_sparse_t_reg_34, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = -1
decorrelator_phi_regularizer, tau = 10



In [22]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = 0.01
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 10
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_35')
model_decor_sparse_t_reg_35 = tmp_model; tmp_model = None

[2016-12-03 17:41:22.570000] creating model
[2016-12-03 17:41:26.590000] adding scores
[2016-12-03 17:41:26.640000] fitting
[2016-12-03 17:49:18.020000] outputting
name = model_decor_sparse_t_reg_35, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = 0.01
decorrelator_phi_regularizer, tau = 10



In [23]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = 0.1
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 10
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_36')
model_decor_sparse_t_reg_36 = tmp_model; tmp_model = None

[2016-12-03 17:49:45.713000] creating model
[2016-12-03 17:49:49.594000] adding scores
[2016-12-03 17:49:49.649000] fitting
[2016-12-03 17:57:43.455000] outputting
name = model_decor_sparse_t_reg_36, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = 0.1
decorrelator_phi_regularizer, tau = 10



In [24]:
tmp_model = create_model(current_dictionary=dictionary, n_topics=2000, n_doc_passes=5, seed_value=100,
                         n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(artm.SmoothSparseThetaRegularizer(name='sparse_theta_regularizer'))
tmp_model.regularizers['sparse_theta_regularizer'].tau = 0.5
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 10
tmp_model = fit_one_model(tmp_model, _n_iterations=15, _model_name='model_decor_sparse_t_reg_37')
model_decor_sparse_t_reg_37 = tmp_model; tmp_model = None

[2016-12-03 17:58:07.658000] creating model
[2016-12-03 17:58:11.538000] adding scores
[2016-12-03 17:58:11.622000] fitting
[2016-12-03 18:05:40.229000] outputting
name = model_decor_sparse_t_reg_37, n_topics = 2000, n_doc_passes = 5, seed_value = 100, n_iterations = 15, n_top_tokens = 15, p_threshold = 0.25
sparse_theta_regularizer, tau = 0.5
decorrelator_phi_regularizer, tau = 10



In [25]:
models_file.close()