In [1]:
import sys
import numpy as np
import artm
print artm.version()

from os import path, mkdir
from datetime import datetime
%matplotlib inline
sys.path.insert(0, '..\\modules\\helpers')
from plot_helper import PlotMaker
from config_helper import ConfigPaths
from print_helper import PrintHelper

0.8.1


In [2]:
# ConfigPaths is given 'config.cfg'; the cells below read models_file_name,
# experiment_path, output_batches_path and dictionary_path from it.
config = ConfigPaths('config.cfg')
# Plot helper: fit_one_model calls plot_maker.make_tm_plots for every fitted model.
plot_maker = PlotMaker()
# Report helper: fit_one_model uses it to print model parameters, scores and top tokens.
printer = PrintHelper()

In [3]:
# Show the path of the shared models log that fit_one_model writes to.
print config.models_file_name

Q:\\topic_modeling\\csi_science_collections.git\experiments\UCI_filtered_ngramm_trimmed_without_names\08_12_500_topics_exp\models.txt


In [4]:
# Opened in append mode ('a') and kept open for the whole notebook: fit_one_model
# passes this handle to printer.print_artm_model, and the last cell closes it.
models_file = open(config.models_file_name, 'a')

In [5]:
def create_model(current_dictionary, n_topics, n_doc_passes, seed_value, n_top_tokens, p_mass_threshold):
    """Create an ARTM model with the notebook's standard scores attached.

    Parameters:
        current_dictionary: artm dictionary passed to artm.ARTM and to the perplexity score.
        n_topics: value for artm.ARTM(num_topics=...).
        n_doc_passes: value assigned to model.num_document_passes.
        seed_value: value for artm.ARTM(seed=...).
        n_top_tokens: number of tokens for the top-tokens score.
        p_mass_threshold: probability mass threshold for the topic-kernel score.

    Returns:
        The configured (not yet fitted) artm.ARTM instance.
    """
    print('[{}] creating model'.format(datetime.now()))
    # Only the 'ngramm' modality has a non-zero weight; the others are listed with weight 0.0.
    modality_weights = {'ngramm': 1.0, 'author_id': 0.0, 'author': 0.0,
                        'post_tag': 0.0, 'projects': 0.0, 'category': 0.0,
                        'following_users': 0.0}
    model = artm.ARTM(num_topics=n_topics, dictionary=current_dictionary, cache_theta=True,
                      seed=seed_value, class_ids=modality_weights)
    model.num_document_passes = n_doc_passes
    # Hand the same dictionary to the scores so the perplexity score cannot silently
    # use a different (or not yet defined) notebook-level dictionary.
    add_scores_to_model(model, n_top_tokens=n_top_tokens, p_mass_threshold=p_mass_threshold,
                        current_dictionary=current_dictionary)
    return model


def add_scores_to_model(artm_model, n_top_tokens, p_mass_threshold, current_dictionary=None):
    """Attach the perplexity, sparsity, topic-kernel and top-tokens scores to a model.

    Parameters:
        artm_model: model whose `scores` collection receives the new scores.
        n_top_tokens: num_tokens for the 'top_tokens_score'.
        p_mass_threshold: probability_mass_threshold for the 'topic_kernel_score'.
        current_dictionary: dictionary for the 'perplexity_score'. When omitted, the
            notebook-level `dictionary` is used, which keeps older call sites working.
    """
    if current_dictionary is None:
        # Backward-compatible fallback to the notebook global used by the original code.
        current_dictionary = dictionary
    print('[{}] adding scores'.format(datetime.now()))
    artm_model.scores.add(artm.PerplexityScore(name='perplexity_score',
                                               use_unigram_document_model=False,
                                               dictionary=current_dictionary))
    # Phi-based scores are restricted to the 'ngramm' modality.
    artm_model.scores.add(artm.SparsityPhiScore(name='sparsity_phi_score', class_id='ngramm'))
    artm_model.scores.add(artm.SparsityThetaScore(name='sparsity_theta_score'))
    artm_model.scores.add(artm.TopicKernelScore(name='topic_kernel_score', class_id='ngramm',
                                                probability_mass_threshold=p_mass_threshold))
    artm_model.scores.add(artm.TopTokensScore(name='top_tokens_score', class_id='ngramm',
                                              num_tokens=n_top_tokens))
def fit_one_model(model, _n_iterations, _model_name=''):
    """Fit a model offline and write its reports and plots.

    Uses the notebook-level `batch_vectorizer`, `printer`, `plot_maker`, `config`
    and `models_file`.

    Parameters:
        model: ARTM model to fit.
        _n_iterations: number of collection passes for fit_offline.
        _model_name: name used in the reports and as the output file stem
            under config.experiment_path.

    Returns:
        The same model object, after fitting.
    """
    print('[{}] fitting'.format(datetime.now()))
    model.fit_offline(batch_vectorizer=batch_vectorizer,
                      num_collection_passes=_n_iterations)

    print('[{}] outputting'.format(datetime.now()))
    # Summary line(s) for this model go to the shared models log.
    printer.print_artm_model(model, _model_name, _n_iterations, output_file=models_file)

    # Plots are written under the experiment directory, named after the model.
    plots_target = path.join(config.experiment_path, _model_name)
    plot_maker.make_tm_plots(model, plots_target)

    # Scores and top tokens share one text file named '<model name>.txt'.
    report_target = path.join(config.experiment_path, _model_name + '.txt')
    printer.print_scores(model, _model_name, _n_iterations, report_target)
    printer.print_top_tokens(model, report_target)
    return model

In [6]:
# Batches are read from config.output_batches_path (data_format='batches').
batch_vectorizer = artm.BatchVectorizer(
    data_path=config.output_batches_path,
    data_format='batches')

# The dictionary is loaded from '<config.dictionary_path>.dict'.
dictionary = artm.Dictionary()
dictionary_file = config.dictionary_path + '.dict'
dictionary.load(dictionary_path=dictionary_file)

In [None]:
# Baseline model without regularizers

In [7]:
# Baseline run: no regularizers attached, 25 collection passes.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
# fit_one_model returns the model it was given, so bind the result to its final name.
model_no_reg_500 = fit_one_model(tmp_model, _n_iterations=25, _model_name='model_no_reg_500')
# Release the scratch name.
tmp_model = None

[2016-12-08 15:06:47.178000] creating model
[2016-12-08 15:06:54.492000] adding scores
[2016-12-08 15:06:54.522000] fitting
[2016-12-08 15:12:53.630000] outputting
name = model_no_reg_500, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 25, n_top_tokens = 15, p_threshold = 0.25



In [None]:
# Add the Phi decorrelator regularizer (runs with different tau values)

In [8]:
# Decorrelator on the 'ngramm' modality, tau = 0.1.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 0.1
# fit_one_model returns the model it was given, so bind the result to its final name.
model_decor_1 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_decor_1')
# Release the scratch name.
tmp_model = None

[2016-12-08 15:14:01.225000] creating model
[2016-12-08 15:14:07.308000] adding scores
[2016-12-08 15:14:07.358000] fitting
[2016-12-08 15:18:12.300000] outputting
name = model_decor_1, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
decorrelator_phi_regularizer, tau = 0.1



In [9]:
# Decorrelator on the 'ngramm' modality, tau = 10.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 10
# fit_one_model returns the model it was given, so bind the result to its final name.
model_decor_2 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_decor_2')
# Release the scratch name.
tmp_model = None

[2016-12-08 15:18:47.664000] creating model
[2016-12-08 15:18:50.276000] adding scores
[2016-12-08 15:18:50.294000] fitting
[2016-12-08 15:22:50.759000] outputting
name = model_decor_2, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
decorrelator_phi_regularizer, tau = 10



In [10]:
# Decorrelator on the 'ngramm' modality, tau = 100.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 100
# fit_one_model returns the model it was given, so bind the result to its final name.
model_decor_3 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_decor_3')
# Release the scratch name.
tmp_model = None

[2016-12-08 15:23:24.447000] creating model
[2016-12-08 15:23:27.605000] adding scores
[2016-12-08 15:23:27.629000] fitting
[2016-12-08 15:27:12.769000] outputting
name = model_decor_3, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
decorrelator_phi_regularizer, tau = 100



In [11]:
# Decorrelator on the 'ngramm' modality, tau = 1000.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 1000
# fit_one_model returns the model it was given, so bind the result to its final name.
model_decor_4 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_decor_4')
# Release the scratch name.
tmp_model = None

[2016-12-08 15:27:44.048000] creating model
[2016-12-08 15:27:48.418000] adding scores
[2016-12-08 15:27:48.473000] fitting
[2016-12-08 15:31:39.275000] outputting
name = model_decor_4, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
decorrelator_phi_regularizer, tau = 1000



In [12]:
# Decorrelator on the 'ngramm' modality, tau = 10000.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 10000
# fit_one_model returns the model it was given, so bind the result to its final name.
model_decor_5 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_decor_5')
# Release the scratch name.
tmp_model = None

[2016-12-08 15:32:08.738000] creating model
[2016-12-08 15:32:13.255000] adding scores
[2016-12-08 15:32:13.277000] fitting
[2016-12-08 15:35:03.401000] outputting
name = model_decor_5, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
decorrelator_phi_regularizer, tau = 10000



In [None]:
# SmoothSparseTheta regularizer only (runs with different tau values)

In [13]:
# SmoothSparseTheta regularizer, tau = -0.1.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'))
tmp_model.regularizers['ss_theta_regularizer'].tau = -0.1
# fit_one_model returns the model it was given, so bind the result to its final name.
model_sst_1 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_sst_1')
# Release the scratch name.
tmp_model = None

[2016-12-08 15:35:31.588000] creating model
[2016-12-08 15:35:33.741000] adding scores
[2016-12-08 15:35:33.756000] fitting
[2016-12-08 15:38:18.737000] outputting
name = model_sst_1, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -0.1



In [14]:
# SmoothSparseTheta regularizer, tau = -0.5.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'))
tmp_model.regularizers['ss_theta_regularizer'].tau = -0.5
# fit_one_model returns the model it was given, so bind the result to its final name.
model_sst_2 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_sst_2')
# Release the scratch name.
tmp_model = None

[2016-12-08 15:38:39.892000] creating model
[2016-12-08 15:38:42.100000] adding scores
[2016-12-08 15:38:42.114000] fitting
[2016-12-08 15:41:28.651000] outputting
name = model_sst_2, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -0.5



In [15]:
# SmoothSparseTheta regularizer, tau = -1.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'))
tmp_model.regularizers['ss_theta_regularizer'].tau = -1
# fit_one_model returns the model it was given, so bind the result to its final name.
model_sst_3 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_sst_3')
# Release the scratch name.
tmp_model = None

[2016-12-08 15:41:50.193000] creating model
[2016-12-08 15:41:52.533000] adding scores
[2016-12-08 15:41:52.547000] fitting
[2016-12-08 15:44:36.676000] outputting
name = model_sst_3, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -1



In [None]:
# SmoothSparsePhi regularizer only (runs with different tau values)

In [16]:
# SmoothSparsePhi regularizer on the 'ngramm' modality, tau = -0.0001.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['ss_phi_regularizer'].tau = -0.0001
# fit_one_model returns the model it was given, so bind the result to its final name.
model_ssphi_1 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_ssphi_1')
# Release the scratch name.
tmp_model = None

[2016-12-08 15:44:58.699000] creating model
[2016-12-08 15:45:00.973000] adding scores
[2016-12-08 15:45:01.023000] fitting
[2016-12-08 15:47:48.650000] outputting
name = model_ssphi_1, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_phi_regularizer, tau = -0.0001



In [17]:
# SmoothSparsePhi regularizer on the 'ngramm' modality, tau = -0.01.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['ss_phi_regularizer'].tau = -0.01
# fit_one_model returns the model it was given, so bind the result to its final name.
model_ssphi_2 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_ssphi_2')
# Release the scratch name.
tmp_model = None

[2016-12-08 15:48:09.472000] creating model
[2016-12-08 15:48:11.786000] adding scores
[2016-12-08 15:48:11.801000] fitting
[2016-12-08 15:50:59.520000] outputting
name = model_ssphi_2, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_phi_regularizer, tau = -0.01



In [18]:
# SmoothSparsePhi regularizer on the 'ngramm' modality, tau = -0.1.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['ss_phi_regularizer'].tau = -0.1
# fit_one_model returns the model it was given, so bind the result to its final name.
model_ssphi_3 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_ssphi_3')
# Release the scratch name.
tmp_model = None

[2016-12-08 15:51:24.785000] creating model
[2016-12-08 15:51:26.887000] adding scores
[2016-12-08 15:51:26.901000] fitting
[2016-12-08 15:54:11.922000] outputting
name = model_ssphi_3, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_phi_regularizer, tau = -0.1



In [19]:
# SmoothSparsePhi regularizer on the 'ngramm' modality, tau = 0.1.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['ss_phi_regularizer'].tau = 0.1
# fit_one_model returns the model it was given, so bind the result to its final name.
model_ssphi_4 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_ssphi_4')
# Release the scratch name.
tmp_model = None

[2016-12-08 15:54:28.858000] creating model
[2016-12-08 15:54:31.034000] adding scores
[2016-12-08 15:54:31.049000] fitting
[2016-12-08 15:57:22.481000] outputting
name = model_ssphi_4, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_phi_regularizer, tau = 0.1



In [20]:
# SmoothSparsePhi regularizer on the 'ngramm' modality, tau = 0.5.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['ss_phi_regularizer'].tau = 0.5
# fit_one_model returns the model it was given, so bind the result to its final name.
model_ssphi_5 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_ssphi_5')
# Release the scratch name.
tmp_model = None

[2016-12-08 15:57:35.738000] creating model
[2016-12-08 15:57:37.889000] adding scores
[2016-12-08 15:57:37.903000] fitting
[2016-12-08 16:00:30.746000] outputting
name = model_ssphi_5, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_phi_regularizer, tau = 0.5



In [21]:
# Decorrelator (tau = 1000) combined with SmoothSparseTheta (tau = -0.5).
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'))
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 1000
tmp_model.regularizers['ss_theta_regularizer'].tau = -0.5
# fit_one_model returns the model it was given, so bind the result to its final name.
model_decor_sst_1 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_decor_sst_1')
# Release the scratch name.
tmp_model = None

[2016-12-08 16:00:47.082000] creating model
[2016-12-08 16:00:49.379000] adding scores
[2016-12-08 16:00:49.409000] fitting
[2016-12-08 16:03:37.831000] outputting
name = model_decor_sst_1, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -0.5
decorrelator_phi_regularizer, tau = 1000



In [22]:
# Decorrelator (tau = 100) combined with SmoothSparseTheta (tau = -0.5).
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'))
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 100
tmp_model.regularizers['ss_theta_regularizer'].tau = -0.5
# fit_one_model returns the model it was given, so bind the result to its final name.
model_decor_sst_2 = fit_one_model(tmp_model, _n_iterations=20, _model_name='model_decor_sst_2')
# Release the scratch name.
tmp_model = None

[2016-12-08 16:03:59.692000] creating model
[2016-12-08 16:04:02.007000] adding scores
[2016-12-08 16:04:02.029000] fitting
[2016-12-08 16:06:52.232000] outputting
name = model_decor_sst_2, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -0.5
decorrelator_phi_regularizer, tau = 100



In [7]:
# All three regularizers: decorrelator tau = 100, SmoothSparseTheta tau = -0.5,
# SmoothSparsePhi tau = -0.01.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'))
tmp_model.regularizers.add(
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 100
tmp_model.regularizers['ss_theta_regularizer'].tau = -0.5
tmp_model.regularizers['ss_phi_regularizer'].tau = -0.01
# fit_one_model returns the model it was given, so bind the result to its final name.
model_decor_sst_ssphi_1 = fit_one_model(tmp_model, _n_iterations=20,
                                        _model_name='model_decor_sst_ssphi_1')
# Release the scratch name.
tmp_model = None

[2016-12-09 20:56:42.478000] creating model
[2016-12-09 20:56:44.443000] adding scores
[2016-12-09 20:56:44.502000] fitting
[2016-12-09 20:59:10.174000] outputting
name = model_decor_sst_ssphi_1, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -0.5
decorrelator_phi_regularizer, tau = 100
ss_phi_regularizer, tau = -0.01



In [8]:
# All three regularizers: decorrelator tau = 1000, SmoothSparseTheta tau = -0.5,
# SmoothSparsePhi tau = -0.01.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'))
tmp_model.regularizers.add(
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 1000
tmp_model.regularizers['ss_theta_regularizer'].tau = -0.5
tmp_model.regularizers['ss_phi_regularizer'].tau = -0.01
# fit_one_model returns the model it was given, so bind the result to its final name.
model_decor_sst_ssphi_2 = fit_one_model(tmp_model, _n_iterations=20,
                                        _model_name='model_decor_sst_ssphi_2')
# Release the scratch name.
tmp_model = None

[2016-12-09 20:59:32.434000] creating model
[2016-12-09 20:59:34.180000] adding scores
[2016-12-09 20:59:34.211000] fitting
[2016-12-09 21:01:52.838000] outputting
name = model_decor_sst_ssphi_2, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -0.5
decorrelator_phi_regularizer, tau = 1000
ss_phi_regularizer, tau = -0.01



In [9]:
# All three regularizers: decorrelator tau = 1000, SmoothSparseTheta tau = -0.5,
# SmoothSparsePhi tau = -0.1.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'))
tmp_model.regularizers.add(
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 1000
tmp_model.regularizers['ss_theta_regularizer'].tau = -0.5
tmp_model.regularizers['ss_phi_regularizer'].tau = -0.1
# fit_one_model returns the model it was given, so bind the result to its final name.
model_decor_sst_ssphi_3 = fit_one_model(tmp_model, _n_iterations=20,
                                        _model_name='model_decor_sst_ssphi_3')
# Release the scratch name.
tmp_model = None

[2016-12-09 21:35:11.573000] creating model
[2016-12-09 21:35:14.675000] adding scores
[2016-12-09 21:35:14.817000] fitting
[2016-12-09 21:37:34.204000] outputting
name = model_decor_sst_ssphi_3, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -0.5
decorrelator_phi_regularizer, tau = 1000
ss_phi_regularizer, tau = -0.1



In [10]:
# All three regularizers: decorrelator tau = 1000, SmoothSparseTheta tau = -0.1,
# SmoothSparsePhi tau = -0.05.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'))
tmp_model.regularizers.add(
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 1000
tmp_model.regularizers['ss_theta_regularizer'].tau = -0.1
tmp_model.regularizers['ss_phi_regularizer'].tau = -0.05
# fit_one_model returns the model it was given, so bind the result to its final name.
model_decor_sst_ssphi_4 = fit_one_model(tmp_model, _n_iterations=20,
                                        _model_name='model_decor_sst_ssphi_4')
# Release the scratch name.
tmp_model = None

[2016-12-09 21:39:04.534000] creating model
[2016-12-09 21:39:06.366000] adding scores
[2016-12-09 21:39:06.398000] fitting
[2016-12-09 21:41:21.294000] outputting
name = model_decor_sst_ssphi_4, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -0.1
decorrelator_phi_regularizer, tau = 1000
ss_phi_regularizer, tau = -0.05



In [11]:
# All three regularizers: decorrelator tau = 100, SmoothSparseTheta tau = -0.1,
# SmoothSparsePhi tau = -0.05.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=500, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
tmp_model.regularizers.add(
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers.add(
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'))
tmp_model.regularizers.add(
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['ngramm']))
tmp_model.regularizers['decorrelator_phi_regularizer'].tau = 100
tmp_model.regularizers['ss_theta_regularizer'].tau = -0.1
tmp_model.regularizers['ss_phi_regularizer'].tau = -0.05
# fit_one_model returns the model it was given, so bind the result to its final name.
model_decor_sst_ssphi_5 = fit_one_model(tmp_model, _n_iterations=20,
                                        _model_name='model_decor_sst_ssphi_5')
# Release the scratch name.
tmp_model = None

[2016-12-09 21:41:43.229000] creating model
[2016-12-09 21:41:44.969000] adding scores
[2016-12-09 21:41:44.993000] fitting
[2016-12-09 21:43:59.215000] outputting
name = model_decor_sst_ssphi_5, n_topics = 500, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -0.1
decorrelator_phi_regularizer, tau = 100
ss_phi_regularizer, tau = -0.05



In [12]:
# Close the shared models log that fit_one_model has been writing to.
models_file.close()