In [1]:
import pickle
import sys
from datetime import datetime
from os import path, mkdir

import numpy as np
import pandas as pd
import artm
import seaborn as sns
import matplotlib.pyplot as plt

# Record the BigARTM version the experiments below were run with.
print(artm.version())

# The project helpers are imported from a sibling directory. Build the entry
# with path.join so it does not depend on Windows-style separators
# (on Windows this yields the same '..\modules\helpers' string as before).
sys.path.insert(0, path.join('..', 'modules', 'helpers'))
import distances_helper as dh
from plot_helper import PlotMaker
from config_helper import ConfigPaths
from print_helper import PrintHelper

0.8.1


In [5]:
def create_model(current_dictionary, n_topics, n_doc_passes, seed_value, n_top_tokens, p_mass_threshold):
    """Build an ARTM model on '@default_class' and attach the standard scores.

    Args:
        current_dictionary: dictionary handed to artm.ARTM and to the
            perplexity score.
        n_topics: number of topics of the model.
        n_doc_passes: value assigned to model.num_document_passes.
        seed_value: seed passed to artm.ARTM.
        n_top_tokens: number of tokens tracked by the top-tokens score.
        p_mass_threshold: probability mass threshold of the topic-kernel score.

    Returns:
        The configured artm.ARTM instance; no fitting is done here.
    """
    print('[{}] creating model'.format(datetime.now()))
    model = artm.ARTM(num_topics=n_topics, dictionary=current_dictionary, cache_theta=True, seed=seed_value,
                      class_ids={'@default_class': 1.0})
    model.num_document_passes = n_doc_passes
    # Forward the dictionary explicitly so the perplexity score is computed
    # against the same dictionary the model was built with.
    add_scores_to_model(model, n_top_tokens=n_top_tokens, p_mass_threshold=p_mass_threshold,
                        current_dictionary=current_dictionary)
    return model


def add_scores_to_model(artm_model, n_top_tokens, p_mass_threshold, current_dictionary=None):
    """Register perplexity, sparsity, topic-kernel and top-tokens scores.

    Args:
        artm_model: model whose `scores` collection receives the new scores.
        n_top_tokens: num_tokens of the 'top_tokens_score'.
        p_mass_threshold: probability_mass_threshold of 'topic_kernel_score'.
        current_dictionary: dictionary for 'perplexity_score'. When omitted,
            the notebook-level `dictionary` variable is used, which is what
            this function always did before the parameter existed.
    """
    print('[{}] adding scores'.format(datetime.now()))
    if current_dictionary is None:
        # Backward-compatible fallback for callers that still rely on the
        # global defined in the data-loading cell.
        current_dictionary = dictionary
    artm_model.scores.add(artm.PerplexityScore(name='perplexity_score',
                                               dictionary=current_dictionary))
    artm_model.scores.add(artm.SparsityPhiScore(name='ss_phi_score', class_id='@default_class'))
    artm_model.scores.add(artm.SparsityThetaScore(name='ss_theta_score'))
    artm_model.scores.add(artm.TopicKernelScore(name='topic_kernel_score', class_id='@default_class',
                                                probability_mass_threshold=p_mass_threshold))
    artm_model.scores.add(artm.TopTokensScore(name='top_tokens_score', class_id='@default_class',
                                              num_tokens=n_top_tokens))
def fit_one_model(model, _n_iterations, _model_name=''):
    """Fit `model` offline, then write its report, plots and score dumps.

    Depends on notebook-level state that must be defined before the call:
    `batch_vectorizer`, `printer`, `plot_maker`, `models_file` and `config`.

    Args:
        model: model exposing fit_offline().
        _n_iterations: number of collection passes.
        _model_name: name used in the report and as the base name of the
            files created under config.experiment_path.

    Returns:
        The same `model` object, after fitting.
    """
    print('[{}] fitting'.format(datetime.now()))
    model.fit_offline(num_collection_passes=_n_iterations, batch_vectorizer=batch_vectorizer)

    print('[{}] outputting'.format(datetime.now()))
    # Summary line goes to the shared models file opened by the config cell.
    printer.print_artm_model(model, _model_name, _n_iterations, output_file=models_file)

    # Plots use '<experiment_path>/<model name>' as their file name base.
    plots_base = path.join(config.experiment_path, _model_name)
    plot_maker.make_tm_plots(model, plots_base)

    # Scores and top tokens both go to '<experiment_path>/<model name>.txt'.
    report_txt = path.join(config.experiment_path, _model_name + '.txt')
    printer.print_scores(model, _model_name, _n_iterations, report_txt)
    printer.print_top_tokens(model, report_txt)
    return model
def save_pickle_file(dists, filename):
    """Pickle `dists` to `filename` inside config.experiment_path.

    Args:
        dists: any picklable object.
        filename: file name relative to the experiment directory.

    Raises:
        Whatever open() or pickle.dump() raise; the file handle is closed
        in either case.
    """
    pickle_filename = path.join(config.experiment_path, filename)
    # The context manager closes the handle even when pickle.dump fails,
    # which the previous open()/close() pair did not.
    with open(pickle_filename, 'wb') as pickle_file:
        pickle.dump(dists, pickle_file)
def load_pickle_file(filename):
    """Load and return the object pickled in `filename` under config.experiment_path.

    Args:
        filename: file name relative to the experiment directory.

    Returns:
        The unpickled object.

    Raises:
        Whatever open() or pickle.load() raise; the file handle is closed
        in either case.
    """
    pickle_filename = path.join(config.experiment_path, filename)
    # NOTE: pickle.load can execute arbitrary code; only use this on files
    # written by this project (e.g. via save_pickle_file).
    with open(pickle_filename, 'rb') as pickle_file:
        return pickle.load(pickle_file)
def save_model_pickle(_model_name, _model, _save=True):
    """Extract phi, theta and the last top tokens from a model, optionally pickling them.

    Args:
        _model_name: suffix used in the pickle file names.
        _model: model exposing get_phi(), get_theta() and a score_tracker
            containing 'top_tokens_score'.
        _save: when true, write the three objects with save_pickle_file.

    Returns:
        (phi, theta, saved_top_tokens), where phi keeps only the rows that
        have at least one non-zero value.
    """
    full_phi = _model.get_phi()
    # Drop rows of phi that are zero in every column.
    has_nonzero = (full_phi.T != 0).any()
    phi = full_phi[has_nonzero]
    theta = _model.get_theta()
    saved_top_tokens = _model.score_tracker['top_tokens_score'].last_tokens

    if _save:
        artifacts = (
            ('phi_{}.p', phi),
            ('theta_{}.p', theta),
            ('saved_top_tokens_{}.p', saved_top_tokens),
        )
        for name_template, payload in artifacts:
            save_pickle_file(payload, name_template.format(_model_name))
    return phi, theta, saved_top_tokens
def load_model_pickle(_model_name, _distance_name):
    """Load the pickled phi, theta, top tokens and distances of a model.

    Args:
        _model_name: suffix of the 'phi_', 'theta_' and 'saved_top_tokens_'
            pickle files.
        _distance_name: base name of the distances pickle file.

    Returns:
        (phi, theta, saved_top_tokens, distances) as read by load_pickle_file.
    """
    model_templates = ('phi_{}.p', 'theta_{}.p', 'saved_top_tokens_{}.p')
    phi, theta, saved_top_tokens = [
        load_pickle_file(template.format(_model_name)) for template in model_templates
    ]
    distances = load_pickle_file('{}.p'.format(_distance_name))
    return phi, theta, saved_top_tokens, distances

# Model 1

In [3]:
# Paths for this experiment are read from its config file.
config = ConfigPaths('config_sample_m1.cfg')
# Helpers used by fit_one_model for plots and text reports.
plot_maker = PlotMaker()
printer = PrintHelper()
print(config.models_file_name)
# Opened in append mode; the handle is closed in a later cell.
models_file = open(config.models_file_name, 'a')

Q:\\topic_modeling\\csi_science_collections.git\experiments\pn_model1\np_25_01_m1\models.txt


In [6]:
# Read existing batches from the folder named in the current config.
batch_vectorizer = artm.BatchVectorizer(
    data_format='batches',
    data_path=config.output_batches_path,
)
# Load the dictionary from config.dictionary_path with '.dict' appended.
dictionary = artm.Dictionary()
dictionary.load(dictionary_path=config.dictionary_path + '.dict')

In [7]:
# Unregularized run with 20 topics.
tmp_model = create_model(
    current_dictionary=dictionary,
    n_topics=20,
    n_doc_passes=5,
    seed_value=100,
    n_top_tokens=15,
    p_mass_threshold=0.25,
)
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m1', _n_iterations=20)

[2017-01-25 12:57:06.793000] creating model
[2017-01-25 12:57:07.788000] adding scores
[2017-01-25 12:57:07.804000] fitting
[2017-01-25 13:06:32.344000] outputting
name = model_20_m1, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25



In [8]:
# Unregularized run with 50 topics.
tmp_model = create_model(
    current_dictionary=dictionary,
    n_topics=50,
    n_doc_passes=5,
    seed_value=100,
    n_top_tokens=15,
    p_mass_threshold=0.25,
)
tmp_model = fit_one_model(tmp_model, _model_name='model_50_m2', _n_iterations=20)

[2017-01-25 13:06:56.245000] creating model
[2017-01-25 13:07:00.965000] adding scores
[2017-01-25 13:07:01.361000] fitting
[2017-01-25 13:19:06.815000] outputting
name = model_50_m2, n_topics = 50, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25



In [9]:
# Unregularized run with 100 topics.
# NOTE(review): the output recorded under this cell reports
# "name = model_50_m3", so the model name was changed after the cell last
# ran; re-run the cell to refresh the output and the files on disk.
tmp_model = create_model(
    current_dictionary=dictionary,
    n_topics=100,
    n_doc_passes=5,
    seed_value=100,
    n_top_tokens=15,
    p_mass_threshold=0.25,
)
tmp_model = fit_one_model(tmp_model, _model_name='model_100_m3', _n_iterations=20)

[2017-01-25 13:19:22.352000] creating model
[2017-01-25 13:19:24.138000] adding scores
[2017-01-25 13:19:24.279000] fitting
[2017-01-25 13:37:58.481000] outputting
name = model_50_m3, n_topics = 100, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25



In [10]:
# Regularized run: decorrelator tau=100, theta tau=-1, phi tau=-1.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 100),
    ('ss_theta_regularizer', -1),
    ('ss_phi_regularizer', -1),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m4', _n_iterations=20)

[2017-01-25 13:38:21.927000] creating model
[2017-01-25 13:38:23.397000] adding scores
[2017-01-25 13:38:23.605000] fitting
[2017-01-25 13:48:28.322000] outputting
name = model_20_m4, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -1
decorrelator_phi_regularizer, tau = 100
ss_phi_regularizer, tau = -1



In [11]:
# Regularized run: decorrelator tau=100, theta tau=-1.5, phi tau=-2.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 100),
    ('ss_theta_regularizer', -1.5),
    ('ss_phi_regularizer', -2),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m5', _n_iterations=20)

[2017-01-25 13:49:52.432000] creating model
[2017-01-25 13:49:55.316000] adding scores
[2017-01-25 13:49:55.396000] fitting
[2017-01-25 14:00:01.268000] outputting
name = model_20_m5, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -1.5
decorrelator_phi_regularizer, tau = 100
ss_phi_regularizer, tau = -2



In [12]:
# Regularized run: decorrelator tau=100, theta tau=-2.5, phi tau=-3.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 100),
    ('ss_theta_regularizer', -2.5),
    ('ss_phi_regularizer', -3),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m6', _n_iterations=20)

[2017-01-25 14:03:35.984000] creating model
[2017-01-25 14:03:37.166000] adding scores
[2017-01-25 14:03:37.235000] fitting
[2017-01-25 14:12:44.161000] outputting
name = model_20_m6, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -2.5
decorrelator_phi_regularizer, tau = 100
ss_phi_regularizer, tau = -3



In [13]:
# Regularized run: decorrelator tau=1000, theta tau=-4, phi tau=-4.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 1000),
    ('ss_theta_regularizer', -4),
    ('ss_phi_regularizer', -4),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m7', _n_iterations=20)

[2017-01-25 14:13:43.871000] creating model
[2017-01-25 14:13:45.186000] adding scores
[2017-01-25 14:13:45.331000] fitting
[2017-01-25 14:21:55.639000] outputting
name = model_20_m7, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -4
decorrelator_phi_regularizer, tau = 1000
ss_phi_regularizer, tau = -4



In [14]:
# Close the models file that the config cell of this experiment opened in append mode.
models_file.close()

# Model 2

In [15]:
# Switch to the second experiment's paths; plot_maker and printer are reused.
config = ConfigPaths('config_sample_m2.cfg')
print(config.models_file_name)
# Opened in append mode; the handle is closed in a later cell.
models_file = open(config.models_file_name, 'a')

Q:\\topic_modeling\\csi_science_collections.git\experiments\pn_model2\np_25_01_m2\models.txt


In [16]:
# Read existing batches from the folder named in the current config.
batch_vectorizer = artm.BatchVectorizer(
    data_format='batches',
    data_path=config.output_batches_path,
)
# Load the dictionary from config.dictionary_path with '.dict' appended.
dictionary = artm.Dictionary()
dictionary.load(dictionary_path=config.dictionary_path + '.dict')

In [17]:
# Unregularized run with 20 topics.
tmp_model = create_model(
    current_dictionary=dictionary,
    n_topics=20,
    n_doc_passes=5,
    seed_value=100,
    n_top_tokens=15,
    p_mass_threshold=0.25,
)
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m1', _n_iterations=20)

[2017-01-25 14:22:04.419000] creating model
[2017-01-25 14:22:05.036000] adding scores
[2017-01-25 14:22:05.119000] fitting
[2017-01-25 14:23:44.174000] outputting
name = model_20_m1, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25



In [18]:
# Unregularized run with 50 topics.
tmp_model = create_model(
    current_dictionary=dictionary,
    n_topics=50,
    n_doc_passes=5,
    seed_value=100,
    n_top_tokens=15,
    p_mass_threshold=0.25,
)
tmp_model = fit_one_model(tmp_model, _model_name='model_50_m2', _n_iterations=20)

[2017-01-25 14:23:50.228000] creating model
[2017-01-25 14:23:50.666000] adding scores
[2017-01-25 14:23:50.766000] fitting
[2017-01-25 14:26:30.492000] outputting
name = model_50_m2, n_topics = 50, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25



In [19]:
# Unregularized run with 100 topics.
tmp_model = create_model(
    current_dictionary=dictionary,
    n_topics=100,
    n_doc_passes=5,
    seed_value=100,
    n_top_tokens=15,
    p_mass_threshold=0.25,
)
tmp_model = fit_one_model(tmp_model, _model_name='model_100_m3', _n_iterations=20)

[2017-01-25 14:26:35.852000] creating model
[2017-01-25 14:26:36.338000] adding scores
[2017-01-25 14:26:36.417000] fitting
[2017-01-25 14:30:53.511000] outputting
name = model_100_m3, n_topics = 100, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25



In [20]:
# Regularized run: decorrelator tau=100, theta tau=-1, phi tau=-1.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 100),
    ('ss_theta_regularizer', -1),
    ('ss_phi_regularizer', -1),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m4', _n_iterations=20)

[2017-01-25 14:31:00.335000] creating model
[2017-01-25 14:31:00.767000] adding scores
[2017-01-25 14:31:00.867000] fitting
[2017-01-25 14:32:31.318000] outputting
name = model_20_m4, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -1
decorrelator_phi_regularizer, tau = 100
ss_phi_regularizer, tau = -1



In [21]:
# Regularized run: decorrelator tau=100, theta tau=-1.5, phi tau=-2.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 100),
    ('ss_theta_regularizer', -1.5),
    ('ss_phi_regularizer', -2),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m5', _n_iterations=20)

[2017-01-25 14:32:34.156000] creating model
[2017-01-25 14:32:34.585000] adding scores
[2017-01-25 14:32:34.669000] fitting
[2017-01-25 14:34:05.175000] outputting
name = model_20_m5, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -1.5
decorrelator_phi_regularizer, tau = 100
ss_phi_regularizer, tau = -2



In [22]:
# Regularized run: decorrelator tau=100, theta tau=-2.5, phi tau=-3.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 100),
    ('ss_theta_regularizer', -2.5),
    ('ss_phi_regularizer', -3),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m6', _n_iterations=20)

[2017-01-25 14:34:07.600000] creating model
[2017-01-25 14:34:08.031000] adding scores
[2017-01-25 14:34:08.100000] fitting
[2017-01-25 14:35:38.240000] outputting
name = model_20_m6, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -2.5
decorrelator_phi_regularizer, tau = 100
ss_phi_regularizer, tau = -3



In [23]:
# Regularized run: decorrelator tau=1000, theta tau=-3, phi tau=-3.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 1000),
    ('ss_theta_regularizer', -3),
    ('ss_phi_regularizer', -3),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m7', _n_iterations=20)

[2017-01-25 14:39:13.776000] creating model
[2017-01-25 14:39:14.252000] adding scores
[2017-01-25 14:39:14.330000] fitting
[2017-01-25 14:40:27.552000] outputting
name = model_20_m7, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -3
decorrelator_phi_regularizer, tau = 1000
ss_phi_regularizer, tau = -3



In [24]:
# Close the models file that the config cell of this experiment opened in append mode.
models_file.close()

# Model 3

In [35]:
# Switch to the third experiment's paths; plot_maker and printer are reused.
# NOTE(review): this cell's execution count (35) is out of sequence with its
# neighbours, so it was re-run after the m7 fit; confirm models_file had been
# closed before it was reopened here.
config = ConfigPaths('config_sample_m3.cfg')
print(config.models_file_name)
# Opened in append mode; the handle is closed in a later cell.
models_file = open(config.models_file_name, 'a')

Q:\\topic_modeling\\csi_science_collections.git\experiments\pn_model3\np_25_01_m3\models.txt


In [26]:
# Read existing batches from the folder named in the current config.
batch_vectorizer = artm.BatchVectorizer(
    data_format='batches',
    data_path=config.output_batches_path,
)
# Load the dictionary from config.dictionary_path with '.dict' appended.
dictionary = artm.Dictionary()
dictionary.load(dictionary_path=config.dictionary_path + '.dict')

In [27]:
# Unregularized run with 20 topics.
tmp_model = create_model(
    current_dictionary=dictionary,
    n_topics=20,
    n_doc_passes=5,
    seed_value=100,
    n_top_tokens=15,
    p_mass_threshold=0.25,
)
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m1', _n_iterations=20)

[2017-01-25 14:40:30.932000] creating model
[2017-01-25 14:40:31.037000] adding scores
[2017-01-25 14:40:31.107000] fitting
[2017-01-25 14:40:54.407000] outputting
name = model_20_m1, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25



In [28]:
# Unregularized run with 50 topics.
tmp_model = create_model(
    current_dictionary=dictionary,
    n_topics=50,
    n_doc_passes=5,
    seed_value=100,
    n_top_tokens=15,
    p_mass_threshold=0.25,
)
tmp_model = fit_one_model(tmp_model, _model_name='model_50_m2', _n_iterations=20)

[2017-01-25 14:40:58.242000] creating model
[2017-01-25 14:40:58.351000] adding scores
[2017-01-25 14:40:58.428000] fitting
[2017-01-25 14:41:39.006000] outputting
name = model_50_m2, n_topics = 50, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25



In [29]:
# Unregularized run with 100 topics.
tmp_model = create_model(
    current_dictionary=dictionary,
    n_topics=100,
    n_doc_passes=5,
    seed_value=100,
    n_top_tokens=15,
    p_mass_threshold=0.25,
)
tmp_model = fit_one_model(tmp_model, _model_name='model_100_m3', _n_iterations=20)

[2017-01-25 14:41:42.175000] creating model
[2017-01-25 14:41:42.339000] adding scores
[2017-01-25 14:41:42.398000] fitting
[2017-01-25 14:42:52.370000] outputting
name = model_100_m3, n_topics = 100, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25



In [30]:
# Regularized run: decorrelator tau=100, theta tau=-1, phi tau=-1.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 100),
    ('ss_theta_regularizer', -1),
    ('ss_phi_regularizer', -1),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m4', _n_iterations=20)

[2017-01-25 14:42:56.135000] creating model
[2017-01-25 14:42:56.237000] adding scores
[2017-01-25 14:42:56.312000] fitting
[2017-01-25 14:43:23.471000] outputting
name = model_20_m4, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -1
decorrelator_phi_regularizer, tau = 100
ss_phi_regularizer, tau = -1



In [31]:
# Regularized run: decorrelator tau=100, theta tau=-1.5, phi tau=-2.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 100),
    ('ss_theta_regularizer', -1.5),
    ('ss_phi_regularizer', -2),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m5', _n_iterations=20)

[2017-01-25 14:43:26.337000] creating model
[2017-01-25 14:43:26.435000] adding scores
[2017-01-25 14:43:26.498000] fitting
[2017-01-25 14:43:52.106000] outputting
name = model_20_m5, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -1.5
decorrelator_phi_regularizer, tau = 100
ss_phi_regularizer, tau = -2



In [32]:
# Regularized run: decorrelator tau=100, theta tau=-2.5, phi tau=-3.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 100),
    ('ss_theta_regularizer', -2.5),
    ('ss_phi_regularizer', -3),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m6', _n_iterations=20)

[2017-01-25 14:43:55.529000] creating model
[2017-01-25 14:43:55.638000] adding scores
[2017-01-25 14:43:55.717000] fitting
[2017-01-25 14:44:22.576000] outputting
name = model_20_m6, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -2.5
decorrelator_phi_regularizer, tau = 100
ss_phi_regularizer, tau = -3



In [33]:
# Regularized run: decorrelator tau=500, theta tau=-3, phi tau=-3.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 500),
    ('ss_theta_regularizer', -3),
    ('ss_phi_regularizer', -3),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m7', _n_iterations=20)

[2017-01-25 14:46:11.445000] creating model
[2017-01-25 14:46:11.566000] adding scores
[2017-01-25 14:46:11.578000] fitting
[2017-01-25 14:46:30.702000] outputting
name = model_20_m7, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -3
decorrelator_phi_regularizer, tau = 500
ss_phi_regularizer, tau = -3



In [36]:
# Regularized run: decorrelator tau=100, theta tau=-3, phi tau=-5.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 100),
    ('ss_theta_regularizer', -3),
    ('ss_phi_regularizer', -5),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m8', _n_iterations=20)

[2017-01-25 14:50:01.411000] creating model
[2017-01-25 14:50:01.528000] adding scores
[2017-01-25 14:50:01.557000] fitting
[2017-01-25 14:50:21.069000] outputting
name = model_20_m8, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -3
decorrelator_phi_regularizer, tau = 100
ss_phi_regularizer, tau = -5



In [37]:
# Regularized run: decorrelator tau=1000, theta tau=-3, phi tau=-5.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 1000),
    ('ss_theta_regularizer', -3),
    ('ss_phi_regularizer', -5),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m9', _n_iterations=20)

[2017-01-25 14:50:49.735000] creating model
[2017-01-25 14:50:49.894000] adding scores
[2017-01-25 14:50:49.953000] fitting
[2017-01-25 14:51:09.378000] outputting
name = model_20_m9, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -3
decorrelator_phi_regularizer, tau = 1000
ss_phi_regularizer, tau = -5



In [38]:
# Regularized run: decorrelator tau=100, theta tau=-3, phi tau=-20.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 100),
    ('ss_theta_regularizer', -3),
    ('ss_phi_regularizer', -20),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m10', _n_iterations=20)

[2017-01-25 14:51:28.395000] creating model
[2017-01-25 14:51:28.577000] adding scores
[2017-01-25 14:51:28.635000] fitting
[2017-01-25 14:51:47.450000] outputting
name = model_20_m10, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -3
decorrelator_phi_regularizer, tau = 100
ss_phi_regularizer, tau = -20



In [39]:
# Regularized run: decorrelator tau=100, theta tau=-10, phi tau=-25.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 100),
    ('ss_theta_regularizer', -10),
    ('ss_phi_regularizer', -25),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m11', _n_iterations=20)

[2017-01-25 14:52:06.649000] creating model
[2017-01-25 14:52:06.818000] adding scores
[2017-01-25 14:52:06.907000] fitting
[2017-01-25 14:52:25.195000] outputting
name = model_20_m11, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -10
decorrelator_phi_regularizer, tau = 100
ss_phi_regularizer, tau = -25



In [40]:
# Regularized run: decorrelator tau=1000, theta tau=-10, phi tau=-25.
# NOTE: the author marked this configuration as "bad"; the reason was not recorded.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 1000),
    ('ss_theta_regularizer', -10),
    ('ss_phi_regularizer', -25),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m12', _n_iterations=20)

[2017-01-25 14:53:11.126000] creating model
[2017-01-25 14:53:11.299000] adding scores
[2017-01-25 14:53:11.342000] fitting
[2017-01-25 14:53:29.891000] outputting
name = model_20_m12, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -10
decorrelator_phi_regularizer, tau = 1000
ss_phi_regularizer, tau = -25



In [41]:
# Regularized run: decorrelator tau=300, theta tau=-5, phi tau=-10.
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 300),
    ('ss_theta_regularizer', -5),
    ('ss_phi_regularizer', -10),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m13', _n_iterations=20)

[2017-01-25 14:54:48.669000] creating model
[2017-01-25 14:54:48.843000] adding scores
[2017-01-25 14:54:48.885000] fitting
[2017-01-25 14:55:10.884000] outputting
name = model_20_m13, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -5
decorrelator_phi_regularizer, tau = 300
ss_phi_regularizer, tau = -10



In [42]:
# Regularized run: decorrelator tau=300, theta tau=-5, phi tau=-20.
# NOTE: the author marked this configuration with "this one" (presumably the
# one selected for further use; the criterion was not recorded).
tmp_model = create_model(
    current_dictionary=dictionary, n_topics=20, n_doc_passes=5,
    seed_value=100, n_top_tokens=15, p_mass_threshold=0.25)
for regularizer in (
    artm.DecorrelatorPhiRegularizer(name='decorrelator_phi_regularizer', class_ids=['@default_class']),
    artm.SmoothSparseThetaRegularizer(name='ss_theta_regularizer'),
    artm.SmoothSparsePhiRegularizer(name='ss_phi_regularizer', class_ids=['@default_class']),
):
    tmp_model.regularizers.add(regularizer)
for reg_name, reg_tau in (
    ('decorrelator_phi_regularizer', 300),
    ('ss_theta_regularizer', -5),
    ('ss_phi_regularizer', -20),
):
    tmp_model.regularizers[reg_name].tau = reg_tau
tmp_model = fit_one_model(tmp_model, _model_name='model_20_m14', _n_iterations=20)

[2017-01-25 14:59:21.681000] creating model
[2017-01-25 14:59:21.861000] adding scores
[2017-01-25 14:59:21.921000] fitting
[2017-01-25 14:59:41.355000] outputting
name = model_20_m14, n_topics = 20, n_doc_passes = 5, seed_value = 100, n_iterations = 20, n_top_tokens = 15, p_threshold = 0.25
ss_theta_regularizer, tau = -5
decorrelator_phi_regularizer, tau = 300
ss_phi_regularizer, tau = -20



In [43]:
# Close the models file that the config cell of this experiment opened in append mode.
models_file.close()