In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import artm

# Reduce the verbosity of BigARTM's own logging for the rest of the notebook.
# NOTE(review): minloglevel looks like a glog-style severity threshold, where 3
# would keep only FATAL messages — confirm against the BigARTM documentation.
lc = artm.messages.ConfigureLoggingArgs()
lc.minloglevel = 3
# The logging config is handed to the LibArtm wrapper at construction time;
# `lib` is not referenced again in the visible cells.
lib = artm.wrapper.LibArtm(logging_config=lc)

import warnings
# Hide DeprecationWarning messages so they do not clutter the cell outputs.
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [2]:
from copy import deepcopy
from topicnet.cooking_machine.models.topic_model import TopicModel
from topicnet.cooking_machine.cubes import RegularizersModifierCube
from topicnet.cooking_machine.experiment import Experiment
from topicnet.cooking_machine.cubes import *
from topicnet.cooking_machine.dataset import Dataset

%load_ext autoreload
%autoreload 2

In [3]:
# Remove the results of previous runs so the experiment created below starts
# from an empty save directory.  shutil.rmtree(..., ignore_errors=True) keeps
# the cell silent when the directory does not exist yet (fresh checkout,
# Restart & Run All) and does not depend on a POSIX shell being available.
import shutil

shutil.rmtree('topicnet/experiments/', ignore_errors=True)

In [4]:
# `display` and `HTML` belong to the public `IPython.display` API; importing
# them from `IPython.core.display` is deprecated in recent IPython releases.
from IPython.display import HTML, clear_output, display, display_html

# Inject CSS that widens elements with the `.container` class to 90% of the
# window (the `!important` flag overrides the notebook's own width rule).
display(HTML("<style>.container { width:90% !important; }</style>"))

In [19]:
# Inject CSS for <pre> elements inside output areas: `white-space: pre` plus
# `word-wrap: normal` stop long lines from being wrapped, so wide text output
# keeps its column alignment.
display(HTML(
    "<style>\n"
    "div .output_subarea > pre {\n"
    "  white-space: pre;\n"
    "  word-wrap: normal;\n"
    "}\n"
    "</style>"
))

## Инициализация модели

Загружаем датасет и создаем модель `ARTM`:

In [5]:
# Location of the corpus CSV, relative to the notebook's working directory.
DATA_PATH_SCI = 'topicnet/PScience.csv'

# Wrap the CSV in a TopicNet Dataset and take its dictionary, which is
# passed to the model constructor in the next cell.
dataset_sci = Dataset(DATA_PATH_SCI)
dictionary_sci = dataset_sci.get_dictionary()

In [6]:
from topicnet.cooking_machine.baselines import init_simple_default_model

# Build the baseline ARTM model: only the '@word' modality is used (and it is
# also the main one), with 14 specific topics and 1 background topic.
model_artm = init_simple_default_model(
    dictionary=dictionary_sci,
    modalities_to_use={'@word'},
    main_modality='@word',
    n_specific_topics=14,
    n_background_topics=1,
)

# Show which scores are attached to the freshly built model.
model_artm.scores

[PerplexityScore@all, SparsityThetaScore, SparsityPhiScore@word, PerplexityScore@word, TopicKernel@word]

## Cooking Machine

Создаем объекты `TopicModel` и `Experiment`:

In [7]:
# Wrap the ARTM model in a TopicModel; 'Groot' is the id under which it is
# shown as the root of the experiment tree printed below.
tm = TopicModel(model_artm, model_id='Groot')

# Create the experiment with `tm` as its starting model; "topicnet/experiments"
# is given as the save path.
experiment = Experiment(experiment_id="Short_test", save_path="topicnet/experiments", topic_model=tm)

In [8]:
from topicnet.cooking_machine.cubes import RegularizersModifierCube
from topicnet.cooking_machine.cubes.perplexity_strategy import retrieve_score_for_strategy
from topicnet.cooking_machine.cubes.perplexity_strategy import PerplexityStrategy

In [9]:
# First search stage: attach a Phi-decorrelation regularizer to the starting
# model and let the perplexity-based strategy choose its tau values.
# NOTE(review): PerplexityStrategy(1,10,10) is presumably
# (start_point, step, max_len); with reg_search='mul' the step would then be
# applied multiplicatively — confirm against the TopicNet documentation.
my_first_cube = RegularizersModifierCube(
    num_iter=15,
    strategy=PerplexityStrategy(1,10,10),
    tracked_score_function=retrieve_score_for_strategy('PerplexityScore@word'),
    regularizer_parameters={
        'regularizer': artm.DecorrelatorPhiRegularizer(name='decorrelation_phi', tau=1),
        # Left empty here; presumably the grid is produced by `strategy` — TODO confirm.
        'tau_grid': [],
    },
    reg_search='mul',
    verbose=True
)
# Run the cube starting from `tm` on the science dataset.
my_first_cube(tm, dataset_sci)

# Print the experiment's text description (model tree and list of cubes).
print(experiment.get_description())

10it [02:37, 16.43s/it]

Experiment Short_test

Experiment was made with BigARTM 0.10.0
Tree:
<<<<<<<<start>>>>>>>>──┐
                       │                       ┌13h42m02s_07d08m2019y
                       │                       ├13h42m17s_07d08m2019y
                       │                       ├13h42m32s_07d08m2019y
                       │                       ├13h42m47s_07d08m2019y
                       │                       ├13h43m03s_07d08m2019y
                       └########Groot########──┤
                                               ├13h43m18s_07d08m2019y
                                               ├13h43m33s_07d08m2019y
                                               ├13h43m48s_07d08m2019y
                                               ├13h44m04s_07d08m2019y
                                               └13h44m23s_07d08m2019y
Cubes:
 START                 | INIT                  | REG_MODIFIER             
                       |                       |                          





In [10]:
# Selection queries for the experiment: minimise word perplexity, and
# maximise the average kernel contrast.
baseline_select = 'PerplexityScore@word -> min'
contrast_select = 'TopicKernel@word.average_contrast -> max'

# Concatenate both selections; the result is the starting point of the next cube.
# NOTE(review): if the same model wins both queries it would presumably appear
# twice in this list — confirm how select() / the cube treat duplicates.
first_cube_models = experiment.select(baseline_select) + experiment.select(contrast_select)

In [11]:
# Second search stage: starting from the models selected after the first cube,
# attach a smooth/sparse Phi regularizer and search over its tau.
# NOTE(review): PerplexityStrategy(0.0, -10, 10) is presumably
# (start_point, step, max_len); with reg_search='add' tau would then move from
# 0 in steps of -10 — confirm against the TopicNet documentation.
my_second_cube = RegularizersModifierCube(
    num_iter=15,
    strategy=PerplexityStrategy(0.0, -10, 10),
    tracked_score_function=retrieve_score_for_strategy('PerplexityScore@word'),
    regularizer_parameters={
        'regularizer': artm.SmoothSparsePhiRegularizer(name='sparse_phi', tau=1),
        # Left empty here; presumably the grid is produced by `strategy` — TODO confirm.
        'tau_grid': [],
    },
    reg_search='add',
    verbose=True,
)
# Run the cube from every model selected after the first stage.
my_second_cube(first_cube_models, dataset_sci)

# Print the updated experiment description (model tree and list of cubes).
print(experiment.get_description())


11it [02:59, 16.52s/it]

Experiment Short_test

Experiment was made with BigARTM 0.10.0
Tree:
<<<<<<<<start>>>>>>>>──┐
                       │                       ┌13h42m02s_07d08m2019y
                       │                       ├13h42m17s_07d08m2019y
                       │                       ├13h42m32s_07d08m2019y
                       │                       ├13h42m47s_07d08m2019y
                       │                       ├13h43m03s_07d08m2019y
                       └########Groot########──┤
                                               ├13h43m18s_07d08m2019y
                                               ├13h43m33s_07d08m2019y
                                               ├13h43m48s_07d08m2019y
                                               │                           ┌13h44m53s_07d08m2019y
                                               ├13h44m04s_07d08m2019y──────┤
                                               │                           ├13h45m11s_07d08m2019y
                        




In [12]:
# Both queries first require word perplexity below 1.2 x MINIMUM(PerplexityScore@word)
# and then maximise, respectively, the average kernel contrast and the Phi sparsity.
contrast_criterion = 'PerplexityScore@word < 1.2 * MINIMUM(PerplexityScore@word) and TopicKernel@word.average_contrast -> max'
sparse_criterion = 'PerplexityScore@word < 1.2 * MINIMUM(PerplexityScore@word) and SparsityPhiScore@word -> max'

# Concatenate the two selections.
# NOTE(review): the same model may satisfy both queries, in which case it would
# presumably be listed twice — confirm.
final_models = experiment.select(contrast_criterion) + experiment.select(sparse_criterion)

In [61]:
from topicnet.cooking_machine.models.base_score import BaseScore

class ThatCustomScore(BaseScore):
    """Example custom score: difference between two already-tracked scores.

    Takes the most recent value of one score and subtracts from it the most
    recent per-topic value of another score.
    """

    def __init__(self):
        super().__init__()

    def call(self, model,
             score_to_max='TopicKernel@word.average_contrast',
             score_to_min='TopicKernel@word.contrast',
             topic='background_14'):
        """Return ``scores[score_to_max][-1] - scores[score_to_min][-1][topic]``.

        Parameters
        ----------
        model
            Object exposing ``scores``, indexable by score name; each entry
            is a sequence whose last element is the latest value.
        score_to_max : str
            Name of the score whose latest value is the minuend.
        score_to_min : str
            Name of the score whose latest value is indexed by ``topic``
            to obtain the subtrahend.
        topic : str
            Key looked up in the latest ``score_to_min`` value.
        """
        latest_to_max = model.scores[score_to_max][-1]
        latest_to_min = model.scores[score_to_min][-1][topic]
        return latest_to_max - latest_to_min

In [62]:
# Instantiate the custom score class defined in the previous cell.
custom_score = ThatCustomScore()

In [63]:
# Evaluate the score on the first selected model, using the default score
# names and the default topic.
custom_score.call(final_models[0])

-0.020692765712738037

In [47]:
# Sanity check, first term of the custom score: latest average kernel contrast.
final_models[0].scores['TopicKernel@word.average_contrast'][-1]

0.9572338461875916

In [55]:
# Sanity check, second term: latest kernel contrast of the 'background_14' topic.
final_models[0].scores['TopicKernel@word.contrast'][-1]['background_14']

0.9779266119003296