In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import artm

# change log style
lc = artm.messages.ConfigureLoggingArgs()
lc.minloglevel = 3
lib = artm.wrapper.LibArtm(logging_config=lc)

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [2]:
from copy import deepcopy
from topicnet.cooking_machine.models.topic_model import TopicModel
from topicnet.cooking_machine.cubes import RegularizersModifierCube
from topicnet.cooking_machine.experiment import Experiment
from topicnet.cooking_machine.cubes import *
from topicnet.cooking_machine.dataset import Dataset

%load_ext autoreload
%autoreload 2

In [3]:
import topicnet.cooking_machine.cubes as tncubes

In [4]:
from IPython.core.display import display, HTML
from IPython.display import clear_output, display_html
display(HTML("""<style>
.container { width:90% !important; }
div.output_scroll .output_subarea { white-space: pre; }
</style>"""))

In [5]:
! rm -r topicnet/experiments/

## Инициализация модели

Создаем `ARTM` модель:

In [6]:
DATA_PATH_SUB = 'topicnet/subtitles.csv'
DATA_PATH_SCI = 'topicnet/PScience.csv'

In [7]:
dataset_sub = Dataset(DATA_PATH_SUB)
dictionary_sub = dataset_sub.get_dictionary()

dataset_sci = Dataset(DATA_PATH_SCI)
dictionary_sci = dataset_sci.get_dictionary()

In [8]:
dataset_sci.get_possible_modalities()

{'@2gramm', '@3gramm', '@author', '@post_tag', '@snippet', '@title', '@word'}

In [9]:
from topicnet.cooking_machine.baselines import init_simple_default_model

n_topics = 15
n_background = 1
model_artm = init_simple_default_model(
    dictionary=dictionary_sci,
    modalities_to_use=dataset_sci.get_possible_modalities(),
    main_modality='@word',
    n_specific_topics=n_topics - n_background,
    n_background_topics=n_background,
)
specific_topics, bacground_topics = model_artm.topic_names[:-n_background], model_artm.topic_names[-n_background:]
model_artm.scores

[PerplexityScore@all, SparsityThetaScore, SparsityPhiScore@2gramm, PerplexityScore@2gramm, TopicKernel@2gramm, SparsityPhiScore@post_tag, PerplexityScore@post_tag, TopicKernel@post_tag, SparsityPhiScore@3gramm, PerplexityScore@3gramm, TopicKernel@3gramm, SparsityPhiScore@word, PerplexityScore@word, TopicKernel@word, SparsityPhiScore@snippet, PerplexityScore@snippet, TopicKernel@snippet, SparsityPhiScore@author, PerplexityScore@author, TopicKernel@author, SparsityPhiScore@title, PerplexityScore@title, TopicKernel@title]

## Cooking Machine

Создаем объект `TopicModel` и `Experiment`:
##### Для `TopicModel` можно объявить кастомный скор
Скор наследуется от базового класса отвечающего за хранение и обновление истории метрики

In [10]:
from topicnet.cooking_machine.models.base_score import BaseScore

class ThatCustomScore(BaseScore):
    def __init__(self):
        super().__init__()

    def call(self, model,
             score_to_max = 'TopicKernel@word',
             score_to_min='TopicKernel@word',
             topic='background_14'):
        want_higher = model.get_score('TopicKernel@word').average_kernel_contrast
        want_lower = model.get_score('TopicKernel@word').kernel_contrast[-1]
        
        return want_higher - want_lower

In [11]:
custom_scores = {'GoodinessMeasure': ThatCustomScore()}
tm = TopicModel(model_artm, model_id='new_id', custom_scores=custom_scores)

In [12]:
# experiment starts without model

experiment = Experiment(experiment_id="Test_", save_path="topicnet/experiments")

#experiment = Experiment(experiment_id="Test_", save_path="topicnet/experiments", model=tm)

#### Воспроизведём стратегию Насти Яниной из статьи "Мультимодальные тематические модели для разведочного поиска в коллективном блоге"
Для этого понадобится описание стратегии через параметры кубов добавляющих регуляризаторы и перебирающих значения их гиперпараметров

In [13]:
from topicnet.cooking_machine.cubes.perplexity_strategy import retrieve_score_for_strategy
from topicnet.cooking_machine.cubes.perplexity_strategy import PerplexityStrategy

cube_iters = 5

cube_settings = [
    {'CubeCreator':{
        'model': tm,
        'num_iter': cube_iters,
        'parameters':[
            {'name': 'seed',
             'values': [82019, 322],
            },
        ],
        'reg_search': 'grid',
    },
    'selection': [
        f'model.seed = 82019 and PerplexityScore@all -> min',
        f'model.seed = 322 and PerplexityScore@all -> min']
    },
    {'RegularizersModifierCube':{
        'num_iter': cube_iters,
        'strategy': PerplexityStrategy(0.0, 10, 15),
        'tracked_score_function': retrieve_score_for_strategy('GoodinessMeasure'),
        'regularizer_parameters': {
             "regularizer": artm.SmoothSparsePhiRegularizer(
                 name='smooth_phi_bcg',
                 topic_names=bacground_topics,
                 tau=1,
                 class_ids=['@word']),
             "tau_grid": [],
            },
        'reg_search': 'add',
        'verbose': False,
    },
    'selection': [f'model.seed = 82019 and GoodinessMeasure -> max',
                  f'model.seed = 322 and GoodinessMeasure -> max']        
    },
    {'RegularizersModifierCube':{
        'num_iter': cube_iters,
        'strategy': PerplexityStrategy(0.0, 10, 15),
        'tracked_score_function': retrieve_score_for_strategy('GoodinessMeasure'),
        'regularizer_parameters': {
             "regularizer": artm.SmoothSparseThetaRegularizer(
                 name='smooth_theta_bcg',
                 topic_names=bacground_topics,
                 tau=1),
             "tau_grid": [],
            },
        'reg_search': 'add',
        'verbose': False,
    },
    'selection': [f'model.seed = 82019 and GoodinessMeasure -> max',
                  f'model.seed = 322 and GoodinessMeasure -> max']
    },
]
class_ids=['@word', '@2gramm', '@3gramm']
for modality in class_ids:
    cube_settings += [{
        'RegularizersModifierCube':{
            'num_iter': cube_iters,
            'strategy': PerplexityStrategy(1, 10, 10),
            'tracked_score_function': retrieve_score_for_strategy(f'PerplexityScore{modality}'),
            'regularizer_parameters': {
             "regularizer": artm.DecorrelatorPhiRegularizer(
                 name=f'decorrelation_phi_{modality}',
                 topic_names=specific_topics,
                 tau=1,
                 class_ids=[modality]),
             'tau_grid': [],
            },
            'reg_search': 'mul',
            'verbose': True,
        },
        'selection': [f'model.seed = 82019 and PerplexityScore{modality} -> min',
                      f'model.seed = 322 and PerplexityScore{modality} -> min']
    }]
    cube_settings += [{
        'RegularizersModifierCube':{
            'num_iter': cube_iters,
            'strategy': PerplexityStrategy(0.0, -10, 15),
            'tracked_score_function': retrieve_score_for_strategy(f'PerplexityScore{modality}'),
            'regularizer_parameters': {
                "regularizer": artm.SmoothSparsePhiRegularizer(
                     name=f'sparse_phi_{modality}',
                     topic_names=specific_topics,
                     tau=1,
                     class_ids=[modality]),
                'tau_grid': [],
            },
            'reg_search': 'add',
            'verbose': False,
        },
        'selection': [f'model.seed = 82019 and PerplexityScore{modality} < 1.1 * MINIMUM(PerplexityScore{modality}) and SparsityPhiScore{modality} -> max',
                      f'model.seed = 322 and PerplexityScore{modality} < 1.1 * MINIMUM(PerplexityScore{modality}) and SparsityPhiScore{modality} -> max']
    }]
cube_settings += [{
    'RegularizersModifierCube':{
        'num_iter': cube_iters,
        'strategy': PerplexityStrategy(0.0, 10, 15),
        'tracked_score_function': retrieve_score_for_strategy(f'SparsityThetaScore'),
        'regularizer_parameters': {
            "regularizer": artm.SmoothSparseThetaRegularizer(
                 name='smooth_theta',
                 topic_names=specific_topics,
                 tau=1),
            'tau_grid': [],
        },
        'reg_search': 'add',
        'verbose': True,
    },
    'selection': [f'model.seed = 82019 and PerplexityScore{modality} < 1.1 * MINIMUM(PerplexityScore{modality}) and SparsityThetaScore -> max',
                  f'model.seed = 322 and PerplexityScore{modality} < 1.1 * MINIMUM(PerplexityScore{modality}) and SparsityThetaScore -> max']
}]

In [14]:
experiment.build(cube_settings)

In [15]:
display(HTML("""<style>
div .output_subarea > pre {
  white-space: pre;
  word-wrap: normal;
}
</style>"""))

In [16]:
import warnings
with warnings.catch_warnings():
    warnings.simplefilter('ignore', RuntimeWarning)
    final_models = experiment.run(dataset_sci, verbose=True, nb_verbose=True)

Experiment Test_

Experiment was made with BigARTM 0.10.0
Tree:
                                                        ┌21h33m49s_07d08m2019y
                       ┌21h33m29s_07d08m2019y───────────┤
                       │                                ├21h34m01s_07d08m2019y
                       │                                │                                  ┌21h35m13s_07d08m2019y
                       │                                │                                  ├21h35m26s_07d08m2019y
                       │                                │                                  │                                  ┌21h44m18s_07d08m2019y
                       │                                │                                  │                                  ├21h44m35s_07d08m2019y
                       │                                │                                  │                                  ├21h44m51s_07d08m2019y
                       │                     

In [17]:
tm.info

config {
  topic_name: "topic_0"
  topic_name: "topic_1"
  topic_name: "topic_2"
  topic_name: "topic_3"
  topic_name: "topic_4"
  topic_name: "topic_5"
  topic_name: "topic_6"
  topic_name: "topic_7"
  topic_name: "topic_8"
  topic_name: "topic_9"
  topic_name: "topic_10"
  topic_name: "topic_11"
  topic_name: "topic_12"
  topic_name: "topic_13"
  topic_name: "background_14"
  class_id: "@2gramm"
  class_id: "@post_tag"
  class_id: "@3gramm"
  class_id: "@word"
  class_id: "@snippet"
  class_id: "@author"
  class_id: "@title"
  class_weight: 8.780309677124023
  class_weight: 79.51461029052734
  class_weight: 245.16552734375
  class_weight: 1.0
  class_weight: 31.14565658569336
  class_weight: 389.0713806152344
  class_weight: 105.8414306640625
  score_config {
    name: "PerplexityScore@all"
    type: ScoreType_Perplexity
    config: "\010\001\022$d99a5640-f468-4bbf-b93a-5abe512d0cea\032\007@2gramm\032\t@post_tag\032\007@3gramm\032\005@word\032\010@snippet\032\007@author\032\006@title

In [18]:
display(HTML("""<style>
.container { width:90% !important; }
div.output_scroll .output_subarea { white-space: pre; }
</style>"""))

Посмотрим на перплексию полученных во время тренировки моделей

In [19]:
from matplotlib import cm

tms = experiment.models
n=len(tms)
colors = np.linspace(0, 0.65, n)

plt.figure(figsize=(8,8))
for i, tm in enumerate(list(tms.values())[2:]):
    perp_score = tm.scores['PerplexityScore@all']
    plt.plot(perp_score[1:], color=cm.Blues(colors[i]))
plt.show()

KeyError: 'PerplexityScore'

<Figure size 576x576 with 0 Axes>

In [None]:
len(tms)

## Давайте посмотрим на отобранные модели

Сначала посмотрим на топ токены тем получившихся моделей

In [20]:
num_show = 2
show_all = False
n = None if show_all else num_show 
first_model = final_models[0]
second_model = final_models[1]

In [21]:
from topicnet.viewers.top_tokens_viewer import TopTokensViewer

In [22]:
thresh = 1e-5
first_model_top_tok = TopTokensViewer(first_model, num_top_tokens=10, method='phi')
second_model_top_tok = TopTokensViewer(second_model, num_top_tokens=10, method='phi')

first_model_html =  first_model_top_tok.to_html(first_model_top_tok.view(),thresh=thresh)
second_model_html = second_model_top_tok.to_html(second_model_top_tok.view(),thresh=thresh)

Методы вывода топ токенов: 
* 'top/phi'
* 'blei'
* 'pwt'
* 'likelihood'
* 'tfidf' (special)

In [23]:
for line in first_model_html[:n]:
    display_html(line, raw=True)

Unnamed: 0,topic_0 @2gramm
центральный_банк,0.0098
нейронный_сеть,0.00788
государственный_долг,0.00618
центральный_банка,0.00591
средний_класс,0.00479
развитый_страна,0.0045
среднее_класс,0.00437
крупный_город,0.00412
машинный_обучение,0.00392
рак_грудь,0.00373

Unnamed: 0,topic_0 @3gramm
научный_точка_зрение,0.09655
теория_рациональный_выбор,0.06212
теория_международный_отношение,0.06212
противоречить_друг_друг,0.05781
конец_прошлое_век,0.05781
чёрный_дыра_являться,0.0449
играть_огромный_роль,0.04059
математический_точка_зрение,0.03629
создание_новый_материал,0.03198
полиненасыщенный_жирный_кислота,0.03198

Unnamed: 0,topic_0 @author
fedichev,0.05071
voskoboynikov,0.05071
alekseevsky,0.0461
grigoriev,0.0461
lvovski,0.04149
umruhin,0.03688
dobrovolskaya,0.03688
los,0.03227
sivkov,0.03227
schurov,0.03227

Unnamed: 0,topic_0 @post_tag
город,0.05048
экономика,0.0493
россия,0.03646
культура,0.03041
медицина,0.02665
городское_пространство,0.02443
урбанистика,0.02363
общество,0.0235
математика,0.02324
технологии,0.02314

Unnamed: 0,topic_0 @snippet
и,0.07685
о,0.06323
в,0.01849
об,0.01297
фактов,0.01136
александр,0.00905
экономист,0.00853
дмитрий,0.0055
биофизик,0.00542
7,0.00472

Unnamed: 0,topic_0 @title
и,0.04436
в,0.04242
как,0.01314
пространства,0.01124
эмоций,0.01018
что,0.01001
физиология,0.00638
стресс,0.00632
роль,0.00624
город,0.00611

Unnamed: 0,topic_0 @word
город,0.00771
страна,0.00442
являться,0.00394
система,0.00351
проблема,0.0035
существовать,0.00338
ребёнок,0.00331
говорить,0.00329
большой,0.00299
работа,0.00282


Unnamed: 0,topic_1 @2gramm
теория_струна,0.00965
магнитный_поль,0.00802
чёрный_дыра,0.00801
квантовый_компьютер,0.0079
длина_волна,0.00729
квантовый_механика,0.00657
скорость_свет,0.00631
магнитный_пол,0.0062
электромагнитный_волна,0.00619
закон_физика,0.00472

Unnamed: 0,topic_1 @3gramm
общий_теория_относительность,0.28576
квантовый_теория_поль,0.13555
специальный_теория_относительность,0.1242
инерциальный_система_отсчёт,0.05239
закон_сохранение_энергия,0.0342
постоянный_тонкий_структура,0.02931
сильный_магнитный_пол,0.02418
закон_всемирный_тяготение,0.02183
изучение_чёрный_дыра,0.02136
сложный_квантовый_система,0.01905

Unnamed: 0,topic_1 @author
akhmedov,0.09742
gelfand,0.08931
surdin,0.07505
rogozin,0.06495
akimov,0.04871
eskov,0.04465
alpina_non-fiction,0.03712
rubcov,0.03247
zhavoronkov,0.03247
sokolovsky,0.03212

Unnamed: 0,topic_1 @post_tag
физика,0.21793
квантовая_физика,0.04983
технологии,0.04695
квантовая_механика,0.02705
нанотехнологии,0.02542
атом,0.02477
квантовые_технологии,0.02113
математика,0.01894
магнитное_поле,0.01862
оптика,0.01859

Unnamed: 0,topic_1 @snippet
и,0.06376
о,0.05077
физик,0.04998
об,0.01445
владимир,0.01091
в,0.0103
квантовой,0.00982
на,0.00858
алексей,0.00824
с,0.00698

Unnamed: 0,topic_1 @title
и,0.02867
теория,0.02642
квантовая,0.01486
#,0.01161
квантовые,0.01114
поля,0.0099
главы,0.00962
относительности,0.00873
физики,0.00746
с,0.00715

Unnamed: 0,topic_1 @word
атом,0.00467
теория,0.00431
свет,0.00424
являться,0.00367
система,0.00364
электрон,0.00351
существовать,0.00308
фотон,0.00292
частица,0.00292
эйнштейн,0.00286


In [24]:
for line in second_model_html[:n]:
    display_html(line, raw=True)

Unnamed: 0,topic_0 @2gramm
массовый_культура,0.01233
xx_век,0.00606
речь_идти,0.00532
культовый_кино,0.00486
cinema_studies,0.00474
крайний_мера,0.00401
языковой_семья,0.0039
советский_союз,0.0038
общество_потребление,0.00367
индоевропейский_язык,0.00355

Unnamed: 0,topic_0 @3gramm
начало_xx_век,0.55287
средство_массовый_информация,0.10049
рак_молочный_железа,0.05635
теория_международный_отношение,0.0398
наиболее_часто_встречаться,0.01949
выглядеть_следующий_образ,0.01846
высокий_психический_функция,0.01832
помогать_друг_друг,0.01498
передавать_друг_друг,0.01498
буквальный_смысл_слово,0.01498

Unnamed: 0,topic_0 @author
pavlov,0.1267
plungyan,0.08447
alekseevsky,0.03839
lapina-kratasyuk,0.03455
zhukov,0.03429
zorin,0.03072
starostin,0.03072
gromov,0.02688
mann-ivanov,0.0255
zharkov,0.02304

Unnamed: 0,topic_0 @post_tag
культура,0.09706
массовая_культура,0.05777
кино,0.03876
культурология,0.03786
кинематограф,0.03045
медицина,0.02961
философия,0.0287
общество,0.02738
биология,0.02658
психология,0.02535

Unnamed: 0,topic_0 @snippet
и,0.07352
о,0.05634
в,0.01866
об,0.01284
из,0.00956
фактов,0.0092
александр,0.00911
культуролог,0.00898
павлов,0.0081
отрывок,0.00697

Unnamed: 0,topic_0 @title
в,0.05563
и,0.03965
#,0.02297
главы,0.02181
россии,0.01478
мира,0.01401
языки,0.01209
культуре,0.01185
как,0.01064
культуры,0.00886

Unnamed: 0,topic_0 @word
фильм,0.00527
женщина,0.00407
говорить,0.00358
являться,0.00352
ребёнок,0.00332
книга,0.00313
жизнь,0.00287
существовать,0.00276
стать,0.00276
культура,0.00275


Unnamed: 0,topic_1 @2gramm
нобелевский_премия,0.01
квантовый_компьютер,0.00772
длина_волна,0.00551
раковый_клетка,0.00479
органический_химия,0.00438
твёрдый_тело,0.00421
углеродный_нанотрубка,0.0042
квантовый_точка,0.00414
квантовый_технология,0.00397
магнитный_поль,0.00388

Unnamed: 0,topic_1 @3gramm
получить_нобелевский_премия,0.17319
лауреат_нобелевский_премия,0.12078
рождаться_наукоемкий_бизнес,0.05956
внешний_магнитный_поль,0.05081
выброс_углекислый_газа,0.03915
конец_прошлое_век,0.03714
решение_нобелевский_комитет,0.03624
клетка_иммунный_система,0.03332
заметный_экономический_эффект,0.03041
математический_точка_зрение,0.02457

Unnamed: 0,topic_1 @author
andrew,0.06014
buzdin,0.05212
akimov,0.04811
ashihmin,0.0441
fedichev,0.0441
bovykine,0.0441
paraschuk,0.03608
caturian,0.03608
schurov,0.02806
bobrovnikova,0.02406

Unnamed: 0,topic_1 @post_tag
физика,0.1054
химия,0.07112
медицина,0.06874
технологии,0.06862
нанотехнологии,0.03566
квантовая_физика,0.03145
биомедицина,0.02523
биология,0.02492
материаловедение,0.02011
нобелевская_премия,0.01753

Unnamed: 0,topic_1 @snippet
и,0.07861
о,0.05714
физик,0.03621
химик,0.0174
об,0.01589
в,0.01517
новых,0.00955
с,0.00677
фактов,0.00661
на,0.00591

Unnamed: 0,topic_1 @title
и,0.0259
премия,0.01892
в,0.01832
по,0.01699
нобелевская,0.01608
—,0.01481
премии,0.01324
для,0.01183
на,0.01176
#,0.0116

Unnamed: 0,topic_1 @word
использовать,0.00537
система,0.00533
молекула,0.00511
материал,0.00483
атом,0.00434
являться,0.00416
образ,0.00377
клетка,0.00375
свет,0.00368
работать,0.00364


In [25]:
from topicnet.viewers.top_documents_viewer import TopDocumentsViewer

In [26]:
first_model_top_doc = TopDocumentsViewer(first_model, dataset=dataset_sci).view()
second_model_top_doc = TopDocumentsViewer(second_model, dataset=dataset_sci).view()

In [27]:
for i, topic_docs in enumerate(first_model_top_doc[:n]):
    print('topic_'+str(i))
    topic_docs = [str(topic) for topic in topic_docs]
    for doc_id in topic_docs:
        print(doc_id,' ',dataset_sci.get_source_document(document_id=doc_id))
    print('='*100)

topic_0
823.txt   ['@title Как взаимодействуют фискальная и монетарная политика? | @snippet Экономист Сергей Пекарский о макроэкономическом анализе, борьбе с инфляцией и контроле обменного курса\n']
3140.txt   ['@title Создана нейронная сеть, работающая по принципу машины Тьюринга | @snippet Об архитектуре фон Неймана, машине Тьюринга и проверке работоспособности\n']
3111.txt   ['@title FAQ: Финансовая репрессия | @snippet 8 фактов о механизмах регулирования государством финансовых рынков в развивающихся экономиках\n']
224.txt   ['@title FAQ: Гендерная идентичность и адаптация | @snippet 6 фактов о мужских и женских социальных признаках, стратегиях поведения и хромосомных аномалиях\n']
1960.txt   ['@title Взаимодействие фискальной и монетарной политики | @snippet Экономист Сергей Пекарский о политике стимулирования экспорта, курсе валют и методах борьбы с инфляцией\n']
665.txt   ['@title Когда мы станем бессмертными? | @snippet Биофизик Петр Федичев о диагностике инфекционных заболеван

In [28]:
for i, topic_docs in enumerate(first_model_top_doc[:n]):
    print('topic_'+str(i))
    topic_docs = [str(topic) for topic in topic_docs]
    for doc_id in topic_docs:
        print(doc_id,' ',dataset_sci.get_source_document(document_id=doc_id))
    print('='*100)

topic_0
823.txt   ['@title Как взаимодействуют фискальная и монетарная политика? | @snippet Экономист Сергей Пекарский о макроэкономическом анализе, борьбе с инфляцией и контроле обменного курса\n']
3140.txt   ['@title Создана нейронная сеть, работающая по принципу машины Тьюринга | @snippet Об архитектуре фон Неймана, машине Тьюринга и проверке работоспособности\n']
3111.txt   ['@title FAQ: Финансовая репрессия | @snippet 8 фактов о механизмах регулирования государством финансовых рынков в развивающихся экономиках\n']
224.txt   ['@title FAQ: Гендерная идентичность и адаптация | @snippet 6 фактов о мужских и женских социальных признаках, стратегиях поведения и хромосомных аномалиях\n']
1960.txt   ['@title Взаимодействие фискальной и монетарной политики | @snippet Экономист Сергей Пекарский о политике стимулирования экспорта, курсе валют и методах борьбы с инфляцией\n']
665.txt   ['@title Когда мы станем бессмертными? | @snippet Биофизик Петр Федичев о диагностике инфекционных заболеван

Давайте посмотрим насколько похожи темы моделей друг на друга

In [29]:
from topicnet.viewers.spectrum import TopicSpectrumViewer

In [30]:
first_spectrum = TopicSpectrumViewer(first_model, early_stopping=1000, class_ids=['@word']).view()
second_spectrum = TopicSpectrumViewer(second_model, early_stopping=1000, class_ids=['@word']).view()

spectrum_order_first = first_model_top_tok.to_html(first_model_top_tok.view(), first_spectrum, thresh=thresh)
spectrum_order_second = second_model_top_tok.to_html(second_model_top_tok.view(), second_spectrum, thresh=thresh,)

                                                       

In [31]:
for line in spectrum_order_first[:n]:
    display_html(line, raw=True)

Unnamed: 0,topic_0 @2gramm
центральный_банк,0.0098
нейронный_сеть,0.00788
государственный_долг,0.00618
центральный_банка,0.00591
средний_класс,0.00479
развитый_страна,0.0045
среднее_класс,0.00437
крупный_город,0.00412
машинный_обучение,0.00392
рак_грудь,0.00373

Unnamed: 0,topic_0 @3gramm
научный_точка_зрение,0.09655
теория_рациональный_выбор,0.06212
теория_международный_отношение,0.06212
противоречить_друг_друг,0.05781
конец_прошлое_век,0.05781
чёрный_дыра_являться,0.0449
играть_огромный_роль,0.04059
математический_точка_зрение,0.03629
создание_новый_материал,0.03198
полиненасыщенный_жирный_кислота,0.03198

Unnamed: 0,topic_0 @author
fedichev,0.05071
voskoboynikov,0.05071
alekseevsky,0.0461
grigoriev,0.0461
lvovski,0.04149
umruhin,0.03688
dobrovolskaya,0.03688
los,0.03227
sivkov,0.03227
schurov,0.03227

Unnamed: 0,topic_0 @post_tag
город,0.05048
экономика,0.0493
россия,0.03646
культура,0.03041
медицина,0.02665
городское_пространство,0.02443
урбанистика,0.02363
общество,0.0235
математика,0.02324
технологии,0.02314

Unnamed: 0,topic_0 @snippet
и,0.07685
о,0.06323
в,0.01849
об,0.01297
фактов,0.01136
александр,0.00905
экономист,0.00853
дмитрий,0.0055
биофизик,0.00542
7,0.00472

Unnamed: 0,topic_0 @title
и,0.04436
в,0.04242
как,0.01314
пространства,0.01124
эмоций,0.01018
что,0.01001
физиология,0.00638
стресс,0.00632
роль,0.00624
город,0.00611

Unnamed: 0,topic_0 @word
город,0.00771
страна,0.00442
являться,0.00394
система,0.00351
проблема,0.0035
существовать,0.00338
ребёнок,0.00331
говорить,0.00329
большой,0.00299
работа,0.00282


Unnamed: 0,topic_2 @2gramm
крайний_мера,0.00609
нобелевский_премия,0.00475
нитрид_бора,0.0043
экономический_рост,0.00386
массовый_культура,0.00378
рабочий_место,0.00358
современный_общество,0.00344
повседневный_жизнь,0.00322
речь_идти,0.00315
общество_потребление,0.00306

Unnamed: 0,topic_2 @3gramm
лауреат_нобелевский_премия,0.11986
играть_важный_роль,0.10829
средство_массовый_информация,0.1054
опрос_общественный_мнение,0.07936
словарь_русский_язык,0.05621
федеральный_резервный_система,0.03885
решение_нобелевский_комитет,0.03596
выглядеть_следующий_образ,0.03017
ширина_запретить_зона,0.0263
играть_ключевой_роль,0.0262

Unnamed: 0,topic_2 @author
kozyrevskaya,0.16411
mann-ivanov,0.10175
sokolov,0.09518
kurennoj,0.07877
nlobooks,0.04488
apt,0.02954
khaitovich,0.02298
gromov,0.02297
gaidarpress,0.02153
bankovskaya,0.01969

Unnamed: 0,topic_2 @post_tag
общество,0.08581
культура,0.06569
социология,0.06317
экономика,0.04753
сша,0.03265
культурология,0.0246
философия,0.02456
массовая_культура,0.02246
образование,0.01945
идентичность,0.01765

Unnamed: 0,topic_2 @snippet
и,0.07029
о,0.0383
из,0.03123
книги,0.02769
отрывок,0.02131
в,0.01555
культуролог,0.0089
михаил,0.00829
социолог,0.00808
об,0.00775

Unnamed: 0,topic_2 @title
#,0.09546
главы,0.07279
и,0.03779
в,0.025
прямая,0.0248
премия,0.0195
по,0.01761
нобелевская,0.01656
премии,0.01365
—,0.01331

Unnamed: 0,topic_2 @word
работа,0.00359
общество,0.00334
большой,0.00319
отношение,0.00287
являться,0.00276
стать,0.00267
книга,0.00263
жизнь,0.00259
страна,0.00256
культура,0.00244


In [32]:
for line in spectrum_order_second[:n]:
    display_html(line, raw=True)

Unnamed: 0,topic_0 @2gramm
массовый_культура,0.01233
xx_век,0.00606
речь_идти,0.00532
культовый_кино,0.00486
cinema_studies,0.00474
крайний_мера,0.00401
языковой_семья,0.0039
советский_союз,0.0038
общество_потребление,0.00367
индоевропейский_язык,0.00355

Unnamed: 0,topic_0 @3gramm
начало_xx_век,0.55287
средство_массовый_информация,0.10049
рак_молочный_железа,0.05635
теория_международный_отношение,0.0398
наиболее_часто_встречаться,0.01949
выглядеть_следующий_образ,0.01846
высокий_психический_функция,0.01832
помогать_друг_друг,0.01498
передавать_друг_друг,0.01498
буквальный_смысл_слово,0.01498

Unnamed: 0,topic_0 @author
pavlov,0.1267
plungyan,0.08447
alekseevsky,0.03839
lapina-kratasyuk,0.03455
zhukov,0.03429
zorin,0.03072
starostin,0.03072
gromov,0.02688
mann-ivanov,0.0255
zharkov,0.02304

Unnamed: 0,topic_0 @post_tag
культура,0.09706
массовая_культура,0.05777
кино,0.03876
культурология,0.03786
кинематограф,0.03045
медицина,0.02961
философия,0.0287
общество,0.02738
биология,0.02658
психология,0.02535

Unnamed: 0,topic_0 @snippet
и,0.07352
о,0.05634
в,0.01866
об,0.01284
из,0.00956
фактов,0.0092
александр,0.00911
культуролог,0.00898
павлов,0.0081
отрывок,0.00697

Unnamed: 0,topic_0 @title
в,0.05563
и,0.03965
#,0.02297
главы,0.02181
россии,0.01478
мира,0.01401
языки,0.01209
культуре,0.01185
как,0.01064
культуры,0.00886

Unnamed: 0,topic_0 @word
фильм,0.00527
женщина,0.00407
говорить,0.00358
являться,0.00352
ребёнок,0.00332
книга,0.00313
жизнь,0.00287
существовать,0.00276
стать,0.00276
культура,0.00275


Unnamed: 0,topic_6 @2gramm
xx_век,0.00932
русский_язык,0.00867
xix_век,0.00581
критический_теория,0.00527
речь_идти,0.005
история_наука,0.00363
гуманитарный_наука,0.00354
мировой_война,0.00323
хх_век,0.0032
крайний_мера,0.00314

Unnamed: 0,topic_6 @3gramm
половина_xix_век,0.15244
начало_xix_век,0.15244
конец_xx_век,0.12391
немецкий_классический_философия,0.07255
середина_хх_век,0.05829
середина_xviii_век,0.05543
конец_хх_век,0.03261
предсказание_кристаллический_структура,0.02405
стоить_обратить_внимание,0.02226
дизайн_новый_материал,0.0212

Unnamed: 0,topic_6 @author
sokolov,0.14822
oganov,0.11244
bonch,0.08178
kronhaus,0.05622
boldyrev,0.05622
sivkov,0.03578
plotnikov,0.03067
vainshtein,0.02555
hitrov,0.02555
p_sokolov,0.02044

Unnamed: 0,topic_6 @post_tag
философия,0.09177
культура,0.0638
социология,0.05615
история,0.05039
литература,0.04568
общество,0.02786
кант_иммануил,0.02782
литературоведение,0.0211
наука,0.01893
россия,0.01762

Unnamed: 0,topic_6 @snippet
о,0.05261
и,0.05146
рекомендует,0.04173
что,0.03963
читать,0.03673
наук,0.02257
кандидат,0.01402
философ,0.01256
об,0.01251
в,0.01157

Unnamed: 0,topic_6 @title
5,0.12642
о,0.10926
книг,0.10905
об,0.02799
в,0.01939
и,0.01686
по,0.01562
философии,0.01451
истории,0.0119
что,0.00973

Unnamed: 0,topic_6 @word
книга,0.01223
метр,0.0049
автор,0.00429
работа,0.00394
история,0.00394
наука,0.00383
являться,0.00376
говорить,0.0028
текст,0.00261
жизнь,0.00259


Давайте сравним темы двух моделей

In [33]:
from topicnet.viewers.topic_mapping import TopicMapViewer

In [34]:
close_themes = TopicMapViewer(model=first_model, second_model=second_model).view()

In [35]:
map_first = first_model_top_tok.to_html(first_model_top_tok.view(), close_themes[0], thresh=thresh)
map_second = second_model_top_tok.to_html(second_model_top_tok.view(), close_themes[1], thresh=thresh)

for line_first, line_second in zip(map_first[:n], map_second[:n]):
    display_html(line_first, raw=True)
    display_html(line_second, raw=True)
    print('='*100)

Unnamed: 0,topic_0 @2gramm
центральный_банк,0.0098
нейронный_сеть,0.00788
государственный_долг,0.00618
центральный_банка,0.00591
средний_класс,0.00479
развитый_страна,0.0045
среднее_класс,0.00437
крупный_город,0.00412
машинный_обучение,0.00392
рак_грудь,0.00373

Unnamed: 0,topic_0 @3gramm
научный_точка_зрение,0.09655
теория_рациональный_выбор,0.06212
теория_международный_отношение,0.06212
противоречить_друг_друг,0.05781
конец_прошлое_век,0.05781
чёрный_дыра_являться,0.0449
играть_огромный_роль,0.04059
математический_точка_зрение,0.03629
создание_новый_материал,0.03198
полиненасыщенный_жирный_кислота,0.03198

Unnamed: 0,topic_0 @author
fedichev,0.05071
voskoboynikov,0.05071
alekseevsky,0.0461
grigoriev,0.0461
lvovski,0.04149
umruhin,0.03688
dobrovolskaya,0.03688
los,0.03227
sivkov,0.03227
schurov,0.03227

Unnamed: 0,topic_0 @post_tag
город,0.05048
экономика,0.0493
россия,0.03646
культура,0.03041
медицина,0.02665
городское_пространство,0.02443
урбанистика,0.02363
общество,0.0235
математика,0.02324
технологии,0.02314

Unnamed: 0,topic_0 @snippet
и,0.07685
о,0.06323
в,0.01849
об,0.01297
фактов,0.01136
александр,0.00905
экономист,0.00853
дмитрий,0.0055
биофизик,0.00542
7,0.00472

Unnamed: 0,topic_0 @title
и,0.04436
в,0.04242
как,0.01314
пространства,0.01124
эмоций,0.01018
что,0.01001
физиология,0.00638
стресс,0.00632
роль,0.00624
город,0.00611

Unnamed: 0,topic_0 @word
город,0.00771
страна,0.00442
являться,0.00394
система,0.00351
проблема,0.0035
существовать,0.00338
ребёнок,0.00331
говорить,0.00329
большой,0.00299
работа,0.00282


Unnamed: 0,topic_12 @2gramm
экономический_рост,0.00425
принятие_решение,0.00341
северный_кавказ,0.00331
крайний_мера,0.00323
европейский_союз,0.00304
конец_конец,0.00292
речь_идти,0.00263
саудовский_аравия,0.00246
рынок_труд,0.00241
городской_пространство,0.00238

Unnamed: 0,topic_12 @3gramm
важный_роль_играть,0.08034
сменять_друг_друг,0.06719
эпоха_поздний_бронза,0.04745
образование_планетный_система,0.04745
начало_xvi_век,0.04416
тело_солнечный_система,0.04088
темп_экономический_рост,0.04088
принятие_политический_решение,0.04088
планета_земной_группа,0.03759
твёрдый_бытовой_отход,0.03759

Unnamed: 0,topic_12 @author
mann-ivanov,0.11256
belyaev,0.06931
malahov,0.06007
belyanin,0.05545
fokin,0.05083
afontsev,0.04621
strelkapress,0.04071
brileva,0.03697
nozhevnikova,0.03235
stepantsov,0.03235

Unnamed: 0,topic_12 @post_tag
экономика,0.11928
история,0.04566
общество,0.04324
город,0.04225
культура,0.04153
государство,0.03908
россия,0.03606
политология,0.02365
экология,0.01863
политика,0.01851

Unnamed: 0,topic_12 @snippet
и,0.06422
о,0.04837
в,0.02164
экономист,0.01477
из,0.01438
об,0.01415
книги,0.01217
историк,0.00984
отрывок,0.00966
дмитрий,0.0092

Unnamed: 0,topic_12 @title
и,0.04047
главы,0.03232
#,0.03177
в,0.02153
на,0.01119
кавказа,0.01065
проблемы,0.0103
экономике,0.0085
что,0.00825
развитие,0.00663

Unnamed: 0,topic_12 @word
город,0.00475
страна,0.004
китай,0.00338
государство,0.00321
большой,0.00314
стать,0.00297
общество,0.00267
проблема,0.0025
работа,0.00241
являться,0.00237




Unnamed: 0,topic_1 @2gramm
теория_струна,0.00965
магнитный_поль,0.00802
чёрный_дыра,0.00801
квантовый_компьютер,0.0079
длина_волна,0.00729
квантовый_механика,0.00657
скорость_свет,0.00631
магнитный_пол,0.0062
электромагнитный_волна,0.00619
закон_физика,0.00472

Unnamed: 0,topic_1 @3gramm
общий_теория_относительность,0.28576
квантовый_теория_поль,0.13555
специальный_теория_относительность,0.1242
инерциальный_система_отсчёт,0.05239
закон_сохранение_энергия,0.0342
постоянный_тонкий_структура,0.02931
сильный_магнитный_пол,0.02418
закон_всемирный_тяготение,0.02183
изучение_чёрный_дыра,0.02136
сложный_квантовый_система,0.01905

Unnamed: 0,topic_1 @author
akhmedov,0.09742
gelfand,0.08931
surdin,0.07505
rogozin,0.06495
akimov,0.04871
eskov,0.04465
alpina_non-fiction,0.03712
rubcov,0.03247
zhavoronkov,0.03247
sokolovsky,0.03212

Unnamed: 0,topic_1 @post_tag
физика,0.21793
квантовая_физика,0.04983
технологии,0.04695
квантовая_механика,0.02705
нанотехнологии,0.02542
атом,0.02477
квантовые_технологии,0.02113
математика,0.01894
магнитное_поле,0.01862
оптика,0.01859

Unnamed: 0,topic_1 @snippet
и,0.06376
о,0.05077
физик,0.04998
об,0.01445
владимир,0.01091
в,0.0103
квантовой,0.00982
на,0.00858
алексей,0.00824
с,0.00698

Unnamed: 0,topic_1 @title
и,0.02867
теория,0.02642
квантовая,0.01486
#,0.01161
квантовые,0.01114
поля,0.0099
главы,0.00962
относительности,0.00873
физики,0.00746
с,0.00715

Unnamed: 0,topic_1 @word
атом,0.00467
теория,0.00431
свет,0.00424
являться,0.00367
система,0.00364
электрон,0.00351
существовать,0.00308
фотон,0.00292
частица,0.00292
эйнштейн,0.00286


Unnamed: 0,topic_13 @2gramm
чёрный_дыра,0.02319
социальный_сеть,0.00851
гравитационный_волна,0.00532
теория_относительность,0.0045
система_отсчёт,0.00446
электромагнитный_волна,0.00435
трансгенный_растение,0.004
большой_количество,0.00393
анализ_дать,0.00393
скорость_свет,0.00382

Unnamed: 0,topic_13 @3gramm
общий_теория_относительность,0.34324
специальный_теория_относительность,0.14918
инерциальный_система_отсчёт,0.06293
магнитный_пол_земля,0.05677
закон_сохранение_энергия,0.03217
чёрный_дыра_являться,0.03213
уделять_большой_внимание,0.02904
горизонт_чёрный_дыра,0.02904
относительно_друг_друг,0.02904
закон_всемирный_тяготение,0.02669

Unnamed: 0,topic_13 @author
kurennoj,0.10356
alpina_non-fiction,0.08785
kibrik,0.05178
akhmedov,0.04746
voskoboynikov,0.04746
kuznetsov,0.03883
judin,0.0347
rubcov,0.03452
sokolovsky,0.03452
lukashov,0.02157

Unnamed: 0,topic_13 @post_tag
математика,0.08057
информационные_технологии,0.04199
физика,0.03847
культура,0.03532
технологии,0.03471
психология,0.03292
интернет,0.0292
информатика,0.02888
общество,0.02464
общая_теория_относительности,0.02282

Unnamed: 0,topic_13 @snippet
и,0.07727
о,0.0556
в,0.01344
об,0.01205
математик,0.01176
книги,0.00941
виталий,0.00906
фактов,0.0082
из,0.00817
философ,0.008

Unnamed: 0,topic_13 @title
в,0.04284
и,0.03021
теория,0.02759
главы,0.01794
#,0.01595
как,0.01179
относительности,0.01117
психология,0.00821
данных,0.00726
черных,0.00698

Unnamed: 0,topic_13 @word
являться,0.00416
дать,0.00388
теория,0.00366
должный,0.00312
система,0.0029
образ,0.00284
большой,0.00284
пространство,0.00281
друг,0.00262
сеть,0.00252


