# Experiments
## Bioit set

In [None]:
%matplotlib inline
import glob
import os
import matplotlib.pyplot as plt

import artm

In [None]:
#Preparing input
# Options handed to BatchVectorizer: the source file is in Vowpal Wabbit
# format, and 'my_collection_batches' is passed as the target folder.
vectorizer_options = dict(
    data_path='../Data/bioit_set/bioit_set_1_vw.txt',
    data_format='vowpal_wabbit',
    target_folder='my_collection_batches',
)
batch_vectorizer = artm.BatchVectorizer(**vectorizer_options)

# Dictionary exposed by the vectorizer; reused for the perplexity score and
# for model initialization in the cells below.
dictionary = batch_vectorizer.dictionary

# Topic labels topic_0 ... topic_14.
num_topics = 15
topic_names = ['topic_%d' % topic_id for topic_id in range(num_topics)]

In [None]:
#Model parameters

# Quality metrics attached to the model.
scores = [
    artm.PerplexityScore(dictionary=dictionary, name='PerplexityScore'),
    artm.SparsityPhiScore(name='SparsityPhiScore'),
    artm.SparsityThetaScore(name='SparsityThetaScore'),
    artm.TopicKernelScore(probability_mass_threshold=0.3,
                          name='TopicKernelScore'),
    artm.TopTokensScore(num_tokens=6, name='TopTokensScore'),
]

# Regularizers. The tau values passed here are only starting points: every one
# of them is reassigned at the bottom of this cell, before any fitting happens.
regularizers = [
    artm.SmoothSparseThetaRegularizer(tau=-0.15, name='SparseTheta'),
    artm.SmoothSparsePhiRegularizer(tau=-0.1, name='SparsePhi'),
    artm.DecorrelatorPhiRegularizer(tau=1.5e+5, name='DecorrelatorPhi'),
]

# Model construction: one topic per entry of topic_names, with the scores and
# regularizers defined above and cache_theta switched on.
model_options = dict(
    topic_names=topic_names,
    cache_theta=True,
    scores=scores,
    regularizers=regularizers,
)
model_artm = artm.ARTM(**model_options)

# Hyperparameters
num_collection_passes = 25  # read by the fit_offline call in the fitting cell

# Regularization coefficients actually used for this run; these replace the
# constructor values above. Edit here when varying the experiment.
tau_settings = [
    ('SparsePhi', -0.000002),
    ('SparseTheta', -0.2),
    ('DecorrelatorPhi', 2.5e+5),
]
for regularizer_name, tau_value in tau_settings:
    model_artm.regularizers[regularizer_name].tau = tau_value

In [None]:
#Fitting the model
# Initialize the model from the collection dictionary, then run the offline
# algorithm over the batches for num_collection_passes passes.
model_artm.initialize(dictionary=dictionary)
model_artm.fit_offline(
    batch_vectorizer=batch_vectorizer,
    num_collection_passes=num_collection_passes,
)