In [1]:
import re
import gensim
from gensim.parsing.preprocessing import strip_punctuation, strip_short, strip_numeric, strip_multiple_whitespaces, remove_stopwords
from gensim.models.word2vec import LineSentence
from gensim.models import Word2Vec
from gensim.models.callbacks import CallbackAny2Vec
import urllib.request
import zipfile
import lzma
import shutil

In [2]:
import logging
# Send INFO-and-above records to the notebook output. force=True makes the
# configuration take effect even if the root logger was already configured.
logging.basicConfig(force=True, level=logging.INFO)

# Root logger; later cells report through this object.
logger = logging.getLogger()
logger.info("Logging initialized")

INFO:root:Logging initialized


In [3]:
# CC-100 Belarusian corpus (xz-compressed plain text).
# Link found here: https://metatext.io/datasets/cc100-belarusian
urllib.request.urlretrieve(
    url='https://data.statmt.org/cc-100/be.txt.xz',
    filename='be.txt.xz',
)

# Archive of the GrammarDB repository at tag PUBLICATION_2021.
# Kept as the last expression so the cell still displays the (filename, headers) result.
urllib.request.urlretrieve(
    url='https://github.com/Belarus/GrammarDB/archive/refs/tags/PUBLICATION_2021.zip',
    filename='GrammarDB.zip',
)

('GrammarDB.zip', <http.client.HTTPMessage at 0x174669610>)

In [4]:
# Decompress the corpus: copy the decoded xz stream into a plain file.
with lzma.open("be.txt.xz", "rb") as compressed, open("be.txt", "wb") as plain:
    shutil.copyfileobj(compressed, plain)

# Unpack the GrammarDB archive into the current working directory.
with zipfile.ZipFile('GrammarDB.zip', 'r') as archive:
    archive.extractall('.')

In [5]:
import xml.dom.minidom
from itertools import islice

def calculate_mapping_from_forms_to_base(filepath):
    """Build a mapping from word forms to their base word (lemma).

    Parses the XML document at ``filepath``. For every ``Variant`` element the
    text of each nested ``Form`` element is mapped to the variant's ``lemma``
    attribute; both are lower-cased and have ``+`` characters removed.

    Each variant gets a citation count: one more than the largest number of
    commas found in the ``slouniki`` attribute of any of its forms. When the
    same form appears under several variants, the variant with the higher
    citation count wins. If the counts are equal and the lemmas differ, the
    mapping seen first is kept and the collision is counted; the total and a
    few example lemmas are reported through the module-level ``logger``.

    Args:
        filepath: Filename or file object accepted by ``xml.dom.minidom.parse``.

    Returns:
        dict mapping each form (str) to its lemma (str).
    """
    xml_doc = xml.dom.minidom.parse(filepath)
    best = {}  # form -> (lemma, citation_count) of the winning variant so far
    collision_count = 0
    collisions = set()

    for variant in xml_doc.getElementsByTagName('Variant'):
        base = variant.getAttribute('lemma').replace("+", "").lower()
        forms_objs = variant.getElementsByTagName('Form')

        # default=0 keeps a Variant without any Form children from raising
        # ValueError ("max() arg is an empty sequence").
        citation_count = max(
            (forms_obj.getAttribute('slouniki').count(',') for forms_obj in forms_objs),
            default=0,
        ) + 1

        # Collect this variant's forms first so a form repeated inside the
        # same variant is compared against other variants only once.
        local_map = {}
        for forms_obj in forms_objs:
            if len(forms_obj.childNodes) > 0:
                form = forms_obj.childNodes[0].data.replace("+", "").lower()
                local_map[form] = (base, citation_count)

        for form, candidate in local_map.items():
            current = best.get(form)
            if current is None:
                best[form] = candidate
            elif current[1] == candidate[1] and current[0] != candidate[0]:
                # Equal citation counts but different lemmas: keep the first.
                collision_count += 1
                collisions.add(candidate[0])
                collisions.add(current[0])
            elif current[1] < candidate[1]:
                best[form] = candidate

    logger.info(f"Collisions (forms leading to different base word, and having same amount of citation): {collision_count}")
    logger.info(f"Examples of collisions: {list(islice(collisions, 5))}")

    # Drop the citation counts; callers only need form -> lemma.
    return {form: lemma for form, (lemma, _count) in best.items()}

In [6]:
# Build one form -> base-word map per GrammarDB file, in the same order as before.
# NOTE(review): the name `np` shadows the conventional numpy alias; kept because
# other cells may refer to it.
v, np, n1, n2, n3, adj1, adj2 = map(
    calculate_mapping_from_forms_to_base,
    (
        'GrammarDB-PUBLICATION_2021/V.xml',   # verbs
        'GrammarDB-PUBLICATION_2021/NP.xml',  # proper nouns
        'GrammarDB-PUBLICATION_2021/N1.xml',  # nouns
        'GrammarDB-PUBLICATION_2021/N2.xml',  # nouns
        'GrammarDB-PUBLICATION_2021/N3.xml',  # nouns
        'GrammarDB-PUBLICATION_2021/A1.xml',  # adjectives
        'GrammarDB-PUBLICATION_2021/A2.xml',  # adjectives
    ),
)

# Merge into a single lookup table. When a form occurs in several maps, the
# map listed later wins.
WORD_MAP = {**v, **np, **n1, **n2, **n3, **adj1, **adj2}

INFO:root:Collisions (forms leading to different base word, and having same amount of citation): 2597
INFO:root:Examples of collisions: ['адапраць', 'навяліцца', 'адчыняць', 'мурчэць', 'спіцца']
INFO:root:Collisions (forms leading to different base word, and having same amount of citation): 139
INFO:root:Examples of collisions: ['уладзімір', 'барыс', 'захар', 'маркаў', 'павел']
INFO:root:Collisions (forms leading to different base word, and having same amount of citation): 1345
INFO:root:Examples of collisions: ['гайка', 'дамка', 'ёта', 'важычаня', 'зажор']
INFO:root:Collisions (forms leading to different base word, and having same amount of citation): 1155
INFO:root:Examples of collisions: ['кум', 'лісце', 'кош', 'лёс', 'нома']
INFO:root:Collisions (forms leading to different base word, and having same amount of citation): 954
INFO:root:Examples of collisions: ['ростра', 'удава', 'тэхніка', 'трух', 'руно']
INFO:root:Collisions (forms leading to different base word, and having same amo

In [7]:
# Number of entries (distinct keys) in the merged WORD_MAP dictionary.
print(len(WORD_MAP))

2282675


In [8]:
# Spot-check: print the base word stored for two sample forms.
for form in ('рухам', 'беларусі'):
    print(WORD_MAP[form])

рух
беларусь


In [9]:
def strip_trailing_newline(iterable):
    """Lazily yield every item of ``iterable`` with trailing whitespace removed.

    ``rstrip()`` is called without arguments, so the line terminator is
    dropped together with any other trailing whitespace (spaces, tabs).
    """
    yield from (line.rstrip() for line in iterable)

# Substring replacements applied to every line, in this order.
# NOTE(review): presumably maps look-alike letters to Cyrillic and collapses
# soft-sign spellings ('ньн' -> 'нн', ...) to a single orthography — confirm.
CHARACTERS_MAP = {
    'ý': 'ў',
    'i': 'і',
    'ньн': 'нн',
    'цьц': 'цц',
    'сьц': 'сц',
    'сьл': 'сл',
}


def replace_characters(iterable):
    """Lazily yield each string with the ``CHARACTERS_MAP`` replacements applied.

    Replacements run one after another in dictionary order, so a later rule
    sees the text produced by the earlier ones.
    """
    for text in iterable:
        normalized = text
        for old, new in CHARACTERS_MAP.items():
            normalized = normalized.replace(old, new)
        yield normalized

def split_sentences(iterable):
    """Lazily split every string of ``iterable`` on runs of full stops.

    One or more consecutive ``.`` characters act as a single separator. Only
    the full stop is treated as a boundary; the pieces are yielded as they
    are, without stripping, so empty strings can appear (for example after a
    trailing dot).

    Args:
        iterable: Iterable of strings.

    Yields:
        str: The pieces of each input string, in order.
    """
    for text in iterable:
        # A raw string avoids the invalid "\." escape of a plain literal. One
        # split on r"\.+" gives the same pieces as collapsing dot runs first
        # and then splitting on ".".
        yield from re.split(r"\.+", text)

STOPWORDS = ['на', 'не', 'што', 'да', 'па', 'як', 'за', 'для', 'гэта', 'ад', 'але', 'калі', 'пра', 'у', 'яго', 'якія', 'ён', 'іх', 'мы', 'каб', 'толькі', 'аб', 'таксама', 'які', 'ці', 'быў', 'было', 'яны', 'так', 'вы', 'яе', 'будзе', 'пры', 'яшчэ', 'тым', 'таму', 'вельмі', 'былі', 'можна', 'яна', 'пасля', 'пад', 'можа', 'дзе', 'якая', 'тут', 'была', 'трэба', 'тое', 'таго', 'або', 'гэтым', 'бо', 'ўсё', 'хто', 'ня', 'нас', 'гэтага', 'быць', 'гэты', 'ёсць', 'праз', 'ўжо', 'са', 'нават', 'то', 'мяне', 'ім','ва', 'той', 'усе', 'без', 'чым', 'мне', 'мае', 'сябе', 'гэтай', 'там', 'усё', 'вось', 'ды', 'каля', 'якіх', 'ты', 'якой', 'ўсе', 'жа', 'ужо', 'паводле', 'будуць', 'аднак', 'могуць', 'сваю', 'ні', 'сваёй', 'яму', 'свае', 'гэтыя', 'проста', 'ўсіх', 'якім', 'падчас', 'тады', 'свой', 'вас', 'паміж', 'нам', 'раз', 'сваіх', 'нашай', 'менавіта', 'перад', 'вам', 'тых','зь','такім', 'свайго', 'над', 'зараз', 'амаль', 'чаму', 'ёй', 'чынам', 'напрыклад', 'якога', 'якое', 'сваім', 'можаце', 'га', 'хоць', 'бы', 'тыя', 'такія', 'потым', 'адным', 'такі', 'якую', 'сабе','сам','гэтых','мая','наш','зусім','чаго','наша','зноў','дык','такіх','нашага','адразу','каго','самі','ст','ну','нашы','нашым','самы','яно','гэтае','дзеля','куды','by','гг']
def preprocess_sentences(iterable):
    """Lazily normalize each sentence into a space-separated string of tokens.

    Steps, in order:
      1. gensim cleanup chain: ``strip_punctuation``, ``strip_short``,
         ``strip_numeric``, ``strip_multiple_whitespaces``.
      2. Lower-casing.
      3. Deletion of the typographic symbols listed in the regex below.
      4. Removal of the tokens listed in ``STOPWORDS``.
      5. Replacement of every token by ``WORD_MAP[token]`` when present;
         unknown tokens are kept unchanged.

    Args:
        iterable: Iterable of sentence strings.

    Yields:
        str: The processed sentence (may be empty).
    """
    # Built once per generator: gensim checks `token not in stopwords`, which
    # is a linear scan on the STOPWORDS list but a hash lookup on a frozenset.
    stopwords = frozenset(STOPWORDS)
    for sentence in iterable:
        s = strip_multiple_whitespaces(strip_numeric(strip_short(strip_punctuation(sentence))))
        s = s.lower()
        # NOTE(review): the symbols are deleted, not replaced by a space, so
        # two words joined only by one of them (e.g. "—") end up fused.
        s = re.sub("[«»“”„…—°′²]", "", s)
        s = remove_stopwords(s, stopwords=stopwords)
        yield ' '.join(WORD_MAP.get(w, w) for w in s.split())

def remove_short_lines(iterable):
    """Lazily yield only the strings worth keeping.

    A string is dropped when it is shorter than 20 characters or consists
    solely of whitespace; everything else is passed through unchanged.
    """
    for line in iterable:
        if len(line) < 20 or line.isspace():
            continue
        yield line


In [10]:
# Stream the corpus through the preprocessing generators and write one
# processed sentence per line. The encoding is explicit so the Cyrillic text
# is read and written as UTF-8 whatever the platform default is.
with open('be.txt', 'r', encoding='utf-8') as original_file:
    with open('sentences.txt', 'w', encoding='utf-8') as sentences_file:
        for s in remove_short_lines(
            preprocess_sentences(
                split_sentences(
                    replace_characters(
                        strip_trailing_newline(original_file)
                    )
                )
            )
        ):
            sentences_file.write(s + "\n")

In [11]:
# Loss value recorded after every epoch, in training order.
loss_list = []


class Callback(CallbackAny2Vec):
    """Training callback that prints and records the loss after each epoch."""

    def __init__(self):
        # Zero-based index of the epoch currently being trained.
        self.epoch = 0

    def on_epoch_end(self, model):
        """Record the latest training loss, print it and reset the model's counter."""
        epoch_loss = model.get_latest_training_loss()
        loss_list.append(epoch_loss)
        message = 'Loss after epoch {}:{}'.format(self.epoch, epoch_loss)
        print(message)
        # Presumably reset so the next reading covers only the following
        # epoch rather than a running total — confirm against gensim.
        model.running_training_loss = 0.0
        self.epoch += 1

In [12]:
# Create the model without a corpus; vocabulary building and training are
# done explicitly in separate steps.
model = Word2Vec(
    vector_size=100,
    window=3,
    min_count=10,
    workers=5,
)

INFO:gensim.utils:Word2Vec lifecycle event {'params': 'Word2Vec<vocab=0, vector_size=100, alpha=0.025>', 'datetime': '2023-04-23T10:21:24.381682', 'gensim': '4.3.1', 'python': '3.11.3 | packaged by conda-forge | (main, Apr  6 2023, 09:05:00) [Clang 14.0.6 ]', 'platform': 'macOS-12.6-x86_64-i386-64bit', 'event': 'created'}


In [13]:
# Corpus reader over the preprocessed file written to 'sentences.txt'.
sentences = LineSentence('sentences.txt')

In [14]:
# Build the model vocabulary from the corpus; progress_per=5000000 sets how
# often (in sentences) progress is logged.
model.build_vocab(sentences, progress_per=5000000)

INFO:gensim.models.word2vec:collecting all words and their counts
INFO:gensim.models.word2vec:PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO:gensim.models.word2vec:PROGRESS: at sentence #5000000, processed 44718951 words, keeping 810955 word types
INFO:gensim.models.word2vec:PROGRESS: at sentence #10000000, processed 89510301 words, keeping 1244638 word types
INFO:gensim.models.word2vec:PROGRESS: at sentence #15000000, processed 134174824 words, keeping 1580646 word types
INFO:gensim.models.word2vec:PROGRESS: at sentence #20000000, processed 178959997 words, keeping 1869907 word types
INFO:gensim.models.word2vec:PROGRESS: at sentence #25000000, processed 223611015 words, keeping 2135119 word types
INFO:gensim.models.word2vec:collected 2151510 word types from a corpus of 226598275 raw words and 25339681 sentences
INFO:gensim.models.word2vec:Creating a fresh vocabulary
INFO:gensim.utils:Word2Vec lifecycle event {'msg': 'effective_min_count=10 retains 277148 unique

In [15]:
# Train for 100 epochs over the corpus sized during build_vocab. Loss tracking
# is switched on so the Callback instance can print and record it per epoch.
model.train(
    sentences,
    epochs=100,
    total_examples=model.corpus_count,
    total_words=model.corpus_total_words,
    compute_loss=True,
    report_delay=180,
    callbacks=[Callback()],
)


INFO:gensim.utils:Word2Vec lifecycle event {'msg': 'training model with 5 workers on 277148 vocabulary and 100 features, using sg=0 hs=0 sample=0.001 negative=5 window=3 shrink_windows=True', 'datetime': '2023-04-23T10:22:46.421779', 'gensim': '4.3.1', 'python': '3.11.3 | packaged by conda-forge | (main, Apr  6 2023, 09:05:00) [Clang 14.0.6 ]', 'platform': 'macOS-12.6-x86_64-i386-64bit', 'event': 'train'}
INFO:gensim.models.word2vec:EPOCH 0 - PROGRESS: at 0.59% examples, 1285232 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 0: training on 226598275 raw words (219974027 effective words) took 168.2s, 1307777 effective words/s


Loss after epoch 0:45621836.0


INFO:gensim.models.word2vec:EPOCH 1 - PROGRESS: at 0.57% examples, 1239446 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 1: training on 226598275 raw words (219973293 effective words) took 165.8s, 1326665 effective words/s


Loss after epoch 1:44355904.0


INFO:gensim.models.word2vec:EPOCH 2 - PROGRESS: at 0.56% examples, 1229852 words/s, in_qsize 8, out_qsize 2
INFO:gensim.models.word2vec:EPOCH 2: training on 226598275 raw words (219970716 effective words) took 169.2s, 1299881 effective words/s


Loss after epoch 2:44249936.0


INFO:gensim.models.word2vec:EPOCH 3 - PROGRESS: at 0.57% examples, 1257724 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 3: training on 226598275 raw words (219972848 effective words) took 169.3s, 1299601 effective words/s


Loss after epoch 3:44104492.0


INFO:gensim.models.word2vec:EPOCH 4 - PROGRESS: at 0.57% examples, 1257147 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 4: training on 226598275 raw words (219972840 effective words) took 167.0s, 1317200 effective words/s


Loss after epoch 4:44107028.0


INFO:gensim.models.word2vec:EPOCH 5 - PROGRESS: at 0.56% examples, 1220855 words/s, in_qsize 8, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 5: training on 226598275 raw words (219973851 effective words) took 167.2s, 1315802 effective words/s


Loss after epoch 5:44097436.0


INFO:gensim.models.word2vec:EPOCH 6 - PROGRESS: at 0.57% examples, 1247998 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 6: training on 226598275 raw words (219974504 effective words) took 164.1s, 1340099 effective words/s


Loss after epoch 6:44270596.0


INFO:gensim.models.word2vec:EPOCH 7 - PROGRESS: at 0.60% examples, 1290979 words/s, in_qsize 8, out_qsize 3
INFO:gensim.models.word2vec:EPOCH 7: training on 226598275 raw words (219978018 effective words) took 166.0s, 1325076 effective words/s


Loss after epoch 7:44089604.0


INFO:gensim.models.word2vec:EPOCH 8 - PROGRESS: at 0.59% examples, 1285402 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 8: training on 226598275 raw words (219971424 effective words) took 168.8s, 1302828 effective words/s


Loss after epoch 8:44185456.0


INFO:gensim.models.word2vec:EPOCH 9 - PROGRESS: at 0.58% examples, 1269394 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 9: training on 226598275 raw words (219973625 effective words) took 168.2s, 1307447 effective words/s


Loss after epoch 9:44285424.0


INFO:gensim.models.word2vec:EPOCH 10 - PROGRESS: at 0.58% examples, 1281159 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 10: training on 226598275 raw words (219974854 effective words) took 166.2s, 1323511 effective words/s


Loss after epoch 10:44182396.0


INFO:gensim.models.word2vec:EPOCH 11 - PROGRESS: at 0.58% examples, 1266807 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 11: training on 226598275 raw words (219975569 effective words) took 163.2s, 1347996 effective words/s


Loss after epoch 11:44126032.0


INFO:gensim.models.word2vec:EPOCH 12 - PROGRESS: at 0.61% examples, 1331130 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 12: training on 226598275 raw words (219976024 effective words) took 162.5s, 1353878 effective words/s


Loss after epoch 12:44408908.0


INFO:gensim.models.word2vec:EPOCH 13 - PROGRESS: at 0.61% examples, 1320703 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 13: training on 226598275 raw words (219975924 effective words) took 162.6s, 1353192 effective words/s


Loss after epoch 13:44199988.0


INFO:gensim.models.word2vec:EPOCH 14 - PROGRESS: at 0.60% examples, 1303863 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 14: training on 226598275 raw words (219976300 effective words) took 163.0s, 1349379 effective words/s


Loss after epoch 14:44425624.0


INFO:gensim.models.word2vec:EPOCH 15 - PROGRESS: at 0.57% examples, 1230728 words/s, in_qsize 8, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 15: training on 226598275 raw words (219974649 effective words) took 163.1s, 1348464 effective words/s


Loss after epoch 15:44209516.0


INFO:gensim.models.word2vec:EPOCH 16 - PROGRESS: at 0.59% examples, 1288194 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 16: training on 226598275 raw words (219975068 effective words) took 162.1s, 1357146 effective words/s


Loss after epoch 16:44227556.0


INFO:gensim.models.word2vec:EPOCH 17 - PROGRESS: at 0.57% examples, 1240406 words/s, in_qsize 9, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 17: training on 226598275 raw words (219974402 effective words) took 163.1s, 1348880 effective words/s


Loss after epoch 17:44041368.0


INFO:gensim.models.word2vec:EPOCH 18 - PROGRESS: at 0.60% examples, 1295974 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 18: training on 226598275 raw words (219974102 effective words) took 163.2s, 1348227 effective words/s


Loss after epoch 18:44373620.0


INFO:gensim.models.word2vec:EPOCH 19 - PROGRESS: at 0.60% examples, 1299461 words/s, in_qsize 9, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 19: training on 226598275 raw words (219977575 effective words) took 163.1s, 1348930 effective words/s


Loss after epoch 19:44456688.0


INFO:gensim.models.word2vec:EPOCH 20 - PROGRESS: at 0.61% examples, 1315974 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 20: training on 226598275 raw words (219975905 effective words) took 162.7s, 1352306 effective words/s


Loss after epoch 20:44070200.0


INFO:gensim.models.word2vec:EPOCH 21 - PROGRESS: at 0.57% examples, 1225835 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 21: training on 226598275 raw words (219972785 effective words) took 162.9s, 1350639 effective words/s


Loss after epoch 21:43995008.0


INFO:gensim.models.word2vec:EPOCH 22 - PROGRESS: at 0.59% examples, 1285921 words/s, in_qsize 8, out_qsize 3
INFO:gensim.models.word2vec:EPOCH 22: training on 226598275 raw words (219975298 effective words) took 164.1s, 1340862 effective words/s


Loss after epoch 22:44134528.0


INFO:gensim.models.word2vec:EPOCH 23 - PROGRESS: at 0.57% examples, 1260822 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 23: training on 226598275 raw words (219975682 effective words) took 166.9s, 1317976 effective words/s


Loss after epoch 23:44107632.0


INFO:gensim.models.word2vec:EPOCH 24 - PROGRESS: at 0.56% examples, 1220400 words/s, in_qsize 8, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 24: training on 226598275 raw words (219976889 effective words) took 163.9s, 1342169 effective words/s


Loss after epoch 24:43802268.0


INFO:gensim.models.word2vec:EPOCH 25 - PROGRESS: at 0.56% examples, 1228115 words/s, in_qsize 7, out_qsize 3
INFO:gensim.models.word2vec:EPOCH 25: training on 226598275 raw words (219974616 effective words) took 164.3s, 1339165 effective words/s


Loss after epoch 25:44047244.0


INFO:gensim.models.word2vec:EPOCH 26 - PROGRESS: at 0.59% examples, 1288834 words/s, in_qsize 9, out_qsize 2
INFO:gensim.models.word2vec:EPOCH 26: training on 226598275 raw words (219973772 effective words) took 162.4s, 1354557 effective words/s


Loss after epoch 26:43904480.0


INFO:gensim.models.word2vec:EPOCH 27 - PROGRESS: at 0.61% examples, 1332694 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 27: training on 226598275 raw words (219975927 effective words) took 161.9s, 1358850 effective words/s


Loss after epoch 27:43939264.0


INFO:gensim.models.word2vec:EPOCH 28 - PROGRESS: at 0.61% examples, 1337086 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 28: training on 226598275 raw words (219974638 effective words) took 162.3s, 1355636 effective words/s


Loss after epoch 28:43987436.0


INFO:gensim.models.word2vec:EPOCH 29 - PROGRESS: at 0.56% examples, 1215030 words/s, in_qsize 10, out_qsize 3
INFO:gensim.models.word2vec:EPOCH 29: training on 226598275 raw words (219976718 effective words) took 162.5s, 1353889 effective words/s


Loss after epoch 29:43859148.0


INFO:gensim.models.word2vec:EPOCH 30 - PROGRESS: at 0.60% examples, 1297848 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 30: training on 226598275 raw words (219975108 effective words) took 167.9s, 1310297 effective words/s


Loss after epoch 30:43961468.0


INFO:gensim.models.word2vec:EPOCH 31 - PROGRESS: at 0.52% examples, 1129432 words/s, in_qsize 9, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 31: training on 226598275 raw words (219974789 effective words) took 167.5s, 1312992 effective words/s


Loss after epoch 31:44073332.0


INFO:gensim.models.word2vec:EPOCH 32 - PROGRESS: at 0.56% examples, 1221386 words/s, in_qsize 8, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 32: training on 226598275 raw words (219974057 effective words) took 163.0s, 1349360 effective words/s


Loss after epoch 32:43907992.0


INFO:gensim.models.word2vec:EPOCH 33 - PROGRESS: at 0.60% examples, 1309765 words/s, in_qsize 8, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 33: training on 226598275 raw words (219975396 effective words) took 164.0s, 1341154 effective words/s


Loss after epoch 33:43725336.0


INFO:gensim.models.word2vec:EPOCH 34 - PROGRESS: at 0.56% examples, 1219730 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 34: training on 226598275 raw words (219974172 effective words) took 163.8s, 1343150 effective words/s


Loss after epoch 34:43849884.0


INFO:gensim.models.word2vec:EPOCH 35 - PROGRESS: at 0.55% examples, 1183189 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 35: training on 226598275 raw words (219974073 effective words) took 162.9s, 1350440 effective words/s


Loss after epoch 35:43924944.0


INFO:gensim.models.word2vec:EPOCH 36 - PROGRESS: at 0.58% examples, 1233674 words/s, in_qsize 8, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 36: training on 226598275 raw words (219973451 effective words) took 163.9s, 1342488 effective words/s


Loss after epoch 36:43826232.0


INFO:gensim.models.word2vec:EPOCH 37 - PROGRESS: at 0.58% examples, 1267448 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 37: training on 226598275 raw words (219975769 effective words) took 164.2s, 1339816 effective words/s


Loss after epoch 37:43496588.0


INFO:gensim.models.word2vec:EPOCH 38 - PROGRESS: at 0.57% examples, 1248605 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 38: training on 226598275 raw words (219974046 effective words) took 163.9s, 1342048 effective words/s


Loss after epoch 38:43605268.0


INFO:gensim.models.word2vec:EPOCH 39 - PROGRESS: at 0.62% examples, 1361229 words/s, in_qsize 9, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 39: training on 226598275 raw words (219974917 effective words) took 165.5s, 1328833 effective words/s


Loss after epoch 39:43641544.0


INFO:gensim.models.word2vec:EPOCH 40 - PROGRESS: at 0.60% examples, 1307722 words/s, in_qsize 7, out_qsize 2
INFO:gensim.models.word2vec:EPOCH 40: training on 226598275 raw words (219975880 effective words) took 162.4s, 1354273 effective words/s


Loss after epoch 40:43543980.0


INFO:gensim.models.word2vec:EPOCH 41 - PROGRESS: at 0.59% examples, 1291145 words/s, in_qsize 9, out_qsize 2
INFO:gensim.models.word2vec:EPOCH 41: training on 226598275 raw words (219972916 effective words) took 161.9s, 1358958 effective words/s


Loss after epoch 41:43537204.0


INFO:gensim.models.word2vec:EPOCH 42 - PROGRESS: at 0.57% examples, 1259155 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 42: training on 226598275 raw words (219973201 effective words) took 162.9s, 1350317 effective words/s


Loss after epoch 42:43491140.0


INFO:gensim.models.word2vec:EPOCH 43 - PROGRESS: at 0.61% examples, 1316216 words/s, in_qsize 7, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 43: training on 226598275 raw words (219973226 effective words) took 162.4s, 1354218 effective words/s


Loss after epoch 43:43373268.0


INFO:gensim.models.word2vec:EPOCH 44 - PROGRESS: at 0.58% examples, 1277548 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 44: training on 226598275 raw words (219974399 effective words) took 161.8s, 1359165 effective words/s


Loss after epoch 44:43624628.0


INFO:gensim.models.word2vec:EPOCH 45 - PROGRESS: at 0.62% examples, 1348850 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 45: training on 226598275 raw words (219973684 effective words) took 162.2s, 1356331 effective words/s


Loss after epoch 45:43330720.0


INFO:gensim.models.word2vec:EPOCH 46 - PROGRESS: at 0.57% examples, 1247644 words/s, in_qsize 10, out_qsize 3
INFO:gensim.models.word2vec:EPOCH 46: training on 226598275 raw words (219975907 effective words) took 162.5s, 1353521 effective words/s


Loss after epoch 46:43449924.0


INFO:gensim.models.word2vec:EPOCH 47 - PROGRESS: at 0.60% examples, 1304291 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 47: training on 226598275 raw words (219973308 effective words) took 162.7s, 1351678 effective words/s


Loss after epoch 47:43234040.0


INFO:gensim.models.word2vec:EPOCH 48 - PROGRESS: at 0.59% examples, 1289082 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 48: training on 226598275 raw words (219974737 effective words) took 162.7s, 1352363 effective words/s


Loss after epoch 48:43097712.0


INFO:gensim.models.word2vec:EPOCH 49 - PROGRESS: at 0.57% examples, 1231720 words/s, in_qsize 9, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 49: training on 226598275 raw words (219976642 effective words) took 162.9s, 1350591 effective words/s


Loss after epoch 49:44565932.0


INFO:gensim.models.word2vec:EPOCH 50 - PROGRESS: at 0.57% examples, 1246664 words/s, in_qsize 7, out_qsize 2
INFO:gensim.models.word2vec:EPOCH 50: training on 226598275 raw words (219973589 effective words) took 166.2s, 1323798 effective words/s


Loss after epoch 50:43416728.0


INFO:gensim.models.word2vec:EPOCH 51 - PROGRESS: at 0.61% examples, 1327862 words/s, in_qsize 10, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 51: training on 226598275 raw words (219976029 effective words) took 162.8s, 1351063 effective words/s


Loss after epoch 51:43234072.0


INFO:gensim.models.word2vec:EPOCH 52 - PROGRESS: at 0.57% examples, 1259336 words/s, in_qsize 9, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 52: training on 226598275 raw words (219973869 effective words) took 163.7s, 1343785 effective words/s


Loss after epoch 52:43260536.0


INFO:gensim.models.word2vec:EPOCH 53 - PROGRESS: at 0.60% examples, 1288929 words/s, in_qsize 9, out_qsize 2
INFO:gensim.models.word2vec:EPOCH 53: training on 226598275 raw words (219974403 effective words) took 162.8s, 1350938 effective words/s


Loss after epoch 53:43074152.0


INFO:gensim.models.word2vec:EPOCH 54 - PROGRESS: at 0.61% examples, 1286956 words/s, in_qsize 9, out_qsize 2
INFO:gensim.models.word2vec:EPOCH 54: training on 226598275 raw words (219974071 effective words) took 162.6s, 1352967 effective words/s


Loss after epoch 54:43096868.0


INFO:gensim.models.word2vec:EPOCH 55 - PROGRESS: at 0.57% examples, 1250614 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 55: training on 226598275 raw words (219974446 effective words) took 162.1s, 1357336 effective words/s


Loss after epoch 55:42714596.0


INFO:gensim.models.word2vec:EPOCH 56 - PROGRESS: at 0.59% examples, 1294430 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 56: training on 226598275 raw words (219973803 effective words) took 162.7s, 1351755 effective words/s


Loss after epoch 56:43268708.0


INFO:gensim.models.word2vec:EPOCH 57 - PROGRESS: at 0.59% examples, 1288494 words/s, in_qsize 8, out_qsize 2
INFO:gensim.models.word2vec:EPOCH 57: training on 226598275 raw words (219973157 effective words) took 162.9s, 1350436 effective words/s


Loss after epoch 57:42984456.0


INFO:gensim.models.word2vec:EPOCH 58 - PROGRESS: at 0.60% examples, 1312188 words/s, in_qsize 8, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 58: training on 226598275 raw words (219977179 effective words) took 163.8s, 1342758 effective words/s


Loss after epoch 58:43917916.0


INFO:gensim.models.word2vec:EPOCH 59 - PROGRESS: at 0.57% examples, 1235182 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 59: training on 226598275 raw words (219975356 effective words) took 162.4s, 1354373 effective words/s


Loss after epoch 59:43035192.0


INFO:gensim.models.word2vec:EPOCH 60 - PROGRESS: at 0.61% examples, 1316582 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 60: training on 226598275 raw words (219975517 effective words) took 162.7s, 1352347 effective words/s


Loss after epoch 60:42818392.0


INFO:gensim.models.word2vec:EPOCH 61 - PROGRESS: at 0.60% examples, 1302978 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 61: training on 226598275 raw words (219975453 effective words) took 162.6s, 1352693 effective words/s


Loss after epoch 61:42656488.0


INFO:gensim.models.word2vec:EPOCH 62 - PROGRESS: at 0.60% examples, 1288687 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 62: training on 226598275 raw words (219975580 effective words) took 164.4s, 1337912 effective words/s


Loss after epoch 62:42411144.0


INFO:gensim.models.word2vec:EPOCH 63 - PROGRESS: at 0.59% examples, 1287263 words/s, in_qsize 8, out_qsize 3
INFO:gensim.models.word2vec:EPOCH 63: training on 226598275 raw words (219976178 effective words) took 164.8s, 1335091 effective words/s


Loss after epoch 63:42707748.0


INFO:gensim.models.word2vec:EPOCH 64 - PROGRESS: at 0.58% examples, 1264672 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 64: training on 226598275 raw words (219974815 effective words) took 165.3s, 1330678 effective words/s


Loss after epoch 64:42523348.0


INFO:gensim.models.word2vec:EPOCH 65 - PROGRESS: at 0.61% examples, 1330019 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 65: training on 226598275 raw words (219976008 effective words) took 165.9s, 1325569 effective words/s


Loss after epoch 65:42491780.0


INFO:gensim.models.word2vec:EPOCH 66 - PROGRESS: at 0.59% examples, 1285157 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 66: training on 226598275 raw words (219975676 effective words) took 162.6s, 1352884 effective words/s


Loss after epoch 66:42605616.0


INFO:gensim.models.word2vec:EPOCH 67 - PROGRESS: at 0.58% examples, 1264806 words/s, in_qsize 10, out_qsize 2
INFO:gensim.models.word2vec:EPOCH 67: training on 226598275 raw words (219974275 effective words) took 163.1s, 1348813 effective words/s


Loss after epoch 67:42294312.0


INFO:gensim.models.word2vec:EPOCH 68 - PROGRESS: at 0.59% examples, 1284191 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 68: training on 226598275 raw words (219973402 effective words) took 162.9s, 1350429 effective words/s


Loss after epoch 68:42310540.0


INFO:gensim.models.word2vec:EPOCH 69 - PROGRESS: at 0.61% examples, 1329519 words/s, in_qsize 8, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 69: training on 226598275 raw words (219975410 effective words) took 167.3s, 1315096 effective words/s


Loss after epoch 69:42306532.0


INFO:gensim.models.word2vec:EPOCH 70 - PROGRESS: at 0.57% examples, 1232986 words/s, in_qsize 9, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 70: training on 226598275 raw words (219975846 effective words) took 165.2s, 1331236 effective words/s


Loss after epoch 70:42327836.0


INFO:gensim.models.word2vec:EPOCH 71 - PROGRESS: at 0.59% examples, 1277081 words/s, in_qsize 10, out_qsize 2
INFO:gensim.models.word2vec:EPOCH 71: training on 226598275 raw words (219973789 effective words) took 165.0s, 1333246 effective words/s


Loss after epoch 71:42028380.0


INFO:gensim.models.word2vec:EPOCH 72 - PROGRESS: at 0.56% examples, 1230691 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 72: training on 226598275 raw words (219974319 effective words) took 163.3s, 1347387 effective words/s


Loss after epoch 72:43780020.0


INFO:gensim.models.word2vec:EPOCH 73 - PROGRESS: at 0.58% examples, 1235741 words/s, in_qsize 9, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 73: training on 226598275 raw words (219973787 effective words) took 163.7s, 1343876 effective words/s


Loss after epoch 73:41960848.0


INFO:gensim.models.word2vec:EPOCH 74 - PROGRESS: at 0.59% examples, 1291214 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 74: training on 226598275 raw words (219975906 effective words) took 162.7s, 1352344 effective words/s


Loss after epoch 74:41753348.0


INFO:gensim.models.word2vec:EPOCH 75 - PROGRESS: at 0.59% examples, 1288191 words/s, in_qsize 8, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 75: training on 226598275 raw words (219974505 effective words) took 162.7s, 1351786 effective words/s


Loss after epoch 75:41644528.0


INFO:gensim.models.word2vec:EPOCH 76 - PROGRESS: at 0.60% examples, 1289534 words/s, in_qsize 9, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 76: training on 226598275 raw words (219976754 effective words) took 161.9s, 1358510 effective words/s


Loss after epoch 76:42072300.0


INFO:gensim.models.word2vec:EPOCH 77 - PROGRESS: at 0.60% examples, 1303552 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 77: training on 226598275 raw words (219974839 effective words) took 162.3s, 1355546 effective words/s


Loss after epoch 77:41830796.0


INFO:gensim.models.word2vec:EPOCH 78 - PROGRESS: at 0.59% examples, 1288603 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 78: training on 226598275 raw words (219974814 effective words) took 162.0s, 1357814 effective words/s


Loss after epoch 78:41535968.0


INFO:gensim.models.word2vec:EPOCH 79 - PROGRESS: at 0.60% examples, 1313499 words/s, in_qsize 7, out_qsize 2
INFO:gensim.models.word2vec:EPOCH 79: training on 226598275 raw words (219974106 effective words) took 163.5s, 1345263 effective words/s


Loss after epoch 79:41507712.0


INFO:gensim.models.word2vec:EPOCH 80 - PROGRESS: at 0.62% examples, 1345613 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 80: training on 226598275 raw words (219975950 effective words) took 162.8s, 1351596 effective words/s


Loss after epoch 80:41336916.0


INFO:gensim.models.word2vec:EPOCH 81 - PROGRESS: at 0.60% examples, 1297655 words/s, in_qsize 9, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 81: training on 226598275 raw words (219975359 effective words) took 164.6s, 1336307 effective words/s


Loss after epoch 81:41256888.0


INFO:gensim.models.word2vec:EPOCH 82 - PROGRESS: at 0.61% examples, 1339988 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 82: training on 226598275 raw words (219976077 effective words) took 165.1s, 1332427 effective words/s


Loss after epoch 82:41069008.0


INFO:gensim.models.word2vec:EPOCH 83 - PROGRESS: at 0.58% examples, 1251189 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 83: training on 226598275 raw words (219974498 effective words) took 165.2s, 1331947 effective words/s


Loss after epoch 83:40828812.0


INFO:gensim.models.word2vec:EPOCH 84 - PROGRESS: at 0.61% examples, 1310733 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 84: training on 226598275 raw words (219973777 effective words) took 169.3s, 1299273 effective words/s


Loss after epoch 84:40791320.0


INFO:gensim.models.word2vec:EPOCH 85 - PROGRESS: at 0.33% examples, 717258 words/s, in_qsize 9, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 85 - PROGRESS: at 97.37% examples, 1183709 words/s, in_qsize 9, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 85: training on 226598275 raw words (219975092 effective words) took 185.2s, 1187861 effective words/s


Loss after epoch 85:40758012.0


INFO:gensim.models.word2vec:EPOCH 86 - PROGRESS: at 0.59% examples, 1290950 words/s, in_qsize 7, out_qsize 2
INFO:gensim.models.word2vec:EPOCH 86 - PROGRESS: at 89.91% examples, 1093331 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 86: training on 226598275 raw words (219975540 effective words) took 197.3s, 1115172 effective words/s


Loss after epoch 86:40487952.0


INFO:gensim.models.word2vec:EPOCH 87 - PROGRESS: at 0.60% examples, 1309750 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 87 - PROGRESS: at 96.41% examples, 1172059 words/s, in_qsize 9, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 87: training on 226598275 raw words (219976256 effective words) took 187.0s, 1176567 effective words/s


Loss after epoch 87:40521400.0


INFO:gensim.models.word2vec:EPOCH 88 - PROGRESS: at 0.59% examples, 1279997 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 88: training on 226598275 raw words (219973682 effective words) took 165.4s, 1329723 effective words/s


Loss after epoch 88:40385292.0


INFO:gensim.models.word2vec:EPOCH 89 - PROGRESS: at 0.57% examples, 1247837 words/s, in_qsize 6, out_qsize 4
INFO:gensim.models.word2vec:EPOCH 89: training on 226598275 raw words (219975453 effective words) took 162.5s, 1353799 effective words/s


Loss after epoch 89:40164828.0


INFO:gensim.models.word2vec:EPOCH 90 - PROGRESS: at 0.61% examples, 1324819 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 90: training on 226598275 raw words (219974285 effective words) took 160.8s, 1368082 effective words/s


Loss after epoch 90:41217672.0


INFO:gensim.models.word2vec:EPOCH 91 - PROGRESS: at 0.60% examples, 1303267 words/s, in_qsize 8, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 91: training on 226598275 raw words (219975658 effective words) took 163.6s, 1344894 effective words/s


Loss after epoch 91:39945608.0


INFO:gensim.models.word2vec:EPOCH 92 - PROGRESS: at 0.59% examples, 1288817 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 92: training on 226598275 raw words (219974707 effective words) took 164.6s, 1336217 effective words/s


Loss after epoch 92:39633816.0


INFO:gensim.models.word2vec:EPOCH 93 - PROGRESS: at 0.58% examples, 1254160 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 93: training on 226598275 raw words (219973866 effective words) took 164.6s, 1336219 effective words/s


Loss after epoch 93:39335348.0


INFO:gensim.models.word2vec:EPOCH 94 - PROGRESS: at 0.60% examples, 1310293 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 94: training on 226598275 raw words (219972398 effective words) took 162.7s, 1352168 effective words/s


Loss after epoch 94:39096012.0


INFO:gensim.models.word2vec:EPOCH 95 - PROGRESS: at 0.58% examples, 1249518 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 95: training on 226598275 raw words (219975120 effective words) took 164.8s, 1334533 effective words/s


Loss after epoch 95:39050096.0


INFO:gensim.models.word2vec:EPOCH 96 - PROGRESS: at 0.56% examples, 1230837 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 96: training on 226598275 raw words (219974766 effective words) took 164.4s, 1337764 effective words/s


Loss after epoch 96:38497092.0


INFO:gensim.models.word2vec:EPOCH 97 - PROGRESS: at 0.58% examples, 1257876 words/s, in_qsize 10, out_qsize 1
INFO:gensim.models.word2vec:EPOCH 97: training on 226598275 raw words (219973377 effective words) took 164.6s, 1336298 effective words/s


Loss after epoch 97:38436708.0


INFO:gensim.models.word2vec:EPOCH 98 - PROGRESS: at 0.58% examples, 1281189 words/s, in_qsize 10, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 98: training on 226598275 raw words (219973683 effective words) took 168.8s, 1303259 effective words/s


Loss after epoch 98:38193532.0


INFO:gensim.models.word2vec:EPOCH 99 - PROGRESS: at 0.58% examples, 1280493 words/s, in_qsize 9, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 99: training on 226598275 raw words (219974942 effective words) took 168.1s, 1308280 effective words/s
INFO:gensim.utils:Word2Vec lifecycle event {'msg': 'training on 22659827500 raw words (21997472888 effective words) took 16488.3s, 1334127 effective words/s', 'datetime': '2023-04-23T15:48:08.114033', 'gensim': '4.3.1', 'python': '3.11.3 | packaged by conda-forge | (main, Apr  6 2023, 09:05:00) [Clang 14.0.6 ]', 'platform': 'macOS-12.6-x86_64-i386-64bit', 'event': 'train'}


Loss after epoch 99:37834140.0


(21997472888, 22659827500)

In [None]:
# Persist the trained model under a relative filename in the current working directory.
# NOTE(review): this cell carries no execution count in the saved notebook, so it may
# never have been run in this session — confirm the file exists before relying on it.
model.save("word2vec-100-bel-cc100.model")

In [25]:
# Sanity check: list the 20 closest vocabulary entries to 'кіт' as (key, similarity) pairs.
# NOTE(review): the recorded neighbours look like proper names rather than related
# common nouns — worth confirming the query word is well represented in the corpus.
model.wv.most_similar('кіт', topn=20)

[('кастаненка', 0.6006743907928467),
 ('клешчукевіч', 0.5838990211486816),
 ('беляжэнкі', 0.5773590803146362),
 ('крэпак', 0.5444377660751343),
 ('чарнаглаз', 0.5400937795639038),
 ('патон', 0.5291008353233337),
 ('звоскага', 0.5288887619972229),
 ('вальеха', 0.5172730684280396),
 ('святловым', 0.5152472853660583),
 ('бачкоўскі', 0.511944591999054),
 ('нікульшын', 0.5117442011833191),
 ('бокшы', 0.5115799903869629),
 ('грабеншчыкоў', 0.506933331489563),
 ('герстэн', 0.5058147311210632),
 ('заборава', 0.4989950656890869),
 ('эрына', 0.4967189133167267),
 ('рагуля', 0.4914279878139496),
 ('ласкорын', 0.4895833432674408),
 ('грабеншчыкова', 0.48926597833633423),
 ('парафянюк', 0.48552122712135315)]

In [20]:
# Preview only the head of the vocabulary instead of dumping every key into the
# notebook output (the full list is far too long to display and bloats the file).
# NOTE(review): entries appear to be ordered most-frequent first, judging by the
# recorded output — confirm against the vocabulary sort setting used for training.
model.wv.index_to_key[:100]

['год',
 'беларускі',
 'беларусь',
 'чалавек',
 'час',
 'дзень',
 'большыць',
 'мова',
 'новы',
 'вялікі',
 'краіна',
 'жыццё',
 'праца',
 'горад',
 'месца',
 'справа',
 'гісторыя',
 'рэспубліка',
 'слова',
 'кожны',
 'гульня',
 'дзіцё',
 'дзяржаўны',
 'сайт',
 'працаваць',
 'нацыянальны',
 'раён',
 'пытанне',
 'імя',
 'казаць',
 'галоўны',
 'свет',
 'культура',
 'кніга',
 'старонка',
 'мець',
 'раз',
 'другі',
 'атрымаць',
 'розны',
 'адзін',
 'арганізацыя',
 'школа',
 'апошні',
 'адбыцца',
 'ведаць',
 'дом',
 'зрабіць',
 'мінск',
 'цэнтр',
 'матэрыял',
 'вынік',
 'хацець',
 'правіць',
 'рабіць',
 'праект',
 'цяпер',
 'беларус',
 'права',
 'адукацыя',
 'вайна',
 'частка',
 'праграма',
 'жыць',
 'дзяржава',
 'зямля',
 'шмат',
 'міжнародны',
 'сёння',
 'гад',
 'работа',
 'інфармацыя',
 'развіццё',
 'выбар',
 'няма',
 'лепшы',
 'народны',
 'яўляцца',
 'наступны',
 'палітычны',
 'іншых',
 'праблема',
 'пачатак',
 'вёска',
 'народ',
 'магчы',
 'аляксандр',
 'кіраўнік',
 'знаходзіцца',
 'мя

In [None]:
# Look up the "count" attribute stored for the key "прыдумляць".
# NOTE(review): presumably this is the word's frequency in the training corpus and
# the call fails if the key is absent from the vocabulary — confirm; this cell has
# no recorded execution or output in the saved notebook.
model.wv.get_vecattr("прыдумляць", "count")

In [26]:
# Display the collected per-epoch loss values; the recorded entries match the
# "Loss after epoch N" lines printed during training.
# NOTE(review): loss_list is populated outside this cell — presumably by the
# training callback; verify where it is appended to.
loss_list

[45621836.0,
 44355904.0,
 44249936.0,
 44104492.0,
 44107028.0,
 44097436.0,
 44270596.0,
 44089604.0,
 44185456.0,
 44285424.0,
 44182396.0,
 44126032.0,
 44408908.0,
 44199988.0,
 44425624.0,
 44209516.0,
 44227556.0,
 44041368.0,
 44373620.0,
 44456688.0,
 44070200.0,
 43995008.0,
 44134528.0,
 44107632.0,
 43802268.0,
 44047244.0,
 43904480.0,
 43939264.0,
 43987436.0,
 43859148.0,
 43961468.0,
 44073332.0,
 43907992.0,
 43725336.0,
 43849884.0,
 43924944.0,
 43826232.0,
 43496588.0,
 43605268.0,
 43641544.0,
 43543980.0,
 43537204.0,
 43491140.0,
 43373268.0,
 43624628.0,
 43330720.0,
 43449924.0,
 43234040.0,
 43097712.0,
 44565932.0,
 43416728.0,
 43234072.0,
 43260536.0,
 43074152.0,
 43096868.0,
 42714596.0,
 43268708.0,
 42984456.0,
 43917916.0,
 43035192.0,
 42818392.0,
 42656488.0,
 42411144.0,
 42707748.0,
 42523348.0,
 42491780.0,
 42605616.0,
 42294312.0,
 42310540.0,
 42306532.0,
 42327836.0,
 42028380.0,
 43780020.0,
 41960848.0,
 41753348.0,
 41644528.0,
 42072300.0,

In [29]:
# Spot-check a single WORD_MAP entry: per the recorded output, 'сабака' maps to itself.
# NOTE(review): WORD_MAP is defined outside this cell — presumably the
# word-form -> base-form mapping built earlier in the notebook; confirm.
WORD_MAP['сабака']

'сабака'