In [3]:
!pip install -U FlagEmbedding peft

Collecting peft
  Downloading peft-0.13.2-py3-none-any.whl.metadata (13 kB)
Downloading peft-0.13.2-py3-none-any.whl (320 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.7/320.7 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: peft
Successfully installed peft-0.13.2


In [4]:
# example from the repo

from FlagEmbedding import BGEM3FlagModel

model = BGEM3FlagModel('BAAI/bge-m3',
                       use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation

sentences_1 = ["What is BGE M3?", "Defination of BM25"]
sentences_2 = ["BGE M3 is an embedding model supporting dense retrieval, lexical matching and multi-vector interaction.",
               "BM25 is a bag-of-words retrieval function that ranks a set of documents based on the query terms appearing in each document"]

embeddings_1 = model.encode(sentences_1,
                            batch_size=12,
                            max_length=8192, # If you don't need such a long length, you can set a smaller value to speed up the encoding process.
                            )['dense_vecs']
embeddings_2 = model.encode(sentences_2)['dense_vecs']
similarity = embeddings_1 @ embeddings_2.T
print(similarity)

Fetching 30 files:   0%|          | 0/30 [00:00<?, ?it/s]

.gitattributes:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/15.8k [00:00<?, ?B/s]

imgs/.DS_Store:   0%|          | 0.00/6.15k [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/687 [00:00<?, ?B/s]

colbert_linear.pt:   0%|          | 0.00/2.10M [00:00<?, ?B/s]

imgs/bm25.jpg:   0%|          | 0.00/132k [00:00<?, ?B/s]

imgs/long.jpg:   0%|          | 0.00/485k [00:00<?, ?B/s]

imgs/miracl.jpg:   0%|          | 0.00/576k [00:00<?, ?B/s]

imgs/mkqa.jpg:   0%|          | 0.00/608k [00:00<?, ?B/s]

imgs/nqa.jpg:   0%|          | 0.00/158k [00:00<?, ?B/s]

imgs/others.webp:   0%|          | 0.00/21.0k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

long.jpg:   0%|          | 0.00/127k [00:00<?, ?B/s]

onnx/Constant_7_attr__value:   0%|          | 0.00/65.6k [00:00<?, ?B/s]

onnx/config.json:   0%|          | 0.00/698 [00:00<?, ?B/s]

onnx/tokenizer_config.json:   0%|          | 0.00/1.17k [00:00<?, ?B/s]

onnx/special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

model.onnx:   0%|          | 0.00/725k [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

model.onnx_data:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/54.0 [00:00<?, ?B/s]

sparse_linear.pt:   0%|          | 0.00/3.52k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/444 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

  colbert_state_dict = torch.load(os.path.join(model_dir, 'colbert_linear.pt'), map_location='cpu')
  sparse_state_dict = torch.load(os.path.join(model_dir, 'sparse_linear.pt'), map_location='cpu')


[[0.62590355 0.3474958 ]
 [0.34986818 0.6782462 ]]


In [5]:
ambigious_words = {
    "ат": {
        "name": {
            "signal_phrases": ["ысым бер", "фамилия", "адам"],
        },
        "horse": {
            "signal_phrases": ["чаптыруу", "ээр токун", "улак тартыш"],
        },
        "shoot": {
            "signal_phrases": ["ок атуу", "мылтык", "автомат", "пистолет", "жаа", "аскер", "полиция"],
        }
    }
}

## Approach #1: compare sentence embeddings with each phrase and take average similarity score

In [7]:
from sklearn.metrics.pairwise import cosine_similarity


def calculate_avg_sim(sent_embedding, data):
    signal_phrases = data.get('signal_phrases', [])
    if not signal_phrases:
        return 0  # Return zero if there are no signal phrases

    # Encode signal phrases to get their embeddings
    phrase_embeddings = model.encode(signal_phrases, batch_size=len(signal_phrases))['dense_vecs']

    # Calculate cosine similarities between sentence embedding and all signal phrase embeddings
    sims = cosine_similarity([sent_embedding], phrase_embeddings).flatten()

    # Calculate the average similarity score
    avg_sim = sims.mean()
    return avg_sim

# Word-sense disambiguation function
def disambiguator(word, sentence):
    amb_word = ambigious_words.get(word)
    if amb_word is None:
        raise Exception(f"Word '{word}' not found in our dictionary")

    # Encode the sentence to get its embedding
    sent_embedding = model.encode([sentence])['dense_vecs'][0]

    results = []
    for meaning, data in amb_word.items():
        avg_score = calculate_avg_sim(sent_embedding, data)
        results.append((meaning, avg_score))

    # Sort the results by average similarity score in descending order
    results = sorted(results, key=lambda x: x[1], reverse=True)

    # Return the meaning with the highest average similarity score
    return results[0][0]

# Example usage
sentence = "Ат минүү менен алек болду"
word = "ат"
predicted_sense = disambiguator(word, sentence)
print(f"Predicted sense of '{word}' in the sentence: {predicted_sense}")

Predicted sense of 'ат' in the sentence: name


In [11]:
import json


json_file = "sentences.json"
with open(json_file, 'r', encoding='utf-8') as f:
    sentences = json.load(f)


json_file_ambiguous_words_50 = "ambigious_words.json"
with open(json_file_ambiguous_words_50, 'r', encoding='utf-8') as f:
    ambiguous_words_50 = json.load(f)


set(sentences.keys()) == set(ambiguous_words_50.keys())

True

In [12]:
# Merge JSONs
ambigious_words = ambiguous_words_50.copy()
for amb_word, meanings in ambiguous_words_50.items():
    if set(meanings.keys()) != set(sentences[amb_word].keys()):
        print(f"containers of amb_word '{amb_word}' in json mismatch! This key is deleted.")
        del ambigious_words[amb_word]
        continue
    for meaning in meanings.keys():
        ambigious_words[amb_word][meaning]["sentences"] = sentences[amb_word][meaning]["sentences"]

# Save to json
with open('ambigious_words_merged.json', 'w', encoding='utf-8') as f:
    json.dump(ambigious_words, f, ensure_ascii=False, indent=4)

In [13]:
%%time

count_correct = 0
count_all = 0
n = len(ambigious_words.keys())
for amb_word, meanings in ambigious_words.items():
    print(f"{count_all} / {n}: amb_word = {amb_word}")
    for meaning, val in meanings.items():
        for sent in val["sentences"]:
            answer = disambiguator(amb_word, sent)
            count_all += 1
            if answer == meaning:
                count_correct += 1
#     print()

0 / 43: amb_word = ач
2 / 43: amb_word = же
4 / 43: amb_word = ак
6 / 43: amb_word = кап
9 / 43: amb_word = ала
12 / 43: amb_word = кеч
14 / 43: amb_word = кош
16 / 43: amb_word = кал
18 / 43: amb_word = бай
20 / 43: amb_word = сай
22 / 43: amb_word = арык
24 / 43: amb_word = кой
26 / 43: amb_word = ай
29 / 43: amb_word = топ
31 / 43: amb_word = жар
34 / 43: amb_word = тил
47 / 43: amb_word = каз
49 / 43: amb_word = там
51 / 43: amb_word = жаш
54 / 43: amb_word = кара
56 / 43: amb_word = мал
58 / 43: amb_word = сөз
60 / 43: amb_word = бас
62 / 43: amb_word = тек
64 / 43: amb_word = уч
66 / 43: amb_word = жең
68 / 43: amb_word = курак
70 / 43: amb_word = айт
72 / 43: amb_word = түш
75 / 43: amb_word = кур
78 / 43: amb_word = тай
82 / 43: amb_word = кол
84 / 43: amb_word = күн
86 / 43: amb_word = ат
89 / 43: amb_word = жаз
93 / 43: amb_word = кат
106 / 43: amb_word = сан
108 / 43: amb_word = чал
112 / 43: amb_word = кир
115 / 43: amb_word = чек
117 / 43: amb_word = бак
119 / 43: amb_word

In [14]:
print(f"Accuracy = {count_correct / count_all}")

Accuracy = 0.5284552845528455


## Approach #2: compare sentence embeddings with each phrase and take max similarity


In [17]:
import json

# Load the merged ambiguous words and sentences from the JSON file
with open('ambigious_words_merged.json', 'r', encoding='utf-8') as f:
    ambigious_words = json.load(f)

# Word-sense disambiguation function using max similarity
def calculate_max_sim(sent_embedding, data):
    signal_phrases = data.get('signal_phrases', [])
    if not signal_phrases:
        return 0  # Return zero if there are no signal phrases

    # Encode signal phrases to get their embeddings
    phrase_embeddings = model.encode(signal_phrases, batch_size=len(signal_phrases))['dense_vecs']

    # Calculate cosine similarities between sentence embedding and all signal phrase embeddings
    sims = cosine_similarity([sent_embedding], phrase_embeddings).flatten()

    # Return the maximum similarity score
    max_sim = sims.max()
    return max_sim

def disambiguator(word, sentence):
    amb_word = ambigious_words.get(word)
    if amb_word is None:
        raise Exception(f"Word '{word}' not found in our dictionary")

    # Encode the sentence to get its embedding
    sent_embedding = model.encode([sentence])['dense_vecs'][0]

    results = []
    for meaning, data in amb_word.items():
        max_score = calculate_max_sim(sent_embedding, data)
        results.append((meaning, max_score))

    # Sort the results by max similarity score in descending order
    results = sorted(results, key=lambda x: x[1], reverse=True)

    # Return the meaning with the highest max similarity score
    return results[0][0]

# Evaluation function
def evaluate():
    count_correct = 0
    count_all = 0
    n = len(ambigious_words.keys())

    # Iterate over each ambiguous word and its meanings
    for amb_word, meanings in ambigious_words.items():
        print(f"{count_all} / {n}: amb_word = {amb_word}")
        for meaning, val in meanings.items():
            for sent in val["sentences"]:
                answer = disambiguator(amb_word, sent)
                count_all += 1
                if answer == meaning:
                    count_correct += 1

    # Calculate and print accuracy
    accuracy = count_correct / count_all
    print(f"Accuracy = {accuracy:.4f}")

# Run the evaluation
evaluate()


0 / 43: amb_word = ач
2 / 43: amb_word = же
4 / 43: amb_word = ак
6 / 43: amb_word = кап
9 / 43: amb_word = ала
12 / 43: amb_word = кеч
14 / 43: amb_word = кош
16 / 43: amb_word = кал
18 / 43: amb_word = бай
20 / 43: amb_word = сай
22 / 43: amb_word = арык
24 / 43: amb_word = кой
26 / 43: amb_word = ай
29 / 43: amb_word = топ
31 / 43: amb_word = жар
34 / 43: amb_word = тил
47 / 43: amb_word = каз
49 / 43: amb_word = там
51 / 43: amb_word = жаш
54 / 43: amb_word = кара
56 / 43: amb_word = мал
58 / 43: amb_word = сөз
60 / 43: amb_word = бас
62 / 43: amb_word = тек
64 / 43: amb_word = уч
66 / 43: amb_word = жең
68 / 43: amb_word = курак
70 / 43: amb_word = айт
72 / 43: amb_word = түш
75 / 43: amb_word = кур
78 / 43: amb_word = тай
82 / 43: amb_word = кол
84 / 43: amb_word = күн
86 / 43: amb_word = ат
89 / 43: amb_word = жаз
93 / 43: amb_word = кат
106 / 43: amb_word = сан
108 / 43: amb_word = чал
112 / 43: amb_word = кир
115 / 43: amb_word = чек
117 / 43: amb_word = бак
119 / 43: amb_word

## Approach #3: take embeddings of each words of the input sentence, calculate cosine similarity, take max value

In [18]:
import json

# Load the merged ambiguous words and sentences from the JSON file
with open('ambigious_words_merged.json', 'r', encoding='utf-8') as f:
    ambigious_words = json.load(f)

# Calculate max similarity by comparing each word's embedding in the sentence to the signal phrases
def calculate_max_word_sim(sentence_words, data):
    signal_phrases = data.get('signal_phrases', [])
    if not signal_phrases:
        return 0  # Return zero if there are no signal phrases

    # Encode signal phrases to get their embeddings
    phrase_embeddings = model.encode(signal_phrases, batch_size=len(signal_phrases))['dense_vecs']

    max_sim = 0  # Track the max similarity across all words in the sentence
    for word in sentence_words:
        # Encode each word in the sentence to get its embedding
        word_embedding = model.encode([word])['dense_vecs'][0]

        # Compute cosine similarity between the word embedding and signal phrase embeddings
        sims = cosine_similarity([word_embedding], phrase_embeddings).flatten()

        # Get the maximum similarity for this word and update the overall max if it's higher
        word_max_sim = sims.max()
        if word_max_sim > max_sim:
            max_sim = word_max_sim

    return max_sim

# Word-sense disambiguation function with word-by-word comparison
def disambiguator_word_level(word, sentence):
    amb_word = ambigious_words.get(word)
    if amb_word is None:
        raise Exception(f"Word '{word}' not found in our dictionary")

    # Split the sentence into words
    sentence_words = sentence.split()

    results = []
    for meaning, data in amb_word.items():
        max_score = calculate_max_word_sim(sentence_words, data)
        results.append((meaning, max_score))

    # Sort the results by max similarity score in descending order
    results = sorted(results, key=lambda x: x[1], reverse=True)

    # Return the meaning with the highest max similarity score
    return results[0][0]

# Evaluation function
def evaluate_word_level():
    count_correct = 0
    count_all = 0
    n = len(ambigious_words.keys())

    # Iterate over each ambiguous word and its meanings
    for amb_word, meanings in ambigious_words.items():
        print(f"{count_all} / {n}: amb_word = {amb_word}")
        for meaning, val in meanings.items():
            for sent in val["sentences"]:
                answer = disambiguator_word_level(amb_word, sent)
                count_all += 1
                if answer == meaning:
                    count_correct += 1

    # Calculate and print accuracy
    accuracy = count_correct / count_all
    print(f"Accuracy = {accuracy:.4f}")

# Run the evaluation using the word-level comparison approach
evaluate_word_level()


0 / 43: amb_word = ач
2 / 43: amb_word = же
4 / 43: amb_word = ак
6 / 43: amb_word = кап
9 / 43: amb_word = ала
12 / 43: amb_word = кеч
14 / 43: amb_word = кош
16 / 43: amb_word = кал
18 / 43: amb_word = бай
20 / 43: amb_word = сай
22 / 43: amb_word = арык
24 / 43: amb_word = кой
26 / 43: amb_word = ай
29 / 43: amb_word = топ
31 / 43: amb_word = жар
34 / 43: amb_word = тил
47 / 43: amb_word = каз
49 / 43: amb_word = там
51 / 43: amb_word = жаш
54 / 43: amb_word = кара
56 / 43: amb_word = мал
58 / 43: amb_word = сөз
60 / 43: amb_word = бас
62 / 43: amb_word = тек
64 / 43: amb_word = уч
66 / 43: amb_word = жең
68 / 43: amb_word = курак
70 / 43: amb_word = айт
72 / 43: amb_word = түш
75 / 43: amb_word = кур
78 / 43: amb_word = тай
82 / 43: amb_word = кол
84 / 43: amb_word = күн
86 / 43: amb_word = ат
89 / 43: amb_word = жаз
93 / 43: amb_word = кат
106 / 43: amb_word = сан
108 / 43: amb_word = чал
112 / 43: amb_word = кир
115 / 43: amb_word = чек
117 / 43: amb_word = бак
119 / 43: amb_word

## Approach #4: embedding of a sliding window of size 2

In [19]:
import json

# Load the merged ambiguous words and sentences from the JSON file
with open('ambigious_words_merged.json', 'r', encoding='utf-8') as f:
    ambigious_words = json.load(f)

# Calculate max similarity by comparing each 2-word window's embedding in the sentence to the signal phrases
def calculate_max_window_sim(sentence_words, data):
    signal_phrases = data.get('signal_phrases', [])
    if not signal_phrases:
        return 0  # Return zero if there are no signal phrases

    # Encode signal phrases to get their embeddings
    phrase_embeddings = model.encode(signal_phrases, batch_size=len(signal_phrases))['dense_vecs']

    max_sim = 0  # Track the max similarity across all 2-word windows in the sentence
    # Sliding window of 2 words
    for i in range(len(sentence_words) - 1):
        # Create a 2-word window
        window = " ".join(sentence_words[i:i+2])

        # Encode the 2-word window to get its embedding
        window_embedding = model.encode([window])['dense_vecs'][0]

        # Compute cosine similarity between the 2-word window embedding and signal phrase embeddings
        sims = cosine_similarity([window_embedding], phrase_embeddings).flatten()

        # Get the maximum similarity for this window and update the overall max if it's higher
        window_max_sim = sims.max()
        if window_max_sim > max_sim:
            max_sim = window_max_sim

    return max_sim

# Word-sense disambiguation function with sliding window of 2 words
def disambiguator_window_level(word, sentence):
    amb_word = ambigious_words.get(word)
    if amb_word is None:
        raise Exception(f"Word '{word}' not found in our dictionary")

    # Split the sentence into words
    sentence_words = sentence.split()

    results = []
    for meaning, data in amb_word.items():
        max_score = calculate_max_window_sim(sentence_words, data)
        results.append((meaning, max_score))

    # Sort the results by max similarity score in descending order
    results = sorted(results, key=lambda x: x[1], reverse=True)

    # Return the meaning with the highest max similarity score
    return results[0][0]

# Evaluation function
def evaluate_window_level():
    count_correct = 0
    count_all = 0
    n = len(ambigious_words.keys())

    # Iterate over each ambiguous word and its meanings
    for amb_word, meanings in ambigious_words.items():
        print(f"{count_all} / {n}: amb_word = {amb_word}")
        for meaning, val in meanings.items():
            for sent in val["sentences"]:
                answer = disambiguator_window_level(amb_word, sent)
                count_all += 1
                if answer == meaning:
                    count_correct += 1

    # Calculate and print accuracy
    accuracy = count_correct / count_all
    print(f"Accuracy = {accuracy:.4f}")

# Run the evaluation using the sliding window of 2-word approach
evaluate_window_level()


0 / 43: amb_word = ач
2 / 43: amb_word = же
4 / 43: amb_word = ак
6 / 43: amb_word = кап
9 / 43: amb_word = ала
12 / 43: amb_word = кеч
14 / 43: amb_word = кош
16 / 43: amb_word = кал
18 / 43: amb_word = бай
20 / 43: amb_word = сай
22 / 43: amb_word = арык
24 / 43: amb_word = кой
26 / 43: amb_word = ай
29 / 43: amb_word = топ
31 / 43: amb_word = жар
34 / 43: amb_word = тил
47 / 43: amb_word = каз
49 / 43: amb_word = там
51 / 43: amb_word = жаш
54 / 43: amb_word = кара
56 / 43: amb_word = мал
58 / 43: amb_word = сөз
60 / 43: amb_word = бас
62 / 43: amb_word = тек
64 / 43: amb_word = уч
66 / 43: amb_word = жең
68 / 43: amb_word = курак
70 / 43: amb_word = айт
72 / 43: amb_word = түш
75 / 43: amb_word = кур
78 / 43: amb_word = тай
82 / 43: amb_word = кол
84 / 43: amb_word = күн
86 / 43: amb_word = ат
89 / 43: amb_word = жаз
93 / 43: amb_word = кат
106 / 43: amb_word = сан
108 / 43: amb_word = чал
112 / 43: amb_word = кир
115 / 43: amb_word = чек
117 / 43: amb_word = бак
119 / 43: amb_word

## Approach #5: sliding window embeddings of size 3

In [20]:
# Load the merged ambiguous words and sentences from the JSON file
with open('ambigious_words_merged.json', 'r', encoding='utf-8') as f:
    ambigious_words = json.load(f)

# Calculate max similarity by comparing each 3-word window's embedding in the sentence to the signal phrases
def calculate_max_window_sim(sentence_words, data):
    signal_phrases = data.get('signal_phrases', [])
    if not signal_phrases:
        return 0  # Return zero if there are no signal phrases

    # Encode signal phrases to get their embeddings
    phrase_embeddings = model.encode(signal_phrases, batch_size=len(signal_phrases))['dense_vecs']

    max_sim = 0  # Track the max similarity across all 3-word windows in the sentence
    # Sliding window of 3 words
    for i in range(len(sentence_words) - 2):  # Adjusted for a 3-word window
        # Create a 3-word window
        window = " ".join(sentence_words[i:i+3])

        # Encode the 3-word window to get its embedding
        window_embedding = model.encode([window])['dense_vecs'][0]

        # Compute cosine similarity between the 3-word window embedding and signal phrase embeddings
        sims = cosine_similarity([window_embedding], phrase_embeddings).flatten()

        # Get the maximum similarity for this window and update the overall max if it's higher
        window_max_sim = sims.max()
        if window_max_sim > max_sim:
            max_sim = window_max_sim

    return max_sim

# Word-sense disambiguation function with sliding window of 3 words
def disambiguator_window_level(word, sentence):
    amb_word = ambigious_words.get(word)
    if amb_word is None:
        raise Exception(f"Word '{word}' not found in our dictionary")

    # Split the sentence into words
    sentence_words = sentence.split()

    results = []
    for meaning, data in amb_word.items():
        max_score = calculate_max_window_sim(sentence_words, data)
        results.append((meaning, max_score))

    # Sort the results by max similarity score in descending order
    results = sorted(results, key=lambda x: x[1], reverse=True)

    # Return the meaning with the highest max similarity score
    return results[0][0]

# Evaluation function
def evaluate_window_level():
    count_correct = 0
    count_all = 0
    n = len(ambigious_words.keys())

    # Iterate over each ambiguous word and its meanings
    for amb_word, meanings in ambigious_words.items():
        print(f"{count_all} / {n}: amb_word = {amb_word}")
        for meaning, val in meanings.items():
            for sent in val["sentences"]:
                answer = disambiguator_window_level(amb_word, sent)
                count_all += 1
                if answer == meaning:
                    count_correct += 1

    # Calculate and print accuracy
    accuracy = count_correct / count_all
    print(f"Accuracy = {accuracy:.4f}")

# Run the evaluation using the sliding window of 3-word approach
evaluate_window_level()


0 / 43: amb_word = ач
2 / 43: amb_word = же
4 / 43: amb_word = ак
6 / 43: amb_word = кап
9 / 43: amb_word = ала
12 / 43: amb_word = кеч
14 / 43: amb_word = кош
16 / 43: amb_word = кал
18 / 43: amb_word = бай
20 / 43: amb_word = сай
22 / 43: amb_word = арык
24 / 43: amb_word = кой
26 / 43: amb_word = ай
29 / 43: amb_word = топ
31 / 43: amb_word = жар
34 / 43: amb_word = тил
47 / 43: amb_word = каз
49 / 43: amb_word = там
51 / 43: amb_word = жаш
54 / 43: amb_word = кара
56 / 43: amb_word = мал
58 / 43: amb_word = сөз
60 / 43: amb_word = бас
62 / 43: amb_word = тек
64 / 43: amb_word = уч
66 / 43: amb_word = жең
68 / 43: amb_word = курак
70 / 43: amb_word = айт
72 / 43: amb_word = түш
75 / 43: amb_word = кур
78 / 43: amb_word = тай
82 / 43: amb_word = кол
84 / 43: amb_word = күн
86 / 43: amb_word = ат
89 / 43: amb_word = жаз
93 / 43: amb_word = кат
106 / 43: amb_word = сан
108 / 43: amb_word = чал
112 / 43: amb_word = кир
115 / 43: amb_word = чек
117 / 43: amb_word = бак
119 / 43: amb_word