In [4]:
!pip install datasets



In [5]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from datasets import load_dataset
from nltk.translate.bleu_score import sentence_bleu
import torch

# Set device to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"

In [53]:
def read_file_and_split(filename):
    kazakh_corpus = []
    eng_corpus = []

    try:
        with open(filename, 'r', encoding='utf-8') as file:
            for line in file:
                parts = line.strip().split('\t')
                if len(parts) == 3:  # Ensure there are exactly 3 parts: 2 sentences and a number
                    kazakh_sentence = parts[0].strip()
                    eng_sentence = parts[1].strip()
                    kazakh_corpus.append(kazakh_sentence)
                    eng_corpus.append(eng_sentence)
                else:
                    print("Invalid format in line:", line)

    except FileNotFoundError:
        print("File not found:", filename)

    return kazakh_corpus, eng_corpus

kazakh_corpus, eng_corpus = read_file_and_split("kazakh_eng_corpus.txt")

In [4]:
model = AutoModelForSeq2SeqLM.from_pretrained("amandyk/mt5-kazakh-english-translation")
tokenizer = AutoTokenizer.from_pretrained("amandyk/mt5-kazakh-english-translation")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/798 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.33G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/475 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/74.0 [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [5]:
translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang="eng", tgt_lang="kaz", batch_size=64, device=device, max_length=400)

test_data = eng_corpus

# Tokenize reference sentences
reference_sentences = [[tokenizer.tokenize(sentence)] for sentence in kazakh_corpus]
print(reference_sentences[0])

[['▁со', 'ңғы', '▁бес', '▁жылда', '▁', 'ана', '▁', 'өлім', 'і', '▁', 'шама', 'мен', '▁3', '▁есе', '▁аз', 'айды', '▁', ',', '▁бала', '▁туу', '▁көрсетк', 'іші', '▁бір', '▁жары', 'м', '▁есе', '▁өс', 'ті', '▁', '.']]


In [6]:
total_bleu = 0.0
num_samples = len(test_data)
for i in range(num_samples):
  translated_sentence = translator(test_data[i])[0]['translation_text']
  translated_tokens = tokenizer.tokenize(translated_sentence)
  bleu_score = sentence_bleu(reference_sentences[i], translated_tokens)
  print("bleu score for sentence ", i, " is ", bleu_score)
  total_bleu += bleu_score

average_bleu = total_bleu / num_samples
print("Average bleu score is ", average_bleu)

bleu score for sentence  0  is  0.324745633712975


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


bleu score for sentence  1  is  8.345125607304766e-233
bleu score for sentence  2  is  0.006464049263918806
bleu score for sentence  3  is  3.849261489771067e-79
bleu score for sentence  4  is  0.038736264359879834
bleu score for sentence  5  is  2.841632707086713e-78
bleu score for sentence  6  is  2.349189026226524e-79
bleu score for sentence  7  is  2.4175462357930937e-155
bleu score for sentence  8  is  1.2261937656254843e-155
bleu score for sentence  9  is  1.0603527689423161e-231




bleu score for sentence  10  is  3.495738285609867e-79
bleu score for sentence  11  is  5.092529201164552e-232
bleu score for sentence  12  is  0
bleu score for sentence  13  is  1.2237486422678451e-155
bleu score for sentence  14  is  9.023577373608169e-80
bleu score for sentence  15  is  1.1337861261109773e-231
bleu score for sentence  16  is  1.083622721174435e-155
bleu score for sentence  17  is  2.878979728837258e-155
bleu score for sentence  18  is  2.614591671051252e-232
bleu score for sentence  19  is  2.3949323699607068e-232
bleu score for sentence  20  is  0.036828971950208456
bleu score for sentence  21  is  1.3866438722948334e-78
bleu score for sentence  22  is  9.788429383461836e-232
bleu score for sentence  23  is  9.840085157783916e-232
bleu score for sentence  24  is  0
bleu score for sentence  25  is  0.051209953803315056
bleu score for sentence  26  is  8.34076112986429e-232
bleu score for sentence  27  is  1.9566726067414925e-78
bleu score for sentence  28  is  1.024

In [7]:
translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang="kaz", tgt_lang="eng", batch_size=64, device=device, max_length=400)

test_data = kazakh_corpus

# Tokenize reference sentences
reference_sentences = [[tokenizer.tokenize(sentence)] for sentence in eng_corpus]
print(reference_sentences[0])

[['▁over', '▁the', '▁last', '▁five', '▁years', '▁', ',', '▁matern', 'al', '▁', 'mortalit', 'y', '▁rate', '▁de', 'creased', '▁three', 'fold', '▁', ',', '▁', 'while', '▁the', '▁birth', 'rate', '▁is', '▁1.5', '▁times', '▁as', '▁high', '▁', '.']]


In [8]:
total_bleu = 0.0
num_samples = len(test_data)
for i in range(num_samples):
  translated_sentence = translator(test_data[i])[0]['translation_text']
  translated_tokens = tokenizer.tokenize(translated_sentence)
  bleu_score = sentence_bleu(reference_sentences[i], translated_tokens)
  print("bleu score for sentence ", i, " is ", bleu_score)
  total_bleu += bleu_score

average_bleu = total_bleu / num_samples
print("Average bleu score is ", average_bleu)

bleu score for sentence  0  is  0.2115330363637516


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


bleu score for sentence  1  is  1.1951155126698611e-231
bleu score for sentence  2  is  1.012071042130996e-231
bleu score for sentence  3  is  4.072434326288623e-155
bleu score for sentence  4  is  1.8531157553326982e-78
bleu score for sentence  5  is  0.21625888328869325
bleu score for sentence  6  is  8.762099411197548e-79
bleu score for sentence  7  is  0.07229703035677075
bleu score for sentence  8  is  5.085858650042012e-155
bleu score for sentence  9  is  1.4772437945117602e-155




bleu score for sentence  10  is  3.3183218536431102e-155
bleu score for sentence  11  is  1.1640469867513693e-231
bleu score for sentence  12  is  1.1484186507842885e-231
bleu score for sentence  13  is  1.2097822504111573e-231
bleu score for sentence  14  is  4.450692829663332e-155
bleu score for sentence  15  is  4.198508780749403e-155
bleu score for sentence  16  is  3.4143786380283786e-155
bleu score for sentence  17  is  3.587524556320307e-156
bleu score for sentence  18  is  6.484592771860512e-155
bleu score for sentence  19  is  0
bleu score for sentence  20  is  0.11569770095268926
bleu score for sentence  21  is  0.2626909894424158
bleu score for sentence  22  is  2.1986007635391227e-232
bleu score for sentence  23  is  7.665233328173359e-156
bleu score for sentence  24  is  0
bleu score for sentence  25  is  1.0611326633434631e-78
bleu score for sentence  26  is  1.1200407237786664e-231
bleu score for sentence  27  is  4.110904727268676e-155
bleu score for sentence  28  is  1

In [1]:
!pip install googletrans

Collecting googletrans
  Downloading googletrans-3.0.0.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans)
  Downloading httpx-0.13.3-py3-none-any.whl (55 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.1/55.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Collecting hstspreload (from httpx==0.13.3->googletrans)
  Downloading hstspreload-2024.2.1-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m41.5 MB/s[0m eta [36m0:00:00[0m
Collecting chardet==3.* (from httpx==0.13.3->googletrans)
  Downloading chardet-3.0.4-py2.py3-none-any.whl (133 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.4/133.4 kB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting idna==2.* (from httpx==0.13.3->googletrans)
  Downloading idna-2.10-py2.py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58

In [3]:
from googletrans import Translator

In [7]:
from datasets import load_dataset
flores_dataset = load_dataset("facebook/flores", "eng_Latn-kaz_Cyrl")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/11.2k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/11.8k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/25.6M [00:00<?, ?B/s]

Generating dev split: 0 examples [00:00, ? examples/s]

Generating devtest split: 0 examples [00:00, ? examples/s]

In [9]:
test_data = flores_dataset["devtest"]
reference_sentences = [[sentence.split()] for sentence in test_data['sentence_eng_Latn']]

In [10]:
print(reference_sentences[0])

[['"We', 'now', 'have', '4-month-old', 'mice', 'that', 'are', 'non-diabetic', 'that', 'used', 'to', 'be', 'diabetic,"', 'he', 'added.']]


In [23]:
from deep_translator import GoogleTranslator

In [22]:
!pip install deep_translator

Collecting deep_translator
  Downloading deep_translator-1.11.4-py3-none-any.whl (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m20.5/42.3 kB[0m [31m648.1 kB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m534.2 kB/s[0m eta [36m0:00:00[0m
Installing collected packages: deep_translator
Successfully installed deep_translator-1.11.4


In [24]:
GoogleTranslator(source='auto', target='de').translate("keep it up, you are awesome")

'Mach weiter so, du bist großartig'

In [25]:
total_bleu = 0.0
num_samples = len(test_data)
for i in range(num_samples):
  translated_sentence = GoogleTranslator(source='kk', target='en').translate(test_data['sentence_kaz_Cyrl'][i])
  translated_tokens = translated_sentence.split()
  bleu_score = sentence_bleu(reference_sentences[i], translated_tokens)
  print("bleu score for sentence ", i, " is ", bleu_score)
  total_bleu += bleu_score

average_bleu = total_bleu / num_samples
print("Average bleu score is ", average_bleu)

bleu score for sentence  0  is  0.375022891676693
bleu score for sentence  1  is  0.40949719380685723
bleu score for sentence  2  is  0.33984949826268246
bleu score for sentence  3  is  0.5128816178222517


The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


bleu score for sentence  4  is  4.319363785610287e-155
bleu score for sentence  5  is  5.534527906726296e-155


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


bleu score for sentence  6  is  1.2778269941762074e-231
bleu score for sentence  7  is  4.884188340600192e-155
bleu score for sentence  8  is  0.7071067811865475
bleu score for sentence  9  is  0.23397625978961173
bleu score for sentence  10  is  0.15217874960799377
bleu score for sentence  11  is  0.5815623568378758
bleu score for sentence  12  is  0.44730704730643744
bleu score for sentence  13  is  0.3343064022537275
bleu score for sentence  14  is  0.18955977348405045
bleu score for sentence  15  is  0.3590807387457907
bleu score for sentence  16  is  0.3007034797694709
bleu score for sentence  17  is  8.723297561929817e-155
bleu score for sentence  18  is  0.396292649574711
bleu score for sentence  19  is  0.18506839709443124
bleu score for sentence  20  is  0.7744031410142033
bleu score for sentence  21  is  0.49616830003403634
bleu score for sentence  22  is  0.5538142279874649
bleu score for sentence  23  is  0.42880952290202645
bleu score for sentence  24  is  2.72286419998237

In [26]:
reference_sentences = [[sentence.split()] for sentence in test_data['sentence_kaz_Cyrl']]


In [27]:
total_bleu = 0.0
num_samples = len(test_data)
for i in range(num_samples):
  translated_sentence = GoogleTranslator(source='en', target='kk').translate(test_data['sentence_eng_Latn'][i])
  translated_tokens = translated_sentence.split()
  bleu_score = sentence_bleu(reference_sentences[i], translated_tokens)
  print("bleu score for sentence ", i, " is ", bleu_score)
  total_bleu += bleu_score

average_bleu = total_bleu / num_samples
print("Average bleu score is ", average_bleu)

bleu score for sentence  0  is  0.4855139201181536
bleu score for sentence  1  is  0.13013034134910276
bleu score for sentence  2  is  0.23636947852983192
bleu score for sentence  3  is  0.3957841915611783
bleu score for sentence  4  is  0.2345085086564769
bleu score for sentence  5  is  0.4052127766461669


The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


bleu score for sentence  6  is  7.711523862191631e-155
bleu score for sentence  7  is  5.919709255307458e-155
bleu score for sentence  8  is  7.241926111174567e-155
bleu score for sentence  9  is  5.488048078674841e-155
bleu score for sentence  10  is  0.21688946947316234
bleu score for sentence  11  is  0.2415725261015974
bleu score for sentence  12  is  2.9456173286121714e-78
bleu score for sentence  13  is  2.66669249540442e-78
bleu score for sentence  14  is  4.601823391841808e-155
bleu score for sentence  15  is  0.25419139193618484
bleu score for sentence  16  is  2.1047259071741634e-78
bleu score for sentence  17  is  4.704862204929452e-78
bleu score for sentence  18  is  0.1535259783865636
bleu score for sentence  19  is  2.997974131986324e-78
bleu score for sentence  20  is  4.2492320052205223e-78
bleu score for sentence  21  is  5.969061643530969e-155
bleu score for sentence  22  is  6.051297874459006e-155
bleu score for sentence  23  is  0.19673862122802652
bleu score for se

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


bleu score for sentence  29  is  1.2508498911928379e-231
bleu score for sentence  30  is  0.16995165296029044
bleu score for sentence  31  is  0.10456442110178552
bleu score for sentence  32  is  0.4070672072665171
bleu score for sentence  33  is  0.24490093513974195
bleu score for sentence  34  is  0.525624059490303
bleu score for sentence  35  is  0.2907153684841096
bleu score for sentence  36  is  3.065777452919717e-78
bleu score for sentence  37  is  7.559895759068455e-155
bleu score for sentence  38  is  1.384292958842266e-231
bleu score for sentence  39  is  6.484592771860512e-155
bleu score for sentence  40  is  3.424330012982743e-78
bleu score for sentence  41  is  4.661032518640916e-78
bleu score for sentence  42  is  2.762347300353295e-78
bleu score for sentence  43  is  3.6799428630948515e-78
bleu score for sentence  44  is  0.48442732379638637
bleu score for sentence  45  is  0.5591430457928661
bleu score for sentence  46  is  2.874927559528803e-78
bleu score for sentence  

In [28]:
!pip install yandexfreetranslate

Collecting yandexfreetranslate
  Downloading yandexfreetranslate-1.5-py3-none-any.whl (7.8 kB)
Installing collected packages: yandexfreetranslate
Successfully installed yandexfreetranslate-1.5


In [29]:
from yandexfreetranslate import YandexFreeTranslate

yt = YandexFreeTranslate(api = "ios")

In [30]:
reference_sentences = [[sentence.split()] for sentence in test_data['sentence_eng_Latn']]

In [31]:
total_bleu = 0.0
num_samples = len(test_data)
for i in range(num_samples):
  translated_sentence = yt.translate('kk', 'en', test_data['sentence_kaz_Cyrl'][i])
  translated_tokens = translated_sentence.split()
  bleu_score = sentence_bleu(reference_sentences[i], translated_tokens)
  print("bleu score for sentence ", i, " is ", bleu_score)
  total_bleu += bleu_score

average_bleu = total_bleu / num_samples
print("Average bleu score is ", average_bleu)

bleu score for sentence  0  is  3.480275084383042e-78
bleu score for sentence  1  is  0.30553560447494055
bleu score for sentence  2  is  0.3716449043614214
bleu score for sentence  3  is  0.42192707795219725
bleu score for sentence  4  is  6.578997700301928e-155
bleu score for sentence  5  is  6.365610974651558e-155
bleu score for sentence  6  is  1.2778269941762074e-231
bleu score for sentence  7  is  4.884188340600192e-155
bleu score for sentence  8  is  0.23901088824528133
bleu score for sentence  9  is  5.974540365813254e-155
bleu score for sentence  10  is  3.3413852621966094e-78
bleu score for sentence  11  is  0.23713320246552005
bleu score for sentence  12  is  0.20257904661864384
bleu score for sentence  13  is  4.1097221129325703e-78
bleu score for sentence  14  is  0.24201306824541216
bleu score for sentence  15  is  0.34428353153440316
bleu score for sentence  16  is  0.2537948869141026
bleu score for sentence  17  is  0.32263864160302524
bleu score for sentence  18  is  0

In [32]:
reference_sentences = [[sentence.split()] for sentence in test_data['sentence_kaz_Cyrl']]

In [33]:
total_bleu = 0.0
num_samples = len(test_data)
for i in range(num_samples):
  translated_sentence = yt.translate('en', 'kk', test_data['sentence_eng_Latn'][i])
  translated_tokens = translated_sentence.split()
  bleu_score = sentence_bleu(reference_sentences[i], translated_tokens)
  print("bleu score for sentence ", i, " is ", bleu_score)
  total_bleu += bleu_score

average_bleu = total_bleu / num_samples
print("Average bleu score is ", average_bleu)

bleu score for sentence  0  is  3.3042234935730684e-78
bleu score for sentence  1  is  0.15048435361489218
bleu score for sentence  2  is  0.26613685332453085
bleu score for sentence  3  is  0.12180838505033426
bleu score for sentence  4  is  0.1142292498220673
bleu score for sentence  5  is  3.478365467705685e-78
bleu score for sentence  6  is  9.170599044431425e-155
bleu score for sentence  7  is  5.510493590849694e-155
bleu score for sentence  8  is  7.241926111174567e-155
bleu score for sentence  9  is  0.29053741985902315
bleu score for sentence  10  is  0.3359540718229689
bleu score for sentence  11  is  0.2927057121559396
bleu score for sentence  12  is  3.725551848022915e-78
bleu score for sentence  13  is  3.804027562098662e-78
bleu score for sentence  14  is  0.09615094003919297
bleu score for sentence  15  is  0.19233711633823908
bleu score for sentence  16  is  3.337887848662347e-78
bleu score for sentence  17  is  0.3059194879108606
bleu score for sentence  18  is  0.11146

In [35]:
test_data = eng_corpus

# Tokenize reference sentences
reference_sentences = [[sentence.split()] for sentence in kazakh_corpus]

In [36]:
total_bleu = 0.0
num_samples = len(test_data)
for i in range(num_samples):
  translated_sentence = GoogleTranslator(source='en', target='kk').translate(test_data[i])
  translated_tokens = translated_sentence.split()
  bleu_score = sentence_bleu(reference_sentences[i], translated_tokens)
  print("bleu score for sentence ", i, " is ", bleu_score)
  total_bleu += bleu_score

average_bleu = total_bleu / num_samples
print("Average bleu score is ", average_bleu)

bleu score for sentence  0  is  0.14568336529071027


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


bleu score for sentence  1  is  3.410235676232928e-232
bleu score for sentence  2  is  1.8795176866057151e-156
bleu score for sentence  3  is  0
bleu score for sentence  4  is  3.51511731805256e-233
bleu score for sentence  5  is  0
bleu score for sentence  6  is  8.187865902185692e-233
bleu score for sentence  7  is  0
bleu score for sentence  8  is  0
bleu score for sentence  9  is  0
bleu score for sentence  10  is  0
bleu score for sentence  11  is  0
bleu score for sentence  12  is  0
bleu score for sentence  13  is  0
bleu score for sentence  14  is  2.427508900860887e-233
bleu score for sentence  15  is  0
bleu score for sentence  16  is  0
bleu score for sentence  17  is  0
bleu score for sentence  18  is  0
bleu score for sentence  19  is  0
bleu score for sentence  20  is  0.01744288919478861
bleu score for sentence  21  is  9.918892480173173e-232
bleu score for sentence  22  is  0
bleu score for sentence  23  is  0
bleu score for sentence  24  is  0
bleu score for sentence  

In [40]:
GoogleTranslator(source='en', target='kk').translate(test_data[22])

'бұл бәріміз күтетін ерекше мереке.'

In [41]:
reference_sentences[22]

[['құрметті', 'қазақстандықтар', '!']]

In [43]:
test_data[22]

'it is a special holiday that all of us are expecting .'

In [46]:
reference_sentences = [[sentence.split()] for sentence in eng_corpus]
test_data = kazakh_corpus

In [55]:
total_bleu = 0.0
num_samples = len(test_data)
for i in range(num_samples):
  translated_sentence = GoogleTranslator(source='kk', target='en').translate(test_data[i])
  if translated_sentence:
    translated_tokens = translated_sentence.split()
    bleu_score = sentence_bleu(reference_sentences[i], translated_tokens)
    print("bleu score for sentence ", i, " is ", bleu_score)
    total_bleu += bleu_score

average_bleu = total_bleu / num_samples
print("Average bleu score is ", average_bleu)

The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


bleu score for sentence  0  is  2.5751259695884524e-78
bleu score for sentence  1  is  1.6143590570897344e-78


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


bleu score for sentence  2  is  8.669612184277444e-232
bleu score for sentence  3  is  0
bleu score for sentence  4  is  3.8200273859445466e-155
bleu score for sentence  5  is  8.324264127738903e-232
bleu score for sentence  6  is  4.161991890344397e-155
bleu score for sentence  7  is  5.2856814140221295e-155
bleu score for sentence  8  is  9.893133360884868e-232
bleu score for sentence  9  is  3.237041400104715e-232
bleu score for sentence  10  is  9.50440384721771e-232
bleu score for sentence  11  is  9.418382295637229e-232
bleu score for sentence  12  is  9.594503055152632e-232
bleu score for sentence  13  is  3.884252021064659e-155
bleu score for sentence  14  is  1.012071042130996e-231
bleu score for sentence  15  is  0
bleu score for sentence  16  is  0
bleu score for sentence  17  is  7.843286901175869e-234
bleu score for sentence  18  is  1.1640469867513693e-231
bleu score for sentence  19  is  0
bleu score for sentence  20  is  5.593658005297295e-155
bleu score for sentence  2

In [57]:
test_data[63]

'тағдырымыз ұқсас , тарихымыз ортақ " деді .'

In [59]:
GoogleTranslator(source='kk', target='en').translate(test_data[63])

'"We have a similar destiny, we have a common history," he said.'

In [60]:
reference_sentences[63]

[['in',
  'the',
  'course',
  'of',
  'the',
  'meeting',
  'president',
  'abdullah',
  'gul',
  'was',
  'awarded',
  'the',
  'honorary',
  'doctorate',
  'degree',
  'by',
  'this',
  'university',
  '.']]

In [61]:
total_bleu = 0.0
num_samples = len(test_data)
for i in range(num_samples):
  translated_sentence = yt.translate('kk', 'en', test_data[i])
  translated_tokens = translated_sentence.split()
  bleu_score = sentence_bleu(reference_sentences[i], translated_tokens)
  print("bleu score for sentence ", i, " is ", bleu_score)
  total_bleu += bleu_score

average_bleu = total_bleu / num_samples
print("Average bleu score is ", average_bleu)

bleu score for sentence  0  is  0.17098323692758396


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


bleu score for sentence  1  is  1.688024152370915e-78


The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


bleu score for sentence  2  is  3.3566500347661918e-155


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


bleu score for sentence  3  is  9.689041594391036e-232
bleu score for sentence  4  is  0.0960587088155178
bleu score for sentence  5  is  9.798412298238562e-232
bleu score for sentence  6  is  1.4166189181615513e-78
bleu score for sentence  7  is  0.09279771067975602
bleu score for sentence  8  is  6.13274920178966e-155
bleu score for sentence  9  is  4.554181535842438e-232
bleu score for sentence  10  is  1.1102577717991281e-231
bleu score for sentence  11  is  1.0669733992029681e-231
bleu score for sentence  12  is  9.257324954728539e-232
bleu score for sentence  13  is  3.743881382418666e-155
bleu score for sentence  14  is  1.012071042130996e-231
bleu score for sentence  15  is  0
bleu score for sentence  16  is  8.06798322521923e-232
bleu score for sentence  17  is  1.7783359869785101e-233
bleu score for sentence  18  is  6.968148412761692e-155
bleu score for sentence  19  is  0
bleu score for sentence  20  is  5.739413702632742e-155
bleu score for sentence  21  is  4.464667296032

In [62]:
reference_sentences = [[sentence.split()] for sentence in kazakh_corpus]
test_data = eng_corpus

In [63]:
total_bleu = 0.0
num_samples = len(test_data)
for i in range(num_samples):
  translated_sentence = yt.translate('en', 'kk', test_data[i])
  translated_tokens = translated_sentence.split()
  bleu_score = sentence_bleu(reference_sentences[i], translated_tokens)
  print("bleu score for sentence ", i, " is ", bleu_score)
  total_bleu += bleu_score

average_bleu = total_bleu / num_samples
print("Average bleu score is ", average_bleu)

bleu score for sentence  0  is  0.26017826742009753


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


bleu score for sentence  1  is  4.7929675183923966e-232
bleu score for sentence  2  is  3.388207120684361e-156
bleu score for sentence  3  is  6.846513658671787e-233
bleu score for sentence  4  is  4.42893013950398e-156
bleu score for sentence  5  is  0
bleu score for sentence  6  is  6.128085447566563e-156
bleu score for sentence  7  is  2.362550272583459e-232
bleu score for sentence  8  is  3.0984417036474726e-232
bleu score for sentence  9  is  7.992219124248642e-232
bleu score for sentence  10  is  2.1141028718581855e-232
bleu score for sentence  11  is  4.753148692240233e-232
bleu score for sentence  12  is  0
bleu score for sentence  13  is  2.5802019721284275e-232
bleu score for sentence  14  is  6.352314620699288e-233
bleu score for sentence  15  is  0
bleu score for sentence  16  is  2.3268642989202446e-232
bleu score for sentence  17  is  8.844844403089351e-232
bleu score for sentence  18  is  0
bleu score for sentence  19  is  0
bleu score for sentence  20  is  1.22386190207

In [72]:
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")

# Initialize translator pipeline
translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang="kaz_Cyrl", tgt_lang="eng_Latn", batch_size=64, device=device)

# Load dataset
flores_dataset = load_dataset("facebook/flores", "eng_Latn-kaz_Cyrl")
test_data = flores_dataset["devtest"]

# Tokenize reference sentences
reference_sentences = [[tokenizer.tokenize(sentence)] for sentence in test_data['sentence_eng_Latn']]

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [66]:
print(test_data['sentence_kaz_Cyrl'][769])
print(translator(test_data['sentence_kaz_Cyrl'][769]))
print(test_data['sentence_eng_Latn'][769])

Апиа негізі 1850 жылдары қаланды және 1959 жылдан бері Самоаның ресми астанасы болған.
[{'translation_text': 'Apia was founded in the 1850s and has been the official capital of Samoa since 1959.'}]
Apia was founded in the 1850s and has been the official capital of Samoa since 1959.


In [73]:
print(test_data['sentence_kaz_Cyrl'][757])
print(translator(test_data['sentence_kaz_Cyrl'][757]))
print(test_data['sentence_eng_Latn'][757])

Шалғай аралдарда кредит картасы қабылданбауы мүмкін, дегенмен Ұлыбритания және АҚШ валютасы қабылдануы мүмкін; алдын ала иеленушіден қолданылатын төлем әдісін сұрап біліп алыңыз.
[{'translation_text': 'Credit cards may not be accepted in remote islands, but UK and US currencies may be accepted; please check with the previous payment method.'}]
On the outlying islands credit cards will probably not be accepted, although British and United States currency may be taken; check with the owners in advance to determine what is an acceptable payment method.


In [68]:
translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang="eng_Latn", tgt_lang="kaz_Cyrl", batch_size=64, device=device)


In [70]:
print(test_data['sentence_eng_Latn'][1009])
print(translator(test_data['sentence_eng_Latn'][1009]))
print(test_data['sentence_kaz_Cyrl'][1009])

Suits are standard business attire, and coworkers call each other by their family names or by job titles.
[{'translation_text': 'Кейіпкерлер бір-біріне өздерінің тегі мен атаулары бойынша, немесе лауазымдық атаулары бойынша шақырады.'}]
Костюмдар стандартты бизнес киімі болып табылады және әріптестер бір бірін тектері немесе жұмыс атаулары арқылы атайды.


In [75]:
model = AutoModelForSeq2SeqLM.from_pretrained("amandyk/mt5-kazakh-english-translation")
tokenizer = AutoTokenizer.from_pretrained("amandyk/mt5-kazakh-english-translation")
translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang="kaz_Cyrl", tgt_lang="eng_Latn", batch_size=64, device=device)

In [74]:
print(test_data['sentence_kaz_Cyrl'][308])
print(translator(test_data['sentence_kaz_Cyrl'][308]))
print(test_data['sentence_eng_Latn'][308])

Сондай-ақ Тынық мұхитының цунамилер туралы ескерту орталығы цунами көрсеткіші болмағанын айтты.


In [76]:
print(kazakh_corpus[26])
print(yt.translate('kk', 'en', kazakh_corpus[26]))
print(eng_corpus[26])

бүгінгі сайлау - ертеңгі жарқын күнің ,
today's elections are your bright day tomorrow ,
i wish you a good health ! feel all pleasure and good with your people !


In [77]:
print(eng_corpus[19])
print(yt.translate('en', 'kk', eng_corpus[26]))
print(kazakh_corpus[19])

bloomberg.com
сізге мықты денсаулық тілеймін ! өз халқыңызбен барлық рахат пен жақсылықты сезініңіз !
астана , 2006 жылғы 5 мамыр . )


In [78]:
print(kazakh_corpus[63])
print(GoogleTranslator(source='kk', target='en').translate(kazakh_corpus[63]))
print(eng_corpus[63])

үндістан республикасының премьер-министрі манмохан сингх
Manmohan Singh, Prime Minister of the Republic of India
this will be to the mutual benefit of out two peoples , and for peace and stability in the region .


In [79]:
print(eng_corpus[63])
print(GoogleTranslator(source='en', target='kk').translate(eng_corpus[22]))
print(kazakh_corpus[63])

this will be to the mutual benefit of out two peoples , and for peace and stability in the region .
бұл бәріміз күтетін ерекше мереке.
үндістан республикасының премьер-министрі манмохан сингх
