In [1]:
# Mount Google Drive into the Colab runtime at /content/drive; the model
# directory loaded later in this notebook lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu

In [3]:
# Download NLTK's 'punkt' tokenizer data into the local nltk_data directory.
# NOTE(review): no code visible in this notebook uses punkt (the BLEU inputs
# are tokenized with str.split) — confirm this download is still needed.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [5]:
from transformers import MarianMTModel, MarianTokenizer

# The model weights and the tokenizer files are both read from the same
# saved-model directory on the mounted Drive.
model_name = tokenizer_name = "/content/drive/MyDrive/model"

# Restore the tokenizer and the translation model from that directory.
tokenizer = MarianTokenizer.from_pretrained(tokenizer_name)
model = MarianMTModel.from_pretrained(model_name)

def translate(text, model, tokenizer):
    '''
    Function that translates a given text (to Twi in this notebook)

    Args:
        text -> the text to be translated: a string or a list of strings
        model -> model instance used to generate the translation; it must
            expose `generate(**encodings)`
        tokenizer -> tokenizer instance used to encode the text and to
            decode the generated token ids

    Returns:
        List of translated strings, one per generated sequence. An empty
        list/tuple input returns an empty list.
    '''
    # Nothing to translate: skip the tokenizer/model round trip, which has
    # no sequences to pad or generate from.
    if isinstance(text, (list, tuple)) and len(text) == 0:
        return []

    # truncation=True clips over-long inputs to the tokenizer's maximum
    # length instead of handing the model more positions than it supports.
    input_encodings = tokenizer(
        text, return_tensors='pt', padding=True, truncation=True
    )

    # Generate translation
    translated_tokens = model.generate(**input_encodings)

    # Decode every generated sequence, dropping padding/EOS markers
    return tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)



In [6]:
def calculate_bleu_score(references, hypotheses, smoothing_function=None):
    """
    Calculate the corpus-level BLEU score of model predictions.

    Args:
    references: List of reference translations as raw strings, one per
        hypothesis. Each string is whitespace-tokenized and used as the
        single reference for the hypothesis at the same index.
    hypotheses: List of model-generated translations as raw strings; each
        is whitespace-tokenized.
    smoothing_function: Optional smoothing callable forwarded to
        `corpus_bleu` (e.g. a method of NLTK's `SmoothingFunction`).
        The default, None, keeps NLTK's default unsmoothed behaviour.

    Returns:
    BLEU score for the predictions compared to the references.

    Raises:
    ValueError: if the number of references and hypotheses differ.
    """
    # Tokenize on whitespace. corpus_bleu expects a *list* of references per
    # hypothesis, so each single reference is wrapped in its own list.
    tokenized_references = [[ref.split()] for ref in references]
    tokenized_hypotheses = [hyp.split() for hyp in hypotheses]

    # Fail loudly on misaligned inputs rather than relying on an internal
    # assert, which is skipped when Python runs with -O.
    if len(tokenized_references) != len(tokenized_hypotheses):
        raise ValueError(
            "references and hypotheses must have the same length "
            f"(got {len(tokenized_references)} and {len(tokenized_hypotheses)})"
        )

    # Calculate the BLEU score
    return corpus_bleu(
        tokenized_references,
        tokenized_hypotheses,
        smoothing_function=smoothing_function,
    )

In [8]:
# Tiny smoke test: two English source sentences with one Twi reference each.
test = ["I am very hungry", "She is sick and tired of painting"]
test_reference = ["Ɔkɔm de me paa", "Ɔyare na wabrɛ wɔ mfoniniyɛ mu"]

# Generate model predictions once and bind both names to the same result,
# so the model is not run twice on identical input.
predicted_twi_sentences = translate(test, model, tokenizer)
translated_text = predicted_twi_sentences

# Calculate BLEU score
# NOTE: with only two short sentences, NLTK's default unsmoothed 4-gram corpus
# BLEU is effectively zero whenever some n-gram order has no match, so treat
# this number as a pipeline check rather than a quality metric.
bleu_score = calculate_bleu_score(test_reference, predicted_twi_sentences)
print(f"BLEU Score on test set: {bleu_score}")

BLEU Score on test set: 4.38832006142665e-78
