In [None]:
import evaluate
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import evaluate
from comet import download_model, load_from_checkpoint
import pandas as pd
from trl import GRPOConfig, GRPOTrainer, get_peft_config, ModelConfig
from tqdm.auto import tqdm
import pickle

In [2]:
from peft import LoraConfig, get_peft_model
from transformers import AutoTokenizer, BitsAndBytesConfig, Gemma3ForCausalLM
import torch

### Config

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
# Select the 'high' float32 matmul precision mode (trades some float32 accuracy
# for speed on supporting hardware; see the PyTorch docs for the exact semantics).
torch.set_float32_matmul_precision('high')

In [None]:
import wandb
# https://docs.wandb.ai/guides/track/environment-variables
import os

WANDB_PROJECT = 'nmt'
# Never commit a real key to the notebook: take it from the environment instead.
WANDB_API_KEY = os.environ.get('WANDB_API_KEY', '')
os.environ['WANDB_NOTEBOOK_NAME'] = 'rl_nmt.ipynb'
# Only export the key when one is actually available, so that a key supplied
# by the environment is never overwritten with an empty string.
if WANDB_API_KEY:
    os.environ['WANDB_API_KEY'] = WANDB_API_KEY
os.environ['WANDB_PROJECT'] = WANDB_PROJECT

# Runs are recorded locally in offline mode.
os.environ['WANDB_MODE'] = 'offline'
wandb.login()

In [6]:
# Hide the logs emitted while COMET runs (silences the "pytorch_lightning" logger).

import logging
import pytorch_lightning as pl

# Only messages at ERROR level or above are let through.
logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)

In [7]:
from huggingface_hub import login

# Authenticate against the Hugging Face Hub.
# NOTE(review): the token is an empty placeholder here — supply a real token at
# runtime (e.g. from an environment variable) instead of committing it.
login(token="")

### Models

In [8]:
from peft import LoraConfig, get_peft_model
from transformers import AutoTokenizer, BitsAndBytesConfig, Gemma3ForCausalLM
import torch

#### Gemma

In [9]:
# Checkpoint shared by the tokenizer here and the model loaded in a later cell.
model_name = "google/gemma-3-1b-it"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [10]:
def format_prompt_text(source, tokenizer):
    """Render the English-to-Ukrainian translation prompt for ``source``.

    The instruction and the sentence are placed in a single user turn, which is
    turned into a string by ``tokenizer.apply_chat_template`` (no tokenization,
    generation prompt appended).

    Args:
        source: English text to translate.
        tokenizer: Object exposing ``apply_chat_template``.

    Returns:
        Whatever ``apply_chat_template`` returns for the single user turn.
    """
    user_turn = {
        "role": "user",
        "content": f"Translate the following English sentence to Ukrainian. Output only the translation without any explanation or extra text.\n\nEnglish: {source}\n\nUkrainian:",
    }
    return tokenizer.apply_chat_template(
        [user_turn],
        tokenize=False,
        add_generation_prompt=True,
    )


def process_generated_text(generated_text):
    """Extract the translation from a full generated string.

    Everything up to and including the first "Ukrainian:" marker is discarded,
    surrounding whitespace is stripped, and a leading chat-template turn
    boundary is removed if present. If the marker is absent the result is "".
    """
    tail = generated_text.partition("Ukrainian:")[2].strip()
    return tail.removeprefix("<end_of_turn>\n<start_of_turn>model\n")

In [None]:
# 8-bit quantization config; currently unused because the
# `quantization_config` argument below is commented out.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

# Load the base Gemma 3 model in bfloat16 directly onto the GPU.
model = Gemma3ForCausalLM.from_pretrained(
    model_name, 
    #quantization_config=quantization_config,
    attn_implementation="eager",  # Critical for Gemma3
    torch_dtype=torch.bfloat16,
    device_map="cuda",
)

# LoRA adapters on every linear layer; the modules listed in `modules_to_save`
# are additionally kept trainable and saved with the adapter.
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.05,
    r=16,
    bias="none",
    target_modules="all-linear",
    task_type="CAUSAL_LM",
    modules_to_save=[
        "lm_head",
        "embed_tokens",
    ],
)

# Wrap the base model with the adapters and report how many parameters will be trained.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

#### Dragoman

In [12]:
# import os
# os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [13]:
# def format_prompt_text(source, tokenizer=None):
#     prompt = f"[INST] {source} [/INST]"
#     return prompt

# def process_generated_text(generated_text):
#     pred = generated_text

#     # Remove only the last occurrence of "</s>"
#     if pred.endswith("</s>"):
#         pred = pred[:-len("</s>")]

#     pred = pred.split("[/INST]")[-1]

#     pred = pred.strip()

#     return pred

In [14]:
# # pip install bitsandbytes transformers peft torch
# from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
# from peft import PeftConfig, PeftModel
# import torch

# config = PeftConfig.from_pretrained("lang-uk/dragoman")
# quant_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
#     bnb_4bit_use_double_quant=False,
# )

# model = AutoModelForCausalLM.from_pretrained(
#     "mistralai/Mistral-7B-v0.1"#, quantization_config=quant_config
# )

#model = PeftModel.from_pretrained(model, "lang-uk/dragoman", is_trainable=True).to("cuda")

# tokenizer = AutoTokenizer.from_pretrained(
#     "mistralai/Mistral-7B-v0.1", use_fast=False, add_bos_token=False
# )
# tokenizer.pad_token = tokenizer.eos_token
# tokenizer.padding_side='left'


### Rewards

In [None]:
from rewards import *

In [16]:
import re
 
def format_reward_func(completions, sources, references = None, **kwargs):
    """Reward completions that follow the ``<think>…</think>\\n<answer>…</answer>`` layout.

    A synthetic "<think>" is prepended to every completion before matching,
    because the opening tag is expected to be part of the prompt already.

    Args:
        completions: Iterable of generated strings.
        sources: Unused; accepted for a uniform reward-function signature.
        references: Unused.

    Returns:
        A list with 1.0 for each completion matching the layout and 0.0
        otherwise (including completions that raise while being checked).
    """
    layout = re.compile(
        r"^<think>([^<]*(?:<(?!/?think>)[^<]*)*)<\/think>\n<answer>([\s\S]*?)<\/answer>$",
        re.DOTALL,
    )

    def score(completion):
        try:
            found = layout.search("<think>" + completion)
        except Exception:
            # Anything that cannot be checked counts as a format failure.
            return 0.0
        if found is not None and len(found.groups()) == 2:
            return 1.0
        return 0.0

    return [score(completion) for completion in completions]

In [17]:
import fasttext

# Load the fastText language-identification model from the local file
# "lid.176.bin" (relative path, so it must be present in the working directory).
model_identify_lang = fasttext.load_model("lid.176.bin")

def lang_reward_func(completions, sources=None, references = None, **kwargs):
    """Reward completions that the language-ID model classifies as Ukrainian.

    Args:
        completions: Iterable of generated translation strings.
        sources: Unused; accepted for a uniform reward-function signature.
        references: Unused.

    Returns:
        A list with 1.0 where the top predicted label is "__label__uk" and 0.0
        otherwise. Empty or whitespace-only completions score 0.0 without
        querying the model.
    """
    rewards = []

    for translation in completions:
        # Newlines are flattened to spaces before prediction, as the original code did.
        cleaned = translation.replace("\n", " ").strip()

        # Nothing to classify: score it as "not Ukrainian" instead of
        # handing an empty string to the model.
        if not cleaned:
            rewards.append(0.0)
            continue

        labels = model_identify_lang.predict(cleaned)[0]

        # Guard against an empty label tuple before indexing the top prediction.
        is_ukrainian = bool(labels) and labels[0] == "__label__uk"
        rewards.append(1.0 if is_ukrainian else 0.0)

    return rewards


# texts = [
#     "This is a test sentence.",
#     "Це тестове речення.",
#     "Як спрвидливий вчинок? What?",
#     "Dies ist ein Testsatz."
# ]  
# lang_reward_func(texts)

In [18]:
# SacreBLEU metric used by translation_reward_func_bleu below.
bleu = evaluate.load("sacrebleu")

def translation_reward_func_bleu(completions, sources, references = None, **kwargs):
    """Score each completion against its reference with sentence-level SacreBLEU.

    Args:
        completions: Generated translations.
        sources: Unused; accepted for a uniform reward-function signature.
        references: Reference translations, paired with ``completions`` by position.

    Returns:
        A list with the 'score' entry of the metric result for every pair.
    """
    return [
        bleu.compute(predictions=[hypothesis], references=[reference])['score']
        for hypothesis, reference in zip(completions, references)
    ]

In [19]:
#translation_reward_func_bleu(['Hello how are you doing?'], None, ['Hello how are you?'])

In [None]:
# Download and load the CometKiwi checkpoint used by the reference-free
# translation reward, then move it to the selected device.
model_path = download_model("Unbabel/wmt22-cometkiwi-da")
comet_model = load_from_checkpoint(model_path)
comet_model = comet_model.to(device)

In [21]:
def translation_reward_func_reff_free(completions, sources, references = None, **kwargs):
    """Score translations with the reference-free COMET model.

    Args:
        completions: Generated translations.
        sources: Source texts, paired with ``completions`` by position.
        references: Unused.

    Returns:
        The 'scores' entry of the COMET prediction, one score per pair.
    """
    samples = []
    for src_text, hypothesis in zip(sources, completions):
        samples.append({"src": src_text, "mt": hypothesis})

    # All pairs are scored in a single batch.
    prediction = comet_model.predict(samples, batch_size=len(samples), progress_bar=False)
    return prediction['scores']

In [22]:
#translation_reward_func_reff_free(['Як справи'], ['Hello how are you?'])

In [None]:
from sentence_transformers import SentenceTransformer

# Multilingual sentence-embedding model used by the embedding-similarity reward.
model_name_or_path = "Alibaba-NLP/gte-multilingual-base"
model_emb_sim = SentenceTransformer(model_name_or_path, trust_remote_code=True)

# # Paired source and Ukrainian translations
# source_texts = [
#     "What is the capital of China?",
#     "How to implement quick sort in Python?",
# ]

# translation_texts = [
#     "Яка столиця Китаю?",
#     "Як реалізувати швидке сортування в Пайтоні?"
# ]

# # Encode texts
# source_embeddings = model_emb_sim.encode(source_texts, normalize_embeddings=True)
# translation_embeddings = model_emb_sim.encode(translation_texts, normalize_embeddings=True)

# # Compute and print pairwise similarity
# for i, (src_emb, tgt_emb) in enumerate(zip(source_embeddings, translation_embeddings)):
#     score = src_emb @ tgt_emb  # cosine similarity since embeddings are normalized
#     print(f"Pair {i+1}:")
#     print(f"  Source: {source_texts[i]}")
#     print(f"  Translation: {translation_texts[i]}")
#     print(f"  Similarity Score: {score:.4f}\n")

In [24]:
def emb_sim_reward_func_reff_free(completions, sources, references = None, **kwargs):
    """Reward each completion by its embedding similarity to the source text.

    Both sides are encoded with normalized embeddings, so the dot product of a
    pair is its cosine similarity.

    Args:
        completions: Generated translations.
        sources: Source texts, paired with ``completions`` by position.
        references: Unused.

    Returns:
        A list of similarity scores as Python scalars.
    """
    source_vectors = model_emb_sim.encode(sources, normalize_embeddings=True)
    completion_vectors = model_emb_sim.encode(completions, normalize_embeddings=True)

    return [
        (source_vec @ completion_vec).item()
        for source_vec, completion_vec in zip(source_vectors, completion_vectors)
    ]

### Datasets

In [25]:
def format_prompt(source, reference=None):
    """Build one dataset row for ``source`` (and an optional ``reference``).

    Returns:
        A dict with the raw source, the reference (or None) and the prompt
        rendered by ``format_prompt_text`` with the notebook-level tokenizer.
    """
    return {
        "sources": source,
        "references": reference,
        "prompt": format_prompt_text(source, tokenizer),
    }

In [None]:
from datasets import Dataset

def create_dataset_flores(name="dev"):
    """Build a prompt dataset from the English/Ukrainian FLORES+ split ``name``.

    The two language splits are paired by position; each pair becomes one row
    produced by ``format_prompt``.
    """
    english_split = load_dataset("openlanguagedata/flores_plus", "eng_Latn")[name]
    ukrainian_split = load_dataset("openlanguagedata/flores_plus", "ukr_Cyrl")[name]

    rows = [
        format_prompt(english["text"], ukrainian["text"])
        for english, ukrainian in zip(english_split, ukrainian_split)
    ]
    return Dataset.from_list(rows)

# FLORES+ "dev" is used as the training split and "devtest" as the held-out split.
train_dataset_flores = create_dataset_flores(name="dev")
test_dataset_flores = create_dataset_flores(name="devtest")

In [27]:
# import numpy as np

# def print_dataset_stats(dataset, name):
#     # Extract source and target lengths
#     source_lens = [len(item['sources'].split()) for item in dataset]
#     target_lens = [len(item['references'].split()) for item in dataset]
    
#     # Calculate statistics for sources
#     source_min = np.min(source_lens)
#     source_max = np.max(source_lens)
#     source_mean = np.mean(source_lens)
#     source_var = np.var(source_lens)
    
#     # Calculate statistics for targets
#     target_min = np.min(target_lens)
#     target_max = np.max(target_lens)
#     target_mean = np.mean(target_lens)
#     target_var = np.var(target_lens)
    
#     print(f"\nStatistics for {name} dataset:")
#     print("Sources (English):")
#     print(f"  Min length: {source_min:.2f} words")
#     print(f"  Max length: {source_max:.2f} words")
#     print(f"  Mean length: {source_mean:.2f} words")
#     print(f"  Variance: {source_var:.2f}")
    
#     print("\nTargets (Ukrainian):")
#     print(f"  Min length: {target_min:.2f} words")
#     print(f"  Max length: {target_max:.2f} words")
#     print(f"  Mean length: {target_mean:.2f} words")
#     print(f"  Variance: {target_var:.2f}")

# # Print statistics for both datasets
# print_dataset_stats(train_dataset_flores, "Train")
# print_dataset_stats(test_dataset_flores, "Test")

In [28]:
# from datasets import load_dataset
# import statistics

# # Load the WikiText-103 dataset
# dataset = load_dataset("Salesforce/wikitext", "wikitext-103-v1")

# # Choose the split to analyze: 'train', 'validation', or 'test'
# split = 'train'
# texts = dataset[split]['text']

# # Compute lengths of each text entry
# lengths = [len(text.split()) for text in texts if text.strip()]

# # Calculate statistics
# min_len = min(lengths)
# max_len = max(lengths)
# mean_len = statistics.mean(lengths)
# variance_len = statistics.variance(lengths)

# # Display the results
# print(f"Statistics for WikiText-103 ({split} split):")
# print(f"Minimum length: {min_len} tokens")
# print(f"Maximum length: {max_len} tokens")
# print(f"Mean length: {mean_len:.2f} tokens")
# print(f"Variance: {variance_len:.2f}")

In [29]:
def create_dataset_wiki(name="train"):
    """Build a source-only prompt dataset from the WikiText-103 split ``name``.

    Every record's "text" becomes one row via ``format_prompt`` (no reference).
    """
    split = load_dataset("Salesforce/wikitext", "wikitext-103-v1")[name]
    rows = [format_prompt(record["text"]) for record in tqdm(split)]
    return Dataset.from_list(rows)

In [30]:
# train_path = "train_dataset.pkl"
# test_path = "test_dataset.pkl"

# if os.path.exists(train_path):
#     with open(train_path, "rb") as f:
#         train_dataset_wiki = pickle.load(f)
# else:
#     train_dataset_wiki = create_dataset_wiki(name="train")
#     with open("train_dataset.pkl", "wb") as f:
#         pickle.dump(train_dataset_wiki, f)
#     print(f"{train_path} not found.")

# if os.path.exists(test_path):
#     with open(test_path, "rb") as f:
#         test_dataset = pickle.load(f)
# else:
#     test_dataset_wiki = create_dataset_wiki(name="test")
#     with open("test_dataset.pkl", "wb") as f:
#         pickle.dump(test_dataset_wiki, f)

#     print(f"{test_path} not found.")

In [31]:
def create_dataset_wikipar(name="train"):
    """Build a source-only prompt dataset from the first 10000 streamed wikipar paragraphs.

    The dataset is streamed, so only the paragraphs actually consumed are read.
    """
    stream = load_dataset("dchaplinsky/wikipar", split=name, streaming=True)

    limit = 10000
    paragraphs = []
    # Pairing with range(limit) stops the stream after `limit` examples.
    for _, example in zip(range(limit), tqdm(stream, total=limit)):
        paragraphs.append({'text': example['paragraph_text']})

    rows = [format_prompt(paragraph["text"]) for paragraph in tqdm(paragraphs)]
    return Dataset.from_list(rows)

In [32]:
def create_dataset_wikipar_strat(name="train"):
    """Build a source-only prompt dataset from the stratified wikipar split ``name``.

    NOTE(review): a later cell defines a function with the same name, which
    replaces this one once that cell runs.
    """
    records = load_dataset("dchaplinsky/wikipar-stratified", split=name)
    rows = [format_prompt(record["paragraph_text"]) for record in tqdm(records)]
    return Dataset.from_list(rows)

In [33]:
def create_dataset_wikipar_strat(name="train"):
    """Load the stratified wikipar split ``name`` and map ``format_prompt`` over it.

    Returns:
        The result of ``Dataset.map`` with a function that builds the prompt
        row from each example's "paragraph_text".
    """
    source_split = load_dataset("dchaplinsky/wikipar-stratified", split=name)

    def add_prompt_columns(row):
        return format_prompt(row["paragraph_text"])

    return source_split.map(add_prompt_columns)

# Training prompts built from the stratified wikipar "train" split.
train_dataset_wikipar = create_dataset_wikipar_strat(name="train")

### Eval

In [34]:
from datasets import load_dataset
from tqdm import tqdm
import evaluate
from transformers import AutoTokenizer, TextGenerationPipeline

In [None]:
from comet import download_model, load_from_checkpoint

# Reference-based COMET model used by run_comet.
model_path = download_model("Unbabel/wmt22-comet-da")
model_comet = load_from_checkpoint(model_path)

# Reference-free CometKiwi model used by run_cometkiwi.
# NOTE(review): the same checkpoint is also loaded as `comet_model` in the
# Rewards section, so two copies may be held in memory — confirm this is intended.
model_path = download_model("Unbabel/wmt22-cometkiwi-da")
model_comet_reff_free = load_from_checkpoint(model_path)

In [None]:
# Load the FLORES+ "devtest" split in English and Ukrainian for evaluation.
ds_eng = load_dataset("openlanguagedata/flores_plus", "eng_Latn")["devtest"]#.select(range(12))
ds_ukr = load_dataset("openlanguagedata/flores_plus", "ukr_Cyrl")["devtest"]#.select(range(12))
# One single-item reference list per example.
references = [[ex["text"]] for ex in ds_ukr]

In [37]:
def run_eval_flores(model, tokenizer, batch_size_eval, prompts, process_generated_text):
    """Generate a translation for every prompt and post-process it.

    Args:
        model: Model handed to ``TextGenerationPipeline``.
        tokenizer: Tokenizer handed to the pipeline; its EOS id is used for padding.
        batch_size_eval: Number of prompts generated per pipeline call.
        prompts: List of ready-to-generate prompt strings.
        process_generated_text: Callable that extracts the translation from
            the pipeline's "generated_text".

    Returns:
        A list of processed predictions, in prompt order.
    """
    generator = TextGenerationPipeline(model=model, tokenizer=tokenizer)
    predictions = []

    for start in tqdm(range(0, len(prompts), batch_size_eval)):
        outputs = generator(
            prompts[start:start + batch_size_eval],
            max_new_tokens=256,
            pad_token_id=tokenizer.eos_token_id,
            batch_size=batch_size_eval,
        )
        # Each prompt yields a list of candidates; only the first one is kept.
        predictions.extend(
            process_generated_text(candidates[0]["generated_text"])
            for candidates in outputs
        )

    return predictions

In [38]:
def run_basic_metrics(predictions, references):
    """Compute the string-overlap metrics (SacreBLEU, BLEU, chrF, TER).

    Returns:
        A dict holding only the four headline values picked from the combined
        result; a key is None if the combined result does not contain it.
    """
    metric_ids = ("sacrebleu", "bleu", "chrf", "ter")
    combined = evaluate.combine([evaluate.load(metric_id) for metric_id in metric_ids])

    results = combined.compute(predictions=predictions, references=references)

    # Report the selected metrics only.
    reported_keys = ("sacrebleu_score", "bleu", "chr_f_score", "ter_score")
    return {key: results.get(key) for key in reported_keys}

In [39]:
def run_comet(predictions):
    """Score ``predictions`` with the reference-based COMET model.

    Predictions are paired by position with the notebook-level FLORES
    ``ds_eng`` sources and ``ds_ukr`` references.

    Returns:
        ``{'comet': <system score>}``.
    """
    samples = [
        {"src": src["text"], "mt": mt, "ref": ref["text"]}
        for src, mt, ref in zip(ds_eng, predictions, ds_ukr)
    ]
    output = model_comet.predict(samples, batch_size=8, gpus=1)
    return {'comet': output.system_score}

In [40]:
def run_cometkiwi(predictions):
    """Score ``predictions`` with the reference-free CometKiwi model.

    Predictions are paired by position with the notebook-level FLORES
    ``ds_eng`` sources; ``ds_ukr`` only takes part in the pairing and its
    text is not sent to the model.

    Returns:
        ``{'cometkiwi': <system score>}``.
    """
    samples = [
        {"src": src["text"], "mt": mt}
        for src, mt, _ in zip(ds_eng, predictions, ds_ukr)
    ]
    output = model_comet_reff_free.predict(samples, batch_size=8, gpus=1)
    return {'cometkiwi': output.system_score}

In [41]:
def run_eval_flores_pipeline(model, tokenizer, batch_size_eval, format_prompt_text, process_generated_text):
    """Translate the FLORES devtest sources and compute all evaluation metrics.

    Args:
        model: Model to evaluate.
        tokenizer: Tokenizer used for prompt formatting and generation.
        batch_size_eval: Generation batch size.
        format_prompt_text: Callable ``(text, tokenizer) -> prompt``.
        process_generated_text: Callable extracting the translation from generated text.

    Returns:
        A dict with every metric name prefixed by "eval_".
    """
    prompts = [format_prompt_text(example['text'], tokenizer) for example in ds_eng]
    predictions = run_eval_flores(model, tokenizer, batch_size_eval, prompts, process_generated_text)

    merged = {}
    metric_groups = (
        run_basic_metrics(predictions, references),
        run_comet(predictions),
        run_cometkiwi(predictions),
    )
    for group in metric_groups:
        merged.update(group)

    return {f"eval_{name}": value for name, value in merged.items()}

### Train

In [42]:
from transformers import TrainerCallback

# https://towardsdatascience.com/customizing-your-fine-tuning-code-using-huggingfaces-transformers-library-65cf2aa806ca/
class EvalCallback(TrainerCallback):
    """Run the FLORES evaluation pipeline periodically during training.

    Args:
        eval_every: Evaluate whenever ``state.global_step`` is a multiple of
            this value (default 50, the previously hard-coded interval).
        batch_size_eval: Generation batch size used for evaluation (default 8).
    """

    def __init__(self, eval_every=50, batch_size_eval=8):
        self.eval_every = eval_every
        self.batch_size_eval = batch_size_eval

    def on_step_end(self, args, state, control, **kwargs):
        """Evaluate the model in ``kwargs["model"]`` and log the metrics to W&B."""
        if state.global_step % self.eval_every != 0:
            return

        print(f"Step {state.global_step}: running custom logic...")

        model = kwargs["model"]

        model.eval()
        try:
            with torch.no_grad():
                eval_metrics = run_eval_flores_pipeline(
                    model=model,
                    tokenizer=tokenizer,
                    batch_size_eval=self.batch_size_eval,
                    format_prompt_text=format_prompt_text,
                    process_generated_text=process_generated_text
                )
            print('Eval:', eval_metrics)
            wandb.log(eval_metrics)
        finally:
            # Always hand the model back in training mode, even if evaluation failed.
            model.train()

In [43]:
# for i in range(1000000):
#     item = train_dataset_wikipar[i]  # Direct access by index
#     if len(item['sources']) > 550:
#         print(i)
#         break

# train_dataset_wikipar_single = train_dataset_wikipar.skip(i).take(1)
# train_dataset_wikipar_single[0]


In [None]:
#train_dataset_wikipar_single = train_dataset_wikipar.skip(35001).take(1)

text = "In recent years, artificial intelligence has rapidly evolved, transforming industries and daily life. From self-driving cars to medical diagnostics, AI systems demonstrate impressive capabilities—but they also raise ethical concerns. Language models, in particular, generate human-like text but may inadvertently reinforce biases present in training data. As researchers strive for transparency and fairness, new evaluation methods aim to capture both accuracy and nuance. Ultimately, responsible AI development depends not only on technical progress, but on thoughtful consideration of its societal impact."
# Rendered prompt, kept for inspection only; format_prompt builds the same string itself.
prompt = format_prompt_text(text, tokenizer)
# Pass the raw English text: format_prompt applies the chat template internally,
# so giving it an already-templated prompt would nest the template and put the
# instruction text into the `sources` column that the reward functions read.
train_dataset_wikipar_single = Dataset.from_list([format_prompt(text)])

torch.set_float32_matmul_precision('high')

# Configure training arguments using GRPOConfig.
# This run repeats over the single-example dataset for a very large number of epochs.
training_args = GRPOConfig(
    bf16=True,
    
    num_train_epochs=1000000,
    output_dir="nmt",
    remove_unused_columns=False,  # keep the extra dataset columns (sources/references) available to the reward functions
    
    learning_rate=1e-6,
    num_generations=8,  # 8
    per_device_train_batch_size=1, # 16
    gradient_accumulation_steps=8, # 16
    #per_device_eval_batch_size=4*2,
    max_grad_norm=0.4, # 0.5

    temperature=0.6,
    
    # Parameters that control the data preprocessing
    max_completion_length=256,
    max_prompt_length=256,

    # Parameters related to reporting and saving
    log_completions=True, 
    report_to=["wandb"],
    logging_steps=1,

    #eval_strategy="epoch",
    # eval_strategy="steps",
    # eval_steps=2,

    #push_to_hub=True,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=3,

    # One weight per entry of `reward_funcs` below, in the same order.
    reward_weights=[1.0, 1.0, 1.0, 0.5, 0.1, 0.1, 0.1, 0.1, 0.1],
)

# The reward_* functions are not defined in this notebook; presumably they come
# from `from rewards import *` — verify.
trainer = GRPOTrainer(
    model=model, 
    processing_class=tokenizer,
    reward_funcs=[translation_reward_func_reff_free, emb_sim_reward_func_reff_free, lang_reward_func, reward_lexical, reward_count_uppercase_letters, reward_count_non_letters_no_spaces, reward_count_unpaired_items, reward_count_words, reward_count_unique_words], 
    args=training_args, 
    train_dataset=train_dataset_wikipar_single,
    #callbacks=[EvalCallback()],
    #eval_dataset=test_dataset_flores,
)

trainer.train()

In [121]:
# Scratch samples for inspecting the reward functions by hand: t1 and t3 are
# Ukrainian outputs that end in stray non-Ukrainian text, t2 is the English source.
t1 = "В останні роки штучний інтелект швидко розвивається, перетворюючи галузі та повсякденне життя. Від автономних автомобілів до діагностики в медицині, системи штучного інтелекту демонструють вражаючі можливості, але також викликають етичні занепокоєння. Моделі мови, зокрема, генерують текст, який виглядає людським, але можуть випадково підсилювати упередження, присутні в навчальних даних. З огляду на те, що дослідники прагнуть прозорості та справедливості, нові методи оцінювання намагаються враховувати як точність, так і нюанси. Нарешті, відповідальне розроблення штучного інтелекту залежить не лише від технічного прогресу, а й від обдуманого розгляду його соціального впливу.https://translate.google.com/translate?sl=uk;gl=uk&tl=uk|In recent years, artificial intelligence has rapidly evolved, transforming industries and daily life. From self-driving cars to medical diagnostics, AI systems demonstrate impressive capabilities—but they also raise ethical concerns. Language models, in particular,"
t2 = "In recent years, artificial intelligence has rapidly evolved, transforming industries and daily life. From self-driving cars to medical diagnostics, AI systems demonstrate impressive capabilities—but they also raise ethical concerns. Language models, in particular, generate human-like text but may inadvertently reinforce biases present in training data. As researchers strive for transparency and fairness, new evaluation methods aim to capture both accuracy and nuance. Ultimately, responsible AI development depends not only on technical progress, but on thoughtful consideration of its societal impact."

t3 = """
В останні роки штучний інтелект швидко розвивався, перетворюючи галузі та повсякденне життя. Від автономних автомобілів до діагностики в медицині, системи штучного інтелекту демонструють вражаючі можливості, але також викликають етичні занепокоєння. Моделі мови, зокрема, генерують текст, схожий на людський, але можуть випадково посилювати упередження, присутні в навчальних даних. Як дослідники прагнуть прозорості та справедливості, нові методи оцінки намагаються захопити обидва аспекти – точність та нюанси. Нарешті, відповідальне розроблення штучного інтелекту залежить не лише від технічного прогресу, а від обдуманого розгляду його соціального впливу.अशा प्रकारे.'Interpreted Ukrainian:

"""

# The first t1 above is discarded: t1 now refers to the t3 sample.
t1=t3

In [45]:
# Rebind the scratch pair: t2 is the English source (with a leading space),
# t1 a Ukrainian output that ends in stray tokens.
t2 = " In recent years, artificial intelligence has rapidly evolved, transforming industries and daily life. From self-driving cars to medical diagnostics, AI systems demonstrate impressive capabilities—but they also raise ethical concerns. Language models, in particular, generate human-like text but may inadvertently reinforce biases present in training data. As researchers strive for transparency and fairness, new evaluation methods aim to capture both accuracy and nuance. Ultimately, responsible AI development depends not only on technical progress, but on thoughtful consideration of its societal impact."
t1 = "У нещодавно минулому штучний інтелект швидко розвивався, перетворюючи галузі та повсякденне життя. Автономні автомобілі, діагностика в медицині, системи штучного інтелекту демонструють вражаючі можливості, але також викликають етичні занепокоєння. Моделі мовлення генерують текст, схожий на людський, але можуть випадково посилювати упередження, які присутні в навчальних даних. Щоб досягти прозорості та справедливості, розробляються нові методи оцінювання, які збирають обидві точність і нюанси. Нарешті, відповідальне розроблення штучного інтелекту залежить не лише від технічного прогресу, але й від обдуманого розгляду його соціального впливу.roneellowஆசை. been"

In [None]:
# def get_non_letters_no_spaces(text):
#     preserved = set("0123456789()[]{}.:;!?-+=*/%<>$€₴£¥") # removed ,
#     return [char for char in text if char in preserved]

# def count_non_letters_no_spaces(text):
#     # count = 0
#     # for char in text:
#     #     if not unicodedata.category(char).startswith('L') and not char.isspace():
#     #         count += 1
#     # return count

#     return len(get_non_letters_no_spaces(text))

# def reward_count_non_letters_no_spaces(completions, sources, references = None, **kwargs):
#     rewards = []
#     for src, comp in zip(sources, completions):
#         src_count = count_non_letters_no_spaces(src)
#         comp_count = count_non_letters_no_spaces(comp)

#         rewards.append(relative_difference(src_count, comp_count))

#     return rewards

# #reward_count_non_letters_no_spaces([t1], [t2])

# Spot-check the rule-based rewards on the scratch pair (completion t1, source t2).
# These functions are not defined in this notebook; presumably they come from
# `from rewards import *` — verify.
reward_lexical([t1], [t2]), reward_count_uppercase_letters([t1], [t2]), reward_count_non_letters_no_spaces([t1], [t2]), reward_count_words([t1], [t2]), reward_count_unique_words([t1], [t2]), reward_count_unpaired_items([t1], [t2])

In [None]:
# Token counts of the two scratch samples under the current tokenizer.
len(tokenizer.encode(t1)), len(tokenizer.encode(t2))

In [None]:
# Compare the helper's output for both scratch samples.
# NOTE(review): get_non_letters_no_spaces is only visible here as commented-out
# code; presumably the live version comes from `from rewards import *` — verify.
get_non_letters_no_spaces(t1), get_non_letters_no_spaces(t2)

In [None]:
torch.set_float32_matmul_precision('high')

# Configure training arguments using GRPOConfig.
# Full run: a single epoch over the wikipar training prompts.
training_args = GRPOConfig(
    bf16=True,
    
    num_train_epochs=1,
    output_dir="nmt",
    remove_unused_columns=False,  # keep the extra dataset columns (sources/references) available to the reward functions
    
    learning_rate=1e-6,
    num_generations=8,  # 8
    per_device_train_batch_size=1, # 16
    gradient_accumulation_steps=32, # 16
    #per_device_eval_batch_size=4*2,
    max_grad_norm=0.4, # 0.5
    
    # Parameters that control the data preprocessing
    max_completion_length=256,
    max_prompt_length=256,

    # Parameters related to reporting and saving
    log_completions=True, 
    report_to=["wandb"],
    logging_steps=10,

    #eval_strategy="epoch",
    # eval_strategy="steps",
    # eval_steps=2,

    #push_to_hub=True,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=3,

    # One weight per entry of `reward_funcs` below, in the same order.
    reward_weights=[1.0, 1.0, 1.0, 0.5, 0.1, 0.1, 0.1, 0.1, 0.1],
)

# EvalCallback runs the FLORES evaluation periodically during training; no
# eval_dataset is registered on the trainer itself.
trainer = GRPOTrainer(
    model=model, 
    processing_class=tokenizer,
    reward_funcs=[translation_reward_func_reff_free, emb_sim_reward_func_reff_free, lang_reward_func, reward_lexical, reward_count_uppercase_letters, reward_count_non_letters_no_spaces, reward_count_unpaired_items, reward_count_words, reward_count_unique_words], 
    args=training_args, 
    train_dataset=train_dataset_wikipar,
    callbacks=[EvalCallback()],
    #eval_dataset=test_dataset_flores,
)

In [None]:
# Start GRPO training with the trainer configured in the previous cell.
trainer.train()

In [None]:
# The trainer was built without an eval_dataset, so a bare trainer.evaluate()
# fails with "evaluation requires an eval_dataset"; pass the FLORES devtest
# prompts explicitly instead.
eval_results = trainer.evaluate(eval_dataset=test_dataset_flores)
print(eval_results)

In [12]:
# # Configure training arguments using GRPOConfig
# training_args = GRPOConfig(
#     output_dir="nmt",
#     learning_rate=4e-6,
#     remove_unused_columns=False,  # to access the solution column in accuracy_reward
#     per_device_train_batch_size=2,
#     per_device_eval_batch_size=16,
#     gradient_accumulation_steps=16*4,
#     num_train_epochs=100,
#     bf16=True,
#     # Parameters that control de data preprocessing
#     max_completion_length=256,  # default: 256
#     num_generations=8,  # default: 8
#     max_prompt_length=256,  # default: 512
#     # Parameters related to reporting and saving
#     log_completions=True, 
#     report_to=["wandb"],
#     logging_steps=8,
#     eval_strategy="epoch", # eval_steps=4,
#     #push_to_hub=True,
#     #save_strategy="steps",
#     #save_steps=10,
# )

# trainer = GRPOTrainer(
#     model=model, 
#     reward_funcs=[format_reward_func, translation_reward_func], 
#     args=training_args, 
#     train_dataset=train_dataset,
#     eval_dataset=test_dataset,
# )

In [13]:
# # Configure training arguments using GRPOConfig
# training_args = GRPOConfig(
#     output_dir="nmt",
#     learning_rate=1e-5,
#     remove_unused_columns=False,  # to access the solution column in accuracy_reward
#     per_device_train_batch_size=8,
#     per_device_eval_batch_size=16,
#     gradient_accumulation_steps=4,
#     num_train_epochs=100,
#     bf16=True,
#     # Parameters that control de data preprocessing
#     max_completion_length=256,  # default: 256
#     num_generations=8,  # default: 8
#     max_prompt_length=256,  # default: 512
#     # Parameters related to reporting and saving
#     log_completions=True, 
#     report_to=["wandb"],
#     logging_steps=8,
#     eval_strategy="epoch", # eval_steps=4,
#     #push_to_hub=True,
#     #save_strategy="steps",
#     #save_steps=10,
# )

# trainer = GRPOTrainer(
#     model=model, 
#     reward_funcs=[translation_reward_func_reff_free], 
#     args=training_args, 
#     train_dataset=train_dataset,
#     eval_dataset=test_dataset,
# )