In [1]:
import pandas as pd
import unidecode
from tqdm.notebook import tqdm

import torch

torch.cuda.empty_cache()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


from transformers import T5ForConditionalGeneration, T5Tokenizer, TrainingArguments, AutoTokenizer
from peft import LoraConfig, PeftModel, get_peft_model, TaskType
from transformers import BitsAndBytesConfig
from trl import DPOTrainer
from datasets import Dataset

from utils import remove_diacritics

model_name = "google/flan-t5-small"
LORA_RUN = True

model_output_dir = f"dpo_{model_name}" 
model_output_dir += "_lora" if LORA_RUN else ""
print(f"{model_output_dir=}")


model_output_dir='dpo_google/flan-t5-small_lora'


In [2]:
df = pd.read_csv("data/t5-small.csv")

df['chosen'] = df['translations'].apply(remove_diacritics)
df["sentence"] = df["sentence"].apply(lambda x: f"translate English to Romanian: {x}")
df = df.rename(columns={"translations": "rejected", "sentence": "prompt"})
train_dataset = Dataset.from_dict({col: df[col].values.tolist() for col in df.columns})
df

Unnamed: 0,prompt,rejected,chosen
0,translate English to Romanian: I ate the cheese.,Am mâncat brânza.,Am mancat branza.
1,translate English to Romanian: Today is Monday.,Astăzi este ziua de luni.,Astazi este ziua de luni.
2,translate English to Romanian: Does he speak E...,Vorbeşte el limba engleză?,Vorbeste el limba engleza?
3,translate English to Romanian: I'm sort of tired.,Sunt oarecum obosit.,Sunt oarecum obosit.
4,translate English to Romanian: I am indebted t...,Sunt îndatorat acestuia.,Sunt indatorat acestuia.
...,...,...,...
15801,translate English to Romanian: It would be a d...,Ar fi o sarcină dificilă.,Ar fi o sarcina dificila.
15802,translate English to Romanian: I ate a burdock...,Am mâncat o tempura de rădăcini burdice.,Am mancat o tempura de radacini burdice.
15803,translate English to Romanian: You say you've ...,Aţi spus că aţi văzut o UFO?,Ati spus ca ati vazut o UFO?
15804,translate English to Romanian: It's a good sen...,"Oricum, este o frază bună.","Oricum, este o fraza buna."


In [3]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM,
    target_modules=["q", "v"]
)

In [4]:
model_name = 'google/flan-t5-base'  # Replace with your model
model = T5ForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    load_in_4bit=True)

ref_model = T5ForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    load_in_4bit=True)


tokenizer = T5Tokenizer.from_pretrained(model_name)

bin d:\Python310\lib\site-packages\bitsandbytes\libbitsandbytes_cuda121.dll


You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [5]:
# # This is an experiment where the base layers are frozen

# for name, param in model.named_parameters():
#     if 'lm_head' not in name:
#         param.requires_grad = False

In [6]:
training_args = TrainingArguments(
    per_device_train_batch_size=16,
    gradient_accumulation_steps=1,
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    # max_steps=200,
    save_strategy="epoch",
    logging_steps=1,
    output_dir=model_output_dir,
    optim="paged_adamw_32bit",
    warmup_steps=100,
    bf16=True,
    report_to="tensorboard",
    num_train_epochs=1
)

# Create DPO trainer
dpo_trainer = DPOTrainer(
    model,
    ref_model,
    args=training_args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    beta=0.1,
    max_prompt_length=128,
    max_length=1536,
)



Map:   0%|          | 0/15806 [00:00<?, ? examples/s]

In [7]:
dpo_trainer.train()

You are using 8-bit optimizers with a version of `bitsandbytes` < 0.41.1. It is recommended to update your version as a major bug has been fixed in 8-bit optimizers.


  0%|          | 0/988 [00:00<?, ?it/s]

Could not estimate the number of tokens of the input, floating-point operations will not be computed


{'loss': 0.6362, 'learning_rate': 1.0000000000000002e-06, 'rewards/chosen': 0.012895585969090462, 'rewards/rejected': -0.11120486259460449, 'rewards/accuracies': 0.8125, 'rewards/margins': 0.1241004467010498, 'logps/rejected': -24.24505615234375, 'logps/chosen': -45.456939697265625, 'logits/rejected': -13.785174369812012, 'logits/chosen': -13.29578971862793, 'epoch': 0.0}
{'loss': 0.6735, 'learning_rate': 2.0000000000000003e-06, 'rewards/chosen': -0.05146024376153946, 'rewards/rejected': -0.11063986271619797, 'rewards/accuracies': 0.625, 'rewards/margins': 0.05917961895465851, 'logps/rejected': -20.114412307739258, 'logps/chosen': -50.49498748779297, 'logits/rejected': -14.09111499786377, 'logits/chosen': -13.873868942260742, 'epoch': 0.0}
{'loss': 0.6571, 'learning_rate': 3e-06, 'rewards/chosen': -0.0031595472246408463, 'rewards/rejected': -0.0955241322517395, 'rewards/accuracies': 0.625, 'rewards/margins': 0.09236457943916321, 'logps/rejected': -16.260791778564453, 'logps/chosen': -4

TrainOutput(global_step=988, training_loss=0.2041456379672241, metrics={'train_runtime': 269.4776, 'train_samples_per_second': 58.654, 'train_steps_per_second': 3.666, 'train_loss': 0.2041456379672241, 'epoch': 1.0})

In [10]:
dpo_trainer.model.save_pretrained(f"{model_output_dir}/final_checkpoint")
tokenizer.save_pretrained(f"{model_output_dir}/final_checkpoint")

torch.cuda.empty_cache()

base_model = T5ForConditionalGeneration.from_pretrained(
    model_name,
    return_dict=True,
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = PeftModel.from_pretrained(base_model, f"{model_output_dir}/final_checkpoint")
model = model.merge_and_unload()

model.save_pretrained(model_output_dir)
tokenizer.save_pretrained(model_output_dir)

('dpo_google/flan-t5-small_lora\\tokenizer_config.json',
 'dpo_google/flan-t5-small_lora\\special_tokens_map.json',
 'dpo_google/flan-t5-small_lora\\spiece.model',
 'dpo_google/flan-t5-small_lora\\added_tokens.json',
 'dpo_google/flan-t5-small_lora\\tokenizer.json')

In [13]:
def translate_batch(batch, model=model, tokenizer=tokenizer):
    inputTokens = tokenizer(batch, padding=True, return_tensors="pt", truncation=True).to(device)
    outputs = model.generate(input_ids = inputTokens['input_ids'].long(), attention_mask=inputTokens['attention_mask'], max_new_tokens=128)
    outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    for i, j in zip(batch, outputs):
        print(f"prompt: <{i}>, output:{j}")

model.cuda()
translate_batch(df.prompt.values.tolist()[:10])

prompt: <translate English to Romanian: I ate the cheese.>, output:I sam.
prompt: <translate English to Romanian: Today is Monday.>, output:ksk.
prompt: <translate English to Romanian: Does he speak English?>, output:samsassa
prompt: <translate English to Romanian: I'm sort of tired.>, output:iks.
prompt: <translate English to Romanian: I am indebted to him.>, output:iksk.
prompt: <translate English to Romanian: He refused to say more about that.>, output:yswa.
prompt: <translate English to Romanian: Let me in, please.>, output:nk, n.
prompt: <translate English to Romanian: Spring is around the corner.>, output:Spring sway.
prompt: <translate English to Romanian: Spring will arrive there soon.>, output:Spring sway.
prompt: <translate English to Romanian: You don’t need to apply in advance.>, output:nsad.
