In [None]:
# Imports
import torch
import warnings
warnings.filterwarnings('ignore')
from transformers import AutoModelForSequenceClassification, AutoTokenizer, PreTrainedTokenizerFast, TrainingArguments, Trainer
from datasets import load_dataset, load_from_disk
from trl import RewardTrainer

from pprint import pprint
import os

In [None]:
# Debugging aid: export CUDA_LAUNCH_BLOCKING=1 for this process before any
# CUDA work is started by the cells below.
os.environ.update(CUDA_LAUNCH_BLOCKING="1")

In [None]:
# find optimal device
def optimal_device():
    """Pick the CUDA device with the largest fraction of free memory.

    Prints one status line per visible CUDA device (index, name, total and
    free memory). When CUDA is not available a notice is printed and the CPU
    device is returned instead.

    Returns:
        torch.device: ``cuda:<index>`` for the device whose free/total memory
        ratio is highest (index 0 when no device reports a ratio above zero),
        or ``cpu`` when no CUDA device is detected.
    """
    if not torch.cuda.is_available():
        print("Sorry did not detect a cuda device.")
        return torch.device('cpu')

    device = 0
    memory = 0
    for d in range(torch.cuda.device_count()):
        # mem_get_info yields (free, total) for device d
        free, total = torch.cuda.mem_get_info(d)
        # Fixed: look up the name of device d; the original always queried
        # device 0, so every line reported the first GPU's name.
        print(f'cuda id={d} device={torch.cuda.get_device_name(d)} memory={total} free={free}')
        free_ratio = free / total
        if free_ratio > memory:
            memory = free_ratio
            device = d
    return torch.device(f"cuda:{device}")

In [None]:
# Check for GPU availability
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Fixed: the original passed `{device}` (a set literal) to get_device_name and
# called it unconditionally, which fails both for the set argument and on
# CPU-only machines. Query the name only when a CUDA device was selected.
if device.type == "cuda":
    print(f"Using device: {torch.cuda.get_device_name(device)}")

In [None]:
# Load the pretrained DeBERTa checkpoint with a single-output
# sequence-classification head (num_labels=1), plus its matching tokenizer.
model_id = "microsoft/deberta-base"
model = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    num_labels=1,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
print(f"Using Fast Tokenizer: {tokenizer.is_fast}")

# Move the model onto the device selected earlier in the notebook
model.to(device)

In [None]:
# Fetch the RLHF preference dataset and pull out its two splits
dataset_id = "Dahoas/full-hh-rlhf"
dataset = load_dataset(dataset_id)
train_dataset, eval_dataset = dataset['train'], dataset['test']

# Sanity check: column names of the train split, then the size of each split
print(train_dataset.column_names)
print(len(train_dataset), len(eval_dataset), sep="\n")

In [None]:
# Make sure a padding token exists before any padded tokenization happens.
if tokenizer.pad_token is None:
    # Reuse the end-of-sequence token as the padding token.
    tokenizer.pad_token = tokenizer.eos_token
    # Fixed: read the id from the tokenizer rather than from
    # model.config.eos_token_id, so the model config always matches the token
    # just assigned even when the config's eos id is unset or different.
    model.config.pad_token_id = tokenizer.pad_token_id

# Show the padding token and the id the model will use for it
print(tokenizer.pad_token)
print(model.config.pad_token_id)

In [None]:
def formatting_func(examples,):
    """Tokenize one preference example into paired chosen/rejected inputs.

    The prompt is joined with each response by a newline, and both texts are
    tokenized with the notebook-level ``tokenizer`` using max-length padding
    and truncation at 512 tokens.

    Args:
        examples: Mapping with string fields ``"prompt"``, ``"chosen"`` and
            ``"rejected"`` (a single dataset row, since ``map`` is called
            without batching).

    Returns:
        dict: ``input_ids_chosen`` / ``attention_mask_chosen`` and
        ``input_ids_rejected`` / ``attention_mask_rejected``, each taken from
        index 0 of the corresponding tokenizer output.
    """
    kwargs = {"padding": "max_length", "truncation": True, "max_length": 512, "return_tensors": "pt"}

    prompt = examples["prompt"]
    prompt_plus_chosen_response = prompt + "\n" + examples["chosen"]
    # Fixed: build the rejected text from the "rejected" field. The original
    # reused "chosen" here, making both sides of every pair identical.
    prompt_plus_rejected_response = prompt + "\n" + examples["rejected"]
    tokens_chosen = tokenizer.encode_plus(prompt_plus_chosen_response, **kwargs)
    tokens_rejected = tokenizer.encode_plus(prompt_plus_rejected_response, **kwargs)

    # return_tensors="pt" is requested above; [0] selects the first row of
    # each returned field (presumably the single batch row for one text).
    return {
        "input_ids_chosen": tokens_chosen["input_ids"][0], "attention_mask_chosen": tokens_chosen["attention_mask"][0],
        "input_ids_rejected": tokens_rejected["input_ids"][0], "attention_mask_rejected": tokens_rejected["attention_mask"][0]
    }

# Run formatting_func over the raw dataset to produce the tokenized
# chosen/rejected columns
formatted_dataset = dataset.map(function=formatting_func)

# Write the tokenized dataset to disk so it can be reloaded later
formatted_dataset.save_to_disk("datasets/formatted_dataset")

In [None]:
# Reload the tokenized dataset from the directory it was saved to
formatted_dataset = load_from_disk(dataset_path="datasets/formatted_dataset")

In [None]:
# Inspect the reloaded dataset: column names, the train split's feature
# schema, and the first row of the train and test splits.
print(
    formatted_dataset.column_names,
    formatted_dataset["train"].features,
    formatted_dataset["train"][0],
    formatted_dataset["test"][0],
    sep="\n",
)

In [None]:
# Hyperparameters and bookkeeping options for the reward-model run
training_args = TrainingArguments(
    # where the model checkpoints and logs are written
    output_dir="./results",
    # optimisation
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=3,
    # batching and memory
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    # evaluate and save once per epoch, then load the best model at the end
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Build the reward trainer on the tokenized train/test splits
trainer = RewardTrainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    eval_dataset=formatted_dataset["test"],
    train_dataset=formatted_dataset["train"],
)

In [None]:
# Re-check right before training that the tokenizer has a padding token.
if tokenizer.pad_token is None:
    # Reuse the end-of-sequence token as the padding token.
    tokenizer.pad_token = tokenizer.eos_token
    # Fixed: read the id from the tokenizer rather than from
    # model.config.eos_token_id, so the model config always matches the token
    # just assigned even when the config's eos id is unset or different.
    model.config.pad_token_id = tokenizer.pad_token_id

# Show the padding token and the id the model will use for it
print(tokenizer.pad_token)
print(model.config.pad_token_id)

In [None]:
# Start training
# Runs the RewardTrainer built above with its configured training arguments.
# Because this call is the last expression in the cell, whatever train()
# returns is shown as the cell output.
trainer.train()

In [None]:
# Save the model
save_dir = "./models/deberta-rlhf-v1"
model.save_pretrained(save_dir)
# Also write the tokenizer files next to the weights so the saved directory
# can be reloaded on its own (the original saved the model only).
tokenizer.save_pretrained(save_dir)

# Push model to Huggingface Hub
trainer.push_to_hub()