[Reward Modeling](https://huggingface.co/docs/trl/main/en/reward_trainer)

**Install dependencies**

In [None]:
!pip install datasets
!pip install peft
!pip install trl

**Import Necessary Libraries**

In [None]:
import warnings
import torch
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, HfArgumentParser

from trl import (
    RewardTrainer,
    get_kbit_device_map,
    get_peft_config,
    get_quantization_config,
    setup_chat_format,
)

from dataclasses import dataclass
from transformers import TrainingArguments
from typing import List, Literal, Optional


**Framework parameters**

In [None]:
@dataclass
class ModelConfig:
    """Model-loading, LoRA and quantization options for the reward model.

    The instance is later handed to the `trl` helpers `get_quantization_config`
    and `get_peft_config`, and its fields drive `from_pretrained`.
    """

    # --- checkpoint selection / loading ---
    model_name_or_path: Optional[str] = "facebook/opt-350m"
    model_revision: str = "main"
    torch_dtype: Optional[Literal["auto", "bfloat16", "float16", "float32"]] = None
    trust_remote_code: bool = False
    attn_implementation: Optional[str] = None

    # --- PEFT / LoRA ---
    use_peft: bool = False
    lora_r: int = 16
    lora_alpha: int = 32
    lora_dropout: float = 0.05
    lora_target_modules: Optional[List[str]] = None
    lora_modules_to_save: Optional[List[str]] = None
    lora_task_type: str = "CAUSAL_LM"
    use_rslora: bool = False

    # --- quantization ---
    load_in_8bit: bool = False
    load_in_4bit: bool = False
    bnb_4bit_quant_type: Literal["fp4", "nf4"] = "nf4"
    use_bnb_nested_quant: bool = False

    def __post_init__(self):
        """Validate the quantization flags and normalise `lora_target_modules`.

        Raises:
            ValueError: if both `load_in_8bit` and `load_in_4bit` are set.
        """
        wants_both_precisions = self.load_in_8bit and self.load_in_4bit
        if wants_both_precisions:
            raise ValueError("You can't use 8 bit and 4 bit precision at the same time")

        # A one-element list is collapsed to its single entry.
        targets = self.lora_target_modules
        if isinstance(targets, list) and len(targets) == 1:
            self.lora_target_modules = targets[0]


@dataclass
class RewardConfig(TrainingArguments):
    """`TrainingArguments` extended with reward-modeling options and notebook defaults."""

    # Upper bound on tokenized sequence length; the notebook filters out
    # chosen/rejected pairs that exceed it.
    max_length: Optional[int] = 256
    dataset_num_proc: Optional[int] = None
    center_rewards_coefficient: Optional[float] = None

    # Defaults chosen for this notebook.
    remove_unused_columns: bool = False
    output_dir: str = "./save_new_reward"
    report_to: str = "none"
    logging_steps: int = 10

@dataclass
class ScriptArguments:
    """Dataset selection and miscellaneous run options for this notebook."""

    # --- dataset ---
    dataset_name: str = "Anthropic/hh-rlhf"
    dataset_train_split: str = "train"
    dataset_test_split: str = "test"

    # --- misc ---
    config: Optional[str] = None
    gradient_checkpointing_use_reentrant: bool = False
    ignore_bias_buffers: bool = False

**Load data and train a reward model**

In [None]:
################
# Configuration
################
# Each config dataclass is parsed on its own. With `return_remaining_strings=True`
# the parser also returns the argv entries it did not recognise, so only the
# first element of the returned tuple (the populated dataclass) is kept.
parser = HfArgumentParser(ScriptArguments)
script_args = parser.parse_args_into_dataclasses(return_remaining_strings=True)[0]

parser = HfArgumentParser(RewardConfig)
training_args = parser.parse_args_into_dataclasses(return_remaining_strings=True)[0]

parser = HfArgumentParser(ModelConfig)
model_config = parser.parse_args_into_dataclasses(return_remaining_strings=True)[0]

# Take the re-entrant flag from ScriptArguments (default False) instead of hard-coding it.
training_args.gradient_checkpointing_kwargs = dict(
    use_reentrant=script_args.gradient_checkpointing_use_reentrant
)

################
# Model & Tokenizer
################
# "auto" and None are passed through unchanged; any other value is looked up
# as an attribute of `torch` (e.g. "bfloat16" -> torch.bfloat16).
torch_dtype = (
    model_config.torch_dtype
    if model_config.torch_dtype in ["auto", None]
    else getattr(torch, model_config.torch_dtype)
)
quantization_config = get_quantization_config(model_config)
model_kwargs = dict(
    revision=model_config.model_revision,
    device_map=get_kbit_device_map() if quantization_config is not None else None,
    quantization_config=quantization_config,
    # The cache is switched off whenever gradient checkpointing is enabled.
    use_cache=not training_args.gradient_checkpointing,
    torch_dtype=torch_dtype,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_config.model_name_or_path, trust_remote_code=model_config.trust_remote_code, use_fast=True
)
# num_labels=1: the classification head emits a single score per sequence.
model = AutoModelForSequenceClassification.from_pretrained(
    model_config.model_name_or_path, num_labels=1, trust_remote_code=model_config.trust_remote_code, **model_kwargs
)
# Align padding tokens between tokenizer and model
model.config.pad_token_id = tokenizer.pad_token_id

# If post-training a base model, use ChatML as the default template
if tokenizer.chat_template is None:
    model, tokenizer = setup_chat_format(model, tokenizer)

if model_config.use_peft and model_config.lora_task_type != "SEQ_CLS":
    warnings.warn(
        "You are using a `task_type` that is different than `SEQ_CLS` for PEFT. This will lead to silent bugs"
        " Make sure to pass --lora_task_type SEQ_CLS when using this script with PEFT."
    )

############################
# Load dataset and preprocess
############################
# Only the first 5% of each split is loaded. The split names come from
# ScriptArguments (defaults: "train" / "test") rather than being hard-coded.
SPLIT_SLICE = "[:5%]"
train_dataset = load_dataset(
    script_args.dataset_name, split=f"{script_args.dataset_train_split}{SPLIT_SLICE}"
)
eval_dataset = load_dataset(
    script_args.dataset_name, split=f"{script_args.dataset_test_split}{SPLIT_SLICE}"
)

# Tokenize chosen/rejected pairs of inputs
# Adapt this section to your needs for custom datasets
def preprocess_function(examples):
    """Tokenize a batch of chosen/rejected text pairs.

    Args:
        examples: mapping with "chosen" and "rejected" entries, each an
            iterable of texts; the two are walked in lockstep.

    Returns:
        dict with four parallel lists: "input_ids_chosen",
        "attention_mask_chosen", "input_ids_rejected" and
        "attention_mask_rejected".
    """
    batch = {
        "input_ids_chosen": [],
        "attention_mask_chosen": [],
        "input_ids_rejected": [],
        "attention_mask_rejected": [],
    }
    pairs = zip(examples["chosen"], examples["rejected"])
    for chosen_text, rejected_text in pairs:
        # Each text is tokenized on its own with the module-level `tokenizer`,
        # chosen first and then rejected.
        for ids_key, mask_key, text in (
            ("input_ids_chosen", "attention_mask_chosen", chosen_text),
            ("input_ids_rejected", "attention_mask_rejected", rejected_text),
        ):
            encoding = tokenizer(text)
            batch[ids_key].append(encoding["input_ids"])
            batch[mask_key].append(encoding["attention_mask"])

    return batch


# Preprocess both splits and filter out examples that are longer than
# training_args.max_length.
def _tokenize_and_filter(dataset, num_proc):
    """Tokenize `dataset` with `preprocess_function`, then drop over-long pairs.

    A pair is kept only when both its chosen and its rejected token sequences
    have at most `training_args.max_length` tokens.
    """
    tokenized = dataset.map(
        preprocess_function,
        batched=True,
        num_proc=num_proc,
    )
    return tokenized.filter(
        lambda x: len(x["input_ids_chosen"]) <= training_args.max_length
        and len(x["input_ids_rejected"]) <= training_args.max_length
    )


# Use the configured worker count when one is set; otherwise keep this
# notebook's previous fixed value of 4.
map_num_proc = training_args.dataset_num_proc if training_args.dataset_num_proc is not None else 4

train_dataset = _tokenize_and_filter(train_dataset, map_num_proc)
eval_dataset = _tokenize_and_filter(eval_dataset, map_num_proc)

##########
# Training
##########
trainer = RewardTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=get_peft_config(model_config),
)
trainer.train()

############################
# Save model and push to Hub
############################
# The model is saved once, right after training; nothing below modifies it,
# so a second save before the optional push is unnecessary.
trainer.save_model(training_args.output_dir)

metrics = trainer.evaluate()
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

if training_args.push_to_hub:
    trainer.push_to_hub(dataset_name=script_args.dataset_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/644 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/663M [00:00<?, ?B/s]

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


README.md:   0%|          | 0.00/5.77k [00:00<?, ?B/s]

train.jsonl.gz:   0%|          | 0.00/13.2M [00:00<?, ?B/s]

train.jsonl.gz:   0%|          | 0.00/16.2M [00:00<?, ?B/s]

train.jsonl.gz:   0%|          | 0.00/20.1M [00:00<?, ?B/s]

train.jsonl.gz:   0%|          | 0.00/25.7M [00:00<?, ?B/s]

test.jsonl.gz:   0%|          | 0.00/743k [00:00<?, ?B/s]

test.jsonl.gz:   0%|          | 0.00/875k [00:00<?, ?B/s]

test.jsonl.gz:   0%|          | 0.00/1.05M [00:00<?, ?B/s]

test.jsonl.gz:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/160800 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/8552 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/8040 [00:00<?, ? examples/s]

Filter:   0%|          | 0/8040 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/428 [00:00<?, ? examples/s]

Filter:   0%|          | 0/428 [00:00<?, ? examples/s]

You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,0.8471
20,0.6653
30,0.7198
40,0.7289
50,0.6968
60,0.7249
70,0.701
80,0.7142
90,0.7443
100,0.7319








***** eval metrics *****
  epoch                   =        3.0
  eval_accuracy           =     0.5634
  eval_loss               =     0.6931
  eval_runtime            = 0:00:28.51
  eval_samples_per_second =     11.886
  eval_steps_per_second   =      1.508


In [None]:
!pip list
#!pip install accelerate==0.34.1

Package                            Version
---------------------------------- --------------------
absl-py                            1.4.0
accelerate                         0.34.2
aiohappyeyeballs                   2.4.3
aiohttp                            3.10.10
aiosignal                          1.3.1
alabaster                          0.7.16
albucore                           0.0.16
albumentations                     1.4.15
altair                             4.2.2
annotated-types                    0.7.0
anyio                              3.7.1
argon2-cffi                        23.1.0
argon2-cffi-bindings               21.2.0
array_record                       0.5.1
arviz                              0.19.0
astropy                            6.1.4
astropy-iers-data                  0.2024.10.21.0.33.21
astunparse                         1.6.3
async-timeout                      4.0.3
atpublic                           4.1.0
attrs                              24.2.0
audioread      