<a href="https://colab.research.google.com/github/rushilbhat/AIMO/blob/main/RM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets transformers torch bitsandbytes peft
!pip install --upgrade accelerate

Collecting datasets
  Downloading datasets-2.19.1-py3-none-any.whl (542 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/542.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.4/542.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
Collecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft
  Downloading peft-0.11.1-py3-none-any.whl (251 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━

In [None]:
import time
import re
import pandas as pd
import torch
import torch.nn as nn
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, AutoConfig, LlamaForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig, DataCollatorForSeq2Seq
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training

In [None]:
def extract_code(entry):
    """Replace ``entry['generated_solution']`` with the code inside its first ``<llm-code>`` block.

    The text between the first ``<llm-code>`` tag and the nearest following
    ``</llm-code>`` tag is kept, with surrounding whitespace stripped. When no
    such block exists the field is set to the empty string.

    Args:
        entry: mapping with a string ``'generated_solution'`` field. It is
            modified in place.

    Returns:
        The same ``entry`` object, after modification.
    """
    found = re.search(r'<llm-code>(.*?)</llm-code>', entry['generated_solution'], re.DOTALL)
    # Non-greedy group + DOTALL: capture a possibly multi-line body up to the first closing tag.
    entry['generated_solution'] = found.group(1).strip() if found else ""
    return entry

def tokenize_and_label(entry):
    """Tokenize ``"<question> <generated_solution>"`` and build per-token verifier labels.

    Uses the notebook-level ``tokenizer``. The first ``len(encode(question))``
    positions are labelled ``-100.0``; every remaining position is labelled
    ``1.0`` when ``entry['is_correct']`` is truthy and ``0.0`` otherwise.

    The split point is the token count of the question encoded on its own.
    The original author's note: a token that straddles the end of the question
    and the start of the solution does not need special handling.

    Args:
        entry: mapping with ``'question'``, ``'generated_solution'`` and
            ``'is_correct'`` fields.

    Returns:
        dict with ``"input_ids"`` (token ids of the joined text) and
        ``"labels"`` (list of floats as described above).
    """
    prompt = entry['question']
    solution = entry['generated_solution']
    target = 1.0 if entry['is_correct'] else 0.0

    input_ids = tokenizer.encode(f"{prompt} {solution}")
    n_prompt_tokens = len(tokenizer.encode(prompt))
    n_solution_tokens = len(input_ids) - n_prompt_tokens

    ignored = [-100.0] * n_prompt_tokens
    supervised = [target] * n_solution_tokens
    return {"input_ids": input_ids, "labels": ignored + supervised}

In [None]:
class VerifierModel(LlamaForCausalLM):
    """Llama causal LM that additionally emits a scalar verification score per token.

    For every position, the LM logit of one designated token
    (``config.bos_token_id``) is mapped through a learned affine transform
    ``logit * gain + bias``. The result is attached to the model output as
    ``verification_scores``. When ``labels`` are supplied, the output loss is
    the mean squared error between scores and labels, averaged over the
    positions whose label is not ``-100``.
    """

    def __init__(self, config):
        """Build the base LM and the two scalar parameters of the score transform.

        Args:
            config: model configuration; ``config.bos_token_id`` selects the
                vocabulary entry whose logit is used as the raw score.
        """
        super().__init__(config)
        # forward() reads self.gain and self.bias, so they must be created here;
        # without them forward() fails with AttributeError.
        # Initialised to the identity transform (score == raw logit).
        # NOTE(review): confirm the values actually present after from_pretrained,
        # since these weights are not in the base checkpoint.
        self.gain = nn.Parameter(torch.ones(1))
        self.bias = nn.Parameter(torch.zeros(1))
        self.special_token_id = config.bos_token_id

    def forward(self, input_ids, attention_mask=None, labels=None, **kwargs):
        """Run the LM and compute verification scores (and the loss, if labelled).

        Args:
            input_ids: token ids passed to the base model.
            attention_mask: optional attention mask passed to the base model.
            labels: optional per-token targets aligned with ``input_ids``;
                positions equal to ``-100`` are excluded from the loss.
            **kwargs: accepted for caller compatibility and ignored.

        Returns:
            The base model's output object, with ``verification_scores`` set
            as an attribute and, when ``labels`` is given, ``loss`` set to the
            masked mean squared error.
        """
        # Labels are verifier targets, not next-token ids, so they are not
        # forwarded: the parent would otherwise compute an unrelated
        # cross-entropy over the vocabulary that is discarded anyway.
        outputs = super().forward(input_ids, attention_mask=attention_mask)

        special_token_logits = outputs.logits[:, :, self.special_token_id]
        verification_scores = special_token_logits * self.gain + self.bias
        outputs.verification_scores = verification_scores

        if labels is not None:
            labels = labels.to(verification_scores.device)
            keep = labels != -100

            # Do the arithmetic in float32 regardless of the logits' dtype.
            scores = verification_scores.float()
            targets = labels.float()
            squared_error = (scores - targets) ** 2 * keep.float()

            # Average over supervised positions only, so the loss does not
            # shrink with the amount of padding / prompt tokens in the batch.
            # clamp avoids 0/0 when a batch has no supervised position.
            n_supervised = keep.sum().clamp(min=1)

            # Item assignment (not attribute assignment) so that "loss" is
            # registered as a key of the output even though the parent
            # returned no loss.
            outputs["loss"] = squared_error.sum() / n_supervised

        return outputs


In [None]:
# model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
# model.generation_config = GenerationConfig.from_pretrained(model_name)
# model.generation_config.pad_token_id = model.generation_config.eos_token_id

# config = AutoConfig.from_pretrained(model_name)

In [None]:
# --- Model, tokenizer and dataset setup -------------------------------------
# Base checkpoint used for both the verifier weights and the tokenizer.
model_name = "deepseek-ai/deepseek-math-7b-rl"

# 4-bit NF4 quantization with double quantization; matmuls computed in bfloat16.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)
# Load the checkpoint into the custom VerifierModel class with the quantization config above.
verifier = VerifierModel.from_pretrained(model_name, quantization_config=nf4_config)

# PEFT's preparation step for training a k-bit quantized model.
verifier = prepare_model_for_kbit_training(verifier)

# LoRA adapters on the attention query/value projections only.
# NOTE(review): the stored run reports 3,932,160 trainable parameters; confirm
# whether the verifier-specific head parameters are meant to be trainable too.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,   # PEFT task type
    r=8,                           # Rank of the low-rank update matrices
    lora_alpha=1,                 # LoRA alpha scaling parameter
    target_modules=['q_proj', 'v_proj'], # Module names that receive LoRA adapters
    lora_dropout=0.1               # Dropout applied inside the LoRA layers
)
verifier = get_peft_model(verifier, lora_config)



# Tokenizer: reuse EOS as the padding token and pad on the right.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"


# Dataset pipeline:
#   1. keep rows whose `dataset` field is 'math'
#   2. keep rows with an empty `error_message`, then reduce each solution to its <llm-code> body
#   3. take the first 100,000 rows and convert them to input_ids/labels,
#      dropping all original columns
dataset = load_dataset('nvidia/OpenMathInstruct-1', split='train')
math_dataset = dataset.filter(lambda entry: entry['dataset'] == 'math')
pot_math_dataset = math_dataset.filter(lambda entry: entry['error_message']=='').map(extract_code)
verifier_dataset = pot_math_dataset.select(range(100000)).map(tokenize_and_label, remove_columns=pot_math_dataset.column_names)

# Collator used to batch and pad examples; the stored batch dump shows labels padded with -100.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=None)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/626 [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/23.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-000002.safetensors:   0%|          | 0.00/8.59G [00:00<?, ?B/s]

model-00002-of-000002.safetensors:   0%|          | 0.00/5.23G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of VerifierModel were not initialized from the model checkpoint at deepseek-ai/deepseek-math-7b-rl and are newly initialized: ['bias', 'gain']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.09k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.61M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Downloading readme:   0%|          | 0.00/6.91k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/6.42G [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/203M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/981M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Filter:   0%|          | 0/7321344 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5685842 [00:00<?, ? examples/s]

Map:   0%|          | 0/1463566 [00:00<?, ? examples/s]

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

In [None]:
# Report how many parameters of the PEFT-wrapped verifier are trainable vs. total.
verifier.print_trainable_parameters()

trainable params: 3,932,160 || all params: 6,914,297,858 || trainable%: 0.0569


In [None]:
# First 100 tokenized examples; passed to the Trainer as its training set.
temp = verifier_dataset.select(range(100))

In [None]:
# Training configuration: one epoch, 4 examples per device per step with
# 8 gradient-accumulation steps (4 * 8 = 32 examples per optimizer step per device).
training_args = TrainingArguments(
    output_dir="verifier_output",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    learning_rate=1e-5,
)

# Trainer over the 100-example subset `temp`, batching with `data_collator`.
trainer = Trainer(
    model=verifier,
    args=training_args,
    train_dataset=temp,
    data_collator = data_collator
)

In [None]:
# Run training; the stored output shows 3 optimizer steps for this configuration.
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss


TrainOutput(global_step=3, training_loss=0.4266487757364909, metrics={'train_runtime': 210.037, 'train_samples_per_second': 0.476, 'train_steps_per_second': 0.014, 'total_flos': 719995026924048.0, 'train_loss': 0.4266487757364909, 'epoch': 0.96})

In [None]:
# Stand-alone DataLoader over the full tokenized dataset, used below to inspect
# what the collator produces for one batch of 4 examples.
from torch.utils.data import DataLoader

# Re-creates the collator with the same arguments as the setup cell.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=None)
train_dataloader = DataLoader(verifier_dataset, batch_size=4, collate_fn=data_collator)

In [None]:
# Pull a single collated batch for inspection.
first_batch = next(iter(train_dataloader))

In [None]:
# Attention mask of the first example; the stored output is 1s followed by 0s at the padded tail.
first_batch['attention_mask'][0]

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
# Token ids of the first example; in the stored output the tail is filled with id 100001 (padding).
first_batch['input_ids'][0]

tensor([100000,    774,    245,  22069,  15043,   1317,     11,    856,    317,
           254,   1604,    280,   8650,    279,    254,   5013,    473,    254,
          6947,    276,    254,   1420,  65026,     16,     20,     11,    207,
            23,   1026,     30,    473,   4300,   4027,   1666,  83130,    185,
           185,     87,     16,     11,    320,     16,     11,   1376,     17,
            11,    320,     17,    403,    207,     15,     11,    207,     15,
            11,    570,     16,     20,     11,    207,     23,    185,    185,
         20457,    403,  83130,   6034,     87,     17,    570,   1376,     16,
             8,    746,     17,    919,    334,     88,     17,    570,    320,
            16,      8,    746,     17,      8,    185,  20457, 100001, 100001,
        100001, 100001, 100001, 100001, 100001, 100001, 100001, 100001, 100001,
        100001, 100001, 100001, 100001, 100001, 100001, 100001, 100001, 100001,
        100001, 100001, 100001, 100001, 

In [None]:
# Labels of the first example; the stored output shows -100 on the question
# prefix and on the padding, and the correctness target in between.
first_batch['labels'][0]

tensor([-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100])

In [None]:
# Average of the 'tokens' column over three consecutive 100,000-row slices of pot_math_dataset.
# NOTE(review): `tokenize` is not defined in any cell visible in this notebook, so
# this cell fails on a fresh kernel unless it is defined elsewhere. It presumably
# adds a numeric 'tokens' column (a per-example token count) — confirm and restore it.
pot_math_dataset_sub1 = pot_math_dataset.select(range(100000)).map(tokenize)
pot_math_dataset_sub2 = pot_math_dataset.select(range(100000,200000)).map(tokenize)
pot_math_dataset_sub3 = pot_math_dataset.select(range(200000,300000)).map(tokenize)

# Materialise each slice as a pandas DataFrame to sum the column.
df1 = pd.DataFrame(pot_math_dataset_sub1)
df2 = pd.DataFrame(pot_math_dataset_sub2)
df3 = pd.DataFrame(pot_math_dataset_sub3)

# Mean 'tokens' value per example in each slice (sum / slice size).
print(df1['tokens'].sum()/100000)
print(df2['tokens'].sum()/100000)
print(df3['tokens'].sum()/100000)

# 1,463,566 is the row count shown by the stored Map progress bar for pot_math_dataset.
# Presumably 79 is the rounded per-example average from the prints above, making this
# a rough total-token estimate — confirm.
1463566 * 79

In [None]:
# Ask PyTorch to release its cached, currently unused GPU memory.
torch.cuda.empty_cache()