<a href="https://colab.research.google.com/github/thegallier/Differential-DNN/blob/main/selfreward.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
### SFT fine-tuning

In [None]:
! pip install peft transformer
!pip install accelerate
!pip install -i https://pypi.org/simple/ bitsandbytes
!pip install trl
!pip install peft

Collecting peft
  Using cached peft-0.10.0-py3-none-any.whl (199 kB)
[31mERROR: Could not find a version that satisfies the requirement transformer (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for transformer[0m[31m
Looking in indexes: https://pypi.org/simple/
Collecting peft
  Using cached peft-0.10.0-py3-none-any.whl (199 kB)
Installing collected packages: peft
Successfully installed peft-0.10.0


In [None]:
pip install -i https://pypi.org/simple/ bitsandbytes

Looking in indexes: https://pypi.org/simple/


In [None]:
from transformers import TrainingArguments
from trl import SFTTrainer
import os

class Trainer:
    """Small wrapper that runs one supervised fine-tuning pass with TRL's ``SFTTrainer``.

    Attributes:
        output: Directory handed to ``TrainingArguments`` as ``output_dir``. The model
            held by the trainer after training is saved under ``<output>/final_checkpoint``.
    """

    def __init__(self, output):
        """Remember the directory that receives checkpoints and the final model."""
        self.output = output

    def train(
            self,
            model,
            tokenizer,
            lora_config,
            dataset,
            learning_rate=2e-4,
            batch_size=4,
            max_seq_length=1024,
    ):
        """Fine-tune ``model`` on ``dataset`` for one epoch and save the result.

        Args:
            model: Model passed to ``SFTTrainer``.
            tokenizer: Tokenizer passed to ``SFTTrainer``.
            lora_config: PEFT configuration passed as ``peft_config``.
            dataset: Training dataset; the training text is read from its ``"text"`` field.
            learning_rate: Optimizer learning rate (default 2e-4).
            batch_size: Per-device training batch size (default 4).
            max_seq_length: Maximum sequence length given to ``SFTTrainer`` (default 1024).

        Returns:
            None. The trained model is written to ``<self.output>/final_checkpoint``.
        """
        # Default hyperparameters come from https://www.datacamp.com/tutorial/mistral-7b-tutorial
        training_args = TrainingArguments(
            output_dir=self.output,
            per_device_train_batch_size=batch_size,
            learning_rate=learning_rate,
            gradient_accumulation_steps=4,
            warmup_steps=30,
            logging_steps=1,
            num_train_epochs=1,
            save_steps=50
        )

        trainer = SFTTrainer(
            model=model,
            train_dataset=dataset,
            peft_config=lora_config,
            max_seq_length=max_seq_length,
            tokenizer=tokenizer,
            args=training_args,
            dataset_text_field="text"
        )

        trainer.train()

        # Save the final state separately from the periodic checkpoints in `self.output`.
        output_dir = os.path.join(self.output, "final_checkpoint")
        trainer.model.save_pretrained(output_dir)

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import (
    get_peft_model,
    LoraConfig,
    TaskType,
    prepare_model_for_kbit_training,
)

def get_bnb_config():
    """Build the bitsandbytes quantization settings used when loading the model.

    Returns:
        A ``BitsAndBytesConfig`` requesting 4-bit loading with the ``"nf4"`` quantization
        type and ``torch.float16`` as the compute dtype.
    """
    return BitsAndBytesConfig(
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        load_in_4bit=True,
    )

def load_model(tokenizer_name, model_name):
    """Load a tokenizer and a quantized causal language model.

    Args:
        tokenizer_name: Name or path given to ``AutoTokenizer.from_pretrained``.
        model_name: Name or path given to ``AutoModelForCausalLM.from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple. The tokenizer pads on the right using its EOS
        token; the model is loaded with ``device_map="auto"`` and the quantization
        settings from ``get_bnb_config()``.
    """
    quantization = get_bnb_config()

    tok = AutoTokenizer.from_pretrained(tokenizer_name, trust_remote_code=True)
    # Reuse EOS as the padding token and pad on the right.
    tok.pad_token = tok.eos_token
    tok.padding_side = "right"

    load_kwargs = {"device_map": "auto", "quantization_config": quantization}
    lm = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)
    lm.config.pretraining_tp = 1

    return lm, tok

def create_peft_model(model):
    """Wrap ``model`` with LoRA adapters for causal-LM fine-tuning.

    The model is first passed through ``prepare_model_for_kbit_training`` and then
    through ``get_peft_model``; the trainable-parameter summary is printed.

    Args:
        model: The loaded base model.

    Returns:
        A ``(peft_model, peft_config)`` tuple.
    """
    adapter_targets = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"]

    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=64,
        lora_alpha=16,
        lora_dropout=0.1,
        bias="none",
        target_modules=adapter_targets,
    )

    prepared = prepare_model_for_kbit_training(model)
    peft_model = get_peft_model(prepared, peft_config)
    peft_model.print_trainable_parameters()

    return peft_model, peft_config

In [None]:
import torch
from transformers import TextStreamer

def sample(model, tokenizer, prompt, max_tokens=128):
    """Sample one reply to a single user prompt, streaming tokens while generating.

    Args:
        model: Model exposing ``generate``; inputs are moved to ``model.device`` when the
            attribute exists, otherwise to ``"cuda"``.
        tokenizer: Tokenizer providing ``apply_chat_template`` and ``batch_decode``.
        prompt: Text of the user message.
        max_tokens: Maximum number of newly generated tokens.

    Returns:
        The first sequence returned by ``generate``, decoded with special tokens skipped.
        NOTE(review): for decoder-only models this presumably still contains the prompt
        text — confirm before parsing the result.
    """
    chat = [
        {"role": "user", "content": prompt},
    ]

    # Apply the same chat template that was used to build the training text.
    prompt_for_model = tokenizer.apply_chat_template(chat, tokenize=False)

    # Tokenize and move the inputs to the device the model lives on.
    device = getattr(model, "device", "cuda")
    model_inputs = tokenizer(prompt_for_model, return_tensors="pt").to(device)

    # Stop on EOS, and on "[/INST]" when the tokenizer maps it to a single token.
    # It is encoded without special tokens: otherwise the first id is the prepended BOS
    # token rather than the marker. A multi-token marker cannot be expressed as an
    # `eos_token_id`, so it is not used in that case.
    stop_ids = [tokenizer.eos_token_id]
    marker_ids = tokenizer("[/INST]", add_special_tokens=False).input_ids
    if len(marker_ids) == 1:
        stop_ids.insert(0, marker_ids[0])

    # Stream the results to the terminal so we can see it generating.
    streamer = TextStreamer(tokenizer)

    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            streamer=streamer,
            num_return_sequences=1,
            eos_token_id=stop_ids,
            max_new_tokens=max_tokens
        )

    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0]

In [None]:
import argparse
import os
#from srlm.trainer import Trainer
#from srlm.model import load_model, create_peft_model

from datasets import load_dataset

def collate_fn(tokenizer, x):
    """Render one prompt/completion record as a single chat-formatted string.

    Args:
        tokenizer: Tokenizer providing ``apply_chat_template``.
        x: Record with ``'prompt'`` and ``'completion'`` entries.

    Returns:
        ``{"text": rendered}`` where ``rendered`` is the untokenized chat-template output
        for a user turn followed by an assistant turn (no system turn is included).
    """
    user_turn = {"role": "user", "content": x['prompt']}
    assistant_turn = {"role": "assistant", "content": x['completion']}
    rendered = tokenizer.apply_chat_template([user_turn, assistant_turn], tokenize=False)
    return {"text": rendered}


# Run configuration. In a notebook there is no command line to parse, so the values are
# set directly and exposed as attributes on `args`:
#   dataset    - JSON-lines file whose records carry 'prompt' and 'completion' fields
#   base_model - name passed to load_model() as the tokenizer name
#   model      - name passed to load_model() as the model name
#   output     - directory handed to Trainer for checkpoints and the final model
mydict = {
    'dataset': '/content/drive/MyDrive/Self-Rewarding/data/train/eft.jsonl',
    'model': "mistralai/Mistral-7B-v0.1",
    'base_model': "mistralai/Mistral-7B-v0.1",
    'output': './test',
}
args = argparse.Namespace(**mydict)

# you can download the dataset file with:
# `oxen download datasets/Self-Rewarding-Language-Models M0/train/ift.jsonl`
# NOTE(review): the command above fetches ift.jsonl while the configured path points at
# eft.jsonl — confirm which file is intended.
dataset_file = args.dataset

# load the training dataset and shuffle it with a fixed seed
dataset = load_dataset("json", data_files={'train': dataset_file})
dataset = dataset['train'].shuffle(seed=42)

# load the model, then add the chat-formatted 'text' field to every record
model, tokenizer = load_model(args.base_model, args.model)
dataset = dataset.map(lambda x: collate_fn(tokenizer, x))

print("First example in the dataset")
# Index the row first so only one record is read instead of the whole 'text' column.
print(dataset[0]['text'])

model, lora_config = create_peft_model(model)
trainer = Trainer(args.output)
trainer.train(model, tokenizer, lora_config, dataset)



Generating train split: 0 examples [00:00, ? examples/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

In [None]:
# Print the working directory; relative paths such as the './test' output directory resolve against it.
!pwd

/content
