In [1]:
!pip install -q transformers trl peft accelerate bitsandbytes

In [2]:
!pip install -qU datasets

In [2]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments

In [4]:
from datasets import load_dataset, Dataset

In [5]:
from trl import SFTTrainer, DPOConfig, DPOTrainer

In [6]:
from peft import LoraConfig, get_peft_model, AutoPeftModelForCausalLM, prepare_model_for_kbit_training

In [7]:
import torch

In [8]:
from accelerate import Accelerator

In [9]:
# Constructor: Initializes the dataset and configuration parameters.

# Prepares the LoRA model configuration for fine-tuning.
# Ensures 'self.model' is defined before calling 'get_peft_model' to avoid errors.

# Loads the pre-trained model with 4-bit precision for memory efficiency.
# Maps the model to the correct device (CPU/GPU).
# Disables cache to allow gradient updates during training.
# Prepares the model for low-bit fine-tuning using LoRA if enabled in the configuration.

# Loads the tokenizer and sets the pad token.
# Setting pad_token as eos_token is a common practice, but check if it aligns with your dataset needs.

# Defines training arguments such as batch size, optimizer, learning rate, and number of training epochs.
# Uses 'fp16' for mixed precision training to optimize performance.
# Disables logging to external platforms using 'report_to="none"'.

# Creates the trainer after loading the model and tokenizer.
# Prints trainable parameters when using LoRA for debugging.
# Initializes the trainer with model, dataset, and training arguments.

# Starts the training process.
# Saves the fine-tuned model and tokenizer in the specified output directory.
# Ensures the tokenizer is saved properly for later inference.

In [17]:
## parameter for the SFT
sft_config={
    "model_ckpt": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", #This is my Pretrain model
            "load_in_4bit": True,
            "device_map": {"": Accelerator().local_process_index},
            "torch_dtype": torch.float16,
            "torch_dtype": torch.float16,
            "trust_remote_code": True,

            "use_lora": True,
            "r": 16,
            "lora_alpha": 16,
            "lora_dropout": 0.05,
            "bias": "none",
            "task_type": "CAUSAL_LM",
            "target_modules": ["q_proj", "v_proj"],

            "output_dir": "sft-tiny-chatbot",
            "per_device_train_batch_size": 1,
            "gradient_accumulation_steps": 1,
            "optim": "paged_adamw_32bit",
            "learning_rate": 2e-4,
            "lr_scheduler_type": "cosine",
            "save_strategy": "epoch",
            "logging_steps": 100,
            "num_train_epochs": 1,
            "max_steps": 250,
            "fp16": True,
            "push_to_hub": True,
            "packing": False,
            "max_seq_length": 512,
            "neftune_noise_alpha": 5
}

In [39]:
class TrainSFT:
    def __init__(self, data, config) -> None:
        self.data = data
        self.config = config

    def prepare_lora_model(self):
        self.lora_config = LoraConfig(
            r=self.config["r"],
            lora_alpha=self.config["lora_alpha"],
            lora_dropout=self.config["lora_dropout"],
            bias=self.config["bias"],
            task_type=self.config["task_type"],
            target_modules=self.config["target_modules"]
        )
        self.model = get_peft_model(self.model, self.lora_config)

    def load_model_tokenizer(self):
        if self.config.get("load_in_4bit", False):
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=self.config["torch_dtype"]
            )
            quantization_config = bnb_config
        else:
            quantization_config = None

        self.model = AutoModelForCausalLM.from_pretrained(
            self.config["model_ckpt"],
            quantization_config=quantization_config,
            device_map=self.config["device_map"],
            torch_dtype=self.config["torch_dtype"]
        )

        self.model.config.use_cache = False
        self.model.config.pretraining_tp = 1
        self.model = prepare_model_for_kbit_training(self.model)

        if self.config["use_lora"]:
            self.prepare_lora_model()

        self.tokenizer = AutoTokenizer.from_pretrained(self.config["model_ckpt"])
        self.tokenizer.pad_token = self.tokenizer.eos_token

    def set_training_args(self):
        return TrainingArguments(
            output_dir=self.config["output_dir"],
            per_device_train_batch_size=self.config["per_device_train_batch_size"],
            gradient_accumulation_steps=self.config["gradient_accumulation_steps"],
            optim=self.config["optim"],
            learning_rate=self.config["learning_rate"],
            lr_scheduler_type=self.config["lr_scheduler_type"],
            save_strategy=self.config["save_strategy"],
            logging_steps=self.config["logging_steps"],
            num_train_epochs=self.config["num_train_epochs"],
            max_steps=self.config["max_steps"],
            fp16=self.config["fp16"],
            push_to_hub=self.config["push_to_hub"],
            neftune_noise_alpha=self.config["neftune_noise_alpha"],
            report_to="none"
        )

    def create_trainer(self):
        self.load_model_tokenizer()
        if self.config["use_lora"]:
            print(self.model.print_trainable_parameters())
            self.trainer = SFTTrainer(
                model=self.model,
                train_dataset=self.data,
                peft_config=self.lora_config,
                args=self.set_training_args(),
                processing_class=self.tokenizer,
            )

    def train_and_save_model(self):
        self.create_trainer()
        self.trainer.train()
        self.trainer.save_model(self.config["output_dir"])
        self.tokenizer.save_pretrained(self.config["output_dir"])

In [40]:
def create_data():
  data=load_dataset("tatsu-lab/alpaca", split="train")
  data_df = data.to_pandas()
  data_df = data_df[:700]
  data_df["text"] = data_df[["input", "instruction", "output"]].apply(lambda x: "Human: " + x["instruction"] + " " + x["input"] + " Assistant: "+ x["output"], axis=1)
  data = Dataset.from_pandas(data_df)
  return data

In [41]:
data = create_data()

In [42]:
data[0]

{'instruction': 'Give three tips for staying healthy.',
 'input': '',
 'output': '1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule.',
 'text': 'Human: Give three tips for staying healthy.  Assistant: 1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule.'}

In [43]:
train_sft = TrainSFT(data, sft_config)

In [44]:
train_sft.train_and_save_model()

trainable params: 2,252,800 || all params: 1,102,301,184 || trainable%: 0.2044
None


Converting train dataset to ChatML:   0%|          | 0/700 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/700 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/700 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/700 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
  return fn(*args, **kwargs)


Step,Training Loss
100,2.1307
200,1.7135


No files have been modified since last commit. Skipping to prevent empty commit.


In [24]:
class TrainSFT:
  def __init__(self,data,config) -> None:
    self.data=data
    self.config=config

  def prepare_lora_model(self):
    self.lora_config=LoraConfig(r=self.config["r"],
                                lora_alpha=self.config["lora_alpha"],
                                lora_dropout=self.config["lora_dropout"],
                                bias=self.config["bias"],
                                task_type=self.config["task_type"],
                                target_modules=self.config["target_modules"])

    self.model=get_peft_model(self.model,self.lora_config)

  def load_model_tokenizer(self):
    self.model=AutoModelForCausalLM.from_pretrained(
                              self.config["model_ckpt"],
                              load_in_4bit=self.config["load_in_4bit"],
                              device_map=self.config["device_map"],
                              torch_dtype=self.config["torch_dtype"]
      )
    self.model.config.use_cache=False
    self.model.config.pretraining_tp=1
    self.model = prepare_model_for_kbit_training(self.model)

    if self.config["use_lora"]:
      self.prepare_lora_model()

    self.tokenizer = AutoTokenizer.from_pretrained(self.config["model_ckpt"])
    self.tokenizer.pad_token = self.tokenizer.eos_token

  def set_training_args(self):
      return TrainingArguments(
                              output_dir=self.config["output_dir"],
                              per_device_train_batch_size=self.config["per_device_train_batch_size"],
                              gradient_accumulation_steps=self.config["gradient_accumulation_steps"],
                              optim=self.config["optim"],
                              learning_rate=self.config["learning_rate"],
                              lr_scheduler_type=self.config["lr_scheduler_type"],
                              save_strategy=self.config["save_strategy"],
                              logging_steps=self.config["logging_steps"],
                              num_train_epochs=self.config["num_train_epochs"],
                              max_steps=self.config["max_steps"],
                              fp16=self.config["fp16"],
                              push_to_hub=self.config["push_to_hub"],
                              neftune_noise_alpha=self.config["neftune_noise_alpha"],
                              report_to="none"
                                )

  def create_trainer(self):
    self.load_model_tokenizer()
    if self.config["use_lora"]:
      print(self.model.print_trainable_parameters())
      self.trainer = SFTTrainer(
                        model=self.model,
                        train_dataset=self.data,
                        peft_config=self.lora_config,
                        args=self.set_training_args(),
                          )


  def train_and_save_model(self):
    self.create_trainer()
    self.trainer.train()
    self.trainer.save_model(self.config["output_dir"])
    self.tokenizer.save_pretrained(self.config["output_dir"])

####Preference Alignment- DPO
Will train or finetune our model using PEFT(SFT) then will retrain for preference alignment for controlled response using DPO

In [46]:
dpo_config = {
            "model_ckpt": "rahulsamant37/sft-tiny-chatbot",
            "load_in_4bit": True,
            "device_map": {"": Accelerator().local_process_index},
            "torch_dtype": torch.float16,
            "trust_remote_code": True,
            "use_lora": True,
            "r": 8,
            "lora_alpha": 8,
            "lora_dropout": 0.05,
            "bias": "none",
            "task_type": "CAUSAL_LM",
            "target_modules": ["q_proj", "v_proj"],
            "output_dir": "tiny-chatbot-dpo",
            "per_device_train_batch_size": 1,
            "gradient_accumulation_steps": 1,
            "optim": "paged_adamw_32bit",
            "learning_rate": 2e-4,
            "lr_scheduler_type": "cosine",
            "save_strategy": "epoch",
            "logging_steps": 100,
            "num_train_epochs": 1,
            "max_steps": 250,
            "fp16": True,
            "push_to_hub": True,
            "train_cln_name": "text",
            "packing": False,
            "neftune_noise_alpha": 5,
            "beta": 0.1,
            "loss_type": "kto_pair",
            "max_length": 768,
            "max_prompt_length": 512,
            "max_target_length": 256,
            "is_encoder_decoder": False
        }

In [52]:
class TrainDPO:
    def __init__(self, data, config) -> None:
        self.data = data
        self.config = config

    def prepare_lora_model(self):
        self.lora_config = LoraConfig(
            r=self.config["r"],
            lora_alpha=self.config["lora_alpha"],
            lora_dropout=self.config["lora_dropout"],
            bias=self.config["bias"],
            task_type=self.config["task_type"],
            target_modules=self.config["target_modules"]
        )
        # Apply LoRA to the trainable model
        self.model = get_peft_model(self.model, self.lora_config)

        # For DPO, the reference model should NOT have LoRA adapters
        # It should remain frozen as the original model
        # So we don't apply get_peft_model to self.model_ref

    def load_model_tokenizer(self):
        if self.config.get("load_in_4bit", False):
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=self.config["torch_dtype"]
            )
            quantization_config = bnb_config
        else:
            quantization_config = None

        # Load the main model (will be trained)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.config["model_ckpt"],
            quantization_config=quantization_config,
            device_map=self.config["device_map"],
            torch_dtype=self.config["torch_dtype"]
        )

        # Load the reference model (stays frozen)
        self.model_ref = AutoModelForCausalLM.from_pretrained(
            self.config["model_ckpt"],
            quantization_config=quantization_config,
            device_map=self.config["device_map"],
            torch_dtype=self.config["torch_dtype"]
        )

        self.model.config.use_cache = False
        self.model.config.pretraining_tp = 1
        self.model = prepare_model_for_kbit_training(self.model)

        if self.config["use_lora"]:
            self.prepare_lora_model()

        self.tokenizer = AutoTokenizer.from_pretrained(self.config["model_ckpt"])
        self.tokenizer.pad_token = self.tokenizer.eos_token

    def set_training_args(self):
        return DPOConfig(
            output_dir=self.config["output_dir"],
            per_device_train_batch_size=self.config["per_device_train_batch_size"],
            gradient_accumulation_steps=self.config["gradient_accumulation_steps"],
            optim=self.config["optim"],
            learning_rate=self.config["learning_rate"],
            lr_scheduler_type=self.config["lr_scheduler_type"],
            save_strategy=self.config["save_strategy"],
            logging_steps=self.config["logging_steps"],
            num_train_epochs=self.config["num_train_epochs"],
            max_steps=self.config["max_steps"],
            fp16=self.config["fp16"],
            push_to_hub=self.config["push_to_hub"],
            neftune_noise_alpha=self.config["neftune_noise_alpha"],
            report_to="none",
            remove_unused_columns=False,
        )

    def create_trainer(self):
        self.load_model_tokenizer()

        if self.config["use_lora"]:
            print(self.model.print_trainable_parameters())

        self.trainer = DPOTrainer(
            model=self.model,
            ref_model=self.model_ref,
            args=self.set_training_args(),
            train_dataset=self.data,
            processing_class=self.tokenizer,
        )

    def train_and_save_model(self):
        self.create_trainer()
        self.trainer.train()
        self.trainer.save_model(self.config["output_dir"])
        self.tokenizer.save_pretrained(self.config["output_dir"])

In [50]:
def create_data():
    df = load_dataset("Anthropic/hh-rlhf", split="train").to_pandas()
    df["prompt"] = df["chosen"].apply(lambda x: x.split("Assistant: ")[0])
    df["chosen"] = df["chosen"].apply(lambda x: "Assistant: "+ x.split("Assistant: ")[-1])
    df["rejected"] = df["rejected"].apply(lambda x: "Assistant: " + x.split("Assistant: ")[-1])
    df = df.sample(1000)
    data = Dataset.from_pandas(df)
    return data
data = create_data()

README.md:   0%|          | 0.00/5.77k [00:00<?, ?B/s]

train.jsonl.gz:   0%|          | 0.00/13.2M [00:00<?, ?B/s]

train.jsonl.gz:   0%|          | 0.00/16.2M [00:00<?, ?B/s]

train.jsonl.gz:   0%|          | 0.00/20.1M [00:00<?, ?B/s]

train.jsonl.gz:   0%|          | 0.00/25.7M [00:00<?, ?B/s]

test.jsonl.gz:   0%|          | 0.00/743k [00:00<?, ?B/s]

test.jsonl.gz:   0%|          | 0.00/875k [00:00<?, ?B/s]

test.jsonl.gz:   0%|          | 0.00/1.05M [00:00<?, ?B/s]

test.jsonl.gz:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/160800 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/8552 [00:00<?, ? examples/s]

In [51]:
data[0]

{'chosen': 'Assistant: I’m sorry, I’m not sure what I could clarify here.  The Normandy Empire eventually became the Kingdom of England, which is what you see as England today.  That was in 1066.  Is there anything else I could clarify?',
 'rejected': 'Assistant: Which question was that?  I’m not sure what else you might be asking, so I’m hoping you’ll let me know.',
 'prompt': '\n\nHuman: At what point in history did Christianity replace all other religions in Western Europe?\n\n',
 '__index_level_0__': 59219}

In [53]:
train_dpo = TrainDPO(data, dpo_config)

In [54]:
train_dpo.train_and_save_model()

adapter_config.json:   0%|          | 0.00/790 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/9.02M [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/951 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.62M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/437 [00:00<?, ?B/s]

chat_template.jinja:   0%|          | 0.00/410 [00:00<?, ?B/s]

trainable params: 1,126,400 || all params: 1,101,174,784 || trainable%: 0.1023
None


Extracting prompt in train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
  return fn(*args, **kwargs)


Step,Training Loss
100,0.7122
200,0.773


1. unspervised pretraining
2. SFT
3. RLHF/DPO
4. Inferencing

####Inferencing

In [55]:
from transformers import pipeline

In [56]:
model_name = "rahulsamant37/tiny-chatbot-dpo"

# Load the model and tokenizer into the pipeline
pipe = pipeline("text-generation", model=model_name, torch_dtype=torch.float16, device_map="auto")

# Make a prediction (generate text)
prompt = "Human: Tell me a short story about a talking cat. Assistant:"

# Define generation arguments for better control over the output
generation_args = {
    "max_new_tokens": 250,
    "temperature": 0.7,
    "top_p": 0.95,
    "repetition_penalty": 1.15,
}

output = pipe(prompt, **generation_args)

# Print the generated text
print(output[0]['generated_text'])

adapter_config.json:   0%|          | 0.00/788 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/4.52M [00:00<?, ?B/s]

Device set to use cuda:0


Human: Tell me a short story about a talking cat. Assistant: Okay, I'll give it to you! Short Story 1:

The Cat is in the Window

Once upon a time, there was a very special cat named Max. He lived in an old house with lots of windows that let in plenty of light and fresh air. Max had always loved to sit outside his window at night, watching the stars twinkle above him. One evening, while he sat there alone, he heard a loud noise coming from downstairs. His owner came upstairs and saw that a mouse was trapped between two boards on the floor. The mouse scurried away as quickly as it could, but Max watched it go, feeling disappointed. "What did I do wrong?" Max thought to himself. But then he remembered something from when he was a little kitten. "A talking animal knows no wrong!" he said aloud. Suddenly, Max started speaking in a friendly voice. "Hello," he said. "Can I talk to you for a moment? It's very important." The mouse looked around nervously, wondering who this strange creature 