## Install Libraries

In [None]:
!pip install datasets
!pip install peft
!pip install trl
!pip install bitsandbytes
!pip install accelerate

## Import Libraries

In [2]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

## Load Model and Tokenizer

In [3]:
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

nf4_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.bfloat16
)

base_model = AutoModelForCausalLM.from_pretrained(model_name, device_map='auto', quantization_config=nf4_config, use_cache=False)
tokenizer = AutoTokenizer.from_pretrained(model_name)

tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

## Load Dataset - SFT

In [5]:
import pandas as pd
from datasets import Dataset

In [6]:
sft_data = pd.read_csv("SFT_Training_Data.csv",encoding='windows-1252')
sft_data = sft_data[:100]
sft_data= sft_data[["prompt"]]

In [7]:
dataset = Dataset.from_pandas(sft_data)

In [8]:
dataset

Dataset({
    features: ['prompt'],
    num_rows: 100
})

## Create Configs - SFT

In [9]:
# LoRA configuration
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
)

In [10]:
# Training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    max_steps=-1,
    num_train_epochs=1,
    save_strategy="no",
    logging_steps=1,
    optim="paged_adamw_32bit",
    warmup_steps=2,
    bf16=False,
    output_dir="mistral",
)

In [11]:
trainer = SFTTrainer(
    model=base_model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="prompt",
    max_seq_length=None,
    tokenizer=tokenizer,
    args=training_args,
    packing=False,
)



Map:   0%|          | 0/100 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


## Train - SFT

In [12]:
trainer.train()



Step,Training Loss
1,2.8184
2,2.5867
3,2.8927
4,3.1431
5,2.1645
6,3.1385
7,2.3481
8,1.9595
9,2.5277
10,2.0112


TrainOutput(global_step=100, training_loss=1.614221517443657, metrics={'train_runtime': 513.7719, 'train_samples_per_second': 0.195, 'train_steps_per_second': 0.195, 'total_flos': 1227772938706944.0, 'train_loss': 1.614221517443657, 'epoch': 1.0})

In [13]:
# Save trained model
trainer.model.save_pretrained("mistral-sft")

In [None]:
# Empty VRAM
# del trainer
# import gc
# gc.collect()
# gc.collect()

0

## Model Architecture - SFT

In [16]:
trainer.model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4bit(
                (base_layer):

## Inference - SFT

In [47]:
prompt = "I feel sad all the time. What should I do?"

In [48]:
batch = tokenizer(prompt, return_tensors="pt")

In [49]:
result = trainer.model.generate(input_ids=batch["input_ids"]).to('cuda')

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [50]:
decoded = tokenizer.decode(result[0])

In [51]:
decoded

'<s> I feel sad all the time. What should I do?\n\nFeeling sad all the'

## Load Dataset - DPO

In [17]:
# Load dataset
from datasets import load_dataset
dataset = load_dataset("csv", data_files="DPO_Training_Data.csv",split="train")

Generating train split: 0 examples [00:00, ? examples/s]

## Create Configs - DPO

In [20]:
from trl import DPOTrainer

In [23]:
# LoRA configuration
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
)

In [24]:
# Training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    learning_rate=5e-5,
    max_steps=5,
    num_train_epochs=1,
    save_strategy="no",
    logging_steps=1,
    optim="paged_adamw_32bit",
    warmup_steps=2,
    bf16=False,
    output_dir="mistral-log",
)

In [25]:
# Create DPO trainer
dpo_trainer = DPOTrainer(
    trainer.model,
    ref_model=None,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    beta=0.1,
    max_prompt_length=1024,
    max_length=1536,
)

## Train - DPO

In [26]:
# Fine-tune model with DPO
dpo_trainer.train()

Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,0.6931
2,0.6931
3,0.1044
4,0.0038
5,0.0001


TrainOutput(global_step=5, training_loss=0.2989414378156653, metrics={'train_runtime': 102.1151, 'train_samples_per_second': 0.049, 'train_steps_per_second': 0.049, 'total_flos': 0.0, 'train_loss': 0.2989414378156653, 'epoch': 0.05})

In [30]:
# Save trained model
dpo_trainer.model.save_pretrained("mistral-dpo")

## Model Architecture - DPO

In [29]:
dpo_trainer.model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4bit(
                (base_layer): Li

## Inference - DPO

In [42]:
prompt = "I feel sad all the time. What should I do?"

In [43]:
batch = tokenizer(prompt, return_tensors="pt")

In [44]:
result = dpo_trainer.model.generate(input_ids=batch["input_ids"]).to('cuda')

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [45]:
decoded = tokenizer.decode(result[0])

In [46]:
decoded

'<s> I feel sad all the time. What should I do?\n\nFeeling sad all the'