<a href="https://colab.research.google.com/github/mudogruer/LLMs/blob/main/Fine_Tuning_Mistral7b.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fine-Tuning Mistral

In [None]:
!pip install -q -U transformers bitsandbytes peft datasets accelerate trl

# Loading Tokenizer

In [None]:
from transformers import AutoTokenizer

# Hub id of the base checkpoint; the same name is reused when loading the model.
base_model = "mistralai/Mistral-7B-v0.1"

# Load the matching tokenizer with right-side padding and add_eos_token=True.
tokenizer = AutoTokenizer.from_pretrained(base_model, padding_side="right", add_eos_token=True)

# Use the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Cell output: the tokenizer's (add_bos_token, add_eos_token) flags.
(tokenizer.add_bos_token, tokenizer.add_eos_token)

In [None]:
# Display the tokenizer object as the cell output.
tokenizer

# Loading the Model

In [None]:
import torch
from transformers import BitsAndBytesConfig

# 4-bit (NF4) quantization settings, without double quantization and with
# bfloat16 as the compute dtype; consumed when the model is loaded.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

In [None]:
from transformers import AutoModelForCausalLM

# Load the base checkpoint quantized according to `bnb_config`.
# `load_in_4bit` is deliberately NOT passed here: the flag is already set on
# `bnb_config`, and transformers raises a ValueError when `load_in_4bit` /
# `load_in_8bit` is given together with `quantization_config`.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Loading the Dataset

In [None]:
from datasets import load_dataset

dataset_name = "databricks/databricks-dolly-15k"

# Carve two slices out of the single "train" split:
# the first slice is used for training, the second for evaluation.
train_dataset, eval_dataset = (
    load_dataset(dataset_name, split=split_spec)
    for split_spec in ("train[0:800]", "train[800:1000]")
)

# Understanding the Dataset

In [None]:
# Display the training split object as the cell output.
train_dataset

In [None]:
# Convert the training split to a pandas DataFrame and display it.
train_dataset.to_pandas()

In [None]:
# Show the dtype of each column of the DataFrame view.
train_dataset.to_pandas().dtypes

In [None]:
# Count rows per value of the "category" column.
train_dataset.to_pandas().value_counts("category")

# Generating the Prompt Format

In [None]:
def generate_prompt(sample):
    """Render one dataset row as a single ``[INST] ... [/INST]`` training prompt.

    Args:
        sample: Mapping with ``"instruction"``, ``"context"`` and
            ``"response"`` entries. ``"context"`` may be empty (or ``None``).

    Returns:
        dict: ``{"text": prompt}``. The "Here is some context: ..." line is
        included only when the sample has a non-empty context.

    NOTE(review): the template hard-codes ``<s>`` / ``</s>``, while the
    tokenizer in this notebook is created with ``add_eos_token=True``; special
    tokens may end up duplicated at tokenization time — confirm.
    """
    context = sample["context"]
    # Emit the context line only when there is context. The previous template
    # interpolated the literal string "None" into the prompt for empty contexts.
    context_line = f"Here is some context: {context}\n    " if context else ""
    full_prompt = (
        f"<s>[INST]{sample['instruction']}\n"
        f"    {context_line}[/INST] {sample['response']}</s>"
    )
    return {"text": full_prompt}

In [None]:
# Display the first raw training row.
train_dataset[0]

In [None]:
# Print the {"text": ...} dict produced by generate_prompt for the first row.
print(generate_prompt(train_dataset[0]))

In [None]:
# Map every row through generate_prompt and drop the raw columns, leaving
# only the "text" column that generate_prompt returns.
generated_train_dataset = train_dataset.map(
    generate_prompt, remove_columns=list(train_dataset.features))
# Drop the eval split's own columns (previously this used train_dataset's
# feature list, which only works while both splits share the same schema).
generated_val_dataset = eval_dataset.map(
    generate_prompt, remove_columns=list(eval_dataset.features))

In [None]:
# Display the mapped training dataset object.
generated_train_dataset

In [None]:
# Display the rendered prompt text of row 5.
generated_train_dataset[5]["text"]

In [None]:
# Run the tokenizer on the rendered prompt of row 5 and display the result.
tokenizer(generated_train_dataset[5]["text"])

# LoRA Configuration

In [None]:
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing on the loaded model.
model.gradient_checkpointing_enable()

# Rebind `model` to the result of PEFT's k-bit training preparation.
# NOTE(review): prepare_model_for_kbit_training has its own
# `use_gradient_checkpointing` option, so the explicit call above may be
# redundant — confirm against the installed PEFT version.
model = prepare_model_for_kbit_training(model)

In [None]:
def print_trainable_parameters(model):
    """Print how many of ``model``'s parameters are trainable.

    Walks ``model.named_parameters()``, totals ``numel()`` over all parameters
    and over those whose ``requires_grad`` is truthy, then prints both counts
    together with the trainable percentage.
    """
    sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]
    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, trainable in sizes if trainable)
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings: r=8, alpha=16, dropout 0.05, no bias terms,
# applied to the module names listed in `target_modules`.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
        "lm_head",
    ],
)

In [None]:
from peft import get_peft_model

# Wrap the prepared model with the LoRA configuration; `model` is rebound to the wrapped model.
model = get_peft_model(model, lora_config)

# Report how many parameters are trainable after wrapping.
print_trainable_parameters(model)

In [None]:
# Print the wrapped model's module structure.
print(model)

# Model Training

In [None]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("HF")

!huggingface-cli login --token $secret_value_0

In [None]:
from transformers import TrainingArguments

# Training hyper-parameters for the short demonstration run.
training_arguments = TrainingArguments(
    output_dir="./results",
    # NOTE: `max_steps` below takes precedence over `num_train_epochs`
    # (per the TrainingArguments documentation), so training stops after 50 steps.
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    # Save, log and evaluate on the same 25-step cadence.
    save_strategy="steps",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    max_steps=50,
    # `evaluation_strategy` is the deprecated spelling of this argument;
    # `eval_strategy` is the current name (transformers >= 4.41).
    eval_strategy="steps",
    eval_steps=25,
    do_eval=True,
    report_to="none",
)

In [None]:
from trl import SFTTrainer

# Build the supervised fine-tuning trainer over the rendered "text" column.
# `model` has already been wrapped via get_peft_model(model, lora_config) in
# this notebook, so the LoRA config is not passed a second time here.
# NOTE(review): newer TRL releases rename `tokenizer=` to `processing_class=`
# and move `dataset_text_field` onto SFTConfig — confirm against the installed
# TRL version.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    train_dataset=generated_train_dataset,
    eval_dataset=generated_val_dataset,
    dataset_text_field="text",
)

In [None]:
# Turn off the model's use_cache setting before training, then run the training loop.
model.config.use_cache = False
trainer.train()

In [None]:
# Repository name under which the trained model is pushed.
my_finetuned_model = "mistral-7b-dolly"

# Push the trainer's model to the Hugging Face Hub under that name.
trainer.model.push_to_hub(my_finetuned_model)