<a href="https://colab.research.google.com/github/sjanorkar/LLM-finetuning/blob/main/llm_finetuning_falcon_multi_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip3 install bitsandbytes==0.41.3
!pip3 install peft==0.11.1
!pip3 install trl==0.8.6
!pip3 install accelerate==0.30.1
!pip3 install datasets==2.19.2
!pip3 install transformers==4.41.2


Collecting bitsandbytes==0.41.3
  Downloading bitsandbytes-0.41.3-py3-none-any.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.41.3
Collecting peft==0.11.1
  Downloading peft-0.11.1-py3-none-any.whl (251 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate>=0.21.0 (from peft==0.11.1)
  Downloading accelerate-0.31.0-py3-none-any.whl (309 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.4/309.4 kB[0m [31m24.6 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13.0->peft==0.11.1)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13.0->peft==0.11.1)
  Using cached nv

In [2]:
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from peft import (LoraConfig, get_peft_model, get_peft_model_state_dict)
from datasets import load_dataset
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

In [3]:
dataset_name = "Vezora/Tested-22k-Python-Alpaca"
model_id = "tiiuae/falcon-rw-1b"

# Bits and Bytes config

In [4]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16)


# Load Tokenizer

In [5]:
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

# Load Model

In [6]:
def load_model(index, path):
  if path is not None:
    model_id = path

  model = AutoModelForCausalLM.from_pretrained(
      model_id,
      quantization_config=bnb_config,
      device_map={"": 0})

  model.config.use_cache = False
  model.config.pretraining_tp = 1
  return model

# Pre fine tuning inference

In [9]:
%%time
def generate_inference(prompt, model):
  inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)

  outputs = model.generate(**inputs, max_length=300, temperature=0.5)
  text = tokenizer.batch_decode(outputs)[0]
  return text

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 8.58 µs


# PEFT parameters

In [10]:
LORA_R = 16
LORA_ALPHA = 16
LORA_DROPOUT= 0.05
LORA_TARGET_MODULES = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
    "lm_head",
]

BATCH_SIZE = 64
MICRO_BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
LEARNING_RATE = 3e-4
TRAIN_STEPS = 300

In [11]:
peft_params = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=LORA_TARGET_MODULES,
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM"
)

# Load Dataset

In [12]:
import datasets
from datasets import Dataset

dataset = load_dataset(dataset_name)
instruction = dataset["train"]["instruction"]

input = dataset["train"]["input"]
output = dataset["train"]["output"]

temp_dataset_1 = Dataset.from_dict({"instruction": instruction[0:5000], "input": input[0:5000], "output": output[0:5000]})
dataset_1 = datasets.DatasetDict({"train": temp_dataset_1})

temp_dataset_2 = Dataset.from_dict({"instruction": instruction[5000:10000], "input": input[5000:10000], "output": output[5000:10000]})
dataset_2 = datasets.DatasetDict({"train": temp_dataset_2})

temp_dataset_3 = Dataset.from_dict({"instruction": instruction[10000:15000], "input": input[10000:15000], "output": output[10000:15000]})
dataset_3 = datasets.DatasetDict({"train": temp_dataset_3})


temp_dataset_4 = Dataset.from_dict({"instruction": instruction[15000:], "input": input[15000:], "output": output[15000:]})
dataset_4 = datasets.DatasetDict({"train": temp_dataset_4})
dataset_4

datasets = [dataset_1, dataset_2, dataset_3, dataset_4]

Downloading readme:   0%|          | 0.00/4.46k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/49.4M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/22608 [00:00<?, ? examples/s]

# Training params

In [13]:
def get_training_params():
  training_params = TrainingArguments(
      output_dir="./results",
      num_train_epochs=3,
      per_device_train_batch_size=4,
      per_device_eval_batch_size=4,
      gradient_accumulation_steps=4,
      optim="paged_adamw_32bit",
      save_steps=100,
      logging_steps=100,
      learning_rate=2e-4,
      eval_strategy="steps",
      weight_decay=0.001,
      fp16=False,
      bf16=True,
      max_grad_norm=0.3,
      max_steps=-1,
      warmup_ratio=0.03,
      group_by_length=True,
      lr_scheduler_type="constant",
      report_to="tensorboard"
  )
  return training_params


In [14]:
def generate_prompt(data_point):
    return f"""Write a python code for following problem statement
### Instruction:
{data_point["instruction"]}
### Response:
{data_point["output"]}"""

CUTOFF_LEN = 3056
def tokenize(prompt, add_eos_token=True):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN,
        padding=False,
        return_tensors=None,
    )
    if (
        result["input_ids"][-1] != tokenizer.eos_token_id
        and len(result["input_ids"]) < CUTOFF_LEN
        and add_eos_token
    ):
        result["input_ids"].append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)

    result["labels"] = result["input_ids"].copy()

    return result

def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenize(full_prompt)
    return tokenized_full_prompt

In [15]:
def train_test_split(dataset):
  train_val = dataset["train"].train_test_split(
      test_size=100, shuffle=True, seed=42
  )
  train_data = (
      train_val["train"].map(generate_and_tokenize_prompt)
  )
  val_data = (
      train_val["test"].map(generate_and_tokenize_prompt)
  )

  return train_data, val_data

In [16]:
data_collator = transformers.DataCollatorForSeq2Seq(
    tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True,
)

In [25]:
%%time
def train(train_data, val_data, training_params, llm, path):
  trainer = transformers.Trainer(
      model=llm,
      train_dataset=train_data,
      eval_dataset=val_data,
      args=training_params,
      data_collator=data_collator
  )
  llm.config.use_cache = False
  old_state_dict = llm.state_dict
  llm.state_dict = (
      lambda self, *_, **__: get_peft_model_state_dict(
          self, old_state_dict()
      )
  ).__get__(llm, type(llm))

  # Clear cache to free up memory
  torch.cuda.empty_cache()

  llm = torch.compile(model)

  trainer.train()
  llm.save_pretrained(path)

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 9.3 µs


In [None]:
for index, dataset in enumerate(datasets):
  OUTPUT_DIR = f"/home/results/falcon/dataset-{index}"

  train_data, val_data = train_test_split(dataset)
  training_args = get_training_params()
  path = ""

  if index == 0:
    path = model_id
  else:
    path = f"/home/results/falcon/dataset-{index-1}"

  print("Dataset: ", index)
  print("Model: ", path)

  model = load_model(index, path)
  model = get_peft_model(model, peft_params)
  model.print_trainable_parameters()

  print("Fine-Tuning: ", path)
  train(train_data, val_data, get_training_params(), model, f"/home/results/falcon/dataset-{index}")

Map:   0%|          | 0/4900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]



Dataset:  0
Model:  tiiuae/falcon-rw-1b
trainable params: 837,632 || all params: 1,312,462,848 || trainable%: 0.0638
Fine-Tuning:  tiiuae/falcon-rw-1b


  self.pid = os.fork()


Step,Training Loss,Validation Loss


In [None]:
model.push_to_hub("swapnilj/falcon-rw-1b-sj")