<a href="https://colab.research.google.com/github/shouvikcirca/LLMs/blob/main/Copy_of_Llama3_8B_FinetuningOnCMPFScheme.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Using 🤗 PEFT & bitsandbytes to fine-tune a LoRA checkpoint




In [None]:
# Install the notebook's dependencies into the runtime.
# Only gcsfs is pinned; the other packages are unpinned, and transformers / peft are
# installed straight from their GitHub repositories, so a rerun at a later date may
# resolve to different versions than the ones that produced the saved outputs.
! pip install gcsfs==2024.9.0.post1
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m26.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from huggingface_hub import notebook_login

# Interactive Hugging Face Hub login; this renders a widget in the cell output.
notebook_login()
# Original note: meta-llama/Meta-Llama-3-8B-Instruct
# The model-setup cell in this notebook actually loads meta-llama/Llama-3.2-1B-Instruct.
# NOTE(review): presumably the login is needed because these checkpoints are gated — confirm.

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# List the GPUs visible to this runtime.
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-d4b90672-0b3c-b936-1baf-4c1676c70d5c)


### Set up the model

In [None]:
import os

# Expose only GPU 0 to this process; this is set before torch is imported.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

# Single source of truth for the checkpoint so model and tokenizer cannot drift apart.
MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"

# Load the base model with 4-bit bitsandbytes quantization.
# The bare `load_in_4bit=True` keyword is deprecated by transformers; the supported
# spelling is a BitsAndBytesConfig passed through `quantization_config`.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map='auto',
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
if tokenizer.pad_token is None:
    # Batched training needs a padding token. When the tokenizer ships without one,
    # register '[PAD]' and grow the embedding matrix to match the new vocabulary size.
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


### Freezing the original weights


In [None]:
# Freeze the whole base model; only the adapters added later are meant to train.
for weight in model.parameters():
    weight.requires_grad = False
    # One-dimensional parameters (e.g. layernorm) are kept in fp32 for stability.
    is_vector = weight.ndim == 1
    if is_vector:
        weight.data = weight.data.float()

# Trade compute for memory: store fewer activations during the forward pass.
model.gradient_checkpointing_enable()
# NOTE(review): presumably required so gradients can flow from the inputs while the
# base weights are frozen and checkpointing is on — confirm against the transformers docs.
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
    """Sequential container whose output is always converted to float32."""

    def forward(self, x):
        """Run the wrapped modules on ``x`` and return the result as a float32 tensor."""
        result = super().forward(x)
        return result.float()
# Wrap the existing LM head so its output is cast to float32.
# Re-running this cell wraps the already-wrapped head again, so run it once per fresh model.
model.lm_head = CastOutputToFloat(model.lm_head)

### Setting up the LoRA adapters

In [None]:
def print_trainable_parameters(model):
    """
    Print the number of trainable parameters in the model.

    Args:
        model: object exposing ``named_parameters()`` that yields ``(name, param)``
            pairs, where each ``param`` has ``numel()`` and ``requires_grad``.

    Prints a single line with the trainable count, the total count and the
    trainable percentage. A model without any parameters reports 0.0 percent
    instead of raising ``ZeroDivisionError``.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # NOTE(review): for 4-bit quantized weights numel() presumably counts packed storage
    # elements, so "all params" can be lower than the nominal model size — confirm.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter hyperparameters.
config = LoraConfig(
    r=16, # LoRA rank (size of the low-rank update matrices) — not the number of attention heads
    lora_alpha=32, #alpha scaling
    # target_modules=["q_proj", "v_proj"], # optional: set this if you know which modules to adapt; left unset here
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM" # set this for CLM or Seq2Seq
)

# Wrap the frozen base model with the adapters and report how much of it is trainable.
# `model` is rebound to the wrapped model, so re-running this cell wraps it a second time.
model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 1703936 || all params: 750981120 || trainable%: 0.226894652158499


## Data

In [None]:
import transformers
# from datasets import load_dataset
# data = load_dataset("Abirate/english_quotes")


In [None]:
# data

DatasetDict({
    train: Dataset({
        features: ['quote', 'author', 'tags'],
        num_rows: 2508
    })
})

In [None]:
from datasets import Dataset, DatasetDict

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the training document is read from there.
# force_remount = True requests a fresh mount on every run.
drive.mount('/content/drive', force_remount = True)

Mounted at /content/drive


In [None]:
fileToRead = '/content/drive/MyDrive/LLM/CMPF_Scheme_Formatted_Document.txt'

# Read the whole document and split it into raw chunks on the hyphen separator.
# The encoding is given explicitly so the result does not depend on the runtime's locale default.
with open(fileToRead, 'r', encoding='utf-8') as f:
    data = f.read().split('--------------------------------')

In [None]:
# Turn each raw chunk into a {'query', 'response'} record.
dataList = []

for chunk in data:
    # A separator at the very start or end of the file leaves a whitespace-only chunk;
    # it holds no record, so skip it instead of emitting an empty training example.
    if not chunk.strip():
        continue
    lines = chunk.split('\n')
    # Line index 1 is the heading (query); everything after it is the body (response).
    # NOTE(review): presumably line 0 is the empty remainder of the separator line — confirm.
    body_lines = [line.strip() for line in lines[2:]]
    dataList.append({
        'query': lines[1].strip(),
        # Join body lines with a space: joining with '' fused the last word of one line
        # onto the first word of the next (e.g. "ProvisionsAct", "2276dated").
        'response': ' '.join(line for line in body_lines if line),
    })

In [None]:
import pandas as pd
# One row per record, with 'query' and 'response' columns taken from the dict keys.
dataFrame = pd.DataFrame(dataList)

In [None]:
# Preview the first rows to check that the query/response split looks right.
dataFrame.head()

Unnamed: 0,query,response
0,"Paragraph 1, Subparagraph 1",The Coal Mines Provident Fund scheme was frame...
1,"Paragraph 1, Subparagraph 2",The Coal Mines Provident Fund Scheme shall app...
2,"Paragraph 1, Subparagraph 3",This subparagraph was reconstituted by S.R.O. ...
3,"Paragraph 1, Subparagraph 4",This clause was inserted vide S.R.O. 3566 date...
4,Paragraph 1,"""Short Title and Application"""


In [None]:
# Convert the pandas frame into a `datasets.Dataset`.
train_dataset = Dataset.from_pandas(dataFrame)

In [None]:
# Wrap the dataset in a DatasetDict with a single "train" split.
dataset_dict = DatasetDict({
    "train": train_dataset
})

In [None]:
# Display the DatasetDict (splits, column names and row count).
dataset_dict

DatasetDict({
    train: Dataset({
        features: ['query', 'response'],
        num_rows: 27
    })
})

In [None]:
def merge_columns(example):
    """Attach a 'prediction' field: the query, a newline, then the response.

    The mapping is updated in place and the same object is returned.
    """
    merged_text = "\n".join((example["query"], example["response"]))
    example["prediction"] = merged_text
    return example

# Add the merged 'prediction' column to every row of the train split.
dataset_dict['train'] = dataset_dict['train'].map(merge_columns)
# data['train']["prediction"][:5]

Map:   0%|          | 0/27 [00:00<?, ? examples/s]

In [None]:
# Inspect the first two merged training strings.
dataset_dict['train']['prediction'][:2]

['Paragraph 1, Subparagraph 1\nThe Coal Mines Provident Fund scheme was framed in exercise of the powers conferred by Section 3 of the Coal Mines Provident Fund and Miscellaneous ProvisionsAct, 1948 (XLVI of 1948).',
 'Paragraph 1, Subparagraph 2\nThe Coal Mines Provident Fund Scheme shall apply to all coal mines in West Bengal, Bihar, Maharastra, the Central Provinces and Berar, Nagaland and Odisha including those in partially excluded areas in the provinces of West Bengal, Bihar, Central Provinces and Berar and Orissa to which the Coal Mines Provident Fund and Miscellaneous Provisions Act, 1948, has been applied under Sub-section(1) of Section 92 of the Government Of India Act, 1935.In this subparagrapha. The word \'Bombay\' which had been inserted vide S.R.O. 3566 dated 31.10.57 was substituted by the word \'Maharastra\' by S.O. 2276dated 3.9.60 published on 27.9.60 b. The words "West Bengal" were inserted by the Government of India, Ministry of Labour Notification No. PF 15(9) 50, 

In [None]:
# def merge_columns(example):
#     example["prediction"] = example["quote"] + " ->: " + str(example["tags"])
#     return example

# data['train'] = data['train'].map(merge_columns)
# data['train']["prediction"][:5]

Map:   0%|          | 0/2508 [00:00<?, ? examples/s]

["“Be yourself; everyone else is already taken.” ->: ['be-yourself', 'gilbert-perreira', 'honesty', 'inspirational', 'misattributed-oscar-wilde', 'quote-investigator']",
 "“I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.” ->: ['best', 'life', 'love', 'mistakes', 'out-of-control', 'truth', 'worst']",
 "“Two things are infinite: the universe and human stupidity; and I'm not sure about the universe.” ->: ['human-nature', 'humor', 'infinity', 'philosophy', 'science', 'stupidity', 'universe']",
 "“So many books, so little time.” ->: ['books', 'humor']",
 "“A room without books is like a body without a soul.” ->: ['books', 'simile', 'soul']"]

In [None]:
# Show the first training example.
# The previous `data['train'][0]` was a leftover from the template notebook: in this notebook
# `data` is the list of raw text chunks read from the document, so indexing it with 'train'
# raises TypeError. The examples live in `dataset_dict`.
dataset_dict['train'][0]

{'quote': '“Be yourself; everyone else is already taken.”',
 'author': 'Oscar Wilde',
 'tags': ['be-yourself',
  'gilbert-perreira',
  'honesty',
  'inspirational',
  'misattributed-oscar-wilde',
  'quote-investigator'],
 'prediction': "“Be yourself; everyone else is already taken.” ->: ['be-yourself', 'gilbert-perreira', 'honesty', 'inspirational', 'misattributed-oscar-wilde', 'quote-investigator']"}

In [None]:
def _tokenize_predictions(samples):
    """Tokenize the 'prediction' strings of one batch of rows."""
    return tokenizer(samples['prediction'])


# Batched map: the tokenizer's output fields are added as new columns on every split.
dataset_dict = dataset_dict.map(_tokenize_predictions, batched=True)

Map:   0%|          | 0/27 [00:00<?, ? examples/s]

In [None]:
# Display the DatasetDict again to confirm the tokenizer output columns were added.
dataset_dict

DatasetDict({
    train: Dataset({
        features: ['query', 'response', 'prediction', 'input_ids', 'attention_mask'],
        num_rows: 27
    })
})

### Training

In [None]:

# Optimisation settings for the run; checkpoints and logs go to ./outputs.
training_args = transformers.TrainingArguments(
    output_dir='outputs',
    per_device_train_batch_size=10,
    gradient_accumulation_steps=1,
    max_steps=150,
    warmup_steps=50,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
)

# mlm=False selects the collator's non-masked (causal LM) mode.
causal_lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_dict['train'],
    data_collator=causal_lm_collator,
)

# The cache is switched off for training to silence warnings; re-enable it for inference.
model.config.use_cache = False
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc




Step,Training Loss
1,3.2789
2,3.1524
3,3.0418
4,3.2659
5,3.0157
6,3.4926
7,3.0663
8,3.1609
9,3.2734
10,3.1552




TrainOutput(global_step=150, training_loss=1.3350621982415518, metrics={'train_runtime': 284.1069, 'train_samples_per_second': 5.28, 'train_steps_per_second': 0.528, 'total_flos': 2469379008688128.0, 'train_loss': 1.3350621982415518, 'epoch': 50.0})

## Share adapters on the 🤗 Hub

In [None]:
# model.push_to_hub("samwit/bloom-7b1-lora-tagger",
#                   use_auth_token=True,
#                   commit_message="basic training",
#                   private=True)

## Load adapters from the Hub

In [None]:
# import torch
# from peft import PeftModel, PeftConfig
# from transformers import AutoModelForCausalLM, AutoTokenizer

# peft_model_id = "samwit/bloom-7b1-lora-tagger"
# config = PeftConfig.from_pretrained(peft_model_id)
# model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map='auto')
# tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# # Load the Lora model
# model = PeftModel.from_pretrained(model, peft_model_id)

In [None]:
# Use the tokenizer's padding token id as the padding id for generation.
model.generation_config.pad_token_id = tokenizer.pad_token_id

## Inference

In [None]:
# Training is over: switch to evaluation mode so the LoRA dropout layers are inactive,
# then turn the cache back on (it was disabled only for the training run).
model.eval()
model.config.use_cache = True

In [None]:
# Generation must run in evaluation mode; after training the model is still in train mode,
# which would keep the LoRA dropout active and make the output noisy.
model.eval()

batch = tokenizer("Full form of CMPFO ?", return_tensors='pt').to('cuda')

# Generate up to 100 new tokens under CUDA autocast.
with torch.amp.autocast('cuda'):
  output_tokens = model.generate(**batch, max_new_tokens=100)

print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))



 Full form of CMPFO? Coal Mining Protective Organization
a. Coal Miners   b. Coal Miners Association
c. Coal Miners Union of France   d. Coal Miners Union of Italy

The best answer is b- Coal Miners Association.    Coal Mining Protective Organization is the full form of CMPFO.    This organization was established in 1928 to protect the interests of coal miners.    It is a voluntary organization.    The words "Union" and "Association" have been added to make it


In [None]:
# Show the raw token ids of the first generated sequence.
output_tokens[0]

tensor([128000,  31305,     12,     18,    315,    328,   2056,   8548,     13,
           220,  15951,  30105,    220,     16,     13,     20,     13,   5547,
           578,  10913,  10423,   3804,     12,  34541,   6684,   1952,  25850,
         66882,    791,  10423,   5046,   6890,  65591,    264,   3804,     12,
         34541,   6684,    389,  25850,  66882,   1234,    279,  25850,  20214,
           389,    220,     16,     13,     20,     13,    220],
       device='cuda:0')