## Model ::  Fine-tuning

In [28]:
!pip install -Uq accelerate bitsandbytes datasets transformers peft trl sentencepiece wandb langchain huggingface_hub

In [46]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [30]:
import os
os.environ["WANDB_PROJECT"] = "finetuning01"

In [31]:
# b2554c627fb67f988a0b985e753984a9dbb565b7

import wandb
if wandb.run is not None:
 wandb.finish()

In [32]:
from datasets import load_dataset
dataset_name = "squad_v2"
dataset = load_dataset(dataset_name, split="train")
eval_dataset = load_dataset(dataset_name, split="validation")

In [33]:
dataset

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 130319
})

In [34]:
eval_dataset

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 11873
})

## The Squad V2 dataset is composed of various features, which we can see here:

```
{'id': Value(dtype='string', id=None),
'title': Value(dtype='string', id=None),
'context': Value(dtype='string', id=None),
'question': Value(dtype='string', id=None),
'answers': Sequence(feature={'text': Value(dtype='string', id=None),
'answer_start': Value(dtype='int32', id=None)}, length=-1, id=None)}
```

In [51]:
model_id = "google/gemma-2b"
new_model_name = f"pankajshakya627/gemma-2b-peft-{dataset_name}_pankaj"

### Load the model:

In [36]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
  load_in_4bit=True,
  bnb_4bit_quant_type="nf4",
  bnb_4bit_compute_dtype=torch.float16,
  )

device_map="auto"

base_model = AutoModelForCausalLM.from_pretrained(
 model_id,
 quantization_config=bnb_config,
 device_map="auto",
 trust_remote_code=True,
)
base_model.config.use_cache = False

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Gemma's activation function should be approximate GeLU and not exact GeLU.
Changing the activation function to `gelu_pytorch_tanh`.if you want to use the legacy `gelu`, edit the `model.config` to set `hidden_activation=gelu`   instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [37]:
from google.colab import drive
drive.mount('/content/gdrive')

# output_dir = "/content/gdrive/My Drive/results"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [38]:
output_dir = "/content/gdrive/My Drive/results"

### set up a tokenizer

In [39]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

tokenizer_config.json:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/555 [00:00<?, ?B/s]

### Setup LORA and other training arguments:

In [40]:
from transformers import TrainingArguments, EarlyStoppingCallback
from peft import LoraConfig
# More info: https://github.com/huggingface/transformers/pull/24906


base_model.config.pretraining_tp = 1

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
  )


training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    logging_steps=10,
    max_steps=100,   ## Keeping it low for Demo purpose
    num_train_epochs=100,
    evaluation_strategy="steps",
    eval_steps=5,
    save_total_limit=5,
    push_to_hub=False,
    load_best_model_at_end=True,
    report_to="wandb"
  )

In [41]:
from trl import SFTTrainer

trainer = SFTTrainer(
    model=base_model,
    train_dataset=dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    dataset_text_field="question", # this depends on the dataset!
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_args,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=200)]
  )

# b2554c627fb67f988a0b985e753984a9dbb565b7

trainer.train()

Map:   0%|          | 0/130319 [00:00<?, ? examples/s]

Map:   0%|          | 0/11873 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss,Validation Loss
5,No log,4.8584
10,4.714800,4.678044
15,4.714800,4.553153
20,4.446800,4.492146
25,4.446800,4.455347
30,4.578400,4.420628
35,4.578400,4.385575
40,4.415900,4.349412
45,4.415900,4.316589
50,4.190700,4.287412


Step,Training Loss,Validation Loss
5,No log,4.8584
10,4.714800,4.678044
15,4.714800,4.553153
20,4.446800,4.492146
25,4.446800,4.455347
30,4.578400,4.420628
35,4.578400,4.385575
40,4.415900,4.349412
45,4.415900,4.316589
50,4.190700,4.287412


TrainOutput(global_step=100, training_loss=4.32614948272705, metrics={'train_runtime': 2508.5018, 'train_samples_per_second': 0.638, 'train_steps_per_second': 0.04, 'total_flos': 340497179148288.0, 'train_loss': 4.32614948272705, 'epoch': 0.01})

In [42]:
trainer.model.save_pretrained(
 os.path.join(output_dir, "final_checkpoint"),
)

In [52]:
new_model_name

'pankajshakya627/gemma-2b-peft-squad_v2_pankaj'

In [50]:
trainer.model.push_to_hub(
 repo_id="pankajshakya627/gemma-2b-peft-squad_v2_pankaj"
)

adapter_model.safetensors:   0%|          | 0.00/29.5M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/pankajshakya627/gemma-2b-peft-squad_v2_pankaj/commit/cadf60d9a30bdd1a6420ef0d99e28a88d54b4a0a', commit_message='Upload model', commit_description='', oid='cadf60d9a30bdd1a6420ef0d99e28a88d54b4a0a', pr_url=None, pr_revision=None, pr_num=None)

#### Load the model back using a combination of our Hugging Face username and the repository name

In [55]:
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline




model_id = "google/gemma-2b"
config = PeftConfig.from_pretrained(new_model_name)
model = AutoModelForCausalLM.from_pretrained(model_id)
model = PeftModel.from_pretrained(model, new_model_name)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=256
  )

llm = HuggingFacePipeline(pipeline=pipe)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MusicgenMelodyForCausalLM', 'MvpForCausalLM', 'OpenLlam