In [1]:
!pip install -q -U accelerate peft bitsandbytes transformers trl datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.6/11.6 MB[0m [31m113.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m544.8/544.8 kB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m95.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
import transformers
import torch
from google.colab import userdata
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig, GemmaConfig
from peft import LoraConfig
from trl import SFTTrainer

In [3]:
# Copy the Hugging Face token returned by google.colab's `userdata` into the
# process environment; the tokenizer/model loading cell reads it back from
# os.environ["HF_TOKEN"].
os.environ["HF_TOKEN"] = userdata.get('HF_TOKEN')

Prerequisites

In [4]:
# Checkpoint to fine-tune.
model_id = "google/gemma-2b"

# Quantization settings handed to from_pretrained: 4-bit loading, "nf4"
# quantization type, bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

In [5]:
# Both loads authenticate with the token stored in the HF_TOKEN environment variable.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    token=os.environ["HF_TOKEN"],
)

# Load the causal LM with the quantization config defined above.
# device_map={"": 0} targets device index 0, which matches the "cuda:0"
# device the generation cells move their inputs to.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=os.environ["HF_TOKEN"],
    device_map={"": 0},
    quantization_config=bnb_config,
)

tokenizer_config.json:   0%|          | 0.00/33.6k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [6]:
# Baseline sample: generate a completion from the quantized model before any
# fine-tuning has been run.
# NOTE(review): the prompt is written "Quote : " (space before the colon) while
# formatting_func builds training text as "Quote: " — consider aligning the two.
text = "Quote : Imagine is more"
device = "cuda:0"
# Named `inputs` so the notebook namespace does not shadow the builtin `input`.
inputs = tokenizer(text, return_tensors="pt").to(device)

output = model.generate(**inputs, max_new_tokens=30)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Quote : Imagine is more than just a place to live. It’s a place to call home.

Imagine is a new residential development by Imagine Group. The project is


In [7]:
# NOTE(review): despite the variable's name, the value "false" leaves Weights &
# Biases reporting on — the recorded output of trainer.train() shows a wandb
# login prompt. If the intent was to turn W&B off, this presumably needs to be
# "true" (confirm against the transformers W&B integration docs).
os.environ["WANDB_DISABLED"] = "false"

In [8]:
# LoRA adapter configuration: r=8, applied to the listed projection modules,
# for a causal language modelling task.
lora_config = LoraConfig(
    r=8,
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj",
        "o_proj",
        "k_proj",
        "v_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

In [9]:
# `load_dataset` is already imported in the notebook's import cell, so it is
# not re-imported here.
# Fetch the English quotes dataset; the training split is used further down.
data = load_dataset("Abirate/english_quotes")

README.md: 0.00B [00:00, ?B/s]

quotes.jsonl:   0%|          | 0.00/647k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2508 [00:00<?, ? examples/s]

In [13]:
# Peek at one quote. Index the row first so a single record is fetched rather
# than the whole `quote` column; the displayed value is the same.
data["train"][1]["quote"]

"“I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.”"

In [11]:
def formatting_func(example):
    """Build the training text for a single dataset record.

    Args:
        example: Mapping that provides ``'quote'`` and ``'author'`` entries.

    Returns:
        A two-line string: ``Quote: <quote>`` followed by ``Author: <author>``.
    """
    template = "Quote: {}\nAuthor: {}"
    return template.format(example["quote"], example["author"])

In [15]:
# Preview the training text formatting_func produces for one record (row 1).
# print() is used so the embedded newline renders as a line break.
print(formatting_func(data["train"][1]))

Quote: “I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.”
Author: Marilyn Monroe


In [16]:
# Display the train split's summary (feature names and row count).
data['train']

Dataset({
    features: ['quote', 'author', 'tags'],
    num_rows: 2508
})

In [19]:
# Supervised fine-tuning setup: the loaded model plus the LoRA config, trained
# on the quotes split with each record rendered by formatting_func.
trainer = SFTTrainer(
    model=model,
    train_dataset=data["train"],
    formatting_func=formatting_func,
    peft_config=lora_config,
    args=transformers.TrainingArguments(
        output_dir="outputs",
        # Short run: 100 optimizer steps with 2 warmup steps.
        max_steps=100,
        warmup_steps=2,
        learning_rate=2e-4,
        # Batch size 1 with 4 accumulation steps per optimizer update.
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        optim="paged_adamw_8bit",
        fp16=True,
        # Log the training loss at every step.
        logging_steps=1,
    ),
)



In [20]:
# Run fine-tuning. In the recorded run this call first paused on an interactive
# wandb API-key prompt before the per-step training loss was logged.
trainer.train()

  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmohsin416[0m ([33mmohsin416-mohsin[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
1,2.655
2,2.0929
3,2.5811
4,2.823
5,2.3842
6,2.4444
7,2.7956
8,2.0643
9,2.8926
10,2.2232


TrainOutput(global_step=100, training_loss=1.8122899252176286, metrics={'train_runtime': 181.9872, 'train_samples_per_second': 2.198, 'train_steps_per_second': 0.549, 'total_flos': 235740440186880.0, 'train_loss': 1.8122899252176286, 'epoch': 0.1594896331738437})

In [21]:
# Sample from the model again after trainer.train() to see the effect of
# fine-tuning on a quote-style prompt.
# NOTE(review): the prompt is written "Quote : " (space before the colon) while
# formatting_func builds training text as "Quote: " — consider aligning the two.
text = "Quote : A woman is like a tea bag"
device = "cuda:0"
# Named `inputs` so the notebook namespace does not shadow the builtin `input`.
inputs = tokenizer(text, return_tensors="pt").to(device)

output = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Quote : A woman is like a tea bag. You never know how strong she is until she gets in hot water.
Author : Eleanor Roosevelt
