In [1]:
!pip3 install transformers peft bitsandbytes -q --upgrade
!pip3 install trl accelerate datasets -q --upgrade

In [2]:
import os
import transformers
import torch
from peft import LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GemmaTokenizer
from datasets import load_dataset
from trl import SFTTrainer

In [3]:
from google.colab import userdata
os.environ['HF_TOKEN']= userdata.get('HF_TOKEN')

In [6]:
model_name= "google/gemma-2b"
bnb_config= BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16
)
tokenizer= AutoTokenizer.from_pretrained(
    model_name,
    token= os.environ['HF_TOKEN']
)
model= AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config= bnb_config,
    device_map= {"":0},
    token= os.environ['HF_TOKEN']
)

tokenizer_config.json:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/555 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [7]:
input= '''Quote: As long as you are ....., you won't fear tomorrow.
As long as you are fearful, you won't loose tomorrow.'''

device= "cuda:0"
inputs= tokenizer(input, return_tensors= "pt").to(device)
outputs= model.generate(**inputs, max_new_tokens= 20)
print(tokenizer.decode(outputs[0], skip_special_tokens= True))

As long as you are ....., you won't fear tomorrow. 
As long as you are fearful, you won't loose tomorrow.
fill in the blanks with the correct form of the verbs in brackets

1. I (be) .... very happy when


In [9]:
os.environ["WANDB_DISABLED"]= "false"
lora_config= LoraConfig(
  r = 8,
  target_modules= ["q_proj", "o_proj", "k_proj",
                   "v_proj", "gate_proj", "up_proj",
                   "down_proj"],
  task_type= "CAUSAL_LM"
)
def formatting_function(example):
  text= f"Quote: {example['quote'][0]}\nAuthor: {example['author'][0]}"
  return [text]

In [11]:
from datasets import load_dataset
data= load_dataset("Abirate/english_quotes")
data= data.map(lambda x: tokenizer(x["quote"]), batched= True)

Map:   0%|          | 0/2508 [00:00<?, ? examples/s]

In [16]:
args= transformers.TrainingArguments(
    per_device_train_batch_size= 1,
    gradient_accumulation_steps= 4,
    warmup_steps= 2,
    max_steps= 100,
    learning_rate= 2e-4,
    fp16= True,
    logging_steps= 1,
    output_dir= "outputs",
    optim= "paged_adamw_8bit"
)
trainer= SFTTrainer(
    model= model,
    train_dataset= data["train"],
    args= args,
    peft_config= lora_config,
    formatting_func= formatting_function
)



In [17]:
trainer.train()

Step,Training Loss
1,1.6802
2,0.6298
3,1.0225
4,1.032
5,0.4194
6,1.2287
7,1.0911
8,0.3294
9,0.5644
10,0.5083


TrainOutput(global_step=100, training_loss=0.1439643805567175, metrics={'train_runtime': 80.7746, 'train_samples_per_second': 4.952, 'train_steps_per_second': 1.238, 'total_flos': 54994550906880.0, 'train_loss': 0.1439643805567175, 'epoch': 66.67})

In [18]:
input= '''Quote: As long as you are ....., you won't fear tomorrow.
As long as you are fearful, you won't loose tomorrow.'''

device= "cuda:0"
inputs= tokenizer(input, return_tensors= "pt").to(device)
outputs= model.generate(**inputs, max_new_tokens= 20)
print(tokenizer.decode(outputs[0], skip_special_tokens= True))

Quote: As long as you are ....., you won't fear tomorrow. 
As long as you are fearful, you won't loose tomorrow.
Author: Aung San Suu Kyi
Quote: The most wasted of all days is one
