In [1]:
!pip install -U xformers --index-url https://download.pytorch.org/whl/cu121
!pip install --no-deps packaging ninja einops flash-attn trl peft accelerate bitsandbytes
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting xformers
  Downloading https://download.pytorch.org/whl/cu121/xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
INFO: pip is looking at multiple versions of torch to determine which version is compatible with other requirements. This could take a while.
Collecting torch==2.5.1 (from xformers)
  Downloading https://download.pytorch.org/whl/cu121/torch-2.5.1%2Bcu121-cp311-cp311-linux_x86_64.whl (780.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m780.5/780.5 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.5.1->xformers)
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m44.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.

In [2]:
import torch
import os
from google.colab import userdata
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments,GemmaTokenizer
from peft import LoraConfig
from trl import SFTTrainer

In [3]:
import os

# Copy the Hugging Face access token from the Colab `userdata` secret store into
# the process environment so later cells can read it via os.environ["HF_TOKEN"].
os.environ.update({"HF_TOKEN": userdata.get("HF_TOKEN")})

In [4]:
# Hub identifier of the checkpoint that is loaded and fine-tuned below.
model_id = "google/gemma-2b"

# bitsandbytes quantization settings: 4-bit weights using the "nf4" quant type,
# with bfloat16 as the compute dtype.
quantization_kwargs = dict(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
bnb_config = BitsAndBytesConfig(**quantization_kwargs)

In [5]:
# Load the tokenizer that matches `model_id`.
# `token=` replaces the deprecated `use_auth_token=` argument and passes the same
# HF_TOKEN credential that the model-loading cell uses.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ["HF_TOKEN"])



tokenizer_config.json:   0%|          | 0.00/33.6k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

In [6]:
# Load the causal-LM checkpoint with the quantization settings from `bnb_config`.
# The "" key in the device map assigns the whole model to device index 0.
whole_model_on_gpu0 = {"": 0}
hf_token = os.environ["HF_TOKEN"]

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    device_map=whole_model_on_gpu0,
    quantization_config=bnb_config,
)

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [9]:
# Prompt the model with the opening of a quote and let it continue for up to 50 new tokens.
text = "Quote: imagination is more,"
device = "cuda:0"

# Named `inputs` rather than `input` so the builtin input() is not shadowed
# for the rest of the notebook session.
inputs = tokenizer(text, return_tensors="pt").to(device)
output = model.generate(**inputs, max_new_tokens=50)

In [10]:
# Decode the first (only) generated sequence back to text, dropping special tokens.
first_sequence = output[0]
answer = tokenizer.decode(first_sequence, skip_special_tokens=True)

In [11]:
# Display the decoded completion as this cell's output.
answer

'Quote: imagination is more, than the sum of its parts.\n\nThe first thing you need to know about me is that I am a creative. I love to create. I love to design. I love to build. I love to write. I love to paint. I'

In [12]:
# NOTE: the value "false" leaves Weights & Biases logging enabled (the training
# cell's output shows a wandb login); set it to "true" to switch logging off.
os.environ["WANDB_DISABLED"] = "false"

# Modules that receive LoRA adapters: the four attention projections followed by
# the three MLP projections.
attention_projections = ["q_proj", "o_proj", "k_proj", "v_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

# Rank-8 LoRA adapters for a causal language-modelling task.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    target_modules=attention_projections + mlp_projections,
)

In [13]:
from datasets import load_dataset


def _tokenize_quotes(samples):
  """Tokenize the `quote` column of a batch of rows."""
  return tokenizer(samples["quote"])


# Download the quotes dataset, then add tokenizer outputs for every quote.
# NOTE(review): the tokenizer adds an `input_ids` column; some TRL versions treat a
# dataset that already has `input_ids` as pre-processed and ignore `formatting_func`
# - confirm against the installed TRL version.
quotes_raw = load_dataset("Abirate/english_quotes")
data = quotes_raw.map(_tokenize_quotes, batched=True)

README.md:   0%|          | 0.00/5.55k [00:00<?, ?B/s]

quotes.jsonl:   0%|          | 0.00/647k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2508 [00:00<?, ? examples/s]

Map:   0%|          | 0/2508 [00:00<?, ? examples/s]

In [14]:
def format_func(example):
  """Render quote/author records as training text.

  Each record becomes a "Quote: <quote>" line followed by an "Author: <author>" line.

  Args:
    example: Mapping with 'quote' and 'author' entries. Either a batch, where
      both entries are parallel lists, or a single record, where both are
      plain strings.

  Returns:
    For a batch, a list with one formatted string per row. For a single
    record, one formatted string.
  """
  quotes, authors = example['quote'], example['author']
  if isinstance(quotes, str):
    # Single record: indexing with [0] here would pick out only the first character.
    return f"Quote: {quotes}\nAuthor: {authors}"
  # Batch: format every row rather than only row 0, so no examples are dropped.
  return [f"Quote: {quote}\nAuthor: {author}" for quote, author in zip(quotes, authors)]

In [15]:
import transformers

# Optimisation settings for a short run: 100 optimizer steps, each accumulating
# 4 micro-batches of size 1, logging the loss at every step.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    max_steps=100,
    warmup_steps=2,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    fp16=True,
    logging_steps=1,
)

# Supervised fine-tuning trainer: wraps `model` with the LoRA adapters described
# by `lora_config` and trains on the dataset's train split.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=data['train'],
    formatting_func=format_func,
    peft_config=lora_config,
)



Converting train dataset to ChatML:   0%|          | 0/2508 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/2508 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/2508 [00:00<?, ? examples/s]

In [16]:
# Run the training configured on `trainer`. Kept as the cell's last expression so
# the returned TrainOutput (global step, training loss, metrics) is displayed.
trainer.train()



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmanumanuvkm123[0m ([33mmanumanuvkm123-toch-institute[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Step,Training Loss
1,2.5601
2,1.627
3,2.4797
4,2.7515
5,2.2985
6,2.473
7,2.8773
8,2.2332
9,3.1761
10,2.2073


TrainOutput(global_step=100, training_loss=2.055391828417778, metrics={'train_runtime': 140.4789, 'train_samples_per_second': 2.847, 'train_steps_per_second': 0.712, 'total_flos': 189744345784320.0, 'train_loss': 2.055391828417778})

In [27]:
# Prompt the model with the opening of another quote and let it continue for up to 50 new tokens.
text = "Quote: a women like a teabag,"
device = "cuda:0"

# Named `inputs` rather than `input` so the builtin input() is not shadowed
# for the rest of the notebook session.
inputs = tokenizer(text, return_tensors="pt").to(device)
output = model.generate(**inputs, max_new_tokens=50)

In [28]:
# Decode the first generated sequence (special tokens removed) and print it so
# embedded newlines are rendered as line breaks.
completion = tokenizer.decode(output[0], skip_special_tokens=True)
print(completion)

Quote: a women like a teabag, you never know how strong she is until she's in hot water.

I'm not sure if this is a quote or not, but I've heard it before. I'm not sure if it's a quote or not
