In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

In [None]:
!pip install --upgrade transformers accelerate


Collecting transformers
  Using cached transformers-4.41.2-py3-none-any.whl (9.1 MB)
Collecting accelerate
  Using cached accelerate-0.31.0-py3-none-any.whl (309 kB)
Collecting tokenizers<0.20,>=0.19 (from transformers)
  Using cached tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
Installing collected packages: tokenizers, transformers, accelerate
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.13.3
    Uninstalling tokenizers-0.13.3:
      Successfully uninstalled tokenizers-0.13.3
  Attempting uninstall: transformers
    Found existing installation: transformers 4.31.0
    Uninstalling transformers-4.31.0:
      Successfully uninstalled transformers-4.31.0
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.21.0
    Uninstalling accelerate-0.21.0:
      Successfully uninstalled accelerate-0.21.0
Successfully installed accelerate-0.31.0 tokenizers-0.19.1 transformers-4.41.2


In [None]:
!pip install datasets



In [None]:
!pip install --upgrade transformers datasets peft bitsandbytes


Collecting peft
  Using cached peft-0.11.1-py3-none-any.whl (251 kB)
Collecting bitsandbytes
  Using cached bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
Installing collected packages: bitsandbytes, peft
  Attempting uninstall: bitsandbytes
    Found existing installation: bitsandbytes 0.40.2
    Uninstalling bitsandbytes-0.40.2:
      Successfully uninstalled bitsandbytes-0.40.2
  Attempting uninstall: peft
    Found existing installation: peft 0.4.0
    Uninstalling peft-0.4.0:
      Successfully uninstalled peft-0.4.0
Successfully installed bitsandbytes-0.43.1 peft-0.11.1


In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, get_peft_model, TaskType
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer

In [None]:
# Configuration parameters, grouped by what they control.

# --- Model, data and output locations ---
model_name = "meta-llama/Meta-Llama-3-8B"
dataset_name = "wikitext"
output_dir = "./results"

# --- Optimisation schedule ---
num_train_epochs = 3
max_steps = -1
learning_rate = 5e-5
lr_scheduler_type = "linear"
warmup_ratio = 0.1
weight_decay = 0.01
max_grad_norm = 1.0
optim = "adamw_torch"

# --- Batching and sequence handling ---
per_device_train_batch_size = 4
gradient_accumulation_steps = 4
group_by_length = True
packing = False
max_seq_length = 512

# --- Mixed precision flags ---
fp16 = True
bf16 = False

# --- Checkpointing and logging cadence ---
save_steps = 1000
logging_steps = 200

# --- LoRA ---
# NOTE(review): the LoraConfig cell further down hardcodes r=16, lora_alpha=32
# and lora_dropout=0.05 instead of reading these three values.
lora_r = 8
lora_alpha = 16
lora_dropout = 0.1

# --- 4-bit quantisation and device placement ---
use_4bit = True
bnb_4bit_quant_type = "nf4"
bnb_4bit_compute_dtype = "bfloat16"
use_nested_quant = True
device_map = "auto"

# Load the 'wikitext-2-raw-v1' configuration of the dataset named above.
dataset = load_dataset(dataset_name, 'wikitext-2-raw-v1')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# Resolve the Hugging Face access token at run time instead of hardcoding it.
# Later cells read os.environ['HUGGINGFACE_API_KEY'], so the value is stored
# under that name. A token that is already set is left untouched.
from getpass import getpass

if not os.environ.get('HUGGINGFACE_API_KEY'):
    # Prefer a token exported as HF_TOKEN; otherwise prompt without echoing it,
    # so the secret never ends up in the saved notebook.
    os.environ['HUGGINGFACE_API_KEY'] = os.environ.get('HF_TOKEN') or getpass('Hugging Face token: ')

In [None]:
# Work on a small slice of the data:
# the first 100 training rows and the first 10 validation rows.
small_train_dataset = dataset["train"].select(range(0, 100))
small_eval_dataset = dataset["validation"].select(range(0, 10))


In [None]:
# Load the tokenizer for the base checkpoint.
# `token` replaces the deprecated `use_auth_token` argument and matches the
# keyword already used by the model-loading cell.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token=os.environ['HUGGINGFACE_API_KEY'],
)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Set padding token
# The tokenizer's EOS token is reused as the padding token, so padded positions
# and end-of-sequence positions share the same token id.
# NOTE(review): the language-modeling collator used later presumably ignores
# pad-token positions in the loss, which would then include EOS — confirm.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Tokenize dataset
def tokenize_function(examples):
    """Tokenize the 'text' field of a batch of rows.

    Sequences are truncated to ``max_seq_length`` and padded with
    ``padding='max_length'``. Relies on the notebook-level ``tokenizer``
    and ``max_seq_length``.
    """
    encoded = tokenizer(
        examples['text'],
        max_length=max_seq_length,
        truncation=True,
        padding='max_length',
    )
    return encoded


# Apply the tokenizer in batched mode to the reduced train and eval subsets.
tokenized_train_dataset = small_train_dataset.map(tokenize_function, batched=True)
tokenized_eval_dataset = small_eval_dataset.map(tokenize_function, batched=True)


In [None]:
# Quantization configuration for QLoRA, driven by the configuration cell.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_use_double_quant=use_nested_quant,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    # Resolve the configured dtype name ("bfloat16") to the matching torch dtype
    # instead of hardcoding torch.bfloat16, so the config value is honoured.
    bnb_4bit_compute_dtype=getattr(torch, bnb_4bit_compute_dtype),
)

In [None]:
# Load the base causal LM with the bitsandbytes quantization config defined above.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    # Use the placement strategy from the configuration cell rather than a
    # second hardcoded "auto".
    device_map=device_map,
    token=os.environ['HUGGINGFACE_API_KEY'],
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# LoRA adapter settings for causal language modeling.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    # Update target modules for Meta-Llama-3-8B: the k_proj and v_proj modules under self_attn.
    target_modules=["self_attn.k_proj", "self_attn.v_proj"],
    r=16,  # Rank of the decomposition
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

In [None]:
# Wrap the loaded model with the LoRA adapters described by lora_config.
# NOTE(review): this rebinds `model`, so re-running this cell on its own passes
# the already-wrapped model back into get_peft_model — re-run the model-loading
# cell first.
model = get_peft_model(model, lora_config)

In [None]:
# Training arguments
# Values come from the configuration cell. Evaluation runs once per epoch and
# metrics are reported to TensorBoard.
# NOTE(review): `gradient_accumulation_steps` and `optim` are defined in the
# configuration cell but are not passed here.
training_args = TrainingArguments(
    output_dir=output_dir,
    # `eval_strategy` is the current name of this option; `evaluation_strategy`
    # is deprecated in the transformers release installed above (4.41.2).
    eval_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=per_device_train_batch_size,
    # Evaluation reuses the training batch size.
    per_device_eval_batch_size=per_device_train_batch_size,
    num_train_epochs=num_train_epochs,
    weight_decay=weight_decay,
    save_steps=save_steps,
    logging_steps=logging_steps,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard",
)



In [None]:
# Data collator from transformers, used with masked-LM mode switched off (mlm=False).
from transformers import DataCollatorForLanguageModeling

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Initialize Trainer with the PEFT-wrapped model, the training arguments,
# the tokenized train/eval subsets and the collator.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
)

In [None]:
# Train the model
# Runs the Trainer built in the previous cell. As the last expression in the
# cell, the returned TrainOutput is displayed as the cell result.
trainer.train()

Epoch,Training Loss,Validation Loss
1,No log,1.914502
2,No log,1.854219
3,No log,1.824749


TrainOutput(global_step=75, training_loss=3.302066650390625, metrics={'train_runtime': 59.3465, 'train_samples_per_second': 5.055, 'train_steps_per_second': 1.264, 'total_flos': 6921370415923200.0, 'train_loss': 3.302066650390625, 'epoch': 3.0})

In [None]:
# Testing the fine-tuned model
test_input = "The history of natural language processing"

# Tokenize to tensors and move them to the model's device. Keeping the whole
# encoding (not just input_ids) lets generate() receive the attention mask.
test_encoding = tokenizer(test_input, return_tensors="pt").to(model.device)
test_input_ids = test_encoding["input_ids"]

# Make sure dropout (including the LoRA dropout) is disabled for inference.
model.eval()

generated_text = model.generate(
    **test_encoding,
    max_length=50,
    # Same id generate() would otherwise fall back to, but set explicitly so
    # the "pad token id was not set" warning goes away.
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(generated_text[0], skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


The history of natural language processing (NLP) is closely tied to the history of artificial intelligence as a whole. Despite the field’s youth, it has already passed through several distinct phases. NLP research was initiated by the first generation of artificial
