This notebook walks through fine-tuning the Meta-Llama-3-8B-Instruct model with LoRA (Low-Rank Adaptation). It covers setting up the environment with the necessary libraries and loading the model with 4-bit quantization for memory efficiency. It then configures the LoRA-specific parameters, sets up the training arguments with Hugging Face's transformers, and trains the model with TRL's SFTTrainer.

In [None]:
!pip install peft



In [None]:
from peft import LoraConfig, get_peft_model, TaskType

In [None]:
import os

# Hugging Face Hub repository id of the base model to fine-tune.
# NOTE: `dir` shadows the Python builtin of the same name; the name is kept
# because the model-loading cell refers to it.
dir = "meta-llama/Meta-Llama-3-8B-Instruct"

# Hugging Face access token used when downloading the model and tokenizer.
# (The previous placeholder called this a Pinecone key, but it is passed as the
# Hugging Face `token` argument.) It is read from the HF_TOKEN environment
# variable so that no credential is stored in the notebook. When the variable
# is unset this is None, in which case `from_pretrained` is expected to fall
# back to the credentials saved by `notebook_login()`.
access_token = os.environ.get("HF_TOKEN")

In [None]:
from transformers import BitsAndBytesConfig

In [None]:
import torch

In [None]:
!pip install -U bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)
Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m32.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.0


In [None]:
# bitsandbytes settings for loading the base model with 4-bit weights.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    # Compute in bfloat16 so the quantized layers use the same precision as
    # the training run, which is configured with bf16=True in TrainingArguments.
    # (Previously float16, which mixed two half-precision formats.)
    bnb_4bit_compute_dtype=torch.bfloat16,
    # Also quantize the quantization constants (double quantization).
    bnb_4bit_use_double_quant=True,
    # NF4 quantization data type.
    bnb_4bit_quant_type="nf4",
)

In [None]:
from huggingface_hub import notebook_login

# Authenticate with the Hugging Face Hub. In a notebook this renders an
# interactive login widget (see the recorded VBox output of this cell).
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
!pip install -U bitsandbytes



In [None]:
import bitsandbytes
from transformers import (
    LlamaForCausalLM,
    Trainer,
    TrainingArguments,
    AutoTokenizer,
)

# Download and load the base causal-LM weights, applying the 4-bit
# quantization settings held in `quantization_config`.
model = LlamaForCausalLM.from_pretrained(
    dir,
    token=access_token,
    quantization_config=quantization_config,
)

# Load the tokenizer from the same repository and reuse the end-of-sequence
# token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(dir, token=access_token)
tokenizer.pad_token = tokenizer.eos_token

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now default to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

In [None]:
import pandas as pd
# Read the full dataset from an Excel workbook; the path is relative, so the
# file must be present in the notebook's working directory.
dataset = pd.read_excel('data_5983_final.xlsx')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows; the remaining 80% becomes the training split.
train_dataset, val_test_dataset = train_test_split(
    dataset,
    test_size=0.2,
    random_state=42,
)

# Divide the held-out rows evenly: half for validation, half for test.
val_dataset, test_dataset = train_test_split(
    val_test_dataset,
    test_size=0.5,
    random_state=42,
)

# Report the size of each split, one per line.
print(
    f"Training dataset size: {len(train_dataset)}",
    f"Validation dataset size: {len(val_dataset)}",
    f"Test dataset size: {len(test_dataset)}",
    sep="\n",
)

Training dataset size: 2246
Validation dataset size: 281
Test dataset size: 281


In [None]:
# LoRA adapter settings for causal language-model fine-tuning.
# No `target_modules` are listed, so the choice of adapted layers is left to
# PEFT (presumably its per-architecture defaults — confirm if this matters).
lora_config = LoraConfig(
    r=8,                            # rank of the low-rank update matrices
    lora_alpha=16,                  # LoRA scaling parameter
    lora_dropout=0.1,               # dropout applied inside the LoRA layers
    bias="none",                    # leave bias terms untrained
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,           # adapters are created in training mode
)

In [None]:
from peft import prepare_model_for_kbit_training

# The base model was loaded with 4-bit quantization, so run PEFT's k-bit
# preparation step before attaching adapters. Because the model handed to the
# trainer is already adapter-wrapped, this step is done explicitly here
# instead of relying on the trainer to do it.
model = prepare_model_for_kbit_training(model)

# Wrap the prepared model with the LoRA adapters described by `lora_config`.
# NOTE: this rebinds `model`; re-running the cell without reloading the base
# model would wrap an already-wrapped model.
model = get_peft_model(model, lora_config)

In [None]:
# Shuffle the validation rows with a fixed seed and keep only the first 100,
# which bounds the cost of each evaluation pass during training.
val_dataset = val_dataset.sample(frac=1, random_state=42).head(100)

In [None]:
from transformers import TrainingArguments

# Training hyperparameters for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="./finetuned-llama-latest",
    # `eval_strategy` is the current name for this option; the older
    # `evaluation_strategy` spelling is deprecated in recent transformers
    # releases.
    eval_strategy="steps",
    eval_steps=1200,
    optim="paged_adamw_32bit",
    # Checkpoints are written on the same 1200-step schedule as evaluation,
    # which load_best_model_at_end relies on to pick the best checkpoint.
    save_strategy="steps",
    save_steps=1200,
    # Training length is fixed in optimizer steps; per the recorded trainer
    # output this overrides any num_train_epochs value.
    max_steps=6000,
    learning_rate=6e-5,
    warmup_ratio=0.03,
    group_by_length=True,
    # Batch size 1 with no gradient accumulation (see below): one example per
    # optimizer step.
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    weight_decay=0.001,
    # Keep at most two checkpoints on disk.
    save_total_limit=2,
    # Reload the checkpoint with the best evaluation loss when training ends.
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    gradient_accumulation_steps=1,
    lr_scheduler_type="linear",
    bf16=True,
    logging_steps=50,
    push_to_hub=True,
)




In [None]:
!pip install trl

Collecting trl
  Downloading trl-0.12.2-py3-none-any.whl.metadata (11 kB)
Collecting datasets>=2.21.0 (from trl)
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.21.0->trl)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets>=2.21.0->trl)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets>=2.21.0->trl)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets>=2.21.0->trl)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading trl-0.12.2-py3-none-any.whl (365 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m365.7/365.7 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━

In [None]:
from trl import SFTTrainer

In [None]:
from datasets import Dataset

# Convert the pandas training and validation splits into Hugging Face
# `Dataset` objects, rebinding the same two names (train first, then val).
train_dataset, val_dataset = map(Dataset.from_pandas, (train_dataset, val_dataset))


In [None]:
# Build the supervised fine-tuning trainer.
# - No tokenizer is passed here, so the `tokenizer` object created earlier in
#   the notebook is not handed to the trainer.
# - `max_seq_length` and `packing` are given directly to SFTTrainer; the
#   recorded output shows trl flagging this as deprecated in favor of SFTConfig.
trainer = SFTTrainer(
    model=model,
    peft_config=lora_config,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    max_seq_length=128,
    packing=False,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/2246 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [None]:
# Run fine-tuning. The recorded output below shows 6000 steps, with training
# and validation loss reported every 1200 steps.
trainer.train()

Step,Training Loss,Validation Loss
1200,2.1194,1.999602
2400,1.9194,1.911506
3600,1.96,1.854072
4800,1.7597,1.819842
6000,1.8987,1.798655


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


TrainOutput(global_step=6000, training_loss=1.987241766611735, metrics={'train_runtime': 5160.2097, 'train_samples_per_second': 1.163, 'train_steps_per_second': 1.163, 'total_flos': 3.260876332191744e+16, 'train_loss': 1.987241766611735, 'epoch': 2.671415850400712})

In [None]:
from huggingface_hub import notebook_login

# Prompt for Hugging Face credentials again; the next cell pushes the trained
# model to the Hub.
notebook_login()

In [None]:
# Upload the trainer's output to the Hugging Face Hub; the returned CommitInfo
# (shown in the recorded output) identifies the target repository and commit.
trainer.push_to_hub()

CommitInfo(commit_url='https://huggingface.co/supkon/finetuned-llama-latest/commit/bcbf847d06436963420fbefb8f134dc3dd012975', commit_message='End of training', commit_description='', oid='bcbf847d06436963420fbefb8f134dc3dd012975', pr_url=None, repo_url=RepoUrl('https://huggingface.co/supkon/finetuned-llama-latest', endpoint='https://huggingface.co', repo_type='model', repo_id='supkon/finetuned-llama-latest'), pr_revision=None, pr_num=None)

In [None]:
# Save a local copy of the fine-tuned model and the tokenizer to the same
# directory. `model` is the PEFT-wrapped model, so this presumably writes the
# adapter weights rather than the full base model — confirm before relying on
# this directory as a standalone checkpoint.
model.save_pretrained("./finetuned-llama-updated")
# Last expression of the cell: the tuple of written tokenizer file paths is
# what the notebook displays.
tokenizer.save_pretrained("./finetuned-llama-updated")

('./finetuned-llama-updated/tokenizer_config.json',
 './finetuned-llama-updated/special_tokens_map.json',
 './finetuned-llama-updated/tokenizer.json')