# Importing Libraries and Setting Paths

In [1]:
import os
from huggingface_hub import login
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
from trl import SFTTrainer

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
huggingface_token = "your_token"
data_path = r"path_to/Data/train_dataset_text.jsonl"
output_dir = r"path_to/Models/fine_tuned_model"
llama_lora_path = r"path_to/Models/fine_tuned_llama_lora"
llama_merged_path = r"path_to/Models/fine_tuned_llama_merged"
gguf_path = r"path_to/Models/model.gguf"
models_path = r"path_to/Models"

# Meta-Llama-3-8B-Instruct model

In [None]:
# Log in to Hugging Face
os.environ["HUGGINGFACE_TOKEN"] = huggingface_token
login(os.environ["HUGGINGFACE_TOKEN"])

# Set device to CUDA for A100 GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

dataset = load_dataset("json", data_files={"train": data_path})
print("Dataset size:", len(dataset["train"]))


In [None]:

def formatting_func(example):
    return example["text"]

training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    learning_rate=2e-4,
    max_steps=10,
    warmup_steps=50,
    logging_steps=10,
    save_steps=1,
    eval_strategy="no",
    fp16=True,
    bf16=False,
    optim="adamw_torch",
    report_to="none",
    torch_compile=False,
    label_names=["labels"]
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    args=training_args,
    peft_config=lora_config,
    formatting_func=formatting_func,
    processing_class=tokenizer
)

trainer.train()
print("done")

Using device: cuda


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset size: 87949


Applying formatting function to train dataset:   0%|          | 0/87949 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/87949 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/87949 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/87949 [00:00<?, ? examples/s]

Step,Training Loss
10,1.6055


done


In [None]:
# Save the fine-tuned model with LoRA
trainer.model.save_pretrained(llama_lora_path)
tokenizer.save_pretrained(llama_lora_path)

merged_model = model.merge_and_unload()
merged_model.save_pretrained(llama_merged_path)
tokenizer.save_pretrained(llama_merged_path)

In [None]:
# Convert the fine-tuned model to GGUF format
!cd llama.cpp && python convert_hf_to_gguf.py llama_merged_path --outfile gguf_path --outtype f16