In [None]:
import os
import json
from datasets import load_dataset
import torch

# Setting the working directory
os.chdir('/kaggle/working/')
os.system('rm -rf LLaMA-Factory')
os.system('git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git')
os.chdir('LLaMA-Factory')

# Install required dependencies
!pip install -q torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1
!pip uninstall -q -y jax
!pip install -q -e .[torch,bitsandbytes,liger-kernel]
!pip uninstall wandb -y

# Verify that CUDA is available
try:
    assert torch.cuda.is_available() is True
except AssertionError:
    print("Please set up a GPU before using LLaMA Factory")


In [None]:
# Authenticate with the Hugging Face Hub.
# The access token is read from the Kaggle secret named "HF_TOKEN" rather than
# being hardcoded in the notebook.
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("HF_TOKEN")
login(token=hf_token)

# Hyperparameter configurations

In [None]:
# --- Run configuration -------------------------------------------------------

# Training stage handed to LLaMA-Factory; also used as a prefix in file names.
stage = "ppo"

# Models: base checkpoint to fine-tune and the path given as the reward model.
base_model = "meta-llama/Llama-3.2-1B-Instruct"
reward_model_name = "/kaggle/input/adapter/saves/skywork_llama/lora/reward"
# Short label for the reward model (not referenced in the cells visible here).
extract_reward_model_name = "Skywork"

# Data: source dataset on the Hub and the size used to cut the subset.
dataset_name = "Muadil/ppo_datasets_summary"
data_size = 40_000

# Optimisation: per-device batch size and number of training epochs.
batch_size, epoch_size = 4, 1

In [None]:
# Build the prompt files for this stage and register them with LLaMA-Factory.

# Load the Hugging Face dataset
dataset = load_dataset(dataset_name)

# Take the first data_size * 1.25 rows (capped at the size of the train split)
# so that the 80% train share of the split below holds about data_size prompts.
subset_size = min(int(data_size * 1.25), len(dataset["train"]))
resized_dataset = dataset["train"].select(range(subset_size))
print(f"Size of the data set: {len(resized_dataset)}")

# Split the subset into 80% (train) and 20% (eval); the seed keeps it reproducible
train_test_split = resized_dataset.train_test_split(test_size=0.2, seed=42)

# Keep only the "prompt" column of each split.
# list() keeps the values JSON-serializable even if column indexing returns a
# lazy column object instead of a plain list (believed to be the case in
# datasets 4.x -- TODO confirm).
train_dataset = list(train_test_split["train"]["prompt"])
eval_dataset = list(train_test_split["test"]["prompt"])

# Save each split as a JSON array of prompts inside LLaMA-Factory's data/ folder
train_output_file = f"data/{stage}_dataset_{data_size}.json"
eval_output_file = f"data/{stage}_eval_dataset_{data_size}.json"
for path, prompts in ((train_output_file, train_dataset), (eval_output_file, eval_dataset)):
    with open(path, "w", encoding="utf-8") as f:
        json.dump(prompts, f, indent=2, ensure_ascii=False)

# Kept for backward compatibility: this name previously ended up holding the eval path.
output_file = eval_output_file

# Read the existing dataset registry
with open('data/dataset_info.json', 'r', encoding='utf-8') as f:
    dataset_info = json.load(f)

# Registry entries for the two new files; the "prompt" role is mapped to the
# column named "text".
new_data = {
    f"{stage}_dataset_{data_size}": {
        "file_name": f"{stage}_dataset_{data_size}.json",
        "columns": {
            "prompt": "text"
        }
    }
}

new_eval_data = {
    f"{stage}_eval_dataset_{data_size}": {
        "file_name": f"{stage}_eval_dataset_{data_size}.json",
        "columns": {
            "prompt": "text"
        }
    }
}

# Add the new entries to the existing registry (same-named entries are overwritten)
dataset_info.update(new_data)
dataset_info.update(new_eval_data)

# Save the updated registry back
with open('data/dataset_info.json', 'w', encoding='utf-8') as f:
    json.dump(dataset_info, f, indent=2, ensure_ascii=False)

print("The new data was successfully added and the file was saved.")

# Report what was actually written: the real file names and prompt counts.
print(f"Saved {len(train_dataset)} training prompts to {train_output_file}.")
print(f"Saved {len(eval_dataset)} evaluation prompts to {eval_output_file}.")

In [None]:
import json

args = dict(
  stage=stage,
  quantization_bit=4, #nf4 to do quantization, default is None so it does not quantize.
  quantization_method="bitsandbytes", # we enter in which format to quantize. bitsandbytes, hqq, eetq, etc.
  do_train=True,
  model_name_or_path= base_model, # use bnb-4bit-quantized Llama-3-8B-Instruct model
  dataset=f"{stage}_dataset_{data_size}",             # use alpaca and identity datasets
    #eval_dataset = f"{stage}_eval_dataset_{data_size}",
  template="llama3",                     # use llama3 prompt template
  finetuning_type="lora",                   # use LoRA adapters to save memory
  lora_target="all",                     # attach LoRA adapters to all linear layers
  reward_model = reward_model_name,
    output_dir="llama3_lora",                  # the path to save LoRA adapters
  per_device_train_batch_size=batch_size,               # the batch size
  gradient_accumulation_steps=4,               # the gradient accumulation steps
  lr_scheduler_type="cosine",                 # use cosine learning rate scheduler
  logging_steps=10,                      # log every 10 steps
  warmup_ratio=0.1,                      # use warmup scheduler
  save_steps=1000,                      # save checkpoint every 1000 steps
  learning_rate=5e-5,                     # the learning rate
  num_train_epochs=epoch_size,                    # the epochs of training
  max_samples=500,                      # use 500 examples in each dataset
  max_grad_norm=1.0,                     # clip gradient norm to 1.0
  loraplus_lr_ratio=16.0,                   # use LoRA+ algorithm with lambda=16.0
  fp16=True,                         # use float16 mixed precision training
  enable_liger_kernel=True,                   # use liger kernel for efficient training
  #report_to="wandb",
  run_name=f"username/repository",
  #load_best_model_at_end=True,  # Ensure the best model is loaded at the end
  metric_for_best_model="eval_loss",  # Metric to monitor for the best model
  greater_is_better=False,
    #evaluation_strategy="steps",
)

json.dump(args, open("train_llama3.json", "w", encoding="utf-8"), indent=2)
# Start the training process
!llamafactory-cli train train_llama3.json

In [None]:
# Saving and exporting the model
export_args = dict(
    model_name_or_path=base_model,  # original model
    adapter_name_or_path="llama3_lora",  # saved LoRA adapters
    template="llama3",  # same template
    finetuning_type="lora",  # use of the same LoRA
    export_dir="llama3_lora_merged",  # where to register the unified model
    export_size=5,  # model file size (GB)
    export_device="cpu",  # export device
    export_hub_model_id= "username/repository",  # Hugging Face model ID
)

# Export the model and upload it to Hugging Face

json.dump(export_args, open("merge_llama3.json", "w", encoding="utf-8"), indent=2)
!llamafactory-cli export merge_llama3.json