In [1]:
import os
import wandb
from dotenv import load_dotenv
load_dotenv('/mnt/data1tb/thangcn/datnv2/.env')

def setup_wandb(project_name: str, run_name: str):
    # Set up your API KEY
    try:
        api_key = os.getenv("WANDB_API_KEY")
        wandb.login(key=api_key)
        print("Successfully logged into WandB.")
    except KeyError:
        raise EnvironmentError("WANDB_API_KEY is not set in the environment variables.")
    except Exception as e:
        print(f"Error logging into WandB: {e}")
    
    # Optional: Log models
    os.environ["WANDB_LOG_MODEL"] = "checkpoint"
    os.environ["WANDB_WATCH"] = "all"
    os.environ["WANDB_SILENT"] = "true"
    
    # Initialize the WandB run
    try:
        wandb.init(project=project_name, name=run_name)
        print(f"WandB run initialized: Project - {project_name}, Run - {run_name}")
    except Exception as e:
        print(f"Error initializing WandB run: {e}")


setup_wandb(project_name="ft_for_rag", run_name="llama3-fc")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/duyhoang/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mthang19431[0m ([33mthang19431-hanoi-university-of-science-and-technology[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Successfully logged into WandB.


WandB run initialized: Project - ft_for_rag, Run - llama3-fc


In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
MODEL_NAME = "thang1943/Llama-3-8B-Instruct-Finance-RAG"
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, quantization_config=quantization_config, device_map="cuda"
)

Loading checkpoint shards: 100%|██████████| 4/4 [00:03<00:00,  1.14it/s]


In [4]:
for name, module in model.named_modules():
    print(module)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128264, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((409

In [5]:
from peft import LoraConfig, get_peft_model

In [6]:
config = LoraConfig(
    r=32,   # LoRA rank - suggested values: 8, 16, 32, 64, 128
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", 
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,  
    bias="none",    
    use_rslora=False,   
    loftq_config=None  
)

In [7]:
peft_model = get_peft_model(model, config)
peft_model.print_trainable_parameters()

trainable params: 83,886,080 || all params: 8,114,212,864 || trainable%: 1.0338


In [8]:
device = 'cuda' 

In [9]:
peft_model = peft_model.to(device)

In [10]:
from peft import prepare_model_for_kbit_training

peft_model = prepare_model_for_kbit_training(
    peft_model,
    use_gradient_checkpointing=True
)

In [11]:
from datasets import load_dataset

# Loading the dataset
dataset = load_dataset("Salesforce/xlam-function-calling-60k", split="train")
dataset = dataset.select(range(15000))
print(f"Using a sample size of {len(dataset)} for fine-tuning.")

Using a sample size of 15000 for fine-tuning.


In [12]:
print(dataset)
print(dataset[0])

Dataset({
    features: ['id', 'query', 'answers', 'tools'],
    num_rows: 15000
})
{'id': 0, 'query': 'Where can I find live giveaways for beta access and games?', 'answers': '[{"name": "live_giveaways_by_type", "arguments": {"type": "beta"}}, {"name": "live_giveaways_by_type", "arguments": {"type": "game"}}]', 'tools': '[{"name": "live_giveaways_by_type", "description": "Retrieve live giveaways from the GamerPower API based on the specified type.", "parameters": {"type": {"description": "The type of giveaways to retrieve (e.g., game, loot, beta).", "type": "str", "default": "game"}}}]'}


In [13]:
import pandas as pd

In [14]:
dataset_df = pd.DataFrame(dataset)

In [15]:
dataset_df.head()

Unnamed: 0,id,query,answers,tools
0,0,Where can I find live giveaways for beta acces...,"[{""name"": ""live_giveaways_by_type"", ""arguments...","[{""name"": ""live_giveaways_by_type"", ""descripti..."
1,1,I need to understand the details of the Ethere...,"[{""name"": ""web_chain_details"", ""arguments"": {""...","[{""name"": ""peers"", ""description"": ""Retrieves a..."
2,2,What is the T3MA for 'ETH/BTC' using a 1h inte...,"[{""name"": ""t3ma"", ""arguments"": {""symbol"": ""ETH...","[{""name"": ""t3ma"", ""description"": ""Fetches the ..."
3,3,List titles originally aired on networks '1' a...,"[{""name"": ""list_titles"", ""arguments"": {""networ...","[{""name"": ""get_animes"", ""description"": ""Retrie..."
4,4,Fetch the competitor standings for the recentl...,"[{""name"": ""stagecompetitorstandings"", ""argumen...","[{""name"": ""stagecompetitorstandings"", ""descrip..."


In [16]:
max_seq_length = 2048

In [17]:
# Set chat template cho Llama-3 (nếu chưa có)
llama3_template = """{% for message in messages %}
{% if message['role'] == 'system' %}
{{ '<|start_header_id|>system<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}
{% elif message['role'] == 'user' %}
{{ '<|start_header_id|>user<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}
{% elif message['role'] == 'assistant' %}
{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}
{% endif %}
{% endfor %}
{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}"""

tokenizer.chat_template = llama3_template

# Hàm format dữ liệu
def format_conversation(examples):
    formatted_texts = []
    for query, tools, answer in zip(examples['query'], examples['tools'], examples['answers']):
        messages = [
            {"role": "system", "content": f"You are a helpful assistant with tools. Use these when needed:\n{tools}"},
            {"role": "user", "content": query},
            {"role": "assistant", "content": answer}
        ]
        formatted = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False
        )
        formatted_texts.append(formatted)
    return {"text": formatted_texts}

# Load và xử lý dataset
dataset = load_dataset("Salesforce/xlam-function-calling-60k", split="train").select(range(15000))
dataset = dataset.map(format_conversation, batched=True, remove_columns=dataset.column_names)

# Kiểm tra kết quả
print(dataset[0]["text"])

<|start_header_id|>system<|end_header_id|>

You are a helpful assistant with tools. Use these when needed:
[{"name": "live_giveaways_by_type", "description": "Retrieve live giveaways from the GamerPower API based on the specified type.", "parameters": {"type": {"description": "The type of giveaways to retrieve (e.g., game, loot, beta).", "type": "str", "default": "game"}}}]<|eot_id|>
<|start_header_id|>user<|end_header_id|>

Where can I find live giveaways for beta access and games?<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>

[{"name": "live_giveaways_by_type", "arguments": {"type": "beta"}}, {"name": "live_giveaways_by_type", "arguments": {"type": "game"}}]<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>




In [18]:
from transformers import TrainingArguments

args = TrainingArguments(
        output_dir = "outputs",             
        per_device_train_batch_size = 8,  # Controls the batch size per device
        gradient_accumulation_steps = 2,  # Accumulates gradients to simulate a larger batch
        warmup_steps = 5,
        learning_rate = 2e-4,             # Sets the learning rate for optimization
        num_train_epochs = 3,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        optim = "adamw_8bit",
        weight_decay = 0.01,              # Regularization term for preventing overfitting
        lr_scheduler_type = "linear",     # Chooses a linear learning rate decay
        seed = 3407,                        
        report_to = "wandb",              # Enables Weights & Biases (W&B) logging
        logging_steps = 1,                # Sets frequency of logging to W&B
        logging_strategy = "steps",       # Logs metrics at each specified step
        save_strategy = "no",               
        load_best_model_at_end = True,    # Loads the best model at the end
        save_only_model = False           # Saves entire model, not only weights
    )

In [19]:
from trl import SFTTrainer

trainer = SFTTrainer(
    model = peft_model,
    processing_class = tokenizer,
    train_dataset = dataset,      
    args = args
)

No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [20]:
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA GeForce RTX 4090. Max memory = 23.65 GB.
9.904 GB of memory reserved.


In [21]:
trainer.train()

TypeError: device() received an invalid combination of arguments - got (NoneType), but expected one of:
 * (torch.device device)
      didn't match because some of the arguments have invalid types: (!NoneType!)
 * (str type, int index = -1)
