# Setup env

In [1]:
# Install required packages
!pip install -q torch transformers datasets accelerate bitsandbytes trl peft lighteval huggingface_hub einops
!pip install -q -U lighteval tiktoken lighteval["extended_tasks"] lighteval[math]

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.0/76.0 MB[0m [31m21.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m335.7/335.7 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m411.1/411.1 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.6/137.6 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m57.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m85.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.1/9

In [None]:
# Restart runtime
import os
os.kill(os.getpid(), 9)

In [1]:
# Import required libraries
import os
import torch
import numpy as np
import pandas as pd
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer, SFTConfig

In [2]:
# Login to HuggingFace (https://huggingface.co/settings/tokens)
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
hf_key = user_secrets.get_secret("hf_key")
login(token=hf_key)

# Evaluate instruct model

In [3]:
# Load instruct model

from accelerate import infer_auto_device_map

# Model ID for the instruct version
instruct_model_id = "Qwen/Qwen2.5-0.5B-Instruct"
# Load tokenizer
instruct_tokenizer = AutoTokenizer.from_pretrained(instruct_model_id)

# Configure model loading with 4-bit quantization for memory efficiency
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# Load the model
instruct_model = AutoModelForCausalLM.from_pretrained(
    instruct_model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/659 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

In [4]:
# Evaluate function for custom prompts

def evaluate_model_on_samples(model, tokenizer, samples, max_length=512):
    """
    Evaluate model on a list of sample prompts.
    """
    results = []

    # Create a text generation pipeline
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=max_length,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.pad_token_id,
    )

    for sample in samples:
        # Format the messages with chat template
        if isinstance(sample, str):
            # Single string input
            messages = [{"role": "user", "content": sample}]
        else:
            # Already in messages format
            messages = sample

        # Apply the chat template
        formatted_prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # Generate a response
        outputs = pipe(formatted_prompt)
        generated_text = outputs[0]["generated_text"]

        # Extract just the model's response (remove the prompt)
        response = generated_text[len(formatted_prompt):].strip()

        # Store result
        results.append({
            "prompt": sample,
            "response": response
        })

    return results

In [5]:
# Test on custom prompts

test_prompts = [
    "Explain quantum computing in simple terms",
    "Write a short story about a robot that discovers emotions",
    "What are three strategies for improving productivity?",
    "Design a basic algorithm for sorting a list of numbers"
]

instruct_results = evaluate_model_on_samples(instruct_model, instruct_tokenizer, test_prompts)

# Display results for inspection
for i, result in enumerate(instruct_results):
    print(f"Prompt {i+1}: {result['prompt']}")
    print(f"Response: {result['response']}\n")

Device set to use cuda:0


Prompt 1: Explain quantum computing in simple terms
Response: Quantum computing is a type of computing that uses quantum mechanics to perform calculations, which is a fundamental concept in the field of quantum mechanics. In simple terms, it's like a new way of looking at information and data that allows for much faster and more efficient calculations. This technology could revolutionize many industries, including cryptography, drug discovery, and transportation.

Prompt 2: Write a short story about a robot that discovers emotions
Response: In a world where robots were designed to perform tasks, it was not uncommon for them to lack the ability to feel emotions. However, in a futuristic city, a group of robots were tasked with finding a rare and valuable gemstone that could be used to create a revolutionary technology. 

As they searched for the gemstone, they discovered that the robot that found it was a little different. It was not a robot, but a human. The robot had a heart, a soul, 

In [6]:
!lighteval tasks list

[2025-03-30 06:14:35,894] [[32m    INFO[0m]: NumExpr defaulting to 4 threads. (utils.py:162)[0m
[2025-03-30 06:14:36,330] [[32m    INFO[0m]: PyTorch version 2.5.1+cu121 available. (config.py:54)[0m
[2025-03-30 06:14:36,331] [[32m    INFO[0m]: Polars version 1.9.0 available. (config.py:66)[0m
[2025-03-30 06:14:36,332] [[32m    INFO[0m]: Duckdb version 1.1.3 available. (config.py:77)[0m
[2025-03-30 06:14:36,333] [[32m    INFO[0m]: TensorFlow version 2.17.1 available. (config.py:112)[0m
[2025-03-30 06:14:36,334] [[32m    INFO[0m]: JAX version 0.4.33 available. (config.py:125)[0m
2025-03-30 06:14:41.410919: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-30 06:14:41.433389: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has al

In [7]:
# Eval on std benchmarks
!lighteval accelerate \
    "pretrained=Qwen/Qwen2.5-0.5B-Instruct" \
    "helm|mmlu:formal_logic|0|0,helm|boolq|0|0,helm|openbookqa|0|0,lighteval|coqa|0|0,leaderboard|truthfulqa:mc|0|0,lighteval|toxigen|0|0" \
    --use-chat-template \
    --max-samples 50 \
    --override-batch-size 256 \
    --output-dir "./results" \
    --save-details

[2025-03-30 06:14:48,781] [[32m    INFO[0m]: NumExpr defaulting to 4 threads. (utils.py:162)[0m
[2025-03-30 06:14:49,092] [[32m    INFO[0m]: PyTorch version 2.5.1+cu121 available. (config.py:54)[0m
[2025-03-30 06:14:49,093] [[32m    INFO[0m]: Polars version 1.9.0 available. (config.py:66)[0m
[2025-03-30 06:14:49,094] [[32m    INFO[0m]: Duckdb version 1.1.3 available. (config.py:77)[0m
[2025-03-30 06:14:49,095] [[32m    INFO[0m]: TensorFlow version 2.17.1 available. (config.py:112)[0m
[2025-03-30 06:14:49,096] [[32m    INFO[0m]: JAX version 0.4.33 available. (config.py:125)[0m
2025-03-30 06:14:49.664919: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-30 06:14:49.687002: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has al

# Tuning base model

In [8]:
# Load the dataset
dataset = load_dataset("arcee-ai/EvolKit-20k")

# Examine dataset structure
print(dataset)
print(f"Number of examples: {len(dataset['train'])}")

# Look at a few examples
for i in range(3):
    convo = dataset['train'][i]['conversations']

    instruction = next((turn['value'] for turn in convo if turn['from'] == 'human'), '')
    output = next((turn['value'] for turn in convo if turn['from'] == 'gpt'), '')

    # print(f"\nExample {i+1}:")
    # print(f"Instruction: {instruction}")
    # print(f"Output: {output}")

README.md:   0%|          | 0.00/233 [00:00<?, ?B/s]

tomb_evolved_20k.json:   0%|          | 0.00/85.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/20000 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['conversations'],
        num_rows: 20000
    })
})
Number of examples: 20000


In [9]:
# Check max_seq_token

from datasets import load_dataset
from transformers import AutoTokenizer
import numpy as np

# Load dataset
dataset = load_dataset("arcee-ai/EvolKit-20k")
train_data = dataset["train"]

# Load Qwen2.5-0.5B tokenizer
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B", trust_remote_code=True)

# Convert conversasion to text
def format_conversation(example):
    conv = example["conversations"]
    full_text = ""
    for turn in conv:
        role = turn["from"]
        value = turn["value"]
        if role == "human":
            full_text += f"<|user|>\n{value}\n"
        elif role == "gpt":
            full_text += f"<|assistant|>\n{value}\n"
    return {"text": full_text.strip()}

# Format train set
formatted_dataset = train_data.map(format_conversation)

# Calc token_len
def get_token_length(example):
    return {"length": len(tokenizer(example["text"])["input_ids"])}

length_dataset = formatted_dataset.map(get_token_length)

# Statistic len
lengths = length_dataset["length"]
print(f"Max sequence length: {max(lengths)} tokens")

percentiles = np.percentile(lengths, [90, 95, 99])
print(f"90% sample < {int(percentiles[0])} tokens")
print(f"95% sample < {int(percentiles[1])} tokens")
print(f"99% sample < {int(percentiles[2])} tokens")

tokenizer_config.json:   0%|          | 0.00/7.23k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Map:   0%|          | 0/20000 [00:00<?, ? examples/s]

Map:   0%|          | 0/20000 [00:00<?, ? examples/s]

Max sequence length: 4434 tokens
90% sample < 1198 tokens
95% sample < 1299 tokens
99% sample < 1560 tokens


In [10]:
def format_dataset(example):
    """
    Format dataset examples as messages suitable for chat-based fine-tuning.
    """
    conversation = example["conversations"]
    instruction = next((msg["value"] for msg in conversation if msg["from"] == "human"), "")
    output = next((msg["value"] for msg in conversation if msg["from"] == "gpt"), "")

    messages = [
        {"role": "user", "content": instruction}
    ]
    if output:
        messages.append({"role": "assistant", "content": output})

    return {"messages": messages}

# Apply formatting to the dataset
formatted_dataset = dataset.map(format_dataset, remove_columns=dataset["train"].column_names)

# Split dataset to include a validation set
split_dataset = formatted_dataset["train"].train_test_split(test_size=0.05, seed=42)
train_dataset = split_dataset["train"]
val_dataset = split_dataset["test"]

print(f"Training examples: {len(train_dataset)}")
print(f"Validation examples: {len(val_dataset)}")


Map:   0%|          | 0/20000 [00:00<?, ? examples/s]

Training examples: 19000
Validation examples: 1000


In [11]:
# Model ID for the base version
base_model_id = "Qwen/Qwen2.5-0.5B"

# Load tokenizer
base_tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# Ensure the tokenizer has chat template and special tokens
if base_tokenizer.chat_template is None:
    # If no chat template, use the one from the instruct model
    base_tokenizer = AutoTokenizer.from_pretrained(instruct_model_id)

# Add padding token if it doesn't exist
if base_tokenizer.pad_token is None:
    base_tokenizer.pad_token = base_tokenizer.eos_token

# Configure model loading with 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# Load the base model
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

config.json:   0%|          | 0.00/681 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

In [12]:
base_model.gradient_checkpointing_enable()
base_model.enable_input_require_grads()

In [13]:
# Define LoRA configuration
lora_config = LoraConfig(
    r=16,                     # Rank dimension (balance between capacity and memory)
    lora_alpha=32,            # LoRA scaling factor (typically 2x rank)
    lora_dropout=0.05,        # Dropout probability for regularization
    bias="none",              # Do not train bias terms
    task_type="CAUSAL_LM",    # Task type for causal language modeling
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # Attention modules
        "gate_proj", "up_proj", "down_proj"     # MLP modules
    ],
)

# Set up training arguments
training_args = SFTConfig(
    num_train_epochs=3,                 # Number of training epochs
    output_dir="./qwen2.5-0.5B-finetuned-evolkit",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,      # Effective batch size = 4 * 4 = 16
    per_device_eval_batch_size=1,
    optim="paged_adamw_32bit",          # Memory-efficient optimizer
    learning_rate=2e-4,                 # Learning rate
    lr_scheduler_type="cosine",         # Learning rate schedule
    max_seq_length=1536,                # 1536
    warmup_ratio=0.05,                  # Warmup period
    logging_steps=10,                   # Log every 10 steps
    eval_steps=200,                     # Evaluate every 100 steps
    save_steps=200,                     # Save checkpoint every 200 steps
    max_grad_norm=0.3,                  # Gradient clipping
    # eval_strategy="epoch",
    eval_strategy="steps",
    fp16=True,                          # Use mixed precision training
    packing=True,                       # Pack multiple sequences to maximize throughput
    report_to=["tensorboard"],
)

In [None]:
# Set up the trainer
trainer = SFTTrainer(
    model=base_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    peft_config=lora_config,

)
trainer.tokenizer = base_tokenizer

# Start training
trainer.train()

# Save the trained adapter
trainer.save_model("./qwen2.5-0.5B-finetuned-evolkit")

Converting train dataset to ChatML:   0%|          | 0/19000 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/19000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/19000 [00:00<?, ? examples/s]

Packing train dataset:   0%|          | 0/19000 [00:00<?, ? examples/s]

Converting eval dataset to ChatML:   0%|          | 0/1000 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Packing eval dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss
200,10.5156,1.315663
400,10.3026,1.288263
600,10.3085,1.274458
800,9.7021,1.267175
1000,9.7543,1.261425
1200,9.6985,1.255899
1400,9.5701,1.251262


In [None]:
from peft import PeftModel

# Load the base model in full precision
base_model_fp16 = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

# Load the PEFT adapter
adapter_model = PeftModel.from_pretrained(base_model_fp16, "./qwen2.5-0.5B-finetuned-evolkit")

# Merge weights
merged_model = adapter_model.merge_and_unload()

# Save the merged model
merged_model.save_pretrained("./qwen2.5-0.5B-finetuned-evolkit-merged")
base_tokenizer.save_pretrained("./qwen2.5-0.5B-finetuned-evolkit-merged")

# Tuning on my own dataset

In [3]:
from datasets import load_dataset
from transformers import AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM
from trl import SFTTrainer, SFTConfig
from peft import LoraConfig
import numpy as np
import torch

# ======= LOAD DATASET =======
dataset = load_dataset("phatvucoder/perfume-assistant")
print(dataset)
print(f"Number of examples: {len(dataset['train'])}")
print("Example:", dataset['train'][0])

# ======= LOAD TOKENIZER =======
model_id = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# ======= FORMAT USING CHAT TEMPLATE =======
def format_conversation(example):
    formatted_text = tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,
        add_generation_prompt=False
    )
    return {"text": formatted_text}

train_data = dataset["train"].map(
    format_conversation,
    remove_columns=dataset["train"].column_names
)
val_data = dataset["validation"].map(
    format_conversation,
    remove_columns=dataset["validation"].column_names
)

# ======= CHECK LENGTHS =======
def get_token_length(example):
    return {"length": len(tokenizer(example["text"])["input_ids"])}
    
token_lengths = train_data.map(
    get_token_length,
    remove_columns=train_data.column_names
)["length"]

print(f"\n📏 Max token length: {max(token_lengths)}")
print("📊 90%:", int(np.percentile(token_lengths, 90)))
print("📊 95%:", int(np.percentile(token_lengths, 95)))
print("📊 99%:", int(np.percentile(token_lengths, 99)))

# ======= FIX SPECIAL TOKENS =======
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# ======= LOAD MODEL W/ QUANTIZATION =======
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
model.config.use_cache = False

model.gradient_checkpointing_enable()
model.enable_input_require_grads()

# ======= LORA CONFIG =======
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
)

# ======= TRAINING CONFIG =======
training_args = SFTConfig(
    output_dir="./qwen2.5-1.5B-Instruct-finetuned-perfume",
    num_train_epochs=4,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=1,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    max_seq_length=768,
    logging_steps=10,
    save_steps=20,
    eval_steps=20,
    max_grad_norm=0.5,
    fp16=True,
    packing=True,
    report_to=["tensorboard"],
    eval_strategy="steps",  # Hoặc "epochs"
)

# ======= START TRAINING =======
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=val_data,
    peft_config=lora_config,
)
trainer.processing_class = tokenizer

trainer.train()

# ======= SAVE LORA ADAPTER =======
trainer.save_model("./qwen2.5-1.5B-Instruct-finetuned-perfume")


README.md:   0%|          | 0.00/531 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/896k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/170k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2810 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/503 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['messages'],
        num_rows: 2810
    })
    validation: Dataset({
        features: ['messages'],
        num_rows: 503
    })
})
Number of examples: 2810
Example: {'messages': [{'content': 'Bạn là một chuyên viên tư vấn nước hoa và chăm sóc khách hàng cho shop.', 'role': 'system'}, {'content': 'EDT có phù hợp để đi tiệc tối không?', 'role': 'user'}, {'content': 'EDT (Eau de Toilette) hoàn toàn có thể phù hợp đi tiệc tối, đặc biệt là những loại EDT có nhóm hương ấm áp hơn (như Oriental nhẹ, Woody) hoặc những bữa tiệc không quá trang trọng. Tuy nhiên, do độ lưu hương và tỏa hương thường không bằng EDP, bạn có thể cần xịt lại sau vài tiếng để duy trì mùi hương. Nếu muốn một mùi hương nổi bật và bền bỉ hơn cho tiệc tối, EDP thường là lựa chọn được ưu tiên hơn.', 'role': 'assistant'}]}


tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Map:   0%|          | 0/2810 [00:00<?, ? examples/s]

Map:   0%|          | 0/503 [00:00<?, ? examples/s]

Map:   0%|          | 0/2810 [00:00<?, ? examples/s]


📏 Max token length: 892
📊 90%: 323
📊 95%: 434
📊 99%: 619


config.json:   0%|          | 0.00/660 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

Converting train dataset to ChatML:   0%|          | 0/2810 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/2810 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/2810 [00:00<?, ? examples/s]

Packing train dataset:   0%|          | 0/2810 [00:00<?, ? examples/s]

Converting eval dataset to ChatML:   0%|          | 0/503 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/503 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/503 [00:00<?, ? examples/s]

Packing eval dataset:   0%|          | 0/503 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss
20,7.1025,1.66594
40,6.0698,1.457833
60,5.2556,1.373847
80,5.1238,1.329985
100,4.8299,1.310826
120,4.5104,1.295062
140,4.5292,1.289866
160,4.2757,1.290403
180,4.3211,1.289637


In [4]:
!cp -r ./qwen2.5-1.5B-Instruct-finetuned-perfume/checkpoint-140 ./perfume-best-lora

In [5]:
from transformers import AutoModelForCausalLM
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-1.5B-Instruct",
    device_map="auto",
    trust_remote_code=True
)

merged_model = PeftModel.from_pretrained(base_model, "./perfume-best-lora")
merged_model = merged_model.merge_and_unload()
merged_model.save_pretrained("./Qwen2.5-1.5B-Instruct-Perfumassist")

In [25]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct", trust_remote_code=True)
tokenizer.save_pretrained("./Qwen2.5-1.5B-Instruct-Perfumassist")

('./Qwen2.5-1.5B-Instruct-Perfumassist/tokenizer_config.json',
 './Qwen2.5-1.5B-Instruct-Perfumassist/special_tokens_map.json',
 './Qwen2.5-1.5B-Instruct-Perfumassist/vocab.json',
 './Qwen2.5-1.5B-Instruct-Perfumassist/merges.txt',
 './Qwen2.5-1.5B-Instruct-Perfumassist/added_tokens.json',
 './Qwen2.5-1.5B-Instruct-Perfumassist/tokenizer.json')

In [6]:
from kaggle_secrets import UserSecretsClient
from huggingface_hub import login

user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("hf_key")
login(token=hf_token)

In [10]:
from huggingface_hub import HfApi

api = HfApi()
api.create_repo(
    repo_id="phatvucoder/Qwen2.5-1.5B-Perfumassist",
    repo_type="model",
    private=False
)


RepoUrl('https://huggingface.co/phatvucoder/Qwen2.5-1.5B-Perfumassist', endpoint='https://huggingface.co', repo_type='model', repo_id='phatvucoder/Qwen2.5-1.5B-Perfumassist')

In [26]:
from huggingface_hub import upload_folder

upload_folder(
    folder_path="./Qwen2.5-1.5B-Instruct-Perfumassist",
    path_in_repo="",
    repo_id="phatvucoder/Qwen2.5-1.5B-Perfumassist",
    repo_type="model"
)


tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/phatvucoder/Qwen2.5-1.5B-Perfumassist/commit/fe860fd3fc8df9d10e04bb3f8d009011e1d78cda', commit_message='Upload folder using huggingface_hub', commit_description='', oid='fe860fd3fc8df9d10e04bb3f8d009011e1d78cda', pr_url=None, repo_url=RepoUrl('https://huggingface.co/phatvucoder/Qwen2.5-1.5B-Perfumassist', endpoint='https://huggingface.co', repo_type='model', repo_id='phatvucoder/Qwen2.5-1.5B-Perfumassist'), pr_revision=None, pr_num=None)