In [1]:
import os
import wandb
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTTrainer, SFTConfig

# Clear GPU memory at start
torch.cuda.empty_cache()
print(f"GPU memory before: {torch.cuda.memory_allocated()/1024**3:.2f}GB")

# Set memory management environment variable
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

  from .autonotebook import tqdm as notebook_tqdm


GPU memory before: 0.00GB


In [2]:
wandb.login()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33msohanv[0m ([33msohv[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Currently logged in as: [33msohanv[0m ([33msohv[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
wandb_project = "qwen-big-auto-correct-sft"
wandb_run_name = "qwen-big-auto-correct-sft-run"

wandb.init(
    project=wandb_project,
    name=wandb_run_name,
    mode="online",
)

In [4]:
dataset_path = "/root/auto_correct.jsonl"
dataset = load_dataset("json", data_files=dataset_path)

Generating train split: 6000 examples [00:00, 173438.99 examples/s]
Generating train split: 6000 examples [00:00, 173438.99 examples/s]


In [5]:
split = dataset["train"].train_test_split(
    test_size=0.1,
    seed=42,
    shuffle=True,
)
dataset = {"train": split["train"], "validation": split["test"]}

In [6]:
# Preprocess the dataset to create a 'text' field instead of using formatting_func
def preprocess_example(example):
    """Convert the conversation format to a single text field"""
    messages = example["messages"]
    formatted_text = ""
    
    for message in messages:
        role = message["role"]
        content = message["content"]
        
        # Extract text content safely
        if isinstance(content, dict) and "parts" in content:
            parts = content["parts"]
            if isinstance(parts, list):
                content_text = " ".join(str(part) for part in parts)
            else:
                content_text = str(parts)
        else:
            content_text = str(content)
        
        # Format based on role
        if role == "system":
            formatted_text += f"<|system|>\n{content_text}\n"
        elif role == "user":
            formatted_text += f"<|user|>\n{content_text}\n"
        elif role == "assistant":
            formatted_text += f"<|assistant|>\n{content_text}\n"
    
    return {"text": formatted_text.strip()}

# Apply preprocessing to create text field
print("Preprocessing dataset...")
train_dataset = dataset["train"].map(preprocess_example, remove_columns=dataset["train"].column_names)
eval_dataset = dataset["validation"].map(preprocess_example, remove_columns=dataset["validation"].column_names)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Eval dataset size: {len(eval_dataset)}")
print(f"Sample text field: {train_dataset[0]['text'][:200]}...")

Preprocessing dataset...


Map: 100%|██████████| 5400/5400 [00:00<00:00, 16546.74 examples/s]
Map: 100%|██████████| 5400/5400 [00:00<00:00, 16546.74 examples/s]
Map: 100%|██████████| 600/600 [00:00<00:00, 16309.57 examples/s]

Train dataset size: 5400
Eval dataset size: 600
Sample text field: <|system|>
You are ChatGPT, a large language model trained by OpenAI.
<|user|>
I've noticed that the tires on my 2024 Honda are wearing unevenly, and I suspect it might be due to a problem with the fr...





In [7]:
model_name = "Qwen/Qwen2.5-1.5B"

supports_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
load_dtype = torch.bfloat16 if supports_bf16 else torch.float32

print(f"[INFO] BF16 supported: {supports_bf16}. Loading dtype: {load_dtype}")

# Check available GPU memory before loading
if torch.cuda.is_available():
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
    allocated_memory = torch.cuda.memory_allocated(0) / 1024**3
    free_memory = gpu_memory - allocated_memory
    print(f"[INFO] GPU total: {gpu_memory:.1f}GB, allocated: {allocated_memory:.1f}GB, free: {free_memory:.1f}GB")

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=load_dtype,
    device_map="auto",                    # Automatically distribute layers across devices
    offload_folder="./offload",           # Disk offload folder for weights
    low_cpu_mem_usage=True,              # Reduce CPU memory usage during loading
    trust_remote_code=True,
    # max_memory={0: "10GB", "cpu": "30GB"},  # Uncomment to limit GPU usage
)

model.config.use_cache = False

# Check memory after model loading
if torch.cuda.is_available():
    allocated_after = torch.cuda.memory_allocated(0) / 1024**3
    print(f"[INFO] GPU memory after model loading: {allocated_after:.1f}GB")

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id
tokenizer.padding_side = "right"

[INFO] BF16 supported: True. Loading dtype: torch.bfloat16
[INFO] GPU total: 23.6GB, allocated: 0.0GB, free: 23.6GB
[INFO] GPU memory after model loading: 2.9GB
[INFO] GPU memory after model loading: 2.9GB


In [8]:
sft_config = SFTConfig(
    output_dir="/root/models/finetuned_qwen2.5-1.5b-auto-correct/",
    run_name=wandb_run_name,
    num_train_epochs=6,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=1e-5,
    weight_decay=0.01,
    optim="adamw_torch",
    lr_scheduler_type="linear",
    warmup_steps=100,
    max_grad_norm=1.0,
    fp16= not supports_bf16,
    bf16=supports_bf16,
    gradient_checkpointing=True,
    report_to="wandb",
    logging_dir="/root/logs/qwen-auto-1.5",
    logging_strategy="steps",
    logging_steps=50,
    eval_strategy="steps",
    eval_steps=100,
    save_strategy="epoch",
    seed=42,
    push_to_hub=False,
    group_by_length=True,
)

In [9]:
if sft_config.gradient_checkpointing and hasattr(model, "gradient_checkpointing_enable"):
    model.gradient_checkpointing_enable()

print("\n[DEBUG] Sample preprocessed training strings:")
for i in range(min(3, len(train_dataset))):
    text = train_dataset[i]["text"]
    print(text[:800] + "...")
    print("-" * 80)

trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    # No formatting_func needed since we preprocessed the data
)

print("Starting fine-tuning with SFTTrainer...")
train_result = trainer.train()
print("\n[DEBUG] Train result:", train_result)

print("\n[DEBUG] Running final evaluation...")
eval_results = trainer.evaluate()
print("\n[DEBUG] Final evaluation results:", eval_results)
try:
    wandb.log(eval_results)
except Exception as e:
    print(f"[WARN] wandb.log failed: {e}")

final_model_dir = "/root/models/finetuned_qwen2.5-1.5b-auto-correct/"
os.makedirs(final_model_dir, exist_ok=True)
print(f"\nSaving final model to {final_model_dir}")
trainer.save_model(final_model_dir)
tokenizer.save_pretrained(final_model_dir)

print("Fine-tuning done successfully!")


[DEBUG] Sample preprocessed training strings:
<|system|>
You are ChatGPT, a large language model trained by OpenAI.
<|user|>
I've noticed that the tires on my 2024 Honda are wearing unevenly, and I suspect it might be due to a problem with the front-left ball joint. How can I accurately diagnose whether the ball joint is the issue, and what steps should I take to repair it if necessary?
<|assistant|>
To diagnose a bad ball joint, you can perform a visual inspection for any cracks or leaks in the joints. You can also lift the vehicle and move the wheel for any play; a functional ball joint should not have excessive movement. Additionally, listen for any unusual noises while driving over bumps. If the ball joint is indeed faulty, it should be replaced immediately, as failing to do so can affect vehicle control and tire wear. You should ...
--------------------------------------------------------------------------------
<|system|>
You are ChatGPT, a large language model trained by OpenAI

Adding EOS to train dataset: 100%|██████████| 5400/5400 [00:00<00:00, 78950.23 examples/s]
Adding EOS to train dataset: 100%|██████████| 5400/5400 [00:00<00:00, 78950.23 examples/s]
Tokenizing train dataset: 100%|██████████| 5400/5400 [00:01<00:00, 3551.93 examples/s]
Tokenizing train dataset: 100%|██████████| 5400/5400 [00:01<00:00, 3551.93 examples/s]
Truncating train dataset: 100%|██████████| 5400/5400 [00:00<00:00, 751143.89 examples/s]
Truncating train dataset: 100%|██████████| 5400/5400 [00:00<00:00, 751143.89 examples/s]
Adding EOS to eval dataset: 100%|██████████| 600/600 [00:00<00:00, 64713.60 examples/s]
Adding EOS to eval dataset: 100%|██████████| 600/600 [00:00<00:00, 64713.60 examples/s]
Tokenizing eval dataset: 100%|██████████| 600/600 [00:00<00:00, 3515.60 examples/s]
Tokenizing eval dataset: 100%|██████████| 600/600 [00:00<00:00, 3515.60 examples/s]s]
Truncating eval dataset: 100%|██████████| 600/600 [00:00<00:00, 281496.91 examples/s]



Starting fine-tuning with SFTTrainer...


Step,Training Loss,Validation Loss
100,1.2143,1.144202
200,1.0922,1.06568
300,1.0599,1.037297
400,1.0074,1.021329
500,1.0021,1.011263
600,1.0003,1.002257
700,0.9805,0.992135
800,0.9825,0.987023
900,0.9527,0.982597
1000,0.9683,0.979173



[DEBUG] Train result: TrainOutput(global_step=4050, training_loss=0.9548715841034312, metrics={'train_runtime': 2968.8828, 'train_samples_per_second': 10.913, 'train_steps_per_second': 1.364, 'total_flos': 4.674275844350976e+16, 'train_loss': 0.9548715841034312})

[DEBUG] Running final evaluation...



[DEBUG] Final evaluation results: {'eval_loss': 0.954037070274353, 'eval_runtime': 10.421, 'eval_samples_per_second': 57.576, 'eval_steps_per_second': 28.788}

Saving final model to /root/models/finetuned_qwen2.5-1.5b-auto-correct/
Fine-tuning done successfully!
Fine-tuning done successfully!


In [10]:
from huggingface_hub import HfApi, login
import os

# Login to Hugging Face with token
print("Logging in to Hugging Face...")
# Replace with your actual HF token
HF_TOKEN = "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"  # get from https://huggingface.co/settings/tokens
login(token=HF_TOKEN)

Logging in to Hugging Face...


In [11]:
# Model information
model_path = "/root/models/finetuned_qwen2.5-1.5b-auto-correct/"
repo_name = "sohv/finetuned-qwen2.5-1.5b-auto-correct"

print(f"Preparing to push model from: {model_path}")
print(f"Target repository: {repo_name}")

# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Push to hub
print("Pushing model to Hugging Face Hub...")
model.push_to_hub(repo_name, private=False)  # Set private=True if you want a private repo
tokenizer.push_to_hub(repo_name, private=False)

print(f"Model successfully pushed to: https://huggingface.co/{repo_name}")

Preparing to push model from: /root/models/finetuned_qwen2.5-1.5b-auto-correct/
Target repository: sohv/finetuned-qwen2.5-1.5b-auto-correct
Pushing model to Hugging Face Hub...
Pushing model to Hugging Face Hub...


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            
[A
Processing Files (0 / 1)                :   0%|          | 1.17MB / 6.17GB, 1.47MB/s  
[A
Processing Files (0 / 1)                :   0%|          | 1.17MB / 6.17GB, 1.47MB/s  

[A[A
[A

Processing Files (0 / 2)                :   0%|          | 3.52MB / 6.17GB, 3.52MB/s  

[A[A
[A

Processing Files (0 / 2)                :   0%|          | 3.52MB / 6.17GB, 3.52MB/s  
[A

Processing Files (0 / 2)                :   0%|          | 9.38MB / 6.17GB, 7.82MB/s  
[A

Processing Files (0 / 2)                :   0%|          | 9.38MB / 6.17GB, 7.82MB/s  
[A

Processing Files (0 / 2)                :   0%|          | 17.6MB / 6.17GB, 12.6MB/s  
[A

Processing Files (0 / 2)                :   0%|          | 17.6MB / 6.17GB, 12.6MB/s  
[A

Processing Files (0 / 2)                :   1%|          | 37.5MB / 6.17GB, 23.4MB/s  
[A

Processing Files (0 / 2)                :   1%|          | 37.5MB / 6.

Model successfully pushed to: https://huggingface.co/sohv/finetuned-qwen2.5-1.5b-auto-correct
