# Fine-tune Qwen3-14B on HAProxy Dataset (Kaggle)

Ce notebook permet d'effectuer le fine-tuning du modèle `unsloth/Qwen3-14B-unsloth-bnb-4bit` sur le dataset HAProxy.
Il est conçu pour être exécuté sur Kaggle avec un accélérateur GPU (T4 x2, P100 ou T4).

In [None]:
%%capture
# Install the Unsloth dependencies for Kaggle.
# The [kaggle-new] extra is used for recent Kaggle environments.
!pip install "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"
# Workaround for "AttributeError: 'int' object has no attribute 'mean'"
# (regression in Transformers 4.57+ / TRL 0.24+): downgrade to versions known
# to be stable with Unsloth. --no-deps keeps pip from re-resolving the other packages.
# NOTE(review): Qwen3 support appears to have been added in transformers 4.51;
# confirm that transformers==4.46.0 can actually load the Qwen3 checkpoint
# used by this notebook before relying on these pins.
!pip install --no-deps "transformers==4.46.0" "trl==0.12.0" "peft==0.13.2" "accelerate==1.0.1" "bitsandbytes==0.44.1"

In [None]:
from unsloth import FastLanguageModel
import torch

# Model loading settings.
model_name = "unsloth/Qwen3-14B-unsloth-bnb-4bit"
max_seq_length = 2048  # Supported sequence length
load_in_4bit = True  # 4-bit quantization to reduce memory usage
dtype = None  # Auto-detection (float16 for Tesla T4, bfloat16 for Ampere+)

# Load the checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
)

In [None]:
# LoRA configuration: wrap the loaded model with adapters (r=16, alpha=16,
# no dropout, no bias) on the projection layers listed below.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    target_modules=lora_target_modules,
    use_gradient_checkpointing="unsloth",
    use_rslora=False,
    loftq_config=None,
    random_state=3407,
)

In [None]:
from datasets import load_dataset

# Path to the dataset file on Kaggle
dataset_file = "/kaggle/input/haproxy/haproxy_dataset_qa.jsonl"

# Prompt template (Alpaca style). The two "{}" placeholders are filled with
# str.format: the first goes under "### Input:", the second under "### Response:".
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
You are an expert on HAProxy. Answer the following question based on the provided context.

### Input:
{}

### Response:
{}"""

# End-of-sequence token of the loaded tokenizer; appended to each formatted training text.
EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    """Build the training text for every example of a batch.

    Each question is inserted into the prompt template as the input and the
    matching response as the output; the EOS token is appended to each
    formatted text.

    Args:
        examples: Batch mapping with parallel "question" and "response"
            sequences.

    Returns:
        A dict with a single "text" key holding the list of formatted strings.
    """
    pairs = zip(examples["question"], examples["response"])
    return {"text": [alpaca_prompt.format(q, r) + EOS_TOKEN for q, r in pairs]}

# Load the JSONL file as the "train" split, then add the formatted "text"
# column by applying formatting_prompts_func batch by batch.
dataset = load_dataset(
    "json",
    data_files=dataset_file,
    split="train",
).map(formatting_prompts_func, batched=True)

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Use bf16 when the hardware supports it, otherwise fall back to fp16.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    output_dir="outputs",
    seed=3407,
    per_device_train_batch_size=1,  # Kept small to avoid OOM / freezes
    gradient_accumulation_steps=8,  # Raised to compensate for the small batch size
    max_steps=60,  # Adjust as needed (e.g. 300 for a full epoch on a small dataset)
    warmup_steps=5,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    weight_decay=0.01,
    optim="adamw_8bit",
    bf16=use_bf16,
    fp16=not use_bf16,
    logging_steps=1,
    report_to="none",  # Disables WandB to avoid blocking prompts
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=1,  # IMPORTANT: a single process to avoid deadlocks
    packing=False,
    args=training_args,
)

In [None]:
# Run the fine-tuning; the value returned by train() is kept in trainer_stats.
trainer_stats = trainer.train()

In [None]:
# Save the model and the tokenizer under the same output name.
output_model_name = "Qwen3-14B-unsloth-bnb-4bit-haproxy-expert"
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_model_name)

# Optional: save as GGUF if needed.
# model.save_pretrained_gguf(output_model_name, tokenizer, quantization_method = "q4_k_m")

In [None]:
# Inference test: format one question with an empty response slot and let the
# model generate the completion.
question = "Quelles sont les principales fonctionnalités de base d'HAProxy ?"
input_text = alpaca_prompt.format(question, "")

FastLanguageModel.for_inference(model)  # Enable Unsloth's faster inference mode

inputs = tokenizer([input_text], return_tensors="pt")
inputs = inputs.to("cuda")
outputs = model.generate(**inputs, max_new_tokens=512, use_cache=True)

# Decode the generated sequences and print the first (and only) one.
response = tokenizer.batch_decode(outputs)
print(response[0])