# Installation des dépendances

In [1]:
%%capture
# Installs the notebook's dependencies. %%capture silences this cell's output,
# so neither the print below nor pip's logs are displayed.
import os

# Debug aid: lists the environment variable names (keys only, not values).
print(os.environ.keys())

# Colab is detected by searching for "COLAB_" in the concatenated variable names.
if "COLAB_" not in "".join(os.environ.keys()):
    # Not on Colab: plain install, letting pip resolve dependencies.
    !pip install unsloth vllm
else:
    # [NOTE] Do the below ONLY in Colab! Elsewhere use [[pip install unsloth vllm]]
    # --no-deps presumably protects Colab's preinstalled packages -- TODO confirm.
    !pip install --no-deps unsloth vllm
# Install latest Hugging Face for Gemma-3! (pinned to the v4.49.0-Gemma-3 tag)
!pip install --no-deps git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3
!pip install -U ipywidgets

# Configuration du modèle

In [2]:
from unsloth import FastModel
import torch

# Context length passed to the model loader.
max_seq_length = 2048

# Load the 4-bit Gemma-3 1B checkpoint together with its tokenizer.
model, tokenizer = FastModel.from_pretrained(
    model_name="unsloth/gemma-3-1b-pt-unsloth-bnb-4bit",
    # Choose any for long context!
    max_seq_length=max_seq_length,
    # 4-bit quantization to reduce memory.
    load_in_4bit=True,
    # [NEW!] A bit more accurate, uses 2x memory.
    load_in_8bit=False,
    # [NEW!] Full finetuning is available but switched off here.
    full_finetuning=False,
    # token="hf_...",  # use one if using gated models
)




🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 04-08 11:37:59 [__init__.py:239] Automatically detected platform cuda.
==((====))==  Unsloth 2025.3.19: Fast Gemma3 patching. Transformers: 4.50.0.dev0. vLLM: 0.8.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.568 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


In [3]:

# Wrap the loaded model with LoRA adapters.
model = FastModel.get_peft_model(
    model,
    # Which parts of the network receive adapters.
    finetune_vision_layers=False,      # Turn off for just text!
    finetune_language_layers=True,     # Should leave on!
    finetune_attention_modules=False,  # Off here (upstream note: attention is good for GRPO)
    finetune_mlp_modules=True,         # Should leave on always!
    # LoRA hyper-parameters.
    r=8,           # Larger = higher accuracy, but might overfit
    lora_alpha=8,  # Recommended alpha == r at least
    lora_dropout=0,
    bias="none",
    random_state=3407,
)

Unsloth: Making `model.base_model.model.model` require gradients


In [4]:
from datasets import load_dataset

# Read the local CSV file through the Hugging Face "csv" loader.
dataset = load_dataset(path="csv", data_files="fr-train-dataset2.csv")

# A single data file ends up under the "train" split.
train_dataset = dataset["train"]

# Show the first row, then the dataset summary as the cell's output.
print(train_dataset[0])
train_dataset

{'Address': 'Supermarché Carrefour, 147 Rue de Rivoli, 75001 Paris, France', 'Building_Number': 147, 'City': 'Paris', 'Recipient': 'Supermarché Carrefour', 'Street_Name_old': 'Rue de Rivoli', 'Street_Name': 'de Rivoli', 'type_voie': 'Rue', 'Zip_Code': 75001, 'Country': 'France', 'repetition': None}


Dataset({
    features: ['Address', 'Building_Number', 'City', 'Recipient', 'Street_Name_old', 'Street_Name', 'type_voie', 'Zip_Code', 'Country', 'repetition'],
    num_rows: 2695
})

In [5]:
# Display the raw address string of the first training row.
train_dataset[0]["Address"]

'Supermarché Carrefour, 147 Rue de Rivoli, 75001 Paris, France'

In [6]:
# Column holding the raw address text (the model input).
src_col = "Address"

# Columns left out of the target: the input itself and the legacy street column.
exclude_cols = [src_col, "Street_Name_old"]  # 👈 Add any column to ignore here

# Every remaining column becomes a field of the target.
tgt_cols = [name for name in train_dataset.column_names if name not in exclude_cols]


# Transform
def transform(example, source_column=None, target_columns=None):
    """Turn one CSV row into a question/answer pair.

    Args:
        example: Mapping of column name to value for a single row.
        source_column: Column holding the raw address text. When omitted, the
            notebook-level ``src_col`` is used.
        target_columns: Columns copied into the answer. When omitted, the
            notebook-level ``tgt_cols`` is used.

    Returns:
        A dict with ``"question"`` (the value of the source column) and
        ``"answer"`` (a dict mapping each target column to its value).

    Raises:
        KeyError: If a requested column is missing from ``example``.
    """
    # Fall back to the notebook globals so `dataset.map(transform)` keeps working.
    if source_column is None:
        source_column = src_col
    if target_columns is None:
        target_columns = tgt_cols
    return {
        "question": example[source_column],
        "answer": {column: example[column] for column in target_columns},
    }

# Add the "question" and "answer" columns to every row.
dataset_refined = train_dataset.map(transform)

# Preview: first transformed row, then the resulting column names.
print(dataset_refined[0], dataset_refined.column_names, sep="\n")


{'Address': 'Supermarché Carrefour, 147 Rue de Rivoli, 75001 Paris, France', 'Building_Number': 147, 'City': 'Paris', 'Recipient': 'Supermarché Carrefour', 'Street_Name_old': 'Rue de Rivoli', 'Street_Name': 'de Rivoli', 'type_voie': 'Rue', 'Zip_Code': 75001, 'Country': 'France', 'repetition': None, 'question': 'Supermarché Carrefour, 147 Rue de Rivoli, 75001 Paris, France', 'answer': {'Building_Number': 147, 'City': 'Paris', 'Country': 'France', 'Recipient': 'Supermarché Carrefour', 'Street_Name': 'de Rivoli', 'Zip_Code': 75001, 'repetition': None, 'type_voie': 'Rue'}}
['Address', 'Building_Number', 'City', 'Recipient', 'Street_Name_old', 'Street_Name', 'type_voie', 'Zip_Code', 'Country', 'repetition', 'question', 'answer']


In [7]:
# Display the target fields built for the first row.
dataset_refined[0]["answer"]

{'Building_Number': 147,
 'City': 'Paris',
 'Country': 'France',
 'Recipient': 'Supermarché Carrefour',
 'Street_Name': 'de Rivoli',
 'Zip_Code': 75001,
 'repetition': None,
 'type_voie': 'Rue'}

In [8]:
# Display the input text built for the first row.
dataset_refined[0]["question"]

'Supermarché Carrefour, 147 Rue de Rivoli, 75001 Paris, France'

# Configuration de l'entraînement

In [9]:
import json

def format_example(example):
    """Render one question/answer row as a single training string.

    The answer dict is serialised as JSON with non-ASCII characters kept
    as-is, placed after the question, and the tokenizer's EOS token is
    appended at the end.

    Returns:
        A dict with a single ``"text"`` key.
    """
    address = example["question"]
    fields_json = json.dumps(example["answer"], ensure_ascii=False)
    prompt_and_target = f"Parsing: {address} \nChamps: {fields_json}"
    return {"text": prompt_and_target + tokenizer.eos_token}

# Add the "text" column, then drop every other column.
formatted_dataset = dataset_refined.map(format_example)
formatted_dataset = formatted_dataset.remove_columns(
    list(filter(lambda name: name != "text", formatted_dataset.column_names))
)

# Preview the first formatted training example.
print(formatted_dataset[0])



{'text': 'Parsing: Supermarché Carrefour, 147 Rue de Rivoli, 75001 Paris, France \nChamps: {"Building_Number": 147, "City": "Paris", "Country": "France", "Recipient": "Supermarché Carrefour", "Street_Name": "de Rivoli", "Zip_Code": 75001, "repetition": null, "type_voie": "Rue"}<eos>'}


In [10]:
from trl import SFTTrainer, SFTConfig

# Supervised fine-tuning on the "text" column of `formatted_dataset`.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=formatted_dataset,
    args=SFTConfig(
        # Reuse the length the model was loaded with instead of repeating the
        # literal 2048, so the two settings cannot drift apart.
        max_seq_length=max_seq_length,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,  # effective batch size = 2 * 4 = 8
        warmup_steps=5,
        max_steps=30,  # training stops after 30 optimizer steps
        learning_rate=2e-4,
        logging_steps=1,  # log the loss at every step
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        report_to="none",  # no external experiment tracker
    ),
)


Unsloth: Switching to float32 training since model cannot work with float16


In [11]:
# Decode one tokenized training example back to text to check what the trainer
# will see (including the special tokens around it).
tokenizer.decode(trainer.train_dataset[100]["input_ids"])

'<bos>Parsing: 79 Place de la République, 35000 Rennes, France \nChamps: {"Building_Number": 79, "City": "Rennes", "Country": "France", "Recipient": null, "Street_Name": "de la République", "Zip_Code": 35000, "repetition": null, "type_voie": "Place"}<eos>'

In [12]:
# @title Show current memory stats
# Baseline before training: total GPU memory and peak reserved memory so far,
# both converted from bytes to GiB and rounded to 3 decimals.
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 1024**3, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.568 GB.
1.66 GB of memory reserved.


In [13]:
# Run the fine-tuning; the returned object's `metrics` are read by the
# memory/time report cell.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,695 | Num Epochs = 1 | Total steps = 30
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 5,031,936/1,000,000,000 (0.50% trained)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
1,2.739
2,2.7118
3,2.6703
4,2.7182
5,2.6712
6,2.533
7,2.3258
8,2.2003
9,2.0313
10,1.873


In [14]:
# @title Show final memory and time stats
# Peak reserved GPU memory (GiB) after training, the share above the
# pre-training baseline, and both as a percentage of the GPU's capacity.
used_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

# One report line per metric, printed in the same order as before.
print(
    f"{trainer_stats.metrics['train_runtime']} seconds used for training.",
    f"{round(trainer_stats.metrics['train_runtime'] / 60, 2)} minutes used for training.",
    f"Peak reserved memory = {used_memory} GB.",
    f"Peak reserved memory for training = {used_memory_for_lora} GB.",
    f"Peak reserved memory % of max memory = {used_percentage} %.",
    f"Peak reserved memory for training % of max memory = {lora_percentage} %.",
    sep="\n",
)

76.2106 seconds used for training.
1.27 minutes used for training.
Peak reserved memory = 1.66 GB.
Peak reserved memory for training = 0.0 GB.
Peak reserved memory % of max memory = 11.395 %.
Peak reserved memory for training % of max memory = 0.0 %.


# Save It

In [15]:
# Deliberate stop: halts a "Run All" here so the save/export cells below only
# run when executed by hand.
raise SystemExit("Execution stopped here on purpose.")

SystemExit: Execution stopped here on purpose.

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [16]:
# Disabled alternative: trainer.save_model("gemma3-address-parser")
# Save the fine-tuned model and its tokenizer into the same directory.
# NOTE(review): `save_adapter=True` is not an argument I can confirm for
# save_pretrained -- verify it is recognised rather than silently ignored.
model.save_pretrained("gemma3-address-parser-lora", save_adapter=True)
tokenizer.save_pretrained("gemma3-address-parser-lora")

# Also write the model config alone into a separate directory.
model.config.save_pretrained("gemma3-address-parser-base")

In [19]:
from unsloth import FastModel
# Reload model and tokenizer from the directory written by the save cell
# (YOUR MODEL YOU USED FOR TRAINING).
model, tokenizer = FastModel.from_pretrained(
    model_name="gemma3-address-parser-lora",
    max_seq_length=2048,
    load_in_4bit=True,
)

# Sample address written without separators, used to try out the parser.
address = "Leclerc 10 bis route Victor Hugo 92200 Neuilly-sur-Seine"

# Same template as the training text, cut right after "Champs:" so the model
# completes the JSON fields.
prompt = f"Parsing: {address} \nChamps:"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# Greedy decoding (do_sample=False) keeps the parse deterministic.
# temperature / top_p / top_k were removed: they have no effect without
# sampling and can make transformers warn about unused generation flags.
outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    do_sample=False,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

# Prints the prompt followed by the generated fields, without special tokens.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

==((====))==  Unsloth 2025.3.19: Fast Gemma3 patching. Transformers: 4.50.0.dev0. vLLM: 0.8.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.568 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.
Parsing: Leclerc 10 bis route Victor Hugo 92200 Neuilly-sur-Seine 
Champs: {"Building_Number": 10, "City": "Neuilly-sur-Seine", "Country": "France", "Recipient": "Leclerc", "Street_Name": "Victor Hugo", "Zip_Code": 92200, "repetition": "bis", "type_voie": "route"}


In [None]:
# Save the model in merged form, together with the tokenizer, under
# "gemma3-address-parser-finetune". (An earlier comment here said "Change to
# True to save to GGUF", but this cell has no such flag; GGUF export is done
# by a separate save_pretrained_gguf call.)


model.save_pretrained_merged("gemma3-address-parser-finetune", tokenizer)

In [26]:
# Export the model saved under "gemma3-address-parser-finetune" to a GGUF file.
model.save_pretrained_gguf("gemma3-address-parser-finetune",
    quantization_type = "F16", # For now only Q8_0, BF16, F16 supported
)

Unsloth: Updating system package directories
Unsloth: All commands will now use admin permissions (sudo)
Unsloth: Install GGUF and other packages
Unsloth GGUF:hf-to-gguf:Loading model: gemma3-address-parser-finetune
Unsloth GGUF:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
Unsloth GGUF:hf-to-gguf:Exporting model...
Unsloth GGUF:hf-to-gguf:gguf: loading model part 'model.safetensors'
Unsloth GGUF:hf-to-gguf:token_embd.weight,                 torch.bfloat16 --> F16, shape = {1152, 262144}
Unsloth GGUF:hf-to-gguf:output_norm.weight,                torch.bfloat16 --> F32, shape = {1152}
Unsloth GGUF:hf-to-gguf:Set meta model
Unsloth GGUF:hf-to-gguf:Set model parameters
Unsloth GGUF:hf-to-gguf:Set model tokenizer
Unsloth GGUF:gguf.vocab:Setting special token type bos to 2
Unsloth GGUF:gguf.vocab:Setting special token type eos to 1
Unsloth GGUF:gguf.vocab:Setting special token type unk to 3
Unsloth GGUF:gguf.vocab:Setting special token type pad to 0
Unsloth GGUF:gguf.vocab

Unsloth: GGUF conversion:   0%|          | 0/100 [00:00<?, ?it/s]

Unsloth GGUF:hf-to-gguf:Model successfully exported to ./
Unsloth: Converted to gemma3-address-parser-finetune.F16.gguf with size = 2.0G
Unsloth: Successfully saved GGUF to:
gemma3-address-parser-finetune.F16.gguf


['gemma3-address-parser-finetune.F16.gguf']

Unsloth: Merging weights into 16bit: 100%|██████████| 1/1 [00:13<00:00, 13.61s/it]


Unsloth: Updating system package directories
Unsloth: All commands will now use admin permissions (sudo)
Unsloth: Install GGUF and other packages


RuntimeError: Unsloth: `gemma3-address-parser-finetune-gguf` does not exist?

In [None]:
from unsloth import FastModel

# Load the saved LoRA adapter directory. The previous version of this cell
# passed `base_model_name=` and called `FastLanguageModel.merge_lora`, neither
# of which is a known Unsloth API; it now follows the same FastModel loading
# path used elsewhere in this notebook.
model, tokenizer = FastModel.from_pretrained(
    model_name = "gemma3-address-parser-lora",  # The LoRA directory (with adapter_config.json)
    load_in_4bit = False,
)

# Merge LoRA into the base weights and save the merged model (base + LoRA).
# The tokenizer is passed along so it is saved into the same directory.
model.save_pretrained_merged("gemma3-address-parser-merged", tokenizer)


# Run it

In [None]:
# Sample address written without separators, used to try out the parser.
address = "Leclerc 10 bis route Victor Hugo 92200 Neuilly-sur-Seine"

# Same template as the training text, cut right after "Champs:" so the model
# completes the JSON fields.
prompt = f"Parsing: {address} \nChamps:"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# Greedy decoding (do_sample=False) keeps the parse deterministic.
# temperature / top_p / top_k were removed: they have no effect without
# sampling and can make transformers warn about unused generation flags.
outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    do_sample=False,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

# Prints the prompt followed by the generated fields, without special tokens.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


# Use Unsloth to run the model

In [None]:
# Deliberate stop: the cells below are marked "skip" and are not meant to run
# as part of a "Run All".
raise SystemExit("Execution stopped here on purpose.")

In [None]:
# skip
# Alternative loader kept for reference; a SystemExit cell sits right before it.
from unsloth import FastLanguageModel
import torch

# NOTE(review): the load logs earlier in this notebook report that float16
# precision does not work for gemma3 and that float32 is used instead --
# confirm that requesting torch.float16 here behaves as intended.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "gemma3-address-parser-lora", # YOUR MODEL YOU USED FOR TRAINING
    max_seq_length = 1024,
    dtype = torch.float16,
    load_in_4bit = False,
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

In [None]:
# skip
# Sample address (this variant uses "avenue") for the alternative loader.
address = "Leclerc 10 bis avenue Victor Hugo 92200 Neuilly-sur-Seine"
# Same template as the training text, cut right after "Champs:".
prompt = f"Parsing: {address} \nChamps:"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
# Greedy decoding (do_sample=False) keeps the parse deterministic.
# temperature / top_p / top_k were removed: they have no effect without
# sampling and can make transformers warn about unused generation flags.
outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    do_sample=False,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

# Prints the prompt followed by the generated fields, without special tokens.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))