# Installation des dépendances

In [None]:
%%capture
# Install the notebook's dependencies. %%capture swallows this cell's output,
# so neither the print below nor the pip logs are displayed.
import os

# Dump the environment variable names (hidden by %%capture).
print(os.environ.keys())

# Colab is detected by searching for "COLAB_" in the concatenated variable names.
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth vllm
else:
    # [NOTE] Colab only: install without pulling dependencies.
    # Everywhere else use the plain `pip install unsloth vllm` from the branch above.
    !pip install --no-deps unsloth vllm
# Install the Transformers build tagged v4.49.0-Gemma-3 (Gemma-3 support).
!pip install --no-deps git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3
!pip install -U ipywidgets

# Configuration du modèle

In [None]:
from unsloth import FastModel
import torch

# Maximum sequence length handed to the model loader.
max_seq_length = 2048

# Load the Gemma-3 1B pretrained checkpoint in 4-bit (8-bit loading and full
# fine-tuning are both switched off).
# Alternative checkpoint: "unsloth/gemma-3-1b-pt-unsloth-bnb-4bit".
# Gated models additionally need a `token="hf_..."` argument.
model, tokenizer = FastModel.from_pretrained(
    model_name="google/gemma-3-1b-pt",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    load_in_8bit=False,
    full_finetuning=False,
)




In [None]:

# Attach LoRA adapters to the loaded model. Language layers and MLP modules
# are enabled; vision layers and attention modules are disabled.
model = FastModel.get_peft_model(
    model,
    # Which parts of the network receive adapters.
    finetune_vision_layers=False,
    finetune_language_layers=True,
    finetune_attention_modules=False,
    finetune_mlp_modules=True,
    # LoRA hyper-parameters: rank and alpha are kept equal.
    r=8,
    lora_alpha=8,
    lora_dropout=0,
    bias="none",
    random_state=3407,
)

In [None]:
from datasets import load_dataset

# Read the training CSV through the Hugging Face "csv" loader and take its
# "train" split.
dataset = load_dataset(
    "csv",
    data_files="Jeu_SLM_enrichi_avec_rues_compos_es__tirets_.csv",
)
train_dataset = dataset["train"]

# Show the first record, then let the notebook render the dataset summary.
print(train_dataset[0])
train_dataset

# Configuration de l'entraînement

In [None]:
import json

def format_example(example):
    """Build the training text for one dataset row.

    Args:
        example: Row mapping that contains a "prompt" entry.

    Returns:
        dict: A single-key mapping ``{"text": ...}`` holding the prompt
        followed by the EOS token of the notebook-level ``tokenizer``.
    """
    prompt_text = f"{example['prompt']}"
    return {"text": prompt_text + tokenizer.eos_token}

# Add the "text" column to every row, then drop every other column so the
# dataset only carries the training text.
formatted_dataset = train_dataset.map(format_example)
columns_to_drop = [
    name for name in formatted_dataset.column_names if name != "text"
]
formatted_dataset = formatted_dataset.remove_columns(columns_to_drop)

print(formatted_dataset[0])



In [None]:
from trl import SFTTrainer, SFTConfig

# Build the supervised fine-tuning trainer over the formatted dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=formatted_dataset,
    args=SFTConfig(
        # Reuse the value the model was loaded with instead of repeating the
        # literal 2048, so the two settings cannot drift apart.
        max_seq_length=max_seq_length,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,  # 2 x 4 = 8 sequences per optimizer step per device
        warmup_steps=5,
        max_steps=100,  # training length is bounded by steps, not epochs
        learning_rate=2e-4,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        report_to="none",  # no external experiment tracker
    ),
)


In [None]:
# Decode one tokenized training row back to text as a sanity check of the
# formatting (index 100 requires at least 101 rows in the dataset).
sample_token_ids = trainer.train_dataset[100]["input_ids"]
tokenizer.decode(sample_token_ids)

In [None]:
# @title Show current memory stats
# Record the GPU capacity and the memory already reserved before training;
# both values are reused by the post-training summary.
bytes_per_gib = 1024 ** 3
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / bytes_per_gib, 3)
max_memory = round(gpu_stats.total_memory / bytes_per_gib, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

In [None]:
# Run the fine-tuning loop. The returned stats object is kept because the
# summary cell reads trainer_stats.metrics["train_runtime"] from it.
trainer_stats = trainer.train()

In [None]:
# @title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training."
)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

# Save the adapter and tokenizer

In [None]:
# Deliberate stop: raising SystemExit here halts execution at this cell,
# presumably so the cells that follow are only run manually.
raise SystemExit("Execution stopped here on purpose.")

In [None]:
# Persist the fine-tuned model and its tokenizer in the same directory, and
# the model config in a separate one.
# Alternative kept for reference: trainer.save_model("gemma3-address-parser")
adapter_dir = "gemma3-address-parser-lora"
model.save_pretrained(adapter_dir, save_adapter=True)
tokenizer.save_pretrained(adapter_dir)

model.config.save_pretrained("gemma3-address-parser-base")

In [None]:
from unsloth import FastModel
# Reload the saved fine-tuned directory in 4-bit for an inference smoke test.
model, tokenizer = FastModel.from_pretrained(
    model_name = "gemma3-address-parser-lora", # YOUR MODEL YOU USED FOR TRAINING
    max_seq_length = 2048,
    load_in_4bit = True,
)

# Example inputs. Only one is active; the other used to be assigned and then
# immediately overwritten, so it is kept as a commented alternative.
# address = "Leclerc 10 bis route Victor Hugo 92200 Neuilly-sur-Seine"
address = "Nathalie Dubois, 25 Rue du Faubourg Saint-Antoine, 06000 Nice, France"

# Same prompt layout as the rest of the notebook: "Parsing: <address> \nChamps:".
prompt = f"Parsing: {address} \nChamps:"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# Greedy decoding. temperature/top_p/top_k were previously passed alongside
# do_sample=False, where they are ignored, so they are no longer supplied.
outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    do_sample=False,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
# Change to True to save to GGUF
model.save_pretrained_merged("gemma3-address-parser-finetune", tokenizer)

In [None]:
# Export the model to GGUF twice, first as F16 and then as Q8_0.
# For now only Q8_0, BF16, F16 supported
for gguf_quantization in ("F16", "Q8_0"):
    model.save_pretrained_gguf(
        "gemma3-address-parser-finetune",
        quantization_type=gguf_quantization,
    )

# Run inference with the fine-tuned model

In [None]:
# Parse one sample address with the model currently held in memory.
address = "Leclerc 10 bis route Victor Hugo 92200 Neuilly-sur-Seine"

# Same prompt layout as the rest of the notebook: "Parsing: <address> \nChamps:".
prompt = f"Parsing: {address} \nChamps:"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# Greedy decoding. temperature/top_p/top_k were previously passed alongside
# do_sample=False, where they are ignored, so they are no longer supplied.
outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    do_sample=False,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))


# Run the GGUF model with llama-cpp-python

In [None]:
# Deliberate stop: raising SystemExit here halts execution at this cell,
# presumably so the llama.cpp cells that follow are only run manually.
raise SystemExit("Execution stopped here on purpose.")

In [None]:
# Install the llama.cpp Python bindings imported by the next cell.
!pip install llama-cpp-python


In [None]:
from llama_cpp import Llama

import logging
#logging.getLogger("llama_cpp").setLevel(logging.WARNING)

# Load the F16 GGUF file with the llama.cpp bindings.
llm = Llama(
    model_path="gemma3-address-parser-finetune.F16.gguf",  # your gguf file path
    n_ctx=32768,
    n_threads=72,  # adjust to your CPU
    verbose=False,
)

# Alternative example input:
# address = "Leclerc 10 bis route Victor Hugo 92200 Neuilly-sur-Seine"
address = "Nathalie Dubois, 25 Rue du Faubourg Saint-Antoine, 06000 Nice, France"
prompt = f"Parsing: {address} \nChamps:"

# Generate a completion and print the text of the first returned choice.
response = llm(prompt, max_tokens=2048)
first_choice = response["choices"][0]
print(first_choice["text"])


# Evaluate against the test dataset

In [None]:
import json
import pandas as pd
from tqdm import tqdm
import math
from transformers import AutoTokenizer

def is_equivalent(expected, found):
    """Tell whether a predicted value matches the expected one.

    A missing expectation (anything ``pd.isna`` reports as missing) matches a
    missing prediction (``None`` or a float NaN). Every other pair is compared
    with ``==``.

    Args:
        expected: Reference value, typically a cell read from the test CSV.
        found: Value extracted from the model output, or ``None`` when absent.

    Returns:
        ``True`` for two missing values, otherwise the result of
        ``expected == found``.
    """
    found_is_missing = found is None or (
        isinstance(found, float) and math.isnan(found)
    )
    if pd.isna(expected) and found_is_missing:
        return True
    return expected == found

# Load the test set (path to your CSV).
df = pd.read_csv("fr-test-dataset2.csv")

# CSV columns that are compared against the keys of the model's JSON output.
expected_fields = [
    "Building_Number",
    "City",
    "Country",
    "Recipient",
    "Street_Name",
    "Zip_Code",
    "repetition",
    "type_voie",
]
from unsloth import FastModel
# Reload the saved fine-tuned directory in 4-bit and switch the model to
# evaluation mode before scoring.
model, tokenizer = FastModel.from_pretrained(
    model_name="gemma3-address-parser-lora",
    max_seq_length=2048,
    load_in_4bit=True,
)
model.eval()

# Score the model on the first MAX_EVAL_ROWS rows of the test DataFrame.
#
# Corrections compared with the earlier version of this cell:
#   * the loop broke on the 10th row before scoring it, so only 9 rows were
#     evaluated while the accuracy was divided by a hard-coded 10;
#   * the last summary line printed the constant 3 instead of field_total;
#   * "attendue" stored the list of field names instead of the row's values;
#   * a bare `except:` hid every error, and a missing "Champs:" marker or a
#     non-object JSON answer crashed the loop;
#   * sampling arguments were passed together with do_sample=False, where
#     they are ignored.
MAX_EVAL_ROWS = 10  # set to None to score the whole test set

exact_match = 0      # rows for which every expected field matched
field_total = 0      # number of (row, field) comparisons performed
field_correct = 0    # number of those comparisons that matched
address_parsed = 0   # rows actually sent through the model
sorties = []         # per-row records kept for later inspection

eval_rows = df if MAX_EVAL_ROWS is None else df.head(MAX_EVAL_ROWS)

for _, row in tqdm(eval_rows.iterrows(), total=len(eval_rows)):
    address_parsed += 1

    prompt = f"Parsing: {row['Address']} \nChamps:"

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    # Greedy decoding keeps the evaluation deterministic.
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=False,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )

    pred = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep the segment that follows the first "Champs:" marker (the prompt
    # ends with it); an absent marker yields an empty prediction.
    segments = pred.split("Champs:")
    pred_json_only = segments[1].strip() if len(segments) > 1 else ""

    try:
        pred_json = json.loads(pred_json_only)
    except json.JSONDecodeError:
        # Unparseable output counts as "no field predicted".
        pred_json = {}
    if not isinstance(pred_json, dict):
        # Valid JSON that is not an object (list, number, ...) has no fields.
        pred_json = {}

    match = True
    for field in expected_fields:
        expected_value = row[field]
        found_value = pred_json.get(field)
        field_total += 1

        # Strings are compared case-insensitively in the fallback below.
        if isinstance(found_value, str) and isinstance(expected_value, str):
            found_value = found_value.lower()
            expected_value = expected_value.lower()

        # First try the raw values (handles missing/NaN pairs), then fall
        # back to comparing the normalized string representations.
        if (
            is_equivalent(row[field], pred_json.get(field))
            or str(found_value) == str(expected_value)
        ):
            field_correct += 1
        else:
            match = False
            print(f"adresse : {str(row['Address'])}")
            print(f"field : {str(field)} :: expected : {str(expected_value)} :: found : {str(found_value)} ")

    if match:
        exact_match += 1
        print(f"ok : {str(row['Address'])}")

    sorties.append({
        "demande": row['Address'],
        "prediction": pred_json,
        # Expected values of this row, keyed by field name.
        "attendue": {field: row[field] for field in expected_fields},
        "match": match
    })


# Results: the denominators are the numbers of rows/fields actually scored.
total = address_parsed
if total and field_total:
    print(f"\nExact Match Accuracy: {exact_match / total:.2%}")
    print(f"\nExact Match : {str(exact_match)}")
    print(f"Field-Level Accuracy: {field_correct / field_total:.2%}")
    print(f"Field-Level : {str(field_correct)}")
    print(f"total field : {str(field_total)}")
else:
    print("No rows were evaluated; accuracy is undefined.")