classification with a fine-tuned model 


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, pipeline
from datasets import Dataset
import json
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

# Conversion en un objet Dataset compatible avec Hugging Face.
with open("jsons/enriched_dataset.json", "r", encoding="utf-8") as f:
    raw_data = json.load(f)

texts = [item["text"] for item in raw_data["dataset"]]
labels = [item["label"] for item in raw_data["dataset"]]

dataset = Dataset.from_dict({"text": texts, "label": labels})

# Step 2: Load tokenizer and model
model_name = "cartesinus/multilingual_minilm-amazon_massive-intent_eu7"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,  # label 0: suivi, label 1: réclamation, label 2: commande
    ignore_mismatched_sizes=True 
    # ignore_mismatched_sizes=True permet d’ajuster automatiquement la dernière couche de classification.
) 

# Step 3: Define tokenization function
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)
# Convertit les textes en tokens utilisables par le modèle, avec padding/truncation à une longueur fixe de 128.

# Step 4: Split the dataset into train and test sets
# Séparation du jeu de données en 80% entraînement et 20% et tokenization des 2
split_dataset = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = split_dataset["train"].map(tokenize_function, batched=True)
eval_dataset = split_dataset["test"].map(tokenize_function, batched=True)

# Step 5: Define TrainingArguments
training_args = TrainingArguments(
    output_dir="./fine_tuned_model_11",      # Save directory
    evaluation_strategy="epoch",             # Evaluate after every epoch
    learning_rate=2e-5,                      # Learning rate
    per_device_train_batch_size=8,           # Train batch size
    per_device_eval_batch_size=8,            # Eval batch size
    num_train_epochs=5,                      # Number of epochs
    weight_decay=0.01,                       # Weight decay
    save_strategy="epoch",                   # Save after each epoch
    logging_dir="./logs",                    # Logging directory
    logging_steps=1,                         # Log every step
    load_best_model_at_end=True,             # Load best model
    metric_for_best_model="accuracy",        # Monitor accuracy
    greater_is_better=True,                  # Higher accuracy is better
    report_to="none",                        # Disable external logging
)

# Step 6: Define compute_metrics function
# Compare les prédictions du modèle aux vraies étiquettes
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy_score(labels, predictions),
        "f1": f1_score(labels, predictions, average="weighted")
    }

# Step 7: Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Step 8: Fine-tune the model
trainer.train()

# Step 9: Save the fine-tuned model and tokenizer
trainer.save_model("./fine_tuned_model_11")
tokenizer.save_pretrained("./fine_tuned_model_11")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at cartesinus/multilingual_minilm-amazon_massive-intent_eu7 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([60]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([60, 384]) in the checkpoint and torch.Size([3, 384]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/314 [00:00<?, ? examples/s]

Map:   0%|          | 0/79 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss


Loading and using the model

In [1]:
from transformers import pipeline

# pipeline("text-classification", ...) charge automatiquement le tokenizer et le modèle.
classifier = pipeline("text-classification", model="./fine_tuned_model_11")
text= "ou est mon livreur?"
result= classifier(text)
print(result)




Device set to use cpu


[{'label': 'LABEL_0', 'score': 0.638332188129425}]


Classification with a RAG-based model

In [2]:
import json
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors

# Load dataset
with open('enriched_dataset.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

texts = [item['text'] for item in data['dataset']]
labels = [item['label'] for item in data['dataset']]

# Label mapping
label_names = {
    0: "Suivi (Tracking)",
    1: "Réclamation (Complaint)",
    2: "Commande (Order)",
    4: "Ambiguë (Ambiguous)"
}

# Load embedding model
# Utilisation d’un modèle multilingue pour transformer chaque phrase en vecteur dense de taille fixe.
# Ces vecteurs seront utilisés pour mesurer la similarité.
embedding_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
text_embeddings = embedding_model.encode(texts)

# Fit Nearest Neighbors
# Entraînement d’un modèle k-NN sur les embeddings.
# On utilisera les 3 voisins les plus proches pour classifier une nouvelle phrase.
n_neighbors = 3
nn = NearestNeighbors(n_neighbors=n_neighbors)
nn.fit(text_embeddings)

# Classifier function with output
# Fonction qui prend une phrase en entrée, génère son embedding, et la compare aux voisins.
def classify_input(text, ambiguity_threshold=0.15):
    # Calcule l’embedding du texte d’entrée, Cherche les k voisins les plus proches et Récupère leurs labels.
    embedding = embedding_model.encode([text])
    distances, indices = nn.kneighbors(embedding)
    neighbor_labels = np.array(labels)[indices[0]]

# Calcule la distribution des labels dans les voisins pour estimer une probabilité par intention (probabilités d’intention)
    counts = np.bincount(neighbor_labels, minlength=3)
    probs = counts / n_neighbors
    top_probs = np.sort(probs)

    # Ambiguity check
    # Si les 3 meilleures probabilités sont trop proches, la requête est jugée ambiguë.
    # Cela empêche de faire des prédictions trop incertaines.
    if top_probs[-1] - top_probs[-2] < ambiguity_threshold and top_probs[-2] - top_probs[-3] < ambiguity_threshold:
        predicted_label = 4
        confidence = None
    else:
        predicted_label = int(np.argmax(probs))
        confidence = probs[predicted_label]

    # Display results
    print("\n" + "=" * 50)
    print(f"Input: '{text}'")
    if predicted_label == 4:
        print("Predicted: Ambiguë (Ambiguous)")
    else:
        print(f"Predicted: {label_names[predicted_label]} (confidence: {confidence * 100:.1f}%)")

    print("\nProbabilities:")
    for i in range(3):
        print(f"- {label_names[i]}: {probs[i] * 100:.1f}%")
    print("=" * 50 + "\n")


Example usage

In [3]:
classify_input("quel est l'etat de ma commande?")


Input: 'quel est l'etat de ma commande?'
Predicted: Suivi (Tracking) (confidence: 100.0%)

Probabilities:
- Suivi (Tracking): 100.0%
- Réclamation (Complaint): 0.0%
- Commande (Order): 0.0%



Prompt Classification (zero shot)

In [6]:
from transformers import pipeline
import torch

# List of all 60 intent labels (here's a shortened version — use full in practice)
intent_labels = [
    "suivi", "commande", "reclamation"
]

# Load the model
classifier = pipeline(
    "zero-shot-classification",
    model="cartesinus/multilingual_minilm-amazon_massive-intent_eu7",
    device=0 if torch.cuda.is_available() else -1
)

# Define a simple function for classification
def predict_intent(text):
    result = classifier(text, intent_labels, multi_label=False)
    return result['labels'][0], result['scores'][0]

# Example usage
message = "Je veux acheter Doliprane"
intent, confidence = predict_intent(message)
print(f"Message: {message}")
print(f"Predicted intent: {intent} ({confidence:.2%})")


Device set to use cpu
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Message: Je veux acheter Doliprane
Predicted intent: suivi (92.88%)


In [4]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# pour le traitement tensoriel
import torch 
# pour transformer les logits en probabilités.
import torch.nn.functional as F

# Load tokenizer and model
model_name = "cartesinus/multilingual_minilm-amazon_massive-intent_eu7"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Put model in evaluation mode (désactive dropout, batch norm, etc.)
model.eval()

# Define the text
text = "ou est ma commande?"

# Tokenize the input
# tokenisation avec ajout automatique de batch
inputs = tokenizer(text, return_tensors="pt")

# Forward pass
# désactive le calcul du gradient (no_grad) car ce n’est pas un entraînement.
# logits: sorties brutes du modèle.
# softmax: convertit les logits en probabilités de chaque classe
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits
    probs = F.softmax(logits, dim=1)

# Get predicted class index
# Sélectionne l’indice de la classe avec la probabilité la plus élevée et Récupèrer le score de confiance.
predicted_index = torch.argmax(probs, dim=1).item()
confidence = torch.max(probs).item()

# Load class labels
id2label = model.config.id2label
predicted_label = id2label[predicted_index]

# Print result
print(f"Text: {text}")
print(f"Predicted intent: {predicted_label}")
print(f"Confidence: {confidence:.2%}")


Text: ou est ma commande?
Predicted intent: takeaway_query
Confidence: 99.53%
