In [8]:
from transformers import CamembertTokenizer, CamembertForSequenceClassification
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding
from datasets import load_dataset
import evaluate
import numpy as np
import torch


In [5]:
# Load the ALREADY TRAINED classifier and its tokenizer from the local checkpoint directory.
model = CamembertForSequenceClassification.from_pretrained("./camembert-xnli-final")
tokenizer = CamembertTokenizer.from_pretrained("./camembert-xnli-final")

# Use the GPU when one is visible; otherwise the model stays where it was loaded.
if torch.cuda.is_available():
    model.to("cuda")  # nn.Module.to moves the parameters in place and returns the same module
    print("✓ Modèle chargé sur GPU !")

✓ Modèle chargé sur GPU !


In [6]:
# Load the "fr" configuration of the "xnli" dataset.
dataset = load_dataset(path="xnli", name="fr")

def preprocess_function(examples, max_length=128):
    """Tokenize a batch of premise/hypothesis pairs.

    Args:
        examples: Batch mapping with "premise" and "hypothesis" entries, as
            supplied by ``dataset.map(..., batched=True)``.
        max_length: Truncation limit forwarded to the tokenizer. Defaults to
            128, the value that used to be hard-coded here.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the sentence pairs.
    """
    # Deliberately no padding at this stage: the Trainer in this notebook is
    # given a DataCollatorWithPadding, which pads each batch to its longest
    # sequence. Padding every example to `max_length` here made that collator
    # a no-op and pushed full-length sequences through the model for nothing.
    return tokenizer(
        examples["premise"],
        examples["hypothesis"],
        truncation=True,
        max_length=max_length,
    )

# Tokenize in batches, then drop the raw text columns and rename the
# "label" column to "labels".
tokenized_dataset = (
    dataset.map(preprocess_function, batched=True)
    .remove_columns(["premise", "hypothesis"])
    .rename_column("label", "labels")
)
# Switch the output format to torch (set_format works in place).
tokenized_dataset.set_format(type="torch")
print("Dataset prêt !")

Dataset prêt !


In [7]:
# Evaluation: load the "accuracy" metric used by compute_metrics.
accuracy_metric = evaluate.load(path="accuracy")

def compute_metrics(eval_pred):
    """Score predictions with the module-level ``accuracy_metric``.

    Args:
        eval_pred: Pair that unpacks into (prediction scores, reference labels).

    Returns:
        The result of ``accuracy_metric.compute`` for the arg-max class of
        each row of scores against the reference labels.
    """
    scores, gold_labels = eval_pred
    # Pick, for every row, the index of the highest score.
    predicted_classes = np.argmax(scores, axis=1)
    return accuracy_metric.compute(
        predictions=predicted_classes,
        references=gold_labels,
    )

# Trainer used here for evaluation; only the eval batch size is configured.
trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir="./temp", per_device_eval_batch_size=64),
    # `tokenizer=` is deprecated in Trainer.__init__ (it triggers a
    # FutureWarning on current transformers releases); `processing_class=`
    # is its replacement. Requires a transformers version that already
    # knows `processing_class` — the deprecation warning implies it does.
    processing_class=tokenizer,
    # Pads each batch on the fly using the tokenizer's padding settings.
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

# FINAL TEST: evaluate on the "test" split and print the accuracy next to
# the reference figure labelled "article".
results = trainer.evaluate(eval_dataset=tokenized_dataset["test"])

print("\n" + "=" * 50, "RÉSULTATS FINAUX SUR TEST SET", "=" * 50, sep="\n")
print("Accuracy obtenue : {:.2f}%".format(results["eval_accuracy"] * 100))
print("Accuracy article : 82.5%", "=" * 50, sep="\n")

  trainer = Trainer(



RÉSULTATS FINAUX SUR TEST SET
Accuracy obtenue : 81.78%
Accuracy article : 82.5%
