<a href="https://colab.research.google.com/github/ludoveltz/test_github_fev25/blob/main/Exc_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.nn.functional import softmax

class BERTSentimentAnalyzer:
    """Sentiment classifier wrapping a Hugging Face sequence-classification model.

    The tokenizer and model are loaded once at construction time and moved to
    the GPU when one is available. ``predict`` then classifies one text at a
    time and reports the winning label together with per-label probabilities.
    """

    def __init__(self, model_name='distilbert-base-uncased-finetuned-sst-2-english'):
        """Load the tokenizer and model and select the compute device.

        Args:
            model_name: Hub identifier or local path of a sequence
                classification checkpoint. Defaults to the DistilBERT SST-2
                checkpoint this class was originally written for.
        """
        self.model_name = model_name
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)

        # Use the GPU when available, otherwise fall back to the CPU.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model = self.model.to(self.device)

        # Take label names from the checkpoint config so the index -> name
        # mapping follows the loaded model; fall back to the SST-2 pair when
        # the config does not provide one.
        id2label = getattr(self.model.config, 'id2label', None)
        if id2label:
            self.labels = [id2label[index] for index in sorted(id2label)]
        else:
            self.labels = ['NEGATIVE', 'POSITIVE']

    def preprocess_text(self, text):
        """Normalize whitespace, tokenize ``text`` and move tensors to the device.

        Args:
            text: The raw input string.

        Returns:
            A dict mapping each tokenizer output name to a tensor located on
            ``self.device``.
        """
        # Collapse every run of whitespace (including newlines) to one space.
        text = ' '.join(text.split())

        # NOTE(review): padding='max_length' pads every input up to the
        # tokenizer's maximum length; it is kept as-is so the shape of the
        # returned tensors does not change for existing callers.
        encoded = self.tokenizer(
            text,
            add_special_tokens=True,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # The inputs must live on the same device as the model.
        return {k: v.to(self.device) for k, v in encoded.items()}

    def predict(self, text):
        """Classify a single text.

        Args:
            text: The raw input string.

        Returns:
            A dict with keys ``'text'`` (the input, unmodified),
            ``'sentiment'`` (the predicted label), ``'confidence'`` (the
            probability of that label) and ``'probabilities'`` (a mapping
            from each label to its probability).
        """
        inputs = self.preprocess_text(text)

        # Inference only: switch to evaluation mode and skip gradient tracking.
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(**inputs)
            probabilities = softmax(outputs.logits, dim=1)

        # Only one text is scored, so read the first row explicitly. A bare
        # torch.argmax(probabilities) would index the flattened tensor and
        # point outside the label list as soon as more than one row exists.
        row = probabilities[0]
        predicted_class = int(torch.argmax(row).item())
        confidence_score = row[predicted_class].item()

        return {
            'text': text,
            'sentiment': self.labels[predicted_class],
            'confidence': confidence_score,
            'probabilities': {
                label: prob.item()
                for label, prob in zip(self.labels, row)
            }
        }

# Smoke test: build the analyzer once and reuse it for every sample.
analyzer = BERTSentimentAnalyzer()

# Sample sentences: one clearly positive, one clearly negative, one mixed.
test_texts = [
    "I really enjoyed this movie! It was fantastic.",
    "This was a terrible experience, I'm very disappointed.",
    "The documentary was interesting but could be better."
]

# Classify each sample in order and print a short report for it.
print("\nRésultats de l'analyse de sentiment :")
for result in map(analyzer.predict, test_texts):
    sentiment, confidence = result['sentiment'], result['confidence']
    print("\nTexte:", result['text'])
    print("Sentiment:", sentiment)
    print("Score de confiance:", f"{confidence:.4f}")
    print("Probabilités détaillées:")
    for label, prob in result['probabilities'].items():
        print(f"  {label}: {prob:.4f}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]


Résultats de l'analyse de sentiment :

Texte: I really enjoyed this movie! It was fantastic.
Sentiment: POSITIVE
Score de confiance: 0.9999
Probabilités détaillées:
  NEGATIVE: 0.0001
  POSITIVE: 0.9999

Texte: This was a terrible experience, I'm very disappointed.
Sentiment: NEGATIVE
Score de confiance: 0.9998
Probabilités détaillées:
  NEGATIVE: 0.9998
  POSITIVE: 0.0002

Texte: The documentary was interesting but could be better.
Sentiment: NEGATIVE
Score de confiance: 0.8809
Probabilités détaillées:
  NEGATIVE: 0.8809
  POSITIVE: 0.1191
