# TP : Analyse de Sentiment avec SentiWordNet

Nom : Khedidja Rahmani  
Master : MICR  
Module : Ressource Lexicale

Objectif :
Analyser le sentiment des tweets en utilisant SentiWordNet et NLTK.


In [None]:
# =========================
# STEP 0: Install & Import
# =========================
import nltk
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
import re

# Download the NLTK data packages this script relies on.
for _resource in ('wordnet', 'sentiwordnet', 'omw-1.4'):
    nltk.download(_resource, quiet=True)

# =========================
# STEP 1: Preprocessing
# =========================
# Single module-level lemmatizer instance, reused by lemmatize_tokens()
# for every token instead of constructing a new one per call.
lemmatizer = WordNetLemmatizer()

def preprocess(text):
    """Split *text* into lowercase alphabetic tokens.

    The text is lowercased first, then every maximal run of ASCII letters
    becomes one token. Digits, punctuation and whitespace act as separators
    and are dropped, so a contraction such as "It's" yields "it" and "s".

    Args:
        text: the raw string to tokenize.

    Returns:
        A list of lowercase tokens in order of appearance (empty if the
        text contains no ASCII letters).
    """
    lowered = text.lower()
    return [match.group(0) for match in re.finditer(r'[a-zA-Z]+', lowered)]

def get_wordnet_pos(word):
    """Guess a WordNet part-of-speech constant for *word*.

    This is a purely lexical heuristic (no tagger is involved). The checks
    are applied in this order, and the first match wins:

    1. words ending in "ly" are treated as adverbs;
    2. words ending in "ful", "ous", "able" or "ive" are treated as adjectives;
    3. a small fixed set of common verbs is treated as verbs;
    4. everything else is treated as a noun.

    Because only suffixes are inspected, words such as "family" are
    classified as adverbs.

    Args:
        word: a single token (the comparisons are case-sensitive).

    Returns:
        One of wordnet.ADV, wordnet.ADJ, wordnet.VERB or wordnet.NOUN.
    """
    known_verbs = {
        'be', 'am', 'is', 'are', 'was', 'were',
        'have', 'has', 'do', 'make', 'love', 'play',
    }

    if word.endswith('ly'):
        return wordnet.ADV
    if word.endswith(('ful', 'ous', 'able', 'ive')):
        return wordnet.ADJ
    return wordnet.VERB if word in known_verbs else wordnet.NOUN

def lemmatize_tokens(tokens):
    """Lemmatize every token using the POS guessed by get_wordnet_pos().

    Args:
        tokens: iterable of word strings.

    Returns:
        A new list with one lemma per input token, in the same order.
    """
    return [
        lemmatizer.lemmatize(token, get_wordnet_pos(token))
        for token in tokens
    ]

# =========================
# STEP 2: Sentiment Scoring
# =========================
def get_sentiment_score(tokens):
    """Average the SentiWordNet scores of the tokens that can be scored.

    For each token the part of speech is guessed with get_wordnet_pos(),
    the WordNet synsets for that (word, POS) pair are looked up, and the
    positive / negative / objective scores of the first synset are
    accumulated. Tokens with no synset, or whose first synset has no
    SentiWordNet entry, are skipped and do not count towards the average.

    Args:
        tokens: iterable of word strings (typically lemmas).

    Returns:
        A tuple of three floats ``(positive, negative, objective)``, each
        averaged over the scored tokens. When no token could be scored the
        result is ``(0.0, 0.0, 1.0)``, i.e. fully objective / neutral.
    """
    pos_score = 0.0
    neg_score = 0.0
    obj_score = 0.0
    count = 0

    for word in tokens:
        synsets = wordnet.synsets(word, pos=get_wordnet_pos(word))
        if not synsets:
            continue

        # Only the first listed synset is used, for simplicity; no word-sense
        # disambiguation is attempted.
        swn_syn = swn.senti_synset(synsets[0].name())
        if swn_syn is None:
            # senti_synset() returns None when the synset has no
            # SentiWordNet entry; skip it rather than crash on .pos_score().
            continue

        pos_score += swn_syn.pos_score()
        neg_score += swn_syn.neg_score()
        obj_score += swn_syn.obj_score()
        count += 1

    if count == 0:
        # Nothing matched: report the text as fully objective (neutral).
        return 0.0, 0.0, 1.0

    return pos_score / count, neg_score / count, obj_score / count

# =========================
# STEP 3: Example Dataset
# =========================
# Hard-coded sample tweets that the analysis loop at the end of this cell
# iterates over; each string is scored independently.
tweets = [
    "I love this new phone! It's amazing and works perfectly.",
    "This is the worst movie I have ever seen.",
    "The food was okay, nothing special but not bad.",
    "I am extremely happy with the service!",
    "I hate waiting in long lines."
]

# =========================
# STEP 4: Analyze Sentiment
# =========================
for tweet in tweets:
    # Pipeline: tokenize -> lemmatize -> average SentiWordNet scores.
    tokens = preprocess(tweet)
    lemmas = lemmatize_tokens(tokens)
    pos, neg, obj = get_sentiment_score(lemmas)

    # The label is decided by comparing the averaged positive and negative
    # scores; an exact tie is reported as neutral.
    sentiment = (
        "Positive" if pos > neg
        else "Negative" if neg > pos
        else "Neutral"
    )

    # One print call, one line per item: tweet, scores + label, separator.
    print(
        f"Tweet: {tweet}",
        f"Positive: {pos:.2f}, Negative: {neg:.2f}, Objective: {obj:.2f} → Sentiment: {sentiment}",
        "--------------------------------------------------",
        sep="\n",
    )


Tweet: I love this new phone! It's amazing and works perfectly.
Positive: 0.14, Negative: 0.00, Objective: 0.86 → Sentiment: Positive
--------------------------------------------------
Tweet: This is the worst movie I have ever seen.
Positive: 0.10, Negative: 0.23, Objective: 0.68 → Sentiment: Negative
--------------------------------------------------
Tweet: The food was okay, nothing special but not bad.
Positive: 0.10, Negative: 0.23, Objective: 0.67 → Sentiment: Negative
--------------------------------------------------
Tweet: I am extremely happy with the service!
Positive: 0.22, Negative: 0.03, Objective: 0.75 → Sentiment: Positive
--------------------------------------------------
Tweet: I hate waiting in long lines.
Positive: 0.03, Negative: 0.07, Objective: 0.90 → Sentiment: Negative
--------------------------------------------------
