In [1]:
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
# Fetch the 'vader_lexicon' NLTK resource used by the VADER sentiment analyzer
# (SentimentIntensityAnalyzer, imported above). The call reports success by returning True.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [3]:
# Import the necessary libraries
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer

# Read the cleaned comments and keep only rows that have a value in "commentaire"
data = pd.read_csv('/content/data_cleann.csv').dropna(subset=['commentaire'])

# VADER scorer shared by every cell below.
# NOTE(review): VADER ships an English lexicon while the comments here are French,
# so many comments may score 0.0 regardless of their tone — confirm this is intended.
sia = SentimentIntensityAnalyzer()

# Store VADER's compound score of each comment in a new "sentiment" column
data['sentiment'] = data['commentaire'].map(lambda text: sia.polarity_scores(text)['compound'])

# Candidate decision thresholds: 0.01, 0.02, ..., 0.10 (k / 100 equals the decimal literal exactly)
threshold_values = [step / 100 for step in range(1, 11)]

# Binary labelling rule shared by the threshold search and the final classification
def classify_sentiment(score, threshold):
    """Map a sentiment score to a binary label.

    Args:
        score: Numeric sentiment score (the notebook passes VADER's compound score).
        threshold: Cut-off the score must strictly exceed to count as positive.

    Returns:
        'positive' when score > threshold, otherwise 'negative'. A score exactly
        equal to the threshold (and any neutral score below it) is 'negative'.
    """
    if score > threshold:
        return 'positive'
    return 'negative'

# Evaluate different threshold values and store the results
#
# Accuracy is only meaningful against reference labels: comparing the predicted column
# with itself scores exactly 1.0 for every threshold, which makes the "best" threshold
# simply the first candidate in the list.
# TODO(review): set TRUE_LABEL_COLUMN to the column of `data` holding the human-annotated
# 'positive'/'negative' labels (no such column is visible in this notebook).
TRUE_LABEL_COLUMN = None

results = {}
if TRUE_LABEL_COLUMN is not None and TRUE_LABEL_COLUMN in data.columns:
    true_labels = data[TRUE_LABEL_COLUMN]
    for threshold in threshold_values:
        # Keep predictions in a temporary Series so `data` is not rewritten on every pass
        predicted_labels = data['sentiment'].apply(classify_sentiment, args=(threshold,))
        # Fraction of comments whose predicted label matches the reference label
        results[threshold] = (predicted_labels == true_labels).mean()
else:
    # Without reference labels no accuracy can be computed; record NaN instead of a
    # misleading perfect score. max() over all-NaN values keeps the first key, so the
    # threshold selection that follows still resolves to the first candidate.
    print("No ground-truth label column available: threshold accuracies are undefined (NaN).")
    results = {threshold: float('nan') for threshold in threshold_values}

# Select the threshold with the largest recorded accuracy (the first one wins on ties)
best_threshold = max(results, key=lambda candidate: results[candidate])

# Label every comment using the selected threshold
data['sentiment_label'] = data['sentiment'].apply(classify_sentiment, args=(best_threshold,))

# Show each comment next to its predicted label
print(data.loc[:, ['commentaire', 'sentiment_label']])

# Sanity check on a hand-written French comment that is not part of the dataset
new_comment_french = " je n'ai pas du tout aimé le service."

# Compound VADER score of that sentence.
# NOTE(review): with an English lexicon a French sentence may simply score 0.0, in which
# case the 'negative' label only reflects 0.0 not exceeding the threshold — confirm.
new_comment_sentiment = sia.polarity_scores(new_comment_french)['compound']

# Apply the same labelling rule and threshold used for the dataset
new_comment_sentiment_label = classify_sentiment(
    score=new_comment_sentiment,
    threshold=best_threshold,
)

# Report the predicted label
print(f"Predicted Sentiment Label for New Comment: {new_comment_sentiment_label}")


                                             commentaire sentiment_label
0      remerciements sinceres devouement exceptionnel...        negative
1      ravie partager experience positive precieuseme...        positive
2      somme aller o maroc soins accompagne maman con...        negative
3      cellule reclamation bonjour a tous remercie be...        negative
4      tiens a remercier sincerement filles prise cha...        negative
...                                                  ...             ...
12149              mauvaise nourriture propre recommande        negative
12150                mauvais service mauvaise nourriture        negative
12151              tres mauvais service serveurs impolis        negative
12152  superbe batiment mauresque serveur raciste inj...        negative
12153                                     decevant trois        negative

[12150 rows x 2 columns]
Predicted Sentiment Label for New Comment: negative


In [None]:
# Keep only the rows labelled 'negative', restricted to the comment text and its label
negative_comments = data.loc[
    data['sentiment_label'].eq('negative'),
    ['commentaire', 'sentiment_label'],
]
print(negative_comments)


                                             commentaire sentiment_label
0      remerciements sinceres devouement exceptionnel...        negative
2      somme aller o maroc soins accompagne maman con...        negative
3      cellule reclamation bonjour a tous remercie be...        negative
4      tiens a remercier sincerement filles prise cha...        negative
5      orl edghiri medecin nul ramene pere consulter ...        negative
...                                                  ...             ...
12149              mauvaise nourriture propre recommande        negative
12150                mauvais service mauvaise nourriture        negative
12151              tres mauvais service serveurs impolis        negative
12152  superbe batiment mauresque serveur raciste inj...        negative
12153                                     decevant trois        negative

[9969 rows x 2 columns]


In [None]:
# Save the modified DataFrame (including the added "sentiment" and "sentiment_label"
# columns) to a separate CSV file; index=False leaves the row index out of the file.
data.to_csv('/content/model3.csv', index=False)