# **Modeling**

In [6]:
import pandas as pd
positive_words =["رائع", "ممتاز", "جيد", "جميل", "أحب", "ممتازة", "زوين", "مزيان", "فرحان", "نشيط", "محبوب", "ممتاز", "مبروك", "سعيد", "شجاع", "مدهش", "ذكي", "محترم", "ضريف", "متألق", "نافع", "كريم", "محظوظ", "خلوق", "حنين", "مساعد", "عزيز", "متفاهم", "مثقف", "متعاون", "مبدع", "جذاب", "مهتم", "صبور", "موهوب", "متفائل", "ضحوك", "مهذب", "خلاق", "رزين", "وافي", "مسؤول", "محترف", "مجتهد", "شهم", "وفي", "منتبه", "ملتزم", "مجامل", "طموح", "واثق", "حازم", "مرن", "متوازن", "متمكن", "متحمس", "متعاطف", "متسامح", "مثير", "مؤدب", "معقول", "وديع", "متحرر"]
negative_words = ["سيء", "أسوأ", "لا أحب", "رديء", "فظيع", "كارثة", "خايب", "عياق", "خاين", "كذاب", "فاشل", "حزين", "غضبان", "جبان", "ضعيف", "كسول", "مزعج", "حقود", "أناني", "متكبر", "عدواني", "كسول", "ملل", "منافق", "بخيل", "غبي", "ممل", "مكتئب", "متشائم", "وقح", "معقد", "نرجسي", "مستهتر", "متردد", "عنيف", "متوحش", "غشاش", "حاقد", "خبيث", "غدار", "متسلط", "طماع", "عاصي", "مريض", "خائن", "كاذب", "متهور", "مضطرب", "مدمر", "سيئ", "فوضوي", "جاهل", "قاسي", "مكروه", "غيران", "حاقد", "متزمت", "منافق", "مجنون", "بائس", "مشتت", "متعب", "منهك", "مشوش"]
sentiment_lexicon = pd.DataFrame({
    'word': positive_words + negative_words,
    'sentiment': ['positif'] * len(positive_words) + ['négatif'] * len(negative_words) #j'ai assurer que je vais d'avoir la meme quantite du mot positif ou bien negatif pour les deux listes
})
sentiment_lexicon

Unnamed: 0,word,sentiment
0,رائع,positif
1,ممتاز,positif
2,جيد,positif
3,جميل,positif
4,أحب,positif
...,...,...
122,بائس,négatif
123,مشتت,négatif
124,متعب,négatif
125,منهك,négatif


In [7]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
!pip install pyarabic



In [10]:
from pyarabic.araby import tokenize

def classify_sentiment(text, lexicon):
    tokens = tokenize(text)
    sentiments = []
    for token in tokens:
        sentiment = lexicon[lexicon['word'] == token]['sentiment']
        if not sentiment.empty:
            sentiments.append(sentiment.values[0])
    if 'positif' in sentiments:
        return 'positif'
    elif 'négatif' in sentiments:
        return 'négatif'
    else:
        return 'neutre'


df = pd.read_csv('/content/drive/MyDrive/finalprocessing2.csv')


df['sentiment'] = df['comment'].apply(lambda x: classify_sentiment(x, sentiment_lexicon))

print(df[['comment', 'sentiment']].head())

df.to_csv('modeled.csv', index=False)


                                             comment sentiment
0  اواه نتوما اش كتقولو احسن دراما بالنسبه 2 وجوه...    neutre
1  مهرجان وطني يحتفي بالدراما المغربية يقوم بتكري...    neutre
2  الريف عاني والحوز لازال يعاني الريف الاقل تجار...    neutre
3  الحروف ادا سمحتم شكرا ودائما فخر وجه هد الخبر ...    neutre
4            ماشاء الله اخي خالد صديق الدرب والدراسة    neutre


# **Entrainement du modele**

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score



X_train, X_test, y_train, y_test = train_test_split(df['comment'], df['sentiment'], test_size=0.2, random_state=42)

vectorizer = TfidfVectorizer(tokenizer=tokenize)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


model = MultinomialNB()
model.fit(X_train_vec, y_train)


y_pred = model.predict(X_test_vec)


accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')




Accuracy: 0.93
Precision: 0.86
Recall: 0.93
F1 Score: 0.89


  _warn_prf(average, modifier, msg_start, len(result))


#**Application du modele**

In [12]:
def predict_sentiment(new_comments):
    new_comments_vec = vectorizer.transform(new_comments)
    predictions = model.predict(new_comments_vec)
    return predictions


new_comments = ["رائع", "ممتاز", "جيد", "جميل", "أحب", "ممتازة", "زوين", "مزيان", "فرحان", "نشيط", "محبوب", "ممتاز", "مبروك", "سعيد", "شجاع", "مدهش", "ذكي", "محترم", "ضريف", "متألق", "نافع", "كريم", "محظوظ", "خلوق", "حنين", "مساعد", "عزيز", "متفاهم", "مثقف", "متعاون", "مبدع", "جذاب", "مهتم", "صبور", "موهوب", "متفائل", "ضحوك", "مهذب", "خلاق", "رزين", "وافي", "مسؤول", "محترف", "مجتهد", "شهم", "وفي", "منتبه", "ملتزم", "مجامل", "طموح", "واثق", "حازم", "مرن", "متوازن", "متمكن", "متحمس", "متعاطف", "متسامح", "مثير", "مؤدب", "معقول", "وديع", "متحرر"]
predictions = predict_sentiment(new_comments)
for comment, sentiment in zip(new_comments, predictions):
    print(f'Comment: {comment} - Sentiment: {sentiment}')


Comment: رائع - Sentiment: positif
Comment: ممتاز - Sentiment: neutre
Comment: جيد - Sentiment: positif
Comment: جميل - Sentiment: positif
Comment: أحب - Sentiment: neutre
Comment: ممتازة - Sentiment: positif
Comment: زوين - Sentiment: neutre
Comment: مزيان - Sentiment: positif
Comment: فرحان - Sentiment: neutre
Comment: نشيط - Sentiment: neutre
Comment: محبوب - Sentiment: neutre
Comment: ممتاز - Sentiment: neutre
Comment: مبروك - Sentiment: positif
Comment: سعيد - Sentiment: positif
Comment: شجاع - Sentiment: neutre
Comment: مدهش - Sentiment: neutre
Comment: ذكي - Sentiment: neutre
Comment: محترم - Sentiment: positif
Comment: ضريف - Sentiment: neutre
Comment: متألق - Sentiment: neutre
Comment: نافع - Sentiment: neutre
Comment: كريم - Sentiment: positif
Comment: محظوظ - Sentiment: neutre
Comment: خلوق - Sentiment: neutre
Comment: حنين - Sentiment: neutre
Comment: مساعد - Sentiment: neutre
Comment: عزيز - Sentiment: positif
Comment: متفاهم - Sentiment: neutre
Comment: مثقف - Sentiment: 