<a href="https://colab.research.google.com/github/samar2007s/reviews12235/blob/main/Interview_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install arabic-reshaper python-bidi

!pip install transformers torch --quiet


In [None]:
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sns
import numpy as np
from wordcloud import WordCloud
import re
import string
from collections import Counter
from transformers import pipeline
#from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
#from textblob import TextBlob
import arabic_reshaper
from bidi.algorithm import get_display
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation



# Apply Matplotlib's 'ggplot' style sheet to the figures drawn by this notebook.
plt.style.use('ggplot')

In [None]:
# Load the customer-review dataset; later cells read its 'review_content',
# 'star', 'branch' and 'date' columns.
df=pd.read_csv('/content/new_customer_200.csv')


In [None]:
# Build one corpus string from every review. Missing reviews are dropped and
# the rest cast to str, because str.join raises TypeError on a float NaN.
text = ' '.join(df['review_content'].dropna().astype(str))

# Strip punctuation/symbols first, then digits, leaving words and whitespace.
text = re.sub(r'[^\w\s]', '', text)
text = re.sub(r'\d+', '', text)

# Frequent Arabic function words (plus the domain word for "restaurant")
# that would otherwise dominate the cloud.
stopwords = {'و', 'في', 'على', 'من', 'مطعم', 'لديهم', 'ما', 'لا', 'جدا', 'لم', 'كانت', 'كل', 'هذه', 'هو', 'هي', 'بعد', 'قبل'}

words = text.split()
filtered_words = [word for word in words if word not in stopwords]

# Deliberately NOT named `clean_text`: that name is the cleaning function
# defined in the next cell, and rebinding it to a string here would make
# `df['review_content'].apply(clean_text)` fail whenever this cell is re-run
# after that one.
cloud_text = ' '.join(filtered_words)

arabic_font_path = '/content/Amiri-Regular.ttf'

# NOTE(review): if the rendered cloud shows disconnected or left-to-right
# Arabic letters, the installed Pillow build is probably drawing without
# complex-text layout; in that case pass
# get_display(arabic_reshaper.reshape(cloud_text)) to generate() instead.
wordcloud = WordCloud(
    font_path=arabic_font_path,
    width=800,
    height=400,
    background_color='white',
).generate(cloud_text)

plt.figure(figsize=(14, 10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()

In [None]:
def clean_text(text):
    """Normalise a review so that only Arabic-block characters, ASCII digits
    and whitespace remain.

    The input is converted with ``str()`` and lower-cased, then every
    character outside U+0600-U+06FF, ``0-9`` and whitespace is deleted.

    Parameters
    ----------
    text : any
        Value to clean; non-string values are stringified first.

    Returns
    -------
    str
        The filtered text (possibly empty).
    """
    lowered = str(text).lower()
    return re.sub(r'[^\u0600-\u06FF0-9\s]', '', lowered)

# Store the cleaned text in a new column; the raw 'review_content' column is
# left untouched and is what the word-cloud and topic-model cells read.
df['clean_review'] = df['review_content'].apply(clean_text)

In [None]:
# Build a Hugging Face sentiment-analysis pipeline from the checkpoint below.
sentiment_model = pipeline("sentiment-analysis", model="CAMeL-Lab/bert-base-arabic-camelbert-da-sentiment")

# Classify every cleaned review in one batched call instead of one pipeline
# invocation per row. truncation=True clips reviews that exceed the model's
# maximum sequence length; without it such a review raises a tensor-size
# error and aborts the whole cell.
predictions = sentiment_model(
    df['clean_review'].tolist(),
    truncation=True,
    batch_size=16,
)

# For a list input the pipeline returns one {'label', 'score'} dict per text,
# in input order, so the labels line up positionally with the DataFrame rows.
df['sentiment'] = [prediction['label'] for prediction in predictions]

In [None]:
# Tally how many reviews received each sentiment label.
sentiment_counts = df['sentiment'].value_counts()

# Reshape and reorder the Arabic captions for display before giving them to
# Matplotlib.
title = get_display(arabic_reshaper.reshape('توزيع عدد التعليقات حسب المشاعر'))
xlabel = get_display(arabic_reshaper.reshape('المشاعر'))
ylabel = get_display(arabic_reshaper.reshape('عدد التعليقات'))

# One bar per sentiment label, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8,5))
sns.barplot(
    x=sentiment_counts.index,
    y=sentiment_counts.values,
    palette='viridis',
    ax=ax,
)
ax.set_title(title)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
plt.show()


In [None]:
# Concatenate the raw reviews of each sentiment class into one string per
# class. dropna()/astype(str) keep str.join from failing on missing reviews.
sentiment_words = {
    sentiment: ' '.join(
        df.loc[df['sentiment'] == sentiment, 'review_content'].dropna().astype(str)
    )
    for sentiment in ['positive', 'negative', 'neutral']
}

for sentiment, text in sentiment_words.items():
    # WordCloud.generate raises ValueError when given no words, which happens
    # whenever the classifier never produced this label. Skip such classes
    # instead of aborting the remaining plots.
    if not text.strip():
        print(f'No reviews labelled {sentiment!r}; skipping its word cloud.')
        continue

    wordcloud = WordCloud(
        font_path='/content/Amiri-Regular.ttf',
        width=800,
        height=400,
        background_color='white'
    ).generate(text)

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.title(f'{sentiment}')
    plt.show()

In [None]:
# Arabic function words excluded from the bag-of-words vocabulary.
arabic_stopwords = ['في', 'على', 'من', 'إلى', 'مع', 'عن', 'ما', 'لا', 'لم', 'هل', 'كل', 'قد', 'كان']

# CountVectorizer cannot tokenize NaN, so drop missing reviews and force str
# before vectorizing.
texts = df['review_content'].dropna().astype(str).tolist()

# Keep terms that occur in at least 2 documents and in at most 95% of them.
vectorizer = CountVectorizer(stop_words=arabic_stopwords, max_df=0.95, min_df=2)
X = vectorizer.fit_transform(texts)

# Fit a 4-topic LDA model; the fixed random_state makes re-runs reproducible.
lda = LatentDirichletAllocation(n_components=4, random_state=42)
lda.fit(X)

# Vocabulary terms, positionally aligned with the columns of X and of
# lda.components_.
feature_names = vectorizer.get_feature_names_out()

def display_topics(model, feature_names, no_top_words):
    """Print the highest-weighted words of every topic of a topic model.

    Parameters
    ----------
    model : object
        Must expose ``components_``; each row is treated as one topic and
        must support ``argsort()``.
    feature_names : sequence
        Vocabulary terms, indexable by the integer positions produced by
        ``argsort()`` on a topic row.
    no_top_words : int
        Number of words to print per topic.

    Returns
    -------
    None
        The topics are written to stdout, one header line and one list line
        per topic.
    """
    for topic_idx, topic in enumerate(model.components_):
        print(f"موضوع {topic_idx + 1}:")
        # argsort is ascending: reverse it and keep the leading entries so the
        # heaviest words come first.
        top_indices = topic.argsort()[::-1][:no_top_words]
        # Cast to plain str: elements of a NumPy string array are np.str_, and
        # NumPy 2 prints those inside a list as np.str_('...') instead of the
        # bare word.
        print([str(feature_names[i]) for i in top_indices])

# Print the 10 highest-weighted words of each topic of the LDA model fitted above.
display_topics(lda, feature_names, 10)

In [None]:
# Split the raw reviews into one list per predicted sentiment label.
negative_texts, positive_texts, neutral_texts = (
    df.loc[df['sentiment'] == label, 'review_content'].tolist()
    for label in ('negative', 'positive', 'neutral')
)

In [None]:
def lda_topics(texts, n_topics=4, n_top_words=10):
    """Fit an LDA topic model on ``texts`` and return each topic's top words.

    The documents are vectorized with a bag-of-words ``CountVectorizer`` that
    removes the module-level ``arabic_stopwords`` and keeps terms occurring in
    at least 2 documents and at most 95% of them.

    Parameters
    ----------
    texts : iterable
        Documents to model. ``None``/NaN entries are skipped; other values
        are converted with ``str()``.
    n_topics : int, default 4
        Number of LDA components.
    n_top_words : int, default 10
        Number of highest-weighted words returned per topic.

    Returns
    -------
    list[list[str]]
        One list of words per topic, heaviest word first. An empty list is
        returned (with a warning) when the documents yield no usable
        vocabulary, e.g. for an empty or very small sentiment class.
    """
    import warnings

    # CountVectorizer cannot tokenize missing values.
    documents = [str(doc) for doc in texts if pd.notna(doc)]

    vectorizer = CountVectorizer(stop_words=arabic_stopwords, max_df=0.95, min_df=2)
    try:
        X = vectorizer.fit_transform(documents)
    except ValueError as exc:
        # Raised when the min_df/max_df pruning leaves no vocabulary (empty
        # input, too few documents, or no term shared by two documents).
        warnings.warn(f"lda_topics: no topics extracted ({exc})")
        return []

    lda = LatentDirichletAllocation(n_components=n_topics, random_state=42)
    lda.fit(X)

    feature_names = vectorizer.get_feature_names_out()

    topics = []
    for topic in lda.components_:
        # argsort is ascending: reverse it and keep the leading entries.
        top_indices = topic.argsort()[::-1][:n_top_words]
        # Plain str (not np.str_) so the lists print as bare words under
        # NumPy 2 as well.
        topics.append([str(feature_names[i]) for i in top_indices])
    return topics

In [None]:
# Extract topics separately for each sentiment class.
negative_topics = lda_topics(negative_texts)
positive_topics = lda_topics(positive_texts)
neutral_topics  = lda_topics(neutral_texts)

# Print a heading followed by the numbered topics of each class, in the order
# negative, positive, neutral.
for heading, class_topics in (
    ("المواضيع للنصوص السلبية:", negative_topics),
    ("\nالمواضيع للنصوص الإيجابية:", positive_topics),
    ("\nالمواضيع للنصوص المحايدة :", neutral_topics),
):
    print(heading)
    for i, t in enumerate(class_topics, start=1):
        print(f"موضوع {i}: {t}")

In [None]:
# Mean star rating over all reviews, and the same mean computed per branch.
avg_rating = df['star'].mean()
avg_rating_by_branch = df['star'].groupby(df['branch']).mean()

print("متوسط تقييم العملاء:", avg_rating)

print(f"متوسط تقييم العملاء حسب الفرع: {avg_rating_by_branch}")

In [None]:
# Mean star rating for each distinct value of 'date', as a two-column frame.
daily_ratings = df.groupby('date', as_index=False)['star'].mean()

# Arabic captions, reshaped and reordered for display.
title = get_display(arabic_reshaper.reshape('متوسط التقييم اليومي'))
xlabel = get_display(arabic_reshaper.reshape('اليوم'))
ylabel = get_display(arabic_reshaper.reshape('متوسط التقييم'))

# Line chart of the daily mean on an explicit Axes.
fig, ax = plt.subplots(figsize=(8,5))
ax.plot(daily_ratings['date'], daily_ratings['star'], marker='o', linestyle='-')
ax.set_title(title)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
plt.xticks(rotation=45)
ax.grid(True)
plt.show()

In [None]:
# Mean star rating for every (date, branch) pair.
daily_ratings_branch = df.groupby(['date', 'branch'], as_index=False)['star'].mean()

branches = daily_ratings_branch['branch'].unique()

# One stacked subplot per branch sharing the x axis. squeeze=False always
# returns a 2-D array, so a single branch needs no special-casing.
fig, axes = plt.subplots(
    len(branches), 1,
    figsize=(8, 4*len(branches)),
    sharex=True,
    squeeze=False,
)
axes = axes[:, 0]

# The y-axis caption is the same on every panel.
panel_ylabel = get_display(arabic_reshaper.reshape('متوسط التقييم'))

for ax, branch in zip(axes, branches):
    is_branch = daily_ratings_branch['branch'] == branch
    branch_data = daily_ratings_branch.loc[is_branch]
    ax.plot(branch_data['date'], branch_data['star'], marker='o', linestyle='-')
    ax.set_title(get_display(arabic_reshaper.reshape(f'متوسط التقييم اليومي - فرع {branch}')))
    ax.set_ylabel(panel_ylabel)
    ax.grid(True)

# The x caption goes on the bottom panel, which is also pyplot's current axes
# for the tick rotation below.
axes[-1].set_xlabel(get_display(arabic_reshaper.reshape('اليوم')))
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()