In [None]:
# Imports
# Installer les d√©pendances 
!pip install fasttext transformers torch pandas sqlalchemy psycopg2-binary scikit-learn

# Imports
import os

import fasttext
import pandas as pd
from sqlalchemy import create_engine
from transformers import pipeline

# PostgreSQL connection.
# The connection URL is taken from the DATABASE_URL environment variable so the
# credentials do not have to be edited into the notebook; the literal below is
# only the fallback used when that variable is unset.
# NOTE(review): the fallback still embeds the local password - remove it once
# DATABASE_URL is configured wherever this notebook runs.
engine = create_engine(
    os.environ.get(
        "DATABASE_URL",
        "postgresql+psycopg2://postgres:to@localhost:5432/banques_maroc",
    )
)


In [None]:

# Load the cleaned reviews from PostgreSQL. Missing comments become empty
# strings and the column is forced to str in the same step.
df = pd.read_sql("SELECT * FROM avis_clean", engine).assign(
    commentaire=lambda frame: frame["commentaire"].fillna("").astype(str)
)

df.head()


In [51]:
# Download the fastText language-identification model once.
# Pure-Python download instead of `!wget`, which is not installed on every
# platform. The file is fetched under a temporary name and renamed only after
# the transfer has finished, so an interrupted download is never mistaken for
# a complete model on the next run.
import urllib.request
from pathlib import Path

model_path = Path("lid.176.bin")
if not model_path.exists():
    partial_path = model_path.with_name(model_path.name + ".part")
    urllib.request.urlretrieve(
        "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin",
        partial_path,
    )
    partial_path.replace(model_path)

# Load the model
import fasttext
ft_model = fasttext.load_model(str(model_path))

# Language detection for a single comment
def detect_lang(text, model=None):
    """Return the language code predicted for ``text``.

    Args:
        text: Comment to classify. ``None``, NaN and empty or whitespace-only
            values are reported as having no detectable language.
        model: Object exposing fastText's ``predict(text)``. Defaults to the
            notebook-level ``ft_model``.

    Returns:
        The top predicted label with its ``__label__`` prefix removed, or
        ``"unknown"`` when there is no text or the prediction fails.
    """
    # `text != text` is only true for NaN, the usual pandas missing value.
    if text is None or text != text:
        return "unknown"
    # fastText's predict() refuses input that contains a newline, so collapse
    # every whitespace run (line breaks included) into a single space first.
    text = " ".join(str(text).split())
    if not text:
        return "unknown"
    # Resolved outside the try block so that a model that was never loaded
    # surfaces as an error instead of labelling every row "unknown".
    if model is None:
        model = ft_model
    try:
        return model.predict(text)[0][0].replace("__label__", "")
    except Exception:
        # Best effort: a comment the model cannot handle is labelled "unknown"
        # rather than aborting the whole column.
        return "unknown"

df["langue"] = df["commentaire"].apply(detect_lang)
# Bare last expression: the notebook renders the (automatically truncated)
# frame as a table instead of the plain-text dump produced by print().
df[["commentaire", "langue"]]


File ‚Äòlid.176.bin‚Äô already there; not retrieving.

                                           commentaire langue
0     swear  god   feel sorry   livelihood   window...     en
1     attitude   representative responsible  sellin...     en
2                                   personnel  helpful     en
3                                          too crowded     en
4                  good   phone number    ever answers     en
..                                                 ...    ...
565                        excellent customer services     en
566                               banking transactions     en
567                               banking transactions     en
568                               banking transactions     en
569                             beautiful   chef opens     en

[570 rows x 2 columns]


In [52]:
!pip install nltk

import nltk
nltk.download('vader_lexicon')

from nltk.sentiment.vader import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()

def vader_sentiment(text, threshold=0.1, scorer=None):
    """Classify ``text`` as "Positive", "Negative" or "Neutral".

    Args:
        text: Comment to score. ``None``, NaN and empty strings are
            "Neutral" and are never passed to the scorer.
        threshold: A compound score strictly above ``threshold`` is
            "Positive", strictly below ``-threshold`` is "Negative".
        scorer: Object exposing ``polarity_scores(text)`` that returns a
            mapping with a ``"compound"`` entry. Defaults to the
            notebook-level ``analyzer``.

    Returns:
        One of "Positive", "Negative" or "Neutral".
    """
    # Guard before scoring: missing values would otherwise reach the scorer.
    # `text != text` is only true for NaN.
    if text is None or text != text or not text:
        return "Neutral"
    if scorer is None:
        scorer = analyzer
    score = scorer.polarity_scores(str(text))["compound"]
    if score > threshold:
        return "Positive"
    if score < -threshold:
        return "Negative"
    return "Neutral"

# Label every comment, then preview the first rows.
df["sentiment"] = df["commentaire"].map(vader_sentiment)
df.loc[:, ["commentaire", "sentiment"]].head()




[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/sabrine123/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Unnamed: 0,commentaire,sentiment
0,swear god feel sorry livelihood window...,Positive
1,attitude representative responsible sellin...,Positive
2,personnel helpful,Positive
3,too crowded,Neutral
4,good phone number ever answers,Positive


In [55]:
from sklearn.feature_extraction.text import CountVectorizer
from gensim import corpora, models
import warnings
# Suppress all Python warnings from this point on.
warnings.filterwarnings("ignore")

# 1. Light preprocessing: lowercase, drop every character that is neither a
#    word character nor whitespace, then split on whitespace into tokens.
texts = (
    df["commentaire"]
    .astype(str)
    .str.lower()
    .str.replace(r"[^\w\s]", "", regex=True)
    .str.split()
)

# 2. Build the dictionary and the bag-of-words corpus used by LDA.
dictionary = corpora.Dictionary(texts)
corpus = list(map(dictionary.doc2bow, texts))

# 3. Train the LDA model (5 topics, 10 passes, fixed seed).
lda_model = models.LdaModel(
    random_state=42,
    passes=10,
    num_topics=5,
    id2word=dictionary,
    corpus=corpus,
)

# 4. Dominant-topic lookup for a single review
def get_topic_name(bow, model=None):
    """Return the label ``"Topic <id>"`` of the most probable topic for ``bow``.

    Args:
        bow: Bag-of-words of one review, as produced by ``dictionary.doc2bow``.
        model: Object exposing ``get_document_topics(bow)`` that returns
            ``(topic_id, probability)`` pairs. Defaults to the notebook-level
            ``lda_model``.

    Returns:
        ``"Topic <id>"`` for the highest-probability topic, or ``"Inconnu"``
        when the review has no known tokens or the model reports no topic.
    """
    # A review without any dictionary token carries no evidence; whatever
    # topic the model reported for it would not reflect the review, so it is
    # labelled "Inconnu" without querying the model.
    if not bow:
        return "Inconnu"
    if model is None:
        model = lda_model
    topics = model.get_document_topics(bow)
    if not topics:
        return "Inconnu"
    best = max(topics, key=lambda pair: pair[1])
    return f"Topic {best[0]}"

# Label each review with its dominant topic (one bag-of-words per token list),
# then preview the first rows.
df["topic"] = list(map(get_topic_name, map(dictionary.doc2bow, texts)))

df.loc[:, ["commentaire", "topic"]].head()


Unnamed: 0,commentaire,topic
0,swear god feel sorry livelihood window...,Topic 2
1,attitude representative responsible sellin...,Topic 2
2,personnel helpful,Topic 4
3,too crowded,Topic 1
4,good phone number ever answers,Topic 0


In [56]:
# Show the 10 highest-weighted words of every topic
for topic_id in range(lda_model.num_topics):
    header = f"\n Topic {topic_id}:"
    print(header)
    print(lda_model.print_topic(topicno=topic_id, topn=10))
# Hand-written labels for the LDA topic ids.
topic_names = {
    "Topic 0": "Service client et temps d‚Äôattente",
    "Topic 1": "Avis positifs sur l‚Äôagence",
    "Topic 2": "Exp√©rience bancaire g√©n√©rale",
    "Topic 3": "ATM et retrait d‚Äôargent",
    "Topic 4": "Probl√®mes de contact"
}
# replace() leaves values that are not keys of topic_names untouched, whereas
# map() turns them into NaN. Re-running this cell therefore keeps the labels
# already applied, and a review labelled "Inconnu" stays "Inconnu".
df["topic"] = df["topic"].replace(topic_names)



 Topic 0:
0.027*"service" + 0.018*"bank" + 0.012*"bad" + 0.012*"agency" + 0.008*"customer" + 0.007*"like" + 0.007*"time" + 0.006*"worst" + 0.006*"phone" + 0.006*"account"

 Topic 1:
0.031*"bank" + 0.025*"service" + 0.019*"good" + 0.011*"agency" + 0.008*"account" + 0.007*"staff" + 0.007*"customers" + 0.007*"banking" + 0.006*"bad" + 0.006*"poor"

 Topic 2:
0.020*"service" + 0.019*"bank" + 0.012*"agency" + 0.010*"account" + 0.009*"services" + 0.006*"banking" + 0.006*"bad" + 0.006*"us" + 0.006*"money" + 0.005*"staff"

 Topic 3:
0.016*"agency" + 0.015*"bank" + 0.011*"service" + 0.009*"customer" + 0.006*"account" + 0.006*"cash" + 0.006*"atm" + 0.005*"customers" + 0.005*"avoid" + 0.005*"contact"

 Topic 4:
0.016*"bank" + 0.007*"service" + 0.007*"phone" + 0.007*"customers" + 0.007*"answer" + 0.006*"get" + 0.005*"time" + 0.005*"agency" + 0.005*"transfer" + 0.004*"never"


In [57]:
# Preview the first five enriched rows.
df.iloc[:5]

Unnamed: 0,id,banque_id,auteur,commentaire,note,date_review,langue,sentiment,topic
0,80,ChIJkxAF_rNFoA0RFKJtEV1uBus,9ndiche 9ndiche,swear god feel sorry livelihood window...,1.0,2025-02-17 14:44:34,en,Positive,Exp√©rience bancaire g√©n√©rale
1,12,ChIJ_xkG2N5EoA0R9iH5gUUfWsY,a la,attitude representative responsible sellin...,1.0,2024-04-11 23:59:45,en,Positive,Exp√©rience bancaire g√©n√©rale
2,39,ChIJmzhzTtBaoA0RIuTmqvIhf2o,anoir,personnel helpful,4.0,2023-01-09 14:46:47,en,Positive,Probl√®mes de contact
3,432,ChIJw9bQ29haoA0RxzInTJKRSxY,anoir,too crowded,2.0,2023-04-05 13:06:09,en,Neutral,Avis positifs sur l‚Äôagence
4,150,ChIJn7j5Uve2sw0RACLDr1ffbp4,abdallah el aidous,good phone number ever answers,1.0,2024-08-09 14:26:50,en,Positive,Service client et temps d‚Äôattente


In [59]:
# PostgreSQL connection
import os

from sqlalchemy import create_engine
from sqlalchemy import text

# The connection URL is taken from DATABASE_URL when it is set; the literal is
# only the fallback for an unconfigured local run.
engine = create_engine(
    os.environ.get(
        "DATABASE_URL",
        "postgresql+psycopg2://postgres:to@localhost:5432/banques_maroc",
    )
)

# Empty and refill avis_enrichi inside ONE transaction: engine.begin() commits
# when the block succeeds and rolls back when it raises, so a failed insert no
# longer leaves the table truncated and empty.
with engine.begin() as conn:
    conn.execute(text("TRUNCATE TABLE avis_enrichi"))  # empty the table
    rows_written = df.to_sql("avis_enrichi", conn, index=False, if_exists="append")  # insert the rows

# Value returned by to_sql, shown as the cell output.
rows_written


570