pip install dash
pip install dash-bootstrap-components
pip install dash-mantine-components==0.12.1
pip install dash-holoniq-wordcloud

**SATISFACTION CLIENT SENTIMENT ANALYSIS AND DASHBOARD**

In [1]:
# Section 1: Importation des librairies
import dash
from dash import Dash, html, dcc, Output, Input
import pandas as pd
import plotly.express as px
import dash_bootstrap_components as dbc
import requests  # Nouveau
import warnings
from textblob import TextBlob
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
import re

warnings.filterwarnings("ignore")

# Base URL de l'API Flask
#BASE_URL = "http://flask_api:5000/api"
BASE_URL = "http://localhost:5000/api"

**Load data**

In [2]:
# Récupérer les données des entreprises via l'API
companies_response = requests.get(f"{BASE_URL}/companies")
if companies_response.status_code == 200:
    df = pd.DataFrame(companies_response.json())
else:
    print("Erreur lors de la récupération des entreprises :", companies_response.json())
    df = pd.DataFrame()  # DataFrame vide en cas d'erreur

# Récupérer les données des commentaires via l'API
comments_response = requests.get(f"{BASE_URL}/comments")
if comments_response.status_code == 200:
    df_comments = pd.DataFrame(comments_response.json())
else:
    print("Erreur lors de la récupération des commentaires :", comments_response.json())
    df_comments = pd.DataFrame()  # DataFrame vide en cas d'erreur

# Renommer la colonne 'five_star_percentage' en 'five_star_%'
df.rename(columns={'five_star_percentage': 'five_star_%'}, inplace=True)

#Renommer la colonne review
df.rename(columns={'review': 'nombre_reviews'}, inplace=True)

# Concaténer '%' aux valeurs et ignorer les NaN
df['five_star_%'] = df['five_star_%'].apply(lambda x: f"{x :.0f}%" if pd.notna(x) and isinstance(x, (int, float)) else x)

In [3]:
df_comments.head(10)

Unnamed: 0,Titre,User,commentaire,company_name,date_experience,localisation,nombre_reviews,reply
0,"bank and staff great, ATM locations are terria...",coRpSE,"I have had great experience with them, but, I ...",Evergreen Credit Union,"October 15, 2024",US,1 review,"{'reply_date': 'Monday, October 21, 2024 at 04..."
1,Great customer service,Tony Field,"Casey was a pleasure to deal with, quick respo...",Evergreen Credit Union,"November 05, 2024",US,1 review,
2,Casey was amazing with the used auto…,Brian Conroy,Casey was amazing with the used auto loan & en...,Evergreen Credit Union,"September 25, 2024",US,1 review,"{'reply_date': 'Monday, October 7, 2024 at 04:..."
3,Evergreen Credit Union was amazing to…,Sherry Spaulding,Evergreen Credit Union was amazing to work wit...,Evergreen Credit Union,"July 02, 2024",US,1 review,"{'reply_date': 'Monday, July 8, 2024 at 08:41:..."
4,Amazing Team at Evergreen,Ashleigh Scalamandre,I just wanted to thank the Evergreen Team agai...,Evergreen Credit Union,"July 09, 2024",US,1 review,"{'reply_date': 'Thursday, July 18, 2024 at 05:..."
5,Best Ever Banking Experience,Lindsay Edwards,Casey has been amazing to work with. I have be...,Evergreen Credit Union,"May 23, 2024",US,1 review,"{'reply_date': 'Tuesday, May 28, 2024 at 08:03..."
6,Security,Ken,I called the Evergreen's Portland location on ...,Evergreen Credit Union,"April 20, 2024",SE,1 review,"{'reply_date': 'Monday, May 6, 2024 at 07:35:4..."
7,I requested bank statements and the…,Noel Sherburne,I requested bank statements and the request wa...,Evergreen Credit Union,"June 18, 2024",US,1 review,"{'reply_date': 'Thursday, June 20, 2024 at 04:..."
8,Rebecca was very helpful in finding my…,Charlie Quatrano,Rebecca was very helpful in finding my transac...,Evergreen Credit Union,"August 02, 2024",US,1 review,"{'reply_date': 'Monday, August 26, 2024 at 03:..."
9,Disappointed,Marian Giovannini,I was very disappointed to be denied a credit ...,Evergreen Credit Union,"July 18, 2024",US,1 review,"{'reply_date': 'Thursday, July 18, 2024 at 05:..."


In [4]:
df.head(10)

Unnamed: 0,company_id,company_name,country,five_star_%,institution_type,nombre_reviews,town,trust_score
0,1,,United States,95%,Financial Institution,346,Portland,4.8
1,2,,United States,92%,Financial Institution,320,Lincoln,4.8
2,3,,United States,66%,Bank,3322,Irvine,4.3
3,4,,United States,65%,Cryptocurrency Service,227,Chicago,4.1
4,5,,United States,100%,Software Vendor,15,Cleveland,4.5
5,6,,United States,93%,Financial Institution,15,Atlanta,4.4
6,7,,United States,100%,Financial Institution,10,San Francisco,4.4
7,8,,United States,100%,Business to Business Service,4,San Diego,4.0
8,9,,United States,67%,Debt Relief Service,9,San Jose,3.8
9,10,,United States,37%,ATM,8,Las Vegas,3.7


REQUETES SUR COMPANIES ET REVIEWS

LECTURE DES DONNEES COMPANIES ET REVIEWS SUR LA DATAFRAME

In [5]:
def company_comment_processing(df_comments):

    df_comments['company_name']= df_comments['company_name'].apply(lambda x: str(x).strip()[:11] + "...")

    text_df = df_comments.drop(['User', 'localisation', 'Titre','nombre_reviews', 'date_experience', 'reply'
           ], axis=1)
           
    text_df.rename(columns={'commentaire': 'text'}, inplace=True)
    #text_df.head()

    return text_df

#Comment processing
def comments_preprocessing(text):
    text = text.lower()
    text = re.sub(r"https\S+|www\S+https\S+", '',text, flags=re.MULTILINE)
    text = re.sub(r'\@w+|\#','',text)
    text = re.sub(r'[^\w\s]','',text)
    text_tokens = word_tokenize(text)
    filtered_text = [w for w in text_tokens if not w in stop_words]

    return " ".join(filtered_text)


In [6]:
#Stream word

stemmer = PorterStemmer()
def stemming(data):
    text = [stemmer.stem(word) for word in data]
    return data

#Polarity fxn
def polarity(text):
    return TextBlob(text).sentiment.polarity

#Comment status
def sentiment(label):
    if label <0:
        return "Negative"
    elif label ==0:
        return "Neutral"
    elif label>0:
        return "Positive"


def comment_polarity(text_df):
    
    #text_df.text = text_df['text'].apply(comments_preprocessing)
    text_df = text_df.drop_duplicates('text')

    #apply streamer
    text_df['text'] = text_df['text'].apply(lambda x: stemming(x))

    #compute polarity
    text_df['polarity'] = text_df['text'].apply(polarity)

    #Detect comment polarity
    text_df['sentiment'] = text_df['polarity'].apply(sentiment)

    return text_df

# Vérifiez les données récupérées par l'API
print("Données de commentaires récupérées :", df_comments)


def compute_sentiment_analysis(df_comments):

    text_df = company_comment_processing(df_comments)

    df_polarity =pd.DataFrame()

    for company in text_df.company_name.unique():
        text_df[text_df.company_name==company]
        comp_pol =comment_polarity(text_df[text_df.company_name==company])
        comp_pol['company_name']= company
        df_polarity= pd.concat([df_polarity,comp_pol])

    #fig = plt.figure(figsize=(5,5))
    #sns.countplot(x='sentiment', data = df_polarity)
    return df_polarity




Données de commentaires récupérées :                                                  Titre                  User  \
0    bank and staff great, ATM locations are terria...                coRpSE   
1                               Great customer service            Tony Field   
2                Casey was amazing with the used auto…          Brian Conroy   
3               Evergreen Credit Union was amazing to…      Sherry Spaulding   
4                            Amazing Team at Evergreen  Ashleigh Scalamandre   
..                                                 ...                   ...   
375                     Most ungrateful bank out there                  Kai.   
376                                 PNC is so horrible        Christy Hedger   
377                             I never in my lifetime                Johnny   
378                                Great Start Up Bank                P MacG   
379                 Had Fraudulent charges $1,100 PNC…       Todd lane Brown   

  

In [9]:

def get_company_sentiment_count_per_year(df_comments, df_polarity):

    df_comments.rename(columns={"commentaire": "text"}, inplace=True)
    df_p = df_polarity.merge(df_comments[['year', 'company_name','text']], on=['text', 'company_name'], how='left')
    
    # Group by company_name, year, and sentiment, then count the occurrences
    grouped_df = df_p.groupby(['company_name', 'year', 'sentiment']).size().unstack(fill_value=0)
    
    # Reset the index to make the DataFrame more readable
    grouped_df = grouped_df.reset_index()

    #print(grouped_df.head())

    return grouped_df


In [10]:
grouped_df.head(10)


NameError: name 'grouped_df' is not defined

In [11]:
df_g = grouped_df.groupby(['year', 'company_name']).size().reset_index()

df_g


NameError: name 'grouped_df' is not defined

In [None]:
warnings.filterwarnings("ignore")
# Load data
#df = pd.read_csv('atm_company_info.csv')

# Initialize Dash app

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


################################################################################################################
###### Company Analysis Figures
################################################################################################################
# Map Visualization
#map_fig = px.scatter_mapbox(df, lat='latitude', lon='longitude', hover_name='company_name',
#                            color='trust_score', size='trust_score',
#                            mapbox_style="carto-positron", zoom=3, height=500)

# Company Trust Score Bar Chart
trust_fig = px.bar(df, y='company_name', x='trust_score', color='trust_score', title="Trust Score by Company")
trust_fig.update_layout(barmode='stack', yaxis={'categoryorder':'total ascending'}, height=500)

# Institution Type Pie Chart
institution_fig = px.pie(df, names='institution_type', title="Institution Type Distribution")

# Review Distribution Histogram
# Remove the '%' sign and convert to numeric type
df['five_star_%'] = df['five_star_%'].str.rstrip('%').astype('float')
df = df.sort_values('five_star_%')
df['five_star_%'] = df['five_star_%'].astype(str) + '%'
review_dist_fig = px.histogram(df, x='five_star_%', title="Review Distribution", color='five_star_%')

# Top Reviewed Companies
top_companies_fig = px.bar(df.sort_values(by='nombre_reviews', ascending=False).head(10), 
                           x='company_name', y='nombre_reviews', title="Top Reviewed Companies")

# Company Comparison Radar Chart (trust_score vs five_star_%)
#df_melt = pd.melt(df, id_vars=['company_name'], value_vars=['trust_score', 'five_star_%'])
#comparison_fig = px.line_polar(df_melt, r='value', theta='variable', color='company_name', 
#                               line_close=True, title="Company Comparison")




################################################################################################################
###### Comment Sentiment Analysis 
################################################################################################################
#print(" Comment Sentiment Analysis")


# Example sentiment analysis - dummy data
#df_comments['sentiment'] = df_comments['commentaire'].apply(lambda x: 'Positive' if 'good' in x else 'Negative')

df_polarity = compute_sentiment_analysis(df_comments)
# Sentiment Analysis Bar Chart
sentiment_fig = px.bar(df_polarity, x='company_name', color='sentiment', title="Sentiment Analysis per company")

# Comments Over Time Line Chart
df_comments['date_experience'] = pd.to_datetime(df_comments['date_experience'])
df_comments['year'] = df_comments.date_experience.dt.year
df_g = df_comments.groupby(['year', 'company_name']).size().reset_index()
df_g.rename(columns={0: 'nbr_comments'}, inplace=True)
comments_over_time_fig = px.line(df_g, x='year', y='nbr_comments', title="Number of Comments Over Time", color='company_name', markers=True)

# Sentiment Analysis Histogram per year per company
grouped_df = get_company_sentiment_count_per_year(df_comments,df_polarity)

################################################################################################################
###### Dashboards/layouts 
################################################################################################################
app.layout = dbc.Container([
    dbc.Row([        
        ############### Add filters
        dbc.Col([dcc.Graph(figure=map_fig)], width=12),
        dbc.Col([html.H4("Company Dashboard")], width=12),
        #dbc.Col([dcc.Dropdown(
        #    id='dropdown-town',
        #    options=[{'label': x, 'value': x} for x in df['town'].unique()],
        #    multi=True,
        #    placeholder="Select a town"
        #)], width=6),
        #dbc.Col([ dcc.Dropdown(
        #    id='filter-country',
        #    options=[{'label': x, 'value': x} for x in df['country'].unique()],
        #    multi=False,
        #    placeholder="Select a Country"
        #)], width=6),     
        dbc.Col([dcc.Graph( id = 'filter-town',figure=trust_fig)], width=6),
        #dbc.Col([dcc.Graph(figure=institution_fig)], width=6),
        dbc.Col([dcc.Graph(figure=top_companies_fig)], width=6),
        #dbc.Col([dcc.Graph(figure=review_dist_fig)], width=6),
        #dbc.Col([dcc.Graph(figure=comparison_fig)], width=6),
        #dbc.Col([dcc.Graph(id='filtered-graph')], width=6),
    ]),
    dbc.Row([        
        dbc.Col([html.H4("User Feedback Dashboard")], width=12),
        dbc.Col([dcc.Graph(figure=sentiment_fig)], width=6),
        dbc.Col([dcc.Graph(figure=comments_over_time_fig)], width=6), 

        html.H1("Sentiment Analysis Histogram per year per company"),
        # Dropdown for selecting a company
        dcc.Dropdown(
            id='company-dropdown',
            options=[{'label': company, 'value': company} for company in grouped_df['company_name'].unique()],
            value=grouped_df['company_name'].unique()[0],  # Default to the first company
            clearable=False,
            style={'width': '50%'}
        ),
    
    # Graph to display the histogram
    dcc.Graph(id='histogram'),
    ]),
])

# Filtered Trust Score Chart
"""@app.callback(
    dash.dependencies.Output('filtered-graph', 'figure'),
    [dash.dependencies.Input('dropdown-town', 'value')]
)"""

def update_graph(selected_town):
    if selected_town:
        filtered_df = df[df['town'].isin(selected_town)]
    else:
        filtered_df = df
    
    fig = px.bar(filtered_df, x='company_name', y='trust_score', color='trust_score', title="Filtered Trust Score")
    fig.update_layout(xaxis_title="Year", yaxis_title="Count")
    return fig

# Define the callback to update the graph based on the selected company
@app.callback(
    Output('histogram', 'figure'),
    [Input('company-dropdown', 'value')]
)
def update_histogram(selected_company):
    # Filter the DataFrame for the selected company
    filtered_df = grouped_df[grouped_df['company_name'] == selected_company]
    
    # Create a histogram using Plotly Express
    fig = px.bar(
        filtered_df.melt(id_vars=['company_name', 'year']),
        x='year',
        y='value',
        color='sentiment',
        barmode='group',
        title=f'Sentiment Types Trend for {selected_company} per Year'
    )
    fig.update_layout(xaxis_title="Year", yaxis_title="Count")
    return fig

if __name__ == "__main__":
    app.run_server(host='localhost', port=8050, debug=True)


KeyError: 'year'