# Building a Dashboard for Ipsos in Python using Dash

We built the dashboard using Dash, after having previously processed the data in final.csv.

In [57]:

import ast
import base64
from collections import Counter
from io import BytesIO

import dash
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import dcc, html
from dash.dependencies import Input, Output
from PIL import Image  # Load images from files
from wordcloud import WordCloud, ImageColorGenerator  # Generate wordclouds


# Load the pre-processed article data once at start-up; the callback below
# filters this frame on every dropdown change.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
df = pd.read_csv('/Users/behemoth/Downloads/Dashboard/final.csv')

# Load CSS files in a specific order
# NOTE(review): 'style.css' is a relative URL; confirm it resolves from the
# served page (Dash also auto-loads stylesheets placed in an assets/ folder).
external_css = [
    'https://fonts.googleapis.com/css2?family=Roboto:wght@400;500&display=swap',
    'style.css'
]

# Create the app exactly once. Previously a first Dash instance was configured
# with suppress_callback_exceptions and then replaced by a second instance,
# which silently dropped that setting; passing it to the constructor keeps it.
app = dash.Dash(
    __name__,
    external_stylesheets=external_css,
    suppress_callback_exceptions=True,
)

# Page header. The children are kept in a list because the interactive
# components are appended to app.layout.children further down.
app.layout = html.Div([
    html.H1("Keyword analysis", className='title'),
    html.H3("Choose the best keywords for your SEO optimisation and advertising needs.", className='subtitle')

])
# News-source choices: a catch-all 'all' entry first, followed by one entry
# per distinct non-null value found in the news_source column.
news_source_options = [
    {'label': 'All News Sources', 'value': 'all'},
    *(
        {'label': source, 'value': source}
        for source in df['news_source'].dropna().unique()
    ),
]

# Sentiment choices, written as (label, value) pairs and expanded into the
# option dictionaries that dcc.Dropdown expects.
sentiment_options = [
    {'label': text, 'value': key}
    for text, key in (
        ('All Sentiments', 'all'),
        ('Most Positive', 'positive'),
        ('Most Negative', 'negative'),
    )
]

# Both dropdowns start on the 'all' entry and cannot be cleared, so the
# callback always receives a value for each of them.
news_source_dropdown = dcc.Dropdown(
    options=news_source_options,
    id='news-source-dropdown',
    placeholder="Select a news source",
    searchable=True,
    clearable=False,
    value='all',
)

sentiment_dropdown = dcc.Dropdown(
    options=sentiment_options,
    id='sentiment-dropdown',
    placeholder="Select sentiment",
    searchable=False,
    clearable=False,
    value='all',
)
def _build_dashboard_body():
    """Assemble the filter panel and the three chart containers into one Div."""
    # Absolutely positioned panel holding the two labelled dropdowns.
    source_filter = html.Div([
        html.Label('News Source:', className='label-dropdown'),
        news_source_dropdown,
    ], style={'margin-bottom': '10px'})
    sentiment_filter = html.Div([
        html.Label('Sentiment:', className='label-dropdown'),
        sentiment_dropdown
    ], style={'margin-top': '10px'})
    filters_panel = html.Div(
        [source_filter, sentiment_filter],
        style={'position': 'absolute', 'top': '30px', 'right': '10px', 'width': '20%', 'color': '#000000'},
    )

    # Bar chart and word-cloud image, each in its own absolutely positioned box.
    bar_chart_box = html.Div(
        [dcc.Graph(id='label-bar-chart')],
        style={'width': '45%', 'display': 'inline-block', 'position': 'absolute', 'align': 'left', 'background-color': '#2C2C2C', 'border-radius': '5px', 'padding': '5px', 'left': '25px', 'top': '200px'},
    )
    word_cloud_box = html.Div(
        [html.Img(id='label-word-cloud')],
        style={'width': '53%', 'display': 'inline-block', 'position': 'absolute', 'top': '250px', 'left': '760px'},
    )
    charts_row = html.Div([bar_chart_box, word_cloud_box])

    # Area chart, wrapped in a dark rounded card, placed below the first row.
    area_chart_card = html.Div(
        [dcc.Graph(id='label-area-chart')],
        style={'background-color': '#2C2C2C', 'border-radius': '5px', 'padding': '5px'},
    )
    timeline_row = html.Div(
        [area_chart_card],
        style={'width': '55%', 'float': 'left', 'position': 'absolute', 'top': '690px', 'left': '25px'},
    )

    return html.Div([filters_panel, charts_row, timeline_row])


# Attach the interactive part of the page after the header elements.
app.layout.children.append(_build_dashboard_body())

# How many of the most frequent labels feed the bar/area charts and the cloud.
_TOP_LABELS_CHART = 15
_TOP_LABELS_CLOUD = 50

# Word-cloud canvas; also used for the blank placeholder image.
_CLOUD_SIZE = (600, 300)
_CLOUD_BACKGROUND = '#121212'

_BAR_TITLE = "<b>Top Labels</b>"
_AREA_TITLE = "<b>Label Evolution Over Time</b>"


def _parse_labels(raw):
    """Convert a CSV cell holding a Python list literal into the list itself.

    Non-string cells are returned unchanged. Raises ValueError or SyntaxError
    when a string cell is not a valid Python literal.
    """
    if isinstance(raw, str):
        # literal_eval only accepts literals, so cell contents can never be
        # executed as code (which eval would do).
        return ast.literal_eval(raw)
    return raw


def _image_to_data_uri(image):
    """Encode a PIL image as a base64 PNG data URI usable as an <img> src."""
    # Encode in memory: no file on disk that concurrent callbacks could
    # overwrite while another one is reading it.
    buffer = BytesIO()
    image.save(buffer, format='PNG')
    return 'data:image/png;base64,{}'.format(base64.b64encode(buffer.getvalue()).decode())


def _style_figure(fig, width):
    """Apply the dashboard's transparent background and font settings to fig."""
    fig.update_layout(
        width=width,
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        font_family='PP Mori',
        font_color='#FFFFFF',
    )
    return fig


def _empty_figure(title, width):
    """Return a themed figure with a title but no traces."""
    fig = go.Figure()
    fig.update_layout(title=title)
    return _style_figure(fig, width)


@app.callback(
    [Output('label-bar-chart', 'figure'), Output('label-word-cloud', 'src'), Output('label-area-chart', 'figure')],
    [Input('news-source-dropdown', 'value'),
     Input('sentiment-dropdown', 'value')]
)
def update_graph(selected_news_source, selected_sentiment):
    """Rebuild the three dashboard visuals for the current dropdown selection.

    Args:
        selected_news_source: 'all', or a value of the news_source column.
        selected_sentiment: 'all', 'positive' (sentiment > 0) or
            'negative' (sentiment < 0).

    Returns:
        A tuple (bar figure, word-cloud data URI, area figure). When the
        selection contains no labels, empty figures and a blank image are
        returned instead.
    """
    # Combine both filters into one mask and take an explicit copy, so the
    # column assignments below never write into a slice of the shared frame.
    keep = pd.Series(True, index=df.index)
    if selected_news_source != 'all':
        keep &= df['news_source'] == selected_news_source
    if selected_sentiment == 'positive':
        keep &= df['sentiment'] > 0
    elif selected_sentiment == 'negative':
        keep &= df['sentiment'] < 0
    filtered_df = df[keep].copy()

    # Process labels: one Counter over every label of every remaining row.
    filtered_df['labels'] = filtered_df['labels'].apply(_parse_labels)
    label_counts_filtered = Counter(
        label
        for cell in filtered_df['labels']
        for label in (cell if isinstance(cell, list) else [cell])
    )

    if not label_counts_filtered:
        # WordCloud refuses an empty frequency table, so answer with
        # placeholders rather than letting the callback fail.
        blank = Image.new('RGB', _CLOUD_SIZE, _CLOUD_BACKGROUND)
        return (
            _empty_figure(_BAR_TITLE, 700),
            _image_to_data_uri(blank),
            _empty_figure(_AREA_TITLE, 800),
        )

    # Select top labels for the bar chart (and, further down, the area chart).
    top_labels = label_counts_filtered.most_common(_TOP_LABELS_CHART)
    top_labels_names = [label for label, _ in top_labels]

    # Create bar chart
    bar_fig = px.bar(x=top_labels_names, y=[count for _, count in top_labels],
                     title=_BAR_TITLE, labels={'x': 'Label', 'y': 'Count'})
    _style_figure(bar_fig, 700)

    # Create word cloud
    # NOTE(review): font_path is an absolute, machine-specific path.
    fog_machine = WordCloud(width=_CLOUD_SIZE[0],
                            height=_CLOUD_SIZE[1],
                            min_font_size=14,
                            background_color=_CLOUD_BACKGROUND,
                            font_path="/Users/behemoth/Downloads/Dashboard/PPMori-Regular.otf",
                            colormap='viridis')
    fog_machine.generate_from_frequencies(dict(label_counts_filtered.most_common(_TOP_LABELS_CLOUD)))
    wordcloud_img_src = _image_to_data_uri(fog_machine.to_image())

    # Prepare data for the stacked area chart
    filtered_df['publish_date'] = pd.to_datetime(filtered_df['publish_date'])
    # Filter data to only include dates after 2012
    recent_df = filtered_df[filtered_df['publish_date'].dt.year > 2012]
    area_chart_df = recent_df.explode('labels')
    area_chart_df = area_chart_df[area_chart_df['labels'].isin(top_labels_names)]

    if area_chart_df.empty:
        # Nothing left after the date cutoff: show an empty, themed chart.
        area_fig = _empty_figure(_AREA_TITLE, 800)
    else:
        area_chart_df = (
            area_chart_df
            .groupby([area_chart_df['publish_date'].dt.to_period('M'), 'labels'])
            .size()
            .reset_index(name='counts')
        )
        # Convert Period to string for JSON serialization
        area_chart_df['publish_date'] = area_chart_df['publish_date'].astype(str)
        area_chart_df = area_chart_df.pivot(index='publish_date', columns='labels', values='counts').fillna(0)
        # A top label may have no rows after the date cutoff; reindex adds it
        # as an all-zero column where plain column selection raised KeyError.
        area_chart_df = area_chart_df.reindex(
            columns=pd.Index(top_labels_names, name='labels'), fill_value=0
        )
        # Create stacked area chart
        area_fig = px.area(area_chart_df,
                           facet_col_wrap=2,
                           title=_AREA_TITLE)
        _style_figure(area_fig, 800)

    area_fig.update_layout(xaxis=dict(rangeslider=dict(visible=True), type="date"))

    return bar_fig, wordcloud_img_src, area_fig

if __name__ == '__main__':
    # Start the development server on port 8053 in external Jupyter mode.
    # app.run is the supported entry point; run_server is a deprecated alias
    # that newer Dash releases no longer provide.
    app.run(debug=True, jupyter_mode="external", port=8053)

Dash app running on http://127.0.0.1:8053/
