In [637]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import plotly.graph_objects as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import dash_bootstrap_components as dbc
import sys
import logging
import os

To allow for flexible changes, the styling values are defined as variables.

# NOTE: this code won't work because app.run_server prevents it; it could be done using
# threading, but that is beyond the scope of this assignment.

# Configure the root logger to write INFO-and-above records to app.log,
# truncating the file on start (filemode='w').
logging.basicConfig(filename='app.log', filemode='w', level=logging.INFO) # To help debugging
# Root logger handle used for the start-up message below.
logger = logging.getLogger()
# Stamp the log with the time this cell was run.
logger.info(f'Log file : {datetime.now()}')

In [638]:
# --- Base palette / typography -------------------------------------------
BACKGROUND_COLOR = '#242424'
FONT_COLOR = '#FFFFFF'
FONT_FAMILY = 'Lato'
PADDING = '10px'

# --- Heading styles (CSS property names are hyphenated, hence the pairs) --
H1_STYLE = dict([('font-size', '30px'), ('text-align', 'center')])
H3_STYLE = dict([('font-size', '14px')])

# --- Style shared by every KPI box: blue rounded tile, content centred ----
KPI_BOX_STYLE = dict(
    textAlign='center',
    padding=PADDING,
    color=FONT_COLOR,
    fontFamily=FONT_FAMILY,
    backgroundColor='#1E88E5',
    borderRadius='10px',
    height='100%',
    display='flex',
    justifyContent='center',
    alignItems='center',
)

In [639]:
# Dummy data generator initially used for testing; no longer needed by the dashboard.
def generate_data(num_rows=1000, seed=0):
    """Build a reproducible DataFrame of random construction figures.

    Parameters
    ----------
    num_rows : int, default 1000
        Number of rows to generate.
    seed : int, default 0
        Seed for the private random generator. With the default the values
        are the same as those produced by seeding NumPy's global generator
        with 0, but the global generator is left untouched.

    Returns
    -------
    pandas.DataFrame
        Columns ``Year`` (2000-2023), ``Country``, ``Construction Value``
        (1000-50000), ``Labor Wages`` (10-50) and ``Building Cost``
        (500-30000).
    """
    # A dedicated RandomState yields the same stream as np.random.seed(seed)
    # without reseeding the process-wide generator as a side effect.
    rng = np.random.RandomState(seed)

    country_pool = ['Germany', 'France', 'Italy', 'Spain', 'UK', 'Belgium',
                    'Netherlands', 'Denmark', 'Sweden', 'Norway', 'Finland']

    # The draw order below determines the generated values; keep it stable.
    years = rng.choice(range(2000, 2024), num_rows)
    countries = rng.choice(country_pool, num_rows)
    construction_values = rng.uniform(1000, 50000, num_rows)
    labor_wages = rng.uniform(10, 50, num_rows)
    building_costs = rng.uniform(500, 30000, num_rows)

    return pd.DataFrame({'Year': years, 'Country': countries,
                         'Construction Value': construction_values,
                         'Labor Wages': labor_wages,
                         'Building Cost': building_costs})

In [640]:
folder = 'Dashboard Data'

# Move into the data folder unless we are already inside it, so the cell can
# be re-run in the same kernel. Comparing the last path component avoids the
# false positives of a substring test on the full working directory.
if os.path.basename(os.getcwd()) != folder:
    os.chdir(folder)

# Stand-alone CSV inputs used by the individual charts.
labor_wages_cluster = pd.read_csv('Labor-Wages-Cluster.csv')
employment_country_cluster = pd.read_csv('Employment-Country-Cluster.csv')
sentiment_data = pd.read_csv('finalRedditSentiment.csv')
sentiment_topics = pd.read_csv('Sentiment-Topics.csv')

# One "<Country>_GVA..." CSV per country; the country name is taken from the
# file name and added as a column before stacking everything together.
dfs = []

# sorted() makes the row order of gva_df independent of the file system.
for file in sorted(os.listdir()):

    # Only accept per-country GVA CSV files; other entries that merely
    # contain "GVA" (notes, notebooks, folders) are skipped.
    if '_GVA' in file and file.lower().endswith('.csv'):

        country = file.split('_GVA')[0]

        df = pd.read_csv(file)

        df['Country'] = country

        dfs.append(df)

if not dfs:
    # pd.concat([]) would fail with a much less helpful message.
    raise FileNotFoundError(f"No '<Country>_GVA*.csv' files found in {os.getcwd()}")

gva_df = pd.concat(dfs)

gfcf_df = pd.read_csv('GFCF.csv')

sts_employment_df = pd.read_csv('STS_Employment.csv')

gva_countries = gva_df['Country'].unique()
gfcf_countries = gfcf_df['Country'].unique()

# Countries present in both data sets; sorted so the default dropdown
# selection has a stable, alphabetical order between runs.
common_countries = set(gva_countries) & set(gfcf_countries)
common_countries_list = sorted(common_countries)

In [641]:
# Dummy frame from generate_data(); the comment on that helper says it was
# only used for early testing.
df = generate_data()
# Dash application instance, styled with the CYBORG theme from
# dash_bootstrap_components. Callbacks and the layout are attached to it below.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.CYBORG])

In [642]:
def create_kpi_box(box_id):
    """Return a width-3 column wrapping an empty, styled KPI placeholder.

    The inner ``html.Div`` gets ``box_id`` as its component id and the shared
    ``KPI_BOX_STYLE``.
    """
    placeholder = html.Div(id=box_id, style=KPI_BOX_STYLE)
    return dbc.Col(placeholder, width=3)

In [643]:
@app.callback(
    Output('scatter-plot', 'figure'),
    [Input('country-dropdown', 'value')]
)
def update_scatter_plot(countries):
    """Redraw the yearly GVA / predicted-GVA line chart for the chosen countries.

    Parameters
    ----------
    countries : list[str] | None
        Current value of the country dropdown. ``None`` (cleared dropdown)
        is treated as an empty selection.

    Returns
    -------
    dict
        Plotly figure dict with one trace for total GVA and one for total
        predicted GVA per year. If anything goes wrong the error is logged
        and a figure with no traces is returned.
    """
    layout = go.Layout(
        title='Total GVA and Total Predicted GVA by Year',
        xaxis=dict(title='Year'),
        yaxis=dict(title='Value'),
        autosize=True
    )

    try:
        value_columns = ['GVA', 'Predicted GVA']
        selected = countries or []

        # Take an explicit copy of just the needed columns: assigning into a
        # boolean-filtered slice triggers pandas' SettingWithCopyWarning.
        gva_filtered = gva_df.loc[gva_df['Country'].isin(selected), ['Year'] + value_columns].copy()

        gva_filtered['Predicted GVA'] = gva_filtered['Predicted GVA'] * 1e6
        gva_filtered['GVA'] = gva_filtered['GVA'] * 1e6

        # Sum per year. min_count=1 keeps a year NaN when it has no values at
        # all; with the default, all-NaN years sum to 0 and the notna()
        # filter on the predicted series below could never drop anything.
        gva_grouped = gva_filtered.groupby('Year')[value_columns].sum(min_count=1).reset_index()

        # Drop years without actual GVA (0 or NaN) / without any prediction.
        gva_grouped_gva = gva_grouped[(gva_grouped['GVA'] != 0) & (gva_grouped['GVA'].notna())]
        gva_grouped_predicted_gva = gva_grouped[gva_grouped['Predicted GVA'].notna()]

        scatter_plot_gva = go.Scatter(
            x=gva_grouped_gva['Year'],
            y=gva_grouped_gva['GVA'],
            mode='lines+markers',
            name='Total GVA'
        )

        scatter_plot_predicted_gva = go.Scatter(
            x=gva_grouped_predicted_gva['Year'],
            y=gva_grouped_predicted_gva['Predicted GVA'],
            mode='lines+markers',
            name='Total Predicted GVA'
        )

        return {'data': [scatter_plot_gva, scatter_plot_predicted_gva], 'layout': layout}
    except Exception as e:
        logging.exception(e)
        # Return an explicit empty figure rather than an implicit None.
        return {'data': [], 'layout': layout}


In [644]:
@app.callback(
    Output('bar-chart', 'figure'),
    [Input('year-slider', 'value'),
     Input('top-5-check', 'value'),
     Input('country-dropdown', 'value')]
)
def update_bar_chart(year_range, check_values, countries):
    """Redraw the per-country construction-value bar chart.

    Only the upper end of the year slider is used: the chart shows a single
    year, as stated in its title.

    Parameters
    ----------
    year_range : list[int]
        ``[start, end]`` from the year range slider; ``end`` is the year shown.
    check_values : list[str] | None
        Checked options of the checklist; when it contains ``'top-5'`` only
        the five countries with the highest mean value are shown.
    countries : list[str] | None
        Selected countries; ``None`` is treated as an empty selection.

    Returns
    -------
    dict
        Plotly figure dict. On error the exception is logged and a figure
        with no traces is returned.
    """
    try:
        shown_year = year_range[1]
        selected = countries or []
        checked = check_values or []

        # The original ">= end and <= end" test is just equality with the end year.
        in_scope = (gfcf_df['Year'] == shown_year) & gfcf_df['Country'].isin(selected)

        # Copy the slice so the column assignment below does not write into a
        # view of gfcf_df (SettingWithCopyWarning).
        gfcf_filtered = gfcf_df.loc[in_scope, ['Country', 'Construction Value']].copy()

        # Scale by 1e6. The KPI callback labels the unscaled column "€...M",
        # so this converts FROM millions to absolute units.
        gfcf_filtered['Construction Value'] = gfcf_filtered['Construction Value'] * 1e6

        if 'top-5' in checked:
            # Rank countries by mean value and keep the five largest.
            gfcf_filtered = (
                gfcf_filtered.groupby('Country')['Construction Value']
                .mean()
                .sort_values(ascending=False)
                .head(5)
                .reset_index()
            )

        bar_chart = go.Bar(
            x=gfcf_filtered['Country'],
            y=gfcf_filtered['Construction Value'],
        )

        layout = go.Layout(
            title=f'Construction Value by Country for {shown_year}',
            xaxis=dict(title='Country'),
            yaxis=dict(title='Construction Value'),
            autosize=True
        )

        return {'data': [bar_chart], 'layout': layout}
    except Exception as e:
        logging.exception(e)
        # Return an explicit empty figure rather than an implicit None.
        return {'data': [], 'layout': {}}


In [645]:
@app.callback(
    Output('clustered-bar-chart', 'figure'),
    [Input('sentiment-topic-dropdown', 'value')]
)
def update_clustered_bar_chart(topics):
    """Redraw the grouped bar chart of mean sentiment scores per topic.

    Parameters
    ----------
    topics : list[str] | None
        Topics selected in the sentiment dropdown; ``None`` is treated as an
        empty selection.

    Returns
    -------
    dict
        Plotly figure dict with three bar traces (negative, neutral,
        positive), one bar per selected topic in selection order. On error
        the exception is logged and a figure with no traces is returned.
    """
    layout = go.Layout(
        title='Average Sentiment Values for Selected Topics',
        xaxis=dict(title='Topic'),
        yaxis=dict(title='Average Sentiment Value'),
        barmode='group',
        autosize=True
    )

    # (column, legend name, bar colour) for each sentiment series.
    series_specs = [
        ('neg', 'Negative Sentiment', 'red'),
        ('neu', 'Neutral Sentiment', 'grey'),
        ('pos', 'Positive Sentiment', 'green'),
    ]

    try:
        selected = list(topics or [])

        # One grouped mean instead of re-filtering the frame per topic;
        # reindex restores the dropdown order and yields NaN for topics
        # that have no rows.
        averages = (
            sentiment_topics[sentiment_topics['topic'].isin(selected)]
            .groupby('topic')[[column for column, _, _ in series_specs]]
            .mean()
            .reindex(selected)
        )

        data = [
            go.Bar(
                x=selected,
                y=averages[column].tolist(),
                name=label,
                marker_color=colour
            )
            for column, label, colour in series_specs
        ]

        return {'data': data, 'layout': layout}
    except Exception as e:
        logging.exception(e)
        # Return an explicit empty figure rather than an implicit None.
        return {'data': [], 'layout': layout}


In [646]:
@app.callback(
    Output('scatter-chart', 'figure'),
    [Input('dummy-input', 'value')]
)
def update_scatter_chart(dummy_value):
    """Build the labour-vs-wages scatter figure, one marker trace per cluster.

    ``dummy_value`` comes from the hidden input that exists only to trigger
    this callback; it is not read. Errors are logged and ``None`` is returned.
    """
    try:
        cluster_labels = labor_wages_cluster['Cluster']

        def trace_for(label):
            # Rows belonging to this cluster only.
            members = labor_wages_cluster[cluster_labels == label]
            return go.Scatter(
                x=members['Total Labor Employment Index'],
                y=members['Total Wages Index'],
                mode='markers',
                name=f'Cluster {label}',
                marker=dict(size=8)
            )

        figure_layout = go.Layout(
            title='Labor and Wages Clustering',
            xaxis=dict(title='Total Labor Employment Index'),
            yaxis=dict(title='Total Wages Index')
        )

        # Clusters appear in the legend in order of first appearance.
        return {
            'data': [trace_for(label) for label in cluster_labels.unique()],
            'layout': figure_layout,
        }
    except Exception as e:
        logging.exception(e)


In [647]:
@app.callback(
    [Output('kpi-1', 'children'),
     Output('kpi-2', 'children'),
     Output('kpi-3', 'children'),
     Output('kpi-4', 'children')],
    [Input('year-slider', 'value'),
     Input('country-dropdown', 'value')]
)
def update_kpi_boxes(year_range, countries):
    """Compute the text shown in the four KPI boxes.

    Parameters
    ----------
    year_range : list[int]
        ``[start, end]`` (inclusive) from the year range slider.
    countries : list[str] | None
        Selected countries; ``None`` is treated as an empty selection.

    Returns
    -------
    tuple[str, str, str, str]
        Total construction value, total gross value added, average dwelling
        value and number of countries, in the order of the declared outputs.
        On error the exception is logged and ``'Error'`` is returned for
        every box.
    """
    try:
        first_year, last_year = year_range[0], year_range[1]
        selected = countries or []

        def within_selection(frame):
            # Same year-range and country filter for both data sets.
            years = frame['Year']
            return frame[(years >= first_year) & (years <= last_year) & frame['Country'].isin(selected)]

        gfcf_filtered = within_selection(gfcf_df)
        gva_filtered = within_selection(gva_df)

        total_construction_value = round(gfcf_filtered['Construction Value'].sum(),2)
        avg_dwelling_value = round(gfcf_filtered['Dwellings Value'].mean(),2)
        n_countries = len(gfcf_filtered['Country'].unique())

        total_gross_value_add = round(gva_filtered['GVA'].sum(),2)

        return (
            f'Total Construction Value: €{total_construction_value}M',
            f'Total Gross Value Add: €{total_gross_value_add}M',
            f'Average Dwelling Value: €{avg_dwelling_value}M',
            f'Number Countries: {n_countries}',
        )

    except Exception as e:
        logging.exception(e)
        # The callback declares four outputs, so it must return four values;
        # a single string is rejected by Dash as an invalid return value.
        return ('Error',) * 4


In [648]:
def _build_layout():
    """Assemble the dashboard page: title, KPI row, filters and four charts.

    Every chart lives in its own single-column row, and sections are
    separated by horizontal rules. Component ids match the ones the
    callbacks are registered against.
    """
    def single_column_row(*components):
        # A Bootstrap row holding one column with the given components.
        return dbc.Row([dbc.Col(list(components))])

    years = gfcf_df['Year']
    first_year, last_year = years.min(), years.max()
    topic_names = sentiment_topics['topic'].unique()

    # Four KPI tiles, ids kpi-1 .. kpi-4.
    kpi_row = dbc.Row([create_kpi_box(f'kpi-{number}') for number in range(1, 5)])

    # Options list every GFCF country; the default selection is the set of
    # countries available in both the GVA and GFCF data.
    country_picker = dcc.Dropdown(
        id='country-dropdown',
        options=[{'label': name, 'value': name} for name in gfcf_df['Country'].unique()],
        multi=True,
        value=common_countries_list
    )

    # step=None restricts the handles to the marked years.
    year_picker = dcc.RangeSlider(
        id='year-slider',
        min=first_year,
        max=last_year,
        value=[first_year, last_year],
        marks={str(year): str(year) for year in years.unique()},
        step=None
    )

    gva_row = single_column_row(dcc.Graph(id='scatter-plot'))

    construction_row = single_column_row(
        dcc.Checklist(
            id='top-5-check',
            options=[{'label': 'Top 5', 'value': 'top-5'}],
            value=[]
        ),
        dcc.Graph(id='bar-chart')
    )

    # All topics are selected by default.
    sentiment_row = single_column_row(
        dcc.Dropdown(
            id='sentiment-topic-dropdown',
            options=[{'label': topic, 'value': topic} for topic in topic_names],
            multi=True,
            value=list(topic_names)
        ),
        html.H3("Note: Only showing data from subreddits : ireland, germany, france and europe", style=H3_STYLE),
        dcc.Graph(id='clustered-bar-chart')
    )

    # The hidden input exists only so the clustering callback has an Input.
    clustering_row = single_column_row(
        dcc.Input(id='dummy-input', type='hidden'),
        dcc.Graph(id='scatter-chart')
    )

    page = dbc.Col(
        [
            html.H1("Construction Analytics Dashboard", style=H1_STYLE),
            html.Hr(),
            kpi_row,
            html.Hr(),
            html.H3("Select Countries", style=H3_STYLE),
            country_picker,
            html.Hr(),
            html.H3("Select Year", style=H3_STYLE),
            year_picker,
            html.Hr(),
            gva_row,
            html.Hr(),
            construction_row,
            html.Hr(),
            sentiment_row,
            html.Hr(),
            clustering_row,
        ]
    )

    return html.Div(children=[dbc.Container([dbc.Row([page])])])


app.layout = _build_layout()


Finally, run the app. It is served on your local machine (localhost), e.g. http://127.0.0.1:XXXX/ (port 8050 in the output below).

In [None]:
# Start the Dash server with debug mode off. The call blocks this cell while
# the server is running (stop it with CTRL+C / a kernel interrupt).
app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/



INFO:dash.dash:Dash is running on http://127.0.0.1:8050/



 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:8050
 * Running on http://127.0.0.1:8050
Press CTRL+C to quit
INFO:werkzeug:[33mPress CTRL+C to quit[0m
