In [299]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import plotly.graph_objects as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import dash_bootstrap_components as dbc
import sys
import logging
import os

To allow for flexible changes, some styling variables were defined.

# This code won't work because app.run_server prevents it; it could be done using threading, but that is beyond the scope of this assignment.

# Send log records to app.log to help debugging; filemode='w' starts the file afresh on each run.
logging.basicConfig(
    level=logging.INFO,
    filename='app.log',
    filemode='w',
)
# getLogger() with no name returns the root logger configured above.
logger = logging.getLogger()
logger.info(f'Log file : {datetime.now()}')

In [300]:
# Shared styling constants, kept in one place so the look of the dashboard can be
# changed without touching the layout code.
BACKGROUND_COLOR = '#242424'
FONT_COLOR = '#FFFFFF'
FONT_FAMILY = 'Lato'
PADDING = '10px'

# Dash `style` dictionaries use camelCased CSS property names (fontSize, textAlign),
# so the heading styles follow the same convention as KPI_BOX_STYLE below.
H1_STYLE = {'fontSize': '30px', 'textAlign': 'center'}
H3_STYLE = {'fontSize': '14px'}

# Style applied to each KPI box: a blue, rounded tile with its text centred
# both horizontally and vertically via flexbox.
KPI_BOX_STYLE = {
    'textAlign': 'center',
    'padding': PADDING,
    'color': FONT_COLOR,
    'fontFamily': FONT_FAMILY,
    'backgroundColor': '#1E88E5',
    'borderRadius': '10px',
    'height': '100%',
    'display': 'flex',
    'justifyContent': 'center',
    'alignItems': 'center'
}

In [301]:
# Dummy data initially used for testing. It is still used: `df = generate_data()`
# feeds the bar chart, histogram and pie chart callbacks.
def generate_data(num_rows=1000, seed=0):
    """Generate a reproducible random construction data set.

    A private ``RandomState`` is used instead of ``np.random.seed`` so that calling
    this function does not reset the process-wide NumPy random state. With the
    default ``seed=0`` the values are the same as those produced by seeding the
    global generator with 0 and drawing in the same order.

    Args:
        num_rows: Number of rows to generate.
        seed: Seed for the private random generator; ``None`` gives non-reproducible data.

    Returns:
        pandas.DataFrame with columns 'Year' (2000-2023), 'Country',
        'Construction Value' (uniform 1000-50000), 'Labor Wages' (uniform 10-50)
        and 'Building Cost' (uniform 500-30000).
    """
    rng = np.random.RandomState(seed)

    # The draw order below determines the generated values; keep it stable.
    years = rng.choice(range(2000, 2024), num_rows)
    countries = rng.choice(['Germany', 'France', 'Italy', 'Spain', 'UK', 'Belgium', 'Netherlands', 'Denmark', 'Sweden', 'Norway', 'Finland'], num_rows)
    construction_values = rng.uniform(1000, 50000, num_rows)
    labor_wages = rng.uniform(10, 50, num_rows)
    building_costs = rng.uniform(500, 30000, num_rows)

    return pd.DataFrame({'Year': years, 'Country': countries,
                         'Construction Value': construction_values,
                         'Labor Wages': labor_wages,
                         'Building Cost': building_costs})

In [302]:
# All input CSV files are read relative to the data folder. The working directory is
# switched only when the current path does not already contain the folder name, so
# re-running this cell does not try to enter the folder twice.
folder = 'Dashboard Data'

if folder not in os.getcwd():
    os.chdir(folder)

labor_wages_cluster = pd.read_csv('Labor-Wages-Cluster.csv')
employment_country_cluster = pd.read_csv('Employment-Country-Cluster.csv')
sentiment_data = pd.read_csv('finalRedditSentiment.csv')
sentiment_topics = pd.read_csv('Sentiment-Topics.csv')

# Every file whose name contains 'GVA' is loaded and tagged with the text that
# precedes '_GVA' in its file name, which is used as the country label.
# os.listdir() returns entries in arbitrary order, so the names are sorted to make
# the row order of gva_df reproducible across machines and runs.
dfs = []
for file in sorted(os.listdir()):
    if 'GVA' in file:
        country_gva = pd.read_csv(file)
        country_gva['Country'] = file.split('_GVA')[0]
        dfs.append(country_gva)

gva_df = pd.concat(dfs)

gfcf_df = pd.read_csv('GFCF.csv')

sts_employment_df = pd.read_csv('STS_Employment.csv')

gva_countries = gva_df['Country'].unique()
gfcf_countries = gfcf_df['Country'].unique()

# Countries present in both the GVA and the GFCF data. Set iteration order is not
# stable between interpreter runs, so the list is sorted to give a deterministic
# default selection.
common_countries = set(gva_countries) & set(gfcf_countries)
common_countries_list = sorted(common_countries)

In [303]:
# Synthetic frame read by the bar chart, histogram and pie chart callbacks.
df = generate_data()

# Dash application instance, styled with the CYBORG Bootstrap theme.
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.CYBORG],
)

In [304]:
def create_kpi_box(box_id):
    """Return a Bootstrap column of width 3 holding an empty, KPI-styled Div.

    Args:
        box_id: Component id given to the inner Div, so a callback can fill in
            its ``children``.
    """
    kpi_placeholder = html.Div(id=box_id, style=KPI_BOX_STYLE)
    return dbc.Col(kpi_placeholder, width=3)

In [305]:
@app.callback(
    Output('scatter-plot', 'figure'),
    [Input('country-dropdown', 'value')]
)
def update_scatter_plot(countries):
    """Build the yearly GVA vs. predicted GVA line chart for the selected countries.

    Args:
        countries: Value of the 'country-dropdown' component (a list of country
            names). An empty or missing selection is treated as "no countries".

    Returns:
        A figure dict with two Scatter traces under 'data' and a 'layout', or
        None when building the figure fails (the exception is logged).
    """
    try:
        gva_filtered = gva_df[gva_df['Country'].isin(countries or [])]

        # Sum only the two plotted measures per year. min_count=1 keeps a year as
        # NaN when every contributing value is missing; a plain sum() would turn it
        # into 0, which makes the notna() filter on 'Predicted GVA' below a no-op
        # and plots years without predictions at zero.
        gva_grouped = (
            gva_filtered
            .groupby('Year')[['GVA', 'Predicted GVA']]
            .sum(min_count=1)
            .reset_index()
        )

        # Drop rows where GVA is 0 or NaN
        gva_grouped_gva = gva_grouped[(gva_grouped['GVA'] != 0) & (gva_grouped['GVA'].notna())]
        # Drop rows that have no predicted value at all
        gva_grouped_predicted_gva = gva_grouped[gva_grouped['Predicted GVA'].notna()]

        scatter_plot_gva = go.Scatter(
            x=gva_grouped_gva['Year'],
            y=gva_grouped_gva['GVA'],
            mode='lines+markers',
            name='Total GVA'
        )

        scatter_plot_predicted_gva = go.Scatter(
            x=gva_grouped_predicted_gva['Year'],
            y=gva_grouped_predicted_gva['Predicted GVA'],
            mode='lines+markers',
            name='Total Predicted GVA'
        )

        layout = go.Layout(
            title='Total GVA and Total Predicted GVA by Year',
            xaxis=dict(title='Year'),
            yaxis=dict(title='Value'),
            autosize=True
        )

        return {'data': [scatter_plot_gva, scatter_plot_predicted_gva], 'layout': layout}
    except Exception as e:
        # Best effort: record the failure in the log; the callback then returns None.
        logging.exception(e)


In [306]:
@app.callback(
    Output('bar-chart', 'figure'),
    [Input('year-slider', 'value'),
     Input('top-5-check', 'value'),
     Input('country-dropdown', 'value')]
)
def update_bar_chart(year_range, check_values, countries):
    """Build the construction-value bar chart from the dummy data frame ``df``.

    Args:
        year_range: Two-element [start, end] value of the year slider (inclusive).
        check_values: Selected values of the 'top-5-check' checklist.
        countries: Country names selected in the dropdown.

    Returns:
        A figure dict with one Bar trace and a layout, or None when an exception
        occurred (it is logged).
    """
    try:
        within_years = (df['Year'] >= year_range[0]) & (df['Year'] <= year_range[1])
        rows = df[within_years & df['Country'].isin(countries)]

        # NOTE(review): rows are not aggregated per country, so "Top 5" keeps the
        # five rows with the largest construction value, not five countries -
        # confirm that this is the intended behaviour.
        if 'top-5' in check_values:
            rows = rows.sort_values('Construction Value', ascending=False).head(5)

        trace = go.Bar(x=rows['Country'], y=rows['Construction Value'])
        chart_layout = go.Layout(
            title='Construction Value by Country',
            xaxis={'title': 'Country'},
            yaxis={'title': 'Construction Value'},
            autosize=True,
        )
        return {'data': [trace], 'layout': chart_layout}
    except Exception as e:
        logging.exception(e)


In [307]:
@app.callback(
    Output('histogram', 'figure'),
    [Input('year-slider', 'value'),
     Input('wage-cost-radio', 'value'),
     Input('country-dropdown', 'value')]
)
def update_histogram(year_range, radio_value, countries):
    """Build a histogram of the chosen column of the dummy data frame ``df``.

    Args:
        year_range: Two-element [start, end] value of the year slider (inclusive).
        radio_value: Name of the ``df`` column to plot, as chosen in 'wage-cost-radio'.
        countries: Country names selected in the dropdown.

    Returns:
        A figure dict with one Histogram trace and a layout, or None when an
        exception occurred (it is logged).
    """
    try:
        within_years = (df['Year'] >= year_range[0]) & (df['Year'] <= year_range[1])
        rows = df[within_years & df['Country'].isin(countries)]

        trace = go.Histogram(x=rows[radio_value])
        chart_layout = go.Layout(
            title=f'{radio_value} Distribution',
            xaxis={'title': radio_value},
            yaxis={'title': 'Frequency'},
            autosize=True,
        )
        return {'data': [trace], 'layout': chart_layout}
    except Exception as e:
        logging.exception(e)


In [308]:
@app.callback(
    Output('pie-chart', 'figure'),
    [Input('year-slider', 'value'),
     Input('country-dropdown', 'value')]
)
def update_pie_chart(year_range, countries):
    """Build the construction-value donut chart from the dummy data frame ``df``.

    Args:
        year_range: Two-element [start, end] value of the year slider (inclusive).
        countries: Country names selected in the dropdown.

    Returns:
        A figure dict with one Pie trace (hole of 0.3) and a layout, or None when
        an exception occurred (it is logged).
    """
    try:
        within_years = (df['Year'] >= year_range[0]) & (df['Year'] <= year_range[1])
        rows = df[within_years & df['Country'].isin(countries)]

        trace = go.Pie(
            labels=rows['Country'],
            values=rows['Construction Value'],
            hole=.3,
        )
        chart_layout = go.Layout(title='Construction Value by Country', autosize=True)

        return {'data': [trace], 'layout': chart_layout}
    except Exception as e:
        logging.exception(e)

In [309]:
@app.callback(
    [Output('kpi-1', 'children'),
     Output('kpi-2', 'children'),
     Output('kpi-3', 'children'),
     Output('kpi-4', 'children')],
    [Input('year-slider', 'value'),
     Input('country-dropdown', 'value')]
)
def update_kpi_boxes(year_range, countries):
    """Compute the text shown in the four KPI boxes.

    Args:
        year_range: Two-element [start, end] value of the year slider (inclusive).
        countries: Country names selected in the dropdown. An empty or missing
            selection is treated as "no countries".

    Returns:
        A tuple of four strings, one per declared Output, in this order: total
        construction value, total gross value add, average dwelling value and
        number of countries. If anything fails, the exception is logged and
        every box shows 'Error'.
    """
    try:
        selected = countries or []

        gfcf_filtered = gfcf_df[(gfcf_df['Year'] >= year_range[0]) & (gfcf_df['Year'] <= year_range[1]) & gfcf_df['Country'].isin(selected)]
        gva_filtered = gva_df[(gva_df['Year'] >= year_range[0]) & (gva_df['Year'] <= year_range[1]) & gva_df['Country'].isin(selected)]

        total_construction_value = round(gfcf_filtered['Construction Value'].sum(), 2)
        avg_dwelling_value = round(gfcf_filtered['Dwellings Value'].mean(), 2)
        n_countries = len(gfcf_filtered['Country'].unique())

        total_gross_value_add = round(gva_filtered['GVA'].sum(), 2)

        return (
            f'Total Construction Value: €{total_construction_value}M',
            f'Total Gross Value Add: €{total_gross_value_add}M',
            f'Average Dwelling Value: €{avg_dwelling_value}M',
            f'Number Countries: {n_countries}',
        )

    except Exception as e:
        logging.exception(e)
        # The callback declares four outputs, so it must return four values; a
        # single 'Error' string would itself be rejected by Dash.
        return ('Error',) * 4


In [310]:
def _graph_section(title, graph_id, controls=()):
    """Return a one-column row: small heading, rule, optional controls, then the graph."""
    return dbc.Row(
        [
            dbc.Col(
                [
                    html.H3(title, style=H3_STYLE),
                    html.Hr(),
                    *controls,
                    dcc.Graph(id=graph_id),
                ]
            )
        ]
    )


# Year bounds of the GFCF data drive the range slider.
_year_min = gfcf_df['Year'].min()
_year_max = gfcf_df['Year'].max()

# Page content, top to bottom: title, KPI boxes, filters, then one section per chart.
_page_children = [
    html.H1("Construction Analytics Dashboard", style=H1_STYLE),
    html.Hr(),
    dbc.Row([create_kpi_box(box_id) for box_id in ('kpi-1', 'kpi-2', 'kpi-3', 'kpi-4')]),
    html.Hr(),
    html.H3("Select Countries", style=H3_STYLE),
    dcc.Dropdown(
        id='country-dropdown',
        options=[{'label': country, 'value': country} for country in gfcf_df['Country'].unique()],
        multi=True,
        # Pre-selects the countries present in both the GVA and the GFCF data.
        value=common_countries_list,
    ),
    html.Hr(),
    html.H3("Select Year", style=H3_STYLE),
    dcc.RangeSlider(
        id='year-slider',
        min=_year_min,
        max=_year_max,
        value=[_year_min, _year_max],
        marks={str(year): str(year) for year in gfcf_df['Year'].unique()},
        step=None,
    ),
    html.Hr(),
    _graph_section("Scatter Plot", 'scatter-plot'),
    html.Hr(),
    _graph_section(
        "Bar Chart",
        'bar-chart',
        controls=[
            dcc.Checklist(
                id='top-5-check',
                options=[{'label': 'Top 5', 'value': 'top-5'}],
                value=[],
            )
        ],
    ),
    html.Hr(),
    _graph_section(
        "Histogram",
        'histogram',
        controls=[
            dcc.RadioItems(
                id='wage-cost-radio',
                options=[{'label': column, 'value': column} for column in ['Labor Wages', 'Building Cost']],
                value='Labor Wages',
            )
        ],
    ),
    html.Hr(),
    _graph_section("Pie Chart", 'pie-chart'),
]

app.layout = html.Div(children=[
    dbc.Container(
        [
            dbc.Row(
                [
                    dbc.Col(_page_children)
                ]
            )
        ]
    )
])

Finally, run the app. It will be served locally on your own machine, e.g. at http://127.0.0.1:XXXX/

In [311]:
# Start the Dash server for this app with debug mode switched off.
app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/



INFO:dash.dash:Dash is running on http://127.0.0.1:8050/



 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:8050
 * Running on http://127.0.0.1:8050
Press CTRL+C to quit
INFO:werkzeug:[33mPress CTRL+C to quit[0m
127.0.0.1 - - [07/Jun/2023 20:42:59] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [07/Jun/2023 20:42:59] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [07/Jun/2023 20:43:00] "GET /_dash-layout HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [07/Jun/2023 20:43:00] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [07/Jun/2023 20:43:00] "GET /_dash-dependencies HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [07/Jun/2023 20:43:00] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [07/Jun/2023 20:43:00] "GET /_favicon.ico?v=2.10.0 HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [07/Jun/2023 20:43:00] "GET /_favicon.ico?v=2.10.0 HTTP/1.1" 200 -
127.0.0.1 - - [07/Jun/2023 20:43:00] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 304 -
INFO:werkzeug:127.0.0.1 - - [07/Jun/2023 20:43:00] "[36mGET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP