# Data visualization project - WikiViz 
<hr style="border:3px solid gray">

# Imports 

In [1]:
import plotly.express as px
import pandas as pd
from dash import Dash, dcc, html, Input, Output, ctx
import plotly.graph_objects as go
import dash
import json
import numpy as np
import time

# Loading data

In [2]:
# Loading the dataset
df = pd.read_csv("TheAgeDatasetV5.csv")

# Loading geojson of world
with open('medium.geo.json', 'r', encoding='utf-8') as f:
    geojson = json.load(f)

# Loading dictionary of continents and associated countries.
# The encoding is given explicitly (as for the geojson) so that non-ASCII names
# are decoded the same way on every platform instead of using the locale default.
with open('continents.json', 'r', encoding='utf-8') as f:
    continent_mapping = json.load(f)

# Loading dictionary of occupation categories and associated occupations
with open('categories.json', 'r', encoding='utf-8') as f:
    category_mapping = json.load(f)


# Global filter constants

In [3]:
# Distinct values of every filterable column (occupations and countries sorted, genders in order of appearance)
all_occupations = df["Occupation"].unique().tolist()
all_occupations.sort()
all_genders = df["Gender"].unique().tolist()
all_countries = df["AssociatedModernCountry"].unique().tolist()
all_countries.sort()

# Sizes of the complete value lists; a selection of the same size is treated as "no filter"
all_countries_len, all_genders_len, all_occupations_len = (
    len(all_countries),
    len(all_genders),
    len(all_occupations),
)

# Shared filter state read and updated by the callbacks; starts with everything selected
filters = dict(countries=all_countries, genders=all_genders, occupations=all_occupations)

# Helper functions

In [4]:
def applyFilters(df, countries = None, genders = None, occupations = None, alive_in = None):
    """Return the rows of ``df`` that match the given filters.

    Parameters
    ----------
    df : DataFrame with the columns 'AssociatedModernCountry', 'Gender',
        'Occupation', 'Birth year' and 'Death year' (only the columns of the
        filters actually applied are accessed).
    countries, genders, occupations : sequence or None
        Values to keep. ``None`` disables the filter; a selection as long as the
        full list of known values is treated as "everything" and skipped too.
    alive_in : sequence of two years or None
        ``(first_year, last_year)``; keeps people born no later than
        ``last_year`` who died no earlier than ``first_year``.

    Returns
    -------
    The filtered DataFrame (``df`` itself when no filter applies).

    Raises
    ------
    AssertionError if ``alive_in`` does not hold exactly two values.
    """
    # `is not None` (instead of `!= None`) keeps the checks valid for array-like
    # arguments, whose `!=` comparison is element-wise.

    # Filter on countries
    if countries is not None and len(countries) != all_countries_len:
        df = df[df['AssociatedModernCountry'].isin(countries)]

    # Filter on genders
    if genders is not None and len(genders) != all_genders_len:
        df = df[df['Gender'].isin(genders)]

    # Filter on occupations
    if occupations is not None and len(occupations) != all_occupations_len:
        df = df[df['Occupation'].isin(occupations)]

    # Filter on individuals alive at some point within the interval
    if alive_in is not None:
        assert len(alive_in) == 2, "Must provide both birthyear and deathyear to find alive indivduals within timeframe"
        df = df[(df['Birth year'] <= alive_in[1]) & (df['Death year'] >= alive_in[0])]

    return df

In [5]:
def replace_countries_with_continents(continent_dict, list_countries):
    """Collapse fully selected continents into their continent name.

    Parameters
    ----------
    continent_dict : dict mapping a continent name to the list of its countries.
    list_countries : list of selected country names.

    Returns
    -------
    A new list following the order of ``list_countries`` in which every country
    belonging to a completely selected continent is represented by that
    continent (listed once, at its first occurrence); all other countries are
    kept as they are.
    """
    selected = set(list_countries)

    # Work out once which continents are entirely contained in the selection,
    # rather than rebuilding the sets for every (country, continent) pair.
    complete_continents = []
    for continent, members in continent_dict.items():
        member_set = set(members)
        if member_set.issubset(selected):
            complete_continents.append((continent, member_set))

    new_list = []
    for country in list_countries:
        for continent, member_set in complete_continents:
            if country in member_set:
                if continent not in new_list:
                    new_list.append(continent)
                break
        else:
            # The country is not part of any completely selected continent
            new_list.append(country)
    return new_list


# Dash setup

In [6]:
# Instantiate the Dash application that all layout and callbacks below attach to
app = Dash(__name__)


# Defining the different components

def _multi_select_dropdown(component_id, values):
    """Build a full-width multi-select dropdown using each entry of `values` as both label and value."""
    return dcc.Dropdown(
        options=[{'label': value, 'value': value} for value in values],
        multi=True,
        id=component_id,
        placeholder="Select options...",
        style={'width': '100%'}
    )


# Continents / occupation categories are offered ahead of the individual entries
_country_choices = list(continent_mapping.keys()) + all_countries
_occupation_choices = list(category_mapping.keys()) + [
    occupation for occupation in all_occupations if occupation not in {"Education", "Unspecified"}
]

# Filter panel: one multi-select dropdown each for countries, occupations and genders
filters_widget = html.Div(
    [
        html.Hr(),
        html.P("Select the countries you want to compare"),
        _multi_select_dropdown('dropdown-checklist-country', _country_choices),
        html.Hr(),
        html.P("Select the occupation you want to compare"),
        _multi_select_dropdown('dropdown-checklist-occupation', _occupation_choices),
        html.Hr(),
        html.P("Select the genders you want to compare"),
        _multi_select_dropdown('dropdown-checklist-gender', all_genders),
    ],
    style={'width': '100%', 'margin': 'auto'},
)

# Header bar with two statistic tiles; their text is supplied by the tile callbacks
_tile_style = {'padding': '5px', 'border-right': '1px solid lightgray'}
_tile_row = html.Div(
    [
        html.Div(id='total-observations-tile', style=dict(_tile_style)),
        html.Div(id='selected-years-tile', style=dict(_tile_style)),
    ],
    style={'display': 'flex', 'justify-content': 'space-between', 'align-items': 'center', 'width': '25%'},
)
tiles = html.Div(
    [_tile_row],
    style={'background-color': 'lightgray', 'height': '50px', 'display': 'flex', 'align-items': 'center'},
)

# Year selector: three preset-period buttons followed by a range slider
_preset_buttons = [
    html.Button(label, id=button_id, n_clicks=0)
    for button_id, label in (
        ('button-1', '1917-1921'),
        ('button-2', '1939-1945'),
        ('button-3', '1980-1985'),
    )
]
# One labelled mark every 200 years
_year_marks = {str(year): str(year) for year in range(-1000, 2021, 200)}

slider = html.Div(
    _preset_buttons + [
        dcc.RangeSlider(
            id='year-slider',
            min=-1000,
            max=2021,
            value=[-1000, 2021],
            marks=_year_marks,
            step=None
        )
    ],
    style={'width': '100%', 'margin': 'auto'},
)

# Page layout, top to bottom: statistic tiles, choropleth + sunburst row,
# year selector, line chart with its two mode dropdowns, and the filter panel.
app.layout = html.Div([
    tiles,
    html.Div([
        html.Div([dcc.Graph(id='choropleth'), dcc.Dropdown(['No normalization', 'Boxplot adaption'], 'No normalization', id='choropleth-dropdown', clearable=False)], 
                 style={'width': '55%', 'display': 'inline-block'}),
        dcc.Graph(id='sunburst', style={'width': '45%', 'display': 'inline-block'})
    ], style={'marginTop': 25}),
    # Reuse the `slider` component defined above rather than repeating the same
    # buttons and range slider inline (the duplicate left `slider` unused).
    slider,
    dcc.Graph(id='linechart', style={'width': '100%', 'margin': 'auto'}),
    dcc.Dropdown(['Number of Births', 'Number of Deaths', 'Average Age at Death'], 'Number of Births', id='linechart-dropdown-datatype', clearable=False),
    dcc.Dropdown(['Aggregated view', 'Exploded view'], 'Aggregated view', id='linechart-dropdown-mode', clearable=False), 
    filters_widget
])



<hr style="border:3px solid gray">


# Choropleth functions 

In [7]:
# Index the geojson features by the country name stored in their 'admin' property
country_lookup = dict(
    (feature['properties']['admin'], feature) for feature in geojson['features']
)

def get_highlights(selections, geojson=geojson, country_lookup= country_lookup):
    """Return a geojson-like dict restricted to the selected countries.

    Every top-level entry of ``geojson`` is carried over unchanged except
    'features', which is replaced by the features looked up in
    ``country_lookup`` for each name in ``selections`` (a name missing from
    the lookup raises KeyError).
    """
    return {
        key: ([country_lookup[name] for name in selections] if key == 'features' else value)
        for key, value in geojson.items()
    }


def get_choropleth(all_df, selection_df, filter_countries, norm_mode):
    """Build the two-layer choropleth map figure.

    Parameters
    ----------
    all_df : DataFrame with the columns 'Country' and 'Counts' for every country;
        drawn as a faded base layer and used to derive the colour range.
    selection_df : DataFrame with the same columns for the selected countries;
        drawn fully opaque on top of the base layer.
    filter_countries : names of the selected countries, used to build the
        geojson of the highlight layer.
    norm_mode : "Boxplot adaption" places the colour stops at positions derived
        from the quartiles of 'Counts'; any other value uses evenly spaced colours.

    Returns
    -------
    The plotly figure.
    """
    palette = ['rgb(242,240,247)',
               'rgb(218,218,235)',
               'rgb(188,189,220)',
               'rgb(158,154,200)',
               'rgb(128,125,186)',
               'rgb(106,81,163)',
               'rgb(74,20,134)']

    max_count = all_df['Counts'].max()

    # Get appropriate colorscale.
    # The quartile-based stops are fractions of the maximum count, so they are only
    # defined for a positive maximum; when no person matches the filters (maximum 0)
    # the division would yield NaN stops, hence the fallback to the plain palette.
    if norm_mode == "Boxplot adaption" and max_count > 0:
        Q1 = all_df['Counts'].quantile(0.25)
        Q3 = all_df['Counts'].quantile(0.75)
        IQR = Q3 - Q1

        # Define color scale based on quartiles and IQR
        stops = [
            0,
            max(0, (Q1 - 1.5 * IQR) / max_count),
            Q1 / max_count,
            (Q1 + Q3) / 2 / max_count,
            Q3 / max_count,
            min(1, (Q3 + 1.5 * IQR) / max_count),
            1,
        ]
        color_scale = [[stop, color] for stop, color in zip(stops, palette)]
    else:
        color_scale = list(palette)

    # Base choropleth layer --------------#
    fig = px.choropleth_mapbox(all_df, geojson=geojson,
                               color="Counts",
                               locations="Country",
                               featureidkey="properties.admin",
                               color_continuous_scale=color_scale,
                               range_color = [0, max_count],
                               opacity=0.25)

    # Second layer - Highlights ----------#
    highlights = get_highlights(filter_countries)

    fig.add_trace(
        px.choropleth_mapbox(selection_df, geojson=highlights,
                                 color="Counts",
                                 locations="Country",
                                 featureidkey="properties.admin",
                                 color_continuous_scale=color_scale,
                                 opacity=1).data[0]
    )

    #------------------------------------#
    fig.update_layout(mapbox_style="carto-positron",
                      mapbox_zoom=0.35,
                      mapbox_center={"lat": 30.0, "lon": 0.00},
                      margin={"r":0,"t":0,"l":0,"b":0},
                      uirevision='constant',
                      title = "Distribution of selected people in the World")

    return fig



@app.callback(
    [Output('choropleth', 'figure'),
    Output('dropdown-checklist-country', 'value'),
    Output('dropdown-checklist-occupation', 'value')],
    [Input('choropleth', 'clickData'),
     Input('choropleth-dropdown', 'value'), 
     Input('year-slider', 'value'),
     Input('sunburst', 'clickData'),
     Input('dropdown-checklist-country', 'value'), 
     Input('dropdown-checklist-occupation', 'value'),
     Input('dropdown-checklist-gender', 'value')])
def update_choropleth_and_handle_dropdown_filtrations(clickData, norm_mode, yearinterval, sunburst_clickData, country_dropdown, occupation_dropdown, gender_dropdown): 
    """Update the shared country/occupation filters and redraw the choropleth.

    Depending on which input triggered the callback, the module-level ``filters``
    dict is updated from the occupation dropdown, a sunburst click, the country
    dropdown or a click on the map. The map is then rebuilt from the people
    matching the gender, occupation and year filters.

    Returns
    -------
    (figure, country dropdown value, occupation dropdown value)
    """
    # Handle filtration on occupation-dropdown
    if ctx.triggered_id == "dropdown-checklist-occupation":
        # Truthiness covers both an emptied dropdown ([]) and an unset one (None)
        if occupation_dropdown:
            cats_in_selection = [x for x in occupation_dropdown if x in category_mapping]
            selection_without_cats = [x for x in occupation_dropdown if x not in category_mapping]
            # A selected category stands for all of its occupations
            filters["occupations"] = list(set([x for cat in cats_in_selection for x in category_mapping[cat]] + selection_without_cats))
        else:
            filters["occupations"] = all_occupations

    # Handle filtrations on click in the sunburst
    if ctx.triggered_id == "sunburst":
        category = sunburst_clickData['points'][0]['parent']
        label = sunburst_clickData['points'][0]['label']
        if category == "":
            # A point without parent is a category: clicking the active one again clears the filter
            if filters["occupations"] == category_mapping[label]:
                filters["occupations"] = all_occupations
                occupation_dropdown = []
            else:
                filters["occupations"] = category_mapping[label]
                occupation_dropdown = [label]
        else:
            filters["occupations"] = [label]
            occupation_dropdown = [label]

    # Handle filtration from country dropdown
    if ctx.triggered_id == "dropdown-checklist-country":
        if country_dropdown:
            continents_in_selection = [x for x in country_dropdown if x in continent_mapping]
            selection_without_continents = [x for x in country_dropdown if x not in continent_mapping]
            # A selected continent stands for all of its countries
            filters["countries"] = list(set([x for continent in continents_in_selection for x in continent_mapping[continent]] + selection_without_continents))
            country_dropdown = replace_countries_with_continents(continent_mapping, filters["countries"])
        else:
            filters["countries"] = all_countries

    # Handle filtrations on click in the map
    if ctx.triggered_id == "choropleth":
        location = clickData['points'][0]['location']
        # Work on a copy so the list currently stored in `filters` is never mutated in place
        countries = list(filters["countries"])

        if len(countries) == all_countries_len:
            # Nothing was filtered yet: the clicked country becomes the only selection
            countries = [location]
            country_dropdown = [location]

        elif location not in countries:
            countries.append(location)
            country_dropdown = replace_countries_with_continents(continent_mapping, countries)

        else:
            countries.remove(location)
            if len(countries) == 0:
                # Deselecting the last country resets the filter and clears the dropdown
                countries = all_countries
                country_dropdown = []
            else:
                country_dropdown = replace_countries_with_continents(continent_mapping, countries)

        filters["countries"] = countries

    # Create right dataframes for figure
    all_df = applyFilters(df, genders = filters["genders"], occupations = filters["occupations"], alive_in = yearinterval)
    all_df = all_df["AssociatedModernCountry"].value_counts().rename_axis('Country').reset_index(name='Counts')

    # Countries without any matching person still get a row, with a count of 0
    rest_of_countries = list(set(all_countries) - set(all_df['Country']))
    rest_df = pd.DataFrame(rest_of_countries, columns=['Country'])
    rest_df['Counts'] = 0
    all_df = pd.concat([all_df, rest_df], ignore_index=True)

    selection_df = all_df[all_df['Country'].isin(filters["countries"])]

    return get_choropleth(all_df, selection_df, filters["countries"], norm_mode), country_dropdown, occupation_dropdown


# Sunburst functions

In [8]:
# Fixed colour per occupation category so that the sunburst colours stay constant
# across filter changes. Every category has its own colour ('Media and
# Communication' previously shared '#ff2400' with 'Law and Justice').
category_color_maps = {'Arts and Entertainment': '#1f77b4',
                    'Politics and Public Service': '#ff7f0e',
                    'Science and Academia': '#17becf',
                    'Religion': '#7f7f7f',
                    'Sports': '#9467bd',
                    'Business and Commerce': '#8c564b',
                    'Healthcare': '#e377c2',
                    'Law and Justice': '#ff2400',
                    'Media and Communication': '#2ca02c',
                    'Education': '#bcbd22',
                    'Engineering and Architecture': '#ffd700',
                    'Agriculture': '#ffbb78',
                    'Others': '#98df8a',
                    'Unspecified': '#ff9896'}



@app.callback(
    Output('sunburst', 'figure'),
    [Input('choropleth', 'clickData'), 
     Input('year-slider', 'value'), 
     Input('sunburst', 'clickData'),
     Input('dropdown-checklist-country', 'value'),
     Input('dropdown-checklist-occupation', 'value'),
     Input('dropdown-checklist-gender', 'value')]
)
def update_sunburst(choropleth_clickData, year_range, sunburst_clickData, country_dropdown, occupation_dropdown, gender_dropdown):
    """Rebuild the sunburst of occupation counts per occupation category.

    Only ``year_range`` is read from the arguments; the other inputs merely
    trigger the redraw, the selections themselves come from the module-level
    ``filters`` dict.

    Returns
    -------
    The plotly sunburst figure.
    """
    # Apply filters on sunburst
    sunburst_df = applyFilters(df, countries = filters["countries"], genders = filters["genders"], occupations = filters["occupations"], alive_in = year_range)

    # Set title (the continent-collapsed names are only needed when a subset is selected)
    if len(filters["countries"]) == all_countries_len:
        title = 'Occupation Distribution by Category in the World'
    else:
        title_countries = replace_countries_with_continents(continent_mapping, filters["countries"])
        if len(title_countries) <= 5:
            title = f'Occupation Distribution by Category in {", ".join(title_countries)}'
        else:
            # Long selections are abbreviated to their first five entries
            title = f'Occupation Distribution by Category in {", ".join(title_countries[:5])}...'

    # Get figure
    occupation_count = sunburst_df.groupby(['Occupation category', 'Occupation']).size().reset_index(name='Count')

    fig = px.sunburst(occupation_count,
                        path = ['Occupation category', 'Occupation'], 
                        values = 'Count',
                        title = title,
                        color = 'Occupation category', 
                        color_discrete_map= category_color_maps,
                        width = 500,
                        height = 500)

    return fig
                
     

# Linechart functions 

In [None]:
# Every calendar year from the earliest birth year to the latest death year in the dataset
start_year = df["Birth year"].min()
end_year = df["Death year"].max()
year_range = list(range(start_year, end_year + 1))


def _values_for_each_year(per_year_series):
    """Return the series' value for every year in year_range, with NaN for years it does not contain."""
    return [per_year_series.get(year, float('nan')) for year in year_range]


# Per-country yearly statistics, precomputed once so the line chart callback only has to select columns
num_of_births_each_year = dict()
num_of_deaths_each_year = dict()
avg_age_at_death_each_year = dict()

for country in all_countries:
    country_df = df[df["AssociatedModernCountry"] == country]
    births_per_year = country_df["Birth year"].value_counts()
    deaths_per_year = country_df["Death year"].value_counts()
    mean_age_per_death_year = country_df.groupby("Death year")["Age of death"].mean()

    num_of_births_each_year[country] = _values_for_each_year(births_per_year)
    num_of_deaths_each_year[country] = _values_for_each_year(deaths_per_year)
    avg_age_at_death_each_year[country] = _values_for_each_year(mean_age_per_death_year)

# One column per country, one row per entry of year_range
num_of_births_each_year_df = pd.DataFrame(num_of_births_each_year)
num_of_deaths_each_year_df = pd.DataFrame(num_of_deaths_each_year)
avg_age_at_death_each_year_df = pd.DataFrame(avg_age_at_death_each_year)


@app.callback(
    Output('linechart', 'figure'),
    [Input('choropleth', 'clickData'),
     Input('sunburst', 'clickData'), 
     Input('linechart-dropdown-datatype', 'value'),
     Input('linechart-dropdown-mode', 'value'),
     Input('dropdown-checklist-country', 'value')]
)
def update_linechart(choropleth_clickData, sunburst_clickData, datatype, mode, country_dropdown):
    """Draw the yearly line chart for the currently selected countries.

    Parameters
    ----------
    datatype : 'Number of Births', 'Number of Deaths' or 'Average Age at Death';
        selects the precomputed per-country table to plot.
    mode : 'Exploded view' draws one line per country; 'Aggregated view' draws a
        single line (the row-wise mean for the average age, the row-wise sum otherwise).
    choropleth_clickData, sunburst_clickData, country_dropdown : only trigger the
        redraw; the selected countries are read from the module-level ``filters`` dict.

    Returns
    -------
    The plotly figure.
    """
    frames_by_datatype = {
        'Number of Births': num_of_births_each_year_df,
        'Number of Deaths': num_of_deaths_each_year_df,
        'Average Age at Death': avg_age_at_death_each_year_df,
    }
    plot_data_df = frames_by_datatype[datatype]

    # Only countries that have a data column can be plotted; selected names
    # without one are skipped instead of raising a KeyError.
    countries = [country for country in filters["countries"] if country in plot_data_df.columns]

    fig = go.Figure()

    if mode == 'Exploded view': 
        for country in countries:
            # Add the line plot for this country
            fig.add_trace(go.Scatter(x = year_range, y = plot_data_df[country], mode='lines', name=country, connectgaps=False))

    if mode == 'Aggregated view':
        if datatype == "Average Age at Death":
            # Unweighted mean of the per-country yearly averages
            y_data = plot_data_df[countries].mean(axis=1)
        else:
            y_data = plot_data_df[countries].sum(axis=1)

        fig.add_trace(go.Scatter(x=year_range, y = y_data, mode='lines', name="Countries Aggregated", connectgaps=False))

    # Customize layout with title, axes labels, and grid
    title_countries = replace_countries_with_continents(continent_mapping, filters["countries"])
    if len(filters["countries"]) == all_countries_len:
        title = f'Overview of {datatype} Across all Years for the World'
    elif len(title_countries) <= 5:
        title = f'Overview of {datatype} Across all Years for {", ".join(title_countries)}'
    else: 
        title = f'Overview of {datatype} Across all Years for {", ".join(title_countries[:5])}...'

    fig.update_layout(title= title,
                      xaxis_title='Year',
                      # Label the y-axis with the quantity actually plotted
                      yaxis_title=datatype,
                      xaxis=dict(showgrid=True, gridwidth=1, gridcolor='LightPink'),
                      yaxis=dict(showgrid=True, gridwidth=1, gridcolor='LightPink'),
                      plot_bgcolor='white',
                      dragmode='zoom')

    return fig 
    

# Tile functions

In [None]:
@app.callback(
    Output('total-observations-tile', 'children'),
    [Input('sunburst', 'clickData'),
    Input('choropleth', 'clickData'),
    Input('year-slider', 'value'),
    Input('dropdown-checklist-country', 'value'),
    Input('dropdown-checklist-occupation', 'value'),
    Input('dropdown-checklist-gender', 'value')]
)
def update_total_observations_tile(sunburst_clickData, choropleth_clickData, year_range, country_dropdown, occupation_dropdown, gender_dropdown):
    """Return the tile text with the number of rows matching the current filters.

    Only ``year_range`` is read from the arguments; the country, gender and
    occupation selections come from the module-level ``filters`` dict, and the
    remaining inputs merely trigger the update.
    """
    # Filter the DataFrame
    # NOTE(review): the short pause presumably gives the callbacks that write to
    # `filters` time to finish before it is read here — confirm; this is timing-dependent.
    time.sleep(0.1)
    tile_df = applyFilters(df, countries = filters["countries"], genders = filters["genders"], occupations = filters["occupations"], alive_in = year_range)
    
    return f'Total Observations: {len(tile_df)}'

@app.callback(
    Output('selected-years-tile', 'children'),
    [Input('year-slider', 'value')]
)
def update_selected_years_tile(year_range):
    """Return the tile text showing the first and last year of the slider selection."""
    first_year, last_year = year_range[0], year_range[1]
    return f'Selected Years: {first_year} - {last_year}'

# Sliders, buttons, dropdowns and other functions

In [None]:
@app.callback(
    Output('year-slider', 'value'),
    [Input('button-1', 'n_clicks'),
     Input('button-2', 'n_clicks'),
     Input('button-3', 'n_clicks')]
)
def update_slider(button_1, button_2, button_3):
    """Return the year interval for the slider.

    A click on one of the preset buttons selects that button's period; when
    none of the buttons triggered the callback, the interval from the earliest
    birth year to the latest death year in the dataset is returned.
    """
    presets = {
        'button-1': (1917, 1921),
        'button-2': (1939, 1945),
        'button-3': (1980, 1985),
    }

    # The triggering property looks like '<component id>.<property>'; keep the id part
    triggered_prop = dash.callback_context.triggered[0]['prop_id']
    clicked_button_id = triggered_prop.split('.')[0]

    if clicked_button_id in presets:
        return list(presets[clicked_button_id])
    return [df['Birth year'].min(), df['Death year'].max()]
    
@app.callback(
     Output('dropdown-checklist-gender', 'value'),
     Input('dropdown-checklist-gender', 'value')
)
def filter_genders(value): 
    """Store the gender dropdown selection in the shared ``filters`` dict.

    An empty selection stands for all genders. The dropdown value itself is
    returned unchanged.
    """
    filters["genders"] = all_genders if value == [] else value
    return value 

<hr style="border:3px solid gray">

# Running the Dash app

In [None]:
# Start the Dash server for the app configured above, with debug mode switched on
app.run(debug = True)

# Still To-do 
-  change box-whisker adaption colormap  &#9745;
- find a way to keep colors of sunburst categories constant &#9745;
- set the right colors for sunburst in colormap
- make overall dashboard prettier
- get filters over to the side
- make layout prettier 
- change year slider to have smaller increments
- get linechart working &#9745;
- get filters working &#9745;
- fix bug with occupation selection (others and unspecified) &#9745;
- add more tiles with statistics
- add correct titles to charts, indicating that they show people living in the given period (the choropleth currently does not display its title)
- add descriptions to charts
- add an overall title and description of the visualization
- be able to select category &#9745;
- add percentages to sunburst occupations 
- be able to select continents in filter &#9745;
- cut year range from 0 to 2021 instead?
- reduce number of countries in the lookup
- add more occupations to categories?

