# Data visualization project - WikiViz 
<hr style="border:3px solid gray">

# Imports 

In [1]:
import plotly.express as px
import pandas as pd
from dash import Dash, dcc, html, Input, Output, ctx
import plotly.graph_objects as go
import dash
import json
import numpy as np
import time

# Loading data

In [2]:
# Loading the dataset (one row per person)
df = pd.read_csv("TheAgeDatasetV5.csv")

# All JSON resources are opened with an explicit UTF-8 encoding: the platform
# default encoding is not UTF-8 everywhere, and a wrong default would corrupt
# any non-ASCII names stored in these files.

# Loading geojson of world
with open('medium.geo.json', 'r', encoding='utf-8') as f:
    geojson = json.load(f)

# Loading dictionary of continents and associated countries
with open('continents.json', 'r', encoding='utf-8') as f:
    continents_countries = json.load(f)

# Loading the mapping from an occupation category to the occupations it contains
with open('categories.json', 'r', encoding='utf-8') as f:
    category_mapping = json.load(f)


# Global filter constants

In [3]:
# Distinct values of every filterable column; occupations and countries are
# kept in sorted order, genders in order of first appearance.
all_occupations = df["Occupation"].unique().tolist()
all_occupations.sort()
all_genders = df["Gender"].unique().tolist()
all_countries = df["AssociatedModernCountry"].unique().tolist()
all_countries.sort()

# Sizes of the complete value lists; a selection of the same size is treated
# as "no filter" by applyFilters.
all_countries_len, all_genders_len, all_occupations_len = (
    len(all_countries),
    len(all_genders),
    len(all_occupations),
)

# Currently active selection, shared (and mutated) by the Dash callbacks.
# Initially every value is selected.
filters = dict(countries=all_countries, genders=all_genders, occupations=all_occupations)

# Helper functions

In [4]:
def applyFilters(df, countries = None, genders = None, occupations = None, alive_in = None):
    """Return the rows of ``df`` that satisfy every supplied filter.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns 'AssociatedModernCountry', 'Gender',
        'Occupation', 'Birth year' and 'Death year' (only the columns of the
        filters actually applied are read).
    countries, genders, occupations : sequence or None
        Allowed values for the respective column. ``None`` skips the filter.
        A selection as long as the complete value list (``all_*_len``) is
        treated as "everything selected" and is skipped as well.
    alive_in : sequence of two years or None
        ``(first_year, last_year)``; keeps people born no later than
        ``last_year`` who died no earlier than ``first_year``.

    Returns
    -------
    pandas.DataFrame
        The filtered frame; ``df`` itself when no filter was applied.

    Raises
    ------
    AssertionError
        If ``alive_in`` is given but does not contain exactly two entries.
    """
    # `is not None` instead of `!= None`: the comparison operator is evaluated
    # element-wise on array-likes, which makes the `if` raise
    # "truth value ... is ambiguous" for e.g. a NumPy array.

    # Filter on countries
    if countries is not None and len(countries) != all_countries_len:
        df = df[df['AssociatedModernCountry'].isin(countries)]

    # Filter on genders
    if genders is not None and len(genders) != all_genders_len:
        df = df[df['Gender'].isin(genders)]

    # Filter on occupations
    if occupations is not None and len(occupations) != all_occupations_len:
        df = df[df['Occupation'].isin(occupations)]

    # Filter on individuals alive at some point within the interval
    if alive_in is not None:
        assert len(alive_in) == 2, "Must provide both birthyear and deathyear to find alive indivduals within timeframe"
        first_year, last_year = alive_in[0], alive_in[1]
        df = df[(df['Birth year'] <= last_year) & (df['Death year'] >= first_year)]

    return df

# Dash setup

In [5]:
# Instantiate the Dash application that the layout and all callbacks below attach to
app = Dash(__name__)

# Defining the different components
def _multi_select_dropdown(values, component_id):
    """Build a full-width multi-select dropdown with one option per entry of ``values``."""
    return dcc.Dropdown(
        options=[dict(label=value, value=value) for value in values],
        multi=True,
        id=component_id,
        placeholder="Select options...",
        style={'width': '100%'},
    )


# Filter panel: one multi-select dropdown per filterable attribute (gender,
# country, occupation), each followed by an empty Div reserved for output.
filters_widget = html.Div(
    children=[
        html.H2("Filters"),
        html.Hr(),

        # Gender
        html.P("Select gender"),
        _multi_select_dropdown(all_genders, 'dropdown-gender'),
        html.Div(id='output-div-gender'),
        html.Hr(),

        # Countries
        html.P("Select the countries you want to compare"),
        _multi_select_dropdown(all_countries, 'dropdown-checklist'),
        html.Div(id='output-country-div'),
        html.Hr(),

        # Occupations
        html.P("Select the occupation you want to compare"),
        _multi_select_dropdown(all_occupations, 'dropdown-checklist-occupation'),
        html.Div(id='output-div-occupation'),
        html.Hr(),
    ],
    style={'width': '100%', 'margin': 'auto'},
)

# Header strip with two text tiles; their contents are supplied by the
# 'total-observations-tile' and 'selected-years-tile' callbacks.
_tile_style = {'padding': '5px', 'border-right': '1px solid lightgray'}

_tile_row = html.Div(
    children=[
        html.Div(id='total-observations-tile', style=dict(_tile_style)),
        html.Div(id='selected-years-tile', style=dict(_tile_style)),
    ],
    style={'display': 'flex', 'justify-content': 'space-between', 'align-items': 'center', 'width': '25%'},
)

tiles = html.Div(
    children=[_tile_row],
    style={'background-color': 'lightgray', 'height': '50px', 'display': 'flex', 'align-items': 'center'},
)

# Year selector: three preset-period buttons followed by a range slider
# spanning -1000..2021 with a labelled mark every 200 years.
_preset_periods = [
    ('button-1', '1917-1921'),
    ('button-2', '1939-1945'),
    ('button-3', '1980-1985'),
]

_year_marks = {}
for _mark_year in range(-1000, 2021, 200):
    _year_marks[str(_mark_year)] = str(_mark_year)

_year_range_slider = dcc.RangeSlider(
    id='year-slider',
    min=-1000,
    max=2021,
    value=[-1000, 2021],
    marks=_year_marks,
    step=None,
)

slider = html.Div(
    children=[html.Button(label, id=button_id, n_clicks=0) for button_id, label in _preset_periods]
             + [_year_range_slider],
    style={'width': '100%', 'margin': 'auto'},
)


# Page layout, top to bottom:
#   1. statistic tiles
#   2. choropleth map (with its normalisation dropdown) next to the sunburst
#   3. year selector (preset buttons + range slider)
#   4. line chart followed by its data-type and view-mode dropdowns
app.layout = html.Div([
    tiles,
    html.Div([
        html.Div([dcc.Graph(id='choropleth'), dcc.Dropdown(['No normalization', 'Boxplot adaption'], 'No normalization', id='choropleth-dropdown')],
                 style={'width': '55%', 'display': 'inline-block'}),
        dcc.Graph(id='sunburst', style={'width': '45%', 'display': 'inline-block'})
    ], style={'marginTop': 25}),
    # Reuse the `slider` component built earlier in this cell rather than
    # repeating its (identical) definition inline; this keeps a single source
    # of truth for the component ids 'button-1..3' and 'year-slider'.
    slider,
    dcc.Graph(id='linechart', style={'width': '100%', 'margin': 'auto'}),
    dcc.Dropdown(['Number of Births', 'Number of Deaths', 'Average Age at Death'], 'Number of Births', id='linechart-dropdown-datatype'),
    dcc.Dropdown(['Aggregated view', 'Exploded view'], 'Aggregated view', id='linechart-dropdown-mode')
    #filters_widget
])


<hr style="border:3px solid gray">


# Choropleth functions 

In [6]:
# Index of the geojson features keyed by their 'admin' property (the same
# property the choropleth uses as featureidkey).
country_lookup = dict(
    (feature['properties']['admin'], feature)
    for feature in geojson['features']
)

def get_highlights(selections, geojson=geojson, country_lookup= country_lookup):
    """Return a copy of ``geojson`` whose 'features' are limited to ``selections``.

    Every top-level entry other than 'features' is carried over unchanged;
    'features' becomes the list of features looked up in ``country_lookup``
    for each selected name, in selection order. A name missing from
    ``country_lookup`` raises ``KeyError``.
    """
    return {
        key: ([country_lookup[selection] for selection in selections]
              if key == 'features' else value)
        for key, value in geojson.items()
    }


# Light-to-dark purple ramp shared by both normalisation modes
_CHOROPLETH_PALETTE = [
    'rgb(242,240,247)',
    'rgb(218,218,235)',
    'rgb(188,189,220)',
    'rgb(158,154,200)',
    'rgb(128,125,186)',
    'rgb(106,81,163)',
    'rgb(74,20,134)',
]


def _choropleth_color_scale(counts, norm_mode):
    """Return the colour scale for the ``counts`` series under ``norm_mode``.

    "Boxplot adaption" anchors the palette at the quartiles and the 1.5*IQR
    fences of ``counts`` (expressed as fractions of the maximum); any other
    mode returns the evenly spaced palette.
    """
    peak = counts.max()

    # Without a positive maximum (all counts zero, or no rows at all) the
    # quartile positions would be 0/0; use the plain palette instead.
    if norm_mode != "Boxplot adaption" or not peak > 0:
        return list(_CHOROPLETH_PALETTE)

    q1 = counts.quantile(0.25)
    q3 = counts.quantile(0.75)
    iqr = q3 - q1

    # Stop positions in [0, 1]: lower fence, Q1, mid-quartile, Q3, upper fence
    stops = [
        0,
        max(0, (q1 - 1.5 * iqr) / peak),
        q1 / peak,
        (q1 + q3) / 2 / peak,
        q3 / peak,
        min(1, (q3 + 1.5 * iqr) / peak),
        1,
    ]
    return [[float(stop), colour] for stop, colour in zip(stops, _CHOROPLETH_PALETTE)]


def get_choropleth(all_df, selection_df, filter_countries, norm_mode):
    """Build the two-layer choropleth map figure.

    Parameters
    ----------
    all_df : pandas.DataFrame
        Columns 'Country' and 'Counts' for every country; drawn as a faint
        base layer and used to derive the colour scale and colour range.
    selection_df : pandas.DataFrame
        Same columns, restricted to the selected countries; drawn on top at
        full opacity.
    filter_countries : list of str
        Names of the selected countries, used to cut the highlight geojson.
    norm_mode : str
        "Boxplot adaption" for the quartile-based colour scale; anything else
        gives the evenly spaced one.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    color_scale = _choropleth_color_scale(all_df['Counts'], norm_mode)

    # Base choropleth layer --------------#
    fig = px.choropleth_mapbox(all_df, geojson=geojson,
                               color="Counts",
                               locations="Country",
                               featureidkey="properties.admin",
                               color_continuous_scale=color_scale,
                               range_color = [0,all_df['Counts'].max()],
                               opacity=0.25)

    # Second layer - Highlights ----------#
    highlights = get_highlights(filter_countries)

    fig.add_trace(
        px.choropleth_mapbox(selection_df, geojson=highlights,
                                 color="Counts",
                                 locations="Country",
                                 featureidkey="properties.admin",
                                 color_continuous_scale=color_scale,
                                 opacity=1).data[0]
    )

    #------------------------------------#
    # The title is rendered inside the top margin, so the margin must be
    # non-zero for it to be visible.
    fig.update_layout(mapbox_style="carto-positron",
                      mapbox_zoom=0.35,
                      mapbox_center={"lat": 30.0, "lon": 0.00},
                      margin={"r":0,"t":40,"l":0,"b":0},
                      uirevision='constant',
                      title = "Distribution of selected people in the World")

    return fig



@app.callback(
    Output('choropleth', 'figure'),
    [Input('choropleth', 'clickData'),
     Input('choropleth-dropdown', 'value'),
     Input('year-slider', 'value'),
     Input('sunburst', 'clickData')])
def update_choropleth(clickData, norm_mode, yearinterval, sunburst_clickData):
    """Redraw the map; a click on the map also toggles that country in the global filters.

    Click handling on the country selection stored in ``filters["countries"]``:
    - everything selected  -> select only the clicked country
    - clicked country is selected     -> deselect it (back to all countries
      when nothing would remain)
    - clicked country is not selected -> add it
    """
    global filters
    trigger = ctx.triggered_id

    # Handle filtrations on click
    if trigger == "choropleth":
        location = clickData['points'][0]['location']
        selected = filters["countries"]

        if len(selected) == all_countries_len:
            selected = [location]
        elif location in selected:
            selected.remove(location)
            if not selected:
                selected = all_countries
        else:
            selected.append(location)

        filters["countries"] = selected

    # Short pause when the sunburst was clicked -- presumably so that the
    # sunburst callback can update the shared filters first (TODO confirm).
    if trigger == "sunburst":
        time.sleep(0.1)

    # Number of matching people per country (the country filter is deliberately
    # not applied: the base layer shows every country)
    matching = applyFilters(df,
                            genders = filters["genders"],
                            occupations = filters["occupations"],
                            alive_in = yearinterval)
    per_country = matching["AssociatedModernCountry"].value_counts()
    all_df = per_country.rename_axis('Country').reset_index(name='Counts')

    # Countries without any match are appended with a count of zero
    missing_countries = list(set(all_countries) - set(all_df['Country']))
    rest_df = pd.DataFrame(missing_countries, columns=['Country']).assign(Counts=0)
    all_df = pd.concat([all_df, rest_df], ignore_index=True)

    is_selected = all_df['Country'].isin(filters["countries"])
    selection_df = all_df[is_selected]

    return get_choropleth(all_df, selection_df, filters["countries"], norm_mode)


# Sunburst functions

In [7]:
# Fixed colour per occupation category so that a category keeps its colour in
# the sunburst regardless of which categories are currently shown.
# Every category must have its own colour: 'Media and Communication' used to
# share '#ff2400' with 'Law and Justice', making the two indistinguishable.
category_color_maps = {'Arts and Entertainment': '#1f77b4',
                    'Politics and Public Service': '#ff7f0e',
                    'Science and Academia': '#17becf',
                    'Religion': '#7f7f7f',
                    'Sports': '#9467bd',
                    'Business and Commerce': '#8c564b',
                    'Healthcare': '#e377c2',
                    'Law and Justice': '#ff2400',
                    'Media and Communication': '#2ca02c',
                    'Education': '#bcbd22',
                    'Engineering and Architecture': '#ffd700',
                    'Agriculture': '#ffbb78',
                    'Others': '#98df8a',
                    'Unspecified': '#ff9896'}



@app.callback(
    Output('sunburst', 'figure'),
    [Input('choropleth', 'clickData'),
     Input('year-slider', 'value'),
     Input('sunburst', 'clickData')]
)
def update_sunburst(choropleth_clickData, year_range, sunburst_clickData):
    """Redraw the occupation sunburst; a click on it also updates the occupation filter.

    Click handling on ``filters["occupations"]``:
    - click on a category (a point without parent): select all occupations of
      that category, or reset to all occupations if exactly that category is
      already selected
    - click on an occupation: select only that occupation

    Parameters
    ----------
    choropleth_clickData : dict or None
        Unused; listed as input so that map clicks re-trigger the callback.
    year_range : list
        ``[first_year, last_year]`` from the year slider.
    sunburst_clickData : dict or None
        Click payload of the sunburst.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    global filters

    # Handle filtrations on click
    if ctx.triggered_id == "sunburst":
        clicked_point = sunburst_clickData['points'][0]
        parent_category = clicked_point['parent']
        label = clicked_point['label']

        if parent_category == "":
            # Top-level wedge: toggle the whole category
            if filters["occupations"] == category_mapping[label]:
                filters["occupations"] = all_occupations
            else:
                filters["occupations"] = category_mapping[label]
        else:
            filters["occupations"] = [label]

    # Apply filters on sunburst
    sunburst_df = applyFilters(df,
                               countries = filters["countries"],
                               genders = filters["genders"],
                               occupations = filters["occupations"],
                               alive_in = year_range)

    # Set title; long selections are truncated to five names followed by an
    # ellipsis, matching the line chart title
    selected_countries = filters["countries"]
    if len(selected_countries) == all_countries_len:
        title = 'Occupation Distribution by Category in the World'
    elif len(selected_countries) <= 5:
        title = f'Occupation Distribution by Category in {", ".join(selected_countries)}'
    else:
        title = f'Occupation Distribution by Category in {", ".join(selected_countries[:5])}...'

    # Get figure
    occupation_count = sunburst_df.groupby(['Occupation category', 'Occupation']).size().reset_index(name='Count')

    fig = px.sunburst(occupation_count,
                        path = ['Occupation category', 'Occupation'],
                        values = 'Count',
                        title = title,
                        color = 'Occupation category',
                        color_discrete_map= category_color_maps,
                        width = 500,
                        height = 500)

    return fig
                
     

# Linechart functions

In [8]:
# Span of years covered by the dataset: earliest birth to latest death
start_year = df["Birth year"].min()
end_year = df["Death year"].max()
year_range = list(range(start_year, end_year + 1))


def _per_year(by_year):
    """Align a year-indexed series to ``year_range``; years it lacks become NaN."""
    missing = float('nan')
    return [by_year.get(year, missing) for year in year_range]


# Per-country yearly statistics, precomputed once so the line chart callback
# only has to pick columns.
num_of_births_each_year = {}
num_of_deaths_each_year = {}
avg_age_at_death_each_year = {}

for country in all_countries:
    country_df = df[df["AssociatedModernCountry"] == country]

    births_by_year = country_df["Birth year"].value_counts()
    deaths_by_year = country_df["Death year"].value_counts()
    mean_age_by_death_year = country_df.groupby("Death year")["Age of death"].mean()

    num_of_births_each_year[country] = _per_year(births_by_year)
    num_of_deaths_each_year[country] = _per_year(deaths_by_year)
    avg_age_at_death_each_year[country] = _per_year(mean_age_by_death_year)

# One column per country, one row per entry of year_range
num_of_births_each_year_df = pd.DataFrame(num_of_births_each_year)
num_of_deaths_each_year_df = pd.DataFrame(num_of_deaths_each_year)
avg_age_at_death_each_year_df = pd.DataFrame(avg_age_at_death_each_year)


@app.callback(
    Output('linechart', 'figure'),
    [Input('choropleth', 'clickData'),
     Input('sunburst', 'clickData'),
     Input('linechart-dropdown-datatype', 'value'),
     Input('linechart-dropdown-mode', 'value')]
)
def update_linechart(choropleth_clickData, sunburst_clickData, datatype, mode):
    """Redraw the yearly line chart for the currently selected countries.

    Parameters
    ----------
    choropleth_clickData, sunburst_clickData : dict or None
        Unused; listed as inputs so that clicks on the other charts
        re-trigger the callback.
    datatype : str
        'Number of Births', 'Number of Deaths' or 'Average Age at Death';
        selects the precomputed per-country table to plot.
    mode : str
        'Exploded view' draws one line per selected country;
        'Aggregated view' draws a single line (mean across countries for the
        average age, sum for the two counts).

    Returns
    -------
    plotly.graph_objects.Figure

    Notes
    -----
    The country selection is read from the global ``filters``, which the
    choropleth callback updates on the same click. NOTE(review): nothing here
    orders the two callbacks, so this may observe the previous selection --
    confirm and consider passing the selection through the layout instead.
    """
    global filters
    selected_countries = filters["countries"]

    tables_by_datatype = {
        'Number of Births': num_of_births_each_year_df,
        'Number of Deaths': num_of_deaths_each_year_df,
        'Average Age at Death': avg_age_at_death_each_year_df,
    }
    plot_data_df = tables_by_datatype.get(datatype)

    fig = go.Figure()

    if mode == 'Exploded view':
        for country in selected_countries:
            # Add the line plot for this country
            fig.add_trace(go.Scatter(x = year_range, y = plot_data_df[country], mode='lines', name=country, connectgaps=False))

    if mode == 'Aggregated view':
        selected_columns = plot_data_df[selected_countries]
        if datatype == "Average Age at Death":
            y_data = selected_columns.mean(axis=1)
        else:
            y_data = selected_columns.sum(axis=1)

        fig.add_trace(go.Scatter(x=year_range, y = y_data, mode='lines', name="Countries Aggregated", connectgaps=False))

    # Title: name up to five selected countries, abbreviate longer selections
    if len(selected_countries) == all_countries_len:
        title = f'Overview of {datatype} Across all Years for the World'
    elif len(selected_countries) <= 5:
        title = f'Overview of {datatype} Across all Years for {", ".join(selected_countries)}'
    else:
        title = f'Overview of {datatype} Across all Years for {", ".join(selected_countries[:5])}...'

    # Customize layout with title, axes labels, and grid.
    # The y-axis label follows the plotted data type (it used to read
    # 'Average Age of Death' for the birth and death counts as well).
    fig.update_layout(title= title,
                      xaxis_title='Year',
                      yaxis_title=datatype,
                      xaxis=dict(showgrid=True, gridwidth=1, gridcolor='LightPink'),
                      yaxis=dict(showgrid=True, gridwidth=1, gridcolor='LightPink'),
                      plot_bgcolor='white',
                      dragmode='zoom')

    return fig
    

# Filter widget functions 

# Sliders and buttons

In [9]:
@app.callback(
    Output('year-slider', 'value'),
    [Input('button-1', 'n_clicks'),
     Input('button-2', 'n_clicks'),
     Input('button-3', 'n_clicks')]
)
def update_slider(button_1, button_2, button_3):
    """Set the year slider to the period of the preset button that was clicked.

    When the callback was not triggered by one of the three buttons, the
    slider is set to the range from the earliest to the latest birth year in
    the dataset.
    """
    preset_ranges = {
        'button-1': [1917, 1921],
        'button-2': [1939, 1945],
        'button-3': [1980, 1985],
    }

    # prop_id has the form '<component id>.<property>'; keep the component id
    trigger_prop = dash.callback_context.triggered[0]['prop_id']
    source_id = trigger_prop.split('.')[0]

    if source_id in preset_ranges:
        return preset_ranges[source_id]

    birth_years = df['Birth year']
    return [birth_years.min(), birth_years.max()]

# Tiles 

In [10]:
@app.callback(
    Output('total-observations-tile', 'children'),
    [Input('sunburst', 'clickData'),
    Input('choropleth', 'clickData'),
    Input('year-slider', 'value')]
)
def update_total_observations_tile(sunburst_clickData, choropleth_clickData, year_range):
    """Return the tile text with the number of rows matching the active filters.

    The click-data arguments are unused; they are inputs only so that clicks
    on either chart re-trigger the callback.
    """
    # Short pause before reading the shared filters -- presumably to let the
    # chart callbacks update them first (TODO confirm).
    time.sleep(0.1)

    active_filters = dict(
        countries=filters["countries"],
        genders=filters["genders"],
        occupations=filters["occupations"],
        alive_in=year_range,
    )
    matching_rows = applyFilters(df, **active_filters)

    return 'Total Observations: {}'.format(len(matching_rows))

@app.callback(
    Output('selected-years-tile', 'children'),
    [Input('year-slider', 'value')]
)
def update_selected_years_tile(year_range):
    """Return the tile text showing the first and second entry of the slider value."""
    first_year = year_range[0]
    last_year = year_range[1]
    return 'Selected Years: {} - {}'.format(first_year, last_year)

<hr style="border:3px solid gray">

# Running the Dash app

In [11]:
# Launch the Dash app with debug mode enabled.
app.run(debug=True)
# Alternative launch kept for reference: bind to all interfaces on port 8050.
#app.run_server(host="0.0.0.0", port="8050")

# Still To-do 
- change box-whisker adaption colormap  &#9745;
- find a way to keep colors of sunburst categories constant &#9745;
- set the right colors for sunburst in colormap
- make visualization prettier
- get filters over to the side
- make layout prettier 
- change year slider to have smaller increments
- get linechart working &#9745;
- get filters working 
- fix bug with occupation selection (others and unspecified)
- add more tiles with statistics
- add correct titles to charts (Choropleth can't display title???)
- add descriptions to charts 
- add overall title and description of visualization
- add more Occupations to categories? 

