# Data visualization project - WikiViz 
<hr style="border:3px solid gray">

# Imports 

In [49]:
import plotly.express as px
import pandas as pd
from dash import Dash, dcc, html, Input, Output, ctx, State
import plotly.graph_objects as go
import dash
import json
import numpy as np
import time
import dash_bootstrap_components as dbc
import textwrap

# Loading data

In [50]:
# Load the main dataset.
df = pd.read_csv("TheAgeDatasetV5.csv")

# Load the world GeoJSON used by the choropleth.
# JSON is UTF-8 by specification, so every JSON file below is opened with an
# explicit encoding instead of relying on the platform default.
with open('medium.geo.json', 'r', encoding='utf-8') as f:
    geojson = json.load(f)

# Load the mapping: continent name -> list of associated countries.
with open('continents.json', 'r', encoding='utf-8') as f:
    continent_mapping = json.load(f)

# Load the mapping: occupation category -> list of occupations.
with open('categories.json', 'r', encoding='utf-8') as f:
    category_mapping = json.load(f)

# Load the per-year tables read from CSV (births, deaths, average age at death, alive).
num_of_births_each_year_df = pd.read_csv("num_of_births_each_year_df.csv")
num_of_deaths_each_year_df = pd.read_csv("num_of_deaths_each_year_df.csv")
avg_age_at_death_each_year_df = pd.read_csv("avg_age_at_death_each_year_df.csv")
num_of_alive_each_year_df = pd.read_csv("num_of_alive_each_year_df.csv")

# Global filter constants

In [51]:
# Named eras/events offered as quick period filters: name -> [start year, end year].
events = {
    'Viking Era': [793, 1066],
    'Ancient Greece': [-800, 400],
    'Maurya Empire': [-322, -185],
    'Silk Road Establishment': [130, 130],
    'Roman Empire': [27, 476],
    'Great Wall of China': [700, 1700],
    'Mongol Empire': [1206, 1368],
    'Reneissance': [1300, 1700],
    'Age of Exploration': [1400, 1700],
    'Sengoku Period': [1400, 1700],
    'French Revolution': [1789, 1799],
    'Opium Wars': [1839, 1860],
    'Industrial Revolution': [1750, 1850],
    'Meiji Restoration': [1868, 1868],
    'Napoleonic Wars': [1789, 1815],
    'American Civil War': [1861, 1865],
    'World War I': [1914, 1918],
    'Interwar Period': [1918, 1939],
    'World War II': [1939, 1945],
    'Korean War': [1950, 1953],
    'Cold War': [1947, 1991],
    'Digital age': [1980, 2021],
    'Vietnam War': [1955, 1975],
}
# Same mapping re-ordered by its [start, end] pair, i.e. by start year with the
# end year as tie-breaker (the sort is stable, so identical periods keep their order).
all_events = dict(sorted(events.items(), key=lambda name_and_period: name_and_period[1]))

# Distinct values available for each filter.
all_occupations = df["Occupation"].unique().tolist()
all_occupations.sort()
all_genders = df["Gender"].unique().tolist()
all_countries = df["AssociatedModernCountry"].unique().tolist()
all_countries.sort()
# Widest period covered by the data: earliest birth year to latest death year.
default_period = [df["Birth year"].min(), df["Death year"].max()]

# Sizes of the complete value lists; a selection of this size means "no filter".
all_countries_len = len(all_countries)
all_genders_len = len(all_genders)
all_occupations_len = len(all_occupations)
all_events_len = len(events)

# Current interpretation of the selected period (see applyFilters).
datamode = 'Alive Within Timeframe'

# Global filter state shared by the callbacks; starts with everything selected.
filters = {
    "countries": all_countries,
    "genders": all_genders,
    "occupations": all_occupations,
    "period": default_period,
}

# Helper functions

In [52]:
def applyFilters(df, countries = None, genders = None, occupations = None, timeframe = None):
    """Return the rows of ``df`` that match the given filters.

    A list filter is skipped when it is ``None`` or when its length equals the
    number of all known values (``all_countries_len`` / ``all_genders_len`` /
    ``all_occupations_len``), which is treated as "everything selected".

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns read by the active filters:
        'AssociatedModernCountry', 'Gender', 'Occupation', 'Birth year', 'Death year'.
    countries, genders, occupations : list or None
        Allowed values for the respective column.
    timeframe : sequence of two years ``[start, end]`` or None
        Interpreted according to the global ``datamode``.

    Returns
    -------
    pandas.DataFrame
        The filtered frame (the same object when no filter applies).

    Raises
    ------
    AssertionError
        If ``timeframe`` is given but does not hold exactly two entries.
    """
    # `is not None` instead of `!= None`: identity is the correct test and stays
    # unambiguous when an array-like (e.g. a pandas Index) is passed in.

    # Filter on countries
    if countries is not None and len(countries) != all_countries_len:
        df = df[df['AssociatedModernCountry'].isin(countries)]

    # Filter on genders
    if genders is not None and len(genders) != all_genders_len:
        df = df[df['Gender'].isin(genders)]

    # Filter on occupations
    if occupations is not None and len(occupations) != all_occupations_len:
        df = df[df['Occupation'].isin(occupations)]

    # Filter on the selected period
    if timeframe is not None:
        assert len(timeframe) == 2, "Must provide both start year and end year to find alive individuals within timeframe"
        start_year, end_year = timeframe[0], timeframe[1]
        if datamode == "Alive Within Timeframe":
            # Lifespan overlaps the period: born before it ends and died after it starts.
            df = df[(df['Birth year'] <= end_year) & (df['Death year'] >= start_year)]
        elif datamode == 'Born Within Timeframe':
            df = df[(df['Birth year'] >= start_year) & (df['Birth year'] <= end_year)]
        elif datamode == "Died Within Timeframe":
            df = df[(df['Death year'] >= start_year) & (df['Death year'] <= end_year)]
        # Any other datamode value leaves the frame unfiltered by period.

    return df

In [53]:
def replace_countries_with_continents(continent_dict, list_countries):
    """Collapse fully selected continents into their continent name.

    Parameters
    ----------
    continent_dict : dict
        Mapping continent name -> list of country names.
    list_countries : list
        Selected country names.

    Returns
    -------
    list
        The entries of ``list_countries`` in their original order, where every
        country that belongs to a continent whose countries are *all* selected
        is replaced by that continent's name (each continent appears once, at
        the position of its first selected country). Other countries are kept.
    """
    selected = set(list_countries)
    # Computed once instead of rebuilding both sets for every (country, continent)
    # pair: only continents whose members are all selected can replace a country.
    complete_continents = {
        continent: set(members)
        for continent, members in continent_dict.items()
        if set(members) <= selected
    }

    result = []
    for country in list_countries:
        # First complete continent (in dict order) that contains the country.
        continent = next(
            (name for name, members in complete_continents.items() if country in members),
            None,
        )
        if continent is None:
            result.append(country)
        elif continent not in result:
            result.append(continent)
    return result


# Dash setup

In [54]:
# Create the Dash app
app = dash.Dash(__name__)


# Filter dropdowns (countries, occupations, genders); each allows multiple selections.
filters_widget = html.Div(
    [
        html.Hr(),
        html.P("Select the countries you want to compare"),
        dcc.Dropdown(
            id='dropdown-checklist-country',
            # Continent names are listed first, followed by the individual countries.
            options=[
                {'label': name, 'value': name}
                for name in [*continent_mapping.keys(), *all_countries]
            ],
            multi=True,
            placeholder="Select options...",
            style={'width': '100%'},
        ),
        html.Hr(),
        html.P("Select the occupation you want to compare"),
        dcc.Dropdown(
            id='dropdown-checklist-occupation',
            # Category names first, then the single occupations except
            # "Education" and "Unspecified".
            options=[
                {'label': name, 'value': name}
                for name in [
                    *category_mapping.keys(),
                    *(occ for occ in all_occupations if occ not in {"Education", "Unspecified"}),
                ]
            ],
            multi=True,
            placeholder="Select options...",
            style={'width': '100%'},
        ),
        html.Hr(),
        html.P("Select the genders you want to compare"),
        dcc.Dropdown(
            id='dropdown-checklist-gender',
            options=[{'label': gender, 'value': gender} for gender in all_genders],
            multi=True,
            placeholder="Select options...",
            style={'width': '100%'},
        ),
    ],
    style={'width': '100%', 'margin': 'auto'},
)

# Header strip with two placeholder tiles (total observations, selected years).
tiles = html.Div(
    [
        html.Div(
            [
                html.Div(
                    id='total-observations-tile',
                    style={'padding': '5px', 'border-right': '1px solid lightgray'},
                ),
                html.Div(
                    id='selected-years-tile',
                    style={'padding': '5px', 'border-right': '1px solid lightgray'},
                ),
            ],
            style={
                'display': 'flex',
                'justify-content': 'space-between',
                'align-items': 'center',
                'width': '25%',
            },
        )
    ],
    style={
        'background-color': 'lightgray',
        'height': '50px',
        'display': 'flex',
        'align-items': 'center',
    },
)

# Three preset-period buttons followed by the year range slider.
slider = html.Div(
    [
        html.Button('1917-1921', id='button-1', n_clicks=0),
        html.Button('1939-1945', id='button-2', n_clicks=0),
        html.Button('1980-1985', id='button-3', n_clicks=0),
        dcc.RangeSlider(
            id='year-slider',
            min=-1000,
            max=2021,
            value=[-1000, 2021],
            # One labelled mark every 200 years; step=None restricts the handles to the marks.
            marks={str(mark_year): str(mark_year) for mark_year in range(-1000, 2021, 200)},
            step=None,
        ),
    ],
    style={'width': '100%', 'margin': 'auto'},
)

# Page layout of the plain-Dash version: tiles, map + sunburst, period slider,
# line chart with its two dropdowns, and the filter dropdowns.
app.layout = html.Div([
    tiles,
    html.Div([
        html.Div(
            [
                dcc.Graph(id='choropleth'),
                dcc.Dropdown(
                    ['No normalization', 'Boxplot adaption'],
                    'No normalization',
                    id='choropleth-dropdown',
                    clearable=False,
                ),
            ],
            style={'width': '55%', 'display': 'inline-block'},
        ),
        dcc.Graph(id='sunburst', style={'width': '45%', 'display': 'inline-block'}),
    ], style={'marginTop': 25}),
    # Reuse the `slider` component defined above instead of declaring an identical
    # second copy of the preset buttons and range slider inline.
    slider,
    dcc.Graph(id='linechart', style={'width': '100%', 'margin': 'auto'}),
    dcc.Dropdown(
        ['Number of Births', 'Number of Deaths', 'Average Age at Death', "Number of Alive Individuals"],
        'Number of Births',
        id='linechart-dropdown-datatype',
        clearable=False,
    ),
    dcc.Dropdown(
        ['Aggregated view', 'Exploded view'],
        'Aggregated view',
        id='linechart-dropdown-mode',
        clearable=False,
    ),
    filters_widget
])



## Bootstrap Edition

In [55]:
def tooltip_generator(id,target, description,style={"font-size": "1.5rem"},trigger="hover focus",delay={"show": 1000, "hide": 1000}):
    """Create a ``dbc.Tooltip`` placed on top of the target component.

    Parameters
    ----------
    id : str
        Component id of the tooltip itself.
    target : str
        Id of the component the tooltip is attached to.
    description : str
        Tooltip text; it is wrapped with ``textwrap.fill`` at a width of 50.
    style : dict
        Style passed to the tooltip.
    trigger : str
        Trigger specification passed to the tooltip.
    delay : dict
        Delay specification passed to the tooltip.

    Returns
    -------
    dbc.Tooltip
    """
    # The default dicts are created once at definition time; hand each tooltip its
    # own copy so that components never share (and cannot cross-mutate) one dict.
    style = dict(style) if isinstance(style, dict) else style
    delay = dict(delay) if isinstance(delay, dict) else delay

    return dbc.Tooltip(
        textwrap.fill(description, width=50),
        target=target,
        id=id,
        placement="top",
        style=style,
        delay=delay,
        trigger=trigger,
    )

# Description shown in the tooltip of each chart, keyed by chart name.
# NOTE(review): every entry is the same sentence with only the chart noun swapped,
# which looks like placeholder text -- confirm and replace with chart-specific wording.
tooltip_descriptions = {
    chart_name: (
        f"This {chart_noun} shows the number of individuals that were born in a country "
        "and died in another country. "
        "The darker the color, the more individuals died in that country."
    )
    for chart_name, chart_noun in (
        ('Choropleth', 'map'),
        ('Scatterplot', 'scatterplot'),
        ('Bar Chart', 'barplot'),
        ('Line Chart', 'histogram'),
        ('Sunburst', 'sunburst'),
    )
}

In [56]:
# Bootstrap edition: re-create the Dash app with the LITERA theme stylesheet.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.LITERA])

# Fixed-position sidebar styling (currently not applied to the filters widget below).
SIDEBAR_STYLE = {
    "position": "fixed",
    "top": 0,
    "left": 0,
    "bottom": 0,
    "width": "24rem",
    "padding": "2rem 1rem",
    "background-color": "#f8f9fa",
}

# All filter controls: countries, occupations, genders, event period and data mode.
filters_widget = html.Div(
    [
        html.H2("Filters", style={'margin-top': '2px'}),
        html.Hr(),
        html.P("Select the countries you want to compare"),
        dcc.Dropdown(
            id='dropdown-checklist-country',
            # Continent names first, then the individual countries.
            options=[
                {'label': name, 'value': name}
                for name in [*continent_mapping.keys(), *all_countries]
            ],
            multi=True,
            placeholder="Select options...",
            searchable=True,
        ),
        html.Br(),
        html.P("Select the occupation you want to compare"),
        dcc.Dropdown(
            id='dropdown-checklist-occupation',
            # Category names first, then the single occupations except
            # "Education" and "Unspecified".
            options=[
                {'label': name, 'value': name}
                for name in [
                    *category_mapping.keys(),
                    *(occ for occ in all_occupations if occ not in {"Education", "Unspecified"}),
                ]
            ],
            multi=True,
            placeholder="Select options...",
        ),
        html.Br(),
        html.P("Select the genders you want to compare"),
        dcc.Dropdown(
            id='dropdown-checklist-gender',
            options=[{'label': gender, 'value': gender} for gender in all_genders],
            multi=True,
            placeholder="Select options...",
        ),
        html.Br(),
        html.P("Select an era, time period, or an event"),
        # NOTE(review): each option value is a [start, end] list and several events
        # share the same period (e.g. 'Age of Exploration' / 'Sengoku Period') --
        # confirm the dropdown copes with non-scalar and duplicate values.
        dcc.Dropdown(
            id='dropdown-event',
            options=[
                {'label': event_name, 'value': period}
                for event_name, period in all_events.items()
            ],
            clearable=False,
            placeholder="Apply an option",
        ),
        html.Br(),
        html.P("Select the type of data to show"),
        dcc.Dropdown(
            ['Alive Within Timeframe', 'Born Within Timeframe', "Died Within Timeframe"],
            'Alive Within Timeframe',
            id='dropdown-datamode',
            clearable=False,
        ),
    ]
)

# Collapsible sidebar: the filters live in an off-canvas panel that starts closed.
sidebar = html.Div(
    [
        dbc.Offcanvas(
            filters_widget,
            id="offcanvas",
            is_open=False,
            scrollable=True,
            backdrop=False,
            close_button=True,
            keyboard=True,
        ),
    ],
    style={'width': 3, 'margin': 'auto'},
)

# collapsable sidebar: https://community.plotly.com/t/how-to-create-a-button-that-open-and-closed-side-bar/76007
# the toggle sidebar here

#tiles
# tiles = dbc.Container([
#     dbc.Row([
#         dbc.Col(html.Div(id='total-observations-tile', style={'padding': '5px', 'border-right': '1px solid lightgray'}), width=6),
#         dbc.Col(html.Div(id='selected-years-tile', style={'padding': '5px', 'border-right': '1px solid lightgray'}), width=6)
#     ], style={'display': 'flex', 'justify-content': 'space-between', 'align-items': 'center'})
# ], style={'background-color': 'lightgray', 'height': '50px', 'display': 'flex', 'align-items': 'center'})

# cards for tiles
def _tile_card(title, value_id):
    """Build a card holding an H4 ``title`` and an empty Div with id ``value_id``.

    The Div has no content here; presumably a callback elsewhere fills it.
    """
    return dbc.Card(
        dbc.CardBody([
            html.H4(title, className="card-title"),
            html.Div(id=value_id),
        ])
    )


# The three cards differ only in title and target id, so they share one builder.
total_observations_tile_card = _tile_card("Total Observations:", 'total-observations-tile')
selected_years_tile_card = _tile_card("Selected Years:", 'selected-years-tile')
gender_ratio_tile_card = _tile_card("Gender Ratio:", 'gender-ratio-tile')





# Year range slider (one-year steps, labelled marks every 200 years).
slider = dbc.Container([
    dbc.Row([
        dbc.Col(
            dcc.RangeSlider(
                id='year-slider',
                min=-1000,
                max=2021,
                value=[-1000, 2021],
                marks={i: str(i) for i in range(-1000, 2021, 200)},
                step=1,
                allowCross=False,
                tooltip={'always_visible': False, 'placement': 'bottom'},
                # `drag_value` is intentionally not set: it is the list-valued prop the
                # component itself updates while dragging, so the former scalar `1`
                # was not a valid value for it.
            ),
            # The Bootstrap grid has 12 columns; the previous width=14 is outside the
            # valid 1-12 range. The explicit 100% style width keeps the slider full-width.
            width=12,
            style={'padding': '40px 0px 20px 0px', 'width': '100%'},
        )
    ], style={'width': '100%'})
])

    
# Cards for the graphs: each has a title (also the tooltip target), the graph,
# its tooltip and, where present, the dropdowns that control the graph.
choropleth_card = dbc.Card(
    dbc.CardBody(
        [
            html.H4("Choropleth", id="choropleth_target", className="card-title"),
            dcc.Graph(id='choropleth', style={'width': '100%'}),
            tooltip_generator(
                id="choropleth_tooltip",
                target="choropleth_target",
                description=tooltip_descriptions['Choropleth'],
            ),
            dbc.Col(
                dcc.Dropdown(
                    ['Standard view', 'Boxplot adaption on colormap'],
                    'Standard view',
                    id='choropleth-dropdown',
                    clearable=False,
                    searchable=False,
                ),
                width=4,
                style={"margin-top": "0.2cm"},
            ),
        ]
    )
)

sunburst_card = dbc.Card(
    dbc.CardBody(
        [
            html.H4("Sunburst", id="sunburst_target", className="card-title"),
            dcc.Graph(id='sunburst', style={'width': '100%', 'height': '100%'}),
            tooltip_generator(
                id="sunburst_tooltip",
                target="sunburst_target",
                description=tooltip_descriptions['Sunburst'],
            ),
        ]
    )
)

linechart_card = dbc.Card(
    dbc.CardBody(
        [
            html.H4("Line Chart", id="linechart_target", className="card-title"),
            dcc.Graph(id='linechart', style={'width': '100%', 'margin': 'auto'}),
            tooltip_generator(
                id="linechart_tooltip",
                target="linechart_target",
                description=tooltip_descriptions['Line Chart'],
            ),
            dbc.Row(
                [
                    dbc.Col(
                        dcc.Dropdown(
                            ['Number of Births', 'Number of Deaths', 'Average Age at Death', "Number of Alive Individuals"],
                            'Number of Births',
                            id='linechart-dropdown-datatype',
                            clearable=False,
                            searchable=False,
                        )
                    ),
                    dbc.Col(
                        dcc.Dropdown(
                            ['Aggregated view', 'Exploded view'],
                            'Aggregated view',
                            id='linechart-dropdown-mode',
                            clearable=False,
                            searchable=False,
                        )
                    ),
                ]
            ),
        ]
    )
)

barchart_card = dbc.Card(
    dbc.CardBody(
        [
            html.H4("Bar Chart", id="barchart_target", className="card-title"),
            dcc.Graph(id='barchart', style={'width': '100%', 'margin': 'auto'}),
            dcc.Dropdown(
                ['Compare Countries', 'Compare Occupation Categories', 'Compare Occupations'],
                'Compare Countries',
                id='barchart-dropdown-mode',
                clearable=False,
                searchable=False,
            ),
            tooltip_generator(
                id="barchart_tooltip",
                target="barchart_target",
                description=tooltip_descriptions['Bar Chart'],
            ),
        ]
    )
)

# App layout: off-canvas sidebar plus one column holding the navbar and the dashboard.
app.layout = dbc.Container([
    dbc.Row([
        dbc.Container(sidebar),  # Off-canvas filter sidebar
        dbc.Col(
            dbc.Container([
                # Navbar with the open-filters button, logos and the app title
                dbc.Navbar(
                    [
                        dbc.NavItem(
                            dbc.Button(
                                id="open-filters",
                                children=[html.Img(src='https://upload.wikimedia.org/wikipedia/commons/a/a7/Toicon-icon-lines-and-angles-filter.svg')],
                                size="sm",
                                color="light",
                                n_clicks=0,
                            ),
                            style={"padding": "0.5cm"},
                        ),
                        # Spacing utilities use the Bootstrap 5 names (ps-*/ms-*): the
                        # Bootstrap 4 names (pl-*/ml-*) have no effect with the Bootstrap 5
                        # based dbc release that provides dbc.Offcanvas.
                        dbc.NavItem(
                            html.Img(
                                src="https://upload.wikimedia.org/wikipedia/commons/8/80/Wikipedia-logo-v2.svg",
                                className="ps-4",
                                height="50px",
                                width="50px",
                            )
                        ),
                        # App title aligned to center
                        dbc.NavbarBrand(
                            "Wiki Data Visualization",
                            className="mx-auto ps-4",
                            style={"font-size": "30px", "text-align": "center", "font-family": "Linux Libertine"},
                        ),
                        dbc.NavItem(
                            html.Img(
                                src="https://upload.wikimedia.org/wikipedia/de/7/71/Uni_aarhus_logo.svg",
                                height="100px",
                                width="130px",
                                style={"filter": "grayscale(100%) brightness(300%)", "padding-right": "0.5cm"},
                            ),
                            className="ms-auto",
                        ),
                    ],
                    color="#3F3F3F",
                    dark=True,
                    sticky="top",
                    className="mb-3",
                ),
                # Dashboard body: tiles, map + sunburst, bar chart, slider, line chart
                dbc.Container([
                    dbc.Card([
                        dbc.CardBody([
                            dbc.Row([
                                dbc.Col(total_observations_tile_card, width=3),
                                dbc.Col(selected_years_tile_card, width=3),
                                dbc.Col(gender_ratio_tile_card, width=3),
                            ], style={'display': 'flex', 'justify-content': 'space-between', 'align-items': 'center'})
                        ])
                    ], style={'margin-bottom': '0.5cm'}),
                    dbc.Container([
                        dbc.Row([
                            dbc.Col(choropleth_card, width=7),
                            dbc.Col(sunburst_card, width=5),
                        ], style={'display': 'flex', 'margin-bottom': '0.5cm'})
                    ]),
                    barchart_card,
                    slider,
                    html.Hr(style={'border': 'none', 'borderTop': '0.3cm solid #272727'}),
                    linechart_card,
                ], style={'margin-bottom': '4cm'}),
            ])
        ),
    ], justify='between')
])


<hr style="border:3px solid gray">


# Filtration handling

In [57]:
@app.callback(
    [Output('dropdown-checklist-country', 'value'),
     Output('dropdown-checklist-occupation', 'value'),
     Output('dropdown-checklist-gender', 'value'),
     Output('year-slider', 'value'),
     Output('dropdown-event', 'value')],
    [Input('choropleth', 'clickData'),
     Input('choropleth-dropdown', 'value'),
     Input('year-slider', 'value'),
     Input('sunburst', 'clickData'),
     Input('dropdown-checklist-country', 'value'),
     Input('dropdown-checklist-occupation', 'value'),
     Input('dropdown-checklist-gender', 'value'),
     Input('dropdown-event', 'value'),
     Input('dropdown-datamode', "value")])
def handle_all_filtrations(clickData, norm_mode, year_slider, sunburst_clickData, country_dropdown,
                           occupation_dropdown, gender_dropdown, event_dropdown, datamode_dropdown):
    """Update the global ``filters`` / ``datamode`` from whichever control fired.

    Only the branch matching ``ctx.triggered_id`` runs. The returned values are
    written back to the country, occupation and gender dropdowns, the year
    slider and the event dropdown, so clicks on the map or the sunburst are
    mirrored in the dropdowns.

    NOTE(review): the figure callbacks listen to the same inputs and read the
    global ``filters``; whether this callback is guaranteed to run before them
    cannot be seen from here -- confirm.
    """
    global filters
    global datamode

    trigger = ctx.triggered_id

    # Occupation dropdown: expand selected categories into their occupations.
    if trigger == "dropdown-checklist-occupation":
        # Truthiness covers both an emptied selection ([]) and an unset one (None).
        if occupation_dropdown:
            chosen_categories = [x for x in occupation_dropdown if x in category_mapping]
            chosen_occupations = [x for x in occupation_dropdown if x not in category_mapping]
            filters["occupations"] = list(set(
                [occ for cat in chosen_categories for occ in category_mapping[cat]] + chosen_occupations
            ))
        else:
            filters["occupations"] = all_occupations

    # Sunburst click: a top-level segment (empty parent) toggles its category,
    # any other segment selects that single occupation.
    if trigger == "sunburst" and sunburst_clickData:
        point = sunburst_clickData['points'][0]
        category = point['parent']
        label = point['label']
        if category == "":
            # Compare as sets: a category chosen through the dropdown is stored in
            # set order, so a list comparison would never recognise it as selected.
            if set(filters["occupations"]) == set(category_mapping[label]):
                filters["occupations"] = all_occupations
                occupation_dropdown = []
            else:
                filters["occupations"] = category_mapping[label]
                occupation_dropdown = [label]
        else:
            filters["occupations"] = [label]
            occupation_dropdown = [label]

    # Country dropdown: expand selected continents into their countries.
    if trigger == "dropdown-checklist-country":
        if country_dropdown:
            chosen_continents = [x for x in country_dropdown if x in continent_mapping]
            chosen_countries = [x for x in country_dropdown if x not in continent_mapping]
            filters["countries"] = list(set(
                [c for continent in chosen_continents for c in continent_mapping[continent]] + chosen_countries
            ))
            country_dropdown = replace_countries_with_continents(continent_mapping, filters["countries"])
        else:
            filters["countries"] = all_countries

    # Choropleth click: toggle the clicked country in the selection.
    if trigger == "choropleth" and clickData:
        location = clickData['points'][0]['location']
        # Work on a copy so the list stored in `filters` is never mutated in place.
        countries = list(filters["countries"])

        if len(countries) == all_countries_len:
            # Everything was selected: start a fresh selection with the clicked country.
            countries = [location]
            country_dropdown = [location]

        elif location not in countries:
            countries.append(location)
            country_dropdown = replace_countries_with_continents(continent_mapping, countries)

        else:
            countries.remove(location)
            country_dropdown = replace_countries_with_continents(continent_mapping, countries)
            if len(countries) == 0:
                # Nothing left selected: fall back to all countries.
                countries = all_countries
                country_dropdown = []

        filters["countries"] = countries

    # Gender dropdown: an empty selection means all genders.
    if trigger == 'dropdown-checklist-gender':
        if not gender_dropdown:
            filters['genders'] = all_genders
        else:
            filters['genders'] = gender_dropdown

    # Event dropdown: apply the event's period, then reset the dropdown.
    # NOTE(review): the slider value returned below is left unchanged, so the slider
    # does not move to the event's period -- confirm whether that is intended.
    if trigger == 'dropdown-event':
        if event_dropdown != default_period:
            filters["period"] = event_dropdown
            event_dropdown = None

    if trigger == "year-slider":
        filters["period"] = year_slider

    if trigger == 'dropdown-datamode':
        datamode = datamode_dropdown

    return country_dropdown, occupation_dropdown, gender_dropdown, year_slider, event_dropdown

#New
@app.callback(
    Output("offcanvas", "is_open"),
    Input("open-filters", "n_clicks"),
    [State("offcanvas", "is_open")],
)
def toggle_offcanvas(n1, is_open):
    """Flip the off-canvas open state on a click; keep it when ``n1`` is falsy."""
    return (not is_open) if n1 else is_open

# Choropleth functions 

In [58]:
# Lookup: country name (GeoJSON 'admin' property) -> GeoJSON feature.
country_lookup = {
    feature['properties']['admin']: feature
    for feature in geojson['features']
}

def get_highlights(selections, geojson=geojson, country_lookup= country_lookup):
    """Return a copy of ``geojson`` whose 'features' hold only the selected countries.

    Parameters
    ----------
    selections : iterable of str
        Country names to keep.
    geojson : dict
        Source GeoJSON; every key other than 'features' is carried over as is.
    country_lookup : dict
        Mapping country name -> feature.

    Returns
    -------
    dict
        New top-level dict (the features themselves are shared, not copied).
        Names without an entry in ``country_lookup`` are skipped instead of
        raising ``KeyError``, since there is no shape to highlight for them.
    """
    return {
        key: (
            [country_lookup[name] for name in selections if name in country_lookup]
            if key == 'features'
            else value
        )
        for key, value in geojson.items()
    }


def get_choropleth(all_df, selection_df, filter_countries, norm_mode):
    """Build the two-layer choropleth figure.

    Parameters
    ----------
    all_df : pandas.DataFrame
        Columns 'Country' and 'Counts' for every country; drawn as a faded base layer.
    selection_df : pandas.DataFrame
        Same columns, restricted to the selected countries; drawn fully opaque on top.
    filter_countries : list of str
        Selected country names, used to build the GeoJSON of the highlight layer.
    norm_mode : str
        'Boxplot adaption on colormap' places the colour stops at the quartile /
        IQR fences of the counts; any other value uses evenly spaced colours.

    Returns
    -------
    plotly figure
    """
    standard_scale = [
        'rgb(242,240,247)',
        'rgb(218,218,235)',
        'rgb(188,189,220)',
        'rgb(158,154,200)',
        'rgb(128,125,186)',
        'rgb(106,81,163)',
        'rgb(74,20,134)',
    ]
    max_count = all_df['Counts'].max()

    # Get the appropriate colorscale.
    # The boxplot scale divides by the maximum count; when every count is 0 (or the
    # frame is empty) that would yield NaN/inf stops, so fall back to the standard scale.
    if norm_mode == 'Boxplot adaption on colormap' and max_count > 0:
        Q1 = all_df['Counts'].quantile(0.25)
        Q3 = all_df['Counts'].quantile(0.75)
        IQR = Q3 - Q1

        # Colour stops (as fractions of the maximum) at the lower fence, Q1, the
        # midpoint of Q1 and Q3, Q3 and the upper fence, clamped to [0, 1].
        color_scale = [
            [0, 'rgb(242,240,247)'],
            [max(0, (Q1 - 1.5 * IQR) / max_count), 'rgb(218,218,235)'],
            [Q1 / max_count, 'rgb(188,189,220)'],
            [(Q1 + Q3) / 2 / max_count, 'rgb(158,154,200)'],
            [Q3 / max_count, 'rgb(128,125,186)'],
            [min(1, (Q3 + 1.5 * IQR) / max_count), 'rgb(106,81,163)'],
            [1, 'rgb(74,20,134)'],
        ]
    else:
        color_scale = standard_scale

    # Base choropleth layer --------------#
    fig = px.choropleth_mapbox(all_df, geojson=geojson,
                               color="Counts",
                               locations="Country",
                               featureidkey="properties.admin",
                               color_continuous_scale=color_scale,
                               range_color = [0,all_df['Counts'].max()],
                               opacity=0.25)

    # Second layer - Highlights ----------#
    highlights = get_highlights(filter_countries)

    fig.add_trace(
        px.choropleth_mapbox(selection_df, geojson=highlights,
                                 color="Counts",
                                 locations="Country",
                                 featureidkey="properties.admin",
                                 color_continuous_scale=color_scale,
                                 opacity=1).data[0]
    )

    #------------------------------------#
    fig.update_layout(mapbox_style="carto-positron",
                      mapbox_zoom=0.35,
                      mapbox_center={"lat": 30.0, "lon": 0.00},
                      margin={"r":0,"t":0,"l":0,"b":0},
                      uirevision='constant',
                      title = "Distribution of selected people in the World")

    return fig



@app.callback(
    Output('choropleth', 'figure'),
    [Input('choropleth', 'clickData'),
     Input('choropleth-dropdown', 'value'),
     Input('year-slider', 'value'),
     Input('sunburst', 'clickData'),
     Input('dropdown-checklist-country', 'value'),
     Input('dropdown-checklist-occupation', 'value'),
     Input('dropdown-checklist-gender', 'value'),
     Input('dropdown-event', 'value'),
     Input('dropdown-datamode', "value")])
def update_choropleth(clickData, norm_mode, yearinterval, sunburst_clickData, country_dropdown,
                      occupation_dropdown, gender_dropdown, event_dropdown, datamode_dropdown):
    """Redraw the choropleth from the global ``filters``.

    Apart from ``norm_mode`` the arguments only serve as triggers; the actual
    filter values are read from ``filters``.
    """
    # Count people per country; the country filter is deliberately not applied here
    # so the faded base layer still covers every country.
    people = applyFilters(
        df,
        genders=filters["genders"],
        occupations=filters["occupations"],
        timeframe=filters["period"],
    )
    per_country = people["AssociatedModernCountry"].value_counts()
    counts_df = per_country.rename_axis('Country').reset_index(name='Counts')

    # Countries without any matching person are added with a count of 0.
    countries_without_hits = list(set(all_countries) - set(counts_df['Country']))
    zero_df = pd.DataFrame(countries_without_hits, columns=['Country'])
    zero_df['Counts'] = 0
    counts_df = pd.concat([counts_df, zero_df], ignore_index=True)

    # Rows of the currently selected countries form the highlight layer.
    is_selected = counts_df['Country'].isin(filters["countries"])
    highlighted_df = counts_df[is_selected]

    return get_choropleth(counts_df, highlighted_df, filters["countries"], norm_mode)


# Sunburst functions

In [59]:
# Fixed colour per occupation category, used as the discrete colour map of the sunburst.
# Every category needs its own colour: 'Media and Communication' previously reused the
# red of 'Law and Justice', which made the two categories indistinguishable.
category_color_maps = {
    'Arts and Entertainment': '#1f77b4',  # Blue
    'Politics and Public Service': '#ff7f0e',  # Orange
    'Science and Academia': '#17becf',  # Cyan
    'Religion': '#ff9896',  # Light red
    'Sports': '#9467bd',  # Purple
    'Business and Commerce': '#8c564b',  # Brown
    'Healthcare': '#e377c2',  # Pink
    'Law and Justice': '#ff2400',  # Red
    'Media and Communication': '#2ca02c',  # Green
    'Education': '#bcbd22',  # Yellow-green
    'Engineering and Architecture': '#ffd700',  # Gold
    'Agriculture': '#ffbb78',  # Light orange
    'Others': '#98df8a',  # Light green
    'Unspecified': '#7f7f7f'  # Gray
}



@app.callback(
    Output('sunburst', 'figure'),
    [Input('choropleth', 'clickData'),
     Input('year-slider', 'value'),
     Input('sunburst', 'clickData'),
     Input('dropdown-checklist-country', 'value'),
     Input('dropdown-checklist-occupation', 'value'),
     Input('dropdown-checklist-gender', 'value'),
     Input('dropdown-event', 'value'),
     Input('dropdown-datamode', "value")]
)
def update_sunburst(choropleth_clickData, year_range, sunburst_clickData, country_dropdown,
                    occupation_dropdown, gender_dropdown, event_dropdown, datamode_dropdown):
    """Redraw the occupation sunburst from the global ``filters``.

    The arguments only serve as triggers; the actual filter values are read
    from ``filters``. The figure shows occupation categories on the inner ring
    and occupations on the outer ring, sized by the number of matching rows.
    """
    # Apply all filters
    sunburst_df = applyFilters(
        df,
        countries=filters["countries"],
        genders=filters["genders"],
        occupations=filters["occupations"],
        timeframe=filters["period"],
    )

    # Build the title: "the World" when nothing is filtered, otherwise up to five
    # selected countries/continents (followed by "..." when there are more).
    title_countries = replace_countries_with_continents(continent_mapping, filters["countries"])
    if len(filters["countries"]) == all_countries_len:
        title_text = 'Occupation Distribution by Category in the World'
    elif len(title_countries) <= 5:
        title_text = f'Occupation Distribution by Category in {", ".join(title_countries)}'
    else:
        title_text = f'Occupation Distribution by Category in {", ".join(title_countries[:5])}...'
    # Wrapped to a fixed width of 29 characters; lines are joined with <br> below.
    title = textwrap.wrap(title_text, width = 29)

    # Number of rows per (category, occupation) pair
    occupation_count = sunburst_df.groupby(['Occupation category', 'Occupation']).size().reset_index(name='Count')

    # The colours come straight from `category_color_maps`. The former intermediate
    # list built by indexing that dict was never used and raised KeyError for any
    # category missing from it, so it has been removed.
    fig = px.sunburst(occupation_count,
                      path = ['Occupation category', 'Occupation'],
                      values = 'Count',
                      title = '<br>'.join(title),
                      color = 'Occupation category',
                      color_discrete_map = category_color_maps)

    return fig
                
     

# Barchart functions 

In [60]:
def _counts_with_zero_rows(values, label, expected):
    """Count the entries of ``values`` and add a zero row for every expected entry that is absent.

    Args:
        values: pandas Series whose values are counted.
        label: name of the result column holding the counted values.
        expected: iterable of entries that must appear in the result.

    Returns:
        DataFrame with the columns ``label`` and ``'Counts'``: counted entries
        first (in ``value_counts`` order), followed by the zero-count entries.
    """
    counts_df = values.value_counts().rename_axis(label).reset_index(name='Counts')
    # Sorted so the zero bars appear in a stable order; key=str tolerates a
    # non-string entry (e.g. NaN) among the expected values.
    absent = sorted(set(expected) - set(counts_df[label]), key=str)
    if not absent:
        return counts_df
    zero_df = pd.DataFrame({label: absent, 'Counts': 0})
    return pd.concat([counts_df, zero_df], ignore_index=True)


def _style_bars(fig, n_bars, max_visible=20):
    """Apply the shared bar-label style and show at most ``max_visible`` bars initially."""
    fig.update_traces(textfont_size=12, textangle=-45, textposition="outside", cliponaxis=False)
    fig.update_layout(xaxis_range=[-1, min(n_bars, max_visible)])
    if n_bars > max_visible:
        # More bars than fit: let the user scroll through them
        fig.update_layout(xaxis_rangeslider_visible=True)


@app.callback(
    Output('barchart', 'figure'),
    [Input('choropleth', 'clickData'), 
     Input('year-slider', 'value'), 
     Input('sunburst', 'clickData'),
     Input('dropdown-checklist-country', 'value'),
     Input('dropdown-checklist-occupation', 'value'),
     Input('dropdown-checklist-gender', 'value'),
     Input('barchart-dropdown-mode', 'value'),
     Input('dropdown-event', 'value'),
     Input('dropdown-datamode', "value")]
)
def update_barchart(choropleth_clickData, year_range, sunburst_clickData, country_dropdown, 
                    occupation_dropdown, gender_dropdown, barchart_mode, event_dropdown, datamode_dropdown):
    """Rebuild the bar chart for the current global filter state.

    Only ``barchart_mode`` is read from the callback arguments; the active
    selection comes from the module-level ``filters`` dict.

    Args:
        barchart_mode: "Compare Countries", "Compare Occupation Categories" or
            "Compare Occupations". Any other value yields an empty figure.

    Returns:
        A Plotly bar figure with one bar per country / category / occupation
        of the selection, including zero-height bars for entries without any
        matching individual.
    """
    filtered_df = applyFilters(df, countries = filters["countries"], genders = filters["genders"],
                               occupations = filters["occupations"], timeframe = filters["period"])

    fig = go.Figure()

    if barchart_mode == "Compare Countries":
        countries_df = _counts_with_zero_rows(filtered_df["AssociatedModernCountry"], 'Country',
                                              filters["countries"])

        fig = px.bar(countries_df, x='Country', y='Counts', text_auto='.2s',
                     title="Individuals in Each Country of the Selection")
        _style_bars(fig, len(countries_df))

    elif barchart_mode == "Compare Occupation Categories":
        # Categories reachable through the occupation filter alone, so that a
        # category emptied by the other filters still shows up with a zero bar
        cats_in_selection = set(applyFilters(df, occupations = filters["occupations"])["Occupation category"].unique())
        categories_df = _counts_with_zero_rows(filtered_df["Occupation category"], 'Occupation category',
                                               cats_in_selection)

        fig = px.bar(categories_df, x='Occupation category', y='Counts', color='Occupation category',
                     text_auto='.2s', color_discrete_map=category_color_maps,
                     title="Individuals in Each Occupation Category of the Selection")
        _style_bars(fig, len(categories_df))

    elif barchart_mode == "Compare Occupations":
        occupation_df = _counts_with_zero_rows(filtered_df["Occupation"], 'Occupations',
                                               filters["occupations"])
        # First category whose entry in category_mapping contains the occupation (None if there is none)
        occupation_df["Occupation Category"] = occupation_df['Occupations'].apply(
            lambda x: next((k for k, v in category_mapping.items() if x in v), None))

        # category_orders pins the x order to the row order; colouring by
        # category would otherwise regroup the bars per category
        fig = px.bar(occupation_df, x='Occupations', y='Counts', color='Occupation Category',
                     text_auto='.2s', color_discrete_map=category_color_maps,
                     title="Individuals in Each Occupation of the Selection",
                     category_orders={'Occupations': occupation_df['Occupations'].tolist()})
        _style_bars(fig, len(occupation_df))

    return fig

# Linechart functions 

In [61]:


@app.callback(
    Output('linechart', 'figure'),
    [Input('choropleth', 'clickData'),
     Input('sunburst', 'clickData'), 
     Input('linechart-dropdown-datatype', 'value'),
     Input('linechart-dropdown-mode', 'value'),
     Input('dropdown-checklist-country', 'value')]
)
def update_linechart(choropleth_clickData, sunburst_clickData, datatype, mode, country_dropdown):
    """Rebuild the per-year line chart for the selected countries.

    Args:
        datatype: which precomputed per-year table to plot ('Number of Births',
            'Number of Deaths', 'Average Age at Death' or
            'Number of Alive Individuals').
        mode: 'Exploded view' draws one line per selected country,
            'Aggregated view' draws a single combined line. Any other value
            produces a figure without traces.

    The other arguments only trigger the callback; the selected countries are
    read from the module-level ``filters`` dict.

    Returns:
        A Plotly figure, or ``dash.no_update`` when ``datatype`` does not name
        one of the known tables (e.g. the dropdown was cleared).
    """
    tables_by_datatype = {
        'Number of Births': num_of_births_each_year_df,
        'Number of Deaths': num_of_deaths_each_year_df,
        'Average Age at Death': avg_age_at_death_each_year_df,
        'Number of Alive Individuals': num_of_alive_each_year_df,
    }
    plot_data_df = tables_by_datatype.get(datatype)
    if plot_data_df is None:
        # Nothing to plot: keep the current figure instead of failing on None[...]
        return dash.no_update

    # int() so range() also works when the year columns are stored as floats
    start_year = int(df["Birth year"].min())
    end_year = int(df["Death year"].max())
    # NOTE(review): assumes each per-year table has one row per year of this range - confirm
    year_range = list(range(start_year, end_year + 1))

    fig = go.Figure()

    if mode == 'Exploded view':
        for country in filters["countries"]:
            # Add the line plot for this country
            fig.add_trace(go.Scatter(x = year_range, y = plot_data_df[country], mode='lines',
                                     name=country, connectgaps=False))
    elif mode == 'Aggregated view':
        selected = plot_data_df[filters["countries"]]
        if datatype == "Average Age at Death":
            # Averages are averaged across countries (unweighted); counts are summed
            y_data = selected.mean(axis=1)
        else:
            y_data = selected.sum(axis=1)

        fig.add_trace(go.Scatter(x=year_range, y = y_data, mode='lines',
                                 name="Countries Aggregated", connectgaps=False))

    # Title: name the whole world, the selection, or its first five entries
    title_countries = replace_countries_with_continents(continent_mapping, filters["countries"])
    if len(filters["countries"]) == all_countries_len:
        region = 'the World'
    elif len(title_countries) <= 5:
        region = ", ".join(title_countries)
    else:
        region = ", ".join(title_countries[:5]) + '...'

    # Customize layout with title, axes labels, and grid
    fig.update_layout(title=f'Overview of {datatype} Across all Years for {region}',
                      xaxis_title='Year',
                      yaxis_title=datatype,
                      xaxis=dict(showgrid=True, gridwidth=1, gridcolor='LightSteelBlue'),
                      yaxis=dict(showgrid=True, gridwidth=1, gridcolor='LightSteelBlue'),
                      plot_bgcolor='whitesmoke',
                      dragmode='zoom')

    return fig
    

# Tile functions - Create one showing the number of genders, number of occupations, number of countries selected, as well as gender ratio male:female:other

In [62]:
@app.callback(
    Output('total-observations-tile', 'children'),
    [
        Input('sunburst', 'clickData'),
        Input('choropleth', 'clickData'),
        Input('year-slider', 'value'),
        Input('dropdown-checklist-country', 'value'),
        Input('dropdown-checklist-occupation', 'value'),
        Input('dropdown-checklist-gender', 'value'),
        Input('dropdown-event', 'value'),
        Input('dropdown-datamode', "value"),
    ]
)
def update_total_observations_tile(sunburst_clickData, choropleth_clickData, year_range, country_dropdown, 
                                   occupation_dropdown, gender_dropdown, event_dropdown, datamode_dropdown):
    """Return the tile text with the number of rows matching the current filters.

    None of the callback arguments is read; the selection comes from the
    module-level ``filters`` dict.
    """
    active_selection = dict(
        countries = filters["countries"],
        genders = filters["genders"],
        occupations = filters["occupations"],
        timeframe = filters["period"],
    )
    n_observations = len(applyFilters(df, **active_selection))

    return f'Total Observations: {n_observations}'

@app.callback(
    Output('selected-years-tile', 'children'),
    [Input('year-slider', 'value'), 
     Input("dropdown-event", "value")]
)
def update_selected_years_tile(year_range, event_dropdown):
    """Return the tile text for the selected period, naming the matching event if any.

    The period is read from the module-level ``filters`` dict. When several
    events in ``all_events`` cover exactly the same years, the one chosen in
    the event dropdown is named; otherwise the first event with that range is.

    Args:
        year_range: not read; triggers the callback.
        event_dropdown: current value of the event dropdown.
            NOTE(review): assumed to be an event name (a key of
            ``all_events``) - any other value is ignored.
    """
    period = filters["period"]
    label = f'Selected Years: {period[0]} to {period[1]}'
    # Compare as lists so a tuple-valued period still matches the [start, end] lists
    span = list(period)

    # Prefer the event picked in the dropdown, so events sharing a range are not confused
    if isinstance(event_dropdown, str) and list(all_events.get(event_dropdown, ())) == span:
        return f'{label} ({event_dropdown})'

    for name, years in all_events.items():
        if list(years) == span:
            return f'{label} ({name})'
    return label

@app.callback(
    Output('gender-ratio-tile', 'children'),
    [Input('sunburst', 'clickData'),
    Input('choropleth', 'clickData'),
    Input('year-slider', 'value'),
    Input('dropdown-checklist-country', 'value'),
    Input('dropdown-checklist-occupation', 'value'),
    Input('dropdown-checklist-gender', 'value'),
    Input('dropdown-event', 'value'),
    Input('dropdown-datamode', "value")]
)
def update_gender_ratio(sunburst_clickData, choropleth_clickData, year_range, country_dropdown, 
                                   occupation_dropdown, gender_dropdown, event_dropdown, datamode_dropdown):
    """Return the tile text with the male / female / other shares of the current selection.

    None of the callback arguments is read; the selection comes from the
    module-level ``filters`` dict. Every row whose ``Gender`` is neither
    'Male' nor 'Female' counts as "Other". An empty selection yields all-zero
    shares instead of dividing by zero.
    """
    tile_df = applyFilters(df, countries = filters["countries"], genders = filters["genders"],
                           occupations = filters["occupations"], timeframe = filters["period"])

    total_count = len(tile_df)
    if total_count == 0:
        # No matching individuals: avoid ZeroDivisionError
        return "Male: 0.00, Female: 0.00, Other: 0.00 Ratio"

    total_male = int((tile_df['Gender'] == 'Male').sum())
    total_female = int((tile_df['Gender'] == 'Female').sum())
    # Everything that is neither male nor female
    total_other = total_count - total_male - total_female

    male_ratio = total_male / total_count
    female_ratio = total_female / total_count
    other_ratio = total_other / total_count
    return f"Male: {male_ratio:.2f}, Female: {female_ratio:.2f}, Other: {other_ratio:.2f} Ratio"



    

<hr style="border:3px solid gray">

# Running the Dash app

In [63]:
"""if __name__ == '__main__':
    app.run_server(mode='external', port = 8094, dev_tools_ui=True, #debug=True,
             dev_tools_hot_reload =True, threaded=True)
"""

"if __name__ == '__main__':\n    app.run_server(mode='external', port = 8094, dev_tools_ui=True, #debug=True,\n             dev_tools_hot_reload =True, threaded=True)\n"

In [64]:
# Launch the app; no arguments are passed, so the library defaults apply.
app.run()

# Still To-do 
- change box-whisker adaptation colormap &#9745;
- find a way to keep colors of sunburst categories constant &#9745;
- set the right colors for sunburst in colormap
- make overall dashboard prettier
- get filters over to the side
- make layout prettier 
- change year slider to have smaller increments
- get linechart working &#9745;
- get filters working &#9745;
- fix bug with occupation selection (others and unspecified) &#9745;
- add more tiles with statistics
- add correct titles to charts, indicating that it's people living in a given period (Choropleth not displaying title)
- add descriptions to charts 
- add overall title and description of visualization
- be able to select category &#9745;
- add percentages to sunburst occupations 
- be able to select continents in filter &#9745;
- cut year range from 0 to 2021 instead?
- reduce number of countries in the lookup
- add more Occupations to categories? 

