# NECTA PSLE Dashboard

## 04-dashboard
### Tasks
1. Read in and prepare data
2. Setup data inputs
3. Create control components
4. Create map renderers and tables
5. Setup layout, callbacks, and app

#### Inputs:
- 03-feature-extraction.csv (17900, 44)

#### Outputs:
- Public web app at: https://bit.ly/psle2022mvp (https://lonnychen.pythonanywhere.com)

In [None]:
#Libraries
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 50)

#Plotly Dash, added to Anaconda
from dash import Dash, dcc, html, callback, Input, Output, dash_table
import plotly.express as px
from dash.dash_table.Format import Format, Scheme, Trim
import dash_bootstrap_components as dbc

#Library for JSON loading
import json

### 1. Read in and prepare data

**Steps:**
1. Read in schools CSV from data preparation steps
2. Read in region polygons GeoJSON downloaded from [GADM - Tanzania ADM1](https://gadm.org/download_country.html)

**Learnings:** (🧑🏻‍💻📚😎⚠️)
- 😎 (Geo)JSON files are just big Python dictionaries and can be accessed/modified that way!

In [None]:
#Read in deployable school data, indexed by school_id
df = pd.read_csv('../data/deployable/03-feature-extraction.csv', index_col='school_id')
df.shape #(17900, 44)

#Separate Gov: the dashboard below only uses government schools
dfg = df[df['SCHOOL OWNERSHIP'] == 'Government']
dfg.shape #(16361, 44)

#Read in region GeoJSON (explicit UTF-8 so the platform default encoding is never used)
with open('../data/raw/geojson/gadm41_TZA_1.json', 'r', encoding='utf-8') as f:
    tza_adm1_geojson = json.load(f)

#Region name fix to match NECTA/TAMISEMI
#Match on the GADM spelling instead of a hard-coded feature index, so a
#re-downloaded or re-ordered file can never rename the wrong region.
for feature in tza_adm1_geojson['features']:
    if feature['properties']['NAME_1'] == 'DaresSalaam':
        feature['properties']['NAME_1'] = 'Dar es Salaam'
tza_adm1_geojson.keys()

### 2. Setup data inputs

**Steps:**

NOTE: school tab inputs at [0], region tab inputs at [1]

1. Setup presentation dicts, common for all tabs
2. Setup `hover_data` input to maps: what gets seen during mouse hover
3. Setup `custom_data` input to maps: additional data for maps' `clickData` input to table

In [None]:
#Configuration: plotting/table DATA inputs

#ALL TABS
#Display labels keyed by column name; 'schools_n' must match the aggregated
#column name used by the regions tab, otherwise the label is never applied
labels = {'average_5tile': 'PSLE quintile', 'average_300': 'PSLE average', 'schools_n': 'Number of schools'}
lat_lon_centre = {'lat': -6.826866, 'lon': 37.668493} #Morogoro MC council_hq
y_order = {'average_5tile': ['highest', 'fourth', 'middle', 'second', 'lowest']}

#PER-TAB LISTS (schools tab at [0], regions tab at [1])
hover_data = []
custom_data = []
custom_data_DT = []

#SCHOOLS TAB [0]
#Plotting/table data inputs
#hover_data: False hides a field from the hover box, True shows it
hover_data.append({'LATITUDE fix': False, 'LONGITUDE fix': False, 'average_300': True})
#custom_data: column names attached to each point; custom_data_DT holds the
#matching display names in the same order (both lists must stay the same length)
custom_data.append(['school_name', 'SCHOOL OWNERSHIP', 'TOTAL STUDENTS', #school info [0:3]
               'region_name', 'council_name', 'context', 'WARD', #location [3:7]
               'num_sitters', 'average_5tile', 'average_300', 'grade', 'pct_passed', 'results_url', #PSLE results [7:13]
               'PTR', 'PBR_std7', 'CG_per_student', #Resources [13:16]
               'approx_ages_mean', 'pop_3km', 'd_closest', 'd_council_hq']) #demographic/geographic [16:20]
custom_data_DT.append(['PRIMARY SCHOOL NAME', 'Ownership', 'Total students',
                  'Region', 'Council', 'Context', 'Ward',
                  'PSLE sitters', 'PSLE quintile', 'PSLE average (300)', 'PSLE average (grade)', 'PSLE % passed (A-C)', 'PSLE results URL',
                  'Pupil-to-Teacher Ratio (PTR)', 'Pupil-to-Book Ratio (PBR) (Std 7)', 'Capitation Grant (CG) per student (TZS)',
                  'Ages mean (approx.)', 'Population within 3km radius', 'Distance to closest other gov. school (km)', 'Distance to council headquarters (km)'])

#REGIONS TAB [1]
#Plotting/table data inputs
hover_data.append({'region_name': False, 'schools_n': True, 'average_300': True})
custom_data.append(['region_name', 'schools_n', 'councils_n', 'students_sum', #region info [0:4]
               'sitters_sum', 'average_300', 'pct_passed', #PSLE results [4:7]
               'PTR', 'PBR_std7', 'CG_per_student', #Resources [7:10]
               'approx_ages_mean', 'pop_3km', 'd_closest', 'd_council_hq']) #Demographics/Geography (Xd) [10:14]
custom_data_DT.append(['REGION', 'Number of schools', 'Number of councils', 'Total students (region)',
                  'PSLE sitters (region)', 'PSLE average (300)', 'PSLE % passed (A-C)',
                  'Pupil-to-Teacher Ratio (PTR)', 'Pupil-to-Book Ratio (PBR) (Std 7)', 'Capitation Grant (CG) per student (TZS)',
                  'Ages mean (approx.)', 'Population within 3km radius', 'Distance to closest other gov. school (km)', 'Distance to council headquarters (km)'])

### 3. Create control components

**Steps:**

NOTE: school tab controls at [0], region tab controls at [1]

1. Create `dcc.RadioItems` to choose variable for color coding
2. Create `dcc.Checklist` to filter urban/rural contexts
3. Create `dcc.Checklist` to filter regions
4. Create `dcc.RadioItems` to choose mean or median calculation (regions only)

In [None]:
#Plotting controls to build the interaction
#PER-TAB LISTS: schools tab at [0], regions tab at [1]

#Option sets shared by both tabs, derived once from the government schools
context_values = dfg.context.unique()
region_values = dfg.region_name.unique()
region_options = np.sort(region_values) #sorted for display

#Colour variable selector (the regions tab has no quintile option)
color_radio = [
    dcc.RadioItems(options=['average_5tile', 'average_300', 'PTR', 'pop_3km'], value='average_5tile', inline=True),
    dcc.RadioItems(options=['average_300', 'PTR', 'pop_3km'], value='average_300', inline=True),
]

#Context filter, every context ticked initially; one component per tab
context_checklist = [
    dcc.Checklist(options=context_values, value=context_values, inline=True)
    for _ in range(2)
]

#Region filter, every region ticked initially; one component per tab
region_checklist = [
    dcc.Checklist(options=region_options, value=region_values, inline=True)
    for _ in range(2)
]

#Statistic selector exists for regions only
stat_radio = [
    '', #empty string just to be consistent with indexing
    dcc.RadioItems(options=['mean', 'median'], value='mean', inline=True),
]

### 4. Create map renderers and tables

**Steps:**

NOTE: school tab maps/tables at [0], region tab maps/tables at [1]

1. Create `dcc.Graph` to "render" dynamic maps
2. Create `dash_table.DataTable` to display maps' `clickData`

**Learnings:** (🧑🏻‍💻📚😎⚠️)
- 🧑🏻‍💻 The Dash DataTable `Format` option `Trim.yes` saves the day: trailing zeros are trimmed, so both int and float numbers display properly!

In [None]:
#Maps and tables
#PER-TAB LISTS: schools tab at [0], regions tab at [1]
map_graph = []
click_data_table = []

for _ in range(2):
    #Graph: created with an empty figure
    map_graph.append(dcc.Graph(figure={}))

    #DataTable: a 'Field' column and a numerically formatted 'Value' column
    value_format = Format(precision=2, group=',', scheme=Scheme.fixed, trim=Trim.yes)
    table_columns = [
        {'id': 'Field', 'name': 'Field', 'type': 'any'},
        {'id': 'Value', 'name': 'Value', 'type': 'numeric', 'format': value_format},
    ]
    cell_style = {
        'height': 'auto',
        # all three widths are needed
        'minWidth': '150px', 'width': '150px', 'maxWidth': '150px',
        'whiteSpace': 'normal',
        'overflow': 'scroll',
        'font_size': '12px',
    }
    detail_table = dash_table.DataTable(
        columns=table_columns,
        style_table={'height': '480px', 'overflowY': 'auto'},
        style_header={'display': 'none'},
        style_cell=cell_style,
    )
    click_data_table.append(detail_table)

### 5. Setup layout, callbacks, and app

**Steps:**

1. Setup various `dbc` components for layouts: Card, Tab(s), Container > Row > Col
2. **Define callback inputs, functions, and outputs**
    - This is where maps are really created dynamically based on user interaction
    - Tables are also updated based on maps' `clickData`
3. Instantiate and call app
    - Runs locally in THIS Jupyter Notebook, or from a terminal with `python app.py`
    - TEMP deployment: [lonnychen.pythonanywhere.com](https://lonnychen.pythonanywhere.com)

**Learnings:** (🧑🏻‍💻📚😎⚠️)
- 🧑🏻‍💻 Plotly `choropleth_mapbox` really goes hand-in-hand with Pandas `groupby('region_name')`: aggregate the school rows first to get the actual statistics of the combined DATA for each choropleth region (otherwise the map takes the value of the last school in each region)

In [None]:
#Define CARDS: wrap each tab's map graph (schools, then regions) in a card body
school_map_card, region_map_card = (
    dbc.Card(dbc.CardBody([graph])) for graph in map_graph[:2]
)

In [None]:
#Define TABS
#Each tab stacks three rows: selectors, region filter, then map + click table

#Schools tab
tab0_selector_row = dbc.Row([
    dbc.Col([dbc.Label('Choose color data'), color_radio[0]], width='auto'),
    dbc.Col([dbc.Label('Filter contexts'), context_checklist[0]], width='auto'),
])
tab0_region_row = dbc.Row([
    dbc.Col([dbc.Label('Filter regions'), region_checklist[0]], width=True),
])
tab0_map_row = dbc.Row([
    dbc.Col(school_map_card, width=8),
    dbc.Col(click_data_table[0], width=4),
])
tab0_content = dbc.Container(
    [tab0_selector_row, tab0_region_row, tab0_map_row],
    fluid=True,
)

#Regions tab (adds the statistic selector)
tab1_selector_row = dbc.Row([
    dbc.Col([dbc.Label('Choose color data'), color_radio[1]], width='auto'),
    dbc.Col([dbc.Label('Choose statistic'), stat_radio[1]], width='auto'),
    dbc.Col([dbc.Label('Filter contexts'), context_checklist[1]], width='auto'),
])
tab1_region_row = dbc.Row([
    dbc.Col([dbc.Label('Filter regions'), region_checklist[1]], width=True),
])
tab1_map_row = dbc.Row([
    dbc.Col(region_map_card, width=8),
    dbc.Col(click_data_table[1], width=4),
])
tab1_content = dbc.Container(
    [tab1_selector_row, tab1_region_row, tab1_map_row],
    fluid=True,
)

In [None]:
# Initialize the app (Dash constructor) with the Bootstrap stylesheet
app = Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])

##############
# App layout #
##############
# (app components that will be displayed in the web browser)
# Define layout elements
title = html.H2('Tanzania NECTA PSLE Dashboard 2022')
hr = html.Hr()
page_header = html.Div([title, hr])

# Tab bar: the Schools tab is the one shown first
tab_bar = dbc.Tabs(
    children=[
        dbc.Tab(children=tab0_content, label='Schools', tab_id='schools_tab'),
        dbc.Tab(children=tab1_content, label='Regions', tab_id='regions_tab'),
    ],
    id='tabs',
    active_tab='schools_tab',
)

# Combine all elements here
app.layout = html.Div([page_header, tab_bar])

#############
# Callbacks #
#############
# Callbacks Schools 1: @Color/filter >school coordinates
@callback(
    Output(map_graph[0], 'figure'),
    Input(color_radio[0], 'value'),
    Input(context_checklist[0], 'value'),
    Input(region_checklist[0], 'value')
)
def update_school_graph(color_input, context_input, region_input):
    """Rebuild the schools scatter map from the current control values.

    Args:
        color_input: Column name used to colour the markers.
        context_input: Context values to keep.
        region_input: Region names to keep.

    Returns:
        The Plotly figure for the schools tab.
    """
    #Prepare data: keep schools that pass both the context and region filters
    in_context = dfg['context'].isin(context_input)
    in_region = dfg['region_name'].isin(region_input)
    schools = dfg[in_context & in_region]

    #Plot MAP: one marker per school, sized by number of sitters
    school_map = px.scatter_mapbox(
        schools,
        lat='LATITUDE fix',
        lon='LONGITUDE fix',
        color=color_input,
        color_discrete_sequence=px.colors.sequential.Jet, #Ordinal
        category_orders=y_order,
        size='num_sitters',
        hover_name='school_name',
        hover_data=hover_data[0],
        custom_data=custom_data[0],
        labels=labels,
    )

    mapbox_settings = {'style': 'open-street-map', 'center': lat_lon_centre, 'zoom': 10}
    school_map.update_layout(
        mapbox=mapbox_settings,
        title='Primary School Leaving Examination (PSLE) 2022 Results - Schools',
        uirevision=True,
        margin={"b": 0},
    )
    return school_map

# Callbacks Schools 2: School coordinates @CLICK >school data table
@callback(
    Output(click_data_table[0], 'data'),
    Input(map_graph[0], 'clickData')
)
def update_school_table(clickData):
    """Fill the schools detail table from the clicked map point.

    Args:
        clickData: The map's click payload; falsy until a point is clicked.

    Returns:
        A list of {'Field', 'Value'} records for the clicked school, or
        None when nothing has been clicked yet.
    """
    if not clickData:
        #initial "blank" state, else can not find ['points']
        return None

    click_customdata = clickData['points'][0]['customdata']
    #Take the slice length from the label list instead of a hard-coded 20,
    #so fields added to the configuration are not silently dropped here
    field_labels = custom_data_DT[0]
    school_data_DT = pd.DataFrame({
        'Field': field_labels,
        'Value': click_customdata[:len(field_labels)],
    }).to_dict('records')
    return school_data_DT

# Callbacks Regions 1: @Color/filter >region polygons
@callback(
    Output(map_graph[1], 'figure'),
    Input(color_radio[1], 'value'),
    Input(stat_radio[1], 'value'),
    Input(context_checklist[1], 'value'),
    Input(region_checklist[1], 'value')
)
def update_region_graph(color_input, stat_input, context_input, region_input):
    """Rebuild the regions choropleth map from the current control values.

    Args:
        color_input: Aggregated column name used to colour the regions.
        stat_input: Aggregation name applied to the per-school measures.
        context_input: Context values to keep.
        region_input: Region names to keep.

    Returns:
        The Plotly figure for the regions tab.
    """
    #Prepare data: filter schools first, then aggregate per region
    keep = dfg['context'].isin(context_input) & dfg['region_name'].isin(region_input)
    filtered = dfg[keep]

    #Basic counts/totals and PSLE sitters always use a fixed aggregation
    agg_spec = {
        'schools_n': pd.NamedAgg(column='school_name', aggfunc='count'),
        'councils_n': pd.NamedAgg(column='council_name', aggfunc='nunique'),
        'students_sum': pd.NamedAgg(column='TOTAL STUDENTS', aggfunc='sum'),
        'sitters_sum': pd.NamedAgg(column='num_sitters', aggfunc='sum'),
    }
    #All remaining measures are summarised with the user-selected statistic
    stat_columns = [
        'average_300', 'pct_passed', #Results (y)
        'PTR', 'PBR_std7', 'CG_per_student', #Resources (Xi)
        'approx_ages_mean', 'pop_3km', 'd_closest', 'd_council_hq', #Demographics/Geography (Xd)
    ]
    for column in stat_columns:
        agg_spec[column] = pd.NamedAgg(column=column, aggfunc=stat_input)

    regions = filtered.groupby('region_name').agg(**agg_spec).reset_index()

    #Plot MAP: region rows are joined to polygons via the NAME_1 property
    region_map = px.choropleth_mapbox(
        regions,
        locations='region_name',
        geojson=tza_adm1_geojson,
        featureidkey='properties.NAME_1',
        color=color_input,
        opacity=0.5,
        hover_name='region_name',
        hover_data=hover_data[1],
        custom_data=custom_data[1],
        labels=labels,
    )

    mapbox_settings = {'style': 'open-street-map', 'center': lat_lon_centre, 'zoom': 4}
    region_map.update_layout(
        mapbox=mapbox_settings,
        title='Primary School Leaving Examination (PSLE) 2022 Results - Regions',
        uirevision=True,
    )
    return region_map

# Callbacks Regions 2: Region polygons @CLICK >region data table
@callback(
    Output(click_data_table[1], 'data'),
    Input(map_graph[1], 'clickData')
)
def update_region_table(clickData):
    """Fill the regions detail table from the clicked map polygon.

    Args:
        clickData: The map's click payload; falsy until a region is clicked.

    Returns:
        A list of {'Field', 'Value'} records for the clicked region, or
        None when nothing has been clicked yet.
    """
    if not clickData:
        #initial "blank" state, else can not find ['points']
        return None

    click_customdata = clickData['points'][0]['customdata']
    #Take the slice length from the label list instead of a hard-coded 14,
    #so fields added to the configuration are not silently dropped here
    field_labels = custom_data_DT[1]
    region_data_DT = pd.DataFrame({
        'Field': field_labels,
        'Value': click_customdata[:len(field_labels)],
    }).to_dict('records')
    return region_data_DT
    
# Run the app
if __name__ == '__main__':
    # `app.run` supersedes the obsolete `app.run_server`; fall back to
    # `run_server` only on older Dash versions that do not provide `run`.
    run_app = getattr(app, 'run', None) or app.run_server
    run_app(debug=True)