# DATASET & CHARTS

## Basic Libraries

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt

from shapely.geometry import Point

import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

%pylab inline

## Dataset

In [None]:
url = 'https://raw.githubusercontent.com/francheska-vicente/datapre-project/main_v2/data_output/combined_data.csv'
sdg_data = pd.read_csv (url)
sdg_data 

In [None]:
sdg_data = sdg_data [list (sdg_data.columns [:-15])]
sdg_data

In [None]:
sdg_info = pd.read_csv ('data/sdg_infov3.csv')
sdg_info

In [None]:
region_info = pd.read_csv ('data/region_infov1.csv')
region_info

In [None]:
sdg_score = pd.read_csv ('data/sdg_target_score.csv')
sdg_score

## Line Charts

In [None]:
regions_selected = []

In [None]:
regions_selected = ['NCR: National Capital Region', 'Region 1: Ilocos Region']

In [None]:
indicators_selected = []

In [None]:
indicators_selected = ['1.4.1 Net JHS Enrolment Rate', '1.2.1 Poverty Proportion']

In [None]:
# Build the frame behind the line charts: the rows of every selected region,
# restricted to 'Geolocation', 'Year' and all selected indicators (any number
# of indicators is supported, not just the first two).
selected_columns = ['Geolocation', 'Year'] + list(indicators_selected)

region_frames = []
for region in regions_selected:
    # Select the label column together with the data, so no index-aligned
    # concat against the full 'Geolocation' column is needed afterwards.
    temp_region = sdg_data.loc[sdg_data['Geolocation'] == region, selected_columns]

    # Keep rows with at least len(indicators_selected) + 1 non-null cells,
    # the same threshold the chart code has always used.
    temp_region = temp_region.dropna(thresh=len(indicators_selected) + 1)
    region_frames.append(temp_region)

if region_frames:
    two_region = pd.concat(region_frames).reset_index(drop=True)
    two_region['Year'] = two_region['Year'].astype('int')
else:
    # No region selected: keep an empty frame that still has the columns
    # the chart cells index into.
    two_region = pd.DataFrame(columns=selected_columns)
two_region

In [None]:
def line_update_layout(fig, title, label):
    """Apply the shared line-chart styling to a figure and return it.

    Parameters
    ----------
    fig : object exposing ``update_layout`` (e.g. a Plotly figure)
        Figure to style; it is updated in place.
    title : str
        Chart title, placed at x=0.5, y=0.95.
    label : str
        Title of the y-axis (the x-axis title is always 'Year').

    Returns
    -------
    The same ``fig`` object that was passed in.
    """
    def _axis_style():
        # A fresh dict per axis so the two axes never share one object.
        return {
            'showline': True,
            'showgrid': False,
            'showticklabels': True,
            'linecolor': '#000000',
            'linewidth': 2,
            'ticks': 'outside',
            'tickfont': {'family': 'Cambria', 'size': 16, 'color': '#000000'},
        }

    layout_options = {
        # Title text and position.
        'title': {'text': title, 'y': 0.95, 'x': 0.5},
        'title_font_family': 'Cambria',
        'title_font_color': '#000000',
        'title_font_size': 20,
        # Axis and legend font.
        'font_family': 'Cambria',
        'font_color': '#000000',
        # Axes.
        'xaxis_title': 'Year',
        'xaxis': _axis_style(),
        'yaxis_title': label,
        'yaxis': _axis_style(),
        'hovermode': 'x unified',
        'autosize': True,
        'showlegend': True,
        # Background outside the plotting area.
        'paper_bgcolor': 'LightSteelBlue',
        # Background inside the plotting area.
        # NOTE(review): 'light gray' is not a standard CSS colour keyword
        # ('lightgray' is) -- confirm it renders as intended.
        'plot_bgcolor': 'light gray',
    }
    fig.update_layout(**layout_options)

    return fig

In [None]:
print (indicators_selected)

In [None]:
indicator = indicators_selected [0]

In [None]:
# Plot one indicator over time for the selected regions, with its SDG target
# drawn as an extra constant series named 'Target <indicator label>'.
label = " ".join(indicator.split(' ')[1:])
df_visualization = two_region[['Geolocation', 'Year', indicator]].dropna()

# Look the target up by indicator name; fail with a clear message instead of
# scanning past the end of sdg_score when the indicator is not listed.
target_matches = sdg_score.loc[sdg_score['Indicator'] == indicator, 'Target']
if target_matches.empty:
    raise ValueError('No target score found for indicator: ' + indicator)
target_value = target_matches.iloc[0]

# DataFrame.append was removed in pandas 2.0, so build one target row per
# plotted year in a single frame and concatenate it.
x_axis_values = df_visualization['Year'].unique()
target_rows = pd.DataFrame({
    'Geolocation': 'Target ' + label,
    'Year': x_axis_values,
    indicator: target_value,
})
df_visualization = pd.concat([df_visualization, target_rows], ignore_index=True)

fig = px.line(df_visualization, x='Year', y=indicator, markers=True,
              labels={indicator: label}, color='Geolocation')
title = label + ' per Year'
fig = line_update_layout(fig, title, label)
# Treat years as categories so only years that have data appear on the axis.
fig.update_xaxes(type='category')

fig.show()

In [None]:
# Draw one line chart per selected indicator; each chart shows the selected
# regions plus a constant 'Target <indicator label>' series.
for indicator in indicators_selected:
    label = " ".join(indicator.split(' ')[1:])
    df_visualization = two_region[['Geolocation', 'Year', indicator]].dropna()

    # Look the target up by name; fail with a clear message instead of
    # scanning past the end of sdg_score when the indicator is not listed.
    target_matches = sdg_score.loc[sdg_score['Indicator'] == indicator, 'Target']
    if target_matches.empty:
        raise ValueError('No target score found for indicator: ' + indicator)
    target_value = target_matches.iloc[0]

    # DataFrame.append was removed in pandas 2.0, so build one target row
    # per plotted year in a single frame and concatenate it.
    x_axis_values = df_visualization['Year'].unique()
    target_rows = pd.DataFrame({
        'Geolocation': 'Target ' + label,
        'Year': x_axis_values,
        indicator: target_value,
    })
    df_visualization = pd.concat([df_visualization, target_rows], ignore_index=True)

    fig = px.line(df_visualization, x='Year', y=indicator, markers=True,
                  labels={indicator: label}, color='Geolocation')
    title = label + ' per Year'
    fig = line_update_layout(fig, title, label)
    # Treat years as categories so only years that have data appear.
    fig.update_xaxes(type='category')

    fig.show()

## LINE CHART FOR REGION-FOCUSED TAB

In [None]:
region_selected = 'PHILIPPINES'

In [None]:
target_selected = 'By 2030, the proportion of men, women and children living in poverty should be reduced by half.'

In [None]:
# Translate the selected shortened-target text into its target number
# (the first matching row of sdg_info is used).
matching_targets = sdg_info[sdg_info['Shortened Target'] == target_selected]
matching_targets = matching_targets.drop_duplicates(['Shortened Target'])
target_selected = matching_targets['Target Number'].values[0]
target_selected

In [None]:
# One-row frame that labels every column of the SDG table with its target
# number ('' where the list assigns none); 38 entries in total.
# NOTE(review): an earlier cell already rebinds sdg_data without its last 15
# columns -- confirm that trimming 15 more columns here is intended.
sdg_columns = sdg_data.columns[:-15]
targets_value = (
    ['Target Number', '']
    + ['1.2.']
    + ['1.4.'] * 9
    + ['1.5.']
    + ['3.4.'] * 5
    + ['3.7.'] * 2
    + ['4.1.'] * 9
    + ['']
    + ['7.1.']
    + ['8.1.']
    + ['10.1.'] * 2
    + ['14.5.'] * 2
    + ['16.1.'] * 2
)
targets_df = pd.DataFrame([targets_value], columns=sdg_columns)
sdg_data_only = sdg_data[sdg_columns]

In [None]:
# Stack the target-number row on top of the selected region's rows.
region_rows = sdg_data_only.loc[sdg_data_only['Geolocation'] == region_selected]
region_df = pd.concat([targets_df, region_rows], ignore_index=True)
region_df = region_df.drop(columns='Geolocation')
# Row 0 is the target-number row; label it in the 'Year' column so it can
# serve as a header once the frame is transposed.
region_df.loc[0, 'Year'] = 'Target Number'
region_df

In [None]:
# Flip the frame so each indicator is a row, promote the first row of the
# transposed frame to column header, then discard the 'Year' row.
transposed = region_df.transpose()
transposed.columns = transposed.iloc[0]
region_df = transposed.drop(index='Year')
region_df

In [None]:
# Keep only the indicators of the selected target, flip back to one row per
# year, and drop the helper row and any row with missing values.
is_selected_target = region_df['Target Number'] == target_selected
region_df = (
    region_df.loc[is_selected_target]
    .transpose()
    .drop(index='Target Number')
    .dropna()
)
region_df

In [None]:
# Chart title: the shortened description of the selected target number.
title_rows = sdg_info[sdg_info['Target Number'] == target_selected]
title_rows = title_rows.drop_duplicates(['Target Number'])
title = title_rows['Shortened Target'].values[0]

# One line per column of region_df, styled like the other line charts.
fig = px.line(region_df)
fig = line_update_layout(fig, title, 'Indicator Value')
fig.update_xaxes(type='category')

fig.show()

## Bar Charts

### HORIZONTAL BAR CHART

In [None]:
regions_selected = []

In [None]:
regions_selected = ['NCR: National Capital Region', 'Region 1: Ilocos Region']

In [None]:
indicators_selected = []

In [None]:
indicators_selected = ['1.4.1 Net JHS Enrolment Rate', '1.2.1 Poverty Proportion']

In [None]:
# Build the frame behind the bar charts: every Geolocation except the
# 'PHILIPPINES' rows, restricted to 'Year' and all selected indicators.
# The frame is built once; previously the same rows were concatenated once
# per selected indicator, which only produced duplicates.
selected_columns = ['Geolocation', 'Year'] + list(indicators_selected)

temp_region = sdg_data.loc[sdg_data['Geolocation'] != 'PHILIPPINES', selected_columns]
# Keep rows with at least len(indicators_selected) + 1 non-null cells.
temp_region = temp_region.dropna(thresh=len(indicators_selected) + 1)

two_region = temp_region.reset_index(drop=True)
two_region['Year'] = two_region['Year'].astype('int')
two_region

In [None]:
def bar_update_layout(fig, title, label):
    """Apply the shared horizontal-bar-chart styling to a figure and return it.

    Parameters
    ----------
    fig : object exposing ``update_layout`` (e.g. a Plotly figure)
        Figure to style; it is updated in place.
    title : str
        Chart title, placed at x=0.5, y=0.95.
    label : str
        Title of the x-axis (the y-axis title is always 'Geolocation').

    Returns
    -------
    The same ``fig`` object that was passed in.
    """
    black = '#000000'
    typeface = 'Cambria'

    def _tick_font(size):
        return {'family': typeface, 'size': size, 'color': black}

    x_axis = {
        'showline': True,
        'showgrid': False,
        'showticklabels': True,
        'linecolor': black,
        'linewidth': 2,
        'ticks': 'outside',
        'tickfont': _tick_font(14),
    }
    y_axis = {
        # Ascending values from bottom to top.
        'categoryorder': 'total ascending',
        'showgrid': False,
        'showline': True,
        'showticklabels': True,
        'linecolor': black,
        'linewidth': 2,
        'ticks': 'outside',
        'tickfont': _tick_font(10),
    }

    fig.update_layout(
        # Title text and position.
        title={'text': title, 'y': 0.95, 'x': 0.5},
        title_font_family=typeface,
        title_font_color=black,
        title_font_size=20,
        # Axis font.
        font_family=typeface,
        font_color=black,
        # Axes.
        xaxis_title=label,
        xaxis=x_axis,
        yaxis_title='Geolocation',
        yaxis=y_axis,
        autosize=True,
        showlegend=True,
        # NOTE(review): 'light grey' is not a standard CSS colour keyword
        # ('lightgrey' is) -- confirm it renders as intended.
        plot_bgcolor='light grey',
    )

    return fig

In [None]:
# Region display names: the text after the ':' of every unique Geolocation
# except the first one.
geolocation_values = [
    name.split(":")[1]
    for name in sdg_data['Geolocation'].unique()[1:]
]

In [None]:
# Draw one horizontal bar chart per selected indicator, using the last year
# listed for that indicator.
for indicator in indicators_selected:
    label = " ".join(indicator.split(' ')[1:])
    df_visualization = two_region[['Geolocation', 'Year', indicator]].dropna()

    year_values = df_visualization['Year'].unique()
    if len(year_values) == 0:
        # Nothing to plot; indexing year_values[-1] would raise IndexError.
        print('No data available for ' + indicator)
        continue

    df_visualization_curr = df_visualization[df_visualization['Year'] == year_values[-1]]
    df_visualization_curr = df_visualization_curr.drop_duplicates()

    # Derive the y labels (text after the ':') from the rows being plotted,
    # so they always match the frame in length and order instead of relying
    # on a separately built list lining up row-for-row.
    region_labels = df_visualization_curr['Geolocation'].str.split(':').str[1].tolist()

    fig = px.bar(df_visualization_curr, x=indicator, y=region_labels,
                 labels={indicator: label}, color='Geolocation')
    title = label + ' of the Year ' + str(year_values[-1])
    fig = bar_update_layout(fig, title, label)

    fig.show()

## Heat Map

In [None]:
# One-row frame that labels every column of the SDG table with its target
# number; expressed as (value, repeat count) runs, 38 entries in total.
sdg_columns = sdg_data.columns[:-15]
target_runs = [
    ('Target Number', 1), ('', 1),
    ('1.2.', 1), ('1.4.', 9), ('1.5.', 1),
    ('3.4.', 5), ('3.7.', 2),
    ('4.1.', 9), ('', 1),
    ('7.1.', 1), ('8.1.', 1),
    ('10.1.', 2), ('14.5.', 2), ('16.1.', 2),
]
targets_value = [value for value, count in target_runs for _ in range(count)]
targets_df = pd.DataFrame([targets_value], columns=sdg_columns)
targets_df

In [None]:
sdg_data_only = sdg_data [sdg_columns]
sdg_data_only

In [None]:

sdg_data_only 

In [None]:
# Build a year-by-target table for one region: indicator values are averaged
# over all indicators that share a target number.
region_selected = 'PHILIPPINES'

# Rows of the selected region, with the target-number row stacked on top.
region_df = sdg_data_only [sdg_data_only ['Geolocation'] == region_selected]
region_df = pd.concat ([targets_df, region_df]).reset_index (drop = True)
# One row per original column after transposing.
region_df = region_df.T
# Use the first row of the transposed frame as the header; its first entry is
# the 'Target Number' label from targets_df, which the groupby below needs.
region_df.columns = region_df.iloc [0]
region_df = region_df.drop (['Geolocation', 'Year'], axis = 0)
# Average the indicator rows that share a target number, then flip back.
# NOTE(review): the transposed values are likely object dtype -- confirm that
# mean() behaves as expected with the installed pandas version.
region_df = region_df.groupby ('Target Number', group_keys=True).mean ().T
# Drop the group keyed by '' (indicators that targets_value leaves unlabelled).
region_df = region_df.drop ([''], axis = 1)
region_df

In [None]:
temp_sdg_info = sdg_info [['Target Number', 'Shortened Target']].drop_duplicates ()
temp_sdg_info

In [None]:
# Replace each target-number column header with the first matching
# shortened target description.
new_col_names = [
    temp_sdg_info.loc[temp_sdg_info['Target Number'] == target_number,
                      'Shortened Target'].values[0]
    for target_number in region_df.columns
]

region_df.columns = new_col_names
region_df

In [None]:
region_df_corr = region_df.corr ()

In [None]:
# Hide the upper triangle (diagonal included) of the correlation matrix so
# each pair of targets is shown only once.
mask = np.zeros_like(region_df_corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Drop the rows and columns that the mask emptied completely. The axis is
# passed by keyword: pandas 2.0 no longer accepts it positionally in dropna.
df_corr_viz = (
    region_df_corr.mask(mask)
    .dropna(how='all')
    .dropna(axis='columns', how='all')
)
fig = px.imshow(df_corr_viz, text_auto=True)
fig.show()

## Choropleth Map: Selected Indicator and Selected Year

### Load Requirements

In [None]:
# Regional shapefile with the SDG attributes, region outlines keyed by
# 'geolocation', and the SDG table used to restore the column names.
gdf_shp = gpd.read_file('./data/geospatial_data/regional/regional_data.shp')
region = gpd.read_file('./data/gadm_ph/gadm_regional.geojson').set_index('geolocation')
sdg_data = pd.read_csv('data/sdg_data/updated_sdg_data.csv')

# Read the Mapbox token inside a context manager so the file handle is
# closed instead of being left open.
with open(".mapbox_token") as token_file:
    px.set_mapbox_access_token(token_file.read())

### Fixing the Column Names in the Geospatial Data

In [None]:
# Check the current column names
gdf_shp.columns

In [None]:
# Check the correct format of column names (from SDG Data)
sdg_data.columns

In [None]:
# Match the column names of the geospatial data to those of the SDG data,
# pairing columns by position.
# The whole mapping is applied in a single rename: renaming one column at a
# time can hit the wrong column when a new name equals a not-yet-renamed
# one, and zip() stops at the shorter list instead of raising IndexError.
name_map = dict(zip(gdf_shp.columns, sdg_data.columns))
gdf_shp.rename(columns=name_map, inplace=True)
gdf_shp.columns

In [None]:
gdf_shp

In [None]:
def create_choropleth_data(selected_indicator):
    """Return one indicator's per-region values for a year chosen at the prompt.

    The indicator is pivoted to one row per ``Geolocation`` and one column
    per ``Year`` and cached as ``./data/indicator_csv/<indicator>.csv``. The
    cache is rebuilt from the global ``gdf_shp`` only when that file is
    missing. The available years are printed and the user is prompted (via
    ``input``) until one of them is entered.

    Parameters
    ----------
    selected_indicator : str
        Column name of the indicator in ``gdf_shp``.

    Returns
    -------
    tuple
        ``(pvt_gdf, selected_year)``: a frame with the columns
        ``'Geolocation'`` and ``selected_year``, and the chosen year as a
        string.

    Raises
    ------
    ValueError
        If the cached table contains no year columns.
    """
    csv_dir = './data/indicator_csv/'
    csv_path = csv_dir + selected_indicator + '.csv'

    # The existence check has to use the same path that is written and read
    # below; checking the bare file name never found the cached file.
    if not os.path.isfile(csv_path):
        gdf = gdf_shp[['geometry', 'Geolocation', 'Year', selected_indicator]]
        pvt_gdf = pd.pivot_table(gdf, index='Geolocation', columns='Year',
                                 values=selected_indicator).reset_index()
        os.makedirs(csv_dir, exist_ok=True)
        pvt_gdf.to_csv(csv_path, index=False)

    pvt_gdf = pd.read_csv(csv_path)

    # Map each normalised year label (e.g. '2015') to the actual CSV column,
    # so headers such as '2015.0' are still selectable by the typed year.
    year_columns = {}
    for column in pvt_gdf.columns[1:]:
        try:
            year_columns[str(int(float(column)))] = column
        except ValueError:
            continue  # not a year column
    if not year_columns:
        raise ValueError('No year data available for ' + selected_indicator)

    print('Only available year data for this indicator: ' + ', '.join(year_columns))

    # Keep asking until the answer is one of the available years.
    selected_year = input('Input year:').strip()
    while selected_year not in year_columns:
        print('No data for ' + selected_year)
        selected_year = input('Input year:').strip()

    year_column = year_columns[selected_year]
    pvt_gdf = pvt_gdf[['Geolocation', year_column]].rename(
        columns={year_column: selected_year})

    print('\n[SDG Indicator] ' + selected_indicator)
    print('[Year Data] ' + selected_year)
    return pvt_gdf, str(selected_year)

In [None]:
selected_indicator = '1.2.1 Poverty Proportion'

In [None]:
import os

In [None]:
choropleth_data = create_choropleth_data(selected_indicator)

In [None]:
# choropleth_data is a (frame, year-column name) pair; colour each region
# by the value in the chosen year column.
map_frame, year_column = choropleth_data[0], choropleth_data[1]
map_center = {'lat': 12.099, 'lon': 122.733}

fig = px.choropleth_mapbox(
    map_frame,
    geojson=region.geometry,
    locations='Geolocation',
    color=year_column,
    center=map_center,
    zoom=4,
)
fig.show()

## Choropleth: Correlation Between the Two Chosen Indicators

In [None]:
selected_indicators = ['4.1 Elem Completion Rate (Male)', '4.1 Elem Completion Rate (Female)']

In [None]:
ind_1 = pd.read_csv('./data/indicator_csv/'+selected_indicators[0]+'.csv')
ind_1

In [None]:
ind_2 = pd.read_csv('./data/indicator_csv/'+selected_indicators[1]+'.csv')
ind_2

In [None]:
ind_2_T = ind_2.T 
ind_2_T 

In [None]:
ind_1_T = ind_1.T 
ind_1_T 

In [None]:
import numpy as np
import scipy.stats

In [None]:
data_regional_1 = ind_1_T[0]

x= np.array(data_regional_1[1:])
x

In [None]:
data_regional_2 = ind_2_T[0]

y= np.array(data_regional_2[1:])
y

In [None]:
r, p = scipy.stats.pearsonr(x, y)
r

In [None]:
# One-row frame holding the correlation computed above for the first region.
# Built directly from a record because DataFrame.append was removed in
# pandas 2.0.
df = pd.DataFrame([{
    'Geolocation': data_regional_2[0],
    selected_indicators[0] + ' and ' + selected_indicators[1]: r,
}])
display(df)

In [None]:
# Index(['Geolocation', 'Year', '1.2.1 Poverty Proportion',
#        '1.4.1 Net Elem Enrolment Rate',
#        '1.4.1 Net Elem Enrolment Rate (Girls)',
#        '1.4.1 Net Elem Enrolment Rate (Boys)', '1.4.1 Net JHS Enrolment Rate',
#        '1.4.1 Net JHS Enrolment Rate (Girls)',
#        '1.4.1 Net JHS Enrolment Rate (Boys)', '1.4.1 Net SHS Enrolment Rate',
#        '1.4.1 Net SHS Enrolment Rate (Girls)',
#        '1.4.1 Net SHS Enrolment Rate (Boys)',
#        '1.5.4 Proportion of LGU with DRR',
#        '3.4.1 Mortality rate credited to NCD',
#        '3.4.1 Mortality rate credited to Cardio',
#        '3.4.1 Mortality rate credited to Cancer',
#        '3.4.1 Mortality rate credited to Diabetes',
#        '3.4.1 Mortality rate credited to Respi',
#        '3.7.1 Proportion of Contraceptive Use of Women',
#        '3.7.2 Teenage pregnancy rates per 1000', '4.1 Elem Completion Rate',
#        '4.1 Elem Completion Rate (Female)', '4.1 Elem Completion Rate (Male)',
#        '4.1 JHS Completion Rate', '4.1 JHS Completion Rate (Female)',
#        '4.1 JHS Completion Rate (Male)', '4.1 SHS Completion Rate',
#        '4.1 SHS Completion Rate (Female)', '4.1 SHS Completion Rate (Male)',
#        '4.c TVET trainers trained', '7.1.1 Proportion of pop with electricity',
#        '8.1.1 Growth rate of real GDP per capita',
#        '10.1.1.1 Income per capita growth rate of bottom 40',
#        '10.1.1.2 Income per capita growth rate',
#        '14.5.1.1 Coverage of protected areas',
#        '14.5.1.2 Coverage of protected NIPAS and Locally managed MPAs',
#        '16.1.1 Victims of intentional homicide per 100,000',
#        '16.1.s1 Number of murder cases', 'geometry'],
#       dtype='object')

In [None]:
# Pearson correlation between two indicators, computed separately for each
# of the first 17 rows (regions) of the cached per-indicator tables.
selected_indicators = ['4.1 JHS Completion Rate (Female)', '4.1 JHS Completion Rate (Male)']
ind_1 = pd.read_csv('./data/indicator_csv/'+selected_indicators[0]+'.csv')
ind_1_T = ind_1.T
ind_2 = pd.read_csv('./data/indicator_csv/'+selected_indicators[1]+'.csv')
ind_2_T = ind_2.T

corr_column = selected_indicators[0] + ' and ' + selected_indicators[1]
corr_rows = []
for i in range(17):
    data_regional_1 = ind_1_T[i]
    data_regional_2 = ind_2_T[i]

    # Position 0 holds the Geolocation label, so the values start at 1.
    # The region at index 3 skips its first two yearly values as well
    # (kept from the original; presumably missing data -- TODO confirm).
    j = 3 if i == 3 else 1

    # .iloc makes the positional access explicit; plain integer keys on a
    # label-indexed Series are deprecated.
    x = np.array(data_regional_1.iloc[j:])
    y = np.array(data_regional_2.iloc[j:])

    r, p = scipy.stats.pearsonr(x, y)
    corr_rows.append({'Geolocation': data_regional_1.iloc[0], corr_column: r})

# DataFrame.append was removed in pandas 2.0; build the frame in one go.
df = pd.DataFrame(corr_rows)
display(df)

In [None]:
# temp_df = sdg_data [['Geolocation', 'Year', selected_indicator [0], selected_indicator [1]]]
# temp_df = temp_df [temp_df ['Year'] == year_selected].T
# temp_df.columns = temp_df.loc ['Geolocation']
# temp_df = temp_df.drop (['Geolocation', 'Year'])
# temp_df.T

In [None]:
# Region outlines keyed by 'geolocation', used as the choropleth geojson.
region = gpd.read_file('./data/gadm_ph/gadm_regional.geojson').set_index('geolocation')

# Read the Mapbox token inside a context manager so the file handle is
# closed instead of being left open.
with open(".mapbox_token") as token_file:
    px.set_mapbox_access_token(token_file.read())

In [None]:
# Colour each region by the correlation between the two selected indicators.
# The column name is derived from selected_indicators, the same way the
# correlation frame names it, so changing the selection cannot leave a
# stale hard-coded column name behind.
correlation_column = selected_indicators[0] + ' and ' + selected_indicators[1]

fig = px.choropleth_mapbox(df,
                           geojson=region.geometry,
                           locations='Geolocation',
                           color=correlation_column,
                           center={'lat': 12.099, 'lon': 122.733},
                           zoom=4)
fig.show()

## Choropleth Map: Area of each region

In [None]:
sdg_data = pd.read_csv('./data/sdg_data/combined_data.csv')
sdg_data

### Data cleaning and preprocessing of Region-Area Dataset

In [None]:
df = pd.read_csv('./data/region_area.csv')
df

In [None]:
# Pair every region name (all unique Geolocation values except the first)
# with the 'Area' column of the region-area table, matched by row position.
region_names = sdg_data['Geolocation'].unique()[1:]
sdg = pd.DataFrame({'Geolocation': region_names})
geo = df['Area'].to_frame()

updated_region_area = pd.concat([sdg, geo['Area']], axis=1)

In [None]:
# Convert each 'Area' entry to a float: remove thousands separators and keep
# the text before the first space. Done in one vectorised pass; the earlier
# per-row whole-column replace failed when two rows held the same text.
# astype(str) lets the cell be re-run after the column is already numeric.
area_text = updated_region_area['Area'].astype(str)
updated_region_area['Area'] = (
    area_text.str.replace(',', '', regex=False)
    .str.split(' ')
    .str[0]
    .astype(float)
)

In [None]:
updated_region_area

### Generating the choropleth map

In [None]:
# Colour each region by its numeric 'Area' value.
area_map_options = {
    'geojson': region.geometry,
    'locations': 'Geolocation',
    'color': 'Area',
    'center': {'lat': 12.099, 'lon': 122.733},
    'zoom': 4,
}
fig = px.choropleth_mapbox(updated_region_area, **area_map_options)
fig.show()

# Functions related to Generating Choropleth Maps

In [None]:
# Function for creating CSV for Correlation between 2 indicators
def create_csv_correlation_2_ind(selected_indicators):
    """Compute and save the per-region correlation between two indicators.

    Reads ``./data/indicator_csv/<indicator>.csv`` for both indicators (one
    row per region, 'Geolocation' first, then one column per year), pairs
    the rows by position and computes Pearson's r over the year columns the
    two files share. Years where either value is missing are ignored; a row
    with fewer than two usable years gets NaN.

    The result is written to
    ``./data/indicator_csv/<indicator 1> and <indicator 2>.csv`` (the file
    ``generate_choropleth`` reads), displayed, and returned.

    Parameters
    ----------
    selected_indicators : sequence of str
        The two indicator names; only the first two entries are used.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Geolocation'`` and ``'<indicator 1> and <indicator 2>'``.
    """
    csv_dir = './data/indicator_csv/'
    ind_1 = pd.read_csv(csv_dir + selected_indicators[0] + '.csv')
    ind_2 = pd.read_csv(csv_dir + selected_indicators[1] + '.csv')

    corr_column = selected_indicators[0] + ' and ' + selected_indicators[1]

    # Correlate over the year columns present in both tables (column 0 is
    # the Geolocation label).
    year_columns = [col for col in ind_1.columns[1:] if col in ind_2.columns[1:]]

    records = []
    for i in range(min(len(ind_1), len(ind_2))):
        row_1 = ind_1.iloc[i]
        row_2 = ind_2.iloc[i]

        x = pd.to_numeric(row_1[year_columns], errors='coerce').to_numpy(dtype=float)
        y = pd.to_numeric(row_2[year_columns], errors='coerce').to_numpy(dtype=float)

        # Use only the years where both indicators have a value.
        valid = ~(np.isnan(x) | np.isnan(y))
        if valid.sum() >= 2:
            r, _ = scipy.stats.pearsonr(x[valid], y[valid])
        else:
            r = np.nan

        records.append({'Geolocation': row_1.iloc[0], corr_column: r})

    # DataFrame.append was removed in pandas 2.0; build the frame in one go.
    df = pd.DataFrame(records, columns=['Geolocation', corr_column])
    df.to_csv(csv_dir + corr_column + '.csv', index=False)
    display(df)
    return df

# Function for creating CSV for 1 SDG indicator
def create_csv_sdg_1_ind(selected_indicator):
    """Pivot one indicator of the global ``gdf_shp`` and save it as a CSV.

    The table has one row per 'Geolocation' and one column per 'Year', and
    is written to ``./data/indicator_csv/<selected_indicator>.csv`` without
    the index column.
    """
    wanted_columns = ['geometry', 'Geolocation', 'Year', selected_indicator]
    output_path = './data/indicator_csv/' + selected_indicator + '.csv'

    pivoted = pd.pivot_table(
        gdf_shp[wanted_columns],
        index='Geolocation',
        columns='Year',
        values=selected_indicator,
    )
    pivoted.reset_index().to_csv(output_path, index=False)

# Function for preparing the data of a 1-SDG-indicator choropleth map
def choropleth_map_1_ind(selected_indicator):
    """Return one indicator's per-region values for a year chosen at the prompt.

    Uses the cached table ``./data/indicator_csv/<indicator>.csv`` and
    creates it with ``create_csv_sdg_1_ind`` only when it is missing. The
    available years are printed and the user is prompted (via ``input``)
    until one of them is entered.

    Parameters
    ----------
    selected_indicator : str
        Name of the indicator (also the CSV file name without extension).

    Returns
    -------
    tuple
        ``(pvt_gdf, selected_year)``: a frame with the columns
        ``'Geolocation'`` and ``selected_year``, and the chosen year as a
        string.

    Raises
    ------
    ValueError
        If the cached table contains no year columns.
    """
    csv_path = './data/indicator_csv/' + selected_indicator + '.csv'

    # The existence check has to use the same path that is read below;
    # checking the bare file name never found the cached file.
    if not os.path.isfile(csv_path):
        create_csv_sdg_1_ind(selected_indicator)
    pvt_gdf = pd.read_csv(csv_path)

    # Map each normalised year label (e.g. '2015') to the actual CSV column,
    # so headers such as '2015.0' are still selectable by the typed year.
    year_columns = {}
    for column in pvt_gdf.columns[1:]:
        try:
            year_columns[str(int(float(column)))] = column
        except ValueError:
            continue  # not a year column
    if not year_columns:
        raise ValueError('No year data available for ' + selected_indicator)

    print('Only available year data for this indicator: ' + ', '.join(year_columns))

    # Keep asking until the answer is one of the available years.
    selected_year = input('Input year:').strip()
    while selected_year not in year_columns:
        print('No data for ' + selected_year)
        selected_year = input('Input year:').strip()

    year_column = year_columns[selected_year]
    pvt_gdf = pvt_gdf[['Geolocation', year_column]].rename(
        columns={year_column: selected_year})

    print('\n[SDG Indicator] ' + selected_indicator)
    print('[Year Data] ' + selected_year)
    return pvt_gdf, str(selected_year)

# Function for creating Correlation Choropleth Map
def choropleth_map_2_ind(corr_df):
    """Show a choropleth coloured by the second column of ``corr_df``.

    ``corr_df`` needs a 'Geolocation' column, matched against the index of
    the global ``region`` geometries, and the value to plot in its second
    column.
    """
    map_options = {
        'geojson': region.geometry,
        'locations': 'Geolocation',
        'color': corr_df.columns[1],
        'center': {'lat': 12.099, 'lon': 122.733},
        'zoom': 4,
    }
    px.choropleth_mapbox(corr_df, **map_options).show()
    
# Function for identifying number of selected indicators before generating a choropleth map
def generate_choropleth(selected_indicators):
    """Dispatch on how many indicators were selected.

    * one indicator  -> ``choropleth_map_1_ind`` is called with it;
    * two indicators -> their correlation CSV
      (``./data/indicator_csv/<a> and <b>.csv``) is read and passed to
      ``choropleth_map_2_ind``;
    * any other count -> a message is printed and nothing is drawn.

    Parameters
    ----------
    selected_indicators : sequence of str
        Names of the selected indicators.
    """
    indicator_count = len(selected_indicators)

    if indicator_count == 1:
        # Fixed: the helper name was misspelled and the argument was read
        # from a differently named variable.
        choropleth_map_1_ind(selected_indicators[0])
    elif indicator_count == 2:
        corr_df = pd.read_csv('./data/indicator_csv/' + selected_indicators[0]
                              + ' and ' + selected_indicators[1] + '.csv')
        choropleth_map_2_ind(corr_df)
    else:
        print("No selected indicators!")

### Trial: Creating a Correlation Dataset CSV for Each Pair of Indicators

In [None]:
indicators = ['1.2.1 Poverty Proportion',
       '1.4.1 Net Elem Enrolment Rate',
       '1.4.1 Net Elem Enrolment Rate (Girls)',
       '1.4.1 Net Elem Enrolment Rate (Boys)', '1.4.1 Net JHS Enrolment Rate',
       '1.4.1 Net JHS Enrolment Rate (Girls)',
       '1.4.1 Net JHS Enrolment Rate (Boys)', '1.4.1 Net SHS Enrolment Rate',
       '1.4.1 Net SHS Enrolment Rate (Girls)',
       '1.4.1 Net SHS Enrolment Rate (Boys)',
       '1.5.4 Proportion of LGU with DRR',
       '3.4.1 Mortality rate credited to NCD',
       '3.4.1 Mortality rate credited to Cardio',
       '3.4.1 Mortality rate credited to Cancer',
       '3.4.1 Mortality rate credited to Diabetes',
       '3.4.1 Mortality rate credited to Respi',
       '3.7.1 Proportion of Contraceptive Use of Women',
       '3.7.2 Teenage pregnancy rates per 1000', '4.1 Elem Completion Rate',
       '4.1 Elem Completion Rate (Female)', '4.1 Elem Completion Rate (Male)',
       '4.1 JHS Completion Rate', '4.1 JHS Completion Rate (Female)',
       '4.1 JHS Completion Rate (Male)', '4.1 SHS Completion Rate',
       '4.1 SHS Completion Rate (Female)', '4.1 SHS Completion Rate (Male)',
       '4.c TVET trainers trained', '7.1.1 Proportion of pop with electricity',
       '8.1.1 Growth rate of real GDP per capita',
       '10.1.1.1 Income per capita growth rate of bottom 40',
       '10.1.1.2 Income per capita growth rate',
       '14.5.1.1 Coverage of protected areas',
       '14.5.1.2 Coverage of protected NIPAS and Locally managed MPAs',
       '16.1.1 Victims of intentional homicide per 100,000',
       '16.1.s1 Number of murder cases']
len(indicators)

In [None]:
from itertools import combinations
import numpy as np

In [None]:
# Every unordered pair of indicators, printed with its running number and
# also kept as a 2-column array.
res = list(combinations(indicators, 2))

for i, pair in enumerate(res):
    print("Pair #" + str(i) + " " + str(pair))
q = np.array(res)