# HTML Choropleth Generator

Displaying choropleth maps in a Streamlit app gets complicated once you consider whether the geo data can be hashed, cached, and interacted with. Loading times can become excessive when, for instance, many points are displayed on a map. For that reason, we pre-compute the maps for every feature choice and save them as HTML assets that the Streamlit app can load and display in response to user interactions. This notebook generates those HTML assets.

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import math
import streamlit as st
from streamlit_folium import st_folium, folium_static
from pathlib import Path
import geopandas as gpd
import folium
from shapely import wkt
import pickle
# Path prefixes. `here_prefix` is prepended to the CSV names read by the
# loader functions; `data_prefix` and `html_prefix` are derived from it.
here_prefix = ''
data_prefix = here_prefix + '../data/'
# Location the generated HTML maps are written to.
html_prefix = data_prefix + 'html/'

# State abbreviations. The *_neighbors lists are matched against the
# `stabbr` column of the universities table to pick nearby institutions.
# NOTE(review): `states_of_interest` includes 'NC' but there is no
# NC_neighbors list here - confirm whether that is intentional.
states_of_interest = ['GA', 'WI', 'MA', 'NC']
# 'NH' used to appear twice in this list; each abbreviation is now unique.
MA_neighbors = ['MA', 'NY', 'CT', 'NH', 'RI', 'ME', 'VT']
WI_neighbors = ['WI', 'MI', 'MN', 'IA', 'IL']
GA_neighbors = ['GA', 'NC', 'SC', 'FL', 'AL', 'TN']

# County-level features. Each key is the human-readable label (it is used in
# the map legend and in the saved file name); each value is the name of the
# data column that is coloured on the choropleth.
features_dict = {
    'AP Pass Rate (3 or higher)': 'PassRate',
    'Per capita Personal Income': 'Income',
    'Population': 'Population'
}
# State-level (national map) features, with the same label -> column layout
# as `features_dict`.
national_features_dict = {
    'AP Pass Rate (3 or higher)': 'PassRate',
    'AP Score Mean (out of 5)': 'Mean',
    'Total No. AP Exams': 'Total',
    'Offer 5+ Exams (%)': '5+Exams%',
    'Asian Participation (%)': '%Asian',
    'Hispanic or Latino Participation (%)': '%HispanicOrLatino',
    'White Participation (%)': '%White',
    'Black or African American Participation (%)': '%BlackOrAfricanAmerican',
    'Native American or Alaska Native Participation (%)': '%NativeAmericanOrAlaskaNative',
    'Native Hawaiian or other Pacific Islander Participation (%)': '%NativeHawaiianOrOtherPacificIslander',
    'Two or More Races Participation (%)': '%TwoOrMoreRaces',
}



## Loaders and Folium Methods

In [2]:
def load_national_choropleth_data():
    """Read ``US_States_Map_Data.csv`` (located under ``here_prefix``).

    Returns:
        The CSV contents as a pandas DataFrame, exactly as read.
    """
    national_csv = here_prefix + "US_States_Map_Data.csv"
    national_table = pd.read_csv(national_csv)
    return national_table

def reconstruct_geo(pre_geo_data):
    """Convert a DataFrame with WKT geometry text into a GeoDataFrame.

    Args:
        pre_geo_data: DataFrame whose ``geometry`` column holds values that
            ``shapely.wkt.loads`` can parse.

    Returns:
        A new GeoDataFrame using ``geometry`` as its geometry column, with
        its CRS set to EPSG:4326. The input frame is not modified.
    """
    # `assign` returns a new frame, so the caller's DataFrame keeps its WKT
    # strings. That makes the function safe to call again on the same input
    # and avoids writing into a filtered slice of another DataFrame.
    parsed = pre_geo_data.assign(
        geometry = pre_geo_data['geometry'].apply(wkt.loads)
    )
    geo_data = gpd.GeoDataFrame(parsed, geometry = 'geometry')
    geo_data.set_crs(epsg = 4326, inplace = True)
    return geo_data

def load_county_choropleth_data(year = '2022'):
    """Read ``States_Counties_Map_Data.csv`` and keep the rows for one year.

    Args:
        year: Year to keep. It is compared as text against the ``Year``
            column, so both ``2022`` and ``'2022'`` work. Defaults to
            ``'2022'``, the value that was previously hard-coded.

    Returns:
        An independent DataFrame holding only the rows whose ``Year``
        equals ``year``; the ``Year`` column is of string type.
    """
    counties_map_data = pd.read_csv(here_prefix + 'States_Counties_Map_Data.csv')
    counties_map_data['Year'] = counties_map_data['Year'].astype(str)
    selected_rows = counties_map_data['Year'] == str(year)
    # Return an explicit copy: callers assign new columns to the result, and
    # doing that on a filtered slice can trigger pandas' SettingWithCopyWarning.
    return counties_map_data[selected_rows].copy()
  
def load_universities_data():
    """Read the universities table and derive the per-state "nearby" subsets.

    Only the ``name``, ``stabbr``, ``latitude`` and ``longitude`` columns of
    ``carnegie_with_location.csv`` (under ``data_prefix``) are kept.

    Returns:
        A 4-tuple ``(all, MA_nearby, WI_nearby, GA_nearby)`` where each
        "nearby" frame holds the rows whose ``stabbr`` is in the matching
        ``*_neighbors`` list.
    """
    wanted_columns = ['name', 'stabbr', 'latitude', 'longitude']
    universities_data = pd.read_csv(data_prefix + 'carnegie_with_location.csv')[wanted_columns]

    def nearby(state_group):
        # Rows whose state abbreviation belongs to the given group.
        return universities_data[universities_data['stabbr'].isin(state_group)]

    return (
        universities_data,
        nearby(MA_neighbors),
        nearby(WI_neighbors),
        nearby(GA_neighbors),
    )

def get_state_summaries():
    """Read the three per-state summary-statistics CSVs under ``here_prefix``.

    Returns:
        A tuple of DataFrames in the order ``(MA, WI, GA)``.
    """
    summary_files = (
        'MA_summary_stats.csv',
        'WI_summary_stats.csv',
        'GA_summary_stats.csv',
    )
    return tuple(pd.read_csv(here_prefix + file_name) for file_name in summary_files)

def get_state_AP_tables():
    """Read the three per-state AP table CSVs under ``here_prefix``.

    Returns:
        A tuple of DataFrames in the order ``(MA, WI, GA)``.
    """
    table_files = (
        'MA_AP_table.csv',
        'WI_AP_table.csv',
        'GA_AP_table.csv',
    )
    return tuple(pd.read_csv(here_prefix + file_name) for file_name in table_files)

def choropleth(geo_data,
               selected_feature,
               university_data,
               features_dict,
               title,
               fields,
               aliases,
               center,
               zoom,
               save_path = ''):
    """Build a folium choropleth map and save it as an HTML file.

    The map contains, in this order: the choropleth layer, an invisible
    GeoJson layer that only supplies hover tooltips, an optional layer of
    university markers, and a layer control.

    Args:
        geo_data: Geographic table used both as the shapes and as the data
            of the choropleth. It must provide a ``GEOID`` column, the
            column named by ``features_dict[selected_feature]`` and every
            column listed in ``fields``.
        selected_feature: Key into ``features_dict``; also used in the
            legend text.
        university_data: Table with ``name``, ``latitude`` and ``longitude``
            columns, or ``None`` to omit the university layer.
        features_dict: Mapping from feature label to data column name.
        title: Text used in the layer names and in the legend.
        fields: Column names shown in the hover tooltip.
        aliases: Display labels for ``fields``, in the same order.
        center: Initial map location passed to ``folium.Map``.
        zoom: Initial zoom level passed to ``folium.Map``.
        save_path: File name appended to ``html_prefix``. The default empty
            string resolves to ``html_prefix`` itself, so callers should
            always pass a file name.

    Returns:
        None. The map is written to ``f'{html_prefix}{save_path}'``.
    """
    # Define the choropleth layer based on the selected feature
    choropleth_layer = folium.Choropleth(
        geo_data = geo_data,
        name = f'{title} choropleth',
        data = geo_data,
        columns = ['GEOID', features_dict[selected_feature]],
        key_on = 'feature.properties.GEOID',
        fill_color = 'YlOrRd',
        nan_fill_color = 'lightgrey',
        fill_opacity = 0.7,
        line_opacity = 0.2,
        legend_name = f'{selected_feature} {title}'
    )

    # Transparent overlay whose only job is to show tooltips for each area;
    # `control = False` keeps it out of the layer control.
    area_tooltips = folium.GeoJson(
        geo_data,
        name = f'{title} tooltips',
        control = False,
        style_function = lambda x: {'fillColor': 'transparent', 'color': 'transparent'},
        tooltip = folium.features.GeoJsonTooltip(
            fields = fields,
            aliases = aliases,
            localize = True
        )
    )

    # The university layer is optional. It starts as None so that the
    # "add to map" step below can be skipped instead of failing on an
    # unbound name when no university data is supplied.
    university_layer = None
    if university_data is not None:
        university_layer = folium.FeatureGroup(name = f'{title} universities')
        # One circle marker per university row.
        university_rows = zip(
            university_data['name'],
            university_data['latitude'],
            university_data['longitude'],
        )
        for name, latitude, longitude in university_rows:
            folium.Circle(
                radius = 300,
                fill = False,
                color = "black",
                fill_color = "orange",
                opacity = 1,
                fill_opacity = 0.2,
                weight = 2,
                location = [latitude, longitude],
                popup = folium.Popup(f"{name}", max_width = 300),
                tooltip = name
            ).add_to(university_layer)

    # Map center coordinates
    m = folium.Map(location = center, zoom_start = zoom)
    # Add choropleth layer to the map
    choropleth_layer.add_to(m)
    # Add the area tooltips to the map
    area_tooltips.add_to(m)
    # Add the university layer to the map, when there is one
    if university_layer is not None:
        university_layer.add_to(m)
    # Add a layer control to toggle layers
    folium.LayerControl().add_to(m)
    # Save the map to HTML
    m.save(f'{html_prefix}{save_path}')

## Get all Relevant Data for Maps

In [3]:
# National table: read the CSV, then parse its geometry column.
pre_national_geo_data = load_national_choropleth_data()
national_geo_data = reconstruct_geo(pre_national_geo_data)

# County table: same two steps.
pre_county_geo_data = load_county_choropleth_data()
county_geo_data = reconstruct_geo(pre_county_geo_data)

# Per-state county subsets, selected by state abbreviation.
state_column = county_geo_data['State_Abbreviation']
MA_geo_data = county_geo_data[state_column == 'MA']
WI_geo_data = county_geo_data[state_column == 'WI']
GA_geo_data = county_geo_data[state_column == 'GA']

# Universities (all, plus the subsets near each state) and per-state tables.
(
    universities_data,
    MA_nearby_universities,
    WI_nearby_universities,
    GA_nearby_universities,
) = load_universities_data()
MA_stats, WI_stats, GA_stats = get_state_summaries()
MA_AP_table, WI_AP_table, GA_AP_table = get_state_AP_tables()

## National Choropleths

In [9]:
# Tooltip columns and their display labels, identical for every national map.
national_tooltip_fields = [
    'State', 'PassRate', 'Mean', 'Total', '5+Exams%', '%Asian',
    '%HispanicOrLatino', '%White', '%BlackOrAfricanAmerican',
    '%NativeAmericanOrAlaskaNative', '%NativeHawaiianOrOtherPacificIslander',
    '%TwoOrMoreRaces',
]
national_tooltip_aliases = [
    'State Name:', 'Pass Rate (%)', 'Mean AP Score', 'Total No. AP Exams',
    'Offer 5+ Exams (%)', '% Asian:', '% Hispanic or Latino:', '% White:',
    '% Black or African American:', '% Native American or Alaska Native:',
    '% Native Hawaiian or other Pacific Islander:', '% Two or More Races:',
]

# Write one HTML map per national feature label.
for key in national_features_dict:
    choropleth(
        geo_data = national_geo_data,
        selected_feature = key,
        university_data = universities_data,
        features_dict = national_features_dict,
        title = 'All States AP Performance and Demographics 2022',
        fields = national_tooltip_fields,
        aliases = national_tooltip_aliases,
        center = [40, -96],
        zoom = 4,
        save_path = f'National {key} Choropleth.html'
    )

## County Choropleths

In [5]:
# Tooltip columns and their display labels, identical for every county map.
county_tooltip_fields = ['County_State', 'PassRate', 'Income', 'Population', 'Year']
county_tooltip_aliases = ['County:', 'AP Pass Rate (%):', 'Per-capita Income: $', 'Population:', 'Year:']

# Write one HTML map per county feature label, covering all loaded counties.
for key in features_dict:
    choropleth(
        geo_data = county_geo_data,
        selected_feature = key,
        university_data = universities_data,
        features_dict = features_dict,
        title = 'States of Interest by County 2022',
        fields = county_tooltip_fields,
        aliases = county_tooltip_aliases,
        center = [39.5, -82],
        zoom = 5,
        save_path = f'County {key} Choropleth.html'
    )

## MA Choropleths

In [6]:
# Tooltip columns and their display labels for the Massachusetts maps.
ma_tooltip_fields = ['County_State', 'PassRate', 'Income', 'Population', 'Year']
ma_tooltip_aliases = ['County:', 'AP Pass Rate (%):', 'Per-capita Income: $', 'Population:', 'Year:']

# Write one HTML map per feature label for the Massachusetts counties.
for key in features_dict:
    choropleth(
        geo_data = MA_geo_data,
        selected_feature = key,
        university_data = MA_nearby_universities,
        features_dict = features_dict,
        title = 'Massachusetts by County 2022',
        fields = ma_tooltip_fields,
        aliases = ma_tooltip_aliases,
        center = [42.4, -71.7],
        zoom = 8,
        save_path = f'Massachusetts {key} Choropleth.html'
    )

## WI Choropleths

In [7]:
# Tooltip columns and their display labels for the Wisconsin maps.
wi_tooltip_fields = ['County_State', 'PassRate', 'Income', 'Population', 'Year']
wi_tooltip_aliases = ['County:', 'AP Pass Rate (%):', 'Per-capita Income: $', 'Population:', 'Year:']

# Write one HTML map per feature label for the Wisconsin counties.
for key in features_dict:
    choropleth(
        geo_data = WI_geo_data,
        selected_feature = key,
        university_data = WI_nearby_universities,
        features_dict = features_dict,
        title = 'Wisconsin by County 2022',
        fields = wi_tooltip_fields,
        aliases = wi_tooltip_aliases,
        center = [44.5, -88.8],
        zoom = 6,
        save_path = f'Wisconsin {key} Choropleth.html'
    )

## GA Choropleths

In [8]:
# Tooltip columns and their display labels for the Georgia maps.
ga_tooltip_fields = ['County_State', 'PassRate', 'Income', 'Population', 'Year']
ga_tooltip_aliases = ['County:', 'AP Pass Rate (%):', 'Per-capita Income: $', 'Population:', 'Year:']

# Write one HTML map per feature label for the Georgia counties.
for key in features_dict:
    choropleth(
        geo_data = GA_geo_data,
        selected_feature = key,
        university_data = GA_nearby_universities,
        features_dict = features_dict,
        title = 'Georgia by County 2022',
        fields = ga_tooltip_fields,
        aliases = ga_tooltip_aliases,
        center = [32.2, -82.9],
        zoom = 8,
        save_path = f'Georgia {key} Choropleth.html'
    )