In [110]:
import os
import pandas as pd
import folium
from decimal import Decimal
import geopandas as gpd
import numpy as np

### Read Census Data 
Read in census data which reports the Hispanic population by CBSA for the U.S.

In [192]:
# Load the census extract (relative path, resolved against the working directory)
# into a DataFrame.
census = pd.read_csv("census_hispanic_population.csv")

In [193]:
# Lower-case every column header so the rest of the notebook can use lowercase names.
census.columns = census.columns.str.lower()

# Hispanic share of the total population, stored as a fraction; a missing
# result is recorded as 0.
census['hispanic_pop_pct'] = (
    census['hispanic_pop'].div(census['population']).fillna(0)
)

# Discard every row that still has a missing value in any column
# (this is what removes rows whose CBSA code is NA).
census = census.dropna()

# Cast to int first, then to str (presumably to avoid a trailing ".0" if the
# codes were parsed as floats -- confirm against the raw file).
census['cbsa'] = census['cbsa'].astype(int).astype(str)

In [194]:
# Preview the first rows of the cleaned census frame.
census.head()

Unnamed: 0,cbsa,cbsa_name,hispanic_pop,population,businesses,hispanic_pop_pct
0,44140,"Springfield, MA",104249,622503,13129,0.167467
1,14460,"Boston-Cambridge-Newton, MA-NH",410625,4550079,121730,0.090246
3,38860,"Portland-South Portland, ME",7977,514464,17069,0.015505
4,13620,"Berlin, NH-VT",460,39795,1015,0.011559
5,25540,"Hartford-West Hartford-East Hartford, CT",151129,1210570,28971,0.124841


### Read geojson data Here - See https://rsandstroem.github.io/GeoMapsFoliumDemo.html for details

In [195]:
# Load the CBSA boundary GeoJSON with geopandas.
geodata = gpd.read_file('cb_2017_us_cbsa_20m.geojson')
# Keep CBSA codes as strings: find_coordinates matches this column against
# CBSA codes with ==, and the census codes are stored as strings.
geodata['CBSAFP'] = geodata['CBSAFP'].astype(str)
# Preview the first rows.
geodata.head()

Unnamed: 0,CSAFP,CBSAFP,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry
0,357.0,12660,310M300US12660,12660,"Baraboo, WI",M2,2153665639,45713371,"POLYGON ((-90.312404 43.640988, -89.785809 43...."
1,544.0,38920,310M300US38920,38920,"Port Lavaca, TX",M2,1312707024,1361884769,"POLYGON ((-96.930367 28.586728, -96.853757 28...."
2,290.0,22840,310M300US22840,22840,"Fort Payne, AL",M2,2012676476,4121538,"POLYGON ((-86.118894 34.403845, -86.057712 34...."
3,,32100,310M300US32100,32100,"Marquette, MI",M2,4685386476,4185372277,"POLYGON ((-88.11657099999999 46.419951, -88.11..."
4,258.0,26540,310M300US26540,26540,"Huntington, IN",M2,991057049,13133342,"POLYGON ((-85.64384099999999 41.002305, -85.33..."


### Generate Map

In [204]:
# The choropleth joins census rows to GeoJSON features on the CBSA code,
# so make sure the census key is a string.
census['cbsa'] = census['cbsa'].astype(str)

# CBSA boundary file, referenced by its path relative to the working directory.
cbsa = 'cb_2017_us_cbsa_20m.geojson'

# Base map plus a choropleth layer shaded by Hispanic population share.
# NOTE(review): hispanic_pop_pct holds fractions (hispanic_pop / population),
# although the legend text below carries a "(%)" suffix.
hispanic_pop_map = folium.Map(location=[48, -102], zoom_start=3.5)
hispanic_pop_map.choropleth(
    geo_data=cbsa,
    key_on='feature.properties.CBSAFP',
    data=census,
    columns=['cbsa', 'hispanic_pop_pct'],
    legend_name='hispanic_pop_pct(%)',
    fill_color='YlGnBu',
    line_opacity=0.2,
)

hispanic_pop_map

In [205]:
# Ten largest CBSAs by total population (ties broken by Hispanic share),
# restricted to the identifying columns and the two sort keys.
(
    census
    .loc[:, ['cbsa', 'cbsa_name', 'population', 'hispanic_pop_pct']]
    .sort_values(by=['population', 'hispanic_pop_pct'], ascending=False)
    .head(10)
)

Unnamed: 0,cbsa,cbsa_name,population,hispanic_pop_pct
6,35620,"New York-Newark-Jersey City, NY-NJ-PA",19561897,0.226247
183,31080,"Los Angeles-Long Beach-Anaheim, CA",12828176,0.444691
73,16980,"Chicago-Naperville-Elgin, IL-IN-WI",9456569,0.206938
32,19100,"Dallas-Fort Worth-Arlington, TX",6434860,0.273401
7,37980,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",5950042,0.078531
116,26420,"Houston-The Woodlands-Sugar Land, TX",5928789,0.353843
13,47900,"Washington-Arlington-Alexandria, DC-VA-MD-WV",5642908,0.137506
21,33100,"Miami-Fort Lauderdale-West Palm Beach, FL",5570526,0.415224
19,12060,"Atlanta-Sandy Springs-Roswell, GA",5279666,0.103663
1,14460,"Boston-Cambridge-Newton, MA-NH",4550079,0.090246


### Create Folium Markers for the Top 10 CBSAs by Total Population
Create folium markers for the 10 CBSAs with the largest total population (ties broken by Hispanic population share), overlaid on the Hispanic population map.

In [207]:
#### Load helper functions
def find_coordinates(geodata, cbsas):
    """Look up the centroid coordinates of each requested CBSA.

    Parameters
    ----------
    geodata :     DataFrame
                  One row per CBSA, with a 'CBSAFP' column holding the CBSA
                  code and a 'geometry' column whose values expose
                  ``.centroid.x`` and ``.centroid.y``.
    cbsas :       List
                  CBSA codes to look up; compared to 'CBSAFP' with ``==``,
                  so they must use the same type as that column.

    Returns
    -------
    coordinates : Dictionary
                  Maps every requested CBSA code to ``[x, y]`` of the
                  centroid of its first matching row (for longitude/latitude
                  data that is ``[longitude, latitude]``), or to
                  ``[None, None]`` when the code is not in ``geodata`` or
                  the row has no geometry.

    Also prints how many of the requested codes were matched.
    """
    coord_dict = {}
    match_cnt = 0
    for cbsa in cbsas:
        # Select by boolean mask and take the first hit positionally, so the
        # lookup does not depend on geodata having a default integer index.
        matches = geodata[geodata['CBSAFP'] == cbsa]
        geometry = None if matches.empty else matches['geometry'].iloc[0]
        if geometry is None:
            # Unknown code or missing geometry: keep the key, flag as unmatched.
            coord_dict[cbsa] = [None, None]
            continue

        centroid = geometry.centroid
        # Order is [x, y]; callers that need [lat, lon] must swap the pair.
        coord_dict[cbsa] = [centroid.x, centroid.y]
        match_cnt += 1

    # Parenthesised single-argument form prints the same under Python 2 and 3.
    print("{}/{} matching records in geodata".format(match_cnt, len(cbsas)))
    return coord_dict

#### Get top 10 CBSAS with highest total population and highest hispanic population %

In [209]:
# CBSA codes of the ten largest CBSAs by total population, with Hispanic
# share as the secondary (descending) sort key.
top_10 = (
    census
    .sort_values(by=['population', 'hispanic_pop_pct'], ascending=False)
    .head(10)['cbsa']
    .tolist()
)

# Centroid lookup for those codes; also prints how many were matched.
top_10_coord = find_coordinates(geodata, top_10)

10/10 matching records in geodata


In [211]:
# The choropleth joins census rows to GeoJSON features on the CBSA code,
# so make sure the census key is a string.
census['cbsa'] = census['cbsa'].astype(str)

# CBSA boundary file, referenced by its path relative to the working directory.
cbsa = 'cb_2017_us_cbsa_20m.geojson'

# Rebuild the base map and choropleth so this cell can be re-run on its own.
# NOTE(review): hispanic_pop_pct holds fractions (hispanic_pop / population),
# although the legend text below carries a "(%)" suffix.
hispanic_pop_map = folium.Map(location=[48, -102], zoom_start=3.5)
hispanic_pop_map.choropleth(
    geo_data=cbsa,
    data=census,
    columns=['cbsa', 'hispanic_pop_pct'],
    key_on='feature.properties.CBSAFP',
    fill_color='YlGnBu',
    line_opacity=0.2,
    legend_name='hispanic_pop_pct(%)'
)

### Add one marker per top-10 CBSA that has a known centroid
# .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
# The loop variable is named cbsa_code so it does not overwrite `cbsa` above.
for cbsa_code, coordinates in top_10_coord.items():
    # find_coordinates stores each pair as [x, y], i.e. [longitude, latitude].
    lon, lat = coordinates
    if lon is None or lat is None:
        # No centroid was found for this CBSA; nothing to plot.
        continue
    # folium.Marker expects [latitude, longitude], hence the swapped order.
    folium.Marker([lat, lon], popup=str(cbsa_code),
                  icon=folium.Icon(color="purple")).add_to(hispanic_pop_map)

hispanic_pop_map