In [1]:
import geopandas as gpd
import pandas as pd
import shapely

from bokeh.io import output_notebook, show, output_file
from bokeh.models import ColumnDataSource, LinearColorMapper, LogColorMapper
from bokeh.models import ColorBar, HoverTool
from bokeh.plotting import figure, output_file, save
from bokeh.palettes import *

In [2]:
# Configure Bokeh to render plots inline in the notebook.
output_notebook()

## 1. Get longitude and latitude for the boundary of each county

In [3]:
def break_geom(geom):
    """
    split a MultiPolygon into a list of its member geometries;
    any other object is handed back unchanged
    """
    multipolygon_cls = shapely.geometry.multipolygon.MultiPolygon
    if type(geom) is not multipolygon_cls:
        return geom
    return [part for part in geom.geoms]

def break_geom_row(row, col):
    """
    expand one data frame row into one row per member polygon

    row: pandas Series holding a single row
    col: name of the entry in `row` that holds the geometry

    returns a DataFrame with the same columns as `row`:
    - geometry is not a MultiPolygon: the row itself, as a one-row frame
    - geometry is a MultiPolygon: one row per member polygon, with the
      other columns duplicated
    - geometry is a MultiPolygon without members: an empty frame
    """
    col_names = row.index
    geom = break_geom(row[col])
    if type(geom) is not list:
        return pd.DataFrame([row])
    if not geom:
        # zero members: building the frame from an empty list would give
        # zero columns and make the column assignment below fail
        return pd.DataFrame(columns=col_names)
    other_cols = [c for c in col_names if c != col]
    other = tuple(row[c] for c in other_cols)
    # repeat the non-geometry values once per member polygon
    df = pd.DataFrame([other for _ in geom])
    df.columns = other_cols
    df[col] = geom
    # restore the original column order
    return df.loc[:, col_names]

def get_us_state_polygons(shapefile='shapefiles/cb_2017_us_state_5m.shp'):
    """
    read the us state shapefile and return the boundary coordinates
    of every state polygon after projection to EPSG:2163
    (projected coordinates, not lon/lat)

    result columns: 'state', 'xs', 'ys' (one row per polygon)
    """
    states = gpd.read_file(shapefile).loc[:, ['STUSPS', 'geometry']]
    states.columns = ['state', 'geometry']
    states['geometry_proj'] = states['geometry'].to_crs(epsg=2163)
    # one small frame per state; a state made of several polygons
    # becomes several rows with the other columns repeated
    per_state = [break_geom_row(states.iloc[pos, :], 'geometry_proj')
                 for pos in range(states.shape[0])]
    polygons = pd.concat(per_state).reset_index(drop=True)
    # boundary.xy is an (x values, y values) pair
    for axis, name in enumerate(('xs', 'ys')):
        polygons[name] = polygons['geometry_proj'].apply(
            lambda g, axis=axis: list(g.boundary.xy[axis]))
    return polygons.loc[:, ['state', 'xs', 'ys']]

def get_us_zip_polygons(shapefile='shapefiles/cb_2017_us_zcta510_500k.shp'):
    """
    get coordinates of us zip code area boundaries in
    projected coordinates (EPSG:2163, not lon/lat)

    returns a DataFrame with columns 'zip', 'xs', 'ys' and one row per
    polygon. rows whose boundary coordinates cannot be read (e.g. a
    missing geometry, or a boundary made of several lines) are dropped
    """
    df = gpd.read_file(shapefile)
    df = df.loc[:, ['GEOID10', 'geometry']]
    df.columns = ['zip', 'geometry']
    df['geometry_proj'] = df['geometry'].to_crs(epsg=2163)
    n = df.shape[0]
    # expand multiple geometries to separate rows
    # (duplicate other columns)
    df_exp = pd.concat([break_geom_row(df.iloc[i, :],
                                       'geometry_proj')
                        for i in range(n)]).reset_index(drop=True)
    xs_list = []
    ys_list = []
    for geom in df_exp['geometry_proj']:
        try:
            x, y = geom.boundary.xy
            xs, ys = x.tolist(), y.tolist()
        except Exception:
            # best effort: mark the row for removal instead of failing.
            # (Exception, not a bare except, so Ctrl-C still interrupts)
            xs = ys = None
        xs_list.append(xs)
        ys_list.append(ys)
    df_exp['xs'] = xs_list
    df_exp['ys'] = ys_list
    # keep only the rows whose coordinates could be extracted
    df_exp = df_exp.loc[df_exp['xs'].notna()]
    result = df_exp.loc[:, ['zip', 'xs', 'ys']]
    return result

def get_us_zip3_polygons(shapefile='shapefiles/zip3.shp'):
    """
    get coordinates of us 3-digit zip code (zip3) area boundaries in
    projected coordinates (EPSG:2163, not lon/lat)

    returns a DataFrame with columns 'zip3', 'xs', 'ys' and one row per
    polygon. rows whose boundary coordinates cannot be read (e.g. a
    missing geometry, or a boundary made of several lines) are dropped
    """
    df = gpd.read_file(shapefile)
    df = df.loc[:, ['ZIP3', 'geometry']]
    df.columns = ['zip3', 'geometry']
    df['geometry_proj'] = df['geometry'].to_crs(epsg=2163)
    n = df.shape[0]
    # expand multiple geometries to separate rows
    # (duplicate other columns)
    df_exp = pd.concat([break_geom_row(df.iloc[i, :],
                                       'geometry_proj')
                        for i in range(n)]).reset_index(drop=True)
    xs_list = []
    ys_list = []
    for geom in df_exp['geometry_proj']:
        try:
            x, y = geom.boundary.xy
            xs, ys = x.tolist(), y.tolist()
        except Exception:
            # best effort: mark the row for removal instead of failing.
            # (Exception, not a bare except, so Ctrl-C still interrupts)
            xs = ys = None
        xs_list.append(xs)
        ys_list.append(ys)
    df_exp['xs'] = xs_list
    df_exp['ys'] = ys_list
    # keep only the rows whose coordinates could be extracted
    df_exp = df_exp.loc[df_exp['xs'].notna()]
    result = df_exp.loc[:, ['zip3', 'xs', 'ys']]
    return result

def get_us_county_polygons(shapefile='UScounties/UScounties.shp'):  # US map
    """
    get coordinates of us county boundaries in the coordinates
    stored in the shapefile (no projection is applied)

    returns a DataFrame with columns 'FIPS', 'xs', 'ys' and one row per
    polygon. rows whose boundary coordinates cannot be read (e.g. a
    missing geometry, or a boundary made of several lines) are dropped
    """
    df = gpd.read_file(shapefile)
    df = df.loc[:, ['FIPS', 'geometry']]
    # unlike the state/zip helpers, the geometry is used as read:
    # the to_crs(epsg=2163) projection is deliberately not applied here
    df['geometry_proj'] = df['geometry']
    n = df.shape[0]
    # expand multiple geometries to separate rows
    # (duplicate other columns)
    df_exp = pd.concat([break_geom_row(df.iloc[i, :],
                                       'geometry_proj')
                        for i in range(n)]).reset_index(drop=True)
    xs_list = []
    ys_list = []
    for geom in df_exp['geometry_proj']:
        try:
            x, y = geom.boundary.xy
            xs, ys = x.tolist(), y.tolist()
        except Exception:
            # best effort: mark the row for removal instead of failing.
            # (Exception, not a bare except, so Ctrl-C still interrupts)
            xs = ys = None
        xs_list.append(xs)
        ys_list.append(ys)
    df_exp['xs'] = xs_list
    df_exp['ys'] = ys_list
    # keep only the rows whose coordinates could be extracted
    df_exp = df_exp.loc[df_exp['xs'].notna()]
    result = df_exp.loc[:, ['FIPS', 'xs', 'ys']]
    return result

In [4]:
# Build the county boundary table (columns FIPS, xs, ys) from the
# default county shapefile.
county_geom = get_us_county_polygons()

In [5]:
# Preview the first rows of the county boundary table.
county_geom.head()

Unnamed: 0,FIPS,xs,ys
0,27077,"[-95.34283127277658, -95.34105289190684, -95.0...","[48.546679319076, 48.71517195733587, 48.717357..."
1,53019,"[-118.85162880133869, -118.8484604662869, -118...","[47.949563684819964, 48.478065749514016, 48.47..."
2,53065,"[-117.43883157628596, -117.54219218725807, -11...","[48.04411548512263, 48.04329014316636, 47.7900..."
3,53047,"[-118.97209386283504, -118.9740628825699, -118...","[47.93915200536639, 47.95939692036921, 47.9887..."
4,53051,"[-117.43858043030278, -117.03204952594353, -11...","[48.99991850672649, 48.99993130232423, 48.8380..."


In [6]:
# (rows, columns) of the boundary table; there is one row per polygon,
# so a county made of several polygons occupies several rows.
county_geom.shape

(3240, 3)

## 2. Election data

In [8]:
# Read fips_code as a string so that leading zeros (e.g. '01127') are kept;
# the merge key on the geometry side is compared against this column.
election_data = pd.read_csv('election_data.csv', dtype = {'fips_code': str})
# Quick look at the size and the first rows of the table.
print(election_data.shape)
print(election_data.head())

(3112, 26)
  fips_code              county  total_2008  dem_2008  gop_2008  oth_2008  \
0     26041        Delta County       19064      9974      8763       327   
1     48295     Lipscomb County        1256       155      1093         8   
2     01127       Walker County       28652      7420     20722       510   
3     48389       Reeves County        3077      1606      1445        26   
4     56017  Hot Springs County        2546       619      1834        93   

   total_2012  dem_2012  gop_2012  oth_2012     ...       2016_dem_ratio  \
0       18043      8330      9533       180     ...             0.348243   
1        1168       119      1044         5     ...             0.102118   
2       28497      6551     21633       313     ...             0.153404   
3        2867      1649      1185        33     ...             0.521043   
4        2495       523      1894        78     ...             0.157791   

   2008_gop_ratio  2012_gop_ratio  2016_gop_ratio  2008_oth_ratio  \


## 3. Merge election data with the boundary longitude and latitude of each county

In [9]:
# Inner join on the county FIPS code: only counties present in both tables
# are kept. county_geom has one row per polygon, so a county made of several
# polygons appears on several rows of the merged table.
county_rate = election_data.merge(county_geom, left_on='fips_code', right_on='FIPS', how='inner')
# Quick look at the size and the first rows of the merged table.
print(county_rate.shape)
print(county_rate.head())

(3175, 29)
  fips_code              county  total_2008  dem_2008  gop_2008  oth_2008  \
0     26041        Delta County       19064      9974      8763       327   
1     48295     Lipscomb County        1256       155      1093         8   
2     01127       Walker County       28652      7420     20722       510   
3     48389       Reeves County        3077      1606      1445        26   
4     56017  Hot Springs County        2546       619      1834        93   

   total_2012  dem_2012  gop_2012  oth_2012  \
0       18043      8330      9533       180   
1        1168       119      1044         5   
2       28497      6551     21633       313   
3        2867      1649      1185        33   
4        2495       523      1894        78   

                         ...                          2016_gop_ratio  \
0                        ...                                0.601722   
1                        ...                                0.876702   
2                        ..

## 4.1 Visualize data for 2012

In [14]:
# Wrap the merged table in a Bokeh ColumnDataSource so that glyphs and
# hover tooltips can refer to its columns by name.
plot_source = ColumnDataSource(county_rate)

In [15]:
## Choropleth of the 2012 Democratic ratio, drawn with Bokeh

# The palette is stretched between the smallest and the largest ratio.
color_mapper = LinearColorMapper(
    palette=Inferno256,
    low=county_rate['2012_dem_ratio'].min(),   # value mapped to the first colour
    high=county_rate['2012_dem_ratio'].max(),  # value mapped to the last colour
)

# One filled patch per polygon row; the fill colour is the row's ratio
# passed through the colour mapper.
p = figure(title='2012 Democratic Ratio', width=900, height=900)
p.patches(
    xs='xs',
    ys='ys',
    source=plot_source,
    fill_color=dict(field='2012_dem_ratio', transform=color_mapper),
)

# Colour scale legend.
color_bar = ColorBar(color_mapper=color_mapper)
p.add_layout(color_bar)

# Hover tooltips as (label, '@column') pairs
# (use @{ } for field names with spaces).
hover = HoverTool(tooltips=(
    [('fips', '@FIPS')]
    + [('2012_{}_count'.format(kind), '@{}_2012'.format(kind))
       for kind in ('total', 'dem', 'gop', 'oth')]
    + [('2012_{}_ratio'.format(kind), '@2012_{}_ratio'.format(kind))
       for kind in ('dem', 'gop', 'oth')]
))
p.add_tools(hover)

In [16]:
# Write the 2012 map to a standalone HTML file; the cell output shows the
# path of the file that save() wrote.
output_file("2012_county_rate.html")
save(p)

'/Users/luyao/Documents/Projects/Election_project/Election-Geolocation-master/2012_county_rate.html'

## 4.2 Visualize data for 2016

In [17]:
# Choropleth of the 2016 Democratic ratio.
# The palette is stretched between the smallest and the largest ratio.
color_mapper = LinearColorMapper(
    palette=Inferno256,
    low=county_rate['2016_dem_ratio'].min(),
    high=county_rate['2016_dem_ratio'].max(),
)

# One filled patch per polygon row, coloured through the mapper.
p = figure(title='2016 Democratic Ratio', width=900, height=900)
p.patches(
    xs='xs',
    ys='ys',
    source=plot_source,
    fill_color=dict(field='2016_dem_ratio', transform=color_mapper),
)

# Colour scale legend.
color_bar = ColorBar(color_mapper=color_mapper)
p.add_layout(color_bar)

# Hover tooltips as (label, '@column') pairs.
hover = HoverTool(tooltips=(
    [('fips', '@FIPS')]
    + [('2016_{}_count'.format(kind), '@{}_2016'.format(kind))
       for kind in ('total', 'dem', 'gop', 'oth')]
    + [('2016_{}_ratio'.format(kind), '@2016_{}_ratio'.format(kind))
       for kind in ('dem', 'gop', 'oth')]
))
p.add_tools(hover)

In [18]:
# Write the 2016 map to a standalone HTML file; the cell output shows the
# path of the file that save() wrote.
output_file("2016_county_rate.html")
save(p)

'/Users/luyao/Documents/Projects/Election_project/Election-Geolocation-master/2016_county_rate.html'

In [19]:
# Keep only the rows whose 2008 party counts add up to the reported total.
party_votes_2008 = (
    county_rate['dem_2008']
    + county_rate['gop_2008']
    + county_rate['oth_2008']
)
county_rate_sel = county_rate.loc[party_votes_2008 == county_rate['total_2008']]
print(county_rate_sel.shape)

(3170, 29)


## 4.3 Visualize data for 2008

In [20]:
# Rebuild the data source from the filtered table used for the 2008 map.
# NOTE(review): this rebinds plot_source, which the 2012 and 2016 plot cells
# also read; re-running those cells after this one would plot the filtered rows.
plot_source = ColumnDataSource(county_rate_sel)

In [21]:
# Choropleth of the 2008 Democratic ratio (filtered rows only).
# The palette is stretched between the smallest and the largest ratio.
color_mapper = LinearColorMapper(
    palette=Inferno256,
    low=county_rate_sel['2008_dem_ratio'].min(),
    high=county_rate_sel['2008_dem_ratio'].max(),
)

# One filled patch per polygon row, coloured through the mapper.
p = figure(title='2008 Democratic Ratio', width=900, height=900)
p.patches(
    xs='xs',
    ys='ys',
    source=plot_source,
    fill_color=dict(field='2008_dem_ratio', transform=color_mapper),
)

# Colour scale legend.
color_bar = ColorBar(color_mapper=color_mapper)
p.add_layout(color_bar)

# Hover tooltips as (label, '@column') pairs.
hover = HoverTool(tooltips=(
    [('fips', '@FIPS')]
    + [('2008_{}_count'.format(kind), '@{}_2008'.format(kind))
       for kind in ('total', 'dem', 'gop', 'oth')]
    + [('2008_{}_ratio'.format(kind), '@2008_{}_ratio'.format(kind))
       for kind in ('dem', 'gop', 'oth')]
))
p.add_tools(hover)

In [22]:
# Write the 2008 map to a standalone HTML file; the cell output shows the
# path of the file that save() wrote.
output_file("2008_county_rate.html")
save(p)

'/Users/luyao/Documents/Projects/Election_project/Election-Geolocation-master/2008_county_rate.html'