# Geoplotting Obesity Data
Geographical plotting of health atlas data.   
**Author:** Matthias Titze s0563413

In [21]:
import os
import math
import numpy as np
import pandas as pd
import csv
import xml.etree.cElementTree as et

**Bokeh** Imports and Initialization

In [22]:
from bokeh.io import output_notebook
from bokeh.io import show
from bokeh.models import LinearColorMapper
from bokeh.palettes import Viridis6
from bokeh.palettes import Viridis6 as palette
from bokeh.plotting import figure

In [13]:
output_notebook()
# Build the reversed palette from the untouched Viridis6 import instead of
# reversing the imported list in place: re-running this cell now always gives
# the same colour order rather than flipping it back on every second run, and
# it also works when the palette is an immutable sequence.
palette = list(reversed(Viridis6))

**File Paths**

In [23]:
# File paths for quick adjustment if places or directories change.
# The four stage directories are declared together, followed by the two
# concrete files this notebook reads.
path_input, path_temp, path_inter, path_output = (
    '00_data_input',
    '01_data_temp',
    '02_data_intermediate',
    '03_data_output',
)
path_counties_geodata = os.path.join(path_input, 'us_counties_geodata.csv')
path_obesity = os.path.join(path_inter, 'obesity_refined_data.csv')

# Importing Obesity Data
We only want the obesity data of 2013 for the selected state.

In [24]:
# Load the refined obesity table and preview its first three rows.
df_source = pd.read_csv(path_obesity)
df_source.head(3)

Unnamed: 0,county_fips,county,state,PCT_obese_adults_2008,PCT_obese_adults_2013,PCT_obese_adults_average,PCT_obese_adults_change
0,1001,Autauga,AL,31.5,34.1,32.8,2.6
1,1003,Baldwin,AL,26.2,27.4,26.8,1.2
2,1005,Barbour,AL,37.6,44.4,41.0,6.8


In [25]:
def extract_state_dataframe(df_source, state, column):
    """Return the rows of one state, reduced to the id columns plus `column`.

    The state abbreviation is upper-cased before it is compared with the
    'state' column, so 'ny' and 'NY' select the same rows. The result keeps
    the columns 'county_fips', 'county', 'state' and `column`, sorted by
    county name.
    """
    is_selected_state = df_source['state'] == state.upper()
    kept_columns = ['county_fips', 'county', 'state', column]
    return df_source.loc[is_selected_state, kept_columns].sort_values('county')

In [26]:
# Obesity rates of 2013 for New York; preview the first five counties.
df_obesity_state = extract_state_dataframe(
    df_source, state='ny', column='PCT_obese_adults_2013')
df_obesity_state.head(5)

Unnamed: 0,county_fips,county,state,PCT_obese_adults_2013
1828,36001,Albany,NY,25.4
1829,36003,Allegany,NY,29.6
1830,36005,Bronx,NY,30.4
1831,36007,Broome,NY,27.3
1832,36009,Cattaraugus,NY,28.1


### Data Range Limits   
In order not to waste the map's color range we calculate the upper and lower limit of the data to map.

In [29]:
def get_data_range(series):
    """Summarize a series and derive integer bounds that enclose its values.

    Returns a 4-tuple: the `describe()` summary of the series, its minimum
    rounded down, its maximum rounded up, and the values as a plain list.
    """
    summary = series.describe()
    lower_bound, upper_bound = math.floor(summary['min']), math.ceil(summary['max'])
    return summary, lower_bound, upper_bound, series.tolist()

In [30]:
# Compute the value range of the selected column and show the summary.
obesity_2013_series = df_obesity_state['PCT_obese_adults_2013']
df_description, min_value, max_value, values_list = get_data_range(obesity_2013_series)
print('Lower Limit: ', min_value)
print('Upper Limit: ', max_value)
df_description

Lower Limit:  14
Upper Limit:  34


count    62.000000
mean     27.524194
std       3.059181
min      14.700000
25%      26.100000
50%      27.900000
75%      29.600000
max      33.700000
Name: PCT_obese_adults_2013, dtype: float64

# Importing County Geodata
We only want the geodata for the selected state.

In [31]:
def read_county_geodata(path=None):
    """Parse the county geodata CSV into a dictionary of boundary polygons.

    The first CSV record is treated as a header and skipped. Every other
    non-empty row must have exactly 13 columns; the 1st (name), 3rd (state),
    5th (geometry XML), 9th (detailed name), 10th (state id) and 11th
    (county id) are used, the rest are ignored.

    Args:
        path: CSV file to read. Defaults to the notebook-level
            `path_counties_geodata`.

    Returns:
        dict keyed by `(int(state_id), int(county_id))`. Each value is a dict
        with the keys 'name', 'detailed name', 'state', 'lats' and 'lons'.
        When a county consists of several outer rings, their coordinates are
        concatenated with a NaN entry between consecutive rings.

    Raises:
        ValueError: if a non-empty row does not have 13 columns or a
            coordinate / id cannot be converted to a number.
    """
    if path is None:
        path = path_counties_geodata

    nan = float('NaN')
    data = {}

    with open(path, 'r', newline='', encoding='utf8') as f:
        reader = csv.reader(f, delimiter=',', quotechar='"')
        # Skip the header; the default keeps an empty file from raising.
        next(reader, None)
        for row in reader:
            # csv.reader yields [] for blank lines (e.g. a trailing newline).
            if not row:
                continue
            (name, _, state, _, geometry, _, _, _,
             det_name, state_id, county_id, _, _) = row

            lats = []
            lons = []
            rings = et.fromstring(geometry).findall(
                './/outerBoundaryIs/LinearRing/coordinates')
            for ring in rings:
                # Each whitespace-separated token is "lon,lat[,...]".
                points = [token.split(',')[:2]
                          for token in (ring.text or '').split()]
                if not points:
                    # An empty <coordinates> element contributes nothing.
                    continue
                if lats:
                    # NaN marks the break between two rings of one county.
                    lats.append(nan)
                    lons.append(nan)
                lons.extend(float(lon) for lon, _ in points)
                lats.extend(float(lat) for _, lat in points)

            # Fill one county entry in the data package.
            data[(int(state_id), int(county_id))] = {
                'name': name, 'detailed name': det_name, 'state': state,
                'lats': lats, 'lons': lons,
            }
    return data

# Plotting County Data to State Map

In [47]:
def plot_county_map(title, state, value_name, series, value_limits=None, county_fips=None):
    """Draw a choropleth map of one state's counties coloured by `series`.

    Args:
        title: Title of the figure.
        state: State abbreviation; matched case-insensitively against the
            'state' field of the county geodata.
        value_name: Label shown for the value in the hover tooltip.
        series: Values to plot, one per county.
        value_limits: Optional `(low, high)` pair for the colour range.
            When omitted, the range comes from `get_data_range(series)`.
        county_fips: Optional sequence of county FIPS codes parallel to
            `series`. When given, each value is attached to its polygon by
            code rather than by position; polygons without a value get NaN.
            NOTE(review): assumes a FIPS code equals
            `state_id * 1000 + county_id` of the geodata key -- confirm
            against the geodata file.

    Raises:
        ValueError: if `county_fips` is omitted and the number of values
            differs from the number of county polygons found for `state`.
    """
    # Only the limits and the raw values are needed from the description.
    _, min_value, max_value, values_list = get_data_range(series)

    if value_limits is not None:
        min_value = value_limits[0]
        max_value = value_limits[1]

    # Read the county geodata and keep only the counties of the selected state.
    state_key = state.lower()
    counties = {code: county for code, county in read_county_geodata().items()
                if county["state"].lower() == state_key}

    # Get the geographic coordinates.
    county_xs = [county["lons"] for county in counties.values()]
    county_ys = [county["lats"] for county in counties.values()]

    # Get the names of the counties to map and set the values to plot.
    county_names = [county['name'] for county in counties.values()]
    if county_fips is None:
        # Positional pairing: values must come in the same order as the
        # polygons in the geodata file. At least make sure the counts agree,
        # otherwise the data columns handed to Bokeh have different lengths.
        if len(values_list) != len(counties):
            raise ValueError(
                "Got %d values but %d county polygons for state %r; "
                "pass county_fips to align them by code."
                % (len(values_list), len(counties), state))
        county_rates = values_list
    else:
        # Keyed pairing: look every polygon's value up by its FIPS code.
        rate_by_fips = {int(code): rate
                        for code, rate in zip(county_fips, values_list)}
        county_rates = [
            rate_by_fips.get(state_id * 1000 + county_id, float('nan'))
            for state_id, county_id in counties
        ]

    # Package the data into a dictionary needed for Bokeh.
    data = dict(x=county_xs, y=county_ys, name=county_names, rate=county_rates)

    color_mapper = LinearColorMapper(palette=palette, low=min_value, high=max_value)
    TOOLS = "pan,wheel_zoom,reset,hover,save"

    p = figure(
        title=title, x_axis_location=None, y_axis_location=None, plot_width=980,  plot_height=980,
        tooltips=[("Name", "@name"), (value_name, "@rate%")], tools=TOOLS)

    p.grid.grid_line_color = None
    p.hover.point_policy = "follow_mouse"
    p.patches('x', 'y', source=data,
              fill_color={'field': 'rate', 'transform': color_mapper},
              fill_alpha=0.7, line_color="white", line_width=0.5)

    show(p)

In [48]:
# New York: 2013 adult obesity rate per county on a fixed 14-38 colour scale.
state, column = 'ny', 'PCT_obese_adults_2013'
df_state = extract_state_dataframe(df_source, state, column)
plot_county_map(
    title='Obesity Adults US 2013',
    state=state,
    value_name='Obesity Rate',
    series=df_state[column],
    value_limits=(14, 38),
)

In [49]:
# California: 2013 adult obesity rate per county on a fixed 14-38 colour scale.
state, column = 'ca', 'PCT_obese_adults_2013'
df_state = extract_state_dataframe(df_source, state, column)
plot_county_map(
    title='Obesity Adults US 2013',
    state=state,
    value_name='Obesity Rate',
    series=df_state[column],
    value_limits=(14, 38),
)

In [50]:
# Texas: 2013 adult obesity rate per county on a fixed 14-38 colour scale.
state, column = 'tx', 'PCT_obese_adults_2013'
df_state = extract_state_dataframe(df_source, state, column)
plot_county_map(
    title='Obesity Adults US 2013',
    state=state,
    value_name='Obesity Rate',
    series=df_state[column],
    value_limits=(14, 38),
)

In [51]:
# Alabama: 2013 adult obesity rate per county on a fixed 14-38 colour scale.
state, column = 'al', 'PCT_obese_adults_2013'
df_state = extract_state_dataframe(df_source, state, column)
plot_county_map(
    title='Obesity Adults US 2013',
    state=state,
    value_name='Obesity Rate',
    series=df_state[column],
    value_limits=(14, 38),
)