In [1]:
import pandas as pd

from bokeh.io        import show
from bokeh.models    import (ColumnDataSource, HoverTool, LogColorMapper)
from bokeh.palettes  import Viridis6 as palette
from bokeh.plotting  import figure

from bokeh.sampledata.us_counties import data as counties
from bokeh.sampledata.unemployment import data as unemployment

# Use a reversed *copy* instead of reversing in place: slicing works whether the
# palette is a list or a tuple, leaves the object owned by bokeh.palettes
# untouched, and keeps this cell idempotent (re-running it re-imports the
# original palette and reverses it once, rather than flipping it back).
palette = palette[::-1]

In [2]:
# One row per county: the sample-data dict is keyed by county id, and each value
# is a dict of attributes, so transposing puts the attributes in columns.
# Columns are renamed by their source key (not by position) so the result does
# not depend on the order in which pandas lays out the inner-dict keys; the
# final .loc pins the column order used by the rest of the notebook.
df_counties = (
    pd.DataFrame(counties).T
    .rename(columns={
        'detailed name': 'Location',
        'lats': 'Lat',
        'lons': 'Lon',
        'name': 'County',
        'state': 'State',
    })
    .loc[:, ['Location', 'Lat', 'Lon', 'County', 'State']]
)
# A bare expression in the middle of a cell is not displayed, so print the shape.
print(df_counties.shape)
df_counties.head()

Unnamed: 0,Unnamed: 1,Location,Lat,Lon,County,State
1,1,"Autauga County, Alabama","[32.4757, 32.46599, 32.45054, 32.44245, 32.439...","[-86.41182, -86.41177, -86.41167, -86.41157, -...",Autauga,al
1,3,"Baldwin County, Alabama","[30.28557, 30.21934, 30.21771, 30.21183, 30.20...","[-87.51203, -87.56704, -87.5741, -87.59954, -8...",Baldwin,al
1,5,"Barbour County, Alabama","[32.02221, 32.02066, 32.0135, 32.00249, 31.996...","[-85.04884, -85.05367, -85.05381, -85.06454, -...",Barbour,al
1,7,"Bibb County, Alabama","[33.13143, 33.13086, 33.15133, 33.18184, 33.18...","[-87.23637, -87.21582, -87.19914, -87.19907, -...",Bibb,al
1,9,"Blount County, Alabama","[33.949, 33.95621, 33.9629, 33.97324, 33.99538...","[-86.8332, -86.81779, -86.79248, -86.7719, -86...",Blount,al


In [3]:
# Load the county-level cardiovascular mortality table. The file carries a
# .csv extension but is parsed as tab-separated.
cardio_path = "US_Cardiovascular_Disease_Mortality_Rates_by_County_1980_2014.csv"
df_cardio = pd.read_csv(cardio_path, sep="\t")

# Quick sanity check: dimensions first, then the leading rows.
print(df_cardio.shape)
df_cardio.head()

(3194, 11)


Unnamed: 0,Location,FIPS,Mortality Rate_1980,Mortality Rate_1985,Mortality Rate_1990,Mortality Rate_1995,Mortality Rate_2000,Mortality Rate_2005,Mortality Rate_2010,Mortality Rate_2014,%_Change_in_Mortality Rate_1980_2014
0,United States,,"507.37 (499.60, 514.90)","468.10 (460.74, 475.17)","410.43 (403.72, 417.33)","384.09 (377.41, 390.91)","349.75 (343.46, 356.26)","298.31 (292.81, 304.05)","257.23 (251.82, 262.70)","252.70 (247.12, 258.27)","-50.20 (-50.82, -49.55)"
1,Alabama,1.0,"516.87 (508.36, 524.93)","485.17 (477.42, 493.18)","436.61 (429.10, 444.50)","414.45 (406.84, 422.16)","397.40 (389.49, 405.04)","352.09 (344.74, 359.36)","310.55 (303.87, 317.53)","310.59 (303.44, 317.77)","-39.91 (-40.97, -38.84)"
2,"Autauga County, Alabama",1001.0,"552.68 (521.37, 586.71)","523.12 (492.13, 551.59)","470.30 (444.14, 496.02)","437.74 (415.23, 459.66)","413.92 (393.16, 432.92)","368.83 (350.83, 387.85)","316.82 (299.06, 333.37)","316.36 (298.73, 334.72)","-42.76 (-47.26, -37.68)"
3,"Baldwin County, Alabama",1003.0,"445.67 (423.09, 467.49)","419.28 (400.00, 438.89)","379.58 (362.44, 396.83)","357.73 (342.10, 374.22)","333.18 (319.09, 347.23)","308.99 (296.42, 322.99)","279.98 (268.54, 291.74)","272.04 (260.19, 284.03)","-38.96 (-42.73, -34.94)"
4,"Barbour County, Alabama",1005.0,"515.53 (484.52, 547.21)","468.71 (443.66, 493.54)","417.73 (395.64, 440.17)","387.08 (367.47, 405.98)","364.29 (345.57, 384.22)","301.04 (283.89, 318.38)","264.63 (249.05, 280.85)","255.09 (238.51, 271.82)","-50.52 (-54.72, -45.98)"


In [4]:
# Reshape wide -> long: every non-id column becomes one row per location, with
# the original column label in "Category_Year" and the cell text in "Value".
df2_cardio = pd.melt(
    df_cardio,
    id_vars=["Location", "FIPS"],
    # Scalar, not a one-element list: df_cardio has ordinary (non-MultiIndex)
    # columns, for which melt documents var_name as a scalar.
    var_name="Category_Year",
    value_name="Value",
)

def split_word(text, deli, i):
    """Return the ``i``-th piece of ``text`` after splitting it on ``deli``.

    Args:
        text: String to split.
        deli: Delimiter passed to ``str.split``.
        i: Index of the piece to return (negative indices count from the end).

    Raises:
        IndexError: If ``i`` is outside the range of the resulting pieces.
    """
    return text.split(deli)[i]

# Column labels look like "Mortality Rate_1980": the text before the first "_"
# is the measure, the text after it is the year. Values look like
# "552.68 (521.37, 586.71)": the first space-separated token is the estimate.
# The vectorised .str accessors replace the row-wise apply() calls and
# propagate missing values as NaN instead of raising on them.
label_parts = df2_cardio['Category_Year'].str.split('_')
df2_cardio['Category'] = label_parts.str[0]
df2_cardio['Year'] = label_parts.str[1]
df2_cardio['Rate'] = df2_cardio['Value'].str.split(' ').str[0]

# Keep only the mortality-rate rows (drops the "%_Change_..." column's rows).
# .copy() makes df3_cardio an independent frame, so the numeric conversions
# below do not assign into a slice of df2_cardio (SettingWithCopyWarning).
df3_cardio = df2_cardio.loc[
    df2_cardio['Category'] == 'Mortality Rate',
    ['Location', 'Year', 'Rate'],
].copy()
df3_cardio['Rate'] = pd.to_numeric(df3_cardio['Rate'])
df3_cardio['Year'] = pd.to_numeric(df3_cardio['Year'])

# Attach county geometry by location name, then drop rows that found no
# matching county (e.g. the national and state-level aggregates).
df_joined = df3_cardio.set_index('Location').join(df_counties.set_index('Location'))
df_joined = df_joined[pd.notnull(df_joined['County'])]

# Slice shown on the map.
# NOTE(review): the variable is named "_2014" but the filter selects 1980;
# the original behaviour is kept here - confirm which year is intended.
MAP_YEAR = 1980
MAP_STATE = 'sc'
df_joined_2014 = df_joined[(df_joined.Year == MAP_YEAR) & (df_joined.State == MAP_STATE)]


In [5]:
# Pull the four plotted columns out of the selected slice as plain lists.
county_lon, county_lat, county_name, county_rate = (
    df_joined_2014[column].tolist()
    for column in ('Lon', 'Lat', 'County', 'Rate')
)

# Logarithmic colour scale over the palette prepared in the imports cell.
color_mapper = LogColorMapper(palette=palette)

# Data source for the county patches: one entry per county in each column.
col_source = ColumnDataSource(data={
    'x': county_lon,
    'y': county_lat,
    'name': county_name,
    'rate': county_rate,
})

In [6]:
TOOLS = "pan,wheel_zoom,reset,hover,save"

# Choropleth of the selected counties. The plotted "rate" field is the
# cardiovascular mortality rate, so the title and tooltip are labelled as such
# (they previously said "Control Percentage" / "Control Rate" with a "%" sign).
p2 = figure(
    title='Cardiovascular Disease Mortality Rate',
    tools=TOOLS,
    x_axis_location=None,
    y_axis_location=None,
    # width/height replace the deprecated plot_width/plot_height arguments.
    width=1000,
    height=1000
)

# A map needs no grid lines.
p2.grid.grid_line_color = None

# One patch per county, filled according to its rate via the colour mapper.
p2.patches(
    'x',
    'y',
    source=col_source,
    fill_color={'field': 'rate', 'transform': color_mapper},
    fill_alpha=0.7,
    line_color='white',
    line_width=0.5
)

# Configure the hover tool that the "hover" entry in TOOLS created.
hover = p2.select_one(HoverTool)
hover.point_policy = 'follow_mouse'
hover.tooltips = [
    ("Name", "@name"),
    ("Mortality Rate", "@rate"),
    ("(Long, Lat)", "($x, $y)")
]

show(p2)