In [1]:
import numpy as np
import pandas as pd
import plotly.plotly as py

In [2]:
# The layout file has one row per BRFSS variable; the three columns are named
# here as start column, variable name, and field width.
layout = pd.read_csv(
    '/home/michael/Documents/brfss2016_layout.csv',
    names=['StartCol', 'Name', 'Length'],
)

# Variables retained for the analysis; everything else in the layout is skipped.
keep_columns = [
    '_STATE', 'GENHLTH', 'PHYSHLTH', 'MENTHLTH', '_PHYS14D', '_MENT14D', 'POORHLTH',
    'HLTHPLN1', 'PERSDOC2', 'EDUCA', '_BMI5', '_BMI5CAT', '_RFBMI5',
    '_INCOMG', 'SLEPTIM1', '_SMOKER3', '_RFSMOK3', '_RFDRHV5', 'DRNKANY5', 'EXERANY2',
]
is_kept = layout['Name'].isin(keep_columns)
layout_filtered = layout[is_kept]

# Convert each (start, length) pair into a (start - 1, start + length - 1)
# tuple, the half-open zero-based interval form that pd.read_fwf accepts as colspecs.
field_start = layout_filtered['StartCol'] - 1
field_end = layout_filtered['StartCol'] + layout_filtered['Length'] - 1
colspecs = list(zip(field_start, field_end))

In [4]:
# Load only the selected columns from the fixed-width BRFSS file. Column names
# come from the filtered layout so they line up one-to-one with `colspecs`.
df = pd.read_fwf('/home/michael/Documents/LLCP2016.asc', colspecs=colspecs,
                 header=None, names=layout_filtered['Name'])

# Drop DC, Guam, Puerto Rico, Virgin Islands. The .copy() makes `df` an
# independent frame, so adding the STCODE column below does not raise
# SettingWithCopyWarning on a filtered slice.
df = df[~df['_STATE'].isin([11, 66, 72, 78])].copy()
state_code = np.sort(df['_STATE'].unique())

# Explicit state FIPS code -> postal abbreviation mapping. Pairing the sorted
# codes found in the data with an alphabetical list by position would silently
# mislabel every later state if a single state were missing from the file or
# an unexpected code were present.
state_dict = {
    1: 'AL', 2: 'AK', 4: 'AZ', 5: 'AR', 6: 'CA', 8: 'CO', 9: 'CT', 10: 'DE', 12: 'FL', 13: 'GA',
    15: 'HI', 16: 'ID', 17: 'IL', 18: 'IN', 19: 'IA', 20: 'KS', 21: 'KY', 22: 'LA', 23: 'ME', 24: 'MD',
    25: 'MA', 26: 'MI', 27: 'MN', 28: 'MS', 29: 'MO', 30: 'MT', 31: 'NE', 32: 'NV', 33: 'NH', 34: 'NJ',
    35: 'NM', 36: 'NY', 37: 'NC', 38: 'ND', 39: 'OH', 40: 'OK', 41: 'OR', 42: 'PA', 44: 'RI', 45: 'SC',
    46: 'SD', 47: 'TN', 48: 'TX', 49: 'UT', 50: 'VT', 51: 'VA', 53: 'WA', 54: 'WV', 55: 'WI', 56: 'WY',
}
# Kept for any code that still refers to the plain list of abbreviations.
state_abbrev = list(state_dict.values())

# .map leaves NaN for any code missing from the mapping, instead of mixing raw
# numeric codes in with the abbreviation strings as .replace would.
df['STCODE'] = df['_STATE'].map(state_dict)

In [5]:
# Keep rows whose GENHLTH is one of the codes 1-5 and whose _RFBMI5 is 1 or 2.
# Other codes are presumably "don't know / refused / missing" -- confirm
# against the BRFSS codebook.
valid_rows = df['GENHLTH'].isin([1, 2, 3, 4, 5]) & df['_RFBMI5'].isin([1, 2])
df_filter = df[valid_rows]

# Mean GENHLTH per state for each _RFBMI5 group (going by the variable names,
# 1 = not overweight and 2 = overweight). A single .loc selects rows and
# columns together instead of chained indexing, and .mean() replaces the
# deprecated .agg(np.mean) while producing the same result.
agg_genhlth_not_ow = (df_filter.loc[df_filter['_RFBMI5'] == 1, ['STCODE', 'GENHLTH']]
                      .groupby('STCODE').mean())
agg_genhlth_ow = (df_filter.loc[df_filter['_RFBMI5'] == 2, ['STCODE', 'GENHLTH']]
                  .groupby('STCODE').mean())

# Per-state difference: overweight mean minus non-overweight mean (aligned on STCODE).
agg_ow = agg_genhlth_ow - agg_genhlth_not_ow

In [9]:
# Two-stop colour scale for the choropleth.
scl = [[0.0, 'rgb(180,230,180)'], [1.0, 'rgb(80,150,80)']]

# Build the hover text in its own Series so that agg_ow is left untouched.
# Converting agg_ow's columns to strings in place (and adding a 'text' column)
# would leave the aggregate non-numeric for anything that uses it afterwards.
hover_text = (agg_ow.index + '<br>' + 'GENHLTH difference ' +
              agg_ow['GENHLTH'].astype(str))

data = [dict(type='choropleth',
             colorscale=scl,
             autocolorscale=False,
             locations=agg_ow.index,
             # astype(float) is a no-op on numeric data and also tolerates an
             # agg_ow whose column was already stringified in this kernel.
             z=agg_ow['GENHLTH'].astype(float),
             locationmode='USA-states',
             text=hover_text,
             marker=dict(line=dict(color='rgb(255,255,255)', width=2)),
             colorbar=dict(title='Difference in<br>mean health score'))]

# NOTE(review): this rebinds `layout`, which earlier held the column-layout
# DataFrame read from the CSV; the name is kept so existing references to the
# figure layout keep working.
layout = dict(title=('Difference in Self-Reported General Health<br>' +
                     'for Overweight and Non-Overweight Individuals by State'),
              geo=dict(scope='usa',
                       projection=dict(type='albers usa'),
                       showlakes=True,
                       lakecolor='rgb(255, 255, 255)'))

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='genhlth-ow-cloropleth-map')