In [1]:
import sys
import numpy as np
import pandas as pd
import numpy as np
import us
import shapefile

from bokeh.plotting import output_notebook, show, output_file, Figure
from bokeh.models.glyphs import Patches, Line, Circle
from bokeh.models import (
    GMapPlot, Range1d, ColumnDataSource, LinearAxis,
    HoverTool, PanTool, WheelZoomTool, BoxSelectTool, ResetTool, PreviewSaveTool,
    GMapOptions,
    NumeralTickFormatter, PrintfTickFormatter)
from collections import OrderedDict



In [2]:
## Read in information about current veteran population per congressional district
vets_orig = pd.read_excel("/Users/kschles/Documents/bokeh/veterans/10L_VetPop2014.xlsx")

## Trim down to current day stats
vets = vets_orig.loc[np.where(vets_orig['Date']=='2013-09-30')[0]]

In [4]:
## File that matches up each full state name with abbreviated two letter nickname
abbrev = pd.read_table('veterans/state_abbrev.txt', names=['state', 'nickname', 'short', 'capital'])

In [5]:
## Read in the population per CD from American Fact Finder data 
aff_pop = pd.read_csv('veterans/aff_download/ACS_12_1YR_S0101_with_ann.csv', skiprows=1)

In [6]:
## Read in file with information about legislators 
legislators = pd.read_csv('veterans/legislators-current.csv')

## Trim it down to representatives only (no senators)
reps = legislators.loc[np.where(legislators['type']=='rep')[0]]
reps.reset_index(drop=True, inplace=True)

Each of these use their own system for organising district identifiers. We put them all on a single system, where a district is identified by XX-YY, where XX is the two letter code for the state, and YY is a two digit number representing the district. If there is only a single district in the whole state, rather than a number YY is 'AL'

In [7]:
aff_pop['dnum'] = aff_pop['Id'].str[-2:]
aff_pop['dstate'] = aff_pop['Geography'].str.split(',').str[1].str.lstrip()

state_abbrev = []
district_id = []

for i in range(0,len(aff_pop)):
    temp = aff_pop.loc[i,'dstate']
    if ((temp!='District of Columbia') & (temp!='Puerto Rico')):
        #print temp
        shorter = abbrev.loc[np.where(abbrev['state']==temp)[0],'short']
        state_abbrev.append(np.array(shorter)[0])
        if (aff_pop.loc[i,'dnum']=="00"): 
            district_id.append(np.array(shorter)[0] + '-AL')
        if (aff_pop.loc[i,'dnum']!="00"): 
            district_id.append(np.array(shorter)[0] + '-' + aff_pop.loc[i,'dnum'].zfill(2))
    else: 
        district_id.append('None')

                               
aff_pop['district_id'] = np.array(district_id)

In [8]:
cd_num1 = vets['Congressional District (CD)'].str.split(' ').str[1]
cd_num2 = cd_num1.str.replace('at', 'AL')
cd_num = cd_num2.str.replace('(', '')

state_abbrev = []
district_id = []

for i in range(0,len(vets)):
    temp = vets.loc[i,'State']
    if ((temp!='District of Columbia') & (temp!='Puerto Rico')):
        #print temp
        shorter = abbrev.loc[np.where(abbrev['state']==temp)[0],'short']
        state_abbrev.append(np.array(shorter)[0])
        district_id.append(np.array(shorter)[0] + '-' + cd_num[i].zfill(2))
    else : 
        district_id.append('None')

vets['district_id'] = district_id

In [9]:
reps['district_id'] = None 
reps['full_name'] = None

for i in range(0,len(reps)): 
    dname = reps.loc[i,'state']+'-'+reps.loc[i,'district'].astype(int).astype(str).zfill(2)
    if (reps.loc[i,'district']==0.): 
        dname = reps.loc[i,'state'] + '-AL'
    reps.loc[i,'district_id'] = dname
    reps.loc[i,'full_name'] = reps.loc[i,'first_name'] + ' ' + reps.loc[i, 'last_name']

In [10]:
## The veteran population information is currently split by various parameters (Age, gender). We sum it below:
sum_by_dist = vets.groupby('district_id', as_index=False).sum()

In [11]:
## We merge the veteran population information with the data on the congressional district as a whole
combo1 = pd.merge(sum_by_dist, aff_pop, how='left', on='district_id')

In [12]:
## Next, we merge in the information about the representative
combo = pd.merge(combo1, reps, how='left', on='district_id')

In [13]:
combo['pop_per'] = 100.*combo['Veteran']/combo['Total; Estimate; Total population']

In [14]:
combo[['district_id', 'pop_per', 'full_name', 'state']].sort('pop_per', ascending=False)
test = combo[['district_id', 'pop_per', 'full_name', 'state']].groupby('state', as_index=False).mean()
test.reset_index(drop=True, inplace=True)

print test.sort('pop_per', ascending=False)[0:5]
print test.sort('pop_per', ascending=True)[0:5]

   state    pop_per
0     AK  10.022437
25    MT   9.963745
20    ME   9.729527
44    VA   9.517006
48    WV   9.096037
   state   pop_per
33    NY  4.732054
4     CA  4.995788
30    NJ  5.013908
43    UT  5.344056
13    IL  5.738948


States with the highest mean percentage of veterans: Maine, Virginia, West Virginia, Oklahoma, Washington 

States with the smallest mean percentage of veterans: New York, California, New Jersey, Utah, Illinois 

In [15]:
cdnames = combo['district_id'].tolist()
percentage = combo['pop_per'].tolist()
representative = combo['full_name'].tolist()
party = combo['party'].tolist()

maxper = np.amax(percentage, axis=0)
minper = np.amin(percentage, axis=0)


In [16]:
reader = shapefile.Reader('/Users/kschles/Downloads/districtShapes/districts114.shp')

In [17]:
districts = set([i[0] + '-' + i[2] for i in reader.iterRecords()])

In [18]:
from bokeh.palettes import Blues9
colors = ['#ffffff']+Blues9[::-1]

numcolors = len(colors)

#perstep = (sorted(combo['pop_per'])[-2]-min(combo['pop_per']))/(numcolors-1)

maxpop = np.amax(combo['pop_per'])
minpop = np.amin(combo['pop_per'])
popstep = (maxpop-minpop)/(numcolors-1)

combo['color_vals'] = '#ffffff'

for i in range(0,len(combo)):
    try:
        popnorm = float(combo.loc[i,'pop_per']) - minpop
        idx = int(popnorm/popstep)
        combo.loc[i,'color_vals'] = (colors[idx])
    except KeyError:
        combo.loc[i,'color_vals'] = "black"




In [19]:
districts = []
shape_id = []

for i in reader.iterRecords(): 
    state_name = i[0]
    if (state_name != 'District Of Columbia'):
        state_short = np.array(abbrev.loc[np.where(abbrev['state']==state_name)[0],'short'])
        dist_num = str(i[2]).zfill(2)
        if (dist_num == '00'): 
            dist_num = 'AL'
        district_id=state_short + '-' + dist_num
        districts.append(district_id[0])
        shape_id.append(i[1])

In [21]:
for k in range(50,100): 
    dist_name = districts[k]
    sid = shape_id[k]
    
    print dist_name 
    
    districtDict = {dist_name: {} }
    rec = []
    shp = []
    points = []

    for i in reader.shapeRecords(): 
        if i.record[1] == sid: 
            rec.append(i.record)
            shp.append(i.shape)
            
            for j in shp: 
                points = []
                num_parts = len(j.parts)
                end = len(j.points) - 1
                segments = list(j.parts) + [end]
                for m in range(num_parts): 
                    points.append(j.points[segments[m]:segments[m+1]])
                    
            lat = []
            lon = []
            for m in points: 
                lat.append([n[0] for n in m])
                lon.append([n[1] for n in m])
    latty = []
    lonny = []
    for m in range(0,len(lat)): 
        if (m==0): 
            latty = lat[m] + [np.nan]
            lonny = lon[m] + [np.nan]
        else: 
            latty = latty + lat[m] + [np.nan]
            lonny = lonny + lon[m] + [np.nan]
        
    districtDict[dist_name]['lat_list'] = latty
    districtDict[dist_name]['lon_list'] = lonny
    districtDict[dist_name]['state'] = dist_name[0:2]
    
    indexer=np.where(combo['district_id'] == dist_name)
    districtDict[dist_name]['color'] = np.array(combo.loc[indexer[0], 'color_vals'])[0]
    districtDict[dist_name]['rep_name'] = np.array(combo.loc[indexer[0], 'full_name'])[0]
    districtDict[dist_name]['rep_party'] = np.array(combo.loc[indexer[0], 'party'])[0]
    districtDict[dist_name]['pop_per'] = np.array(combo.loc[indexer[0], 'pop_per'])[0]
    
    if (k==0): 
        districtInfo = districtDict
    else: 
        districtInfo.update(districtDict)
    
    

MO-05
MO-06
MO-07
MO-08
MT-AL
NE-01
NE-02
NE-03
NV-01
NV-02
NV-03
NV-04
NH-01
NH-02
NJ-01
NJ-02
NJ-03
NJ-04
NJ-05
OK-01
OK-02
OK-03
OK-04
OK-05
OR-01
OR-02
OR-03
OR-04
OR-05
PA-01
PA-02
PA-03
PA-04
PA-05
PA-06
PA-07
PA-08
PA-09
PA-10
PA-11
PA-12
PA-13
PA-14
PA-15
PA-16
PA-17
PA-18
RI-01
RI-02
SC-01


Helpful code reference: https://github.com/queise/Berlin_Maps/blob/master/Berlin_pop_gmap.py

In [22]:
dname = []
bxs = []
bys = []
bcol = []
repname = []
popper = []
party = []

for i in districtInfo.keys(): 
    dname.append(i)
    bxs.append(districtInfo[i]['lat_list'])
    bys.append(districtInfo[i]['lon_list'])
    bcol.append(districtInfo[i]['color'])
    repname.append(districtInfo[i]['rep_name'])
    popper.append(districtInfo[i]['pop_per'])
    party.append(districtInfo[i]['rep_party'])


In [131]:
"""
source = ColumnDataSource(data=dict( namer=dname,
                               rpname=repname))
p1 = GMapPlot(title="", 
             plot_width=900, plot_height=700,
             x_range = Range1d(), y_range = Range1d(),
             map_options=GMapOptions(lat=39.8333, lng=-98.585522, zoom=4))

p1.map_options.map_type="roadmap" # satellite, roadmap, terrain or hybrid
"""


'\nsource = ColumnDataSource(data=dict( namer=dname,\n                               rpname=repname))\np1 = GMapPlot(title="", \n             plot_width=900, plot_height=700,\n             x_range = Range1d(), y_range = Range1d(),\n             map_options=GMapOptions(lat=39.8333, lng=-98.585522, zoom=4))\n\np1.map_options.map_type="roadmap" # satellite, roadmap, terrain or hybrid\n'

In [23]:
p1 = Figure(plot_width=900, plot_height=800)

In [24]:
source_patches = ColumnDataSource(
    data=dict(
        bo_xs = bxs, 
        bo_ys = bys,
        namer = dname, 
        colors = bcol,
        rpname = repname, 
        rpparty = party,
        pop_per = popper, 
    )
)

In [25]:
patches = Patches(xs='bo_xs', ys='bo_ys', fill_color='colors', line_color="white", fill_alpha=1.0)
patches_glpy = p1.add_glyph(source_patches, patches)

In [26]:
p1.add_tools(PanTool(), WheelZoomTool(), HoverTool(), 
            ResetTool(), PreviewSaveTool())

hover = p1.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
    ("Borough", "@namer"),
    ("Representative", "@rpname"),
    ("Veteran Population Percentage", '@pop_per'),
    ("Representative Party", '@rpparty')
])

In [27]:
output_file('mapper.html')
show(p1)

In [72]:
output_notebook()

TOOLS= [BoxSelectTool(), ResetTool(), PanTool(), HoverTool(tooltips=[("rep_name","@rep_name"), ("location", "$x, $y")])]
p = Figure(plot_width=900, plot_height=800, tools=TOOLS)

#hover = p.select(dict(type=HoverTool))
#hover.tooltips = OrderedDict([
#    ("District", "@state"),
#    ("Veteran Percentage", "@pop_per"),
#    ("Representative", "@rep_name"),
#])

for dist_name in districts[0:14]: 

    #source = ColumnDataSource(
    #    data=dict(
    #        namer = dist_name,
    #        x = districtInfo[dist_name]['lat_list'],
    #        y = districtInfo[dist_name]['lon_list'],
    #        coloring = districtInfo[dist_name]['color'],
    #        rep = districtInfo[dist_name]['rep_name'],
    #    )
    #)
    
    p.patches(districtInfo[dist_name]['lat_list'], districtInfo[dist_name]['lon_list'],
              fill_color=districtInfo[dist_name]['color'], fill_alpha=1.0,
              line_color="white", line_width=0.5)
    
#show(p)

In [51]:
output_file('mapper.html')

In [53]:
percentage = []
representative = []
for i in districtInfo.keys():
    percentage = districtInfo[i]['pop_per']
    representative = districtInfo[i]['rep_name']

In [56]:
source = ColumnDataSource(data=dict( boroughsnames=districts,
                               representative=representative))
p = GMapPlot(title="", 
             plot_width=900, plot_height=700,
             x_range = Range1d(), y_range = Range1d(),
             map_options=GMapOptions(lat=64.2, lng=-149.50, zoom=4))

p.add_tools(PanTool(), WheelZoomTool(), HoverTool(), 
            ResetTool(), PreviewSaveTool())

p.map_options.map_type="roadmap" # satellite, roadmap, terrain or hybrid


In [68]:
type(districtInfo['ME-01'])
districtInfo['ME-01'].keys()

['rep_name', 'color', 'rep_party', 'pop_per', 'lon_list', 'state', 'lat_list']

In [70]:
dist_name='MO-07'

source = ColumnDataSource(
    data=dict(
        lat=[64.29, 65.20, 63.29],
        lon=[-150.70, -150.74, -150.78],
    )
)

patches = Patches(xs="lon", ys="lat", fill_color="blue", fill_alpha=0.8, line_color=None)
p.add_glyph(source, patches)


show(p)

TypeError: 'numpy.float64' object is not iterable

In [232]:
p = figure(tools=TOOLS, plot_width=900, plot_height=800)

district_name='MO-07'
source_patches = ColumnDataSource(data=districtInfo[district_name])


patches = Patches(xs='lat_list', ys='lon_list', fill_color='color',
                  fill_alpha=0.5, line_color="black", line_width=0.5)
patches_glyph = p.add_glyph(source_patches, patches)




In [233]:
show(p)

TypeError: 'numpy.float64' object is not iterable

In [221]:
districtInfo['MO-07'].keys()

['rep_name', 'color', 'rep_party', 'pop_per', 'lon_list', 'state', 'lat_list']

In [226]:
p.add_tools(PanTool(), WheelZoomTool(), HoverTool(), 
            ResetTool(), PreviewSaveTool())

In [215]:
hover = p.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
    ("District", "@state"),
    ("Veteran Percentage", "@pop_per"),
    ("Representative", "@rep_name"),
])


In [227]:
bk.show(p)