In [12]:
# Add imports here.
import geoplotlib as gp
import os
import pandas as pd
import geocoder as gc
import requests


In [13]:
capitals = {
    'Alabama': 'Montgomery',
    'Alaska': 'Juneau',
    'Arizona':'Phoenix',
    'Arkansas':'Little Rock',
    'California': 'Sacramento',
    'Colorado':'Denver',
    'Connecticut':'Hartford',
    'Delaware':'Dover',
    'Florida': 'Tallahassee',
    'Georgia': 'Atlanta',
    'Hawaii': 'Honolulu',
    'Idaho': 'Boise',
    'Illinois': 'Springfield',
    'Indiana': 'Indianapolis',
    'Iowa': 'Des Moines',
    'Kansas': 'Topeka',
    'Kentucky': 'Frankfort',
    'Louisiana': 'Baton Rouge',
    'Maine': 'Augusta',
    'Maryland': 'Annapolis',
    'Massachusetts': 'Boston',
    'Michigan': 'Lansing',
    'Minnesota': 'St. Paul',
    'Mississippi': 'Jackson',
    'Missouri': 'Jefferson City',
    'Montana': 'Helena',
    'Nebraska': 'Lincoln',
    'Nevada': 'Carson City',
    'New Hampshire': 'Concord',
    'New Jersey': 'Trenton',
    'New Mexico': 'Santa Fe',
    'New York': 'Albany',
    'North Carolina': 'Raleigh',
    'North Dakota': 'Bismarck',
    'Ohio': 'Columbus',
    'Oklahoma': 'Oklahoma City',
    'Oregon': 'Salem',
    'Pennsylvania': 'Harrisburg',
    'Rhode Island': 'Providence',
    'South Carolina': 'Columbia',
    'South Dakota': 'Pierre',
    'Tennessee': 'Nashville',
    'Texas': 'Austin',
    'Utah': 'Salt Lake City',
    'Vermont': 'Montpelier',
    'Virginia': 'Richmond',
    'Washington': 'Olympia',
    'West Virginia': 'Charleston',
    'Wisconsin': 'Madison',
    'Wyoming': 'Cheyenne'  
} # create a dictionary, key is the state and value is the capital

In [34]:
# Load replication data using IQSS dataverse client.
from dataverse import Connection

host = 'dataverse.unc.edu'                  # All clients >4.0 are supported
token = '6d9aeda0-b118-4395-98e6-008646674b18'  # Generated at /account/apitoken

# Grab data for https://dataverse.unc.edu/dataset.xhtml?persistentId=doi:10.15139/S3/3EAPI2
connection = Connection(host, token)
print(connection._service_document)
dataverse = connection.get_dataverse('unc')
dataset = dataverse.get_dataset_by_doi('DOI:10.15139/S3/3EAPI2')
files = dataset.get_files('latest')

<Element {http://www.w3.org/2007/app}service at 0x11b043ac8>


AttributeError: 'NoneType' object has no attribute 'get_dataset_by_doi'

In [15]:
# TODO check that geocoded csv doesn't already exist.
# Load replication data from file.
df = pd.read_csv('Statehouses.tab.tsv', sep='\t')

# Create a new dataframe with the columns we care about.
filtered = df[['abbr', 'state', 'year', 'lobbyists']].copy()

# Create a groupby object.
filtered_by_state = filtered.groupby('state')

# Load our geocoder.
from geopy import geocoders  
gn = geocoders.GeoNames(username='kuranes')

coordinates = {}

# Populate coordinate data.
for state, rows in filtered_by_state:
    # Get state capital.
    try:
        capital = capitals[state]
        g = gn.geocode(capital + ', ' + state)
        coordinates[state] = [g.latitude, g.longitude]
    except:
        print("no code found for " + capital + ', ' + state)
        
# TODO save DF to CSV for later use.


In [16]:
for index, row in filtered.iterrows():
    latitude, longitude = coordinates[row['state']]
    filtered.at[index, 'lat'] = latitude
    filtered.at[index, 'lon'] = longitude
    
# Fill NaN as 0.
filtered.fillna(0, inplace=True)

In [25]:
# @See https://plot.ly/python/choropleth-maps/#united-states-choropleth-map
import plotly.plotly as py
import plotly

# Convert ints to strings.
for col in filtered.columns:
    filtered[col] = filtered[col].astype(str)

filtered_by_year = filtered.groupby('year')

# TODO make a time series like https://github.com/plotly/dash-opioid-epidemic-demo/blob/master/app.py#L55
# @See https://community.plot.ly/t/python-plotly-how-to-make-a-choropleth-map-with-a-slider-access-grid-data-issue/3218/8
for year, rows in filtered_by_year:
    scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]

    data = [ dict(
            type='choropleth',
            colorscale = scl,
            autocolorscale = False,
            locations = rows['abbr'],
            z = rows['lobbyists'].astype(float),
            locationmode = 'USA-states',
            text = rows['state'] + ': ' + rows['lobbyists'] + ' total lobbyists in ' + year,
            marker = dict(
                line = dict (
                    color = 'rgb(255,255,255)',
                    width = 2
                ) ),
            colorbar = dict(
                title = "Total State Lobbyists")
            ) ]

    layout = dict(
            title = 'Lobbyists Per State in ' + year + '<br>(Hover for breakdown)',
            geo = dict(
                scope='usa',
                projection=dict( type='albers usa' ),
                showlakes = True,
                lakecolor = 'rgb(255, 255, 255)'),
                 )

    fig = dict(data=data, layout=layout)
    plotly.offline.plot(fig, filename='lobbyists-cloropleth-map.html')