In [4]:
import pandas as pd
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, plot, iplot

In [5]:
init_notebook_mode(connected=True)

In [6]:
#Read in data set to get state name and code
data = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv')

In [7]:
#show column names
data.columns

Index(['code', 'state', 'category', 'total exports', 'beef', 'pork', 'poultry',
       'dairy', 'fruits fresh', 'fruits proc', 'total fruits', 'veggies fresh',
       'veggies proc', 'total veggies', 'corn', 'wheat', 'cotton'],
      dtype='object')

In [8]:
# only keep the two columns I want
sows = data[['code','state']]

In [9]:
sows.head()

Unnamed: 0,code,state
0,AL,Alabama
1,AK,Alaska
2,AZ,Arizona
3,AR,Arkansas
4,CA,California


In [11]:
#read in dataset of headcount information
df = pd.read_csv('sow_origin_state.csv')
df.head()

Unnamed: 0,Report Date,Reported For Date,State Description,Head Count
0,6/27/2019,6/26/2019,Arkansas,87
1,6/27/2019,6/26/2019,Canada,1658
2,6/27/2019,6/26/2019,Georgia,59
3,6/27/2019,6/26/2019,Illinois,1038
4,6/27/2019,6/26/2019,Indiana,239


In [12]:
#clean data
df['Head Count'] = df['Head Count'].str.replace(',', '')
df['Head Count'] = pd.to_numeric(df['Head Count'])
df.head()

Unnamed: 0,Report Date,Reported For Date,State Description,Head Count
0,6/27/2019,6/26/2019,Arkansas,87
1,6/27/2019,6/26/2019,Canada,1658
2,6/27/2019,6/26/2019,Georgia,59
3,6/27/2019,6/26/2019,Illinois,1038
4,6/27/2019,6/26/2019,Indiana,239


In [13]:
#Create dictionary with total headcounts
sums = {}
for i in df['State Description'].unique():
        sums[i] = int(df.loc[df['State Description']==i,['Head Count']].sum())
sums.pop('Canada') #remove Canada for geo plot

#If state not in data add it with 0
state_names = list(data['state'])
for state in state_names:
    if state not in sums.keys():
        sums.update({state : 0})

#sort list alphabetically
sums =  dict(sorted(sums.items()))

In [14]:
#Adding headcount to data
sows.loc[:,'headcount'] = sums.values()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [15]:
#create logarithmic color scale
scl = [
    [0, 'rgb(242,240,247)'],
    [1./10000, 'rgb(218,218,235)'],
    [1./1000, 'rgb(188,189,220)'],
    [1./100, 'rgb(158,154,200)'],
    [1./10, 'rgb(117,107,177)'],
    [1., 'rgb(81,5,100)']
]

#create tick labels
max_num = sows.loc[:,'headcount'].max()
tickvals = [0, max_num * .05, max_num * .10, max_num * .20, max_num * .50, max_num]
ticktext = [0, max_num * .05, max_num * .10, max_num * .20, max_num * .50, max_num]

data = [go.Choropleth(
    name ='',
    colorscale = scl,
    autocolorscale = False,
    locations = sows['code'],
    z = sows['headcount'],
    locationmode = 'USA-states',
    hovertemplate = ('State: ' + sows['state'] + '<br>' + 
                   sows['headcount'].astype(str)),
    marker = go.choropleth.Marker(
        line = go.choropleth.marker.Line(
            color = 'white',
            width = 2
    )),
    colorbar = go.choropleth.ColorBar(
        title = "Sows",       
))]

layout = go.Layout(
    title = go.layout.Title(
        text = 'Sow Exports by State<br>(Hover for breakdown)'
    ),
    geo = go.layout.Geo(
        scope = 'usa',
        projection = go.layout.geo.Projection(type = 'albers usa'),
        showlakes = True,
        lakecolor = 'white')
)

fig = go.Figure(data = data, layout = layout)
fig.layout.template = 'seaborn'
iplot(fig, filename = 'd3-cloropleth-map')

# To do, fix colorbar legend, commas