In [1]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import NumeralTickFormatter, BasicTickFormatter, HoverTool, ColumnDataSource

#data used for map
from bokeh.sampledata.us_states import data as states

In [2]:
# Direct Bokeh output to the notebook so that show() renders plots inline below the cell
output_notebook()

In [6]:
# Load the sow origin-by-state report into a DataFrame.
csv_path = "sow_origin_state.csv"
df = pd.read_csv(csv_path)

# Preview the first five rows to sanity-check the parsed columns.
df.head(n=5)

Unnamed: 0,Report Date,Reported For Date,State Description,Head Count
0,6/27/2019,6/26/2019,Arkansas,87
1,6/27/2019,6/26/2019,Canada,1658
2,6/27/2019,6/26/2019,Georgia,59
3,6/27/2019,6/26/2019,Illinois,1038
4,6/27/2019,6/26/2019,Indiana,239


In [7]:
# Drop 'Report Date' because it is not the correct date for these counts
# ('Reported For Date' is the one kept). errors='ignore' makes the cell safe to
# re-run: without it a second run raises KeyError, since the column is already gone.
df = df.drop(columns=['Report Date'], errors='ignore')

# Drop the Canada rows; only concerned about the United States right now.
# Dropping an empty set of labels is a no-op, so this line is re-runnable as well.
df = df.drop(index=df.index[df['State Description'] == 'Canada'])

In [8]:
# Break the 'Reported For Date' text (month/day/year separated by "/") into pieces.
date_parts = df['Reported For Date'].str.split("/", expand=True)

# Pieces come out as 0=month, 1=day, 2=year; the new columns are appended in
# Day, Month, Year order.
for column, position in (("Day", 1), ("Month", 0), ("Year", 2)):
    df[column] = date_parts[position]

# The combined date column is redundant once it has been split.
df.drop(columns=['Reported For Date'], inplace=True)
df.head()

Unnamed: 0,State Description,Head Count,Day,Month,Year
0,Arkansas,87,26,6,2019
2,Georgia,59,26,6,2019
3,Illinois,1038,26,6,2019
4,Indiana,239,26,6,2019
5,Iowa,2222,26,6,2019


In [9]:
# Strip thousands separators from 'Head Count' and convert the column to numbers.
# The dtype guard keeps the cell re-runnable: once the column is numeric the .str
# accessor raises AttributeError, so the conversion is skipped on later runs (and
# when the CSV happened to parse as numbers in the first place).
if not pd.api.types.is_numeric_dtype(df['Head Count']):
    # regex=False: treat ',' as a literal character rather than a pattern
    head_count_text = df['Head Count'].str.replace(',', '', regex=False)
    df['Head Count'] = pd.to_numeric(head_count_text)

In [10]:
# Get state names from Bokeh's state dataset (nested dictionary)
state_names = [state['name'] for state in states.values()]

# Total head count per state, as a plain {state: int} dictionary.
# A single groupby replaces the per-state filtering loop, and summing the
# 'Head Count' Series yields scalars directly; the previous
# int(<one-element Series>) pattern is deprecated in recent pandas.
# sort=False keeps the states in order of first appearance, as the loop did.
# Note: groupby leaves out rows whose 'State Description' is missing.
state_totals = df.groupby('State Description', sort=False)['Head Count'].sum()
sums = {state: int(total) for state, total in state_totals.items()}

# For any state with no head count, put 0 in the dictionary
for state in state_names:
    sums.setdefault(state, 0)

# Alaska produced 32 sows, taking out for now.
# pop() is the last expression, so the removed value is shown as the cell output.
sums.pop('Alaska')

32

In [11]:
# Get data for the graph: parallel lists of state names and their sow totals
state_names = list(sums.keys())
sow_prod = list(sums.values())
data = {'State': state_names, 'Sows': sow_prod}

# Tooltip with the state name and its comma-formatted total. 'vline' mode shows the
# tooltip whenever a vertical line through the cursor crosses a bar.
hover = HoverTool(tooltips=[("State", "@State"), ("Sows", "@Sows{0,0}")], mode='vline')

# Define the figure (categorical x-axis, one slot per state), then draw the bars.
# `width` is used instead of `plot_width`, which was removed in Bokeh 3.0.
p = figure(x_range=state_names, title="State Sow Production", width=900,
           toolbar_location='right', tools='pan,box_zoom,reset,save')
p.vbar(x='State', top='Sows', width=0.85, source=data)  # bar width < 1 leaves gaps between bars
p.add_tools(hover)
p.hover.point_policy = "follow_mouse"

p.xgrid.grid_line_color = None
p.xaxis.axis_label = "States"
p.x_range.range_padding = 0.05  # padding around the outside of the x-range
p.xaxis.major_label_orientation = 1.2  # radians; tilts the labels to a diagonal

p.yaxis.axis_label = "Sows"
# Replacing the formatter outright adds commas to the number labels; no
# use_scientific tweak on the default formatter is needed since it is discarded here.
p.yaxis.formatter = NumeralTickFormatter(format="0,0")

show(p)