## The Changing American Diet

Background
1. Data Source: data.world available under public licence
2. Inspired by: FlowingData link: http://flowingdata.com/2016/05/17/the-changing-american-diet/
3. Note: The US population in the file "fruit_banana.csv" includes both resident population and the Armed Forces overseas.
4. DataCamp.com

In [1]:
import pandas as pd

# Import figure from bokeh.plotting
from bokeh.plotting import figure

# Import output_file and show from bokeh.io
from bokeh.io import output_notebook, show

In [2]:
apples = pd.read_csv('data/fruit_apple.csv')
apples.head()

Unnamed: 0,year2,u_s_population_of_the_year,supply,imports,total_supply,exports,shipments_to_us_territories,food_disappearance_total,food_disappearance_per_capita_in_pounds
0,1970,203.849,3531.5,95.1,3626.6,102.3,11.0,3513.3,17.2
1,1971,206.466,3483.9,80.3,3564.2,118.8,14.3,3431.1,16.6
2,1972,208.917,3342.0,103.5,3445.5,149.7,19.3,3276.5,15.7
3,1973,210.985,3539.4,90.0,3629.4,181.9,13.3,3434.2,16.3
4,1974,212.932,3690.5,79.2,3769.7,232.8,11.4,3525.5,16.6


In [4]:
bananas = pd.read_csv('data/fruit_banana.csv')
beef = pd.read_csv('data/meat_beef.csv')
chicken = pd.read_csv('data/meat_chicken.csv')
potatoes = pd.read_csv('data/veggie_potatoes.csv')
tomatoes = pd.read_csv('data/veggie_tomatoes.csv')

In [5]:
#Add a new column to differentiate between types of food
apples['food_type'] = 'apples'
bananas['food_type'] = 'bananas'
beef['food_type'] ='beef'
chicken['food_type'] = 'chicken'
potatoes['food_type'] = 'potatoes'
tomatoes['food_type'] = 'tomatoes'

df_list = [apples,bananas,beef,chicken,potatoes,tomatoes]
data = pd.concat(df_list)

In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 44
Data columns (total 16 columns):
beginning_stocks                           197 non-null float64
ending_stocks                              198 non-null float64
exports                                    260 non-null float64
food_disappearance_per_capita_in_pounds    302 non-null float64
food_disappearance_total                   302 non-null float64
food_type                                  392 non-null object
fresh                                      90 non-null float64
imports                                    264 non-null float64
processed                                  90 non-null float64
shipments_to_us_territories                226 non-null float64
supply                                     301 non-null float64
total                                      90 non-null float64
total_supply                               302 non-null float64
u_s_population_of_the_year                 302 non-null float64
year

In [7]:
#Get the count of records in each food type
data.groupby('food_type').food_type.count()

food_type
apples       45
bananas      45
beef        106
chicken     106
potatoes     45
tomatoes     45
Name: food_type, dtype: int64

In [8]:
data.head()

Unnamed: 0,beginning_stocks,ending_stocks,exports,food_disappearance_per_capita_in_pounds,food_disappearance_total,food_type,fresh,imports,processed,shipments_to_us_territories,supply,total,total_supply,u_s_population_of_the_year,year,year2
0,,,102.3,17.2,3513.3,apples,,95.1,,11.0,3531.5,,3626.6,203.849,,1970.0
1,,,118.8,16.6,3431.1,apples,,80.3,,14.3,3483.9,,3564.2,206.466,,1971.0
2,,,149.7,15.7,3276.5,apples,,103.5,,19.3,3342.0,,3445.5,208.917,,1972.0
3,,,181.9,16.3,3434.2,apples,,90.0,,13.3,3539.4,,3629.4,210.985,,1973.0
4,,,232.8,16.6,3525.5,apples,,79.2,,11.4,3690.5,,3769.7,212.932,,1974.0


#### Simple Scatter Plot - With Bokeh

In [9]:
apples.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 0 to 44
Data columns (total 10 columns):
year2                                      45 non-null int64
u_s_population_of_the_year                 45 non-null float64
supply                                     45 non-null float64
imports                                    45 non-null float64
total_supply                               45 non-null float64
exports                                    45 non-null float64
shipments_to_us_territories                18 non-null float64
food_disappearance_total                   45 non-null float64
food_disappearance_per_capita_in_pounds    45 non-null float64
food_type                                  45 non-null object
dtypes: float64(8), int64(1), object(1)
memory usage: 3.6+ KB


In [10]:
#Lets look at apple supply vs food_disappearance_per_capita_in_pounds

# Create the figure: scatter
scatter = figure(x_axis_label='Apple-food_disappearance_per_capita_in_pounds', y_axis_label='Apple-supply',plot_width=400, plot_height=400)

# Add a circle glyph to the figure p
scatter.circle(apples['food_disappearance_per_capita_in_pounds'],apples['supply'])

# Call the output_notebook()
output_notebook()

# Display the plot
show(scatter)

#### Scatter Plot with different shapes

In [11]:
#Lets look at beef and chicken imports per year

beef['year2'] = pd.to_datetime(beef['year2'], format='%Y')
chicken['year2'] = pd.to_datetime(chicken['year2'], format='%Y')

# Create the figure: scatterA
scatter2 = figure(x_axis_label='total_supply', y_axis_label='food_disappearance_total',plot_width=700, plot_height=400)

# Add a circle glyph to the figure p
scatter2.circle(beef['total_supply'],beef['food_disappearance_total'])

# Add an x glyph to the figure p
scatter2.cross(chicken['total_supply'],chicken['food_disappearance_total'])

# Specify the name of the file
output_notebook()

# Display the plot
show(scatter2)


#### Customizing Scatter with color, alpha and size

In [12]:
#Lets look at beef and chicken imports per year

beef['year2'] = pd.to_datetime(beef['year2'], format='%Y')
chicken['year2'] = pd.to_datetime(chicken['year2'], format='%Y')

# Create the figure: scatterA
scatter = figure(x_axis_label='total_supply', y_axis_label='food_disappearance_total',plot_width=700, plot_height=400)

# Add a circle glyph to the figure p
scatter.circle(beef['total_supply'],beef['food_disappearance_total'],color='red', size=7, alpha=0.8)

# Add an x glyph to the figure p
scatter.cross(chicken['total_supply'],chicken['food_disappearance_total'],color='blue', size=7, alpha=0.8)

# Specify the name of the file
output_notebook()

# Display the plot
show(scatter)

#### Lines

In [13]:
# Create a figure with x_axis_type="datetime": p
p = figure(x_axis_type='datetime', x_axis_label='Year', y_axis_label='Beef-Imports',plot_width=700, plot_height=300)

# Plot date along the x axis and price along the y axis
p.line(beef['year2'],beef['imports'])
p.circle(beef['year2'],beef['imports'], fill_color='red', size=2.5)  #can use color = df['color'] if there is a color column
#Specify output and show the result
output_notebook()
show(p)

#### Using ColumnDataSource

In [14]:

# Import the ColumnDataSource class from bokeh.plotting
from bokeh.plotting import ColumnDataSource

# Create a ColumnDataSource from df: source
source = ColumnDataSource(beef)

p = figure(x_axis_type='datetime', x_axis_label='Year', y_axis_label='Beef-Exports',plot_width=700, plot_height=300)

# Add circle glyphs to the figure p
p.line('year2','exports',source=source)

# Specify the name of the output file and show the result
output_notebook()
show(p)


#### Changing selected non selected circle values

In [24]:
#Lets look at banana supply vs food_disappearance_per_capita_in_pounds
bananas['year2'] = pd.to_datetime(bananas['year2'], format='%Y-%m-%d')

# Create the figure: scatter
scatter = figure(x_axis_type='datetime',x_axis_label='year', y_axis_label='Banana-supply',plot_width=400, plot_height=400,tools = 'box_select')

# Add a circle glyph to the figure p
scatter.circle(bananas['year2'],bananas['supply'],selection_color='red',nonselection_alpha=0.1)

# Call the output_notebook()
output_notebook()

# Display the plot
show(scatter)

##### Hovering

In [25]:
# import the HoverTool
from bokeh.models import HoverTool

In [41]:
# Add circle glyphs to figure p
source = ColumnDataSource(chicken)

p = figure(x_axis_type='datetime', x_axis_label='Year', y_axis_label='Chicken-Imports',plot_width=700, plot_height=300)

p.circle('year2','exports',source=source, size=7,
         fill_color='green', alpha=0.2, line_color='green',
         hover_fill_color='firebrick', hover_alpha=0.7,
         hover_line_color='white')

# Create a HoverTool: hover
hover = HoverTool(tooltips=None,mode='vline')

# Add the hover tool to the figure p
p.add_tools(hover)

# Specify the name of the output file and show the result
output_notebook()
show(p)


#### Colormapping

In [48]:
#Import CategoricalColorMapper from bokeh.models
from bokeh.models import CategoricalColorMapper

# Convert df to a ColumnDataSource: source
source = ColumnDataSource(data)


# Make a CategoricalColorMapper object: color_mapper
color_mapper = CategoricalColorMapper(factors=['beef','chicken','apples','bananas','potatoes','tomatoes'],
                                      palette=['red', 'green', 'blue','yellow','aqua','orange'])

p = figure(x_axis_label='Imports', y_axis_label='Exports',plot_width=700, plot_height=400)


# Add a circle glyph to the figure p
p.circle('imports', 'exports', source=source,
            color=dict(field='food_type', transform=color_mapper),
            legend='food_type')

# Specify the name of the output file and show the result
output_notebook()
show(p)


### Graph Layouts

###### Row Layout

In [63]:
#Laying row wise
# Import row from bokeh.layouts
from bokeh.layouts import row

source1 = ColumnDataSource(potatoes)
source2 = ColumnDataSource(tomatoes)

# Create the first figure: p1
p1 = figure(x_axis_label='Fresh-potatoes', y_axis_label='Processed-potatoes',plot_width=400, plot_height=400)

# Add a circle glyph to p1
p1.circle('fresh', 'processed', source=source1, color='brown')

# Create the second figure: p2
p2 = figure(x_axis_label='Fresh-tomatoes', y_axis_label='Processed-tomatoes',plot_width=400, plot_height=400)

# Add a circle glyph to p2
p2.circle('fresh', 'processed', source=source2,color='red')

# Put p1 and p2 into a horizontal row: layout
layout = row(p1,p2)

# Specify the name of the output_file and show the result
output_notebook()
show(layout)


##### Column Layout

In [98]:
#Laying row wise
# Import row from bokeh.layouts
from bokeh.layouts import column

source1 = ColumnDataSource(potatoes)
source2 = ColumnDataSource(tomatoes)

# Create the first figure: p1
p1 = figure(x_axis_label='Fresh-potatoes', y_axis_label='Processed-potatoes',plot_width=200, plot_height=200)

# Add a circle glyph to p1
p1.circle('fresh', 'processed', source=source1, color='brown')

# Create the second figure: p2
p2 = figure(x_axis_label='Fresh-tomatoes', y_axis_label='Processed-tomatoes',plot_width=200, plot_height=200)

# Add a circle glyph to p2
p2.circle('fresh', 'processed', source=source2,color='red')

# Put p1 and p2 into a horizontal row: layout
layout = column(p1,p2)

# Specify the name of the output_file and show the result
output_notebook()
show(layout)

In [99]:
##### Row / Column Layout doesnt work

In [109]:
source1 = ColumnDataSource(potatoes)
source2 = ColumnDataSource(tomatoes)
source3 = ColumnDataSource(apples)
source4 = ColumnDataSource(bananas)

# Create the first figure: p1
p1 = figure(x_axis_label='year', y_axis_label='total',plot_width=200, plot_height=200)
p1.line('year', 'total', source=source1, color='brown')

# Create the second figure: p2
p2 = figure(x_axis_label='year', y_axis_label='total',plot_width=200, plot_height=200)
p2.line('year', 'total', source=source2,color='red')

# Create the second figure: p3
p3 = figure(x_axis_label='year', y_axis_label='total',plot_width=200, plot_height=200)
p3.line('year2', 'total_supply', source=source3,color='green')

# Create the second figure: p4
p4 = figure(x_axis_type='datetime',x_axis_label='year', y_axis_label='total',plot_width=200, plot_height=200)
p4.line('year2', 'total_supply', source=source4,color='blue')


#Layout 
row2 = column([p1,p2], sizing_mode='scale_width')
# Make a row layout that includes the above column layout: layout
layout = row([p3,row2],sizing_mode='scale_width')

# Specify the name of the output_file and show the result
output_notebook()
show(layout)

TypeError: 'Column' object is not callable

#### Gridded Layout

In [115]:
from bokeh.layouts import gridplot

# Create a list containing plots p1 and p2: row1
row1 = [p1,p2]

# Create a list containing plots p3 and p4: row2
row2 = [p3,p4]

# Create a gridplot using row1 and row2: layout
layout1 = gridplot([row1,row2],toolbar_location=None)

# Specify the name of the output_file and show the result
output_notebook()
show(layout1)

#### Tabbed Layout

In [116]:
# Import Panel from bokeh.models.widgets
from bokeh.models.widgets import Tabs, Panel

# Create tab1 from plot p1: tab1
tab1 = Panel(child=p1, title='potatoes')

# Create tab2 from plot p2: tab2
tab2 = Panel(child=p2, title='tomatoes')

# Create tab3 from plot p3: tab3
tab3 = Panel(child=p3, title='apples')

# Create tab4 from plot p4: tab4
tab4 = Panel(child=p4, title='bananas')

# Create a Tabs layout: layout
layout = Tabs(tabs=[tab1,tab2,tab3,tab4])

# Specify the name of the output_file and show the result
output_notebook()
show(layout)

In [117]:
#### Linking Plots

# Link the x_range of p2 to p1: p2.x_range
p2.x_range = p1.x_range

# Link the y_range of p2 to p1: p2.y_range
p2.y_range = p1.y_range

# Link the x_range of p3 to p1: p3.x_range
p3.x_range = p1.x_range

# Link the y_range of p4 to p1: p4.y_range
p4.y_range = p1.y_range

# Specify the name of the output_file and show the result
output_notebook()
show(layout1)


#### Linked Brushing

In [142]:
# Create ColumnDataSource: source
#By sharing the same ColumnDataSource object between multiple plots, 
#selection tools like BoxSelect and LassoSelect will highlight points in both plots that share a 
#row in the ColumnDataSource

from bokeh.layouts import row

source = ColumnDataSource(chicken)

p1 = figure(x_axis_type='datetime', x_axis_label='Year', y_axis_label='Chicken-Imports',plot_width=300, plot_height=300,
           tools='box_select,lasso_select')

# Add circle glyphs to the figure p
p1.circle('year2','imports',source=source)

p2 = figure(x_axis_type='datetime', x_axis_label='Year', y_axis_label='Chicken-Exports',plot_width=300, plot_height=300,
           tools='box_select,lasso_select')

# Add circle glyphs to the figure p
p2.circle('year2','exports',source=source)

# Create row layout of figures p1 and p2: layout
layout = row(p1,p2)

output_notebook()
show(layout)

#### Legends

In [149]:
source = ColumnDataSource(data)


# Make a CategoricalColorMapper object: color_mapper
color_mapper = CategoricalColorMapper(factors=['beef','chicken','apples','bananas','potatoes','tomatoes'],
                                      palette=['red', 'green', 'blue','yellow','aqua','orange'])

p = figure(x_axis_label='Imports', y_axis_label='Exports',plot_width=700, plot_height=400)


# Add a circle glyph to the figure p
p.circle('imports', 'exports', source=source,
            color=dict(field='food_type', transform=color_mapper),
            legend='food_type')


p.legend.location = 'top_right'

# Fill the legend background with the color 'lightgray': p.legend.background_fill_color
p.legend.background_fill_color = 'lightpink'

# Specify the name of the output file and show the result
output_notebook()
show(p)

##### Hover Tooltips for exposing tooltips

In [150]:
# Import HoverTool from bokeh.models
from bokeh.models import HoverTool

# Create a HoverTool object: hover
hover = HoverTool(tooltips=[('food_type','@food_type')])

# Add the HoverTool object to figure p
p.add_tools(hover)

# Specify the name of the output_file and show the result
output_notebook()
show(p)
