In [4]:
# Import required libraries
from bokeh.plotting import figure
from bokeh.io import output_file, show,output_notebook, curdoc
import pandas as pd
# Import column
from bokeh.layouts import column, row, gridplot
#from bokeh.resources import INLINE
# Import ColumnDataSource
from bokeh.models import ColumnDataSource, NumeralTickFormatter, DatetimeTickFormatter, FactorRange
from bokeh.transform import linear_cmap, factor_cmap
from bokeh.palettes import RdBu8
from bokeh.models import ColorBar, BoxAnnotation, PolyAnnotation
from bokeh.palettes import Category10_5

output_notebook()

# Storytelling with Visualizations

## Customizing Glyph Settings

In [5]:
# Import the dataset

nba = pd.read_csv('../data/nba.csv')
#nba.info()
# Filter dataset by position "SG" and "SF" and store in two seperate dataframe
shooting_guards = nba[nba['position'] == 'SG']
small_forwards = nba[nba['position'] == 'SF']

shooting_guards = ColumnDataSource(data = shooting_guards)
small_forwards = ColumnDataSource(data = small_forwards)

# Create a Hoover tool
TOOLTIPS = [("Name", "@player"), ("Team", "@team"), ("Field Goal %", "@field_goal_perc{0.2f}")]

# Add circle glyphs for points per game versus average assists using shooting_guards, setting size to 16 pixels, fill_color to red, and assigning 0.2 to the relevent keyword argument for glyph transparency.
fig = figure(x_axis_label="Assists", y_axis_label="Points", title="Shooting Guard vs Small Forward", tooltips=TOOLTIPS)

# Add glyphs for shooting guards
fig.scatter(x="assists", y="points", source=shooting_guards, legend_label="Shooting Guard", size=16, fill_color="red", fill_alpha=0.2)

#Add circle glyphs to represent points versus average assists for small forwards, filled in green, 6 pixels in size, and with 0.6 glyph transparency.
# Add glyphs for small forwards
fig.scatter(x="assists", y="points", source=small_forwards, legend_label="Small Forward", size=6, fill_color="green", fill_alpha=0.6)

show(fig)


In [6]:
# Filter dataset by position by centers and power forwards and store in two seperate dataframe
three_point_perc = nba[nba['position'] == 'C']
field_goal_perc = nba[nba['position'] == 'PF']

centers = ColumnDataSource(data = three_point_perc)
power_forwards = ColumnDataSource(data=field_goal_perc)

# Create a Hoover tool
TOOLTIPS = [('Name', '@player'), ('Points', '@points{0.2f}')]

# Draw the circle glyph
fig = figure(x_axis_label="Field Goal Percentage", y_axis_label="Three Point Field Goal Percentage", tooltips = TOOLTIPS)
center_glyphs = fig.scatter(x="field_goal_perc", y="three_point_perc", source=centers, legend_label="Center", fill_alpha=0.2)
power_forward_glyphs = fig.scatter(x="field_goal_perc", y="three_point_perc", source=power_forwards, legend_label="Power Forward", fill_color="green", fill_alpha=0.6)

#Update the glyph attribute of center_glyphs and power_forward_glyphs to 20 pixels and 10 pixels, respectively.
center_glyphs.glyph.size = 20
power_forward_glyphs.glyph.size = 10

# Update the fill color attribute of center_glyphs and power_forward_glyphs to red and yellow, respectively.
center_glyphs.glyph.fill_color = 'red'
power_forward_glyphs.glyph.fill_color = 'yellow'

show(fig)


In [7]:
#display(nba)
steph = nba[nba['player'] == 'Stephen Curry']
chris = nba[nba['player'] == 'Chris Paul']
steph = ColumnDataSource(data = steph)
chris = ColumnDataSource(data = chris)

fig = figure(x_axis_label="Season", y_axis_label="Performance")

#Add line glyphs to represent Steph Curry's average points, filling in green, and setting the width to 2 and transparency to 0.5.
fig.line(x="season", y="points", source=steph, alpha=0.5, line_color="green", line_width=2, legend_label="Steph Curry Points")
#Display Steph Curry's average assists as line glyphs filled in purple, setting the width to 4 and transparency to 0.3
fig.line(x="season", y="assists", source=steph, alpha=0.3, line_color="purple", line_width=4, legend_label="Steph Curry Assists")

# Add line glyphs for Chris Paul
fig.line(x="season", y="points", source=chris, alpha=0.8, line_color="red", line_width=1, legend_label="Chris Paul Points")
fig.line(x="season", y="assists", source=chris, alpha=0.2, line_color="orange", line_width=3, legend_label="Chris Paul Assists")

show(fig)

ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : x='season' [closest match: 'steals'] {renderer: GlyphRenderer(id='p1212', ...)}
ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : x='season' [closest match: 'steals'] {renderer: GlyphRenderer(id='p1243', ...)}
ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : x='season' [closest match: 'steals'] {renderer: GlyphRenderer(id='p1233', ...)}
ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an e

## Highlighting and Contrasting

In [8]:
#display(nba)
east = nba[nba['conference'] == 'East']
west = nba[nba['conference'] == 'West']

#east = ColumnDataSource(data = east)
#west = ColumnDataSource(data = west)

#Use east and west to create east_sizes and west_sizes—dividing east["blocks"] and west["blocks"] by 5, respectively.
# Create sizes
east_sizes = east["blocks"]/5
west_sizes = west["blocks"]/5
fig = figure(x_axis_label="Assists", y_axis_label="Points", title="NBA Points, Blocks, and Assists by Conference")

#Add circle glyphs to fig representing points versus assists for east; use a blue fill color, a fill_alpha of 0.3, a legend label of "East", and set the radius to east_sizes.
east_glyphs = fig.circle(x=east["assists"], y=east["points"],  legend_label="East", fill_alpha=0.3, fill_color="blue", radius= east_sizes)
#Repeat the above for west, but fill in red, set a legend label of "West", and use west_sizes to change the glyph size.
west_glyphs = fig.circle(x=west["assists"], y=west["points"], legend_label="West", fill_alpha=0.3, fill_color="red", radius= west_sizes)

show(fig)


In [9]:
source = ColumnDataSource(data = nba)

#Create mapper, adjusting color as the values of "assists" change using RdBu8.
# Create mapper
mapper = linear_cmap(field_name="assists", palette=RdBu8, low=min(nba['assists']), high=max(nba['assists']))

#Create a figure, with the title of "Steals vs. Assists"
fig = figure(x_axis_label="Steals", y_axis_label="Assists", title="Steals vs. Assists")

#Complete the call of circle glyphs to add the mapper
# Add circle glyphs
fig.circle(x="steals", y="assists", source=source, color=mapper, radius=0.01)

# Create color_bar using the relevant key from mapper, and set width to 8 pixels.
color_bar = ColorBar(color_mapper=mapper["transform"], width=8)

#Add color_bar to the figure's layout on the right-hand side.
fig.add_layout(color_bar, "right")
#output_file(filename="steals_vs_assists.html")
show(fig)


In [10]:
source = ColumnDataSource(data = nba)

#Create tooltips
TOOLTIPS = [('Name', '@player')]

#Create positions, a list containing "PG", "SG", "SF", "PF", and "C"
positions = ["PG", "SG", "SF", "PF", "C"]
fig = figure(x_axis_label="Free Throw Percentage", y_axis_label="Points", title="Free Throw Percentage vs. Average Points", tooltips=TOOLTIPS)

# Add circle glyphs
#Pass "position" to the legend_field argument and complete the fill_color argument within fig.circle()
fig.circle(x="free_throw_perc", y="points", source=source, legend_field="position", radius=0.01, fill_color=factor_cmap("position", palette=Category10_5, factors=positions))

#output_file(filename="average_points_vs_free_throw_percentage.html")
show(fig)


## Communicating with Text

In [11]:
# load the bakery data
bakery = pd.read_csv("../data/bakery.csv")
#display(bakery)

grouped_bakery = bakery.groupby(['day_time','day_type'] , as_index=False )['sales'].sum()

# Create a Factor tuple
factors = [('Weekday', 'Morning'), ('Weekday', 'Afternoon'), ('Weekday', 'Evening'), ('Weekday', 'Night'), ('Weekend', 'Morning'), ('Weekend', 'Afternoon'), ('Weekend', 'Evening'), ('Weekend', 'Night')]

# Create fig, setting the x_range by calling FactorRange() and passing *factors, assigning "Sales" to the y-axis label, and giving a title of "Sales by type of day".
fig = figure(x_range = FactorRange(*factors), y_axis_label="Sales", title= 'Sales by type of day')

# Add bar glyphs for factors, with the top represented by grouped_bakery["sales"], and bar width to 90%.
fig.vbar(x=factors, top=grouped_bakery["sales"], width=0.9)
fig.yaxis[0].formatter = NumeralTickFormatter(format="$0,0")

# Update the title font size to "25px"
fig.title.text_font_size = "25px"

# Change the format of the title to center alignment.
fig.title.align = "center"

show(fig)

In [12]:
# load the bakery data
bakery = pd.read_csv("../data/bakery.csv")
#display(bakery)
item_group = bakery.groupby(['day_time','items'], as_index=False ).agg(count = ('items','count'), sales = ('sales','sum'))
morning = item_group[item_group['day_time'] == 'Morning']
afternoon = item_group[item_group['day_time'] == 'Afternoon']
evening = item_group[item_group['day_time'] == 'Evening']

morning = ColumnDataSource(data = morning)
afternoon = ColumnDataSource(data = afternoon)
evening = ColumnDataSource(data = evening)
# Set the tooltips
TOOLTIPS = [('Time of Day', '@day_time'), ('Item', '@items'), ('Volume Sold', '@count')]

# draw the figures
fig = figure(x_axis_label="Count of Products Sold", y_axis_label="Sales", title="Bakery Product Sales", tooltips=TOOLTIPS)
fig.scatter(x="count", y="sales", source=morning, line_color="red", size=12, fill_alpha=0.4, legend_label="Morning")
fig.scatter(x="count", y="sales", source=afternoon, fill_color="purple", size=10, fill_alpha=0.6, legend_label="Afternoon")
fig.scatter(x="count", y="sales", source=evening, fill_color="yellow", size=8, fill_alpha=0.6, legend_label="Evening")

# Add a title to the legend called "Time of Day"
fig.legend.title = "Time of Day"

# Move the legend to the top left corner.
fig.legend.location = "top_left"

#Make the legend interactive, hiding observations by their legend label upon click.
fig.legend.click_policy = "hide"

fig.yaxis[0].formatter = NumeralTickFormatter(format="$0.00")
show(fig)

## Adding Annotations

In [28]:
import datetime as dt
sales = bakery.groupby("date", as_index=False)["sales"].sum()
sales['date'] = pd.to_datetime(sales['date'])
source = ColumnDataSource(data=sales)
fig = figure(x_axis_label="Date", y_axis_label="Revenue ($)")
fig.line(x="date", y="sales", source=source)
fig.xaxis[0].formatter = DatetimeTickFormatter(months="%b %Y")

#Create low_box, setting the top limit equal to $250, transparency to 0.1, and fill_color to "red"
low_box = BoxAnnotation(top=250, fill_alpha=0.1, fill_color='red')
#Create high_box, setting the bottom limit equal to $250, transparency to 0.2, and fill_color to "green".
high_box = BoxAnnotation(bottom=250, fill_alpha=0.2, fill_color='green')
# Add low_box
fig.add_layout(low_box)
# Add high_box
fig.add_layout(high_box)

#Set start_date as 30th June 2016 and end_date as 27th July 2016.
start_date = dt.datetime(2016, 6, 12)
end_date = dt.datetime(2016, 8, 11)
#display(sales[sales['date'] >= '2016-07-13'])
# Create start and end floats
start_float = start_date.timestamp() * 1000
end_float = end_date.timestamp() * 1000

# Create start_data by subsetting Revenue for the row where "date" equals start_date
#display(sales.loc[sales["date"] == end_date]["sales"])
start_data = sales.loc[sales["date"] == start_date]["sales"]
end_data = sales.loc[sales["date"] == end_date]["sales"]

#Create polygon, fill in "green", with 0.4 transparency, and finish the xs and ys arguments.
polygon = PolyAnnotation(fill_color="blue", fill_alpha=0.4,
                         xs=[start_float, start_float, end_float, end_float],
                         ys=[start_data - 10, start_data + 10, end_data + 15, end_data - 15])
# Add polygon to figure and display
fig.add_layout(polygon)
show(fig)

ValueError: failed to validate PolyAnnotation(id='p2561', ...).ys: expected an element of Seq(Either(Float, Datetime, Factor(Either(String, Tuple(String, String), Tuple(String, String, String))))), got seq with invalid items [11    188.55
Name: sales, dtype: float64, 11    208.55
Name: sales, dtype: float64, 14    276.9
Name: sales, dtype: float64, 14    246.9
Name: sales, dtype: float64]