# Storytelling with Data! in Altair

by Maisa de Oliveira Fraiz

## Introduction

This project aims to replicate the examples from Cole Nussbaumer Knaflic's book, "Storytelling with Data - Let's Practice!", using `Python Altair`. Our primary objective is to document the reasoning behind the modifications proposed by the author, while also highlighting the challenges that arise when transitioning from the book's Excel-based approach to programming in a different software environment.

`Altair` was selected for this project due to its declarative syntax, interactivity, grammar of graphics, and compatibility with `Streamlit` and other web formatting tools, all within the user-friendly Python environment. Anticipated challenges include Altair's smaller documentation base and development community compared to more established libraries like `Matplotlib`, `Seaborn`, or `Plotly`. Furthermore, tasks that might appear straightforward in Excel may require multiple iterations to translate effectively into code.


## Imports

In [1]:
import pandas as pd
import numpy as np
import altair as alt

## Chapter 6 - tell a story

*Data in a spreadsheet or facts on a slide aren’t things that naturally stick with
us—they are easily forgotten. Stories, on the other hand, are memorable.*

### Exercise 6 - differentiate between live & standalone stories

The data for this exercise can be found here: https://www.storytellingwithdata.com/letspractice/downloads

In [2]:
# Load the exercise sheet. Forward slashes keep the relative path valid on
# Windows, macOS and Linux alike (the backslash form only resolves on Windows).
# header=4 takes the column names from the fifth spreadsheet row, usecols keeps
# columns 2-7 by position, and skipfooter=5 ignores the last five rows.
table = pd.read_excel(
    "../../Data/6.6 EXERCISE.xlsx",
    usecols=[2, 3, 4, 5, 6, 7],
    header=4,
    skipfooter=5,
)

table

Unnamed: 0,Unnamed: 2,Unnamed: 3,Internal,External,Overall,Goal
0,Jan,2019-01-01,47.6,44.8,45.05,60
1,Feb,2019-02-01,37.9,48.5,47.25,60
2,Mar,2019-03-01,17.6,49.5,46.15,60
3,Apr,2019-04-01,18.6,55.2,50.35,60
4,May,2019-05-01,40.6,56.5,55.55,60
5,Jun,2019-06-01,28.8,60.7,53.85,60
6,Jul,2019-07-01,27.1,44.2,42.85,60
7,Aug,2019-08-01,36.9,29.0,31.15,60
8,Sep,2019-09-01,37.1,61.2,59.15,60
9,Oct,2019-10-01,25.9,44.9,41.55,60


In [3]:
# Give the two unnamed spreadsheet columns meaningful names, then preview.
table.rename(
    {'Unnamed: 2': 'Month', 'Unnamed: 3': 'Date'},
    axis='columns',
    inplace=True,
)
table

Unnamed: 0,Month,Date,Internal,External,Overall,Goal
0,Jan,2019-01-01,47.6,44.8,45.05,60
1,Feb,2019-02-01,37.9,48.5,47.25,60
2,Mar,2019-03-01,17.6,49.5,46.15,60
3,Apr,2019-04-01,18.6,55.2,50.35,60
4,May,2019-05-01,40.6,56.5,55.55,60
5,Jun,2019-06-01,28.8,60.7,53.85,60
6,Jul,2019-07-01,27.1,44.2,42.85,60
7,Aug,2019-08-01,36.9,29.0,31.15,60
8,Sep,2019-09-01,37.1,61.2,59.15,60
9,Oct,2019-10-01,25.9,44.9,41.55,60


In [4]:
# Keep only Month plus the Internal/External metrics for the line chart.
table.drop(['Date', 'Goal', 'Overall'], axis='columns', inplace=True)

In [5]:
# Reshape to long form: one row per (Month, Metric) pair with its Value.
melted_table = table.melt(
    id_vars=['Month'],
    var_name='Metric',
    value_name='Value',
)
melted_table

Unnamed: 0,Month,Metric,Value
0,Jan,Internal,47.6
1,Feb,Internal,37.9
2,Mar,Internal,17.6
3,Apr,Internal,18.6
4,May,Internal,40.6
5,Jun,Internal,28.8
6,Jul,Internal,27.1
7,Aug,Internal,36.9
8,Sep,Internal,37.1
9,Oct,Internal,25.9


In [6]:
# Shared chart title: left-anchored, regular weight, offset from the plot area.
title_chart = alt.Title(
    "Time to fill",
    anchor="start",
    fontSize=18,
    fontWeight="normal",
    offset=10,
)

# One black line per metric; sort=None keeps the months in data order.
line = (
    alt.Chart(melted_table, title=title_chart)
    .mark_line()
    .encode(
        x=alt.X(
            "Month",
            sort=None,
            title="2019",
            axis=alt.Axis(
                labelAngle=0,
                labelColor="#888888",
                ticks=False,
                titleAnchor="start",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        y=alt.Y(
            "Value",
            title="TIME TO FILL (DAYS)",
            scale=alt.Scale(domain=[0, 90]),
            axis=alt.Axis(
                grid=False,
                labelColor="#888888",
                titleAnchor="end",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        color=alt.Color(
            "Metric",
            legend=None,
            scale=alt.Scale(range=["black", "black"]),
        ),
    )
    .properties(width=500)
)

# Dashed horizontal rule at y = 60 spanning Jan through Dec.
goal = alt.Chart().mark_rule(strokeDash=[4, 4]).encode(
    x=alt.datum("Jan"),
    x2=alt.datum("Dec"),
    y=alt.datum(60),
)

# First attempt at end-of-line labels, positioned through aggregates.
label = alt.Chart(melted_table).mark_text(align="left", dx=3).encode(
    alt.X("Month", aggregate="max", sort=None),
    alt.Y("Value", aggregate={"argmax": "Month"}),
    alt.Text("Metric"),
)

final = line + goal + label

# Drop the border drawn around the view and display the layered chart.
final.configure_view(stroke=None)

In [7]:
# Second attempt: pin the labels to the 'Dec' x position. No filter is applied
# yet, so a text mark is produced for every row of the data.
label = alt.Chart(melted_table).mark_text(align="left", dx=3).encode(
    x=alt.datum("Dec"),
    y=alt.Y("Value"),
    text=alt.Text("Metric"),
)

final = line + goal + label

final.configure_view(stroke=None)

In [8]:
# Final labels: only the December rows are kept, so each metric name is written
# once, just to the right of the 'Dec' position.
label = (
    alt.Chart(melted_table)
    .mark_text(align="left", dx=4)
    .encode(
        x=alt.datum("Dec"),
        y=alt.Y("Value"),
        text=alt.Text("Metric"),
    )
    .transform_filter(alt.datum.Month == "Dec")
)

# "GOAL" caption supplied as inline data and placed at (Dec, 60).
label_goal = (
    alt.Chart({"values": [{"text": ["GOAL"]}]})
    .mark_text(align="left", dx=4)
    .encode(
        x=alt.datum("Dec"),
        y=alt.datum(60),
        text="text:N",
    )
)

final = line + goal + label + label_goal

final.configure_view(stroke=None)

![Alt text](\Images\6_6a.png)

In [9]:
# Axes-only frame: the line mark is fully transparent, so only the title and
# the two axes are visible.
empty = (
    alt.Chart(melted_table, title=title_chart)
    .mark_line(opacity=0)
    .encode(
        x=alt.X(
            "Month",
            sort=None,
            axis=alt.Axis(
                labelAngle=0,
                labelColor="#888888",
                ticks=False,
                title="2019",
                titleAnchor="start",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        y=alt.Y(
            "Value",
            title="TIME TO FILL (DAYS)",
            scale=alt.Scale(domain=[0, 90]),
            axis=alt.Axis(
                grid=False,
                labelColor="#888888",
                titleAnchor="end",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
    )
    .properties(width=500)
)

graph_1 = empty.configure_view(stroke=None)
graph_1


![Alt text](\Images\6_6b.png)

In [10]:
# Step 2 of the story: empty frame plus the goal rule and its caption.
graph_2 = (
    empty
    + goal
    + label_goal
).configure_view(stroke=None)
graph_2

![Alt text](\Images\6_6c.png)

In [11]:
# January value of the External series, selected by label. The previous
# positional index (12) only pointed at this row while the melted frame held
# exactly twelve Internal rows ahead of it.
jan_external = melted_table.loc[
    (melted_table["Month"] == "Jan") & (melted_table["Metric"] == "External"),
    "Value",
].iloc[0]

# Single filled black marker highlighting that value.
point_jan = alt.Chart().mark_point(filled=True, color="black", size=50, opacity=1).encode(
    x=alt.datum("Jan"),
    y=alt.datum(jan_external),
)

In [12]:
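# Discarded attempt, kept for reference. Presumably it fails because a chart that
# already carries a config cannot be layered again, so opacity is set per layer below.
#empty + (goal + label_goal).configure_mark(opacity = 0.2) + point_jan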

In [13]:
# Faded copy of the goal rule: same geometry as `goal`, drawn at 40% opacity.
goal_opaque = alt.Chart().mark_rule(strokeDash=[4, 4]).encode(
    x=alt.datum("Jan"),
    x2=alt.datum("Dec"),
    y=alt.datum(60),
    opacity=alt.value(0.4),
)

# Faded copy of the "GOAL" caption, also at 40% opacity.
label_goal_opaque = (
    alt.Chart({"values": [{"text": ["GOAL"]}]})
    .mark_text(align="left", dx=4)
    .encode(
        x=alt.datum("Dec"),
        y=alt.datum(60),
        text="text:N",
        opacity=alt.value(0.4),
    )
)

In [14]:
# Step 3: faded goal in the background, January marker in focus.
graph_3 = (
    empty
    + goal_opaque
    + label_goal_opaque
    + point_jan
).configure_view(stroke=None)
graph_3

![Alt text](\Images\6_6d.png)

In [15]:
# Faded goal rule and caption (40% opacity), declared again in this cell.
goal_opaque = alt.Chart().mark_rule(strokeDash=[4, 4]).encode(
    x=alt.datum("Jan"),
    x2=alt.datum("Dec"),
    y=alt.datum(60),
    opacity=alt.value(0.4),
)

label_goal_opaque = (
    alt.Chart({"values": [{"text": ["GOAL"]}]})
    .mark_text(align="left", dx=4)
    .encode(
        x=alt.datum("Dec"),
        y=alt.datum(60),
        text="text:N",
        opacity=alt.value(0.4),
    )
)

# Exploratory view: External line restricted to Jan-Jun by filtering `line`.
(
    line
    .transform_filter(
        alt.FieldOneOfPredicate(
            field="Month",
            oneOf=["Jan", "Feb", "Mar", "Apr", "May", "Jun"],
        )
    )
    .transform_filter(alt.datum.Metric == "External")
    + goal_opaque
    + label_goal_opaque
)

In [16]:
# Line limited to Jan-Jun. The x scale domain is fixed to all twelve months so
# the axis keeps its full width even though later months are filtered out.
partial_line1 = (
    alt.Chart(melted_table, title=title_chart)
    .mark_line()
    .encode(
        x=alt.X(
            "Month",
            sort=None,
            title="2019",
            scale=alt.Scale(
                domain=["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
            ),
            axis=alt.Axis(
                labelAngle=0,
                labelColor="#888888",
                ticks=False,
                titleAnchor="start",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        y=alt.Y(
            "Value",
            title="TIME TO FILL (DAYS)",
            scale=alt.Scale(domain=[0, 90]),
            axis=alt.Axis(
                grid=False,
                labelColor="#888888",
                titleAnchor="end",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        color=alt.Color(
            "Metric",
            legend=None,
            scale=alt.Scale(range=["black", "black"]),
        ),
    )
    .properties(width=500)
    .transform_filter(
        alt.FieldOneOfPredicate(
            field="Month",
            oneOf=["Jan", "Feb", "Mar", "Apr", "May", "Jun"],
        )
    )
)

# June value of the External series, selected by label instead of the
# positional index 17, which depended on the row order of the melted frame.
jun_external = melted_table.loc[
    (melted_table["Month"] == "Jun") & (melted_table["Metric"] == "External"),
    "Value",
].iloc[0]

point_jun = alt.Chart().mark_point(filled=True, color="black", size=50, opacity=1).encode(
    x=alt.datum("Jun"),
    y=alt.datum(jun_external),
)

# Step 4: External line up to June with its end point, over the faded goal.
graph_4 = (
    partial_line1.transform_filter(alt.datum.Metric == "External")
    + goal_opaque
    + label_goal_opaque
    + point_jun
).configure_view(stroke=None)
graph_4

![Alt text](\Images\6_6e.png)

In [17]:
# Keep only External rows from August through December.
points_condition = (alt.datum.Metric == "External") & (
    (alt.datum.Month == "Aug")
    | (alt.datum.Month == "Sep")
    | (alt.datum.Month == "Oct")
    | (alt.datum.Month == "Nov")
    | (alt.datum.Month == "Dec")
)

# Markers for those months: orange when the value is above 60, blue otherwise.
points = (
    alt.Chart(melted_table)
    .mark_point(filled=True, size=50, opacity=1)
    .encode(
        x=alt.X("Month", sort=None),
        y="Value",
        color=alt.condition(
            alt.datum.Value > 60,
            alt.value("#f6792c"),
            alt.value("#187cae"),
        ),
    )
    .transform_filter(points_condition)
)

# Now that the filter doesn't affect the x-axis domain, `line` can be
# filtered directly instead of building another partial line.
graph_5 = (
    line.transform_filter(alt.datum.Metric == "External")
    + goal_opaque
    + label_goal_opaque
    + points
).configure_view(stroke=None)
graph_5

![Alt text](\Images\6_6f.png)

In [18]:
# External line pushed to the background: same encoding as `line`, drawn at
# 40% opacity and restricted to the External metric.
external_opaque = (
    alt.Chart(melted_table, title=title_chart)
    .mark_line()
    .encode(
        x=alt.X(
            "Month",
            sort=None,
            title="2019",
            axis=alt.Axis(
                labelAngle=0,
                labelColor="#888888",
                ticks=False,
                titleAnchor="start",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        y=alt.Y(
            "Value",
            title="TIME TO FILL (DAYS)",
            scale=alt.Scale(domain=[0, 90]),
            axis=alt.Axis(
                grid=False,
                labelColor="#888888",
                titleAnchor="end",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        color=alt.Color(
            "Metric",
            legend=None,
            scale=alt.Scale(range=["black", "black"]),
        ),
        opacity=alt.value(0.4),
    )
    .properties(width=500)
    .transform_filter(alt.datum.Metric == "External")
)

# Faded end-of-line label for the External series (December row only).
label_opaque = (
    alt.Chart(melted_table)
    .mark_text(align="left", dx=4)
    .encode(
        x=alt.datum("Dec"),
        y=alt.Y("Value"),
        text=alt.Text("Metric"),
        opacity=alt.value(0.4),
    )
    .transform_filter(alt.datum.Month == "Dec")
    .transform_filter(alt.datum.Metric == "External")
)

# January value of the Internal series, selected by label instead of the
# positional index 0, which depended on the row order of the melted frame.
jan_internal = melted_table.loc[
    (melted_table["Month"] == "Jan") & (melted_table["Metric"] == "Internal"),
    "Value",
].iloc[0]

point_jan2 = alt.Chart().mark_point(filled=True, color="black", size=50, opacity=1).encode(
    x=alt.datum("Jan"),
    y=alt.datum(jan_internal),
)

# Step 6: faded External context with the first Internal point in focus.
graph_6 = (
    external_opaque
    + label_opaque
    + goal_opaque
    + label_goal_opaque
    + point_jan2
).configure_view(stroke=None)
graph_6

![Alt text](\Images\6_6g.png)

In [19]:
# Line limited to Jan-Apr; the x scale domain lists all twelve months so the
# axis keeps its full width.
partial_line2 = (
    alt.Chart(melted_table, title=title_chart)
    .mark_line()
    .encode(
        x=alt.X(
            "Month",
            sort=None,
            title="2019",
            scale=alt.Scale(
                domain=["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
            ),
            axis=alt.Axis(
                labelAngle=0,
                labelColor="#888888",
                ticks=False,
                titleAnchor="start",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        y=alt.Y(
            "Value",
            title="TIME TO FILL (DAYS)",
            scale=alt.Scale(domain=[0, 90]),
            axis=alt.Axis(
                grid=False,
                labelColor="#888888",
                titleAnchor="end",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        color=alt.Color(
            "Metric",
            legend=None,
            scale=alt.Scale(range=["black", "black"]),
        ),
    )
    .properties(width=500)
    .transform_filter(
        alt.FieldOneOfPredicate(
            field="Month",
            oneOf=["Jan", "Feb", "Mar", "Apr"],
        )
    )
)

# April value of the Internal series, selected by label instead of the
# positional index 3, which depended on the row order of the melted frame.
apr_internal = melted_table.loc[
    (melted_table["Month"] == "Apr") & (melted_table["Metric"] == "Internal"),
    "Value",
].iloc[0]

point_apr = alt.Chart().mark_point(filled=True, color="black", size=50, opacity=1).encode(
    x=alt.datum("Apr"),
    y=alt.datum(apr_internal),
)

# Step 7: Internal line up to April over the faded External line and goal.
graph_7 = (
    partial_line2.transform_filter(alt.datum.Metric == "Internal")
    + goal_opaque
    + label_goal_opaque
    + point_apr
    + external_opaque
    + label_opaque
).configure_view(stroke=None)
graph_7

![Alt text](\Images\6_6h.png)

In [20]:
# Line limited to Jan-May; the x scale domain lists all twelve months so the
# axis keeps its full width.
partial_line3 = (
    alt.Chart(melted_table, title=title_chart)
    .mark_line()
    .encode(
        x=alt.X(
            "Month",
            sort=None,
            title="2019",
            scale=alt.Scale(
                domain=["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
            ),
            axis=alt.Axis(
                labelAngle=0,
                labelColor="#888888",
                ticks=False,
                titleAnchor="start",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        y=alt.Y(
            "Value",
            title="TIME TO FILL (DAYS)",
            scale=alt.Scale(domain=[0, 90]),
            axis=alt.Axis(
                grid=False,
                labelColor="#888888",
                titleAnchor="end",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        color=alt.Color(
            "Metric",
            legend=None,
            scale=alt.Scale(range=["black", "black"]),
        ),
    )
    .properties(width=500)
    .transform_filter(
        alt.FieldOneOfPredicate(
            field="Month",
            oneOf=["Jan", "Feb", "Mar", "Apr", "May"],
        )
    )
)

# May value of the Internal series, selected by label instead of the
# positional index 4, which depended on the row order of the melted frame.
may_internal = melted_table.loc[
    (melted_table["Month"] == "May") & (melted_table["Metric"] == "Internal"),
    "Value",
].iloc[0]

point_may = alt.Chart().mark_point(filled=True, color="black", size=50, opacity=1).encode(
    x=alt.datum("May"),
    y=alt.datum(may_internal),
)

# Step 8: Internal line up to May over the faded External line and goal.
graph_8 = (
    partial_line3.transform_filter(alt.datum.Metric == "Internal")
    + goal_opaque
    + label_goal_opaque
    + point_may
    + external_opaque
    + label_opaque
).configure_view(stroke=None)
graph_8

![Alt text](\Images\6_6i.png)

In [21]:
# Line limited to Jan-Sep; the x scale domain lists all twelve months so the
# axis keeps its full width.
partial_line4 = (
    alt.Chart(melted_table, title=title_chart)
    .mark_line()
    .encode(
        x=alt.X(
            "Month",
            sort=None,
            title="2019",
            scale=alt.Scale(
                domain=["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
            ),
            axis=alt.Axis(
                labelAngle=0,
                labelColor="#888888",
                ticks=False,
                titleAnchor="start",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        y=alt.Y(
            "Value",
            title="TIME TO FILL (DAYS)",
            scale=alt.Scale(domain=[0, 90]),
            axis=alt.Axis(
                grid=False,
                labelColor="#888888",
                titleAnchor="end",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        color=alt.Color(
            "Metric",
            legend=None,
            scale=alt.Scale(range=["black", "black"]),
        ),
    )
    .properties(width=500)
    .transform_filter(
        alt.FieldOneOfPredicate(
            field="Month",
            oneOf=["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep"],
        )
    )
)

# September value of the Internal series, selected by label instead of the
# positional index 8, which depended on the row order of the melted frame.
sep_internal = melted_table.loc[
    (melted_table["Month"] == "Sep") & (melted_table["Metric"] == "Internal"),
    "Value",
].iloc[0]

point_sep = alt.Chart().mark_point(filled=True, color="black", size=50, opacity=1).encode(
    x=alt.datum("Sep"),
    y=alt.datum(sep_internal),
)

# Step 9: Internal line up to September over the faded External line and goal.
graph_9 = (
    partial_line4.transform_filter(alt.datum.Metric == "Internal")
    + goal_opaque
    + label_goal_opaque
    + point_sep
    + external_opaque
    + label_opaque
).configure_view(stroke=None)
graph_9

![Alt text](\Images\6_6j.png)

In [22]:
# Line limited to Jan-Nov; the x scale domain lists all twelve months so the
# axis keeps its full width.
partial_line5 = (
    alt.Chart(melted_table, title=title_chart)
    .mark_line()
    .encode(
        x=alt.X(
            "Month",
            sort=None,
            title="2019",
            scale=alt.Scale(
                domain=["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
            ),
            axis=alt.Axis(
                labelAngle=0,
                labelColor="#888888",
                ticks=False,
                titleAnchor="start",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        y=alt.Y(
            "Value",
            title="TIME TO FILL (DAYS)",
            scale=alt.Scale(domain=[0, 90]),
            axis=alt.Axis(
                grid=False,
                labelColor="#888888",
                titleAnchor="end",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        color=alt.Color(
            "Metric",
            legend=None,
            scale=alt.Scale(range=["black", "black"]),
        ),
    )
    .properties(width=500)
    .transform_filter(
        alt.FieldOneOfPredicate(
            field="Month",
            oneOf=["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov"],
        )
    )
)

# November value of the Internal series, selected by label instead of the
# positional index 10, which depended on the row order of the melted frame.
nov_internal = melted_table.loc[
    (melted_table["Month"] == "Nov") & (melted_table["Metric"] == "Internal"),
    "Value",
].iloc[0]

point_nov = alt.Chart().mark_point(filled=True, color="black", size=50, opacity=1).encode(
    x=alt.datum("Nov"),
    y=alt.datum(nov_internal),
)

# Step 10: Internal line up to November over the faded External line and goal.
graph_10 = (
    partial_line5.transform_filter(alt.datum.Metric == "Internal")
    + goal_opaque
    + label_goal_opaque
    + point_nov
    + external_opaque
    + label_opaque
).configure_view(stroke=None)
graph_10

![Alt text](\Images\6_6k.png)

In [23]:
# December value of the Internal series, selected by label instead of the
# positional index 11, which depended on the row order of the melted frame.
dec_internal = melted_table.loc[
    (melted_table["Month"] == "Dec") & (melted_table["Metric"] == "Internal"),
    "Value",
].iloc[0]

point_dec = alt.Chart().mark_point(filled=True, color="black", size=50, opacity=1).encode(
    x=alt.datum("Dec"),
    y=alt.datum(dec_internal),
)

# Step 11: full Internal line with its label and end point, over the faded
# External line and goal.
graph_11 = (
    line.transform_filter(alt.datum.Metric == "Internal")
    + label.transform_filter(alt.datum.Metric == "Internal")
    + goal_opaque
    + label_goal_opaque
    + point_dec
    + external_opaque
    + label_opaque
).configure_view(stroke=None)
graph_11

![Alt text](\Images\6_6l.png)

In [24]:
# Both full lines with their labels, over the faded goal rule and caption.
(
    line
    + label
    + goal_opaque
    + label_goal_opaque
).configure_view(stroke=None)

In [25]:
# Kept to document the limitation: calling configure_line on `line` before
# layering raises the ValueError printed below, because a chart that carries a
# "config" attribute cannot be used inside a LayerChart.
(line.configure_line(color = 'red') + goal_opaque + label_goal_opaque + label).configure_view(stroke = None)

ValueError: Objects with "config" attribute cannot be used within LayerChart. Consider defining the config attribute in the LayerChart object instead.

In [26]:
# Configuration is accepted when applied to the layered chart as a whole.
layered_lines = line + goal_opaque + label_goal_opaque + label
(
    layered_lines
    .configure_view(stroke=None)
    .configure_text(color="blue")
    .configure_line(color="blue")
)

In [27]:
# Final look: same encoding as `line`, with the two metrics mapped to
# '#1d779c' and '#676767' through the colour scale range.
line_color = (
    alt.Chart(melted_table, title=title_chart)
    .mark_line()
    .encode(
        x=alt.X(
            "Month",
            sort=None,
            title="2019",
            axis=alt.Axis(
                labelAngle=0,
                labelColor="#888888",
                ticks=False,
                titleAnchor="start",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        y=alt.Y(
            "Value",
            title="TIME TO FILL (DAYS)",
            scale=alt.Scale(domain=[0, 90]),
            axis=alt.Axis(
                grid=False,
                labelColor="#888888",
                titleAnchor="end",
                titleColor="#888888",
                titleFontWeight="normal",
            ),
        ),
        color=alt.Color(
            "Metric",
            legend=None,
            scale=alt.Scale(range=["#1d779c", "#676767"]),
        ),
    )
    .properties(width=500)
)

# End-of-line labels (December rows only) using the same colour mapping.
label_color = (
    alt.Chart(melted_table)
    .mark_text(align="left", dx=4)
    .encode(
        x=alt.datum("Dec"),
        y=alt.Y("Value"),
        text=alt.Text("Metric"),
        color=alt.Color(
            "Metric",
            legend=None,
            scale=alt.Scale(range=["#1d779c", "#676767"]),
        ),
    )
    .transform_filter(alt.datum.Month == "Dec")
)

graph_12 = (
    line_color
    + goal_opaque
    + label_goal_opaque
    + label_color
).configure_view(stroke=None)
graph_12


![Alt text](\Images\6_6m.png)

In [28]:
import ipywidgets as widgets
from ipywidgets import interact
from IPython.display import clear_output

In [29]:
# The story frames, in presentation order.
graphs = [
    graph_1, graph_2, graph_3, graph_4, graph_5, graph_6,
    graph_7, graph_8, graph_9, graph_10, graph_11, graph_12,
]


def demo(i):
    """Show the i-th story frame, replacing whatever was displayed before.

    i -- zero-based position in ``graphs``; an out-of-range value displays
    ``None`` instead of a chart.
    """
    # wait=True defers clearing until the new output is ready.
    clear_output(wait=True)
    chart = graphs[i] if 0 <= i < len(graphs) else None
    display(chart)


# The Play widget steps through the frames every 2000 ms. Its upper bound is
# derived from the list so adding or removing a frame needs no second edit.
interact(demo, i=widgets.Play(
    value=0,
    min=0,
    max=len(graphs) - 1,
    step=1,
    description="Press play",
    interval=2000))


interactive(children=(Play(value=0, description='Press play', interval=2000, max=11), Output()), _dom_classes=…

<function __main__.demo(i)>

![Alt text](\Images\6_6n.png)