# Storytelling with Data! in Altair

by Maisa de Oliveira Fraiz

## Introduction

This project aims to replicate the examples from Cole Nussbaumer Knaflic's book, "Storytelling with Data: Let's Practice!", using the Python library `Altair`. Our primary objective is to document the reasoning behind the modifications proposed by the author, while also highlighting the challenges that arise when moving from the book's Excel-based approach to a programming environment.

`Altair` was selected for this project because of its declarative syntax, interactivity, grammar-of-graphics foundation, and compatibility with `Streamlit` and other web formatting tools, all within the user-friendly Python environment. Anticipated challenges include Altair's smaller documentation base and development community compared with more established libraries like `Matplotlib`, `Seaborn`, or `Plotly`. Furthermore, tasks that might appear straightforward in Excel may require multiple iterations to translate effectively into code.


## Imports

In [3]:
import pandas as pd
import numpy as np
import altair as alt

## Chapter 6 - tell a story

*Data in a spreadsheet or facts on a slide aren’t things that naturally stick with
us—they are easily forgotten. Stories, on the other hand, are memorable.*

### Exercise 6 - differentiate between live & standalone stories

The data for this exercise can be found here: https://www.storytellingwithdata.com/letspractice/downloads

In [4]:
# Load the exercise workbook into a DataFrame and display it.
# The relative path uses forward slashes so it resolves on Windows, macOS and
# Linux alike (a backslash is a path separator on Windows only).
# header = 4 takes the fifth sheet row as the column names, usecols keeps
# sheet columns 2-7, and skipfooter = 5 ignores the last five sheet rows.
table = pd.read_excel("../../Data/6.6 EXERCISE.xlsx", usecols = [2, 3, 4, 5, 6, 7], header = 4, skipfooter = 5)

table

Unnamed: 0,Unnamed: 2,Unnamed: 3,Internal,External,Overall,Goal
0,Jan,2019-01-01,47.6,44.8,45.05,60
1,Feb,2019-02-01,37.9,48.5,47.25,60
2,Mar,2019-03-01,17.6,49.5,46.15,60
3,Apr,2019-04-01,18.6,55.2,50.35,60
4,May,2019-05-01,40.6,56.5,55.55,60
5,Jun,2019-06-01,28.8,60.7,53.85,60
6,Jul,2019-07-01,27.1,44.2,42.85,60
7,Aug,2019-08-01,36.9,29.0,31.15,60
8,Sep,2019-09-01,37.1,61.2,59.15,60
9,Oct,2019-10-01,25.9,44.9,41.55,60


In [5]:
# The first two columns come out of the sheet without a header; give them
# meaningful names (in place) and display the result.
column_names = {
    'Unnamed: 2': 'Month',
    'Unnamed: 3': 'Date',
}
table.rename(columns = column_names, inplace = True)
table

Unnamed: 0,Month,Date,Internal,External,Overall,Goal
0,Jan,2019-01-01,47.6,44.8,45.05,60
1,Feb,2019-02-01,37.9,48.5,47.25,60
2,Mar,2019-03-01,17.6,49.5,46.15,60
3,Apr,2019-04-01,18.6,55.2,50.35,60
4,May,2019-05-01,40.6,56.5,55.55,60
5,Jun,2019-06-01,28.8,60.7,53.85,60
6,Jul,2019-07-01,27.1,44.2,42.85,60
7,Aug,2019-08-01,36.9,29.0,31.15,60
8,Sep,2019-09-01,37.1,61.2,59.15,60
9,Oct,2019-10-01,25.9,44.9,41.55,60


In [6]:
# Remove the Date, Goal and Overall columns in place; Month, Internal and
# External are what remains for the reshaping step.
table.drop(columns = ['Date', 'Goal', 'Overall'], inplace = True)

In [7]:
# Reshape from wide to long: one row per (Month, Metric) pair, with the
# measurement in the Value column. Altair can then split the lines by Metric.
melted_table = table.melt(
    id_vars = ['Month'],
    var_name = 'Metric',
    value_name = 'Value',
)
melted_table

Unnamed: 0,Month,Metric,Value
0,Jan,Internal,47.6
1,Feb,Internal,37.9
2,Mar,Internal,17.6
3,Apr,Internal,18.6
4,May,Internal,40.6
5,Jun,Internal,28.8
6,Jul,Internal,27.1
7,Aug,Internal,36.9
8,Sep,Internal,37.1
9,Oct,Internal,25.9


In [8]:
# Left-anchored, normal-weight chart title shared by the charts below.
title_chart = alt.Title(
    "Time to fill",
    fontSize = 18,
    fontWeight = 'normal',
    anchor = 'start',
    offset = 10,
)

# X encoding: months with horizontal grey labels, no tick marks, and the
# year as the axis title. sort = None disables Altair's default sorting.
x_months = alt.X(
    'Month',
    sort = None,
    axis = alt.Axis(
        labelAngle = 0,
        titleAnchor = 'start',
        labelColor = "#888888",
        titleColor = '#888888',
        titleFontWeight = 'normal',
        ticks = False,
    ),
    title = "2019",
)

# Y encoding: days on a fixed 0-90 scale, grey axis text, no grid lines.
y_days = alt.Y(
    'Value',
    axis = alt.Axis(
        grid = False,
        titleAnchor = 'end',
        labelColor = "#888888",
        titleColor = '#888888',
        titleFontWeight = 'normal',
    ),
    title = "TIME TO FILL (DAYS)",
    scale = alt.Scale(domain = [0, 90]),
)

# One line per Metric; both are mapped to black and the legend is hidden.
black_series = alt.Color(
    "Metric",
    scale = alt.Scale(range = ['black', 'black']),
    legend = None,
)

line = (
    alt.Chart(melted_table, title = title_chart)
    .mark_line()
    .encode(x = x_months, y = y_days, color = black_series)
    .properties(width = 500)
)

# Dashed horizontal rule at y = 60 running from 'Jan' to 'Dec'.
goal = (
    alt.Chart()
    .mark_rule(strokeDash = [4,4])
    .encode(
        x = alt.datum('Jan'),
        x2 = alt.datum('Dec'),
        y = alt.datum(60),
    )
)

# First attempt at labelling the series: aggregate to the "max" Month and
# take the Value at the argmax of Month.
label = (
    alt.Chart(melted_table)
    .mark_text(align='left', dx=3)
    .encode(
        alt.X('Month', aggregate='max', sort = None),
        alt.Y('Value', aggregate = {'argmax': 'Month'}),
        alt.Text('Metric'),
    )
)

final = line + goal + label

# Remove the border drawn around the plotting area and display the chart.
final.configure_view(stroke = None)

In [9]:
# Second attempt at the series labels: pin every label to the 'Dec' position
# on the x-axis. No filter is applied, so a text mark is produced for each
# row of melted_table, not just for the last point of each series.
label = alt.Chart(melted_table).mark_text(align='left', dx=3).encode(
    x = alt.datum('Dec'),
    y = alt.Y('Value'),
    text = alt.Text('Metric')
)

final = line + goal + label

# Display the layered chart without the border around the plotting area.
final.configure_view(stroke = None)

In [10]:
# Third attempt at the series labels: keep only the December rows so that
# each series gets a single label, placed just right of the 'Dec' position.
december_only = (alt.datum.Month == 'Dec')

label = (
    alt.Chart(melted_table)
    .mark_text(align = 'left', dx = 4)
    .encode(
        x = alt.datum('Dec'),
        y = alt.Y('Value'),
        text = alt.Text('Metric'),
    )
    .transform_filter(december_only)
)

# Text mark for the goal rule, built from a one-record inline dataset and
# positioned at ('Dec', 60).
goal_text_data = {"values": [{"text":  [ "GOAL"]}]}

label_goal = (
    alt.Chart(goal_text_data)
    .mark_text(align = 'left', dx = 4)
    .encode(
        x = alt.datum('Dec'),
        y = alt.datum(60),
        text = "text:N",
    )
)

final = line + goal + label + label_goal

# Display the layered chart without the border around the plotting area.
final.configure_view(stroke = None)

![Alt text](\Images\6_6a.png)

In [99]:
# Frame 1 of the story: the same axes and title as the line chart, but the
# line itself is fully transparent (opacity = 0), so only the empty canvas
# is visible. Here the axis title "2019" is set inside alt.Axis.
empty = alt.Chart(melted_table, title = title_chart).mark_line(opacity = 0).encode(
    x = alt.X('Month',
               sort = None,
               axis = alt.Axis(labelAngle = 0, 
                               titleAnchor = 'start',
                               labelColor = "#888888", 
                               titleColor = '#888888', 
                               titleFontWeight = 'normal', 
                               ticks = False,
                               title = "2019"), 

               ),
    y = alt.Y('Value', 
              axis = alt.Axis(grid = False, 
                              titleAnchor = 'end',
                              labelColor = "#888888", 
                              titleColor = '#888888', 
                              titleFontWeight = 'normal'), 
              title = "TIME TO FILL (DAYS)",
              scale = alt.Scale(domain = [0, 90]))
            ).properties(width = 500)

# Store the frame (without the border around the plotting area) and display it.
graph_1 = empty.configure_view(stroke = None)
graph_1


![Alt text](\Images\6_6b.png)

In [100]:
# Frame 2: the empty axes plus the dashed goal rule and its "GOAL" label.
# configure_view is applied to the layered chart as a whole.
graph_2 = (empty + goal + label_goal).configure_view(stroke = None)
graph_2

![Alt text](\Images\6_6c.png)

In [13]:
# Look up the January value of the External series by its labels instead of a
# hard-coded row position, so the point stays correct if the row order or the
# number of months in melted_table changes.
# NOTE(review): the original read melted_table["Value"][12]; that is the
# External/Jan row when the table holds twelve months - confirm.
jan_external = melted_table.loc[
    (melted_table["Month"] == "Jan") & (melted_table["Metric"] == "External"),
    "Value",
].iloc[0]

# Single filled black point marking that value at the 'Jan' position.
point_jan = alt.Chart().mark_point(filled = True, color = 'black', size = 50, opacity = 1).encode(
    x = alt.datum('Jan'),
    y = alt.datum(jan_external))

In [14]:
# Attempt to fade only the goal rule and its label by configuring the marks
# of that sub-layer. This is kept to document the failure: Altair raises a
# ValueError because a chart carrying a config cannot be placed in a layer.
empty + (goal + label_goal).configure_mark(opacity = 0.2) + point_jan

ValueError: Objects with "config" attribute cannot be used within LayerChart. Consider defining the config attribute in the LayerChart object instead.

In [21]:
# Workaround for the config error: rebuild the goal rule and its label with
# the opacity set through the encoding (alt.value(0.4)) instead of through
# configure_mark, so they can still be layered with other charts.
# Despite the "_opaque" suffix, these are the faded (40% opacity) versions.
goal_opaque = alt.Chart().mark_rule(strokeDash = [4,4]).encode(
    x = alt.datum('Jan'),
    x2 = alt.datum('Dec'),
    y = alt.datum(60),
    opacity = alt.value(0.4)
    )

label_goal_opaque = alt.Chart({"values": [{"text":  [ "GOAL"]}]}).mark_text(align = 'left', dx = 4).encode(
    x = alt.datum('Dec'),
    y = alt.datum(60),
    text = "text:N",
    opacity = alt.value(0.4)
)

In [101]:
# Frame 3: empty axes, the faded goal rule and label, and the January point.
graph_3 = (empty + goal_opaque + label_goal_opaque + point_jan).configure_view(stroke = None)
graph_3

![Alt text](\Images\6_6d.png)

In [23]:
# Faded goal rule and label (same definitions as in the earlier cell,
# repeated here so this cell can be run on its own).
goal_opaque = alt.Chart().mark_rule(strokeDash = [4,4]).encode(
    x = alt.datum('Jan'),
    x2 = alt.datum('Dec'),
    y = alt.datum(60),
    opacity = alt.value(0.4)
    )

label_goal_opaque = alt.Chart({"values": [{"text":  [ "GOAL"]}]}).mark_text(align = 'left', dx = 4).encode(
    x = alt.datum('Dec'),
    y = alt.datum(60),
    text = "text:N",
    opacity = alt.value(0.4)
)

# Experiment: restrict the existing line chart to the External series for
# January through June by chaining two filters, then layer it with the faded
# goal rule and label. The result is displayed, not stored.
line.transform_filter(
    alt.FieldOneOfPredicate(field='Month', oneOf=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun'])
    ).transform_filter(
    alt.datum.Metric == 'External') + goal_opaque + label_goal_opaque

In [25]:
# External series for January through June only. The x scale is given an
# explicit twelve-month domain so the axis still spans Jan-Dec even though
# the filter keeps only the first six months.
half_line = alt.Chart(melted_table, title = title_chart).mark_line().encode(
    x = alt.X('Month',
               sort = None,
               axis = alt.Axis(labelAngle = 0,
                               titleAnchor = 'start',
                               labelColor = "#888888",
                               titleColor = '#888888',
                               titleFontWeight = 'normal',
                               ticks = False),
               title = "2019" ,
               scale = alt.Scale(domain = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
               ),
    y = alt.Y('Value',
              axis = alt.Axis(grid = False,
                              titleAnchor = 'end',
                              labelColor = "#888888",
                              titleColor = '#888888',
                              titleFontWeight = 'normal'),
              title = "TIME TO FILL (DAYS)",
              scale = alt.Scale(domain = [0, 90])
              ),
    color = alt.Color("Metric", scale = alt.Scale(range = ['black', 'black']), legend = None),
    ).properties(width = 500).transform_filter(
    alt.FieldOneOfPredicate(field='Month', oneOf=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun'])
    ).transform_filter(
    alt.datum.Metric == 'External')

# Look up the June value of the External series by its labels instead of a
# hard-coded row position, so the point stays correct if the row order or the
# number of months in melted_table changes.
# NOTE(review): the original read melted_table["Value"][17]; that is the
# External/Jun row when the table holds twelve months - confirm.
jun_external = melted_table.loc[
    (melted_table["Month"] == "Jun") & (melted_table["Metric"] == "External"),
    "Value",
].iloc[0]

# Single filled black point marking the end of the half-year line.
point_jun = alt.Chart().mark_point(filled = True, color = 'black', size = 50, opacity = 1).encode(
    x = alt.datum('Jun'),
    y = alt.datum(jun_external))


# Frame 4: half-year External line, faded goal rule and label, June point.
graph_4 = (half_line + goal_opaque + label_goal_opaque + point_jun).configure_view(stroke = None)
graph_4

![Alt text](\Images\6_6e.png)

In [26]:
# Now that the filter doesn't affect the x-axis domain, we can do it directly:
# filtering on Metric alone keeps a row for every month, so the full External
# line can be taken from the existing `line` chart without redefining it.

# Frame 5: full-year External line with the faded goal rule and label.
graph_5 = (line.transform_filter(alt.datum.Metric == 'External') + goal_opaque + label_goal_opaque).configure_view(stroke = None)
graph_5

![Alt text](\Images\6_6f.png)

In [28]:
# Faded version of the External line: same encodings as `line`, plus a
# constant opacity of 0.4 and a filter that keeps only the External series.
external_opaque = alt.Chart(melted_table, title = title_chart).mark_line().encode(
    x = alt.X('Month',
               sort = None,
               axis = alt.Axis(labelAngle = 0, 
                               titleAnchor = 'start',
                               labelColor = "#888888", 
                               titleColor = '#888888', 
                               titleFontWeight = 'normal', 
                               ticks = False),
               title = "2019" 
               ),
    y = alt.Y('Value', 
              axis = alt.Axis(grid = False, 
                              titleAnchor = 'end',
                              labelColor = "#888888", 
                              titleColor = '#888888', 
                              titleFontWeight = 'normal'), 
              title = "TIME TO FILL (DAYS)",
              scale = alt.Scale(domain = [0, 90])
              ),
    color = alt.Color("Metric", scale = alt.Scale(range = ['black', 'black']), legend = None),
    opacity = alt.value(0.4)
    ).properties(width = 500).transform_filter(
    alt.datum.Metric == 'External')

# Frame 6: faded External line with the faded goal rule and label.
graph_6 = (external_opaque + goal_opaque + label_goal_opaque).configure_view(stroke = None)
graph_6

In [93]:
import time
import altair as alt
import pandas as pd
import ipywidgets as widgets
from ipywidgets import interact
# Small toy dataset (nine categories with one value each) used to try out
# the ipywidgets Play control.
source = pd.DataFrame({
    'a': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'],
    'b': [28, 55, 43, 91, 81, 53, 19, 87, 52]
})
    
def demo(i):
    """Return a bar chart of a random 40% sample of ``source``.

    ``i`` is accepted so the function can be driven by a widget, but it is
    not used in the body; every call simply draws a new sample.
    """
    sampled_rows = source.sample(frac=.4)
    bars = alt.Chart(sampled_rows).mark_bar()
    return bars.encode(x='a', y='b')


# Drive demo() with a Play widget: the widget's value is passed as `i`,
# stepping from 0 to 10 in increments of 2.
interact(demo, i = widgets.Play(
    value=0,
    min=0,
    max=10,
    step=2,
    description="Press play",
    disabled=False,
    interval=1000  # Increase the interval to slow down the animation (1 second delay)
))

interactive(children=(Play(value=0, description='Press play', interval=1000, max=10, step=2), Output()), _dom_…

<function __main__.demo(i)>

In [110]:
def demo(i):
    """Return the story frame to show at step ``i`` of the animation.

    Args:
        i: Zero-based frame number supplied by the Play widget; 0 selects
            graph_1 and 5 selects graph_6.

    Returns:
        The chart stored for that step.

    Raises:
        ValueError: If ``i`` does not correspond to one of the frames. The
            previous if/elif chain left ``chart`` unassigned in that case
            and failed with an UnboundLocalError.
    """
    # Lookup table instead of an if/elif chain: adding a frame only means
    # appending it to this tuple.
    frames = dict(enumerate((graph_1, graph_2, graph_3, graph_4, graph_5, graph_6)))

    try:
        return frames[i]
    except KeyError:
        raise ValueError(
            f"No frame for step {i!r}; expected a value from 0 to {len(frames) - 1}."
        ) from None

# Play through the six story frames, advancing one frame every 2000 ms.
interact(demo, i=widgets.Play(
    value=0,
    min=0,
    max=5,  # Index of the last frame: graph_1 ... graph_6 map to 0 ... 5
    step=1,
    description="Press play",
    interval = 2000))


interactive(children=(Play(value=0, description='Press play', interval=2000, max=5), Output()), _dom_classes=(…

<function __main__.demo(i)>

![Alt text](\Images\6_6g.png)

![Alt text](\Images\6_6h.png)

![Alt text](\Images\6_6i.png)

![Alt text](\Images\6_6j.png)

![Alt text](\Images\6_6k.png)

![Alt text](\Images\6_6l.png)

![Alt text](\Images\6_6m.png)

![Alt text](\Images\6_6n.png)