In [0]:
import pandas as pd
import altair as alt

In [7]:
# Enable the 'default' data transformer with max_rows=None so that no row cap
# is configured for the charts built from the review table loaded below.
# NOTE(review): this embeds the whole table in every chart spec - confirm the
# resulting notebook size is acceptable.
alt.data_transformers.enable('default', max_rows=None) 

DataTransformerRegistry.enable('default')

In [8]:
# Source of the review table. The URL carries a revision hash in its path, so
# it points at one fixed version of the CSV.
PITCHFORK_CSV_URL = 'https://gist.githubusercontent.com/andriy-gazin/f8f74625aeb3602850067fbb47bc1e4f/raw/05e5080840396deb084e90208dc8e76a75a980a3/pitchfork.csv'

# `df` is the single table every chart in this notebook is built from.
df = pd.read_csv(PITCHFORK_CSV_URL)

# Show the first rows as the cell output.
df.head()

Unnamed: 0,artist,album,genre,primary,score,date,author,role
0,David Byrne,“…The Best Live Show of All Time” — NME EP,Rock,Rock,5.5,2019-01-11,Andy Beta,Contributor
1,DJ Healer,Lost Lovesongs / Lostsongs Vol. 2,Electronic,Electronic,6.2,2019-01-11,Chal Ravens,Contributor
2,Jorge Velez,Roman Birds,Electronic,Electronic,7.9,2019-01-10,Philip Sherburne,Contributing Editor
3,Chandra,Transportation EPs,Rock,Rock,7.8,2019-01-10,Andy Beta,Contributor
4,The Chainsmokers,Sick Boy,Electronic,Electronic,3.1,2019-01-09,Larry Fitzmaurice,Contributor


# **Faceting**

Split the data by a chosen variable and draw the same chart for every group

In [9]:
# Base panel: number of reviews per year, drawn as a line in a 150x150 view.
yearly_review_counts = (
    alt.Chart(df)
    .mark_line()
    .encode(
        x=alt.X('date:T', timeUnit='year'),
        y=alt.Y('count()'),
    )
    .properties(width=150, height=150)
)

# One panel per value of `primary`, wrapped at 5 columns, with the y scale
# resolved as shared across panels.
yearly_review_counts.facet('primary', columns=5).resolve_scale(y='shared')

Output hidden; open in https://colab.research.google.com to view.

# **Pay attention to**

- Sorting
- Layout, rows/columns
- Scales - shared / independent

# **Faceting Layouts**

- Rows/columns
- Wrap
- [Geofacet](https://ryanhafen.com/blog/geofacet/)
- [Tree](https://g2.antv.vision/en/examples/facet/facet#tree-column)
- [Mirror](https://g2.antv.vision/en/examples/facet/facet#mirror)
- [Circle](https://g2.antv.vision/en/examples/facet/facet#circle)

# **Layering**

Add a few layers of visual information on top of each other (raw and aggregated data, different marks, annotations, etc.)

In [10]:
# Score histogram: 0.5-wide score bins on x, number of reviews on y.
histograms = alt.Chart(df).mark_rect().encode(
    x = alt.X('score:Q', bin = alt.Bin(step = 0.5)),
    y = alt.Y('count()')
).properties(width = 150, height = 150)

# Rule positioned at the mean score (same panel size as the histogram).
means = alt.Chart(df).mark_rule().encode(
    x = alt.X('score:Q', aggregate='mean')
).properties(width = 150, height = 150)

# Layers are painted in the order they are listed. The histogram goes first
# (background) and the mean rule last (foreground); with the reverse order
# the bars are drawn over the rule and hide the annotation.
# Faceting the layered chart gives one panel per genre, wrapped at 5 columns.
alt.layer(histograms, means).facet('primary:O', columns = 5)

Output hidden; open in https://colab.research.google.com to view.

# Pay attention to

- Order
- Visual hierarchy (background / foreground)

# **Repetition**

Same chart but for different variables / variable combinations

In [11]:
# Fields the chart is repeated over; each becomes one column of the output.
repeated_fields = ['primary', 'author']

# Yearly review counts as an area chart. The colour channel is bound to the
# field currently being repeated and is treated as ordinal.
yearly_area = alt.Chart(df).mark_area().encode(
    alt.X('date:T', timeUnit='year'),
    alt.Y('count()'),
    alt.Color(alt.repeat('column'), type='ordinal'),
)

yearly_area.repeat(column=repeated_fields)

Output hidden; open in https://colab.research.google.com to view.

# **Concatenation**

Combine several charts on one panel

In [12]:
# Left panel: one circle per review, date on x and score on y.
scatterplot = (
    alt.Chart(df)
    .mark_circle()
    .encode(
        x=alt.X('date:T'),
        y=alt.Y('score:Q'),
    )
    .properties(width=600)
)

# Right panel: review counts on x for 0.25-wide score bins on y.
side_histogram = (
    alt.Chart(df)
    .mark_rect()
    .encode(
        x=alt.X('count()'),
        y=alt.Y('score:Q', bin=alt.Bin(step=0.25)),
    )
    .properties(width=200)
)

# `|` is the operator form of horizontal concatenation.
scatterplot | side_histogram

Output hidden; open in https://colab.research.google.com to view.

In [13]:
# Shared encodings for both panels: monthly time axis, colour by genre.
monthly_genre_area = alt.Chart(df).mark_area().encode(
    x=alt.X('date:T', timeUnit='yearmonth'),
    color=alt.Color('primary:O'),
)

# Main panel: review counts with the default stacking.
big_area = monthly_genre_area.encode(
    y=alt.Y('count()'),
).properties(width=850)

# Thin companion panel: same counts, stacked around the centre.
small_area = monthly_genre_area.encode(
    y=alt.Y('count()', stack='center'),
).properties(width=850, height=50)

# `&` is the operator form of vertical concatenation.
big_area & small_area

Output hidden; open in https://colab.research.google.com to view.

# Pay attention to

- Order and location (should establish visual hierarchy, help to navigate)
- Size (important stuff should be bigger)
- Axes (ranges should be identical)

# **Tooltip**

Provide additional info on click or hover

In [14]:
# Order the genres on the y axis by their mean score, descending.
genres_by_mean_score = alt.Sort(field='score', op='mean', order='descending')

# Fields shown in the tooltip, in display order.
tooltip_fields = ['artist', 'album', 'score']

# One semi-transparent circle per review: score on x, genre on y.
alt.Chart(df).mark_circle(opacity=0.5, size=50).encode(
    x=alt.X('score'),
    y=alt.Y('primary', sort=genres_by_mean_score),
    tooltip=alt.Tooltip(tooltip_fields),
).properties(width=850)

Output hidden; open in https://colab.research.google.com to view.

# Pay attention to

- Event (hover or click)
- Amount of information (provide only important info)
- Order (most important goes first)
- Formatting (most important should be distinct)

# Tooltip examples

- [Viz in Tooltip (Tableau)](https://www.tableau.com/about/blog/2017/11/viz-tooltip-here-78442)

# **Single selection**

Select one data point in order to focus on it, compare it against the others, or filter your data

In [15]:
single_selection = alt.selection_single()

# Colour rule: magenta where the selection matches, light blue elsewhere.
selected_or_lightblue = alt.condition(
    single_selection,
    alt.value('magenta'),
    alt.value('lightblue'),
)

# One circle per (year-month, genre) group: review count on x, mean score on
# y with a y scale that is not forced to include zero.
monthly_genre_points = alt.Chart(df).mark_circle().encode(
    x=alt.X('count()'),
    y=alt.Y('score', aggregate='mean', scale=alt.Scale(zero=False)),
    detail=alt.Detail(['yearmonth(date)', 'primary']),
    color=selected_or_lightblue,
)

monthly_genre_points.properties(width=800).add_selection(single_selection)

Output hidden; open in https://colab.research.google.com to view.

# **Multi selection**

Select several data points in order to focus on them, compare them against the others, or filter your data

In [16]:
# Hold shift while clicking to add more points to the selection.
multi_selection = alt.selection_multi()

# Colour rule: magenta where the selection matches, light gray elsewhere.
selected_or_lightgray = alt.condition(
    multi_selection,
    alt.value('magenta'),
    alt.value('lightgray'),
)

# One circle per (year-month, genre) group: review count on x, mean score on
# y with a y scale that is not forced to include zero.
multi_select_points = alt.Chart(df).mark_circle().encode(
    x=alt.X('count()'),
    y=alt.Y('score', aggregate='mean', scale=alt.Scale(zero=False)),
    detail=alt.Detail(['yearmonth(date)', 'primary']),
    color=selected_or_lightgray,
)

multi_select_points.properties(width=800).add_selection(multi_selection)

Output hidden; open in https://colab.research.google.com to view.

# **Interval selection (brush)**

Highlight and/or filter data by selecting interval

In [17]:
# Brush restricted to the x encoding.
interval_selection = alt.selection_interval(encodings=['x'])

# Colour rule: magenta inside the brushed interval, light gray outside.
brushed_or_lightgray = alt.condition(
    interval_selection,
    alt.value('magenta'),
    alt.value('lightgray'),
)

# One circle per review: date on x, score on y.
brushable_scatter = (
    alt.Chart(df)
    .mark_circle()
    .encode(
        x=alt.X('date:T'),
        y=alt.Y('score:Q'),
        color=brushed_or_lightgray,
    )
)

brushable_scatter.properties(width=600).add_selection(interval_selection)

Output hidden; open in https://colab.research.google.com to view.

# **Slider selection**

Filter your data by changing slider position

In [18]:
# Range slider from 1999 to 2019 in steps of 1, labelled 'year:'.
slider = alt.binding_range(min=1999, max=2019, step=1, name='year:')

# Selection on the calculated `year` field, driven by the slider and
# initialised to 2018.
slider_selection = alt.selection_single(
    name="year_selector",
    fields=['year'],
    bind=slider,
    init={'year': 2018},
)

# Histogram of scores in 0.1-wide bins.
binned_scores = alt.Chart(df).mark_rect().encode(
    x=alt.X('score:Q', bin=alt.Bin(step=0.1)),
    y=alt.Y('count()'),
)

# The `year` field must be calculated before the filter that reads it, so the
# transform order below is kept exactly as is.
(
    binned_scores
    .transform_calculate(year='year(datum.date)')
    .add_selection(slider_selection)
    .transform_filter(slider_selection)
    .properties(width=800)
)

Output hidden; open in https://colab.research.google.com to view.

# **Dropdown selection**

Highlight or filter data by choosing one of the options in dropdown menu

In [19]:
# Genres offered in the dropdown menu.
genre_options = ['Rock', 'Electronic', 'Rap', 'Experimental', 'Jazz', 'Global', 'Pop/R&B']

dropdown = alt.binding_select(options=genre_options)

# Selection on the `primary` field, bound to the dropdown.
menu = alt.selection_single(fields=['primary'], bind=dropdown, name="select")

# Histogram of scores in 0.1-wide bins, filtered by the menu selection.
genre_score_histogram = alt.Chart(df).mark_rect().encode(
    x=alt.X('score:Q', bin=alt.Bin(step=0.1)),
    y=alt.Y('count()'),
)

(
    genre_score_histogram
    .add_selection(menu)
    .transform_filter(menu)
    .properties(width=800)
)

Output hidden; open in https://colab.research.google.com to view.

# Pay attention to

- Don't make the menu too long
- You can use another chart as the selection interface instead of a dropdown menu

# **Let's combine**

In [20]:
# Overview + detail: brushing the thin overview panel filters the large
# detail panel above it.
brush = alt.selection_interval(encodings=['x'])

# Encodings both panels share: monthly time axis, colour by genre.
monthly_brush_base = alt.Chart(df).mark_area().encode(
    x=alt.X('date:T', timeUnit='yearmonth'),
    color=alt.Color('primary:O'),
)

# Detail panel: review counts, restricted to the brushed interval.
big_area = (
    monthly_brush_base
    .encode(y=alt.Y('count()'))
    .transform_filter(brush)
    .properties(width=850)
)

# Overview panel: centre-stacked counts; this is the view that owns the brush.
small_area = (
    monthly_brush_base
    .encode(y=alt.Y('count()', stack='center'))
    .add_selection(brush)
    .properties(width=850, height=50)
)

big_area & small_area

Output hidden; open in https://colab.research.google.com to view.

In [21]:
# Two charts side by side: clicking a bar in the genre chart filters the
# score histogram to that genre.
genre_selection = alt.selection_single(fields=['primary'])

# Dark-blue histogram of scores (0.1-wide bins), filtered by the selection.
scores_histogram = (
    alt.Chart(df)
    .mark_rect()
    .encode(
        x=alt.X('score:Q', bin=alt.Bin(step=0.1)),
        y=alt.Y('count()'),
        color=alt.value('darkblue'),
    )
    .transform_filter(genre_selection)
    .properties(width=500, height=300)
)

# Bar colour: dark blue where the selection matches, light gray elsewhere.
genre_bar_colour = alt.condition(
    genre_selection,
    alt.value('darkblue'),
    alt.value('lightgray'),
)

# Review count per genre, sorted by count descending; owns the selection.
genre_count = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X('count()'),
        y=alt.Y('primary:N', sort=alt.Sort(op='count',  order='descending')),
        color=genre_bar_colour,
    )
    .add_selection(genre_selection)
    .properties(width=300, height=300)
)

scores_histogram | genre_count

Output hidden; open in https://colab.research.google.com to view.

In [22]:
# Layer + concatenate: a light histogram of all reviews stays in the
# background while a dark histogram of the clicked genre is drawn over it.
genre_selection = alt.selection_single(fields=['primary'])

# Background layer: all reviews, light blue, never filtered.
background_histogram = (
    alt.Chart(df)
    .mark_rect()
    .encode(
        x=alt.X('score:Q', bin=alt.Bin(step=0.1)),
        y=alt.Y('count()'),
        color=alt.value('lightblue'),
    )
)

# Foreground layer: same bins, dark blue, filtered by the selection.
scores_histogram = (
    alt.Chart(df)
    .mark_rect()
    .encode(
        x=alt.X('score:Q', bin=alt.Bin(step=0.1)),
        y=alt.Y('count()'),
        color=alt.value('darkblue'),
    )
    .transform_filter(genre_selection)
    .properties(width=500, height=300)
)

# Bar colour: dark blue where the selection matches, light blue elsewhere.
genre_bar_colour = alt.condition(
    genre_selection,
    alt.value('darkblue'),
    alt.value('lightblue'),
)

# Review count per genre, sorted by count descending; owns the selection.
genre_count = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X('count()'),
        y=alt.Y('primary:N', sort=alt.Sort(op='count',  order='descending')),
        color=genre_bar_colour,
    )
    .add_selection(genre_selection)
    .properties(width=300, height=300)
)

# Background first, filtered histogram on top, genre chart to the right.
layered_histogram = alt.layer(background_histogram, scores_histogram)

alt.hconcat(layered_histogram, genre_count)

Output hidden; open in https://colab.research.google.com to view.

In [23]:
# Brushing the scatterplot along x redraws the histogram from the brushed
# reviews only.
brush_scatterplot = alt.selection_interval(encodings=['x'])

# Point colour: dark blue inside the brush, light gray outside.
brushed_point_colour = alt.condition(
    brush_scatterplot,
    alt.value('darkblue'),
    alt.value('lightgray'),
)

# One circle per review: date on x, score on y; owns the brush.
scatterplot = (
    alt.Chart(df)
    .mark_circle()
    .encode(
        x=alt.X('date:T'),
        y=alt.Y('score:Q'),
        color=brushed_point_colour,
    )
    .properties(width=600)
    .add_selection(brush_scatterplot)
)

# Counts on x for 0.25-wide score bins on y, filtered by the brush.
filtered_histogram = (
    alt.Chart(df)
    .mark_rect()
    .encode(
        x=alt.X('count()'),
        y=alt.Y('score:Q', bin=alt.Bin(step=0.25)),
        color=alt.value('darkblue'),
    )
    .properties(width=200)
    .transform_filter(brush_scatterplot)
)

scatterplot | filtered_histogram

Output hidden; open in https://colab.research.google.com to view.

In [24]:
# Two brushes: the scatterplot brush (x) filters the histogram, and the
# histogram brush (y) controls the opacity of the scatterplot points.
brush_scatterplot = alt.selection_interval(encodings=['x'])
brush_histogram = alt.selection_interval(encodings=['y'])

# Colour follows the scatterplot brush: dark blue inside, light gray outside.
point_colour = alt.condition(
    brush_scatterplot,
    alt.value('darkblue'),
    alt.value('lightgray'),
)

# Opacity follows the histogram brush: 0.7 inside, 0.1 outside.
point_opacity = alt.condition(
    brush_histogram,
    alt.value(0.7),
    alt.value(0.1),
)

# One circle per review: date on x, score on y; owns the x brush.
scatterplot = (
    alt.Chart(df)
    .mark_circle()
    .encode(
        x=alt.X('date:T'),
        y=alt.Y('score:Q'),
        color=point_colour,
        opacity=point_opacity,
    )
    .properties(width=600)
    .add_selection(brush_scatterplot)
)

# Counts on x for 0.25-wide score bins on y; owns the y brush and is
# filtered by the scatterplot brush.
filtered_histogram = (
    alt.Chart(df)
    .mark_rect()
    .encode(
        x=alt.X('count()'),
        y=alt.Y('score:Q', bin=alt.Bin(step=0.25)),
        color=alt.value('darkblue'),
    )
    .properties(width=200)
    .add_selection(brush_histogram)
    .transform_filter(brush_scatterplot)
)

scatterplot | filtered_histogram

Output hidden; open in https://colab.research.google.com to view.

# **Exercise**

- Create a chart that shows how average score by genre changed by month. Add a line to show overall average score. Add interactivity to enable interval selection on X axis (time period). Add filtering function in order to recalculate average score for selected interval and reposition the horizontal line.

- Make a data visualization of number of reviews by genre and year. Add interactivity to enable selection of a specific genre. Make other genres less visible on a selection (filter them out or decrease their opacity)

- Create two charts. The first should show the distribution of scores by genre. The second should show the count of reviews for each genre. Add interactivity to enable interval selection on the first chart. Add a filter so that only the selected reviews are counted, and adjust the second chart accordingly.

- Create two charts. The first should show the distribution of scores by genre. The second should show the overall distribution. Add interactivity to enable interval selection on the second chart. Add a filter so that the first chart shows only the selected interval.

In [60]:
# Brush restricted to the x encoding (the monthly time axis).
interval_selection = alt.selection_interval(encodings=['x'])

# One point per (year-month, genre): mean score on y. Owns the brush.
chart = (
    alt.Chart(df)
    .mark_point()
    .encode(
        x=alt.X('date:T', timeUnit='yearmonth'),
        y=alt.Y('mean(score):Q'),
        detail=alt.Detail('primary:N'),
    )
    .add_selection(interval_selection)
)

# Red rule at the mean score of the reviews that pass the brush filter.
line = (
    alt.Chart(df)
    .mark_rule()
    .encode(
        y=alt.Y('mean(score):Q'),
        color=alt.value('red'),
    )
    .transform_filter(interval_selection)
)

# Points first, rule on top; `+` is the operator form of layering.
(chart + line).properties(width=1200)

Output hidden; open in https://colab.research.google.com to view.

In [98]:
# Project the selection onto `primary` so that a click selects a whole genre.
# Without a field projection the selection matches only the single aggregated
# datum that was clicked, so the clicked line is not reliably highlighted as
# a unit.
single_selection = alt.selection_single(fields=['primary'])

# Reviews per year, one line per genre. The selected genre stays fully opaque
# and the other genres are dimmed to 0.3.
chart = alt.Chart(df).mark_line(size=5).encode(
    x=alt.X('date:T', timeUnit='year'),
    y=alt.Y('count(score):Q'),
    color=alt.Color('primary:N'),
    opacity=alt.condition(single_selection, alt.value(1), alt.value(0.3))
).properties(width = 1100).add_selection(single_selection)

chart

Output hidden; open in https://colab.research.google.com to view.

In [113]:
# Brush restricted to the x encoding (score) of the first chart.
interval_selection = alt.selection_interval(encodings=['x'])

# Score on x, genre on y, circle size encodes the number of reviews.
# Owns the brush.
chart1 = (
    alt.Chart(df)
    .mark_circle()
    .encode(
        x=alt.X('score:Q'),
        y=alt.Y('primary:N'),
        size=alt.Size('count()'),
    )
    .properties(width=750, height=300)
    .add_selection(interval_selection)
)

# Review count per genre, computed from the brushed reviews only.
chart2 = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X('primary:N'),
        y=alt.Y('count()'),
    )
    .properties(width=250, height=300)
    .transform_filter(interval_selection)
)

chart1 | chart2

Output hidden; open in https://colab.research.google.com to view.

In [125]:
# Brush restricted to the x encoding (the yearly time axis) of the area chart.
interval_selection = alt.selection_interval(encodings=['x'])

# Score on x, genre on y, circle size encodes the number of reviews;
# filtered by the brush.
chart1 = (
    alt.Chart(df)
    .mark_circle()
    .encode(
        x=alt.X('score:Q'),
        y=alt.Y('primary:N'),
        size=alt.Size('count()'),
    )
    .properties(width=1100, height=300)
    .transform_filter(interval_selection)
)

# Semi-transparent area of reviews per year; owns the brush.
chart2 = (
    alt.Chart(df)
    .mark_area()
    .encode(
        x=alt.X('date:T', timeUnit='year' ),
        y=alt.Y('count()'),
        opacity=alt.value(0.3),
    )
    .properties(width=1100, height=100)
    .add_selection(interval_selection)
)

# Area chart on top, filtered circle chart below it.
chart2 & chart1

Output hidden; open in https://colab.research.google.com to view.