# Visualization Curriculum

## Chapter 5: Multi-View Composition

---
* Author:  [Yuttapong Mahasittiwat](mailto:khala1391@gmail.com)
* Technologist | Data Modeler | Data Analyst
* [YouTube](https://www.youtube.com/khala1391)
* [LinkedIn](https://www.linkedin.com/in/yuttapong-m/)
---

Source: [Visualization Curriculum](https://idl.uw.edu/visualization-curriculum/altair_introduction.html)

In [8]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import altair as alt
# Report the version of every library used in this notebook, one per line.
for label, module in (
    ("pandas", pd),
    ("numpy", np),
    ("matplotlib", mpl),
    ("seaborn", sns),
    ("altair", alt),
):
    print(f"{label} version :", module.__version__)

pandas version : 2.2.1
numpy version : 1.26.4
matplotlib version : 3.8.4
seaborn version : 0.13.2
altair version : 5.4.0


In [9]:
import warnings
warnings.filterwarnings('ignore', category=FutureWarning, message="the convert_dtype parameter is deprecated")

### Data

In [14]:
# URL of the vega-datasets weather CSV. It is kept as a plain string because
# some charts below pass it to Altair directly instead of using the DataFrame.
weather = 'https://cdn.jsdelivr.net/npm/vega-datasets@1/data/weather.csv'

# pandas copy of the same table, used by the charts built from `df`.
df = pd.read_csv(filepath_or_buffer=weather)

In [18]:
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() would render a stray "None" below the summary.
df.info()

# First three rows as a quick sanity check of the loaded table.
df.head(3)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2922 entries, 0 to 2921
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   location       2922 non-null   object 
 1   date           2922 non-null   object 
 2   precipitation  2922 non-null   float64
 3   temp_max       2922 non-null   float64
 4   temp_min       2922 non-null   float64
 5   wind           2922 non-null   float64
 6   weather        2922 non-null   object 
dtypes: float64(4), object(3)
memory usage: 159.9+ KB


None

Unnamed: 0,location,date,precipitation,temp_max,temp_min,wind,weather
0,Seattle,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,Seattle,2012-01-02,10.9,10.6,2.8,4.5,rain
2,Seattle,2012-01-03,0.8,11.7,7.2,2.3,rain


### Layer

#### shared axis

In [38]:
# Area band between the monthly average minimum (y2) and maximum (y) temperature.
alt.Chart(df).mark_area().encode(
    x='month(date):T',
    y='average(temp_max):Q',
    y2='average(temp_min):Q',
)

In [42]:
# Same temperature band, now one area per location; the reduced opacity keeps
# overlapping areas visible.
alt.Chart(df).mark_area(opacity=0.3).encode(
    x='month(date):T',
    y='average(temp_max):Q',
    y2='average(temp_min):Q',
    color='location:N',
)

In [44]:
# Compute the per-row midpoint of min and max temperature, then plot its
# monthly average as one line per location.
alt.Chart(df).transform_calculate(
    temp_mid='(datum.temp_min+datum.temp_max)/2'
).mark_line().encode(
    x='month(date):T',
    y='average(temp_mid):Q',
    color='location:N',
)

In [50]:
# Midpoint line again, this time with a unary `+` in front of each field so
# that both operands are converted to numbers before they are added.
alt.Chart(df).transform_calculate(
    temp_mid='(+datum.temp_min+ +datum.temp_max)/2'
).mark_line().encode(
    x='month(date):T',
    y='average(temp_mid):Q',
    color='location:N',
)

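**Note**: meaning of the unary `+` in `+datum.temp_min`
- **Coercion to number**: If `datum.temp_min` and `datum.temp_max` are stored as strings, the unary `+` operator forces JavaScript to convert them to numbers. Without this conversion, the calculation could perform string concatenation instead of numeric addition. In the pandas DataFrame loaded above both columns are already `float64`, so the `+` is harmless there; it matters when the values may arrive as text, e.g. when the CSV is loaded directly from its URL.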

In [66]:
# Layer 1: min/max temperature band per location, read from the CSV URL.
tempMinMax = alt.Chart(weather).mark_area(opacity=0.3).encode(
    x='month(date):T',
    y='average(temp_max):Q',
    y2='average(temp_min):Q',
    color='location:N',
)

# Layer 2: monthly average of the calculated midpoint temperature per location.
tempMid = alt.Chart(weather).mark_line().encode(
    x='month(date):T',
    y='average(temp_mid):Q',
    color='location:N',
).transform_calculate(
    temp_mid='(+datum.temp_min + +datum.temp_max) / 2'
)

# The `+` operator stacks both charts into a single layered view.
tempMinMax + tempMid

In [69]:
# Function form of layering: builds a layered chart from the two charts defined earlier.
alt.layer(tempMinMax,tempMid)

#### dual-axis

In [91]:
# Monthly average precipitation for Seattle as a smoothed grey line.
alt.Chart(df).properties(
    width=400,
    height=400,
).mark_line(
    stroke='grey',
    interpolate='monotone',
).transform_filter(
    'datum.location=="Seattle"'
).encode(
    alt.X('month(date):T', title=None),
    alt.Y('average(precipitation):Q').title('Precipitation'),
)

In [95]:
# Seattle temperature band; the x-axis shows abbreviated month names.
tempMinMax = alt.Chart(df).mark_area(opacity=0.3).transform_filter(
    'datum.location == "Seattle"'
).encode(
    alt.X('month(date):T', title=None).axis(format='%b'),
    alt.Y('average(temp_max):Q').title('Avg. Temperature °C'),
    y2='average(temp_min):Q',
)

# Seattle monthly average precipitation as a smoothed grey line.
precip = alt.Chart(df).mark_line(
    stroke='grey',
    interpolate='monotone',
).transform_filter(
    'datum.location=="Seattle"'
).encode(
    alt.X('month(date):T', title=None),
    alt.Y('average(precipitation):Q').title('Precipitation'),
)

# Layer both charts without any scale resolution settings.
alt.layer(tempMinMax, precip)

In [97]:
# Seattle temperature band; the x-axis shows abbreviated month names.
tempMinMax = alt.Chart(df).mark_area(opacity=0.3).transform_filter(
    'datum.location == "Seattle"'
).encode(
    alt.X('month(date):T', title=None).axis(format='%b'),
    alt.Y('average(temp_max):Q').title('Avg. Temperature °C'),
    y2='average(temp_min):Q',
)

# Seattle monthly average precipitation as a smoothed grey line.
precip = alt.Chart(df).mark_line(
    stroke='grey',
    interpolate='monotone',
).transform_filter(
    'datum.location=="Seattle"'
).encode(
    alt.X('month(date):T', title=None),
    alt.Y('average(precipitation):Q').title('Precipitation'),
)

# Resolve the y scale independently so each layer gets its own y-axis.
alt.layer(tempMinMax, precip).resolve_scale(y='independent')

In [101]:
# Temperature band without its own filter; filtering happens once on the layered chart.
tempMinMax = alt.Chart(df).mark_area(opacity=0.3).encode(
    alt.X('month(date):T', title=None).axis(format='%b'),
    alt.Y('average(temp_max):Q').title('Avg. Temperature °C'),
    y2='average(temp_min):Q',
)

# Precipitation line, also unfiltered at this level.
precip = alt.Chart(df).mark_line(
    stroke='grey',
    interpolate='monotone',
).encode(
    alt.X('month(date):T', title=None),
    alt.Y('average(precipitation):Q').title('Precipitation'),
)

# Apply the Seattle filter to the top-level layered chart instead of repeating
# it per layer, then give each layer its own y scale.
alt.layer(tempMinMax, precip).transform_filter(
    'datum.location=="Seattle"'
).resolve_scale(y='independent')

### Facet

In [103]:
# Histogram of Seattle's daily maximum temperature (binned x, record count on y).
alt.Chart(df).transform_filter(
    'datum.location == "Seattle"'
).mark_bar().encode(
    alt.X('temp_max:Q').bin(True).title('Temperature (°C)'),
    y='count():Q',
)

In [113]:
# Fixed color per weather type; `domain` and `range` are matched by position.
colors = alt.Scale(
    range=['#aec7e8', '#c7c7c7', '#1f77b4', '#9467bd', '#e7ba52'],
    domain=['drizzle', 'fog', 'rain', 'snow', 'sun'],
)

# Seattle temperature histogram, faceted into one column per weather type
# through the `column` encoding channel.
alt.Chart(df).properties(
    width=150,
    height=150,
).transform_filter(
    'datum.location == "Seattle"'
).mark_bar().encode(
    alt.X('temp_max:Q').bin(True).title('Temperature (°C)'),
    alt.Color('weather:N').scale(colors),
    y='count():Q',
    column='weather:N',
)

In [119]:
# Explicit form: build the same small multiples with the `facet` operator
# rather than the `column` encoding channel.

# Fixed color per weather type; `domain` and `range` are matched by position.
colors = alt.Scale(
    range=['#aec7e8', '#c7c7c7', '#1f77b4', '#9467bd', '#e7ba52'],
    domain=['drizzle', 'fog', 'rain', 'snow', 'sun'],
)

# The inner chart carries no data; `facet` receives the DataFrame, and the
# Seattle filter is applied to the resulting faceted chart.
alt.Chart().properties(
    width=150,
    height=150,
).mark_bar().encode(
    alt.X('temp_max:Q').bin(True).title('Temperature (°C)'),
    alt.Color('weather:N').scale(colors),
    y='count():Q',
).facet(
    column='weather:N',
    data=df,
).transform_filter(
    'datum.location == "Seattle"'
)

- For basic charts, we should use the `column` or `row` encoding channels when we can. However, using the `facet` operator explicitly is useful when we want to facet composed views.
- We can layer the charts much as before, then invoke `facet` on the layered chart object, passing in the data and specifying column facets based on the `location` field.

In [131]:
# Min/max temperature band; no data is bound here because `facet` supplies it below.
tempMinMax = alt.Chart().mark_area(opacity=0.3).encode(
    alt.X('month(date):T', title=None).axis(format='%b'),
    alt.Y('average(temp_max):Q').title('Avg. Temperature (°C)'),
    y2='average(temp_min):Q',
    color='location:N',
)

# Midpoint temperature line, also without bound data.
tempMid = alt.Chart().mark_line().encode(
    x='month(date):T',
    y='average(temp_mid):Q',
    color='location:N',
).transform_calculate(
    temp_mid='(+datum.temp_min + +datum.temp_max) / 2'
)

# Facet the layered chart into one column per location.
alt.layer(tempMinMax, tempMid).facet(
    column='location:N',
    data=weather,
)

In [133]:
# Min/max temperature band; no data is bound here because `facet` supplies it below.
tempMinMax = alt.Chart().mark_area(opacity=0.3).encode(
    alt.X('month(date):T', title=None).axis(format='%b'),
    alt.Y('average(temp_max):Q').title('Avg. Temperature (°C)'),
    y2='average(temp_min):Q',
    color='location:N',
)

# Midpoint temperature line, also without bound data.
tempMid = alt.Chart().mark_line().encode(
    x='month(date):T',
    y='average(temp_mid):Q',
    color='location:N',
).transform_calculate(
    temp_mid='(+datum.temp_min + +datum.temp_max) / 2'
)

# One column per location, with the y *axis* resolved independently per facet.
alt.layer(tempMinMax, tempMid).facet(
    column='location:N',
    data=weather,
).resolve_axis(y='independent')

In [135]:
# Min/max temperature band; no data is bound here because `facet` supplies it below.
tempMinMax = alt.Chart().mark_area(opacity=0.3).encode(
    alt.X('month(date):T', title=None).axis(format='%b'),
    alt.Y('average(temp_max):Q').title('Avg. Temperature (°C)'),
    y2='average(temp_min):Q',
    color='location:N',
)

# Midpoint temperature line, also without bound data.
tempMid = alt.Chart().mark_line().encode(
    x='month(date):T',
    y='average(temp_mid):Q',
    color='location:N',
).transform_calculate(
    temp_mid='(+datum.temp_min + +datum.temp_max) / 2'
)

# One column per location, with the y *scale* resolved independently per facet.
alt.layer(tempMinMax, tempMid).facet(
    column='location:N',
    data=weather,
).resolve_scale(y='independent')

### Concatenate
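Concatenation places different views of the same dataset next to each other.
- `alt.hconcat(a, b)` is equivalent to `a | b` (side by side)
- `alt.vconcat(a, b)` is equivalent to `a & b` (stacked vertically)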

In [141]:
# Monthly average maximum temperature, one line per location.
alt.Chart(df).mark_line().encode(
    alt.X('month(date):T', title=None),
    y='average(temp_max):Q',
    color='location:N',
)

In [144]:
# Shared template: size, mark, x and color are identical for all three charts.
base = alt.Chart(df).properties(
    width=240,
    height=180,
).mark_line().encode(
    alt.X('month(date):T', title=None),
    color='location:N',
)

# Each chart only adds its own y encoding on top of the template.
temp = base.encode(y='average(temp_max):Q')
precip = base.encode(y='average(precipitation):Q')
wind = base.encode(y='average(wind):Q')

# `|` concatenates the charts horizontally.
temp | precip | wind

In [269]:
# Function form of horizontal concatenation for the three charts.
alt.hconcat(temp,precip,wind)

In [152]:
# In Python `&` binds tighter than `|`, so the vertical pair is built first:
# `temp` sits to the left of `precip` stacked over `wind`.
temp | (precip & wind)

In [164]:
# The parentheses build the horizontal pair first; `wind` is then concatenated
# vertically with that pair.
(temp | precip) & wind

### Repeat
- For cases where only one or two variables are changing, the repeat operator provides a convenient shortcut for creating multiple charts

In [168]:
# One chart per listed field, laid out as columns; `alt.repeat('column')` is the
# placeholder that `repeat(column=...)` fills in for the y field.
alt.Chart(df).properties(
    width=240,
    height=180,
).mark_line().encode(
    alt.X('month(date):T', title=None),
    alt.Y(alt.repeat('column'), type='quantitative', aggregate='average'),
    color='location:N',
).repeat(
    column=['temp_max', 'precipitation', 'wind']
)

In [170]:
# Same repeated chart, laid out as rows instead of columns.
alt.Chart(df).properties(
    width=240,
    height=180,
).mark_line().encode(
    alt.X('month(date):T', title=None),
    alt.Y(alt.repeat('row'), type='quantitative', aggregate='average'),
    color='location:N',
).repeat(
    row=['temp_max', 'precipitation', 'wind']
)

In [172]:
# Rows repeat over the measured fields; `location` is encoded on the `column`
# channel here rather than on color.
alt.Chart(df).properties(
    width=240,
    height=180,
).mark_line().encode(
    alt.X('month(date):T', title=None),
    alt.Y(alt.repeat('row'), type='quantitative', aggregate='average'),
    column='location:N',
).repeat(
    row=['temp_max', 'precipitation', 'wind']
)

In [182]:
# Scatter plot matrix (SPLOM): every row field plotted against every column field.
# The data is passed to `repeat`, and the Seattle filter is applied to the
# repeated chart as a whole.
alt.Chart().properties(
    width=150,
    height=150,
).mark_point(filled=True, size=15, opacity=0.5).encode(
    alt.X(alt.repeat('column'), type='quantitative'),
    alt.Y(alt.repeat('row'), type='quantitative'),
).repeat(
    column=['temp_max', 'precipitation', 'wind'],
    row=['temp_max', 'precipitation', 'wind'],
    data=weather,
).transform_filter(
    'datum.location == "Seattle"'
)

In [180]:
# Scatter plot matrix without a location filter; points are colored by location.
alt.Chart().properties(
    width=150,
    height=150,
).mark_point(filled=True, size=15, opacity=0.5).encode(
    alt.X(alt.repeat('column'), type='quantitative'),
    alt.Y(alt.repeat('row'), type='quantitative'),
    color='location:N',
).repeat(
    column=['temp_max', 'precipitation', 'wind'],
    row=['temp_max', 'precipitation', 'wind'],
    data=weather,
)

### View composition algebra

In [192]:
# Bars: Seattle's average maximum temperature per month.
basic1 = alt.Chart(weather).mark_bar().transform_filter(
    'datum.location == "Seattle"'
).encode(
    x='month(date):O',
    y='average(temp_max):Q',
)

# Rule: Seattle's average maximum temperature with no x encoding.
basic2 = alt.Chart(weather).mark_rule(stroke='firebrick').transform_filter(
    'datum.location == "Seattle"'
).encode(
    y='average(temp_max):Q',
)

# Show the two charts next to each other.
basic1 | basic2

In [194]:
# Bars: Seattle's average maximum temperature per month.
basic1 = alt.Chart(weather).mark_bar().transform_filter(
    'datum.location == "Seattle"'
).encode(
    x='month(date):O',
    y='average(temp_max):Q',
)

# Rule: Seattle's average maximum temperature with no x encoding.
basic2 = alt.Chart(weather).mark_rule(stroke='firebrick').transform_filter(
    'datum.location == "Seattle"'
).encode(
    y='average(temp_max):Q',
)

# Layer the rule on top of the bars in a single view.
basic1 + basic2

`mark_rule` draws straight lines, which are useful for adding reference lines or boundaries to a visualization.

In [243]:
# Monthly bars for whichever field the enclosing `repeat` supplies.
bar = alt.Chart().mark_bar().encode(
    alt.X('month(date):O').title('Month'),
    alt.Y(alt.repeat('column'), type='quantitative', aggregate='average'),
)

# Firebrick rule at the average of the same repeated field.
rule = alt.Chart().mark_rule(stroke='firebrick').encode(
    alt.Y(alt.repeat('column'), type='quantitative', aggregate='average'),
)

# Layer bars and rule, repeat the layered chart once per field, and filter the
# whole composition down to Seattle.
alt.layer(bar, rule).properties(
    height=150,
    width=200,
).repeat(
    column=['temp_max', 'precipitation', 'wind'],
    data=weather,
).transform_filter(
    'datum.location == "Seattle"'
)

In [291]:
# Scatter plot matrix of the three measures. No data is bound here; the
# top-level concatenation below supplies it.
splom = alt.Chart().properties(
    width=125,
    height=125,
).mark_point(filled=True, size=15, opacity=0.5).encode(
    alt.X(alt.repeat('column'), type='quantitative'),
    alt.Y(alt.repeat('row'), type='quantitative'),
).repeat(
    column=['wind', 'precipitation', 'temp_max'],
    row=['temp_max', 'precipitation', 'wind'],
)

# Monthly bars plus an average rule, repeated as one row per measure.
dateHist = alt.layer(
    alt.Chart().mark_bar().encode(
        alt.X('month(date):O').title('Month'),
        alt.Y(alt.repeat('row'), type='quantitative', aggregate='average'),
    ),
    alt.Chart().mark_rule(stroke='firebrick').encode(
        alt.Y(alt.repeat('row'), type='quantitative', aggregate='average'),
    ),
).properties(
    height=125,
    width=175,
).repeat(
    row=['temp_max', 'precipitation', 'wind']
)

# Temperature histogram faceted by weather type, with a fixed color per type.
tempHist = alt.Chart(weather).properties(
    width=115,
    height=100,
).mark_bar().encode(
    alt.X('temp_max:Q').bin(True).title('Temperature (°C)'),
    alt.Color('weather:N').scale(
        domain=['drizzle', 'fog', 'rain', 'snow', 'sun'],
        range=['#aec7e8', '#c7c7c7', '#1f77b4', '#9467bd', '#e7ba52'],
    ),
    y='count():Q',
).facet(
    column='weather:N'
)

# Assemble the dashboard: SPLOM and monthly bars side by side, histogram row
# underneath. Data, title and the Seattle filter are set once at the top level.
alt.vconcat(
    splom | dateHist,
    tempHist,
    title='Seattle Weather Dashboard',
    data=weather,
).transform_filter(
    'datum.location == "Seattle"'
).resolve_legend(
    color='independent'
).configure_axis(
    labelAngle=0
)