
# Altair overview

Altair is a declarative statistical visualization library for Python, based on Vega-Lite.

* [Altair](https://altair-viz.github.io/index.html)
* [Vega-Lite](https://vega.github.io/vega-lite/)

In [40]:
import altair as alt
import pandas as pd
from altair import datum
from vega_datasets import data

# Example

In [41]:
# Fetch the bundled `cars` dataset (returned as a pandas DataFrame) and preview it.
cars = data.cars()
cars.head(n=5)


Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA


In [42]:
# Bubble chart: Horsepower against Miles_per_Gallon, coloured by Origin,
# circle size driven by Miles_per_Gallon, car Name shown as the tooltip.
(
    alt.Chart(cars)
    .mark_circle()
    .encode(
        x=alt.X('Horsepower'),
        y=alt.Y('Miles_per_Gallon'),
        color=alt.Color('Origin'),
        size=alt.Size('Miles_per_Gallon'),
        tooltip=alt.Tooltip('Name'),
    )
    .properties(width=900)
)

In [43]:
# Show the whole DataFrame; the rendered table elides the middle rows.
cars

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA
...,...,...,...,...,...,...,...,...,...
401,ford mustang gl,27.0,4,140.0,86.0,2790,15.6,1982-01-01,USA
402,vw pickup,44.0,4,97.0,52.0,2130,24.6,1982-01-01,Europe
403,dodge rampage,32.0,4,135.0,84.0,2295,11.6,1982-01-01,USA
404,ford ranger,28.0,4,120.0,79.0,2625,18.6,1982-01-01,USA


In [44]:
# Interval (brush) selection shared by both charts below.
interval = alt.selection_interval()

# Scatter plot: points inside the selection keep their Origin colour,
# everything else is drawn in light grey.
scatter = (
    alt.Chart(cars)
    .mark_point()
    .encode(
        x=alt.X('Horsepower'),
        y=alt.Y('Miles_per_Gallon'),
        color=alt.condition(interval, 'Origin', alt.value('lightgrey')),
    )
    .properties(width=900)
    .add_selection(interval)
)

# Bar chart of record counts per Origin, filtered down to the selected points.
hist = (
    alt.Chart(cars)
    .mark_bar()
    .encode(
        x=alt.X('count()'),
        y=alt.Y('Origin'),
        color=alt.Color('Origin'),
    )
    .transform_filter(interval)
)

# Stack the two charts vertically.
alt.vconcat(scatter, hist)

# Data Transformations

## Filtering

In [26]:
# `alt`, `datum` and `data` come from the import cell at the top of the notebook.
# Keep only the URL of the population dataset; the charts below are built
# directly from this URL rather than from a DataFrame.
pop = data.population.url

pop

'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/population.json'

In [27]:
# Area chart of people per age group, restricted by a datum predicate to
# records with year == 2000 and sex == 1.
(
    alt.Chart(pop)
    .mark_area()
    .encode(
        x=alt.X('age:O'),
        y=alt.Y('people:Q'),
    )
    .transform_filter(
        (datum.year == 2000) & (datum.sex == 1)
    )
)

### Filter by selection

In [28]:
# `alt` is imported in the first cell and `pop` (the population dataset URL)
# is defined in the Filtering section, so neither is re-declared here.

# Multi-selection keyed on the `year` field.
selection = alt.selection_multi(fields=['year'])

# Line chart of summed people per age group, one line per year,
# showing only the years currently in the selection.
top = alt.Chart().mark_line().encode(
    x='age:O',
    y='sum(people):Q',
    color='year:O'
).properties(
    width=600, height=200
).transform_filter(
    selection
)

# Bar chart of summed people per year; selected bars are steelblue, the rest
# light gray. The selection is registered with add_selection(), the same way
# as the scatter plot in the Example section, instead of the legacy
# properties(selection=...) form.
bottom = alt.Chart().mark_bar().encode(
    x='year:O',
    y='sum(people):Q',
    color=alt.condition(selection, alt.value('steelblue'), alt.value('lightgray'))
).properties(
    width=600, height=100
).add_selection(
    selection
)

# Both charts are created without data; vconcat supplies the shared source.
alt.vconcat(
    top, bottom,
    data=pop
)

# Selection

In [45]:
# Small demo frame: three numeric columns (A, B, C) and one string column (S).
df = pd.DataFrame(
    {
        'A': [10, 20, 30],
        'B': [99, 77, 102],
        'C': [200, 210, 166],
        'S': ['XX', 'YY', 'ZZ'],
    }
)
df

Unnamed: 0,A,B,C,S
0,10,99,200,XX
1,20,77,210,YY
2,30,102,166,ZZ


In [52]:
# Interval (brush) selection shared by the three panels below.
s = alt.selection_interval()

# Base chart: x is fixed to column A; points inside the selection are coloured
# by column C, all other points are drawn in firebrick. The selection is
# registered with add_selection(), the same way as the scatter plot in the
# Example section, instead of the legacy properties(selection=...) form.
chart = alt.Chart(df).mark_circle(size=120).encode(
    x='A',
    color=alt.condition(s, 'C', alt.value('firebrick'))
).add_selection(s)

# One panel per y column, concatenated horizontally; every panel is derived
# from the same base chart and therefore carries the same selection.
chart.encode(y='A') | chart.encode(y='B') | chart.encode(y='C')

# Layered plots

In [34]:
# Load the cars dataset again (same call as in the Example section).
cars = data.cars()

In [35]:
# Preview the first rows before aggregating.
cars.head()

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA


In [36]:
# Average the numeric columns for every (calendar year, Origin) pair.
# numeric_only=True is required on pandas >= 2.0, where mean() no longer drops
# non-numeric columns silently and raises TypeError on the string `Name` column.
# It also leaves the datetime `Year` column out of the aggregation, so the
# integer year group key is the only `Year` column after reset_index().
df_plot = (
    cars
    .groupby([cars.Year.dt.year, 'Origin'])
    .mean(numeric_only=True)
    .reset_index()
)
df_plot.head()

Unnamed: 0,Year,Origin,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration
0,1970,Europe,25.2,4.0,112.0,91.0,2439.333333,16.666667
1,1970,Japan,25.5,4.0,105.0,91.5,2251.0,14.75
2,1970,USA,15.272727,7.703704,339.185185,165.962963,3752.148148,11.685185
3,1971,Europe,28.75,4.0,95.4,68.8,2014.8,17.4
4,1971,Japan,29.5,4.0,88.25,79.25,1936.0,16.375


In [55]:
# Base chart over the USA rows only. No mark is chosen yet, so the same
# encodings can be reused with different marks.
chart = alt.Chart(
    df_plot.loc[df_plot['Origin'] == 'USA']
).encode(
    x=alt.X('Year:O'),
    y=alt.Y('Horsepower'),
).properties(width=600)

# Each mark can also be rendered on its own (chart.mark_point() or
# chart.mark_line()); layering overlays both in a single plot.
alt.layer(chart.mark_point(), chart.mark_line())

## Facets

Setting the `row` encoding creates a separate chart for each category.

In [39]:
# Miles_per_Gallon per year, with the `row` channel splitting the plot into
# one chart per Origin value.
chart = alt.Chart(df_plot).encode(
    x=alt.X('Year:O'),
    y=alt.Y('Miles_per_Gallon'),
    row=alt.Row('Origin'),
).properties(width=800)

chart.mark_line()

# More Altair

* Presentation from PyCon 2018: https://altair-viz.github.io/altair-tutorial/README.html