# Altair Basics 
Adapted from the Altair getting-started guide: https://altair-viz.github.io/getting_started/starting.html

In [23]:
# import necessary modules 
import pandas as pd
import altair as alt

In [24]:
# Altair is usually fed a pandas DataFrame. Here column 'a' holds letter
# labels and column 'b' holds integers.
data = pd.DataFrame(
    {
        'a': list('CCCDDDEEE'),
        'b': [2, 7, 4, 1, 2, 6, 8, 4, 7],
    }
)

# Everything starts from a Chart wrapping the data; the mark chooses how each
# row is drawn and the encodings map columns to visual channels.
(
    alt.Chart(data)
    .mark_point()
    .encode(x='a', y='b')
)

In [25]:
# Rather than plotting every row, aggregate: the x channel is the average of
# 'b', and bars are the usual mark for aggregated values.
# With the category on y and the aggregate on x, the bars run horizontally.
(
    alt.Chart(data)
    .mark_bar()
    .encode(y='a', x='average(b)')
)

In [26]:
# Same aggregation with the category on x, assigned to `chart` so the object
# can be inspected afterwards.
chart = (
    alt.Chart(data)
    .mark_bar()
    .encode(x='a', y='average(b)')
)
# Altair turns the plot specification into a JSON string that conforms to the
# Vega-Lite schema; uncomment the next line to see it.
# print(chart.to_json())

In [27]:
# Long-form channel specification: each channel is built as an explicit
# alt.X / alt.Y object with its type (and aggregate) passed as keyword
# arguments. Handy once a chart needs more advanced configuration.
(
    alt.Chart(data)
    .mark_bar()
    .encode(
        alt.Y('a', type='nominal'),
        alt.X('b', type='quantitative', aggregate='average'),
    )
)

In [28]:
# Customising the look: the mark takes a colour, and each channel object
# takes its own axis title.
(
    alt.Chart(data)
    .mark_bar(color='firebrick')
    .encode(
        alt.Y('a', title='category'),
        alt.X('average(b)', title='avg(b) by category'),
    )
)