# visualizations: altair

# setup

In [1]:
import numpy as np
import pandas as pd 
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import altair as alt

In [3]:
%matplotlib inline

In [4]:
# load the seaborn example datasets used by the charts below
df_iris, df_tips, df_fmri = (
    sns.load_dataset(dataset_name) for dataset_name in ("iris", "tips", "fmri")
)

# altair

In [5]:
# install via conda install -c conda-forge altair

In [6]:
# reference: https://github.com/empathy87/storytelling-with-data

In [7]:
# reference: https://altair-viz.github.io/
# reference: https://vega.github.io/vega-lite/docs
# reference: https://altair-viz.github.io/gallery/

In [8]:
# named palette: color name -> hex code
custom_colors = dict(
    GRAY_1="#231F20",
    GRAY_2="#414040",
    GRAY_3="#555655",
    GRAY_4="#646369",
    GRAY_5="#76787B",
    GRAY_6="#828282",
    GRAY_7="#929497",
    GRAY_8="#A6A6A5",
    GRAY_9="#BFBEBE",
    BLUE_1="#174A7E",
    BLUE_2="#4A81BF",
    BLUE_3="#94B2D7",
    BLUE_4="#94AFC5",
    RED_1="#C3514E",
    RED_2="#E6BAB7",
    GREEN_1="#0C8040",
    GREEN_2="#9ABB59",
    ORANGE_1="#F79747",
    PURPLE_1="#663399",
    BLACK="#000000",
)
# primary cycle: one saturated color per hue
color_cycle = [
    custom_colors[color_name]
    for color_name in ("BLUE_1", "GREEN_1", "RED_1", "ORANGE_1", "PURPLE_1")
]
# secondary cycle: the grays GRAY_5 through GRAY_9
color_cycle_2 = [custom_colors[f"GRAY_{level}"] for level in range(5, 10)]

In [9]:
# default chart dimensions: fixed width, height derived from the golden ratio
width = 650
golden_ratio = (np.sqrt(5) + 1) / 2
height = width / golden_ratio

In [10]:
def custom_theme():
    """Build the theme dictionary registered with Altair in this notebook.

    Returns:
        dict: a top-level ``"background"`` entry plus a ``"config"`` mapping
        with settings for axes, titles, legends, facet headers, color ranges,
        the view outline and the default mark styles. Colors are read from the
        module-level ``custom_colors``, ``color_cycle`` and ``color_cycle_2``.
    """
    # typography
    font = "Helvetica Neue"
    base_size = 16
    larger_size = 18
    subtitle_size = 20
    title_size = 36
    regular_weight = 400
    bold_weight = 600

    # palette roles
    label_color = custom_colors["GRAY_7"]
    title_color = custom_colors["GRAY_3"]
    subtitle_color = custom_colors["GRAY_5"]
    mark_color = custom_colors["BLUE_1"]

    # axes: no grid, thin gray domain and ticks, gray titles and labels
    axis = dict(
        grid=False,
        domainColor=label_color,
        domainWidth=1.25,
        tickColor=label_color,
        tickSize=5,
        tickWidth=1.25,
        tickCount=6,
        titleFont=font,
        titleColor=label_color,
        titleFontSize=larger_size,
        titleFontWeight=regular_weight,
        titlePadding=5,
        labelFont=font,
        labelColor=label_color,
        labelFontSize=base_size,
        labelFontWeight=regular_weight,
        labelPadding=5,
    )
    # per-direction overrides of the axis title padding
    axis_x = dict(titlePadding=5)
    axis_y = dict(titlePadding=10)

    # chart title and subtitle, anchored at the start
    title = dict(
        font=font,
        color=title_color,
        fontSize=title_size,
        fontWeight=bold_weight,
        subtitleFont=font,
        subtitleColor=subtitle_color,
        subtitleFontSize=subtitle_size,
        subtitleFontWeight=regular_weight,
        subtitlePadding=8,
        anchor="start",
        offset=14,
    )

    # legend title and labels
    legend = dict(
        titleFont=font,
        titleColor=label_color,
        titleFontSize=base_size,
        titleFontWeight=bold_weight,
        titlePadding=5,
        labelFont=font,
        labelColor=label_color,
        labelFontSize=base_size,
        labelFontWeight=regular_weight,
        offset=20,
        padding=5,
    )

    # facet headers
    header = dict(
        titleFont=font,
        titleColor=label_color,
        titleFontSize=larger_size,
        titleFontWeight=regular_weight,
        labelFont=font,
        labelColor=label_color,
        labelFontSize=base_size,
        labelFontWeight=regular_weight,
    )

    # default color ranges
    color_ranges = dict(category=color_cycle, diverging=color_cycle_2)

    # assemble, keeping the key order of the individual sections
    theme_config = {
        "axis": axis,
        "axisX": axis_x,
        "axisY": axis_y,
        "title": title,
        "legend": legend,
        "header": header,
        "range": color_ranges,
        # no outline around the plotting area
        "view": {"stroke": "transparent"},
        # mark defaults
        "circle": {"color": mark_color, "size": 100},
        "point": {"filled": True},
        "bar": {"color": mark_color},
        "line": {"color": mark_color},
        "text": {
            "font": font,
            "fontSize": larger_size,
            "color": custom_colors["GRAY_5"],
        },
        "rule": {"size": 1.25, "color": label_color},
        "area": {"fill": mark_color},
    }
    return {"background": "white", "config": theme_config}

In [11]:
# register the theme factory under a name, then make it the active theme
alt.themes.register("custom_theme", custom_theme)
# the trailing semicolon suppresses the cell's output repr
alt.themes.enable("custom_theme");

In [12]:
# example api syntax: alt.Chart({data_frame}).{mark_method}().encode({encodings}).interactive()
# note: prefers long-form dataframes (use .melt() to transform)

In [13]:
# marks: .mark_bar(), .mark_circle(), .mark_line(), .mark_point(), .mark_boxplot(), .mark_rule(), .mark_text()
# encoding: Q(quantitative), O(ordinal, discrete ordered), N(nominal, discrete unordered), T(temporal)
# properties: color, opacity, shape, size

In [14]:
# note: alt.Text(format={format string}) uses D3 format codes
# https://github.com/d3/d3-format#locale_format

In [15]:
# histogram: binned counts of a single feature
chart = (
    alt.Chart(df_iris)
    .mark_bar()
    .encode(
        x=alt.X("sepal_width:Q", bin=alt.Bin(maxbins=20)),
        y="count()",
        opacity=alt.value(0.75),
    )
    .properties(
        title={"text": "Altair Histogram Plot Title", "subtitle": "Subtitle Text"},
        width=width,
        height=height,
    )
    .interactive()
)
# render
chart.display()

In [16]:
# overlaid histograms: fold the four measurement columns into long form,
# then bin each feature on a shared axis without stacking
feature_columns = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
chart = (
    alt.Chart(df_iris)
    .transform_fold(feature_columns, as_=["Feature", "Value"])
    .mark_bar(opacity=0.6)
    .encode(
        x=alt.X("Value:Q", bin=alt.Bin(maxbins=50)),
        y=alt.Y("count()", stack=None),
        color=alt.Color(
            "Feature:N",
            legend=alt.Legend(orient="none", legendX=500, legendY=0),
        ),
    )
    .properties(
        title={
            "text": "Altair Multiple Histogram Plot Title",
            "subtitle": "Subtitle Text",
        },
        width=width,
        height=height,
    )
    .interactive()
)
# render
chart.display()

In [17]:
# scatterplot of uniform random points
# a seeded generator keeps the figure identical across Restart & Run All
rng = np.random.default_rng(17)
df = pd.DataFrame({'x' : rng.random(100),
                   'y' : rng.random(100)})
chart = alt.Chart(df).mark_circle().encode(
    # zero=False lets each axis fit the data range instead of starting at 0
    x=alt.X("x:Q", title="x", scale=alt.Scale(zero=False)),
    y=alt.Y("y:Q", title="y", scale=alt.Scale(zero=False)),
    opacity=alt.value(0.6),
).properties(
    title={"text" : "Altair Scatter Plot Title",
           "subtitle" : "Subtitle Text"},
    width=width, height=height,
).interactive()
# display
chart.display()

In [18]:
# scatterplot with grid lines and a visible view outline
# a seeded generator keeps the figure identical across Restart & Run All
rng = np.random.default_rng(18)
df = pd.DataFrame({'x' : rng.random(100),
                   'y' : rng.random(100)})
chart = alt.Chart(df).mark_circle().encode(
    x=alt.X("x:Q", title="x", scale=alt.Scale(zero=False),
            axis=alt.Axis(tickCount=10)),
    y=alt.Y("y:Q", title="y", scale=alt.Scale(zero=False),
            axis=alt.Axis(tickCount=10)),
    opacity=alt.value(0.6),
).configure_view(
    # chart-level override of the view outline
    strokeWidth=1.25,
    stroke=custom_colors["GRAY_6"],
).configure_axis(
    # chart-level override: thin gray grid lines
    grid=True,
    gridWidth=.3,
    gridColor=custom_colors['GRAY_8'],
).properties(
    title={"text" : "Altair Scatter Plot with Grid Title",
           "subtitle" : "Subtitle Text"},
    width=width, height=height,
).interactive()
# display
chart.display()

In [19]:
# scatterplot: sepal dimensions, sized by petal width and colored by species
chart = alt.Chart(df_iris).mark_circle().encode(
    # axis title fixed: it previously read "speal_length"
    x=alt.X("sepal_length:Q", title="sepal_length", scale=alt.Scale(zero=False)),
    y=alt.Y("sepal_width:Q", title="sepal_width", scale=alt.Scale(zero=False)),
    size=alt.Size("petal_width:Q", legend=alt.Legend(orient="right")),
    color=alt.Color("species:N", legend=alt.Legend(orient="right")),
    opacity=alt.value(0.6),
).properties(
    title={"text" : "Altair Scatter Plot Title",
           "subtitle" : "Subtitle Text"},
    width=width, height=height,
).interactive()
# display
chart.display()

In [20]:
# data: mean of every numeric column, one row per feature
# numeric_only=True skips the string "species" column, which newer pandas
# versions refuse to average
bar_data = pd.DataFrame(df_iris.mean(numeric_only=True)).reset_index()
bar_data.columns = ["Feature", "Mean_Value"]
# bars
bars = alt.Chart(bar_data).mark_bar(
    size=70,
).encode(
    x=alt.X("Mean_Value:Q", title="Mean Value"),
    y=alt.Y("Feature:N", title=None),
    opacity=alt.value(0.85)
)
# text: reuses the bar encodings; dx shifts the label left of the bar end
text = bars.mark_text(
    size=28,
    color="white",
    align='center',
    baseline='middle',
    dx=-45,
).encode(
    text=alt.Text('Mean_Value:Q', format=',.3r')
)
# chart
chart = alt.layer(bars, text).properties(
    title={"text" : "Altair Bar Plot Title",
           "subtitle" : "Subtitle Text",
           "offset" : 4},
    width=width, height=height,
).configure_axisY(
    # hide the y axis line and ticks; keep only the category labels
    domainOpacity=0,
    labelPadding=10,
    tickSize=0,
)
# display
chart.display()

In [21]:
# barh: sorted bars with one highlighted category
# numeric_only=True skips the string "species" column, which newer pandas
# versions refuse to average
bar_data = pd.DataFrame(df_iris.mean(numeric_only=True)).reset_index()
bar_data.columns = ["Feature", "Mean_Value"]
# base: shared encodings, features ordered by descending mean
base = alt.Chart(bar_data).encode(
    x=alt.X("Mean_Value:Q", title="Mean Value"),
    y=alt.Y("Feature:N", title=None,
            sort = alt.EncodingSortField(
                field="Mean_Value",
                order="descending")),
    opacity=alt.value(0.85),
)
# bars: highlight petal_length in blue, everything else in gray
bars = base.mark_bar(
    size=70,
).encode(
    color= alt.condition(
        alt.datum.Feature == "petal_length",
        alt.value(custom_colors["BLUE_1"]),
        alt.value(custom_colors["GRAY_6"]),
    )
)
# text: dx shifts the label left of the bar end
text = base.mark_text(
    size=28,
    color="white",
    align='center',
    baseline='middle',
    dx=-45,
).encode(
    text=alt.Text('Mean_Value:Q', format=',.2f'),
)
# chart
chart = alt.layer(bars, text).properties(
    title={"text" : "Altair Bar Plot Title",
           "subtitle" : "Subtitle Text",
           "offset" : 4},
    width=width, height=height,
).configure_axisY(
    # hide the y axis line and ticks; keep only the category labels
    domainOpacity=0,
    labelPadding=10,
    tickSize=0,
)
# display
chart.display()

In [22]:
# data: per-species feature means, reshaped to long form
# reset_index() turns the group key back into a "species" column without
# copying the index by hand or mutating the frame in place
bar_data = df_iris.groupby("species").mean().reset_index()
bar_data_melt = bar_data.melt(id_vars=['species'],
                              value_vars=['sepal_length', 'sepal_width','petal_length', 'petal_width'])
bar_data_melt.columns = ["Species","Feature","Value"]
# bars: one facet column per feature, one bar per species
chart = alt.Chart(bar_data_melt).mark_bar(
    size=50,
).encode(
    column=alt.Column("Feature:N", title="Feature", spacing=10,
                      header=alt.Header(orient="bottom", titlePadding=-2, labelPadding=8)),
    y=alt.Y("Value:Q", title="Mean Value"),
    color=alt.Color("Species:N", title="Species" ,
                    legend=alt.Legend(orient="none", legendX=550, legendY=0)),
    # the x axis is hidden; species are identified by the color legend
    x=alt.X("Species:O", axis=alt.Axis(title=None, labels=False, ticks=False)),
    opacity=alt.value(0.8),
).properties(
    title={"text" : "Altair Grouped Bar Plot Title",
           "subtitle" : "Subtitle Text"},
    # width and height apply to each facet, not to the whole figure
    width=160, height=410,
).configure_scale(
    bandPaddingInner=0.01,
    bandPaddingOuter=0.08,
)
# display
chart.display()

In [23]:
# data: per-species feature means, reshaped to long form
# reset_index() turns the group key back into a "species" column without
# copying the index by hand or mutating the frame in place
bar_data = df_iris.groupby("species").mean().reset_index()
bar_data_melt = bar_data.melt(id_vars=['species'],
                              value_vars=['sepal_length', 'sepal_width','petal_length', 'petal_width'])
bar_data_melt.columns = ["Species","Feature","Value"]
# base: shared encodings and per-facet size
base = alt.Chart(bar_data_melt).encode(
    y=alt.Y("Value:Q", title="Mean Value"),
    x=alt.X("Species:O", axis=alt.Axis(title=None, labels=False, ticks=False)),
).properties(
    width=150,
    height=410,
)
# bars: first species in blue, the others in gray
bar_colors = [custom_colors['BLUE_1'], custom_colors['GRAY_7'], custom_colors['GRAY_9']]
bars = base.mark_bar(
    size=50,
).encode(
    color=alt.Color("Species:N", title="Species",
                    legend=alt.Legend(orient="none", legendX=550, legendY=0),
                    scale=alt.Scale(range=bar_colors)),
    opacity=alt.value(0.8),
)
# text: value labels shown only for setosa (all other labels fully transparent)
text = base.mark_text(
    size=18,
    fontWeight=600,
    align='center',
    baseline='middle',
    dy=-10,
).encode(
    text=alt.Text('Value:Q', format=',.1f'),
    opacity=alt.condition(alt.datum.Species == "setosa",
                         alt.value(1), alt.value(0))
)
# chart: layer first, then facet by feature
chart = alt.layer(bars, text, data=bar_data_melt).facet(
    column=alt.Column("Feature:N",title="Feature",
                      header=alt.Header(orient="bottom", titlePadding=-2, labelPadding=8)),
    title={"text" : "Altair Grouped Bar Plot Title",
           "subtitle" : "Subtitle Text"},
).configure_scale(
    bandPaddingInner=0.25,
    bandPaddingOuter=0.1,
)
# display
chart.display()

In [24]:
# data: petal_length against row position, restricted to rows 45-59
line_data = df_iris[["petal_length"]].reset_index()
line_data.columns = ["index_time", "petal_length"]
line_data_subset = line_data.iloc[45:60]
# base: shared x/y encodings with clamped, explicitly bounded scales
base = alt.Chart(line_data_subset).encode(
    x=alt.X(
        "index_time:Q",
        title="Index",
        scale=alt.Scale(zero=False, clamp=True, domain=(45, 60)),
    ),
    y=alt.Y(
        "petal_length:Q",
        title="petal_length Value",
        scale=alt.Scale(zero=False, clamp=True, domain=(1, 6)),
    ),
)
# shorthand for the field tested by the conditions below
index_time = alt.datum.index_time
# lines
lines = base.mark_line(size=5)
# points: visible only for index 50..58
points = base.mark_circle(size=150).encode(
    opacity=alt.condition(
        ((index_time >= 50) & (index_time <= 58)),
        alt.value(1),
        alt.value(0),
    )
)
# styling shared by the value labels above and below the line
label_style = dict(
    size=20,
    fontWeight=600,
    color=custom_colors["GRAY_1"],
    align="center",
    baseline="middle",
    dx=-1,
)
# text up: labels above the line for index 50..58, except 53 and 57
text_up = base.mark_text(dy=-22, **label_style).encode(
    text=alt.condition(
        ((index_time != 53) & (index_time != 57) &
         (index_time > 49) & (index_time < 59)),
        alt.Text("petal_length:Q", format=",.1f"),
        alt.Text(),
    )
)
# text down: labels below the line for index 53 and 57
text_down = base.mark_text(dy=20, **label_style).encode(
    text=alt.condition(
        ((index_time == 53) | (index_time == 57)),
        alt.Text("petal_length:Q", format=",.1f"),
        alt.Text(),
    )
)
# vertical line at x = 49
overlay = pd.DataFrame({"x": [49]})
vline = (
    alt.Chart(overlay)
    .mark_rule(color=custom_colors["GRAY_7"], size=1.5)
    .encode(x="x:Q")
)
# text annotation placed next to the vertical line
annotation = pd.DataFrame(
    {"x": [50], "y": [5.8], "text": ["event text description"]}
)
text_annotation = (
    alt.Chart(annotation)
    .mark_text(color=custom_colors["GRAY_3"], dx=65, size=18)
    .encode(x="x:Q", y="y:Q", text="text")
)
# chart: stack all layers
chart = alt.layer(
    lines, points, text_up, text_down, vline, text_annotation
).properties(
    title={
        "text": "Altair Single Line Plot Title",
        "subtitle": "Subtitle Text",
        "offset": 15,
    },
    width=width,
    height=height,
)
# render
chart.display()

In [25]:
# data: long form with one row per (index, variable) pair
df_iris_melt = df_iris.reset_index().melt(id_vars=['index'],
    value_vars=['sepal_length','sepal_width','petal_length','petal_width'])
# base: shared encodings; the x domain is clamped to rows 30..80
# (the unused `colors = []` placeholder was removed)
base = alt.Chart(df_iris_melt).encode(
    x=alt.X('index', title="Index", scale=alt.Scale(clamp=True, domain=(30,80))),
    y=alt.Y('value', title="Value"),
    color=alt.Color('variable', title=None,
                    legend=alt.Legend(orient="none", legendX=40, legendY=0),
                    scale=alt.Scale(range=color_cycle)),
)
# lines
lines = base.mark_line(
    size=3,
    opacity=1,
)
# points
points = base.mark_point(
    size=125,
    opacity=1,
)
# chart
chart = alt.layer(lines, points).properties(
    title={"text" : "Altair Multiple Line Plot Title",
           "subtitle" : "Subtitle Text",
           "offset" : 15},
    width=width, height=height
).interactive()
# display
chart.display()

In [26]:
# data: long form with one row per (index, variable) pair
df_iris_melt = df_iris.reset_index().melt(
    id_vars=["index"],
    value_vars=["sepal_length", "sepal_width", "petal_length", "petal_width"],
)
# palette: one blue series, the remaining series in grays
line_colors = [
    custom_colors[color_name]
    for color_name in ("BLUE_1", "GRAY_9", "GRAY_5", "GRAY_7")
]
# base: shared encodings; the x domain is clamped to rows 30..80
base = alt.Chart(df_iris_melt).encode(
    x=alt.X("index", title="Index", scale=alt.Scale(clamp=True, domain=(30, 80))),
    y=alt.Y("value", title="Value"),
    color=alt.Color(
        "variable",
        title=None,
        legend=alt.Legend(orient="none", legendX=40, legendY=0),
        scale=alt.Scale(range=line_colors),
    ),
)
# line and point layers built from the same encodings
lines = base.mark_line(size=3, opacity=1)
points = base.mark_point(size=125, opacity=1)
# chart
chart = (
    alt.layer(lines, points)
    .properties(
        title={
            "text": "Altair Multiple Line Plot Title",
            "subtitle": "Subtitle Text",
            "offset": 15,
        },
        width=width,
        height=height,
    )
    .interactive()
)
# render
chart.display()

In [27]:
# data: long form of the two length columns
df_iris_melt = df_iris.reset_index().melt(id_vars=['index'],
    value_vars=['petal_length','sepal_length'])
# base: shared encodings with clamped x and y domains
# (the unused `colors = []` placeholder was removed)
base = alt.Chart(df_iris_melt).encode(
    x=alt.X('index', title="Index", scale=alt.Scale(clamp=True, domain=(60,90))),
    y=alt.Y('value', title="Value", scale=alt.Scale(clamp=True, domain=(3,7.5))),
    color=alt.Color('variable', title=None,
                    legend=alt.Legend(orient="none", legendX=475, legendY=0),
                    scale=alt.Scale(range=[custom_colors["BLUE_1"], custom_colors["RED_1"]])),
)
# points
points = base.mark_point(
    size=100,
    opacity=1,
)
# smooth: one dashed loess curve per variable
smooth = base.transform_loess(
    on='index',
    loess='value',
    groupby=['variable'],
    bandwidth=0.06,
).mark_line(
    size=5,
    strokeDash=[10,7],
)
# chart
chart = alt.layer(points, smooth).properties(
    title={"text" : "Altair Line Smoothing Plot Title",
           "subtitle" : "Subtitle Text",
           "offset" : 15},
    width=width, height=height
).interactive()
# display
chart.display()

In [28]:
# base: scatter layer that every regression fit derives from
base = (
    alt.Chart(df_iris)
    .mark_circle()
    .encode(
        x=alt.X("sepal_length", scale=alt.Scale(zero=False)),
        y=alt.Y("petal_width", scale=alt.Scale(zero=False)),
    )
)
# regressions: one dashed polynomial fit per order; the fitted column is
# named after the order, then folded so the order becomes a color category
regression_order_list = [1, 2, 4]
colors = [
    custom_colors[color_name] for color_name in ("GREEN_1", "RED_1", "ORANGE_1")
]
regressions = [
    base.transform_regression(
        on="sepal_length",
        regression="petal_width",
        method="poly",
        order=each_order,
        as_=["sepal_length", str(each_order)],
    )
    .mark_line(size=5, strokeDash=[25, 10])
    .transform_fold([str(each_order)], as_=["order", "petal_width"])
    .encode(
        color=alt.Color("order:N", legend=None, scale=alt.Scale(range=colors)),
    )
    for each_order in regression_order_list
]
# chart: scatter plus all fits
chart = (
    alt.layer(base, *regressions)
    .properties(
        title={
            "text": "Altair Regression Plot Title",
            "subtitle": "Subtitle Text",
            "offset": 15,
        },
        width=width,
        height=height,
    )
    .interactive()
)
# render
chart.display()

In [29]:
# mark-level opacity is given as a plain number; constant-value wrappers
# such as alt.value(...) belong inside .encode()
# points: raw observations
points = alt.Chart(df_fmri).mark_point(
    size=100,
    opacity=0.5,
).encode(
    x=alt.X('timepoint:Q'),
    y=alt.Y('signal:Q'),
    color=alt.Color("event:N", legend=alt.Legend(orient="none", title=None, legendX=550)),
)
# lines: mean signal per timepoint and event
lines = alt.Chart(df_fmri).mark_line(
    size=3,
    opacity=0.5,
).encode(
    x=alt.X('timepoint:Q'),
    y=alt.Y('mean(signal):Q'),
    color=alt.Color("event:N", legend=None),
)
# confidence intervals: error band with extent='ci'
bands = alt.Chart(df_fmri).mark_errorband(
    extent='ci',
    opacity=0.2,
).encode(
    x=alt.X('timepoint:Q'),
    y=alt.Y('signal:Q'),
    color=alt.Color("event:N", legend=None),
)
# chart
chart = alt.layer(points, lines, bands).properties(
    title={"text" : "Altair Line Plot Title",
           "subtitle" : "Subtitle Text",
           "offset" : 15},
    width=width, height=height
).interactive()
# display
chart.display()

In [30]:
# data: long form of the two sepal measurements, keyed by species
df_iris_melt = df_iris.melt(
    id_vars=["species"],
    value_vars=["sepal_length", "sepal_width"],
)
df_iris_melt.columns = ["Species", "Feature", "Value"]
# box: one facet column per feature, one box per species
chart = (
    alt.Chart(df_iris_melt)
    .mark_boxplot(
        extent="min-max",
        opacity=0.8,
        size=90,
        ticks=True,
        median={"color": "black"},
    )
    .encode(
        column=alt.Column(
            "Feature:N",
            title="Feature",
            spacing=10,
            header=alt.Header(orient="bottom", titlePadding=-2, labelPadding=8),
        ),
        # the x axis is hidden; species are identified by the color legend
        x=alt.X("Species:N", axis=alt.Axis(title=None, labels=False, ticks=False)),
        y=alt.Y("Value:Q", title="Value", scale=alt.Scale(zero=False)),
        color=alt.Color(
            "Species:N",
            title=None,
            legend=alt.Legend(orient="none", legendX=550),
        ),
    )
    .properties(
        title={
            "text": "Altair Box Plot Title",
            "subtitle": "Subtitle Text",
            "offset": 15,
        },
        width=340,
        height=height,
    )
    .interactive()
)
# render
chart.display()

In [31]:
# note: see https://altair-viz.github.io/gallery/ for more advanced visualizations 