# Annotations in Altair

## Setup

In [119]:
# Silence FutureWarning messages for the rest of the session. The filter is
# installed before pandas and altair are imported.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import altair as alt

## Data

In [120]:
# Read the cars dataset straight from GitHub and preview the first rows
CARS_URL = 'https://raw.githubusercontent.com/kirenz/datasets/master/cars.csv'

df = pd.read_csv(CARS_URL)
df.head()

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Date,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA


In [121]:
# Summarise column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 392 entries, 0 to 391
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              392 non-null    object 
 1   Miles_per_Gallon  392 non-null    float64
 2   Cylinders         392 non-null    int64  
 3   Displacement      392 non-null    float64
 4   Horsepower        392 non-null    float64
 5   Weight_in_lbs     392 non-null    int64  
 6   Acceleration      392 non-null    float64
 7   Date              392 non-null    object 
 8   Origin            392 non-null    object 
dtypes: float64(4), int64(2), object(3)
memory usage: 27.7+ KB


We first convert the `Date` column from strings to a datetime type so that it can be used as a temporal field.

In [122]:
# Replace the Date column (in place) with its parsed datetime values
df['Date'] = pd.to_datetime(df['Date'])

# Confirm the column dtype after conversion
df['Date'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 392 entries, 0 to 391
Series name: Date
Non-Null Count  Dtype         
--------------  -----         
392 non-null    datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 3.2 KB


In [123]:
# Add a Year column holding only the calendar year of each Date value
# (Date must already be datetime-typed for the .dt accessor to work)
df['Year'] = df['Date'].dt.year

In [124]:
# Preview the frame again to check the new Year column
df.head()

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Date,Origin,Year
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA,1970
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA,1970
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA,1970
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA,1970
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA,1970


## Annotations

### Scatterplot annotation example

In [125]:
# Step 1: Create graph
# Scatterplot of horsepower against fuel economy, one colour per origin
graph = (
    alt.Chart(df)
    .mark_circle()
    .encode(
        alt.X('Horsepower'),
        alt.Y('Miles_per_Gallon'),
        alt.Color('Origin:N'),
        tooltip=['Horsepower', 'Miles_per_Gallon'],
    )
)

graph

In [126]:
# Step 2: Create text data and values where to display the text
# Each row holds: x position, y position, origin and the label to draw
annotation_columns = ['Horsepower', 'Miles_per_Gallon', 'Origin', 'Annotation']
text = [
    [132, 32.7, 'Japan', 'This is an unusual value'],
    [10, 46, 'Europe', 'Efficient cars'],
]

df_text = pd.DataFrame(data=text, columns=annotation_columns)
df_text

Unnamed: 0,Horsepower,Miles_per_Gallon,Origin,Annotation
0,132,32.7,Japan,This is an unusual value
1,10,46.0,Europe,Efficient cars


In [127]:
# Step 3: create annotations
# Text marks positioned by the coordinates stored in df_text
annotations = (
    alt.Chart(df_text)
    .mark_text(align='left', baseline='middle', dx=8)  # dx nudges the text to the right
    .encode(
        alt.X('Horsepower'),
        alt.Y('Miles_per_Gallon'),
        alt.Text('Annotation'),
        alt.Color('Origin'),
    )
)

annotations

In [128]:
# Step 4: combine graph with annotations
# Layer the text marks on top of the scatterplot
chart = alt.layer(graph, annotations)

chart

In [129]:
# Step 5: hide grid
# Turn off the axis grid lines and the border drawn around the view
(
    chart
    .configure_axis(grid=False)
    .configure_view(strokeWidth=0)
)

### Annotation with long text

We use the same example as before, but with longer annotation text that contains line breaks.

In [130]:
# Step 1: reuse the scatterplot `graph` defined in the previous example
graph

In [131]:
# Step 2: Create text data but now use line breaks with \n
# Same layout as before: x position, y position, origin, label text
annotation_columns_2 = ['Horsepower', 'Miles_per_Gallon', 'Origin', 'Annotation']
text_2 = [
    [132, 32.7, 'Japan', 'This is an unusual value \n from a Japanese car'],
    [10, 46, 'Europe', 'Efficient cars \n made in \n Europe'],
]

df_text_2 = pd.DataFrame(data=text_2, columns=annotation_columns_2)
df_text_2

Unnamed: 0,Horsepower,Miles_per_Gallon,Origin,Annotation
0,132,32.7,Japan,This is an unusual value \n from a Japanese car
1,10,46.0,Europe,Efficient cars \n made in \n Europe


In [132]:
# Step 3: create annotations
# Same text layer as before; lineBreak names the delimiter that splits the
# annotation into several lines
annotations_2 = (
    alt.Chart(df_text_2)
    .mark_text(align='left', baseline='middle', dx=8, lineBreak='\n')
    .encode(
        alt.X('Horsepower'),
        alt.Y('Miles_per_Gallon'),
        alt.Text('Annotation'),
        alt.Color('Origin'),
    )
)

annotations_2

In [133]:
# Step 4: combine graph with label
# Layer the multi-line annotations on top of the scatterplot
chart = alt.layer(graph, annotations_2)

chart

In [134]:
# Step 5: hide grid
# Turn off the axis grid lines and the border drawn around the view
(
    chart
    .configure_axis(grid=False)
    .configure_view(strokeWidth=0)
)

### Line graph with line labels

In [135]:
# Show mean values per column
# numeric_only=True restricts the mean to numeric columns. Without it, recent
# pandas versions raise a TypeError on the string column Name instead of
# silently dropping it.
df.groupby(['Origin']).mean(numeric_only=True)

Unnamed: 0_level_0,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year
Origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Europe,27.602941,4.161765,109.632353,80.558824,2433.470588,16.794118,1975.720588
Japan,30.450633,4.101266,102.708861,79.835443,2221.227848,16.172152,1977.594937
USA,20.033469,6.277551,247.512245,119.04898,3372.489796,14.990204,1975.644898


In [136]:
# Only use Europe and USA
# Note: this rebinds `df`, so every chart built from `df` below this cell no
# longer contains the Japanese cars.
df = df.loc[df['Origin'] != 'Japan']

df['Origin'].value_counts()

USA       245
Europe     68
Name: Origin, dtype: int64

In [137]:
# Create line graph: one line per origin showing the average horsepower per date
line = alt.Chart(df).mark_line(point=True).encode(
        alt.X('Date'),
        alt.Y('average(Horsepower)'),
        color=alt.Color('Origin:N', legend=None),
        tooltip=['Date', 'average(Horsepower)']
)

# Create labels at end of chart
# An encoding channel carries a single aggregate, so combining the shorthand
# 'average(Horsepower)' with aggregate={'argmax': ...} would drop the average
# and place the label at the horsepower of one individual car. We therefore
# first reduce the data to one averaged row per (Origin, Date) and only then
# pick the row with the latest date.
label = alt.Chart(df).transform_aggregate(
    mean_horsepower='mean(Horsepower)',
    groupby=['Origin', 'Date']
).mark_text(
    align='left',
    baseline='middle',
    dx=8 # Nudges text to right
).encode(
    x='max(Date):T',
    y=alt.Y(
        'mean_horsepower:Q',
        aggregate={'argmax': 'Date'},  # use argmax to reach the last value
        title='Average of Horsepower'  # avoid showing the helper field name on the axis
    ),
    text='Origin:N',
    color='Origin:N',
)

chart = line + label

# Hide grid
chart.configure_view(strokeWidth=0).configure_axis(grid=False)

### Line graph with annotations

In [138]:
# Annotation data for the line chart: date, horsepower, origin and note text
note_columns = ['Date', 'Horsepower', 'Origin', 'note']
text = [
    ['1972-01-01', 155, 'USA', 'Hello USA 🎉'],
    ['1976-01-01', 75, 'Europe', 'Hello Europe'],
]

df_text = pd.DataFrame(data=text, columns=note_columns)
df_text

Unnamed: 0,Date,Horsepower,Origin,note
0,1972-01-01,155,USA,Hello USA 🎉
1,1976-01-01,75,Europe,Hello Europe


In [139]:
# Text layer: each note is drawn at its own date / horsepower coordinate
text = (
    alt.Chart(df_text)
    .mark_text()
    .encode(
        alt.X('Date:T'),
        alt.Y('Horsepower:Q'),
        alt.Text('note:N'),
        alt.Color('Origin'),
    )
)

# Put the notes on top of the line chart
chart_2 = alt.layer(line, text)

# Turn off the axis grid lines and the border drawn around the view
(
    chart_2
    .configure_axis(grid=False)
    .configure_view(strokeWidth=0)
)

In [None]:
import altair as alt
import pandas as pd
from sklearn.datasets import make_blobs

# Generate 20 sample points with cluster labels (seeded for reproducibility)
X, labels = make_blobs(20, random_state=1)
points = pd.DataFrame({'x': X[:, 0], 'y': X[:, 1], 'labels': labels})

# Per-label mean position; computed before the `type` marker column is added
centers = points.groupby('labels').mean().assign(type='centers')
points = points.assign(type='points')

# Stack the points and their centers into one frame
data = pd.concat([points, centers.reset_index()])

# Rows tagged 'points' are coloured by label and drawn large;
# the remaining rows (the centers) are drawn small and black.
is_point = "datum.type == 'points'"

alt.Chart(data).mark_point(filled=True, size=150).encode(
    alt.X('x'),
    alt.Y('y'),
    alt.Column('labels:N'),
    color=alt.condition(is_point, 'labels:N', alt.value('black')),
    size=alt.condition(is_point, alt.value(150), alt.value(50)),
)