## Annotations in Altair

## Setup

In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import altair as alt
from vega_datasets import data

## Data

In [82]:
# Load the cars dataset into a DataFrame. `data` comes from the
# `from vega_datasets import data` line in the setup cell, so all imports
# live in one place.
df = data.cars()

# Preview the first rows
df.head()

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA


In [83]:
# Print column dtypes and non-null counts to spot columns with missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 406 entries, 0 to 405
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Name              406 non-null    object        
 1   Miles_per_Gallon  398 non-null    float64       
 2   Cylinders         406 non-null    int64         
 3   Displacement      406 non-null    float64       
 4   Horsepower        400 non-null    float64       
 5   Weight_in_lbs     406 non-null    int64         
 6   Acceleration      406 non-null    float64       
 7   Year              406 non-null    datetime64[ns]
 8   Origin            406 non-null    object        
dtypes: datetime64[ns](1), float64(4), int64(2), object(2)
memory usage: 28.7+ KB


In [84]:
# Keep only complete rows: any row with at least one missing value is removed
df = df.dropna(axis='index', how='any')

In [85]:
# Show the mean of every numeric column per region of origin.
# numeric_only=True restricts the aggregation to numeric columns, skipping
# the text column `Name` and the datetime column `Year`. Without it,
# pandas >= 2.0 raises a TypeError when it tries to average `Name`.
df.groupby('Origin').mean(numeric_only=True)

Unnamed: 0_level_0,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration
Origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Europe,27.602941,4.161765,109.632353,80.558824,2433.470588,16.794118
Japan,30.450633,4.101266,102.708861,79.835443,2221.227848,16.172152
USA,20.033469,6.277551,247.512245,119.04898,3372.489796,14.990204


In [86]:
# Drop the Japanese cars so that only Europe and USA remain
not_japan = df['Origin'].ne('Japan')
df = df.loc[not_japan]

# Number of cars left per region
df['Origin'].value_counts()

USA       245
Europe     68
Name: Origin, dtype: int64

## Line graph with label

In [88]:
# Line graph: average horsepower per year, one line per region of origin.
# The colour legend is switched off because the text labels created below
# name each line directly.
line = alt.Chart(df).mark_line(point=True).encode(
    alt.X('Year'),
    alt.Y('average(Horsepower)'),
    color=alt.Color('Origin:N', legend=None),
    tooltip=['Year', 'average(Horsepower)']
)

# Labels at the end of each line.
# The data holds many cars per (Origin, Year), so the yearly average is
# computed first with transform_aggregate. The argmax aggregate then picks,
# for each Origin, that average at the latest Year, i.e. the last point of
# the line. Applying argmax directly to the raw rows would instead take the
# horsepower of a single car from the last year.
label = alt.Chart(df).transform_aggregate(
    avg_horsepower='mean(Horsepower)',
    groupby=['Origin', 'Year']
).mark_text(
    align='left',
    baseline='middle',
    dx=8  # nudges text to the right of the last point
).encode(
    # Explicit titles so the shared axes are not named after this layer's
    # helper aggregates.
    x=alt.X('max(Year):T', title='Year'),
    y=alt.Y('avg_horsepower:Q', aggregate={'argmax': 'Year'}, title='Average of Horsepower'),
    text='Origin:N',
    color='Origin:N',
)

# Layer the labels on top of the lines
chart = line + label

# Hide the view border and the grid lines
chart.configure_view(strokeWidth=0).configure_axis(grid=False)

## Line graph with text


### Create data


In our first example, we simply add a few text annotations to the line chart:

In [120]:
# One row per annotation: the (Year, Horsepower) position where the note is
# drawn, the region it belongs to (used for colouring), and the note itself.
# The list gets its own name because `text` is used for the Altair text
# layer in the following cell.
annotations = [
    ['1972-01-01', 155, 'USA', 'Hello USA 🎉'],
    ['1976-01-01', 75, 'Europe', 'Hello Europe'],
]

df_text = pd.DataFrame(annotations, columns=['Year', 'Horsepower', 'Origin', 'note'])
df_text

Unnamed: 0,Year,Horsepower,Origin,note
0,1972-01-01,155,USA,Hello USA 🎉
1,1976-01-01,75,Europe,Hello Europe


In [121]:
# Annotation layer: every row of df_text becomes one text mark, positioned
# by its Year / Horsepower values and coloured by region.
text = (
    alt.Chart(df_text)
    .mark_text()
    .encode(
        alt.X('Year:T'),
        alt.Y('Horsepower:Q'),
        alt.Text('note:N'),
        alt.Color('Origin'),
    )
)

# Put the annotations on top of the line chart
chart2 = alt.layer(line, text)

# Same look as before: no view border, no grid lines
chart2.configure_view(strokeWidth=0).configure_axis(grid=False)