# Data Visualization with Altair

In [1]:
import altair as alt
import pandas as pd
import numpy as np

In [2]:
# Path to the fuel-economy summary CSV, relative to the notebook's working directory.
data_url = 'data/EPA_fuel_economy_summary.csv'

# Load the CSV into the DataFrame that every chart cell below reads from.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# saved index; passing index_col=0 would drop it — confirm against the file.
df = pd.read_csv(data_url)

# Preview the first rows to check the columns and values.
df.head()

Unnamed: 0,make,model,year,transmission,drive,date_range,fuel_type_summary,class_summary,cylinders,displ,co2,barrels08,fuelCost08,highway08,city08,comb08
0,Acura,NSX,2000,Automatic,2WD,2000-2010,Gas,Car,6.0,3.0,-1,18.311667,2600,22,15,18
1,Acura,NSX,2000,Manual,2WD,2000-2010,Gas,Car,6.0,3.2,-1,18.311667,2600,22,15,18
2,BMW,M Coupe,2000,Manual,2WD,2000-2010,Gas,Car,6.0,3.2,-1,17.347895,2500,23,17,19
3,BMW,Z3 Coupe,2000,Automatic,2WD,2000-2010,Gas,Car,6.0,2.8,-1,17.347895,2500,24,17,19
4,BMW,Z3 Coupe,2000,Manual,2WD,2000-2010,Gas,Car,6.0,2.8,-1,17.347895,2500,24,17,19


In [3]:
# Switch Altair from its default data transformer to the one registered as 'data_server'.
# NOTE(review): this transformer presumably depends on the separate
# altair_data_server package and on a live kernel serving the data, so saved or
# shared copies of the notebook may not render the charts — confirm.
alt.data_transformers.enable('data_server')

DataTransformerRegistry.enable('data_server')

In [4]:
# Scatter plot with circle marks: `displ` on x, `fuelCost08` on y.
alt.Chart(df).mark_circle().encode(
    x='displ',
    y='fuelCost08',
)

  for col_name, dtype in df.dtypes.iteritems():


In [5]:
# Same scatter as the previous cell, drawn with point marks instead of circles.
alt.Chart(df).mark_point().encode(
    x='displ',
    y='fuelCost08',
)

In [6]:
# Point scatter where the `drive` column controls both the color and the shape
# of each mark.
alt.Chart(df).mark_point().encode(
    x='displ',
    y='fuelCost08',
    color='drive',
    shape='drive',
)

In [7]:
# Bar chart of row counts per `fuelCost08` value (no binning applied here;
# the following cells add bins).
alt.Chart(df).mark_bar().encode(
    x='fuelCost08',
    y='count()',
)

In [8]:
# Histogram with bins, spelled out with long-form channel objects:
# x bins `fuelCost08` (default binning), y counts the rows in each bin.
alt.Chart(df).mark_bar().encode(
    x=alt.X('fuelCost08', type='quantitative', bin=True),
    y=alt.Y(aggregate='count', type='quantitative'),
)

In [9]:
# Histogram with explicit bin settings: extent 0 to 5000 in steps of 250.
(
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X('fuelCost08:Q', bin=alt.Bin(extent=[0, 5000], step=250)),
        y=alt.Y('count()'),
    )
)

In [10]:
# Scatter colored by `cylinders`, with the color channel typed as ordinal.
(
    alt.Chart(df)
    .mark_point()
    .encode(
        x=alt.X('displ', type='quantitative'),
        y=alt.Y('fuelCost08'),
        color=alt.Color('cylinders', type='ordinal'),
    )
)

In [11]:
# Scatter colored by `cylinders`, with the color channel typed as quantitative.
(
    alt.Chart(df)
    .mark_point()
    .encode(
        x=alt.X('displ', type='quantitative'),
        y=alt.Y('fuelCost08'),
        color=alt.Color('cylinders', type='quantitative'),
    )
)

In [12]:
# Scatter colored by `cylinders`, with the color channel typed as nominal.
(
    alt.Chart(df)
    .mark_point()
    .encode(
        x=alt.X('displ', type='quantitative'),
        y=alt.Y('fuelCost08'),
        color=alt.Color('cylinders', type='nominal'),
    )
)

In [13]:
# Tick plot: `barrels08` along x, one row of ticks per `fuel_type_summary` value on y.
alt.Chart(df).mark_tick().encode(
    x='barrels08',
    y='fuel_type_summary',
)

In [14]:
# Box plot of `fuelCost08` for each `year`; the `:O` / `:Q` suffixes set the
# encoding types explicitly.
alt.Chart(df).mark_boxplot().encode(
    x='year:O',          # ordinal
    y='fuelCost08:Q',    # quantitative
)

In [15]:
# Horizontal bars: mean of `fuelCost08` on x, one bar per `year` on y.
alt.Chart(df).mark_bar().encode(
    x='mean(fuelCost08)',
    y='year:O',    # ordinal value
)

## Interactive Plots

In [16]:
# Scatterplot with a tooltip listing make, model and year for each mark;
# .interactive() is applied to the finished chart.
(
    alt.Chart(df)
    .mark_circle(size=50)
    .encode(
        x='displ',
        y='fuelCost08',
        tooltip=['make', 'model', 'year'],
    )
    .interactive()
)

  for col_name, dtype in df.dtypes.iteritems():


In [18]:
# Multiple charts in ONE visualization

# Tick plot of `barrels08` split by `fuel_type_summary`.
chart_1 = (
    alt.Chart(df)
    .mark_tick()
    .encode(
        x='barrels08',
        y='fuel_type_summary',
    )
)

# Binned histogram of `barrels08`.
chart_2 = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X('barrels08:Q', bin=True),
        y=alt.Y('count()'),
    )
)

# `|` combines the two charts into a single compound chart.
chart_1 | chart_2

In [19]:
# Combine the same two charts with `&` instead of `|`.
# Requires chart_1 and chart_2 from the previous cell to be defined.
chart_1 & chart_2

In [20]:
# Faceting: one row of the facet grid per `class_summary` value.
(
    alt.Chart(df)
    .mark_circle(size=50)
    .encode(
        x='displ',
        y='fuelCost08',
        color='class_summary:N',
        tooltip=['make', 'model', 'year'],
    )
    .facet(row='class_summary:N')
)

  for col_name, dtype in df.dtypes.iteritems():


In [21]:
# Faceting with a reusable base chart, laid out with columns=2.
base_chart = (
    alt.Chart(df)
    .mark_circle(size=50)
    .encode(
        x='displ',
        y='fuelCost08',
        color='class_summary:N',
        tooltip=['make', 'model', 'year'],
    )
)

base_chart.facet('class_summary:N', columns=2)

  for col_name, dtype in df.dtypes.iteritems():


In [23]:
# Chart layering

# Bars: mean(fuelCost08) on x for each year on y.
bars = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x='mean(fuelCost08):Q',
        y='year:O',
    )
)

# Red rule positioned by mean(fuelCost08) with no y encoding.
rule = (
    alt.Chart(df)
    .mark_rule(color='red')
    .encode(x='mean(fuelCost08):Q')
)

# `+` layers the rule on top of the bars.
bars + rule

  for col_name, dtype in df.dtypes.iteritems():


In [26]:
# Text layer derived from `bars`: same encodings, with the mark swapped for
# left-aligned text (offset 3 on x) showing mean(fuelCost08) formatted as ',.0f'.
text = bars.mark_text(align='left', dx=3).encode(
    text=alt.Text('mean(fuelCost08):Q', format=',.0f'),
)

# Layer bars, rule and labels together and set the chart width to 700.
(bars + rule + text).properties(width=700)

  for col_name, dtype in df.dtypes.iteritems():
