In [None]:
!pip install altair

In [None]:
!pip install vega_datasets

In [None]:
import pandas as pd
import altair as alt

from vega_datasets import data

In [None]:
# ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the cars sample dataset shipped with vega_datasets.
cars = data.cars()

# First and last rows, followed by the overall dimensions of the table.
print(cars.head())
print(cars.tail())
print(cars.size)
print(cars.shape)

# Summary statistics of the numeric columns.
print(cars.describe())

# info() writes its report to stdout itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
cars.info()

# Frequency of each distinct row.
print(cars.value_counts())

In [None]:
# One-dimensional plot: each car is drawn as a point positioned along the
# x axis by its Miles_per_Gallon value.
alt.Chart(cars).mark_point().encode(
    x=alt.X('Miles_per_Gallon'),
)

In [None]:
# One-dimensional plot with tick marks: each car is drawn as a short tick
# along the x axis, which shows the density of Miles_per_Gallon values more
# clearly than overlapping points do.
alt.Chart(cars).mark_tick().encode(x = 'Miles_per_Gallon')

In [None]:
# Two-dimensional scatter plot: fuel economy on x against engine power on y.
alt.Chart(cars).mark_point().encode(
    x=alt.X('Miles_per_Gallon'),
    y=alt.Y('Horsepower'),
)

In [None]:
### Simple Interactivity

In [None]:
# Same scatter plot as before, with .interactive() applied to the finished
# chart to make it interactive.
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        x=alt.X('Miles_per_Gallon'),
        y=alt.Y('Horsepower'),
    )
    .interactive()
)

In [None]:
### Add Third Dimension with Color

In [None]:
# Third dimension via colour: points are coloured by the categorical
# Origin column.
alt.Chart(cars).mark_point().encode(
    x=alt.X('Miles_per_Gallon'),
    y=alt.Y('Horsepower'),
    color=alt.Color('Origin'),
)

In [None]:
# Add color for a continuous variable: Acceleration is encoded as a
# quantitative (:Q) field, so the points are shaded along a color gradient
# instead of receiving one discrete color per category.
alt.Chart(cars).mark_point().encode(
    x = 'Miles_per_Gallon',
    y = 'Horsepower',
    color = 'Acceleration:Q')

In [None]:
# Colour by discrete ordered data: the ':O' suffix marks Cylinders as an
# ordinal field.
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        x=alt.X('Miles_per_Gallon'),
        y=alt.Y('Horsepower'),
        color=alt.Color('Cylinders:O'),
    )
)

In [None]:
#Hover Tool

In [None]:
# Scatter of circles (size 60) coloured by Origin. Hovering over a circle
# shows the listed fields in a tooltip; .interactive() makes the chart
# interactive.
(
    alt.Chart(cars)
    .mark_circle(size=60)
    .encode(
        x=alt.X('Horsepower'),
        y=alt.Y('Miles_per_Gallon'),
        color=alt.Color('Origin'),
        tooltip=[
            'Name',
            'Origin',
            'Horsepower',
            'Miles_per_Gallon',
        ],
    )
    .interactive()
)

In [None]:
#### Slider

In [None]:
# Range slider labelled 'opacity', running from 0 to 1 in steps of 0.05,
# bound to a parameter whose starting value is 0.1.
slider = alt.binding_range(name='opacity', min=0, max=1, step=0.05)
op_var = alt.param(bind=slider, value=0.1)

# The parameter is used as the opacity of the circles and registered on the
# chart with add_params.
(
    alt.Chart(cars)
    .mark_circle(opacity=op_var)
    .encode(
        x=alt.X('Horsepower:Q'),
        y=alt.Y('Miles_per_Gallon:Q'),
        color=alt.Color('Origin:N'),
        tooltip=['Name', 'Origin', 'Horsepower', 'Miles_per_Gallon'],
    )
    .add_params(op_var)
)



In [None]:
# Add an integer 'year' column extracted from the datetime 'Year' column.
cars['year'] = cars['Year'].dt.year

# Find the range of the years. Cast to plain Python ints so the values
# embedded in the chart specification are ordinary JSON numbers rather than
# numpy scalars.
min_year = int(cars['year'].min())
max_year = int(cars['year'].max())

print(min_year)
print(max_year)

# Slider over the available years, bound to a parameter holding the
# currently selected year (starting at the earliest one).
slider = alt.binding_range(min = min_year, max = max_year, step = 1, name = 'year')
year_param = alt.param(value = min_year, bind = slider)

# Scatter plot showing only the cars of the selected year.
# The year parameter drives the filter only; it is deliberately NOT passed as
# the mark opacity, since a value such as 1970 is not a meaningful opacity.
alt.Chart(cars).mark_point().encode(
    x = 'Horsepower:Q',
    y = 'Miles_per_Gallon:Q',
    color = 'Origin:N',
    tooltip = ['Name', 'Year', 'Origin']
    ).add_params(
        year_param
    ).transform_filter(
        alt.datum.year == year_param
    )



In [None]:
#### Linked Brushing

In [None]:
# Interval selection shared by both panels below.
brush = alt.selection_interval()

# Base scatter: points keep their Origin colour while inside the brush and
# turn light gray otherwise.
chart = (
    alt.Chart(cars)
    .mark_point()
    .encode(
        x=alt.X('Horsepower:Q'),
        y=alt.Y('Miles_per_Gallon:Q'),
        color=alt.condition(brush, 'Origin:N', alt.value('lightgray')),
    )
    .properties(width=250, height=250)
    .add_params(brush)
)

# Horizontal concatenation of the base chart and a copy whose x channel is
# switched to Acceleration.
alt.hconcat(chart, chart.encode(x='Acceleration:Q'))

In [None]:
# Filter Data: brushing over the scatter plot filters the bar chart beside it.
brush = alt.selection_interval()

# The scatter plot owns the brush: drag over it to select a region.
points = alt.Chart(cars).mark_point().encode(
    x = 'Horsepower:Q',
    y = 'Miles_per_Gallon:Q',
    color = 'Origin:N'
).add_params(
    brush
)

# The bar chart counts cars per Origin and is restricted to the brushed
# points with transform_filter. (Adding the brush to this chart a second
# time, as a parameter, would not filter anything.)
bars = alt.Chart(cars).mark_bar().encode(
    x = 'count(Origin):Q',
    y = 'Origin:N',
    color = 'Origin:N'
).transform_filter(
    brush
)

points | bars

In [None]:
### Time series and Layering

In [None]:
import altair as alt

# Requires the `cars` DataFrame loaded near the top of the notebook.

# One interval selection shared by both panels (resolved globally).
brush = alt.selection_interval(resolve = 'global')

# Shared definition of the two panels: Miles_per_Gallon on y, points coloured
# by Origin while inside the brush and gray otherwise.
base_scatter = alt.Chart(cars).mark_point().encode(
    y = 'Miles_per_Gallon:Q',
    color = alt.condition(brush, 'Origin', alt.ColorValue('gray'))
).add_params(brush).properties(
    width = 250,
    height = 250
)

# The panels differ only in the field shown on the x axis.
chart_horsepower = base_scatter.encode(x = 'Horsepower:Q')
chart_acceleration = base_scatter.encode(x = 'Acceleration:Q')

# Only the last expression of a cell is displayed, so the cell ends with the
# single side-by-side view of the two linked panels.
chart_horsepower | chart_acceleration

In [None]:
# Build the dashboard: the two linked scatter plots side by side under a
# common title.
dashboard = alt.hconcat(
    chart_horsepower,
    chart_acceleration,
).properties(title='My Interactive Dashboard')

# Write the dashboard to an HTML file in the working directory.
dashboard.save('Cars.html')

In [None]:
# Miles_per_Gallon over time: one point per car, with Year on a temporal
# x axis.
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        x=alt.X('Year:T'),
        y=alt.Y('Miles_per_Gallon:Q'),
    )
)

In [None]:
# Confidence band of Miles_per_Gallon per year and Origin: a translucent area
# spanning the ci0 (lower) to ci1 (upper) aggregates, drawn 800 px wide.
(
    alt.Chart(cars)
    .mark_area(opacity=0.3)
    .encode(
        x=alt.X('Year', timeUnit='year'),
        y=alt.Y(
            'ci0(Miles_per_Gallon)',
            axis=alt.Axis(title='Miles_per_Gallon'),
        ),
        y2=alt.Y2('ci1(Miles_per_Gallon)'),
        color=alt.Color('Origin'),
    )
    .properties(width=800)
)

In [None]:
# Layer a line chart of the yearly mean on top of an area chart of the
# confidence interval, both split by Origin.

# Translucent band between the ci0 and ci1 aggregates of Miles_per_Gallon.
spread = (
    alt.Chart(cars)
    .mark_area(opacity=0.3)
    .encode(
        x=alt.X('Year', timeUnit='year'),
        y=alt.Y(
            'ci0(Miles_per_Gallon)',
            axis=alt.Axis(title='Miles_per_Gallon'),
        ),
        y2=alt.Y2('ci1(Miles_per_Gallon)'),
        color=alt.Color('Origin'),
    )
    .properties(width=800)
)

# Mean Miles_per_Gallon per year.
lines = (
    alt.Chart(cars)
    .mark_line()
    .encode(
        x=alt.X('Year', timeUnit='year'),
        y=alt.Y('mean(Miles_per_Gallon)'),
        color=alt.Color('Origin'),
    )
    .properties(width=800)
)

# Superimpose the two layers.
alt.layer(spread, lines)

In [None]:
# Confidence band of Miles_per_Gallon per Year across all cars (no split by
# Origin): an area spanning the ci0 to ci1 aggregates.
area = (
    alt.Chart(cars)
    .mark_area()
    .encode(
        x=alt.X('Year:T'),
        y=alt.Y('ci0(Miles_per_Gallon):Q'),
        y2=alt.Y2('ci1(Miles_per_Gallon):Q'),
    )
)

area