In [1]:
import pandas as pd
import altair as alt
from vega_datasets import data

In [4]:
# Render charts with Altair's default renderer.
# Only one renderer can be active at a time, so a single enable() call is
# all that is needed here.
alt.renderers.enable('default')

RendererRegistry.enable('default')

In [5]:
# Load the Seattle weather dataset that ships with vega_datasets
# and preview its first rows.
weather_data = data.seattle_weather()
weather_data.head()  # head() shows five rows by default

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain


In [6]:
# Split the 'date' column into separate year / month / day columns and then
# drop the original column.
#
# The frame is rebuilt and rebound instead of being mutated in place, and the
# work is guarded on 'date' still being present, so re-running this cell is a
# harmless no-op rather than an error on the already-removed column.
if 'date' in weather_data.columns:
    weather_data = (
        weather_data
        .assign(
            year=lambda frame: frame['date'].dt.year,
            month=lambda frame: frame['date'].dt.month,
            day=lambda frame: frame['date'].dt.day,
        )
        .drop(columns='date')
    )

In [7]:
# Preview the first ten rows of the current weather_data frame.
weather_data.head(10)

Unnamed: 0,precipitation,temp_max,temp_min,wind,weather,year,month,day
0,0.0,12.8,5.0,4.7,drizzle,2012,1,1
1,10.9,10.6,2.8,4.5,rain,2012,1,2
2,0.8,11.7,7.2,2.3,rain,2012,1,3
3,20.3,12.2,5.6,4.7,rain,2012,1,4
4,1.3,8.9,2.8,6.1,rain,2012,1,5
5,2.5,4.4,2.2,2.2,rain,2012,1,6
6,0.0,7.2,2.8,2.3,rain,2012,1,7
7,0.0,10.0,2.8,2.0,sun,2012,1,8
8,4.3,9.4,5.0,3.4,rain,2012,1,9
9,1.0,6.1,0.6,3.4,rain,2012,1,10


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
weather_data.describe()

Unnamed: 0,precipitation,temp_max,temp_min,wind,year,month,day
count,1461.0,1461.0,1461.0,1461.0,1461.0,1461.0,1461.0
mean,3.029432,16.439083,8.234771,3.241136,2013.498973,6.52293,15.729637
std,6.680194,7.349758,5.023004,1.437825,1.118723,3.449884,8.803105
min,0.0,-1.6,-7.1,0.4,2012.0,1.0,1.0
25%,0.0,10.6,4.4,2.2,2012.0,4.0,8.0
50%,0.0,15.6,8.3,3.0,2013.0,7.0,16.0
75%,2.8,22.2,12.2,4.0,2014.0,10.0,23.0
max,55.9,35.6,18.3,9.5,2015.0,12.0,31.0


In [10]:
# Simple interactive scatter plot of wind speed against maximum temperature.
# The chain is wrapped in parentheses (rather than joined with backslashes)
# so that the per-line comments below are valid Python.
(
    alt.Chart(weather_data, height=500, width=700)
    .mark_point()
    .encode(
        x='temp_max:Q',                   # x axis: maximum temperature
        y='wind:Q',                       # y axis: wind speed
        color='weather:N',                # colour points by weather description
        tooltip=['weather', 'temp_max'],  # fields shown when hovering a point
    )
    .interactive()
)

In [11]:
# Linked bar chart and rule driven by a shared selection.

# Interval selection ("brush") restricted to the x encoding.
brush = alt.selection(type='interval', encodings=['x'])

# Bars: month on the x axis, mean maximum temperature on the y axis.
# Opacity is conditional on the brush: 1 for bars inside the selection,
# 0.5 for everything else.
bars = (
    alt.Chart(height=400, width=600)
    .mark_bar(color='limegreen')
    .encode(
        x='month:O',
        y='mean(temp_max):Q',
        opacity=alt.condition(
            brush,
            alt.OpacityValue(1),
            alt.OpacityValue(0.5),
        ),
    )
    .add_selection(brush)  # the selection must be attached to a chart explicitly
)

# Red horizontal rule at the mean maximum temperature. It is not static:
# transform_filter(brush) limits the rows it averages to the brushed range.
line = (
    alt.Chart()
    .mark_rule(color='red')
    .encode(
        y='mean(temp_max):Q',
        size=alt.SizeValue(5),
    )
    .transform_filter(brush)
)

# Neither sub-chart carries data of its own; it is supplied once for the layer.
alt.layer(bars, line, data=weather_data)

In [14]:
# Base scatter plot with wind speed on the y axis, reused for two panels.
# Points are coloured by weather condition while they fall inside the brush
# selection; points outside it are drawn in light gray.
#
# The 250x250 size is given once, in the constructor. Stating a different
# size there and then overriding it via .properties() is redundant, because
# the later value is the only one that takes effect.
chart = (
    alt.Chart(weather_data, width=250, height=250)
    .mark_point()
    .encode(
        y='wind:Q',
        color=alt.condition(
            brush,
            'weather:N',
            alt.value('lightgray'),
        ),
    )
    .add_selection(brush)
)

# Show two copies side by side: precipitation on the x axis of the left
# panel, maximum temperature on the x axis of the right panel. Both panels
# carry the same brush, so selecting in either one updates both.
chart.encode(x='precipitation:Q') | chart.encode(x='temp_max:Q')

In [15]:
# Year picker: a range slider covering 2012-2015 in steps of one year ...
slider = alt.binding_range(min=2012, max=2015, step=1)

# ... bound to a single-value selection on the 'year' field, starting at 2012.
select_year = alt.selection_single(
    name="year",
    fields=['year'],
    bind=slider,
    init={'year': 2012},
)

In [16]:
# Fixed colour for each kind of weather in the dataset.
# domain[i] is drawn in range[i]:
#   drizzle -> steelblue, rain -> yellow, sun -> red, snow -> green, fog -> violet
color = alt.Scale(
    domain=('drizzle', 'rain', 'sun', 'snow', 'fog'),
    range=['steelblue', 'yellow', 'red', 'green', 'violet'],
)

In [17]:
# Interactive bar chart controlled by the year slider.
#   x       - weather category (axis title suppressed)
#   y       - temp_max on a fixed 0-40 scale
#   color   - weather category, using the colour scale defined earlier
#   column  - one facet column per month
#   tooltip - precipitation
#
# The size is stated once in the constructor: each panel is 50 wide and
# 400 high. Passing a different width to the constructor and overriding it
# with .properties() is redundant, since only the later value takes effect.
(
    alt.Chart(weather_data, height=400, width=50)
    .mark_bar()
    .encode(
        x=alt.X('weather:N', title=None),
        y=alt.Y('temp_max:Q', scale=alt.Scale(domain=(0, 40))),
        color=alt.Color('weather:N', scale=color),
        column='month:O',
        tooltip=['precipitation'],
    )
    .add_selection(select_year)     # attach the slider-bound selection
    .transform_filter(select_year)  # keep only rows for the selected year
    .configure_facet(spacing=8)
)