Interactive Visualizations with Altair

In [1]:
# Import our data processing library (note: you may have to install this!)
import pandas as pd
import altair as alt

# Let's use this to upload a sample dataset and show the start of the dataset
data= pd.read_csv("WHR_2016.csv")
data.head()

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Lower Confidence Interval,Upper Confidence Interval,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Denmark,Western Europe,1,7.526,7.46,7.592,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2.73939
1,Switzerland,Western Europe,2,7.509,7.428,7.59,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2.69463
2,Iceland,Western Europe,3,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137
3,Norway,Western Europe,4,7.498,7.421,7.575,1.57744,1.1269,0.79579,0.59609,0.35776,0.37895,2.66465
4,Finland,Western Europe,5,7.413,7.351,7.475,1.40598,1.13464,0.81091,0.57104,0.41004,0.25492,2.82596


Selection

In [2]:
# Implementing selection
selection = alt.selection(type='multi', fields=['Region'])

alt.Chart(data).mark_circle().encode(
    x = "Health (Life Expectancy)",
    y    = "Happiness Score",
    color=alt.Color('Region', scale=alt.Scale(scheme='spectral')),
    size="Generosity",
    tooltip=["Country", "Happiness Score"],
    opacity=alt.condition(selection,alt.value(1),alt.value(.2))
).add_selection(selection)

   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
        combined and should be specified using "selection_point()".


In [3]:
# Implementing selection
selection = alt.selection(type='multi', fields=['Region'], on='mouseover', nearest=True)

alt.Chart(data).mark_circle().encode(
    x = "Health (Life Expectancy)",
    y    = "Happiness Score",
    color=alt.Color('Region', scale=alt.Scale(scheme='spectral')),
    size="Generosity",
    tooltip=["Country", "Happiness Score"],
    opacity=alt.condition(selection,alt.value(1),alt.value(.2))
).add_selection(selection)

   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
        combined and should be specified using "selection_point()".


Exploration

In [4]:
alt.Chart(data).mark_circle().encode(
    x = "Health (Life Expectancy)",
    y    = "Happiness Score",
    color=alt.Color('Region', scale=alt.Scale(scheme='spectral')),
    size="Generosity",
    tooltip=["Country", "Happiness Score"]
).interactive()

Abstract / Elaborate

In [5]:
# Let's implement filtering using dynamic queries. 
selection = alt.selection(type="multi", fields=["Region"])

# Create a container for our two different views
base =  alt.Chart(data).properties(width=500, height=250)

# Let's specify our overview chart
overview = alt.Chart(data).mark_bar().encode(
    y = "mean(Happiness Score)",
    x = "Region",
    color=alt.condition(selection, alt.value("orange"), alt.value("lightgrey"))
).add_selection(selection).properties(height=250, width=250)

# Create a detail chart
detail = hist = base.mark_bar().encode(
    y = "Happiness Score", 
    x = "Country"
).transform_filter(selection).properties(height=250, width=250)

overview | detail

   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
        combined and should be specified using "selection_point()".


Filtering

In [6]:
# Bind our selection to the legend
selection = alt.selection(type='multi', fields=['Region'], bind='legend')

alt.Chart(data).mark_circle().encode(
    x = "Health (Life Expectancy)",
    y    = "Happiness Score",
    color=alt.Color('Region', scale=alt.Scale(scheme='spectral')),
    size="Generosity",
    tooltip=["Country", "Happiness Score"],
    opacity=alt.condition(selection,alt.value(1),alt.value(.2))
).add_selection(selection)

   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
        combined and should be specified using "selection_point()".


In [7]:
# Let's implement filtering using dynamic queries. 
dropdown = alt.binding_select (options=data["Region"].unique(), name="Select a region:")

# Create a new selection that uses my dynamic query widget
selection = alt.selection(type="single", fields=["Region"], bind=dropdown)

# Let's specify our chart
alt.Chart(data).mark_circle().encode(
    x = "Health (Life Expectancy)",
    y = "Happiness Score",
    color=alt.Color('Region', scale=alt.Scale(scheme='spectral')),
    size="Generosity",
    tooltip=["Country", "Happiness Score"],
    opacity=alt.condition(selection,alt.value(1),alt.value(.2))
).add_selection(selection)

   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
        combined and should be specified using "selection_point()".


Reconfigure

In [8]:
# Let's specify our chart
alt.Chart(data).mark_bar().encode(
    y = "mean(Happiness Score)",
    x = "Region"
)

In [9]:
# Let's specify our chart
alt.Chart(data).mark_bar().encode(
    y = "mean(Happiness Score)",
    x = alt.X(field='Region', type='nominal', sort=alt.EncodingSortField(field='Happiness Score', op='mean'))
)

Encode

In [10]:
# Let's implement filtering using dynamic queries. 
dropdown = alt.binding_select (options=["Generosity", "Family", "Freedom"], name="Select a size variable:")

# Create a new selection that uses my dynamic query widget
selection = alt.selection(type="single", fields=['column'], bind=dropdown, init={'column':'Generosity'})

# Let's specify our chart
alt.Chart(data).transform_fold(
    ["Generosity", "Family", "Freedom"],
    as_=['column', 'value']
).transform_filter(
    selection
).mark_circle().encode(
    x = "Health (Life Expectancy)",
    y = "Happiness Score",
    color=alt.Color('Region', scale=alt.Scale(scheme='spectral')),
    size="value:Q",
    tooltip=["Country", "Happiness Score"],
).add_selection(selection)

   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
        combined and should be specified using "selection_point()".


TypeError: altair.vegalite.v5.schema.core.SelectionParameter() got multiple values for keyword argument 'value'

Connect

In [None]:
# Linked views
# Creating a selection: 
selection = alt.selection(type="multi", fields=["Region"])

# Create a container for our two different views
base =  alt.Chart(data).properties(width=250, height=250)

# Create our scatterplot
scatterplot = base.mark_circle().encode(
    x = 'Happiness Score',
    y = 'Health (Life Expectancy)',
    size = "Generosity", 
    color = alt.condition(selection, "Region", alt.value('lightgray'))
).add_selection(selection)

# Create a histogram
hist = base.mark_bar().encode(
    x = alt.X("Happiness Score", bin=alt.Bin(maxbins=5)), 
    y = "count()"
).transform_filter(selection)

# Connect our charts using the pipe operation
scatterplot | hist

In [None]:
# This selection is going to be an interval selection
selection = alt.selection(type="interval", encodings=["x", "y"])

# Create our scatterplot
scatterplot = alt.Chart(data).mark_circle().encode(
    x = 'Happiness Score',
    y = 'Health (Life Expectancy)',
    size = "Generosity", 
    color = alt.condition(selection, "Region", alt.value('lightgray'))
).properties(
    width = 200,
    height = 200
).add_selection(selection)

# Define our background chart
base = alt.Chart().mark_bar(color="cornflowerblue").encode(
    x = alt.X("Happiness Score", bin=alt.Bin(maxbins=5)),
    y = "count()"
).properties (
    width=200,
    height = 200
)

# Grey background to show the selection range in the scatterplot
background = base.encode(color=alt.value('lightgray')).add_selection(selection)

# Blue highlights to show the transformed (brushed) data
highlight = base.transform_filter(selection)

# Layer the two charts
layers = alt.layer(background, highlight, data = data)

scatterplot | layers