# Activity: Altair 


# Activity 1: Bar chart with transform_aggregate and hoverable information


- Load the `Life Expectancy Data.csv` data from the slides directory 
- Create a bar chart that:
    - Rounds the years of schooling variable
    - Groups the data by the rounded years of schooling and the `status` variable
    - Filters out null values for years of schooling. Hint: https://stackoverflow.com/questions/71770959/is-there-a-way-to-hide-nulls-in-an-altair-chart
    - Plots the average life expectancy for each non-null years-of-schooling × status group
    - Shows the life expectancy as hoverable (tooltip) information

In [35]:
import pandas as pd
import numpy as np
import altair as alt
from altair import datum

# Load the WHO life-expectancy data and normalise the header names
# (surrounding whitespace removed, everything lower-cased).
who = pd.read_csv("../../slides/session9_altair/Life Expectancy Data.csv")
who.columns = who.columns.str.strip().str.lower()

# Fixed status -> colour mapping used by the colour scale below.
domain = ['Developing', 'Developed']
colors = ['seagreen', '#7D3C98']

# Whole-number years of schooling, used as the grouping key on the x axis.
who["schooling_rounded"] = who["schooling"].round()

# Drop rows without a schooling value, average life expectancy within each
# (status, rounded schooling) group, then draw one bar per group with the
# two statuses placed side by side via the x offset.
(
    alt.Chart(who)
    .transform_filter("isValid(datum.schooling_rounded)")
    .transform_aggregate(
        avg_life="mean(life expectancy)",
        groupby=["status", "schooling_rounded"],
    )
    .mark_bar()
    .encode(
        alt.X("schooling_rounded:O", title="Years of schooling"),
        alt.XOffset("status:N"),
        alt.Y("avg_life:Q", title="Average Life expectancy"),
        alt.Tooltip("avg_life:Q", format=".2f", title="Life exp:"),
        alt.Color("status:N", title="").scale(domain=domain, range=colors),
    )
    .interactive()
)

# Activity 2: make a chart responsive to another chart's selections

- Create two charts:
    - A chart where the x axis is each of the rounded years of schooling (no y axis)
    - A scatterplot with an x axis of gdp and a y axis of life expectancy
    - Place the scatterplot on top and histogram on bottom
- Use `transform_filter` to:
    - Restrict the data across both plots to the year 2010
- On the scatterplot, highlight the points that fall inside the selection made on the histogram and gray out all other points
    


In [55]:
# Interval (brush) selection restricted to the x encoding; it is attached to
# the schooling chart and read by the scatterplot's conditional encodings.
select_hist = alt.selection_interval(encodings=['x'])

# Both charts use the same rows: year 2010 with a valid rounded
# years-of-schooling value. Declaring the filters once on a shared base
# keeps the two charts from drifting apart.
base_2010 = alt.Chart(who).transform_filter(
    datum.year == 2010
).transform_filter(
    'isValid(datum.schooling_rounded)'
)

# Bottom chart: one bar per rounded year of schooling (no y encoding).
# `add_params` is the Altair 5 replacement for the deprecated
# `add_selection`, which emits a DeprecationWarning.
hist_c = base_2010.mark_bar().encode(
    x=alt.X('schooling_rounded:O', title="Years of schooling"),
).add_params(select_hist)

# Top chart: GDP vs. life expectancy. Points inside the selection are drawn
# in red and mostly opaque; points outside are light gray and faint.
scatter_gdp = base_2010.mark_point().encode(
    x=alt.X('gdp', title="GDP"),
    y=alt.Y('life expectancy', title="Life expectancy (2010)"),
    color=alt.condition(select_hist, alt.value('red'), alt.value('lightgray')),
    opacity=alt.condition(select_hist, alt.value(0.8), alt.value(0.1)),
)

# Vertical concatenation: scatterplot on top, schooling chart underneath.
scatter_gdp & hist_c

  hist_c = alt.Chart(who).mark_bar().encode(


# Activity 3: create a repeated chart 

- Read the documentation here for a repeated chart: https://altair-viz.github.io/altair-viz-v4/user_guide/compound_charts.html 
- Create a repeated chart for the year 2010 where the y axis is life expectancy and the x axis is:
    - infant deaths (divided by population)
    - under-five deaths (divided by population)
    - income composition of resources
    - percentage expenditure

In [54]:
# Death counts expressed relative to the population column.
who["id_pop"] = who["infant deaths"].div(who["population"])
who["u5_pop"] = who["under-five deaths"].div(who["population"])

# One scatterplot per entry in `row`: the repeated row field goes on the
# x axis and the single repeated column field (life expectancy) on the
# y axis. The year filter is applied to the repeated chart as a whole.
(
    alt.Chart(who)
    .mark_point()
    .encode(
        x=alt.X(alt.repeat("row"), type="quantitative"),
        y=alt.Y(alt.repeat("column"), type="quantitative"),
        color=alt.Color("status:N"),
    )
    .repeat(
        row=[
            "id_pop",
            "u5_pop",
            "income composition of resources",
            "percentage expenditure",
        ],
        column=["life expectancy"],
    )
    .transform_filter(datum.year == 2010)
)