In [None]:
%run _prepare.ipynb

In [None]:
import pandas as pd
import json

In [None]:
import altair as alt

In [None]:
def create_date_features(df):
    """
    Return a copy of ``df`` extended with calendar columns derived from ``date``.

    The added columns are ``year``, ``month``, ``day``, ``weekday`` (as given by
    ``Series.dt.weekday``) and ``week_of_year`` (the ISO calendar week taken
    from ``Series.dt.isocalendar()``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime-like ``date`` column (the ``.dt`` accessor is used).

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is left untouched. Building the result with
        ``assign`` keeps the function idempotent across cell re-runs and avoids
        ``SettingWithCopyWarning`` when a filtered slice is passed in.
    """
    dates = df["date"].dt
    return df.assign(
        year=dates.year,
        month=dates.month,
        day=dates.day,
        weekday=dates.weekday,
        week_of_year=dates.isocalendar().week,
    )

In [None]:
#alt.data_transformers.disable_max_rows()

# Basic Charts

In [None]:
# Bubble chart over `countries_actual`: cases vs. deaths per million,
# coloured by continent and sized by life expectancy.
bubble_chart = (
    alt.Chart(countries_actual)
    .encode(
        x=alt.X("total_cases_per_million"),
        y=alt.Y("total_deaths_per_million"),
        color="continent",
        size="life_expectancy",
        tooltip=["location", "life_expectancy"],
    )
    .mark_circle()
)
bubble_chart

In [None]:
# One line per continent showing the sum of new cases on each date.
(
    alt.Chart(countries)
    .properties(width=900, height=250)
    .encode(
        x=alt.X("date:T"),
        y=alt.Y(field='new_cases', aggregate='sum', type='quantitative'),
        color="continent",
    )
    .mark_line()
)

# Using scales

In [None]:
# Germany's date / new_cases / new_deaths columns, extended with calendar features.
plot_df = create_date_features(
    countries
    .query("location=='Germany'")
    [["date", "new_cases", "new_deaths"]]
)

In [None]:
# Bars of new cases per week; no type shorthand is given for the x field here.
(
    alt.Chart(plot_df)
    .encode(
        x=alt.X("week_of_year"),
        y=alt.Y("new_cases"),
    )
    .mark_bar()
)

In [None]:
# Same bars, but the week is explicitly declared ordinal via the `:O` shorthand.
(
    alt.Chart(plot_df)
    .encode(
        x=alt.X("week_of_year:O"),
        y=alt.Y("new_cases"),
    )
    .mark_bar()
)

In [None]:
# Weekly bars split into one row per year.
(
    alt.Chart(plot_df)
    .encode(
        x=alt.X("week_of_year"),
        y=alt.Y("new_cases"),
        row="year",
    )
    .mark_bar()
)

In [None]:
# Weekly bars in a grid: one row per year, one column per weekday.
(
    alt.Chart(plot_df)
    .encode(
        x=alt.X("week_of_year"),
        y=alt.Y("new_cases"),
        column="weekday",
        row="year",
    )
    .mark_bar()
)

## Modifying existing Charts

In [None]:
# Heatmap of mean new cases per (week of year, weekday) cell; kept in a
# variable so later cells can derive variants from it.
heatmap = (
    alt.Chart(plot_df)
    .encode(
        x=alt.X('week_of_year:N'),
        y=alt.Y('weekday:N'),
        color=alt.Color(field='new_cases', aggregate='mean', type='quantitative'),
    )
    .mark_rect()
)
heatmap

In [None]:
# Derive a variant of the existing heatmap that is split into one row per year.
heatmap.encode(
    row="year",
)

In [None]:
# Rows for six selected countries, reduced to the columns needed below and
# extended with calendar features.
melted_country_df = create_date_features(
    countries[
        countries.location.isin(
            ["Germany", "France", "Spain", "United Kingdom", "Italy", "Netherlands"]
        )
    ][
        ["date", "location", "new_cases_per_million", "continent"]
    ]
)

# Month-by-weekday heatmap of mean new cases per million,
# laid out with one column per country and one row per year.
chart = (
    alt.Chart(melted_country_df)
    .encode(
        x=alt.X('weekday:O'),
        y=alt.Y('month:O'),
        color=alt.Color(field='new_cases_per_million', aggregate='mean', type='quantitative'),
        row="year",
        column="location",
    )
    .mark_rect()
)
chart

## Aggregations and resolving Scales

In [None]:
# Germany only: melt the selected metric columns into long form
# (date / variable / value), replace missing values with 0, then add calendar features.
melted_df = create_date_features(
    countries[countries.location.isin(["Germany"])][
        [
            "date",
            "new_cases_per_million",
            "new_deaths_per_million",
            "new_vaccinations",
            "weekly_hosp_admissions_per_million",
        ]
    ]
    .melt("date")
    .fillna(0)
)

# One heatmap column per metric; all columns share a single colour scale
# unless the commented-out resolve_scale call below is enabled.
chart = (
    alt.Chart(melted_df)
    .encode(
        x=alt.X('weekday:O'),
        y=alt.Y('month:O'),
        color=alt.Color("value", aggregate='mean'),
        column="variable",
    )
    .mark_rect()
)  # .resolve_scale(color='independent')
chart

In [None]:
# One line chart row per metric; each row gets its own y scale.
chart = (
    alt.Chart(melted_df)
    .properties(height=150)
    .encode(
        x=alt.X('date'),
        y=alt.Y('value:Q'),
        row="variable",
    )
    .mark_line()
    .resolve_scale(y='independent')
)
chart

# Combining Charts

In [None]:
# Shared chart base bound to plot_df; the following cells derive marks and encodings from it.
base = alt.Chart(plot_df)

In [None]:
# New cases and new deaths over time, placed side by side.
alt.hconcat(
    base.mark_line().encode(x='date:T', y='new_cases'),
    base.mark_line().encode(x='date:T', y='new_deaths'),
)

In [None]:
# New cases and new deaths (red) layered in one chart, each with its own y scale.
alt.layer(
    base.mark_line().encode(x='date:T', y='new_cases'),
    base.mark_line().encode(x='date:T', y='new_deaths', color=alt.value("red")),
).resolve_scale(y="independent")

## Interactivity

In [None]:
# Shared line-chart base for the interactive examples below.
# Bound to `countries` (the frame the earlier continent line chart uses with the
# same size) instead of `data`: within this notebook `data` is first assigned in
# the "Interactive Maps" section, so referencing it here only worked through
# out-of-order execution and fails on Restart & Run All.
# NOTE(review): if _prepare.ipynb intentionally provides a different `data`
# frame, switch this back — confirm.
base = alt.Chart(countries, width=900, height=250).mark_line()

In [None]:
# Interval selection restricted to the x channel.
brush = alt.selection(type='interval', encodings=['x'])

# Detail view: summed new cases per continent; its x domain is driven by the brush.
upper = base.encode(
    x=alt.X('date:T', scale=alt.Scale(domain=brush)),
    y=alt.Y(field='new_cases', aggregate='sum', type='quantitative'),
    color="continent",
)

# Overview strip: summed new cases over all dates; this is where the brush is attached.
lower = (
    base.properties(height=60)
    .encode(
        x=alt.X('date:T'),
        y=alt.Y(field='new_cases', aggregate='sum', type='quantitative'),
    )
    .add_selection(brush)
)

alt.vconcat(upper, lower)

In [None]:
# Multi selection on the `location` field, created with empty='none'.
selector = alt.selection(type="multi", fields=["location"], empty='none')

# Summed new cases over time, one colour per location.
line = base.encode(
    x=alt.X('date:T'),
    y=alt.Y(field='new_cases', aggregate='sum', type='quantitative'),
    color=alt.Color("location"),
)

# Bubble chart variant: points matching the selection keep their continent
# colour, all others are drawn light gray.
bubble_interactive = (
    bubble_chart
    .properties(height=500, width=500)
    .encode(
        color=alt.condition(selector, alt.Color("continent:N"), alt.value('lightgray'))
    )
    .add_selection(selector)
)

# Bubble chart on top, line chart filtered by the same selection underneath.
alt.vconcat(
    bubble_interactive,
    line.transform_filter(selector),
).resolve_scale(color="independent")

# Geo

In [None]:
from vega_datasets import data as vega_data

# Per-country case table read from the URL below; UID is renamed to `id`.
url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv"
cases = pd.read_csv(url).rename(columns={"UID": "id"})
cols = ["id", "Country_Region", "Confirmed", "Deaths", "Mortality_Rate"]

# Country shapes taken from the 'countries' feature of the world_110m TopoJSON.
countries_topo = alt.topo_feature(vega_data.world_110m.url, 'countries')

# Plain world map; the tooltip shows each shape's id.
worldmap = (
    alt.Chart(countries_topo)
    .properties(width=900, height=380)
    .mark_geoshape(stroke="white")
    .encode(tooltip=["id:Q"])
    .project("equirectangular")
)
worldmap

## Integrating map and other data

To get data offline you can use: 

```
#!wget https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/world-110m.json
topojson = json.loads(open("world-110m.json","rb").read())
countries = alt.InlineData(topojson, format=alt.DataFormat(feature='countries',type='topojson'))
```

In [None]:
# Metric columns pulled from `cases` onto the map shapes.
cols = ["Confirmed", "Deaths", "Mortality_Rate"]

world_topo = alt.topo_feature(vega_data.world_110m.url, 'countries')

# World map coloured by deaths. Each shape is matched to a `cases` row via `id`.
worldmap = (
    alt.Chart(world_topo, width=900, height=380)
    .mark_geoshape(stroke="white")
    .encode(
        tooltip=["Country_Region:N", "Deaths:Q"],
        color='Deaths:Q',
    )
    .project("equirectangular")
    .transform_lookup(
        lookup='id',
        # Country_Region has to be looked up as well: the tooltip reads it, and
        # with only `cols` in `fields` that tooltip entry was never populated.
        from_=alt.LookupData(
            cases.fillna(0), key='id', fields=["Country_Region"] + cols
        ),
    )
)
worldmap

## Repeating

In [None]:
# Repeat the map once per entry of `cols`, colouring each row by the repeated
# field and giving every row its own colour scale.
(
    worldmap
    .encode(
        color=alt.Color(alt.repeat('row'), type='quantitative'),
        tooltip=["Country_Region:N", "Deaths:Q"],
    )
    .project("equirectangular")
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(
            cases.fillna(0), key='id', fields=["Country_Region"] + cols
        ),
    )
    .repeat(row=cols)
    .resolve_scale(color="independent")
)


# Interactive Maps

In [None]:
# Multi selection keyed on the `id` field, created with empty='none'.
click = alt.selection_multi(fields=['id'], empty='none')

# Attach the integer `id` from `cases` to each `countries` row by matching
# `location` against `Country_Region`.
data = countries.join(cases.set_index("Country_Region")["id"].astype(int), on="location")
# Summed smoothed new cases per million over time, one row and one colour per location.
line_chart = alt.Chart(data, width=900, height=250).mark_line().encode(
    x="date:T", 
    y=alt.Y(field='new_cases_smoothed_per_million', aggregate='sum', type='quantitative'),
    color="location",
    row="location"
)

# NOTE(review): `interactive_worldmap` is not defined in any cell visible here —
# presumably it comes from _prepare.ipynb or a removed cell; confirm, otherwise
# this cell raises NameError on a fresh kernel.
# The map carries the selection; the line chart below it is filtered by that selection.
interactive_worldmap.add_selection(click) & line_chart.transform_filter(click)

In [None]:
# Calendar features for the id-enriched `data` frame (built in the previous cell).
df = create_date_features(data[["date","location","id","new_cases_per_million"]])

# Same selection definition as in the previous cell, created again for this chart.
click = alt.selection_multi(fields=['id'], empty='none')
# Month-by-weekday heatmap of mean new cases per million, one column per location and one row per year.
chart = alt.Chart(df).mark_rect().encode(
    x='weekday:O',
    y='month:O',
    color=alt.Color(field='new_cases_per_million', aggregate='mean', type='quantitative'),
    column="location",
    row="year"
)
# NOTE(review): `interactive_worldmap` is not defined in any cell visible here —
# presumably it comes from _prepare.ipynb or a removed cell; confirm.
# Map on top, heatmap filtered by the map selection below; colour scales are kept independent.
(interactive_worldmap.add_selection(click) & chart.transform_filter(click)).resolve_scale(color="independent")

# Summary

Pros:
 * Easy way to encode different data types
 * Simple way to combine different charts
 * Enables powerful interactions
 
Cons:
 * Syntax is sometimes a bit verbose
 * No Tree-Map, Sunburst, Parallel Coordinates / Categories or other fancy visualizations