# Altair

In [1]:
import altair as alt
from vega_datasets import data

import pandas as pd

https://altair-viz.github.io/user_guide/compound_charts.html

Layering multiple viz into one chart, and concatenating charts onto one another.

## Basics

In [2]:
# toy data: one value for each of the five categories a-e
# NOTE: this rebinds `data`, shadowing the `vega_datasets` import of the same name
data = pd.DataFrame({
    'values': [1, 8, 5, 3, 4],
    'index': list('abcde'),
})
data

Unnamed: 0,values,index
0,1,a
1,8,b
2,5,c
3,3,d
4,4,e


In [3]:
# base chart object: Altair's Chart class wrapping our DataFrame
# (marks and encodings are attached in the cells that follow)
chart = alt.Chart(data=data)

- the `mark` attribute of the `Chart` object defines the type of visualization
- e.g., `mark_point()` works for scatters
- the `encode()` method maps values in the data to attributes of the chart
    - e.g., mapping one column to the x or y axis
    - e.g., mapping a column to the color value of the marks

In [4]:
# column chart: bar marks, with the categories on x and their values on y
alt.Chart(data).mark_bar().encode(alt.X('index'), alt.Y('values'))

- note that the de facto index is one of the columns, so it can be referenced the same way as any other column
- swapping the $x,y$ encodings turns the column chart into a (horizontal) bar chart
- we can also specify some attributes in the `mark` method, like `color`

In [5]:
# same data with the two axes swapped; the bar color is set on the mark itself
alt.Chart(data).mark_bar(color='red').encode(alt.Y('index'), alt.X('values'))

# Encodings

- now loading daily data for examples

In [22]:
# read the daily figures, reverse the row order, then skip the first 36 rows
# of the reversed frame (the displayed result starts at 2020-04-01)
df = pd.read_csv('ml_general.csv').iloc[::-1].iloc[36:]
df

Unnamed: 0,date,total,new,avg
208,2020-04-01,66,13,6.571429
207,2020-04-02,78,12,8.000000
206,2020-04-03,91,13,9.571429
205,2020-04-04,113,22,12.571429
204,2020-04-05,133,20,14.142857
...,...,...,...,...
4,2020-10-22,1071,11,7.571429
3,2020-10-23,1081,10,7.857143
2,2020-10-24,1085,4,8.571429
1,2020-10-25,1094,9,7.857143


In [7]:
# translucent bars: the `new` column plotted against the date
bars = (
    alt.Chart(df)
    .mark_bar(fillOpacity=0.3)
    .encode(alt.X('date:T'), alt.Y('new:Q'))
)

# line: the `avg` column plotted against the same date axis
line = (
    alt.Chart(df)
    .mark_line()
    .encode(alt.X('date:T'), alt.Y('avg:Q'))
)

- this can be the basis for our bar/line/area chart: some translucent bars
- now we have to combine the average line and the area underneath the line to fill in the gaps

In [8]:
# the + operator layers the two charts into a single layered chart
bars + line

In [9]:
# the | operator concatenates them (Altair's horizontal concatenation),
# so each chart keeps its own view instead of sharing one
bars | line

- to configure these charts we need to: 
    - create a `MarkConfig` object
    - pass that to a general `config` object
    - pass that into a `layer` constructor

In [10]:
# mark-level settings (translucent fill), wrapped in a top-level Config object
mk = alt.MarkConfig(fillOpacity=0.3)
cfg = alt.Config(mark=mk)

- To fix the axis, we can pass `tickCount` to `configure_axis()`
- we also pass `grid = False` to remove the grid
- we call `configure_view()` to remove the border

In [11]:
# larger layered bar/line chart with no chart-level configuration attached
new_daily = alt.LayerChart(layer=[bars, line]).properties(width=450, height=525)

# small daily-cases bar/line chart that carries its own configuration:
# the shared `cfg` object, no grid lines, and a zero-width view border
daily = (
    alt.LayerChart(layer=[bars, line], config=cfg)
    .configure_axis(grid=False)
    .configure_view(strokeWidth=0)
    .properties(width=200, height=150)
)

In [12]:
# render the unconfigured layered bars + line chart
new_daily

## Working on demo stack

- this chart creates a custom Y domain for the Altair API, with their `Y` class
- for some reason, their idiom for stacked area charts requires their data to be "tidy" formatted, with each series occurring in order row-wise

In [13]:
# tidying data: reshape the wide table (indexed by date) into long form with
# one row per (date, age) pair, as the stacked-area idiom expects
demodf = pd.read_csv('ml_demo.csv').set_index('date')
ddf = demodf.stack().reset_index()
ddf.columns = ['date', 'age', 'pct']
# age labels descending, dates ascending within each age label; the sorted
# frame is reassigned rather than sorted in place so re-running stays predictable
ddf = ddf.sort_values(by=['age', 'date'], ascending=[False, True])
ddf

Unnamed: 0,date,age,pct
8,2020-04-01,90s,0.051282
17,2020-04-02,90s,0.043956
26,2020-04-03,90s,0.061947
35,2020-04-04,90s,0.067669
44,2020-04-05,90s,0.066667
...,...,...,...
1836,2020-10-22,00s,0.135466
1845,2020-10-23,00s,0.136999
1854,2020-10-24,00s,0.137835
1863,2020-10-25,00s,0.140238


In [14]:
# normalized stacked area chart, following the Altair gallery idiom:
# https://altair-viz.github.io/gallery/normalized_stacked_area_chart
demo = (
    alt.Chart(ddf)
    .mark_area()
    .encode(
        alt.X('date:T'),
        alt.Y('pct:Q', stack='normalize'),  # custom Y channel: stack normalized
        alt.Color('age:N'),                 # one colored band per age label
    )
    .properties(width=450, height=525)
)
demo

# Other charts

Options:
1. Data table akin to factbox
    - [example here](https://altair-viz.github.io/gallery/scatter_linked_table.html)
2. Ridgeline plot to show relative curves of peer units
    - [example here](https://altair-viz.github.io/gallery/ridgeline_plot.html)

## Test joy plot

- getting 7-day curves from ontario_new

In [15]:
# 7-day columns to load, each mapped to the health-unit name it is relabeled as.
# Renaming by name (instead of assigning `new.columns` positionally) keeps every
# label on the right series even if the CSV stores its columns in a different
# order: read_csv's `usecols` selects columns but does not reorder them.
unit_names = {
    'London_7': 'Middlesex-London Health Unit',
    'Durham_7': 'Durham Regional Health Unit',
    'Halton_7': 'Halton Regional Health Unit',
    'Hamilton_7': 'City of Hamilton Health Unit',
    'Ottawa_7': 'City of Ottawa Health Unit',
    'Waterloo_7': 'Waterloo Health Unit',
    'Windsor_7': 'Windsor-Essex County Health Unit',
}

new = (
    pd.read_csv('ontario_new.csv', usecols=[*unit_names, 'date'])
    .set_index('date')
    .rename(columns=unit_names)
)

In [16]:
# long format: one row per (region, date) pair holding that day's value
n = new.unstack().reset_index()
n.columns = ['region', 'date', 'value']
n = n.sort_values(by=['region', 'date'])
n

Unnamed: 0,region,date,value
971,City of Hamilton Health Unit,2020-02-26,0.000000
970,City of Hamilton Health Unit,2020-02-27,0.000000
969,City of Hamilton Health Unit,2020-02-28,0.000000
968,City of Hamilton Health Unit,2020-02-29,0.000000
967,City of Hamilton Health Unit,2020-03-01,0.000000
...,...,...,...
1462,Windsor-Essex County Health Unit,2020-10-22,4.428571
1461,Windsor-Essex County Health Unit,2020-10-23,3.571429
1460,Windsor-Essex County Health Unit,2020-10-24,3.000000
1459,Windsor-Essex County Health Unit,2020-10-25,4.571429


- this is the correct formatting
- however, the scales are set to be a constant based on the row size
- I can either make the scales a function of the data, or adjust the curves by population
    - not sure how to fix the scales
    - now adjusting for population...

In [17]:
# the '2018' column for each health unit, indexed by unit name
# NOTE(review): the file name contains a space before ".csv" — confirm it matches the file on disk
pops = pd.read_csv('pops .csv', usecols=['unit', '2018'], index_col='unit')
pops

Unnamed: 0_level_0,2018
unit,Unnamed: 1_level_1
District of Algoma Health Unit,116459
Brant County Health Unit,149392
Durham Regional Health Unit,683604
Grey Bruce Health Unit,171667
Haldimand-Norfolk Health Unit,116366
"Haliburton, Kawartha, Pine Ridge District Health Unit",188956
Halton Regional Health Unit,580008
City of Hamilton Health Unit,567979
Hastings and Prince Edward Counties Health Unit,169116
Huron County Health Unit,61835


In [18]:
# per-capita curves: divide every value by the '2018' figure of its region.
# The lookup is by label (region name -> pops['2018']) for all rows at once,
# which avoids positional `[0]` indexing on a label-indexed Series and avoids
# rebuilding the groupby for each region.
population = n['region'].map(pops['2018'])

# fail loudly when a region has no usable entry in `pops`, rather than
# silently plotting NaN curves
unmatched = n.loc[population.isna(), 'region'].unique().tolist()
if unmatched:
    raise KeyError(f'no population found for: {unmatched}')

# same rows, in the same order as `n`, renumbered from 0; `value` stays numeric
pdf = (
    n.assign(value=n['value'] / population)
    .loc[:, ['region', 'date', 'value']]
    .reset_index(drop=True)
)
pdf

Unnamed: 0,region,date,value
0,City of Hamilton Health Unit,2020-02-26,0.000000
1,City of Hamilton Health Unit,2020-02-27,0.000000
2,City of Hamilton Health Unit,2020-02-28,0.000000
3,City of Hamilton Health Unit,2020-02-29,0.000000
4,City of Hamilton Health Unit,2020-03-01,0.000000
...,...,...,...
1696,Windsor-Essex County Health Unit,2020-10-22,0.000010
1697,Windsor-Essex County Health Unit,2020-10-23,0.000008
1698,Windsor-Essex County Health Unit,2020-10-24,0.000007
1699,Windsor-Essex County Health Unit,2020-10-25,0.000011


In [19]:
# ridgeline ("joy") plot: one small area chart per region, faceted into rows
step = 75     # height given to each individual chart
overlap = 1   # multiplier on the upper end of the y range (see the scale below)

joy = (
    alt.Chart(pdf, height=step)
    .mark_area(
        interpolate='monotone',
        fillOpacity=0.5,
        stroke='black',
        strokeWidth=0.5,
    )
    .encode(
        x='date:T',
        # explicit y range from `step` to `-step * overlap`, with the axis hidden
        y=alt.Y(
            'value:Q',
            scale=alt.Scale(range=(step, -step * overlap)),
            axis=None,
        ),
        # a per-region fill -- alt.Fill('region:N', legend=None) -- is left disabled
    )
    .facet(
        row=alt.Row(
            'region:N',
            title=None,
            header=alt.Header(labelAngle=0, labelAlign='center'),
        )
    )
    .properties(bounds='flush')
)

joy

# Combining the charts

**MAJOR ISSUE HERE**:
- you can't individually configure views and then concatenate them
- I have to pass a `config` argument to the `alt.ConcatChart()` itself
- this raises an issue, as I have some overlapping specifications (e.g. opacity)
    - need to find a way to specify my config between visualizations

In [20]:
# quick preview: concatenate the three unconfigured charts with the | operator
demo | new_daily | joy

In [21]:
# final dashboard: the three charts in one ConcatChart, with all configuration
# applied to the concatenation as a whole rather than to the individual charts
main = (
    alt.ConcatChart(concat=[new_daily, joy, demo])
    .configure_axis(grid=False)
    .configure_view(stroke=None)
    .configure_facet(spacing=0)
    .properties(
        title=dict(text=['London, Ont. COVID Data'], subtitle='Oct. 26')
    )
    .configure_title(
        align='left',
        anchor='middle',
        fontSize=30,
        fontWeight='bold',
        subtitleFontSize=20,
        subtitleFontWeight='bold',
    )
)
main