In [27]:
import pandas as pd
from bokeh.palettes import Viridis, Viridis256
from bokeh.plotting import figure, show, output_notebook, output_file, save, reset_output
from bokeh.transform import factor_cmap, linear_cmap, LinearColorMapper, transform
from squarify import normalize_sizes, squarify

In [9]:
def treemap(df, col, x, y, dx, dy, *, N=100):
    """Attach squarify rectangle coordinates to the N largest rows of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the values to lay out.
    col : str
        Column whose values determine which rows are kept and are fed to
        ``normalize_sizes`` / ``squarify``.
    x, y, dx, dy : number
        Origin and extent of the rectangle handed to ``squarify``.
    N : int, keyword-only
        Number of rows kept via ``DataFrame.nlargest`` (default 100).

    Returns
    -------
    pandas.DataFrame
        The kept rows joined with the layout columns produced by
        ``squarify``, with the original index moved into a column by
        ``reset_index``.
    """
    top_rows = df.nlargest(N, col)
    areas = normalize_sizes(top_rows[col], dx, dy)
    rects = pd.DataFrame.from_dict(squarify(areas, x, y, dx, dy))
    # squarify returns rectangles in input order, so relabelling them with
    # the kept rows' index lets the join below line up row for row.
    rects = rects.set_index(top_rows.index)
    laid_out = top_rows.join(rects, how='left')
    return laid_out.reset_index()

# Plot dimensions: origin (x, y) and extent (w, h) of the treemap rectangle.
# w and h are also used as the figure's width and height further down.
x, y = 0, 0
w, h = 800, 450

In [10]:
# Prep data: read the processed CSV, keep only the columns used below and
# give them short snake_case names, all in one non-mutating chain.
data = (
    pd.read_csv("./data/processed/WHR_LL_Region_20052022.csv", index_col=0)
    .loc[:, ["region", "shortname", "Life Ladder", "Population", "Year"]]
    .rename(columns={
        "shortname": "country",
        "Population": "pop",
        "Life Ladder": "life_ladder",
    })
)

data.head(2)

Unnamed: 0,region,country,life_ladder,pop,Year
96,Oceania,Australia,7.340688,20176844.0,2005
97,Oceania,Australia,7.285391,20827622.0,2007


In [11]:
# Filter by year: keep the 2021 rows, then drop the now-constant Year column.
is_2021 = data["Year"] == 2021
data = data.loc[is_2021].drop(columns="Year")
data.head(2)

Unnamed: 0,region,country,life_ladder,pop
110,Oceania,Australia,7.111599,25688079.0
1408,Oceania,New Zealand,7.136701,5111400.0


In [12]:
# Aggregate to one row per (region, country). The first positional parameter
# of GroupBy.sum is `numeric_only`, not a column name, so it is spelled out
# here instead of passing "pop" and relying on the string being truthy.
pop_by_country = (
    data.groupby(["region", "country"])
    .sum(numeric_only=True)
    .sort_values(by="pop")
    .reset_index()
)

# Roll countries up to regions; numeric_only=True drops the string `country`
# column. Note that `life_ladder` is summed here as well, so at region level
# it is not a meaningful score.
pop_by_region = (
    pop_by_country.groupby("region")
    .sum(numeric_only=True)
    .sort_values(by="pop")
)
pop_by_region.head(2)

Unnamed: 0_level_0,life_ladder,pop
region,Unnamed: 1_level_1,Unnamed: 2_level_1
Oceania,14.2483,30799479.0
Europe,224.184264,540200260.0


In [13]:
# Lay out one rectangle per region inside the plot rectangle (x, y, w, h),
# using each region's summed `pop` as the value passed to the layout.
blocks_by_region = treemap(pop_by_region, "pop", x, y, w, h)

In [15]:
# Lay out each region's 10 most populous countries inside that region's
# rectangle. Rows are read by column name rather than tuple-unpacked into
# `x, y, ...`: those names hold the global plot origin, and overwriting them
# here would change the region layout the next time it is recomputed.
dfs = []

for _, region_block in blocks_by_region.iterrows():
    df = pop_by_country[pop_by_country.region == region_block["region"]]
    dfs.append(
        treemap(
            df, "pop",
            region_block["x"], region_block["y"],
            region_block["dx"], region_block["dy"],
            N=10,
        )
    )
blocks = pd.concat(dfs)

regions = sorted(blocks["region"].unique())

In [16]:
# Colour each country block by its life_ladder value, scaled over the range
# observed in the filtered data.
mapper = linear_cmap(
    field_name='life_ladder',
    palette=Viridis256,
    low=data["life_ladder"].min(),
    high=data["life_ladder"].max(),
)

# `blocks` (country rectangles) comes from the per-region layout cell; it is
# reused here rather than recomputed, which also avoids overwriting the
# global plot origin `x, y` a second time.

p = figure(width=w, height=h, tooltips="@country @life_ladder", toolbar_location=None,
           x_axis_location=None, y_axis_location=None)
p.x_range.range_padding = p.y_range.range_padding = 0
p.grid.grid_line_color = None

# One rectangle per country.
p.block('x', 'y', 'dx', 'dy', source=blocks, line_width=1, line_color="white",
        fill_alpha=0.8, fill_color=mapper)

# Region names, anchored at each region rectangle's (x, y) corner.
p.text('x', 'y', x_offset=2, text="region", source=blocks_by_region,
       text_font_size="16pt", text_color="white")

# Country names, anchored at the opposite y edge of each country rectangle.
blocks["ytop"] = blocks.y + blocks.dy
p.text('x', 'ytop', x_offset=2, y_offset=2, text="country", source=blocks,
       text_font_size="6pt", text_baseline="top",
       text_color="white")

output_notebook()
show(p)

## Wrap the treemap pipeline in a reusable function

In [36]:
def gen_treemap_v1(year, x=0, y=0, w=800, h=450, export=False, *,
                   filename="test.html", top_n=10,
                   csv_path="./data/processed/WHR_LL_Region_20052022.csv"):
    """Draw a two-level (region -> country) population treemap for one year.

    Country rectangles are laid out by ``pop`` inside their region's
    rectangle and coloured by ``life_ladder``.

    Parameters
    ----------
    year : int
        Value matched against the ``Year`` column of the CSV.
    x, y : number
        Origin of the overall treemap rectangle.
    w, h : number
        Extent of the treemap rectangle; also used as the figure's
        width and height.
    export : bool
        If True, save the figure to ``filename``; otherwise show it inline
        in the notebook.
    filename : str, keyword-only
        Output HTML path used when ``export`` is True.
    top_n : int, keyword-only
        Number of most-populous countries kept per region.
    csv_path : str, keyword-only
        Location of the processed CSV to read.

    Returns
    -------
    None
        The figure is shown or saved as a side effect.

    Raises
    ------
    ValueError
        If the CSV contains no rows for ``year``.
    """
    # Load the CSV, keep the columns we need and give them short names.
    data = (
        pd.read_csv(csv_path, index_col=0)
        .loc[:, ["region", "shortname", "Life Ladder", "Population", "Year"]]
        .rename(columns={
            "shortname": "country",
            "Population": "pop",
            "Life Ladder": "life_ladder",
        })
    )

    # Filter by year.
    data = data.loc[data["Year"] == year].drop(columns="Year")
    if data.empty:
        # Fail with a clear message here rather than with an opaque error
        # further down in the layout code.
        raise ValueError(f"No rows found for year {year!r} in {csv_path!r}")

    # GroupBy.sum's first positional parameter is `numeric_only`, so it is
    # passed by keyword rather than as a column name.
    pop_by_country = (
        data.groupby(["region", "country"])
        .sum(numeric_only=True)
        .sort_values(by="pop")
        .reset_index()
    )
    pop_by_region = (
        pop_by_country.groupby("region")
        .sum(numeric_only=True)
        .sort_values(by="pop")
    )

    # Outer layout: one rectangle per region.
    blocks_by_region = treemap(pop_by_region, "pop", x, y, w, h)

    # Inner layout: the top_n countries of each region inside that region's
    # rectangle. Rows are accessed by column name so the x/y parameters are
    # never shadowed and column order does not matter.
    blocks = pd.concat([
        treemap(
            pop_by_country[pop_by_country["region"] == region_block["region"]],
            "pop",
            region_block["x"], region_block["y"],
            region_block["dx"], region_block["dy"],
            N=top_n,
        )
        for _, region_block in blocks_by_region.iterrows()
    ])
    # Opposite y edge of each country rectangle, used to anchor its label.
    blocks["ytop"] = blocks["y"] + blocks["dy"]

    mapper = linear_cmap(
        field_name='life_ladder',
        palette=Viridis256,
        low=data["life_ladder"].min(),
        high=data["life_ladder"].max(),
    )

    p = figure(width=w, height=h, tooltips="@country @life_ladder", toolbar_location=None,
               x_axis_location=None, y_axis_location=None)
    p.x_range.range_padding = p.y_range.range_padding = 0
    p.grid.grid_line_color = None

    p.block('x', 'y', 'dx', 'dy', source=blocks, line_width=1, line_color="white",
            fill_alpha=0.8, fill_color=mapper)

    p.text('x', 'y', x_offset=2, text="region", source=blocks_by_region,
           text_font_size="16pt", text_color="white")

    p.text('x', 'ytop', x_offset=2, y_offset=2, text="country", source=blocks,
           text_font_size="6pt", text_baseline="top",
           text_color="white")

    # Always start from a clean output state: without this, an inline call
    # made after an export call would still have output_file() active, and
    # show() would write and open the HTML file as well.
    reset_output()
    if export:
        output_file(filename)
        save(p)
    else:
        output_notebook()
        show(p)

In [43]:
# Render the 2019 treemap inline; export=False is already the default.
gen_treemap_v1(2019, export=False)

In [47]:
# Render the 2021 treemap inline in the notebook.
gen_treemap_v1(2021)

In [45]:
# Render the 2013 treemap inline in the notebook.
gen_treemap_v1(2013)