# WHR Data - Regional Line Charts

In [180]:
from math import pi
import pandas as pd

from bokeh.models import BasicTicker, PrintfTickFormatter
from bokeh.plotting import figure, output_notebook, show
from bokeh.sampledata.unemployment1948 import data
from bokeh.transform import linear_cmap, LinearColorMapper, transform
from bokeh.palettes import Viridis256
from bokeh.models import ColumnDataSource, HoverTool, Legend

## Data Prep

In [185]:
# Load the processed WHR life-ladder table; the first CSV column becomes the index.
# NOTE: this rebinds `data`, shadowing the bokeh sample dataset imported above
# under the same name.
data = pd.read_csv("./data/processed/WHR_LL_Region_20052022.csv", index_col=0)

# Keep only years after 2012. `.copy()` makes the filtered result an independent
# frame, so adding columns to it later does not raise SettingWithCopyWarning.
data = data[data["Year"] > 2012].copy()
data.head(5)

Unnamed: 0,Year,region,region2,alpha2,Country Code,shortname,Population,Log GDP per capita,Life Ladder
102,2013,Oceania,Australia and New Zealand,AU,AUS,Australia,23128129.0,10.752455,7.364169
103,2014,Oceania,Australia and New Zealand,AU,AUS,Australia,23475686.0,10.763002,7.28855
104,2015,Oceania,Australia and New Zealand,AU,AUS,Australia,23815995.0,10.769909,7.309061
105,2016,Oceania,Australia and New Zealand,AU,AUS,Australia,24190907.0,10.781229,7.25008
106,2017,Oceania,Australia and New Zealand,AU,AUS,Australia,24594202.0,10.78726,7.257038


In [182]:
# Per country-year: population-weighted Life Ladder contribution.
# `assign` returns a new frame instead of writing into a filtered slice, which
# avoids SettingWithCopyWarning and keeps this cell safe to re-run.
data = data.assign(pop_x_ll=data["Population"] * data["Life Ladder"])

# Only rows that have BOTH a population and a score may enter the weighted mean.
# Otherwise a country with a missing score is skipped in the numerator (NaN is
# ignored by `sum`) but still counted in the population denominator, which
# drags the regional average down.
ll_valid = data.dropna(subset=["Population", "Life Ladder"])

# Aggregate by region / sub-region / year.
ll_by_region_df = (
    ll_valid
    .groupby(["region", "region2", "Year"])
    .agg({'Population': 'sum', 'pop_x_ll': 'sum'})
    .reset_index()
)

# Weighted mean = sum(population * score) / sum(population) within each group.
ll_by_region_df["w_ll"] = ll_by_region_df["pop_x_ll"] / ll_by_region_df["Population"]
ll_by_region_df

Unnamed: 0,region,region2,Year,Population,pop_x_ll,w_ll
0,Africa,East Africa,2013,305908543.0,1.258737e+09,4.114750
1,Africa,East Africa,2014,339451015.0,1.430929e+09,4.215420
2,Africa,East Africa,2015,363786694.0,1.546210e+09,4.250319
3,Africa,East Africa,2016,347953522.0,1.379416e+09,3.964369
4,Africa,East Africa,2017,371704735.0,1.477869e+09,3.975922
...,...,...,...,...,...,...
165,Oceania,Australia and New Zealand,2018,29867243.0,2.153043e+08,7.208709
166,Oceania,Australia and New Zealand,2019,30319417.0,2.191870e+08,7.229262
167,Oceania,Australia and New Zealand,2020,30745489.0,2.200528e+08,7.157237
168,Oceania,Australia and New Zealand,2021,30799479.0,2.191618e+08,7.115765


## Plotting

In [183]:
# Mapping dicts so that every sub-region shares the colour of its parent region.
all_regions = sorted(ll_by_region_df["region"].unique())
all_subregions = sorted(ll_by_region_df["region2"].unique())

# One row per (region, sub-region) pair; the sub-region -> region lookup is
# built from these de-duplicated pairs rather than from every yearly row.
region_mapping = ll_by_region_df[["region", "region2"]].drop_duplicates().copy()
region_mapping_dict = region_mapping.set_index("region2")["region"].to_dict()

# Hand-picked palette, one colour per region (assigned in sorted region order).
viridis_cols = ["#ffaa00", "#5ec962", "#21918c", "#3b528b", "#440154"]

# Continuous colour transform on a "line_index" field. It is not referenced by
# the plotting code visible in this notebook; kept in case other cells use it.
color_mapper = linear_cmap(field_name="line_index", palette=Viridis256, low=0, high=len(all_regions))

# `zip` stops at the shorter input, so a region beyond the palette length would
# silently get no colour and fail later with a KeyError. Fail here instead.
if len(all_regions) > len(viridis_cols):
    raise ValueError(
        f"{len(all_regions)} regions but only {len(viridis_cols)} colours in viridis_cols"
    )

color_mapping_dict = dict(zip(all_regions, viridis_cols))
color_mapping_dict

{'Africa': '#ffaa00',
 'Americas': '#5ec962',
 'Asia': '#21918c',
 'Europe': '#3b528b',
 'Oceania': '#440154'}

In [184]:
# Create a figure
p = figure(
    title="Global Happiness Scores by Region (Weighted)",
    x_axis_label="Year",
    y_axis_label="Average Life Ladder Score",
)

# Draw one line per sub-region, coloured by its parent region. Each line gets
# its own ColumnDataSource so the hover tool can read that sub-region's columns.
for subregion in all_subregions:
    # Parent region of this sub-region; it selects the colour and the legend entry.
    region = region_mapping_dict[subregion]
    source = ColumnDataSource(ll_by_region_df[ll_by_region_df["region2"] == subregion])
    p.line(
        x='Year',
        y='w_ll',
        source=source,
        line_color=color_mapping_dict[region],
        legend_label=region,
    )

# Add hover tooltips
hover = HoverTool()
hover.tooltips = [("Year", "@Year"), ("Life Ladder", "@w_ll"), ("Region", "@region2")]
p.add_tools(hover)

# Hide legend
p.legend.visible = False

# Show the plot
output_notebook()
show(p)