In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display

# Notebook-wide pandas display settings: no row/column truncation and a
# 1000-character line width for printed frames.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.width = 1000


In [2]:
GBD_PATH = 'data/GBD.csv'

In [3]:
# Column -> dtype mapping used when reading the GBD extract.
gbd_dtypes = dict(
    location_id="int16",
    sex_id="int8",
    cause_id="int16",
    metric_id="int8",
    year="int16",
    val="float32",
    metric_name="category",
)

# Columns to read: exactly the typed columns, in the mapping's order.
gbd_usecols = list(gbd_dtypes)

In [4]:
# Load only the typed columns of the GBD extract.
gbd_df = pd.read_csv(GBD_PATH, usecols=gbd_usecols, dtype=gbd_dtypes)

# DataFrame.info() writes its report to stdout and returns None, so it must not
# be wrapped in print() (that appends a stray "None" line to the cell output).
gbd_df.info()
gbd_df.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22131144 entries, 0 to 22131143
Data columns (total 7 columns):
 #   Column       Dtype   
---  ------       -----   
 0   location_id  int16   
 1   sex_id       int8    
 2   cause_id     int16   
 3   metric_id    int8    
 4   metric_name  category
 5   year         int16   
 6   val          float32 
dtypes: category(1), float32(1), int16(3), int8(2)
memory usage: 274.4 MB
None


Unnamed: 0,location_id,sex_id,cause_id,metric_id,metric_name,year,val
0,35,1,602,1,Number,1980,3.236811
1,35,2,602,1,Number,1980,0.418637
2,35,3,602,1,Number,1980,3.655448
3,35,1,602,2,Percent,1980,0.000135
4,35,2,602,2,Percent,1980,1.8e-05
5,35,3,602,2,Percent,1980,7.8e-05
6,35,1,602,3,Rate,1980,0.133496
7,35,2,602,3,Rate,1980,0.015251
8,35,3,602,3,Rate,1980,0.070711
9,35,2,603,1,Number,1980,2.620745


In [5]:
# Number of distinct values of each key dimension in the raw extract.
for _label, _column in (
    ("Locations:", "location_id"),
    ("Causes:   ", "cause_id"),
    ("Years:    ", "year"),
):
    print(_label, gbd_df[_column].nunique())

Locations: 204
Causes:    294
Years:     42


In [6]:
# sorted(gbd_df["year"].unique())


In [7]:
# sorted(gbd_df["location_id"].unique())

In [8]:
# Four GBD locations are missing from the World Bank dataset and are removed:
#   149  Palestine
#   320  Cook Islands
#   374  Niue
#   413  Tokelau
exclude_ids = [149, 320, 374, 413]

# Row filter: metric_id 3 ("Rate"), sex_id 3 (both sexes combined), years from
# 1987 onward, and none of the excluded locations.
_keep_rows = (
    gbd_df["metric_id"].eq(3)
    & gbd_df["sex_id"].eq(3)
    & gbd_df["year"].ge(1987)
    & ~gbd_df["location_id"].isin(exclude_ids)
)

# NOTE: gbd_df is rebound to the filtered frame, which no longer has the
# metric_id / sex_id / val columns, so this cell cannot be re-run without
# re-loading the CSV first.
gbd_df = (
    gbd_df
      .loc[_keep_rows, ["location_id", "cause_id", "year", "val"]]
      .rename(columns={"val": "rate"})
)
gbd_df.head(10)

Unnamed: 0,location_id,cause_id,year,rate
1342817,19,942,1987,0.224645
1342826,19,943,1987,0.291015
1342835,19,944,1987,1.246096
1342844,19,945,1987,34.033714
1342853,19,946,1987,0.087525
1342862,19,947,1987,0.0
1342871,19,948,1987,0.001618
1342880,19,949,1987,2e-06
1342889,19,950,1987,0.0
1342898,19,955,1987,1.339189


In [9]:
gbd_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2058000 entries, 1342817 to 22131143
Data columns (total 4 columns):
 #   Column       Dtype  
---  ------       -----  
 0   location_id  int16  
 1   cause_id     int16  
 2   year         int16  
 3   rate         float32
dtypes: float32(1), int16(3)
memory usage: 35.3 MB


In [10]:
# Number of distinct values of each key dimension after filtering.
for _label, _column in (
    ("Locations:", "location_id"),
    ("Causes:   ", "cause_id"),
    ("Years:    ", "year"),
):
    print(_label, gbd_df[_column].nunique())

Locations: 200
Causes:    294
Years:     35


In [11]:
LOCATION_PATH = 'data/locations_with_codes.csv'
COUNTRY_CLS_BY_INCOME_PATH = 'data/country_classification_by_income.xlsx'
COUNTRY_CLS_BY_INCOME_PATH_SHEET_NAME = 'Country Analytical History'

# 1. Lookup from GBD location_id to country code (renamed to iso3).
df_loc = pd.read_csv(
    LOCATION_PATH,
    usecols=['location_id', 'country_code'],
    dtype={'location_id': 'int16', 'country_code': 'string'},
).rename(columns={'country_code': 'iso3'})

# 2. Income-classification sheet in wide form; every cell is read as text and
#    the column labels come from spreadsheet row index 5.
df_wide = pd.read_excel(
    COUNTRY_CLS_BY_INCOME_PATH,
    sheet_name=COUNTRY_CLS_BY_INCOME_PATH_SHEET_NAME,
    header=5,
    dtype=str,
)

# 3. Discard the first five data rows under the header.
#    NOTE(review): presumably non-country preamble rows — confirm against the workbook.
df_wide = df_wide.drop(index=range(5)).reset_index(drop=True)

# 4. Rename the first two columns.
_code_col, _name_col = df_wide.columns[0], df_wide.columns[1]
df_wide = df_wide.rename(columns={_code_col: 'iso3', _name_col: 'country_name'})

# 5. Year columns are the ones labelled with an int or an all-digit string.
year_cols = [
    label for label in df_wide.columns
    if isinstance(label, int) or (isinstance(label, str) and label.isdigit())
]

# 6. Reshape to long form: one row per (iso3, country_name, year), with the
#    year stored as an integer.
df_income = df_wide.melt(
    id_vars=['iso3', 'country_name'],
    value_vars=year_cols,
    var_name='year',
    value_name='income_group',
)
df_income['year'] = df_income['year'].astype(int)

In [12]:
# Keep classifications up to and including 2021.
# NOTE(review): presumably the last year of the GBD data — confirm.
df_income = df_income.loc[df_income['year'].le(2021)]
df_income.head(10)

Unnamed: 0,iso3,country_name,year,income_group
0,AFG,Afghanistan,1987,L
1,ALB,Albania,1987,..
2,DZA,Algeria,1987,UM
3,ASM,American Samoa,1987,H
4,AND,Andorra,1987,..
5,AGO,Angola,1987,..
6,ATG,Antigua and Barbuda,1987,UM
7,ARG,Argentina,1987,UM
8,ARM,Armenia,1987,..
9,ABW,Aruba,1987,H


In [13]:
# Attach the yearly income class to each location through its iso3 code (inner
# join: only codes present in both tables are kept), then relabel the '..' and
# ' ' cells as 'MISSING'.
df_merged_income_location = (
    df_loc
      .merge(df_income, on='iso3', how='inner')
      .assign(
          income_group=lambda d: d['income_group'].replace(['..', ' '], 'MISSING')
      )
)

In [14]:
df_merged_income_location.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7000 entries, 0 to 6999
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   location_id   7000 non-null   int16 
 1   iso3          7000 non-null   object
 2   country_name  7000 non-null   object
 3   year          7000 non-null   int64 
 4   income_group  7000 non-null   object
dtypes: int16(1), int64(1), object(3)
memory usage: 232.6+ KB


In [15]:
df_merged_income_location.head(20)

Unnamed: 0,location_id,iso3,country_name,year,income_group
0,160,AFG,Afghanistan,1987,L
1,160,AFG,Afghanistan,1988,L
2,160,AFG,Afghanistan,1989,L
3,160,AFG,Afghanistan,1990,L
4,160,AFG,Afghanistan,1991,L
5,160,AFG,Afghanistan,1992,L
6,160,AFG,Afghanistan,1993,L
7,160,AFG,Afghanistan,1994,L
8,160,AFG,Afghanistan,1995,L
9,160,AFG,Afghanistan,1996,L


In [16]:
# df_albania = df_merged_income_location[df_merged_income_location['country_name']=='Albania']
# df_albania

In [17]:
# Carry over only the join keys and the two descriptive columns.
df_merged_income_location = df_merged_income_location[
    ['location_id', 'year', 'country_name', 'income_group']
]

# Left join keeps every GBD row. validate='many_to_one' makes pandas raise a
# MergeError if a (location_id, year) pair is ever duplicated on the income
# side, instead of silently duplicating GBD rows and skewing every average
# computed from `df`.
df = gbd_df.merge(
    df_merged_income_location,
    on=['location_id', 'year'],
    how='left',
    validate='many_to_one',
)

In [18]:
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2058000 entries, 0 to 2057999
Data columns (total 6 columns):
 #   Column        Dtype  
---  ------        -----  
 0   location_id   int16  
 1   cause_id      int16  
 2   year          int16  
 3   rate          float32
 4   country_name  object 
 5   income_group  object 
dtypes: float32(1), int16(3), object(2)
memory usage: 51.0+ MB


Unnamed: 0,location_id,cause_id,year,rate,country_name,income_group
0,19,942,1987,0.224645,Timor-Leste,MISSING
1,19,943,1987,0.291015,Timor-Leste,MISSING
2,19,944,1987,1.246096,Timor-Leste,MISSING
3,19,945,1987,34.033714,Timor-Leste,MISSING
4,19,946,1987,0.087525,Timor-Leste,MISSING


In [19]:
# Unweighted mean of the per-location rates within each
# (year, income_group, cause_id) group.
_by_year_income_cause = df.groupby(
    ['year', 'income_group', 'cause_id'], as_index=False
)
df_grouped_by_year_income_cause = (
    _by_year_income_cause['rate']
      .mean()
      .rename(columns={'rate': 'avg_rate'})
)
df_grouped_by_year_income_cause.info()
df_grouped_by_year_income_cause.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49980 entries, 0 to 49979
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          49980 non-null  int16  
 1   income_group  49980 non-null  object 
 2   cause_id      49980 non-null  int16  
 3   avg_rate      49980 non-null  float32
dtypes: float32(1), int16(2), object(1)
memory usage: 781.1+ KB


Unnamed: 0,year,income_group,cause_id,avg_rate
0,1987,H,294,748.046387
1,1987,H,295,50.209618
2,1987,H,297,3.403862
3,1987,H,298,1.427613
4,1987,H,300,1.163587


In [20]:
CAUSE_MAPPING_PATH = 'data/cause_mapping.csv'

# Lookup table from cause_id to cause_name.
_cause_dtypes = {'cause_id': 'int16', 'cause_name': 'string'}
df_causes = pd.read_csv(
    CAUSE_MAPPING_PATH,
    usecols=list(_cause_dtypes),
    dtype=_cause_dtypes,
)
df_causes.info()
df_causes.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 294 entries, 0 to 293
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   cause_id    294 non-null    int16 
 1   cause_name  294 non-null    string
dtypes: int16(1), string(1)
memory usage: 3.0 KB


Unnamed: 0,cause_id,cause_name
0,602,Other urinary diseases
1,603,Gynecological diseases
2,604,Uterine fibroids
3,607,Endometriosis
4,608,Genital prolapse


In [21]:
# "Global" series: unweighted mean of the rates over all rows of `df` per
# (year, cause_id), labelled with the cause name.
_mean_by_year_cause = df.groupby(['year', 'cause_id'], as_index=False)['rate'].mean()
df_global = (
    _mean_by_year_cause
      .rename(columns={'rate': 'avg_rate'})
      .merge(df_causes, on='cause_id', how='inner')
)

df_global.info()
df_global.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10290 entries, 0 to 10289
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   year        10290 non-null  int16  
 1   cause_id    10290 non-null  int16  
 2   avg_rate    10290 non-null  float32
 3   cause_name  10290 non-null  string 
dtypes: float32(1), int16(2), string(1)
memory usage: 160.9 KB


Unnamed: 0,year,cause_id,avg_rate,cause_name
0,1987,294,937.969482,All causes
1,1987,295,347.138489,"Communicable, maternal, neonatal, and nutritio..."
2,1987,297,32.899364,Tuberculosis
3,1987,298,4.218121,HIV/AIDS
4,1987,300,2.51616,HIV/AIDS resulting in other diseases


# LEVEL 1

In [22]:
# Level-1 selection: 9 causes x 5 income groups x 35 years.
keep_ids_level_1 = [295, 409, 687, 1058, 1029, 1026, 1027, 1028, 1059]

# Restrict the grouped averages to the level-1 cause ids.
_is_level_1 = df_grouped_by_year_income_cause['cause_id'].isin(keep_ids_level_1)
df_grouped_by_year_income_cause_level_1 = df_grouped_by_year_income_cause.loc[_is_level_1]

df_grouped_by_year_income_cause_level_1.info()
print("Unique Causes:", df_grouped_by_year_income_cause_level_1["cause_id"].nunique())
df_grouped_by_year_income_cause_level_1.head(10)

<class 'pandas.core.frame.DataFrame'>
Index: 1530 entries, 1 to 49979
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          1530 non-null   int16  
 1   income_group  1530 non-null   object 
 2   cause_id      1530 non-null   int16  
 3   avg_rate      1530 non-null   float32
dtypes: float32(1), int16(2), object(1)
memory usage: 35.9+ KB
Unique Causes: 9


Unnamed: 0,year,income_group,cause_id,avg_rate
1,1987,H,295,50.209618
63,1987,H,409,633.635437
197,1987,H,687,64.201378
286,1987,H,1026,3.448181
287,1987,H,1027,5.205003
288,1987,H,1028,1.680914
289,1987,H,1029,173.493271
292,1987,H,1058,0.0
293,1987,H,1059,172.860321
295,1987,L,295,865.470032


In [23]:
# Rank causes within each (year, income_group) by descending avg_rate and keep
# at most 10 per group. Upper bound: 9 causes x 5 income groups x 35 years =
# 1575 rows (the recorded run yields 1530).
_ranked_level_1 = df_grouped_by_year_income_cause_level_1.sort_values(
    by=['year', 'income_group', 'avg_rate'],
    ascending=[True, True, False],
)
df_top_10_leading_cause_per_income_group_year_level_1 = (
    _ranked_level_1
      .groupby(['year', 'income_group'], as_index=False)
      .head(10)
)
df_top_10_leading_cause_per_income_group_year_level_1.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1530 entries, 63 to 49974
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          1530 non-null   int16  
 1   income_group  1530 non-null   object 
 2   cause_id      1530 non-null   int16  
 3   avg_rate      1530 non-null   float32
dtypes: float32(1), int16(2), object(1)
memory usage: 35.9+ KB


In [24]:
# Attach the readable cause name to every ranked level-1 row.
df_top_10_leading_cause_per_income_group_year_level_1_merged = pd.merge(
    df_top_10_leading_cause_per_income_group_year_level_1,
    df_causes,
    on='cause_id',
    how='inner',
)

df_top_10_leading_cause_per_income_group_year_level_1_merged.info()
df_top_10_leading_cause_per_income_group_year_level_1_merged.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1530 entries, 0 to 1529
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          1530 non-null   int16  
 1   income_group  1530 non-null   object 
 2   cause_id      1530 non-null   int16  
 3   avg_rate      1530 non-null   float32
 4   cause_name    1530 non-null   string 
dtypes: float32(1), int16(2), object(1), string(1)
memory usage: 36.0+ KB


Unnamed: 0,year,income_group,cause_id,avg_rate,cause_name
0,1987,H,409,633.635437,Non-communicable diseases
1,1987,H,1029,173.493271,Total cancers
2,1987,H,1059,172.860321,Total Cancers excluding Non-melanoma skin cancer
3,1987,H,687,64.201378,Injuries
4,1987,H,295,50.209618,"Communicable, maternal, neonatal, and nutritio..."


In [25]:
# Final level-1 ranking table, with the cause name placed next to its id.
_level_1_column_order = ['year', 'income_group', 'cause_id', 'cause_name', 'avg_rate']
df_top_10_leading_cause_per_income_group_year_level_1 = (
    df_top_10_leading_cause_per_income_group_year_level_1_merged
      .loc[:, _level_1_column_order]
)

df_top_10_leading_cause_per_income_group_year_level_1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1530 entries, 0 to 1529
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          1530 non-null   int16  
 1   income_group  1530 non-null   object 
 2   cause_id      1530 non-null   int16  
 3   cause_name    1530 non-null   string 
 4   avg_rate      1530 non-null   float32
dtypes: float32(1), int16(2), object(1), string(1)
memory usage: 36.0+ KB


In [26]:
# Level-1 ranking restricted to the rows of year 2015.
df_top_10_leading_cause_per_income_group_year_level_1_2015 = (
    df_top_10_leading_cause_per_income_group_year_level_1
      .query("year == 2015")
)

In [27]:
# Global series restricted to the level-1 causes:
# 9 causes * 35 years = 315 rows.
df_global_level_1 = df_global[
    df_global['cause_id'
    ].isin(keep_ids_level_1)
]

df_global_level_1.info()
print("Unique Causes:", df_global_level_1["cause_id"].nunique())
df_global_level_1.head(10)

<class 'pandas.core.frame.DataFrame'>
Index: 315 entries, 1 to 10289
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   year        315 non-null    int16  
 1   cause_id    315 non-null    int16  
 2   avg_rate    315 non-null    float32
 3   cause_name  315 non-null    string 
dtypes: float32(1), int16(2), string(1)
memory usage: 7.4 KB
Unique Causes: 9


Unnamed: 0,year,cause_id,avg_rate,cause_name
1,1987,295,347.138489,"Communicable, maternal, neonatal, and nutritio..."
63,1987,409,507.009491,Non-communicable diseases
197,1987,687,83.821487,Injuries
286,1987,1026,7.977884,Total burden related to hepatitis B
287,1987,1027,6.400929,Total burden related to hepatitis C
288,1987,1028,1.372914,Total burden related to Non-alcoholic fatty li...
289,1987,1029,102.847389,Total cancers
292,1987,1058,0.0,Other COVID-19 pandemic-related outcomes
293,1987,1059,102.458878,Total Cancers excluding Non-melanoma skin cancer
295,1988,295,338.782318,"Communicable, maternal, neonatal, and nutritio..."


In [50]:
df_top_10_level_1 = df_top_10_leading_cause_per_income_group_year_level_1.copy()


# Panel order, panel titles and bar colours of the year-comparison figure.
income_groups = ['H','UM','LM','L']
group_titles  = {
    'H':  'High Income',
    'UM': 'Upper-middle Income',
    'LM': 'Lower-middle Income',
    'L':  'Low Income'
}
color_map = {
    'H':  '#1f77b4',
    'UM': '#2ca02c',
    'LM': '#ff7f0e',
    'L':  '#9467bd',
    'ALL':'#555555'       # for the global panel
}

years = sorted(df_top_10_level_1['year'].unique())

# Shared x-axis upper bound: 10% above the largest level-1 rate.
# The name is deliberately level-specific. The Level-2 cell assigns a generic
# `x_max`, and the callback below looks globals up when it is *called*, so
# reading `x_max` here would rescale this figure with Level-2 data as soon as
# that cell has run and the dropdown is touched again.
x_max_level_1 = max(df_top_10_level_1['avg_rate'].max(),
                    df_global_level_1['avg_rate'].max()) * 1.1

# 1) Fixed-year dropdown
year_dd_level_1 = widgets.Dropdown(
    options=years,
    value=years[0],
    description='Year:',
    style={'description_width':'initial'},
    layout=widgets.Layout(width='150px')
)


def _rate_bar(frame, color):
    """Return a horizontal bar trace of `avg_rate` per `cause_name`, drawn in `color`."""
    return go.Bar(
        x=frame['avg_rate'],
        y=frame['cause_name'],
        orientation='h',
        marker_color=color,
        hovertemplate='<b>%{y}</b><br>Rate: %{x:.1f}<extra></extra>',
        showlegend=False
    )


# 2) Plotting function
def plot_for_year_level_1(selected_year):
    """Show the level-1 cause ranking of one year as a five-panel bar figure.

    The top 2x2 panels hold the income groups listed in `income_groups`; the
    bottom panel spans both columns and holds the global series. Every panel
    uses the x-range [0, x_max_level_1] so bars are comparable across panels.

    Parameters
    ----------
    selected_year
        Value matched against the 'year' column (supplied by the dropdown).
    """
    # 3x2 grid: bottom row spans both columns for "Global"
    fig = make_subplots(
        rows=3, cols=2,
        specs=[
            [{}, {}],
            [{}, {}],
            [{"colspan": 2}, None]
        ],
        subplot_titles=[group_titles[inc] for inc in income_groups] + ["Global"],
        vertical_spacing=0.18,
        horizontal_spacing=0.12
    )

    # 2x2: one panel per income group, filled row by row
    for i, inc in enumerate(income_groups):
        in_panel = (
            (df_top_10_level_1['year'] == selected_year) &
            (df_top_10_level_1['income_group'] == inc)
        )
        r, c = divmod(i, 2)
        fig.add_trace(
            _rate_bar(df_top_10_level_1.loc[in_panel].nlargest(10, 'avg_rate'),
                      color_map[inc]),
            row=r+1, col=c+1
        )

    # bottom: global top-10 from df_global_level_1
    in_year = df_global_level_1['year'] == selected_year
    fig.add_trace(
        _rate_bar(df_global_level_1.loc[in_year].nlargest(10, 'avg_rate'),
                  color_map['ALL']),
        row=3, col=1
    )

    # Layout & styling
    fig.update_layout(
        template='plotly_white',
        title_text=f"Top 10 Causes of Death by Income Group — {selected_year}",
        title_x=0.5,
        width=1200,
        height=1050,
        margin=dict(l=80, r=80, t=140, b=60),
    )

    # Without row/col these updates apply to the axes of all five subplots.
    fig.update_xaxes(
        title_text='Mortality Rate (Per 100,000)',
        range=[0, x_max_level_1],
        ticks='outside',
        tickfont=dict(size=11)
    )
    # Reversed y-axis puts the first (largest) bar at the top.
    fig.update_yaxes(
        autorange='reversed',
        tickfont=dict(size=11)
    )

    fig.show()

# 3) Wire up & display
out_level_1 = widgets.interactive_output(plot_for_year_level_1, {'selected_year': year_dd_level_1})
display(year_dd_level_1, out_level_1)


Dropdown(description='Year:', layout=Layout(width='150px'), options=(np.int16(1987), np.int16(1988), np.int16(…

Output()

In [51]:

df_top_10_level_1 = df_top_10_leading_cause_per_income_group_year_level_1.copy()

# 1) Dropdown label for each income code. The "Global" rows are tagged
#    directly in step 2, not through this mapping; the 'ALL' entry only
#    supplies the "Global" dropdown option.
income_map = {
    'ALL': 'Global',
    'H':   'High Income',
    'UM':  'Upper-middle Income',
    'LM':  'Lower-middle Income',
    'L':   'Low Income',
}
df_top_10_level_1['Income Label'] = df_top_10_level_1['income_group'].map(income_map)

# 2) Global series: top 10 causes per year, tagged "Global"
df_glob = (
    df_global_level_1
      .sort_values(['year','avg_rate'], ascending=[True, False])
      .groupby('year', as_index=False)
      .head(10)
      .assign(**{'Income Label': 'Global'})
)

# 3) One frame holding income-group and global rows. The name is
#    level-specific because the callback below looks it up at call time.
df_all_level_1 = pd.concat([df_top_10_level_1, df_glob], ignore_index=True)

# 4) Build the single dropdown
income_dd_level_1 = widgets.Dropdown(
    options=list(income_map.values()),
    value='Global',
    description='Income Group:',
    style={'description_width':'initial'},
    layout=widgets.Layout(width='250px')
)

# 5) Plotting function with fixed layout & axes
def plot_for_group_level_1(label):
    """Animate the level-1 cause ranking over the years for one income label.

    Parameters
    ----------
    label
        One of the 'Income Label' values ("Global" or an income-group title).
    """
    frame_ms = 700  # duration of one animation frame and of the slider tween

    # Local name chosen so it does not shadow the notebook-wide `df`.
    df_sel = df_all_level_1[df_all_level_1['Income Label'] == label]
    fig = px.bar(
        df_sel,
        x='avg_rate',
        y='cause_name',
        orientation='h',
        animation_frame='year',
        animation_group='cause_name',
        color_discrete_sequence=['#1f77b4'],
        # Same x-range for every label, so figures are comparable.
        range_x=[0, df_all_level_1.avg_rate.max() * 1.05],
        title=f"Top 10 Causes of Death Over Time — {label}",
        labels={
            'avg_rate':'Mortality Rate (Per 100,000)',
            'cause_name':'Cause of Death',
            'year':'Year'
        }
    )

    # Animation controls are only touched when the figure actually has them,
    # so a selection without animation frames cannot raise an IndexError.
    if fig.layout.updatemenus:
        # Play button: frame_ms per frame.
        fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = frame_ms
    if fig.layout.sliders:
        # Slider tween between frames: frame_ms as well.
        fig.layout.sliders[0].transition = {'duration': frame_ms, 'easing': 'linear'}

    # Reverse the y-axis and disable auto-margin.
    fig.update_yaxes(
        autorange='reversed',
        tickfont=dict(size=11),
        automargin=False,
        fixedrange=True
    )

    # Lock figure size and margins; layout-level transition duration is 0.
    fig.update_layout(
        template='plotly_white',
        title_x=0.5,
        title_font_size=18,
        autosize=False,
        width=1000,
        height=500,
        margin=dict(l=380, r=40, t=80, b=40),
        showlegend=False,
        transition={'duration': 0}
    )
    # Disable panning/zooming shifts.
    fig.update_xaxes(fixedrange=True)

    fig.show()

# 6) Hook it up — only one dropdown. The trailing semicolon keeps the cell
#    from echoing the function object that interact() returns.
widgets.interact(plot_for_group_level_1, label=income_dd_level_1);


interactive(children=(Dropdown(description='Income Group:', layout=Layout(width='250px'), options=('Global', '…

<function __main__.plot_for_group_level_1(label)>

In [52]:
# 1) Income-group lines: level-1 averages with the cause name attached
df_line = (
    df_grouped_by_year_income_cause_level_1
      .merge(df_causes, on='cause_id', how='inner')
      [['year','income_group','cause_name','avg_rate']]
)
income_map = {
    'H':  'High Income',
    'UM': 'Upper-middle Income',
    'LM': 'Lower-middle Income',
    'L':  'Low Income'
}
df_line['Income_Label'] = df_line['income_group'].map(income_map)

# 2) Global lines, taken from df_global_level_1
df_global_line = (
    df_global_level_1
      .assign(Income_Label='Global')
      [['year','Income_Label','cause_name','avg_rate']]
)

# 3) Concatenate into one master DataFrame. The name is level-specific
#    because the callback below looks it up at call time.
df_line_all_level_1 = pd.concat(
    [df_line[['year','Income_Label','cause_name','avg_rate']],
     df_global_line],
    ignore_index=True
)

# 4) Build the disease dropdown
diseases = sorted(df_line_all_level_1['cause_name'].unique())
disease_dd_level_1 = widgets.Dropdown(
    options=diseases,
    value=diseases[0],
    description='Disease:',
    layout=widgets.Layout(width='300px')
)

# 5) Plotting function
def plot_trend_level_1(cause):
    """Plot the mortality-rate time series of one cause in five panels.

    The top 2x2 panels hold the four income groups; the bottom panel spans both
    columns and holds the global series.

    Parameters
    ----------
    cause
        A 'cause_name' value (supplied by the disease dropdown).
    """
    df_sel = df_line_all_level_1[df_line_all_level_1['cause_name'] == cause]

    # (label, row, col) of every panel, in subplot-title order.
    panels = [
        ('High Income',         1, 1),
        ('Upper-middle Income', 1, 2),
        ('Lower-middle Income', 2, 1),
        ('Low Income',          2, 2),
        ('Global',              3, 1),
    ]

    # 3x2 grid; bottom row spanning both cols for Global
    fig = make_subplots(
        rows=3, cols=2,
        specs=[
            [{}, {}],
            [{}, {}],
            [{"colspan": 2}, None]
        ],
        subplot_titles=[label for label, _, _ in panels],
        vertical_spacing=0.12,
        horizontal_spacing=0.08
    )

    for label, row, col in panels:
        dfp = df_sel[df_sel['Income_Label'] == label]
        fig.add_trace(
            go.Scatter(
                x=dfp['year'],
                y=dfp['avg_rate'],
                mode='lines+markers',
                name=label
            ),
            row=row, col=col
        )
        # axis styling
        fig.update_xaxes(title_text='Year', row=row, col=col, fixedrange=True)
        fig.update_yaxes(title_text='Mortality Rate (Per 100,000)',
                         row=row, col=col,
                         fixedrange=True,
                         tickfont=dict(size=11))

    fig.update_layout(
        template='plotly_white',
        title=f"{cause} — Mortality Rate Over Time",
        title_x=0.5,
        height=1000,
        showlegend=False,
        margin=dict(l=60, r=40, t=80, b=40)
    )
    fig.show()

# 6) Wire up and display. A dedicated output handle is used so the
#    `out_level_1` handle of the year-comparison figure is not overwritten.
out_trend_level_1 = widgets.interactive_output(plot_trend_level_1, {'cause': disease_dd_level_1})
display(disease_dd_level_1, out_trend_level_1)


Dropdown(description='Disease:', layout=Layout(width='300px'), options=('Communicable, maternal, neonatal, and…

Output()

# LEVEL 2

In [53]:
# Level-2 selection: 22 requested cause ids x 5 income groups x 35 years.
# NOTE(review): the recorded run reports only 21 unique causes, so one id is
# presumably absent from the data — TODO confirm which.
keep_ids_level_2 = [
    955, 956, 957, 344, 961, 962, 386, 410, 491, 508, 526,
    542, 558, 973, 974, 653, 669, 626, 640, 688, 696, 717,
]

# Restrict the grouped averages to the level-2 cause ids.
_is_level_2 = df_grouped_by_year_income_cause['cause_id'].isin(keep_ids_level_2)
df_grouped_by_year_income_cause_level_2 = df_grouped_by_year_income_cause.loc[_is_level_2]

df_grouped_by_year_income_cause_level_2.info()
print("Unique Causes:", df_grouped_by_year_income_cause_level_2["cause_id"].nunique())
df_grouped_by_year_income_cause_level_2.head(10)

<class 'pandas.core.frame.DataFrame'>
Index: 3570 entries, 19 to 49953
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          3570 non-null   int16  
 1   income_group  3570 non-null   object 
 2   cause_id      3570 non-null   int16  
 3   avg_rate      3570 non-null   float32
dtypes: float32(1), int16(2), object(1)
memory usage: 83.7+ KB
Unique Causes: 21


Unnamed: 0,year,income_group,cause_id,avg_rate
19,1987,H,344,0.093614
49,1987,H,386,0.992756
64,1987,H,410,174.495682
99,1987,H,491,314.318085
114,1987,H,508,32.360344
129,1987,H,526,29.643839
140,1987,H,542,31.017174
147,1987,H,558,0.011988
177,1987,H,626,1.999222
180,1987,H,640,16.717529


In [54]:
# Rank causes within each (year, income_group) by descending avg_rate and keep
# the 10 largest per group. Upper bound: 10 causes x 5 income groups x 35 years
# = 1750 rows (the recorded run yields 1700).
_ranked_level_2 = df_grouped_by_year_income_cause_level_2.sort_values(
    by=['year', 'income_group', 'avg_rate'],
    ascending=[True, True, False],
)
df_top_10_leading_cause_per_income_group_year_level_2 = (
    _ranked_level_2
      .groupby(['year', 'income_group'], as_index=False)
      .head(10)
)
df_top_10_leading_cause_per_income_group_year_level_2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1700 entries, 99 to 49939
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          1700 non-null   int16  
 1   income_group  1700 non-null   object 
 2   cause_id      1700 non-null   int16  
 3   avg_rate      1700 non-null   float32
dtypes: float32(1), int16(2), object(1)
memory usage: 39.8+ KB


In [55]:
# Attach the readable cause name to every ranked level-2 row.
df_top_10_leading_cause_per_income_group_year_level_2_merged = pd.merge(
    df_top_10_leading_cause_per_income_group_year_level_2,
    df_causes,
    on='cause_id',
    how='inner',
)

df_top_10_leading_cause_per_income_group_year_level_2_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1700 entries, 0 to 1699
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          1700 non-null   int16  
 1   income_group  1700 non-null   object 
 2   cause_id      1700 non-null   int16  
 3   avg_rate      1700 non-null   float32
 4   cause_name    1700 non-null   string 
dtypes: float32(1), int16(2), object(1), string(1)
memory usage: 40.0+ KB


In [56]:
# Final level-2 ranking table, with the cause name placed next to its id.
_level_2_column_order = ['year', 'income_group', 'cause_id', 'cause_name', 'avg_rate']
df_top_10_leading_cause_per_income_group_year_level_2 = (
    df_top_10_leading_cause_per_income_group_year_level_2_merged
      .loc[:, _level_2_column_order]
)

df_top_10_leading_cause_per_income_group_year_level_2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1700 entries, 0 to 1699
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          1700 non-null   int16  
 1   income_group  1700 non-null   object 
 2   cause_id      1700 non-null   int16  
 3   cause_name    1700 non-null   string 
 4   avg_rate      1700 non-null   float32
dtypes: float32(1), int16(2), object(1), string(1)
memory usage: 40.0+ KB


In [57]:
# Level-2 ranking restricted to the rows of year 2015.
df_top_10_leading_cause_per_income_group_year_level_2_2015 = (
    df_top_10_leading_cause_per_income_group_year_level_2
      .query("year == 2015")
)

In [58]:
# Global series restricted to the level-2 causes:
# 21 causes * 35 years = 735 rows.
_is_global_level_2 = df_global['cause_id'].isin(keep_ids_level_2)
df_global_level_2 = df_global.loc[_is_global_level_2]

df_global_level_2.info()
print("Unique Causes:", df_global_level_2["cause_id"].nunique())
df_global_level_2.head(10)

<class 'pandas.core.frame.DataFrame'>
Index: 735 entries, 19 to 10263
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   year        735 non-null    int16  
 1   cause_id    735 non-null    int16  
 2   avg_rate    735 non-null    float32
 3   cause_name  735 non-null    string 
dtypes: float32(1), int16(2), string(1)
memory usage: 17.2 KB
Unique Causes: 21


Unnamed: 0,year,cause_id,avg_rate,cause_name
19,1987,344,33.108559,Neglected tropical diseases and malaria
49,1987,386,16.293797,Nutritional deficiencies
64,1987,410,103.153168,Neoplasms
99,1987,491,252.732086,Cardiovascular diseases
114,1987,508,32.225845,Chronic respiratory diseases
129,1987,526,34.769779,Digestive diseases
140,1987,542,17.494282,Neurological disorders
147,1987,558,0.002577,Mental disorders
177,1987,626,0.956263,Musculoskeletal disorders
180,1987,640,30.528008,Other non-communicable diseases


In [60]:
# Working copy of the level-2 ranking used by the year-comparison figure.
df_top_10 = df_top_10_leading_cause_per_income_group_year_level_2.copy()


# Panel order, panel titles and bar colours.
income_groups = ['H','UM','LM','L']
group_titles  = {
    'H':  'High Income',
    'UM': 'Upper-middle Income',
    'LM': 'Lower-middle Income',
    'L':  'Low Income'
}
color_map = {
    'H':  '#1f77b4',
    'UM': '#2ca02c',
    'LM': '#ff7f0e',
    'L':  '#9467bd',
    'ALL':'#555555'       # for the global panel
}

# Dropdown options: the distinct years of the ranking, ascending.
years = sorted(df_top_10['year'].unique())
# x-axis upper bound: 10% above the largest rate found in either the
# income-group ranking or the global level-2 series.
# NOTE(review): `years` and `x_max` are generic module-level names; a plotting
# callback that reads them resolves them at call time, so they should not be
# rebound by other cells — consider level-specific names.
x_max  = max(df_top_10['avg_rate'].max(),
             df_global_level_2['avg_rate'].max()) * 1.1

# 1) Fixed-year dropdown
year_dd = widgets.Dropdown(
    options=years,
    value=years[0],
    description='Year:',
    style={'description_width':'initial'},
    layout=widgets.Layout(width='150px')
)

# 2) Plotting function
def plot_for_year(selected_year):
    """Show the top-10 causes for one year: four income-group panels plus a global panel.

    Reads the module-level ``df_top_10``, ``df_global_level_2``, ``income_groups``,
    ``group_titles``, ``color_map`` and ``x_max`` at call time, then displays the
    figure with ``fig.show()``. Nothing is returned.

    Parameters
    ----------
    selected_year
        Value compared against the ``year`` column of both tables.
    """
    hover = '<b>%{y}</b><br>Rate: %{x:.1f}<extra></extra>'

    def top_ten_bar(rows, colour):
        # Horizontal bar trace built from the 10 largest avg_rate rows of `rows`.
        ranked = rows.nlargest(10, 'avg_rate')
        return go.Bar(
            x=ranked['avg_rate'],
            y=ranked['cause_name'],
            orientation='h',
            marker_color=colour,
            hovertemplate=hover,
            showlegend=False,
        )

    # 3 rows x 2 columns; the last row is one cell spanning both columns ("Global").
    fig = make_subplots(
        rows=3, cols=2,
        specs=[
            [{}, {}],
            [{}, {}],
            [{"colspan": 2}, None],
        ],
        subplot_titles=[group_titles.get(code) for code in income_groups] + ["Global"],
        vertical_spacing=0.18,
        horizontal_spacing=0.12,
    )

    # Income-group panels fill the 2x2 grid in row-major order.
    in_year = df_top_10[df_top_10['year'] == selected_year]
    for slot, code in enumerate(income_groups):
        fig.add_trace(
            top_ten_bar(in_year[in_year['income_group'] == code], color_map[code]),
            row=slot // 2 + 1,
            col=slot % 2 + 1,
        )

    # Bottom panel: global top-10 taken from df_global_level_2.
    fig.add_trace(
        top_ten_bar(
            df_global_level_2[df_global_level_2['year'] == selected_year],
            color_map['ALL'],
        ),
        row=3, col=1,
    )

    # With no row/col selector these calls apply to every subplot axis (all five).
    fig.update_xaxes(
        title_text='Mortality Rate (Per 100,000)',
        range=[0, x_max],
        ticks='outside',
        tickfont=dict(size=11),
    )
    # Reversed y-axis puts the first (largest) bar at the top of each panel.
    fig.update_yaxes(
        autorange='reversed',
        tickfont=dict(size=11),
    )

    fig.update_layout(
        template='plotly_white',
        title_text=f"Top 10 Causes of Death by Income Group — {selected_year}",
        title_x=0.5,
        width=1200,
        height=1050,
        margin=dict(l=80, r=80, t=140, b=60),
    )

    fig.show()

# 3) Re-render plot_for_year into `out` whenever the dropdown value changes,
#    then show the dropdown above the output area.
out = widgets.interactive_output(plot_for_year, dict(selected_year=year_dd))
display(year_dd, out)


Dropdown(description='Year:', layout=Layout(width='150px'), options=(np.int16(1987), np.int16(1988), np.int16(…

Output()

In [61]:
import pandas as pd
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display

# Inputs built in earlier cells:
#   df_top_10_leading_cause_per_income_group_year_level_2
#   df_global_level_2

# 1) Display label per income-group code; 'ALL' supplies the "Global" dropdown entry.
income_map = {
    'ALL': 'Global',
    'H':   'High Income',
    'UM':  'Upper-middle Income',
    'LM':  'Lower-middle Income',
    'L':   'Low Income',
}

# 2) Income-group top-10 rows with their display label attached.
df_top10 = df_top_10_leading_cause_per_income_group_year_level_2.assign(
    **{'Income Label': lambda frame: frame['income_group'].map(income_map)}
)

# 3) Global series: keep the 10 highest-rate rows of each year, labelled "Global".
df_glob = (
    df_global_level_2
      .sort_values(by=['year', 'avg_rate'], ascending=[True, False])
      .groupby('year', as_index=False)
      .head(10)
      .assign(**{'Income Label': 'Global'})
)

# 4) One table holding both the income-group rows and the global rows.
df_all = pd.concat([df_top10, df_glob], ignore_index=True)

# 5) Single dropdown over the display labels, starting on "Global".
income_dd = widgets.Dropdown(
    description='Income Group:',
    options=list(income_map.values()),
    value='Global',
    layout=widgets.Layout(width='250px'),
    style=dict(description_width='initial'),
)

# 6) Plotting function with fixed layout & axes
def plot_for_group(label):
    """Show an animated (one frame per year) top-10 bar chart for one income label.

    Reads the module-level ``df_all`` at call time and displays the figure with
    ``fig.show()``. Nothing is returned.

    Parameters
    ----------
    label
        Value matched against the ``Income Label`` column of ``df_all``.
    """
    group_rows = df_all.loc[df_all['Income Label'] == label]

    fig = px.bar(
        group_rows,
        x='avg_rate',
        y='cause_name',
        orientation='h',
        animation_frame='year',
        animation_group='cause_name',
        color_discrete_sequence=['#1f77b4'],
        # Axis limit comes from the whole table, so it is the same for every label.
        range_x=[0, df_all.avg_rate.max() * 1.05],
        title=f"Top 10 Causes of Death Over Time — {label}",
        labels=dict(
            avg_rate='Mortality Rate (Per 100,000)',
            cause_name='Cause of Death',
            year='Year',
        ),
    )

    # Play button: 700 ms per frame.
    fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 700

    # Slider steps: 700 ms linear transition.
    fig.layout.sliders[0].transition = dict(duration=700, easing='linear')

    # Fixed size and margins, no legend, layout-level transition duration of 0.
    fig.update_layout(
        template='plotly_white',
        title_x=0.5,
        title_font_size=18,
        autosize=False,
        width=1000,
        height=500,
        margin=dict(l=290, r=40, t=80, b=40),
        showlegend=False,
        transition=dict(duration=0),
    )

    # Both axes are locked against zoom/pan; the y-axis is reversed and keeps
    # the fixed left margin set above (automargin off).
    fig.update_xaxes(fixedrange=True)
    fig.update_yaxes(
        autorange='reversed',
        tickfont=dict(size=11),
        automargin=False,
        fixedrange=True,
    )

    fig.show()

# 7) Hook it up — only one dropdown.
# `interact` returns the wrapped function; the trailing semicolon stops the
# notebook from echoing that function object as the cell's output.
widgets.interact(plot_for_group, label=income_dd);


interactive(children=(Dropdown(description='Income Group:', layout=Layout(width='250px'), options=('Global', '…

<function __main__.plot_for_group(label)>

In [62]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display

# Inputs built in earlier cells:
#   df_grouped_by_year_income_cause_level_2 : [year, income_group, cause_id, avg_rate]
#   df_causes                               : joined on cause_id to supply cause_name
#   df_global_level_2                       : [year, cause_id, avg_rate, cause_name]

# Display label per income-group code.
income_map = dict(
    H='High Income',
    UM='Upper-middle Income',
    LM='Lower-middle Income',
    L='Low Income',
)

# 1) Income-group series with cause names and display labels attached.
df_line = (
    df_grouped_by_year_income_cause_level_2
      .merge(df_causes, how='inner', on='cause_id')
      .loc[:, ['year', 'income_group', 'cause_name', 'avg_rate']]
      .assign(Income_Label=lambda frame: frame['income_group'].map(income_map))
)

# 2) Global series in the same column layout, labelled "Global".
df_global_line = (
    df_global_level_2
      .assign(Income_Label='Global')
      .loc[:, ['year', 'Income_Label', 'cause_name', 'avg_rate']]
)

# 3) One master table: income-group rows followed by global rows.
df_line_all = pd.concat(
    [
        df_line[['year', 'Income_Label', 'cause_name', 'avg_rate']],
        df_global_line,
    ],
    ignore_index=True,
)

# 4) Disease selector over the sorted cause names, starting on the first one.
diseases = sorted(df_line_all['cause_name'].unique())
disease_dd = widgets.Dropdown(
    description='Disease:',
    options=diseases,
    value=diseases[0],
    layout=widgets.Layout(width='300px'),
)

# 5) Plotting function
def plot_trend(cause):
    """Show the rate-over-time line for one cause in five panels.

    Four income-group panels fill a 2x2 grid and a "Global" panel spans the
    bottom row. Reads the module-level ``df_line_all`` at call time and displays
    the figure with ``fig.show()``. Nothing is returned.

    Parameters
    ----------
    cause
        Value matched against the ``cause_name`` column of ``df_line_all``.
    """
    # Panel label -> (row, col) in the subplot grid, in title order.
    panel_cells = {
        'High Income': (1, 1),
        'Upper-middle Income': (1, 2),
        'Lower-middle Income': (2, 1),
        'Low Income': (2, 2),
        'Global': (3, 1),
    }

    fig = make_subplots(
        rows=3, cols=2,
        specs=[
            [{}, {}],
            [{}, {}],
            [{"colspan": 2}, None],
        ],
        subplot_titles=list(panel_cells),
        vertical_spacing=0.12,
        horizontal_spacing=0.08,
    )

    cause_rows = df_line_all[df_line_all['cause_name'] == cause]
    for label, (grid_row, grid_col) in panel_cells.items():
        panel_rows = cause_rows[cause_rows['Income_Label'] == label]
        fig.add_trace(
            go.Scatter(
                x=panel_rows['year'],
                y=panel_rows['avg_rate'],
                mode='lines+markers',
                name=label,
            ),
            row=grid_row, col=grid_col,
        )

    # With no row/col selector these calls style every subplot axis (all five).
    fig.update_xaxes(title_text='Year', fixedrange=True)
    fig.update_yaxes(
        title_text='Mortality Rate (Per 100,000)',
        fixedrange=True,
        tickfont=dict(size=11),
    )

    fig.update_layout(
        template='plotly_white',
        title=f"{cause} — Mortality Rate Over Time",
        title_x=0.5,
        height=1000,
        showlegend=False,
        margin=dict(l=60, r=40, t=80, b=40),
    )
    fig.show()

# 6) Re-render plot_trend into `out` whenever the disease dropdown changes,
#    then show the dropdown above the output area.
out = widgets.interactive_output(plot_trend, dict(cause=disease_dd))
display(disease_dd, out)


Dropdown(description='Disease:', layout=Layout(width='300px'), options=('Cardiovascular diseases', 'Chronic re…

Output()

# LEVEL 3

In [63]:
# Cause ids kept for the level-3 analysis (175 ids listed).
# Original sizing note: 175 causes * 5 income groups * 35 years.
# NOTE(review): the filter output further down reports only 139 unique causes
# matched, so some listed ids do not occur in the grouped data — confirm that
# this is expected.
keep_ids_level_3 = [
    298, 393, 297, 322, 328, 329, 1048, 302, 958, 959,
    321, 345, 356, 357, 358, 359, 360, 364, 405, 843,
    935, 936, 346, 365, 347, 350, 351, 352, 353, 354,
    355, 332, 337, 338, 339, 340, 341, 342, 400, 408,
    366, 380, 387, 388, 389, 390, 391, 444, 423, 426,
    459, 462, 1011, 1012, 429, 432, 435, 465, 447, 438,
    468, 471, 474, 477, 1008, 1013, 480, 483, 484, 450,
    485, 486, 487, 489, 490, 411, 414, 441, 417, 453,
    456, 981, 674, 679, 627, 628, 630, 631, 632, 639,
    641, 594, 603, 613, 619, 680, 686, 492, 502, 503,
    507, 493, 494, 498, 504, 499, 1004, 500, 501, 509,
    510, 515, 516, 520, 521, 541, 992, 529, 530, 531,
    532, 533, 534, 535, 543, 544, 545, 546, 554, 972,
    557, 559, 585, 567, 570, 571, 572, 575, 578, 579,
    582, 560, 561, 587, 589, 588, 654, 664, 665, 668,
    655, 980, 658, 659, 660, 661, 662, 663, 689, 695,
    697, 842, 729, 1056, 698, 699, 700, 704, 708, 709,
    712, 718, 724, 945, 854
]

# Keep only the rows whose cause_id appears in the level-3 id list.
df_grouped_by_year_income_cause_level_3 = df_grouped_by_year_income_cause.loc[
    lambda frame: frame['cause_id'].isin(keep_ids_level_3)
]

df_grouped_by_year_income_cause_level_3.info()
print("Unique Causes:", df_grouped_by_year_income_cause_level_3["cause_id"].nunique())
df_grouped_by_year_income_cause_level_3.head(10)

<class 'pandas.core.frame.DataFrame'>
Index: 23630 entries, 2 to 49977
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          23630 non-null  int16  
 1   income_group  23630 non-null  object 
 2   cause_id      23630 non-null  int16  
 3   avg_rate      23630 non-null  float32
dtypes: float32(1), int16(2), object(1)
memory usage: 553.8+ KB
Unique Causes: 139


Unnamed: 0,year,income_group,cause_id,avg_rate
2,1987,H,297,3.403862
3,1987,H,298,1.427613
5,1987,H,302,1.496238
8,1987,H,321,0.007964
9,1987,H,322,26.363537
10,1987,H,328,0.101556
11,1987,H,329,0.017702
12,1987,H,332,1.334336
13,1987,H,337,0.201768
14,1987,H,338,0.003649


In [64]:
# Top-10 level-3 causes per (year, income_group): order each group by
# descending avg_rate, then keep at most the first 10 rows of every group.
# Original sizing note: 10 causes * 5 income groups * 35 years = 1750.
df_top_10_leading_cause_per_income_group_year_level_3 = (
    df_grouped_by_year_income_cause_level_3
      .sort_values(
          by=['year', 'income_group', 'avg_rate'],
          ascending=[True, True, False],
      )
      .groupby(by=['year', 'income_group'], as_index=False)
      .head(n=10)
)
df_top_10_leading_cause_per_income_group_year_level_3.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1700 entries, 101 to 49810
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          1700 non-null   int16  
 1   income_group  1700 non-null   object 
 2   cause_id      1700 non-null   int16  
 3   avg_rate      1700 non-null   float32
dtypes: float32(1), int16(2), object(1)
memory usage: 39.8+ KB


In [65]:
# Attach cause names: inner join of the level-3 top-10 rows with df_causes on cause_id.
df_top_10_leading_cause_per_income_group_year_level_3_merged = pd.merge(
    df_top_10_leading_cause_per_income_group_year_level_3,
    df_causes,
    how='inner',
    on='cause_id',
)

df_top_10_leading_cause_per_income_group_year_level_3_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1700 entries, 0 to 1699
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          1700 non-null   int16  
 1   income_group  1700 non-null   object 
 2   cause_id      1700 non-null   int16  
 3   avg_rate      1700 non-null   float32
 4   cause_name    1700 non-null   string 
dtypes: float32(1), int16(2), object(1), string(1)
memory usage: 40.0+ KB


In [66]:
# Rebind the level-3 top-10 name to the merged table, with cause_name placed
# directly after cause_id.
df_top_10_leading_cause_per_income_group_year_level_3 = (
    df_top_10_leading_cause_per_income_group_year_level_3_merged
      .loc[:, ['year', 'income_group', 'cause_id', 'cause_name', 'avg_rate']]
)

df_top_10_leading_cause_per_income_group_year_level_3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1700 entries, 0 to 1699
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          1700 non-null   int16  
 1   income_group  1700 non-null   object 
 2   cause_id      1700 non-null   int16  
 3   cause_name    1700 non-null   string 
 4   avg_rate      1700 non-null   float32
dtypes: float32(1), int16(2), object(1), string(1)
memory usage: 40.0+ KB


In [67]:
# Global series restricted to the level-3 cause ids.
# Sizing note: 139 causes * 35 years = 4865 rows.
df_global_level_3 = df_global.loc[
    lambda frame: frame['cause_id'].isin(keep_ids_level_3)
]

df_global_level_3.info()
print("Unique Causes:", df_global_level_3["cause_id"].nunique())
df_global_level_3.head(10)

<class 'pandas.core.frame.DataFrame'>
Index: 4865 entries, 2 to 10287
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   year        4865 non-null   int16  
 1   cause_id    4865 non-null   int16  
 2   avg_rate    4865 non-null   float32
 3   cause_name  4865 non-null   string 
dtypes: float32(1), int16(2), string(1)
memory usage: 114.0 KB
Unique Causes: 139


Unnamed: 0,year,cause_id,avg_rate,cause_name
2,1987,297,32.899364,Tuberculosis
3,1987,298,4.218121,HIV/AIDS
5,1987,302,57.902466,Diarrheal diseases
8,1987,321,0.09667,Other intestinal infectious diseases
9,1987,322,72.241219,Lower respiratory infections
10,1987,328,0.748322,Upper respiratory infections
11,1987,329,0.03329,Otitis media
12,1987,332,11.446073,Meningitis
13,1987,337,0.890135,Encephalitis
14,1987,338,0.931348,Diphtheria


In [69]:
# Level-3 version of the per-year, five-panel top-10 figure.
df_top_10_level_3 = df_top_10_leading_cause_per_income_group_year_level_3.copy()


income_groups = ['H','UM','LM','L']
group_titles  = {
    'H':  'High Income',
    'UM': 'Upper-middle Income',
    'LM': 'Lower-middle Income',
    'L':  'Low Income'
}
color_map = {
    'H':  '#1f77b4',
    'UM': '#2ca02c',
    'LM': '#ff7f0e',
    'L':  '#9467bd',
    'ALL':'#555555'       # for the global panel
}

years = sorted(df_top_10_level_3['year'].unique())

# Shared x-axis limit: 10% headroom above the largest rate that can appear in
# any panel. The global panel plots df_global_level_3, so the limit is taken
# from that table (not from the level-2 global table).
# The limit lives under a level-3-specific name because plot_for_year (the
# level-2 callback) reads the module-level `x_max` every time its dropdown
# fires; rebinding `x_max` here would rescale the level-2 figure.
x_max_level_3 = max(df_top_10_level_3['avg_rate'].max(),
                    df_global_level_3['avg_rate'].max()) * 1.1

# 1) Fixed-year dropdown
year_dd_level_3 = widgets.Dropdown(
    options=years,
    value=years[0],
    description='Year:',
    style={'description_width':'initial'},
    layout=widgets.Layout(width='150px')
)

# 2) Plotting function
def plot_for_year_level_3(selected_year):
    """Show the level-3 top-10 causes for one year in five panels.

    Four income-group panels fill a 2x2 grid and a "Global" panel spans the
    bottom row. Reads the module-level ``df_top_10_level_3``,
    ``df_global_level_3``, ``income_groups``, ``group_titles``, ``color_map``
    and ``x_max_level_3`` at call time and displays the figure with
    ``fig.show()``. Nothing is returned.

    Parameters
    ----------
    selected_year
        Value compared against the ``year`` column of both tables.
    """
    # 3×2 grid: bottom row spans both columns for “Global”
    fig = make_subplots(
        rows=3, cols=2,
        specs=[
            [{}, {}],
            [{}, {}],
            [{"colspan": 2}, None]
        ],
        subplot_titles=[*map(group_titles.get, income_groups), "Global"],
        vertical_spacing=0.18,
        horizontal_spacing=0.12
    )

    # 2×2: income-group panels, filled in row-major order
    for i, inc in enumerate(income_groups):
        df_sel = (
            df_top_10_level_3
              .loc[(df_top_10_level_3['year']==selected_year) &
                   (df_top_10_level_3['income_group']==inc)]
              .nlargest(10, 'avg_rate')
        )
        r, c = divmod(i, 2)
        fig.add_trace(
            go.Bar(
                x=df_sel['avg_rate'],
                y=df_sel['cause_name'],
                orientation='h',
                marker_color=color_map[inc],
                hovertemplate='<b>%{y}</b><br>Rate: %{x:.1f}<extra></extra>',
                showlegend=False
            ),
            row=r+1, col=c+1
        )

    # bottom: Global top-10 from df_global_level_3
    df_glob = (
        df_global_level_3
          .loc[df_global_level_3['year']==selected_year]
          .nlargest(10, 'avg_rate')
    )
    fig.add_trace(
        go.Bar(
            x=df_glob['avg_rate'],
            y=df_glob['cause_name'],
            orientation='h',
            marker_color=color_map['ALL'],
            hovertemplate='<b>%{y}</b><br>Rate: %{x:.1f}<extra></extra>',
            showlegend=False
        ),
        row=3, col=1
    )

    # Layout & styling
    fig.update_layout(
        template='plotly_white',
        title_text=f"Top 10 Causes of Death by Income Group — {selected_year}",
        title_x=0.5,
        width=1200,
        height=1050,
        margin=dict(l=80, r=80, t=140, b=60),
    )

    # Axis formatting: with no row/col selector these calls apply to every
    # subplot axis, i.e. all five panels.
    fig.update_xaxes(
        title_text='Mortality Rate (Per 100,000)',
        range=[0, x_max_level_3],
        ticks='outside',
        tickfont=dict(size=11)
    )
    # Reversed y-axis puts the first (largest) bar at the top of each panel.
    fig.update_yaxes(
        autorange='reversed',
        tickfont=dict(size=11)
    )

    fig.show()

# 3) Wire up & display
out_level_3 = widgets.interactive_output(plot_for_year_level_3, {'selected_year': year_dd_level_3})
display(year_dd_level_3, out_level_3)


Dropdown(description='Year:', layout=Layout(width='150px'), options=(np.int16(1987), np.int16(1988), np.int16(…

Output()

In [70]:

# 1) Copy the level-3 income-group top-10 table
df_top10_level_3 = df_top_10_leading_cause_per_income_group_year_level_3.copy()

# 2) Display label per income-group code; 'ALL' supplies the "Global" dropdown entry
income_map = {
    'ALL': 'Global',
    'H':   'High Income',
    'UM':  'Upper-middle Income',
    'LM':  'Lower-middle Income',
    'L':   'Low Income',
}
df_top10_level_3['Income Label'] = df_top10_level_3['income_group'].map(income_map)

# 3) Take the precomputed level-3 global series, keep the 10 highest-rate rows
#    of each year, then label them "Global"
df_glob = (
    df_global_level_3
      .sort_values(['year','avg_rate'], ascending=[True, False])
      .groupby('year', as_index=False)
      .head(10)
      .assign(**{'Income Label': 'Global'})
)

# 4) Concatenate into one DataFrame.
#    Stored under a level-3-specific name: plot_for_group (the level-2 callback)
#    reads the module-level `df_all` every time its dropdown fires, so rebinding
#    `df_all` here would make the level-2 animation show level-3 data.
df_all_level_3 = pd.concat([df_top10_level_3, df_glob], ignore_index=True)

# 5) Build the single dropdown
income_dd_level_3 = widgets.Dropdown(
    options=list(income_map.values()),
    value='Global',
    description='Income Group:',
    style={'description_width':'initial'},
    layout=widgets.Layout(width='250px')
)

# 6) Plotting function with fixed layout & axes
def plot_for_group_level_3(label):
    """Show an animated (one frame per year) level-3 top-10 bar chart for one income label.

    Reads the module-level ``df_all_level_3`` at call time and displays the
    figure with ``fig.show()``. Nothing is returned.

    Parameters
    ----------
    label
        Value matched against the ``Income Label`` column of ``df_all_level_3``.
    """
    df = df_all_level_3[df_all_level_3['Income Label'] == label]
    fig = px.bar(
        df,
        x='avg_rate',
        y='cause_name',
        orientation='h',
        animation_frame='year',
        animation_group='cause_name',
        color_discrete_sequence=['#1f77b4'],
        # Axis limit comes from the whole table, so it is the same for every label.
        range_x=[0, df_all_level_3.avg_rate.max() * 1.05],
        title=f"Top 10 Causes of Death Over Time — {label}",
        labels={
            'avg_rate':'Mortality Rate (Per 100,000)',
            'cause_name':'Cause of Death',
            'year':'Year'
        }
    )

    # Play button: 700 ms per frame
    fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 700

    # Slider steps: 700 ms linear transition
    fig.layout.sliders[0].transition = {'duration': 700, 'easing': 'linear'}

    # reverse y-axis and disable auto-margin
    fig.update_yaxes(
        autorange='reversed',
        tickfont=dict(size=11),
        automargin=False,
        fixedrange=True
    )

    # lock figure size and margins; layout-level transition duration is 0
    fig.update_layout(
        template='plotly_white',
        title_x=0.5,
        title_font_size=18,
        autosize=False,
        width=1000,
        height=500,
        margin=dict(l=290, r=40, t=80, b=40),
        showlegend=False,
        transition={'duration': 0}
    )
    # disable panning/zooming shifts
    fig.update_xaxes(fixedrange=True)

    fig.show()

# 7) Hook it up — only one dropdown.
#    `interact` returns the wrapped function; the trailing semicolon stops the
#    notebook from echoing that function object as the cell's output.
widgets.interact(plot_for_group_level_3, label=income_dd_level_3);


interactive(children=(Dropdown(description='Income Group:', layout=Layout(width='250px'), options=('Global', '…

<function __main__.plot_for_group_level_3(label)>

In [71]:

# 1) Build the level-3 income-group lines
df_line = (
    df_grouped_by_year_income_cause_level_3
      .merge(df_causes, on='cause_id', how='inner')
      [['year','income_group','cause_name','avg_rate']]
)
income_map = {
    'H':  'High Income',
    'UM': 'Upper-middle Income',
    'LM': 'Lower-middle Income',
    'L':  'Low Income'
}
df_line['Income_Label'] = df_line['income_group'].map(income_map)

# 2) Prepare the Global lines (from df_global_level_3)
df_global_line = (
    df_global_level_3
      .assign(Income_Label='Global')
      [['year','Income_Label','cause_name','avg_rate']]
)

# 3) Concatenate into one master DataFrame.
#    Stored under a level-3-specific name: plot_trend (the level-2 callback)
#    reads the module-level `df_line_all` every time its dropdown fires, so
#    rebinding `df_line_all` here would leave the level-2 panels filtering
#    level-3 rows.
df_line_all_level_3 = pd.concat(
    [df_line[['year','Income_Label','cause_name','avg_rate']],
     df_global_line],
    ignore_index=True
)

# 4) Build the disease dropdown
diseases = sorted(df_line_all_level_3['cause_name'].unique())
disease_dd_level_3 = widgets.Dropdown(
    options=diseases,
    value=diseases[0],
    description='Disease:',
    layout=widgets.Layout(width='300px')
)

# 5) Plotting function
def plot_trend_level_3(cause):
    """Show the rate-over-time line for one level-3 cause in five panels.

    Four income-group panels fill a 2x2 grid and a "Global" panel spans the
    bottom row. Reads the module-level ``df_line_all_level_3`` at call time and
    displays the figure with ``fig.show()``. Nothing is returned.

    Parameters
    ----------
    cause
        Value matched against the ``cause_name`` column of ``df_line_all_level_3``.
    """
    df_sel = df_line_all_level_3[df_line_all_level_3['cause_name'] == cause]

    # 3×2 grid; bottom row spanning both cols for Global
    fig = make_subplots(
        rows=3, cols=2,
        specs=[
            [{}, {}],
            [{}, {}],
            [{"colspan": 2}, None]
        ],
        subplot_titles=[
            'High Income', 'Upper-middle Income',
            'Lower-middle Income', 'Low Income',
            'Global'
        ],
        vertical_spacing=0.12,
        horizontal_spacing=0.08
    )

    panels = ['High Income','Upper-middle Income','Lower-middle Income','Low Income','Global']
    for idx, label in enumerate(panels):
        dfp = df_sel[df_sel['Income_Label'] == label]
        # map to row/col: income groups fill the 2x2 grid, Global takes row 3
        if label == 'Global':
            r, c = 2, 0
        else:
            r, c = divmod(idx, 2)
        fig.add_trace(
            go.Scatter(
                x=dfp['year'],
                y=dfp['avg_rate'],
                mode='lines+markers',
                name=label
            ),
            row=r+1, col=c+1
        )
        # axis styling for this panel
        fig.update_xaxes(title_text='Year', row=r+1, col=c+1, fixedrange=True)
        fig.update_yaxes(title_text='Mortality Rate (Per 100,000)',
                         row=r+1, col=c+1,
                         fixedrange=True,
                         tickfont=dict(size=11))

    fig.update_layout(
        template='plotly_white',
        title=f"{cause} — Mortality Rate Over Time",
        title_x=0.5,
        height=1000,
        showlegend=False,
        margin=dict(l=60, r=40, t=80, b=40)
    )
    fig.show()

# 6) Wire up and display
out_level_3 = widgets.interactive_output(plot_trend_level_3, {'cause': disease_dd_level_3})
display(disease_dd_level_3, out_level_3)


Dropdown(description='Disease:', layout=Layout(width='300px'), options=('Acute glomerulonephritis', 'Acute hep…

Output()