# VDS Project — Report Stage Dashboard

This dashboard communicates key insights about the prevalence of mental health disorders and the associated disability-adjusted life years (DALYs) across countries (1990–2019).
Use the controls to select a disorder and a year. Click points in the scatter plot to select countries and link that selection across views; double-click to clear it.


In [11]:
from __future__ import annotations

from pathlib import Path
import numpy as np
import pandas as pd
import altair as alt


In [12]:
# Project root: the working directory when it contains a "data" folder,
# otherwise the directory one level above it.
_cwd = Path.cwd()
PROJECT_ROOT = _cwd if (_cwd / "data").exists() else _cwd.parent

# Folder layout, all derived from the project root.
_data_dir = PROJECT_ROOT / "data"
RAW_DIR = _data_dir / "raw"
PROCESSED_DIR = _data_dir / "processed"
DOCS_DIR = PROJECT_ROOT / "docs"

# Make sure the output folders exist; folders already present are left as they are.
for _out_dir in (PROCESSED_DIR, DOCS_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)

# Raw input files read by the loading cell.
PREV_PATH = RAW_DIR / "1- mental-illnesses-prevalence.csv"
BURDEN_PATH = RAW_DIR / "2- burden-disease-from-each-mental-illness(1).csv"

print(f"PROJECT_ROOT: {PROJECT_ROOT}")


PROJECT_ROOT: /Users/sergio/Library/Mobile Documents/com~apple~CloudDocs/Master/First Semester/Visual Data Science/VDS_Mental_Health


In [13]:
def _countries_only(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of the rows whose ``Code`` is a three-character string.

    The notebook treats three-character codes as ISO-3 country codes; rows
    with a missing code or a code of any other length are discarded.
    """
    has_iso3 = frame["Code"].notna() & (frame["Code"].str.len() == 3)
    return frame[has_iso3].copy()


def _melt_by_disorder(wide: pd.DataFrame, value_name: str, label_pattern: str) -> pd.DataFrame:
    """Reshape a wide table (one column per metric) into one row per disorder.

    Parameters
    ----------
    wide:
        Frame with ``Entity``, ``Code`` and ``Year`` columns plus one column
        per metric.
    value_name:
        Name of the column that receives the melted values.
    label_pattern:
        Regular expression with exactly one capture group that pulls the
        disorder label out of the original column header.

    Returns
    -------
    Frame with columns ``Entity``, ``Code``, ``Year``, ``value_name`` and
    ``disorder``.
    """
    long = wide.melt(
        id_vars=["Entity", "Code", "Year"],
        var_name="metric",
        value_name=value_name,
    )
    long["disorder"] = (
        long["metric"]
        .str.extract(label_pattern, expand=False)
        # Normalise "Anxiety disorders" / "Bipolar disorder" style labels so the
        # two sources share the same key.
        .str.replace(" disorders", "", regex=False)
        .str.replace(" disorder", "", regex=False)
        .str.strip()
    )
    # A header that does not match the pattern yields a missing label. Such rows
    # are dropped here: pandas matches missing keys to each other in a merge,
    # and a missing label cannot be sorted together with the string labels later.
    return long.dropna(subset=["disorder"]).drop(columns=["metric"])


prev = _countries_only(pd.read_csv(PREV_PATH))
burden = _countries_only(pd.read_csv(BURDEN_PATH))

# Long format: one row per (country, year, disorder)
prev_long = _melt_by_disorder(prev, "prevalence", r"^(.*?)\s*\(share of population\)")
burden_long = _melt_by_disorder(burden, "dalys_rate", r"Cause:\s*(.*)$")

# Merge prevalence + DALYs rate, keep complete observations only, integer years
df = (
    prev_long.merge(
        burden_long,
        on=["Entity", "Code", "Year", "disorder"],
        how="inner",
    )
    .dropna(subset=["prevalence", "dalys_rate"])
    .astype({"Year": int})
)

df.head()


Unnamed: 0,Entity,Code,Year,prevalence,disorder,dalys_rate
0,Afghanistan,AFG,1990,0.223206,Schizophrenia,138.24825
1,Afghanistan,AFG,1991,0.222454,Schizophrenia,137.76122
2,Afghanistan,AFG,1992,0.221751,Schizophrenia,137.0803
3,Afghanistan,AFG,1993,0.220987,Schizophrenia,136.48602
4,Afghanistan,AFG,1994,0.220183,Schizophrenia,136.18323


In [14]:
cluster_path = PROCESSED_DIR / "model_country_clusters.csv"

# Start from a frame without a cluster column so this cell can be re-run safely:
# merging onto a frame that already has one would create cluster_x / cluster_y
# and the lookup of df["cluster"] below would fail.
df = df.drop(columns=["cluster"], errors="ignore")

if cluster_path.exists():
    clusters = pd.read_csv(cluster_path)
    # Keep a single cluster label per country code so the left merge cannot
    # multiply the observation rows.
    cluster_by_code = clusters[["Code", "cluster"]].drop_duplicates(subset="Code")
    df = df.merge(cluster_by_code, on="Code", how="left")
else:
    df["cluster"] = np.nan

df["cluster"] = df["cluster"].fillna(-1).astype(int)  # -1 means "no cluster available"


In [15]:
# Per (Year, disorder) mean of both metrics over all rows of df; every row
# carries the same weight in the average.
global_ts = (
    df.groupby(["Year", "disorder"])[["prevalence", "dalys_rate"]]
      .mean()
      .reset_index()
)


In [16]:
# The full data set is embedded in the chart, so lift Altair's row limit.
alt.data_transformers.disable_max_rows()

disorders = sorted(df["disorder"].unique().tolist())

# Slider bounds come from the data so the control always matches the years
# that are actually present; plain ints keep the values JSON-serialisable.
year_min = int(df["Year"].min())
year_max = int(df["Year"].max())

# Dropdown: which disorder every view shows.
disorder_param = alt.param(
    name="Disorder",
    value=disorders[0],
    bind=alt.binding_select(options=disorders, name="Disorder: ")
)

# Slider: which year the scatter plot and the ranking show (starts at the latest year).
year_param = alt.param(
    name="Year",
    value=year_max,
    bind=alt.binding_range(min=year_min, max=year_max, step=1, name="Year: ")
)

# Country selection for brushing & linking (click points; double-click to clear).
# empty=False: while nothing is selected, no country counts as selected.
# (The string form "none" is the deprecated Altair 4 spelling of the same setting.)
country_sel = alt.selection_point(
    fields=["Entity"],
    on="click",
    toggle=True,
    clear="dblclick",
    empty=False
)


In [17]:
base = alt.Chart(df)

# Rows for the chosen disorder and year.
# Disorder and Year are variable parameters (widget values), not selections, so
# they must be compared with the data fields explicitly. Passing the parameter
# object itself to transform_filter() builds a selection-style predicate that
# does not compare it with the `disorder` / `Year` columns.
scatter_data = (
    base.transform_filter(alt.datum.disorder == disorder_param)
        .transform_filter(alt.datum.Year == year_param)
)

# One point per country: prevalence against DALYs rate, coloured by cluster.
scatter = (
    scatter_data.mark_circle(size=70)
    .encode(
        x=alt.X("prevalence:Q", title="Prevalence (% of population, age-standardized)"),
        y=alt.Y("dalys_rate:Q", title="DALYs rate (age-standardized)"),
        tooltip=[
            alt.Tooltip("Entity:N", title="Country"),
            alt.Tooltip("Year:Q"),
            alt.Tooltip("disorder:N", title="Disorder"),
            alt.Tooltip("prevalence:Q", format=".2f"),
            alt.Tooltip("dalys_rate:Q", format=".1f"),
            alt.Tooltip("cluster:N", title="Cluster"),
        ],
        color=alt.Color("cluster:N", title="Cluster"),
        # Selected countries are fully opaque, all others are dimmed.
        opacity=alt.condition(country_sel, alt.value(1.0), alt.value(0.35)),
    )
    # The selection is registered on this view: clicks here drive the linked views.
    .add_params(country_sel)
    .properties(width=520, height=360, title="Prevalence vs DALYs (select countries by clicking points)")
    .interactive()
)


In [18]:
# Rows for the chosen disorder and year, filtered with explicit comparisons
# against the widget parameters. Handing the parameter objects themselves to
# transform_filter() does not compare them with the `disorder` / `Year`
# columns, so the ranking below would run over every disorder and year.
_bars_data = (
    base.transform_filter(alt.datum.disorder == disorder_param)
        .transform_filter(alt.datum.Year == year_param)
)

# Ranking of the countries with the highest prevalence for the current controls.
bars = (
    _bars_data.transform_window(
        rank="rank(prevalence)",
        sort=[alt.SortField("prevalence", order="descending")]
    )
    # Keep ranks 1..12; tied values share a rank, so ties at the cut-off are all kept.
    .transform_filter(alt.datum.rank <= 12)
    .mark_bar()
    .encode(
        y=alt.Y("Entity:N", sort="-x", title=None),
        x=alt.X("prevalence:Q", title="Prevalence (%)"),
        tooltip=[
            alt.Tooltip("Entity:N", title="Country"),
            alt.Tooltip("prevalence:Q", format=".2f"),
            alt.Tooltip("dalys_rate:Q", format=".1f"),
        ],
        # Same highlight rule as the scatter plot: selected countries stand out.
        opacity=alt.condition(country_sel, alt.value(1.0), alt.value(0.35)),
        color=alt.Color("cluster:N", legend=None),
    )
    .properties(width=260, height=360, title="Top 12 countries (prevalence)")
)


In [19]:
# Disorder is a variable parameter (dropdown value), not a selection, so it is
# compared with the `disorder` field explicitly. Passing the parameter object
# itself to transform_filter() would not perform that comparison.
_disorder_matches = alt.datum.disorder == disorder_param

global_base = alt.Chart(global_ts).transform_filter(_disorder_matches)


def _metric_time_series(metric: str, y_title: str, value_format: str, title: str):
    """Build the time-series view for one metric.

    Parameters
    ----------
    metric:
        Column to plot on the y axis (present in both ``global_ts`` and ``df``).
    y_title:
        Y-axis title, used by both layers so the shared axis shows one title.
    value_format:
        Number format for the metric in tooltips.
    title:
        Chart title.

    Returns
    -------
    tuple
        ``(global_line, selected_lines, layered)``: the cross-country mean line,
        the lines of the currently selected countries, and both layered into
        one zoomable chart.
    """
    # "d" prints years as plain integers (no thousands separator).
    x_year = alt.X("Year:Q", title="Year", axis=alt.Axis(format="d"))
    y_metric = alt.Y(f"{metric}:Q", title=y_title)
    value_tip = alt.Tooltip(f"{metric}:Q", format=value_format)

    global_line = (
        global_base.mark_line()
        .encode(
            x=x_year,
            y=y_metric,
            tooltip=[alt.Tooltip("Year:Q"), value_tip],
        )
        .properties(width=780, height=160, title=title)
    )

    selected_lines = (
        base.transform_filter(_disorder_matches)
            # country_sel is a selection, so it can be used as a filter directly.
            .transform_filter(country_sel)
            .mark_line()
            .encode(
                x=x_year,
                y=y_metric,
                color=alt.Color("Entity:N", legend=None),
                tooltip=[
                    alt.Tooltip("Entity:N", title="Country"),
                    alt.Tooltip("Year:Q"),
                    value_tip,
                ],
            )
    )

    return global_line, selected_lines, (global_line + selected_lines).interactive()


global_prev, selected_prev, prev_ts = _metric_time_series(
    metric="prevalence",
    y_title="Prevalence (% of population)",
    value_format=".2f",
    title="Prevalence over time (global mean + selected countries)",
)

global_dalys, selected_dalys, dalys_ts = _metric_time_series(
    metric="dalys_rate",
    y_title="DALYs rate (age-standardized)",
    value_format=".1f",
    title="DALYs over time (global mean + selected countries)",
)


In [20]:
# Upper row: scatter plot next to the ranking; lower block: the two time series stacked.
top = (scatter | bars).properties(spacing=10)
bottom = (prev_ts & dalys_ts).properties(spacing=10)

# Stack both parts and attach the widget parameters at the top level so the
# dropdown and the slider are available to every view.
dashboard = alt.vconcat(top, bottom, spacing=15).add_params(disorder_param, year_param)
# Independent colour scales keep the colour encodings of the upper and lower
# parts from being merged into one scale.
dashboard = dashboard.resolve_scale(color="independent")
dashboard = dashboard.configure_title(fontSize=16)

# Write the dashboard as a standalone interactive HTML file (no action menu).
out_html = DOCS_DIR / "index.html"
dashboard.save(out_html, embed_options={"actions": False})

out_html


  exec(code_obj, self.user_global_ns, self.user_ns)


PosixPath('/Users/sergio/Library/Mobile Documents/com~apple~CloudDocs/Master/First Semester/Visual Data Science/VDS_Mental_Health/docs/index.html')