In [1]:
import pandas as pd
import numpy as np
import altair as alt
import pycountry
from vega_datasets import data

# Load the raw survey responses; the path is relative to the notebook's working directory.
raw = pd.read_csv('data/survey.csv')
def clean_gender(x):
    """Collapse a free-text gender answer into 'Female', 'Male' or None.

    The answer is lower-cased, split into alphabetic words, and any "cis"
    qualifier is dropped. The first remaining word decides the label:

    * 'f', 'woman', or a word starting with 'fem' -> 'Female'
    * 'm', or a word starting with 'ma'           -> 'Male'

    Matching on the first word (instead of the first letter of the whole
    string) still absorbs trailing typos such as 'femake' or 'maile', but no
    longer labels unrelated answers that merely begin with 'f' or 'm'
    (for example 'fluid'), and no longer drops 'Woman', 'Cis Female' or
    'Cis Male'.

    Parameters
    ----------
    x : object
        Raw cell value; may be missing (None / NaN).

    Returns
    -------
    str or None
        'Female', 'Male', or None when the value is missing or matches
        neither rule.
    """
    if pd.isna(x):
        return None
    # Turn every non-letter into a space so "Male (CIS)" or
    # "cis-female/femme" break into plain words.
    letters_only = ''.join(ch if ch.isalpha() else ' ' for ch in str(x).lower())
    words = [w for w in letters_only.split() if w != 'cis']
    if not words:
        return None
    head = words[0]
    if head in ('f', 'woman') or head.startswith('fem'):
        return 'Female'
    # The 'ma' prefix covers 'male', 'man' and close misspellings of 'male'.
    if head == 'm' or head.startswith('ma'):
        return 'Male'
    return None

# Normalised gender label ('Female' / 'Male' / None) derived from the free-text column.
raw['GenderClean'] = raw['Gender'].apply(clean_gender)

# df_a: respondent-level rows for the linked charts. Ages outside 18-100
# (inclusive) are discarded, as are rows missing any field the charts need.
df_a = (
    raw[raw['Age'].between(18, 100)]
      .dropna(subset=['Country', 'Age', 'treatment', 'family_history'])
      .copy()
)
# Seeded generator: the whole frame is handed to alt.Chart, so an unseeded
# draw would make the generated chart spec differ on every run.
df_a['jitter'] = np.random.default_rng(0).normal(0, 0.2, len(df_a))
# Overwrite the free-text answers with the normalised label.
df_a['Gender'] = df_a['GenderClean']

# df_b: every row that names a country, regardless of age; used for per-country totals.
df_b = raw.dropna(subset=['Country']).copy()

# Country name -> resolved code (or None). Each distinct name is resolved once
# instead of once per row; the fuzzy fallback in particular is repeated work.
_ISO_N3_CACHE = {}


def to_iso_n3(name):
    """Resolve a country name to its ISO 3166-1 numeric code.

    Tries ``pycountry.countries.lookup`` first and falls back to the first
    result of ``pycountry.countries.search_fuzzy``. Results, including
    failures, are memoised per distinct name.

    Parameters
    ----------
    name : object
        Country name as it appears in the data. Missing values, non-string
        values and blank strings are rejected without consulting pycountry.

    Returns
    -------
    int or None
        The numeric code as an ``int``, or ``None`` when the name cannot be
        resolved.
    """
    # NOTE(review): a blank query is assumed to be meaningless to pycountry,
    # so it is rejected here rather than handed to the fuzzy search.
    if not isinstance(name, str) or not name.strip():
        return None
    if name in _ISO_N3_CACHE:
        return _ISO_N3_CACHE[name]
    try:
        code = int(pycountry.countries.lookup(name).numeric)
    except Exception:
        # Deliberately best-effort: any failure of the exact lookup falls
        # through to the fuzzy search, and any failure there yields None.
        try:
            code = int(pycountry.countries.search_fuzzy(name)[0].numeric)
        except Exception:
            code = None
    _ISO_N3_CACHE[name] = code
    return code

# Attach the ISO numeric code to both frames in place and drop the rows whose
# country name could not be resolved.
for df in (df_a, df_b):
    iso_codes = df['Country'].map(to_iso_n3)
    df['id'] = iso_codes.astype('Int64')      # nullable ints so unresolved names stay missing
    df.dropna(subset=['id'], inplace=True)
    df['id'] = df['id'].astype(int)           # plain ints once the gaps are gone

# One row per (Country, id) pair with the number of responses in df_b.
responses_per_country = df_b.groupby(['Country', 'id']).size()
totals = responses_per_country.reset_index(name='total_count')

# Interactive parameters shared by the charts in this cell.
# `empty=True` is the boolean form expected by Altair 5; the string form
# 'all' is the pre-5 spelling and triggers a deprecation warning.

# Point selection on the country id.
# NOTE(review): `click` is not attached to any chart in this cell.
click = alt.selection_point(
    fields=['id'],
    name='country_click',
    on='click',
    empty=True,
)
# Horizontal interval brush.
brush = alt.selection_interval(
    encodings=['x'],
    name='age_brush',
    empty=True,
)
# Click selection on the `treatment` field.
sel_treat = alt.selection_point(
    fields=['treatment'],
    name='treat_select',
    on='click',
    empty=True,
)

# Country shapes from the world_110m TopoJSON file.
world = alt.topo_feature(data.world_110m.url, 'countries')

# Drop-down listing every country present in df_b, bound to a point selection on `Country`.
country_dropdown = alt.binding_select(
    options=sorted(df_b['Country'].unique()),
    name='Country: ',
)
sel_country = alt.selection_point(
    fields=['Country'],
    bind=country_dropdown,
    name='country_select',
    empty=True,
)
# World map: countries with at least one response are drawn in the darker blue.
# The title previously read "Treatment Distribution" (the bar chart's title) and
# carried the usage hint as a second title line, so `subtitleFontSize` had no
# effect; the hint is now a real subtitle.
map_chart = (
    alt.Chart(world).mark_geoshape(stroke='white')
      # Coerce the feature id to a number so it matches the integer `id` key in
      # `totals`; feature 158 is remapped to 156 before the lookup.
      .transform_calculate(
          id="""
            toNumber(datum.id) === 158
              ? 156
              : toNumber(datum.id)
          """
      )
      # Pull the country name and response count onto each shape.
      .transform_lookup(
          lookup='id',
          from_=alt.LookupData(totals, key='id',
                               fields=['Country', 'total_count'])
      )
      .encode(
          color=alt.condition(
              alt.datum.total_count > 0,
              alt.value('#18B7F6'),
              alt.value('#D7EEF6')
          ),
          tooltip=[
              alt.Tooltip('Country:N', title='Country'),
              alt.Tooltip('total_count:Q', title='Responses')
          ],
          # Shapes outside the drop-down selection are dimmed.
          opacity=alt.condition(sel_country, alt.value(1), alt.value(0.7))
      )
      .add_params(sel_country)
      .properties(
          width=700, height=350,
          title={
              "text": "Survey Coverage by Country",
              "subtitle": "（Click the drop-down bar below to select a country）",
              "subtitleFontSize": 12,
              "anchor": "start"
          }
      )
)

# Age by gender scatter for the selected country; hosts the age brush.
# The former `age_jitter` calculate step was never referenced by any encoding,
# so it has been removed. The usage hint is now a real subtitle so that
# `subtitleFontSize` applies to it.
scatter = (
    alt.Chart(df_a)
      .transform_filter(sel_country)
      .mark_circle(size=60)
      .encode(
          x='Age:Q',
          y=alt.Y('GenderClean:N', title='Gender'),
          # Points inside the brush are highlighted, the rest are faded.
          color=alt.condition(
              brush,
              alt.value('#666BCE'),
              alt.value('#D7EEF6')
          ),
          tooltip=['Country', 'Age', 'GenderClean', 'treatment', 'family_history'],
          opacity=alt.condition(brush, alt.value(1), alt.value(0.2))
      )
      .add_params(brush)
      .properties(
          width=700, height=200,
          title={
              "text": "Age Distribution",
              "subtitle": "（Drag in the graph to select the age range）",
              "subtitleFontSize": 12,
              "anchor": "start"
          }
      )
)
# Headline number: how many rows of df_a pass the country filter and the age brush.
_selected_count = alt.Text('count():Q', title='Selected Count', format='d')

count_text = alt.Chart(df_a).transform_filter(sel_country).transform_filter(brush)
count_text = count_text.mark_text(size=14, align='center', fontWeight='bold')
count_text = count_text.encode(text=_selected_count)
count_text = count_text.properties(title='Selected Respondents', width=700, height=30)
# Treatment counts for the current country and age selection; clicking a bar
# drives `sel_treat`. The usage hint used to be a second title line, which left
# `subtitleFontSize` without effect; it is now a real subtitle.
bar_treat = (
    alt.Chart(df_a)
      .transform_filter(sel_country)
      .transform_filter(brush)
      .transform_aggregate(
          count='count()',
          groupby=['treatment']
      )
      # No groupby here: `total` is the sum over all remaining bars, so
      # `percent` is each bar's share of the filtered rows.
      .transform_joinaggregate(
          total='sum(count)'
      )
      .transform_calculate(
          percent='datum.count / datum.total'
      )
      .mark_bar()
      .encode(
          x=alt.X('treatment:N', title='Treatment'),
          y=alt.Y('count:Q', title='Count'),
          color=alt.Color('treatment:N',
                          scale=alt.Scale(domain=['No', 'Yes'],
                                          range=['#D7EEF6', '#18B7F6']),
                          legend=None),
          # Bars not matching the click selection are faded.
          opacity=alt.condition(sel_treat, alt.value(1), alt.value(0.3)),
          tooltip=[
              alt.Tooltip('treatment:N', title='Treatment'),
              alt.Tooltip('count:Q', title='Count'),
              alt.Tooltip('percent:Q', title='Percent', format='.1%')
          ]
      )
      .add_params(sel_treat)
      .properties(
          width=250, height=250,
          title={
              "text": "Treatment Distribution",
              "subtitle": "（Click on the bar to filter the data under the corresponding conditions）",
              "subtitleFontSize": 12,
              "anchor": "start"
          }
      )
)
# Same pipeline and encodings as the bars, redrawn as text: the percentage is
# offset 7px upwards from each bar top.
label_treat = bar_treat.mark_text(
    dy=-7,
    size=12
).encode(
    text=alt.Text('percent:Q', format='.1%'),
    color=alt.value('black')
)
bar_treat_with_labels = bar_treat + label_treat
# Family-history counts for the current country, age and treatment selection.
# The commented-out opacity encoding has been dropped, and the hint that used
# to be a second title line is now a real subtitle so `subtitleFontSize` applies.
bar_fam = (
    alt.Chart(df_a)
      .transform_filter(sel_country)
      .transform_filter(brush)
      .transform_filter(sel_treat)
      .transform_aggregate(
          count='count()',
          groupby=['family_history']
      )
      # No groupby here: `total` is the sum over all remaining bars, so
      # `percent` is each bar's share of the filtered rows.
      .transform_joinaggregate(
          total='sum(count)'
      )
      .transform_calculate(
          percent='datum.count / datum.total'
      )
      .mark_bar()
      .encode(
          x=alt.X('family_history:N', title='Family History'),
          y=alt.Y('count:Q', title='Count'),
          color=alt.Color('family_history:N',
                          scale=alt.Scale(domain=['No', 'Yes'],
                                          range=['#D7EEF6', '#18B7F6']),
                          legend=None),
          tooltip=[
              alt.Tooltip('family_history:N', title='Family History'),
              alt.Tooltip('count:Q', title='Count'),
              alt.Tooltip('percent:Q', title='Percent', format='.1%')
          ]
      )
      .properties(
          width=250, height=250,
          title={
              "text": "Family History Distribution",
              "subtitle": "（Treatment-based screening）",
              "subtitleFontSize": 12,
              "anchor": "start"
          }
      )
)
# Percentage labels drawn from the same pipeline, offset 7px upwards.
label_fam = bar_fam.mark_text(
    dy=-7,
    size=12
).encode(
    text=alt.Text('percent:Q', format='.1%'),
    color=alt.value('black')
)
bar_fam_with_labels = bar_fam + label_fam
# Encodings are named up front so the chart chain below stays short.
_heat_tooltip = [
    alt.Tooltip('treatment:N', title='Treatment'),
    alt.Tooltip('family_history:N', title='Family History'),
    alt.Tooltip('count:Q', title='Count'),
]
_heat_color = alt.Color(
    'count:Q',
    title='Count',
    scale=alt.Scale(range=['#D7EEF6', '#666BCE']),
    legend=alt.Legend(orient='right', direction='vertical'),
)
_heat_opacity = alt.condition(sel_treat, alt.value(1), alt.value(0.3))

# Treatment x family-history cell counts for the current country and age
# selection; cells outside the treatment selection are faded.
heatmap = (
    alt.Chart(df_a, title='Treatment vs Family History Heatmap')
    .transform_filter(sel_country)
    .transform_filter(brush)
    .transform_aggregate(count='count()', groupby=['treatment', 'family_history'])
    .mark_rect()
    .encode(
        x=alt.X('treatment:N', title='Treatment'),
        y=alt.Y('family_history:N', title='Family History'),
        color=_heat_color,
        tooltip=_heat_tooltip,
        opacity=_heat_opacity,
    )
    .add_params(sel_treat)
    .properties(width=300, height=200)
)

# Single-row arrangement of the three summary charts with independent colour
# scales and legends.
# NOTE(review): built here but not placed into `dashboard` below.
bottom_row = (
    alt.hconcat(bar_treat_with_labels, bar_fam_with_labels, heatmap)
    .resolve_scale(color='independent')
    .resolve_legend(color='independent')
)

# Two-column layout: count + map on the left; scatter, bars and heatmap on the right.
bars = alt.hconcat(bar_treat_with_labels, bar_fam_with_labels)
left_col = alt.vconcat(count_text, map_chart, spacing=5)
right_col = alt.vconcat(scatter, bars, heatmap, spacing=10)

dashboard = alt.hconcat(left_col, right_col, spacing=5)
dashboard = dashboard.configure_title(fontSize=16)

dashboard

In [35]:
import pandas as pd
import numpy as np
import altair as alt
import pycountry
from vega_datasets import data

# Turn off Altair's max-rows check for data passed to charts.
alt.data_transformers.disable_max_rows()

# Load the raw survey responses; the path is relative to the notebook's working directory.
raw = pd.read_csv('data/survey.csv')

def clean_gender(x):
    """Collapse a free-text gender answer into 'Female', 'Male' or None.

    The answer is lower-cased, split into alphabetic words, and any "cis"
    qualifier is dropped. The first remaining word decides the label:

    * 'f', 'woman', or a word starting with 'fem' -> 'Female'
    * 'm', or a word starting with 'ma'           -> 'Male'

    Matching on the first word (instead of the first letter of the whole
    string) still absorbs trailing typos such as 'femake' or 'maile', but no
    longer labels unrelated answers that merely begin with 'f' or 'm'
    (for example 'fluid'), and no longer drops 'Woman', 'Cis Female' or
    'Cis Male'.

    Parameters
    ----------
    x : object
        Raw cell value; may be missing (None / NaN).

    Returns
    -------
    str or None
        'Female', 'Male', or None when the value is missing or matches
        neither rule.
    """
    if pd.isna(x):
        return None
    # Turn every non-letter into a space so "Male (CIS)" or
    # "cis-female/femme" break into plain words.
    letters_only = ''.join(ch if ch.isalpha() else ' ' for ch in str(x).lower())
    words = [w for w in letters_only.split() if w != 'cis']
    if not words:
        return None
    head = words[0]
    if head in ('f', 'woman') or head.startswith('fem'):
        return 'Female'
    # The 'ma' prefix covers 'male', 'man' and close misspellings of 'male'.
    if head == 'm' or head.startswith('ma'):
        return 'Male'
    return None

# Normalised gender label ('Female' / 'Male' / None) derived from the free-text column.
raw['GenderClean'] = raw['Gender'].apply(clean_gender)

# df_a: respondent-level rows for the linked charts. Ages outside 18-100
# (inclusive) are discarded, as are rows missing any field the charts need.
df_a = (
    raw[raw['Age'].between(18, 100)]
      .dropna(subset=['Country', 'Age', 'treatment', 'family_history'])
      .copy()
)
# df_b: every row that names a country, regardless of age; used for per-country totals.
df_b = raw.dropna(subset=['Country']).copy()

# Rows recorded as 'Taiwan' are relabelled 'China' in both frames.
for df in (df_a, df_b):
    df['Country'] = df['Country'].replace('Taiwan', 'China')

# Seeded generator: the whole frame is handed to alt.Chart, so an unseeded
# draw would make the exported chart spec differ on every run.
df_a['jitter'] = np.random.default_rng(0).normal(0, 0.2, len(df_a))
# Overwrite the free-text answers with the normalised label.
df_a['Gender'] = df_a['GenderClean']

# Country name -> resolved code (or None). Each distinct name is resolved once
# instead of once per row; the fuzzy fallback in particular is repeated work.
_ISO_N3_CACHE = {}


def to_iso_n3(name):
    """Resolve a country name to its ISO 3166-1 numeric code.

    Tries ``pycountry.countries.lookup`` first and falls back to the first
    result of ``pycountry.countries.search_fuzzy``. Results, including
    failures, are memoised per distinct name.

    Parameters
    ----------
    name : object
        Country name as it appears in the data. Missing values, non-string
        values and blank strings are rejected without consulting pycountry.

    Returns
    -------
    int or None
        The numeric code as an ``int``, or ``None`` when the name cannot be
        resolved.
    """
    # NOTE(review): a blank query is assumed to be meaningless to pycountry,
    # so it is rejected here rather than handed to the fuzzy search.
    if not isinstance(name, str) or not name.strip():
        return None
    if name in _ISO_N3_CACHE:
        return _ISO_N3_CACHE[name]
    try:
        code = int(pycountry.countries.lookup(name).numeric)
    except Exception:
        # Deliberately best-effort: any failure of the exact lookup falls
        # through to the fuzzy search, and any failure there yields None.
        try:
            code = int(pycountry.countries.search_fuzzy(name)[0].numeric)
        except Exception:
            code = None
    _ISO_N3_CACHE[name] = code
    return code

# Attach the ISO numeric code to both frames in place and drop the rows whose
# country name could not be resolved.
for df in (df_a, df_b):
    resolved_ids = df['Country'].map(to_iso_n3)
    df['id'] = resolved_ids.astype('Int64')   # nullable ints so unresolved names stay missing
    df.dropna(subset=['id'], inplace=True)
    df['id'] = df['id'].astype(int)           # plain ints once the gaps are gone

# One row per (Country, id) pair with the number of responses in df_b.
country_group_sizes = df_b.groupby(['Country', 'id']).size()
totals = country_group_sizes.reset_index(name='total_count')

# Interactive parameters shared by the charts in this cell.
# `empty=True` is the boolean form expected by Altair 5; the string form
# 'all' is the pre-5 spelling and triggers a deprecation warning.

# Horizontal interval brush.
brush = alt.selection_interval(encodings=['x'], name='age_brush', empty=True)
# Click selection on the `treatment` field.
sel_treat = alt.selection_point(fields=['treatment'], name='treat_select', on='click', empty=True)

# Drop-down listing every country present in df_b, bound to a point selection on `Country`.
country_dropdown = alt.binding_select(
    options=sorted(df_b['Country'].unique()),
    name='Country: '
)
sel_country = alt.selection_point(fields=['Country'], bind=country_dropdown, name='country_select', empty=True)

# Country shapes from the world_110m TopoJSON file.
world = alt.topo_feature(data.world_110m.url, 'countries')

# Numeric feature id, with 158 remapped to 156, so the lookup key lines up with `totals.id`.
_map_id_expr = """toNumber(datum.id) === 158 ? 156 : toNumber(datum.id)"""
_map_totals = alt.LookupData(totals, key='id', fields=['Country', 'total_count'])

# Countries with at least one response get the darker fill.
_map_fill = alt.condition(
    alt.datum.total_count > 0,
    alt.value('#18B7F6'),
    alt.value('#D7EEF6'),
)
_map_tooltip = [
    alt.Tooltip('Country:N', title='Country'),
    alt.Tooltip('total_count:Q', title='Responses'),
]
_map_title = {
    "text": "Survey Coverage by Country",
    "subtitle": "Use the dropdown above to focus on a country",
    "anchor": "start"
}

# NOTE(review): the subtitle says the drop-down is "above"; confirm where the
# bound widget is actually rendered in the target front end.
map_chart = (
    alt.Chart(world)
    .mark_geoshape(stroke='white')
    .transform_calculate(id=_map_id_expr)
    .transform_lookup(lookup='id', from_=_map_totals)
    .encode(
        color=_map_fill,
        tooltip=_map_tooltip,
        # Shapes outside the drop-down selection are dimmed.
        opacity=alt.condition(sel_country, alt.value(1), alt.value(0.7)),
    )
    .properties(width=450, height=290, title=_map_title)
)

# Age by gender scatter for the selected country; hosts the age brush.
# The former `age_jitter` calculate step was never referenced by any encoding,
# so it has been removed.
scatter = (
    alt.Chart(df_a)
      .transform_filter(sel_country)
      .mark_circle(size=60)
      .encode(
          x=alt.X('Age:Q', title='Age'),
          y=alt.Y('GenderClean:N', title='Gender',
                  axis=alt.Axis(labelPadding=4, titlePadding=6, minExtent=0)),
          # Points inside the brush are highlighted, the rest are faded.
          color=alt.condition(brush, alt.value('#666BCE'), alt.value('#D7EEF6')),
          tooltip=['Country', 'Age', 'GenderClean', 'treatment', 'family_history'],
          opacity=alt.condition(brush, alt.value(1), alt.value(0.2))
      )
      .add_params(brush)
      .properties(
          width=400, height=270,
          title={"text": "Age × Gender (filtered by country)",
                 "subtitle": "Drag on the chart to brush an age range",
                 "anchor": "start"}
      )
)

# Headline number: how many rows of df_a pass the country filter and the age brush.
_count_title = alt.TitleParams(text='Selected Respondents', anchor='middle')
_count_value = alt.Text('count():Q', format='d')

count_text = alt.Chart(df_a).transform_filter(sel_country).transform_filter(brush)
count_text = count_text.mark_text(size=14, align='center', fontWeight='bold')
count_text = count_text.encode(text=_count_value)
count_text = count_text.properties(width=400, height=30, title=_count_title)

# Pieces of the treatment bar chart, named so the chain below reads top-down.
_treat_palette = alt.Scale(domain=['No', 'Yes'], range=['#D7EEF6', '#18B7F6'])
_treat_tooltip = [
    alt.Tooltip('treatment:N', title='Treatment'),
    alt.Tooltip('count:Q', title='Count'),
    alt.Tooltip('percent:Q', title='Percent', format='.1%'),
]
_treat_title = {
    "text": "Treatment",
    "subtitle": "Bars show count; labels show percent",
    "anchor": "start"
}

# Treatment counts for the current country and age selection; clicking a bar
# drives `sel_treat`, and bars outside that selection are faded.
bar_treat = (
    alt.Chart(df_a)
    .transform_filter(sel_country)
    .transform_filter(brush)
    .transform_aggregate(count='count()', groupby=['treatment'])
    # No groupby: `total` spans every remaining bar, so `percent` is a share of the filtered rows.
    .transform_joinaggregate(total='sum(count)')
    .transform_calculate(percent='datum.count / datum.total')
    .mark_bar()
    .encode(
        x=alt.X('treatment:N', title='Treatment'),
        y=alt.Y('count:Q', title='Count'),
        color=alt.Color('treatment:N', scale=_treat_palette, legend=None),
        opacity=alt.condition(sel_treat, alt.value(1), alt.value(0.3)),
        tooltip=_treat_tooltip,
    )
    .add_params(sel_treat)
    .properties(width=160, height=220, title=_treat_title)
)

# Percentage labels reuse the bar pipeline and are offset 7px upwards.
label_treat = bar_treat.mark_text(dy=-7, size=12).encode(
    text=alt.Text('percent:Q', format='.1%'),
    color=alt.value('black'),
)
bar_treat_with_labels = alt.layer(bar_treat, label_treat)

# Pieces of the family-history bar chart, named so the chain below reads top-down.
_fam_palette = alt.Scale(domain=['No', 'Yes'], range=['#D7EEF6', '#18B7F6'])
_fam_tooltip = [
    alt.Tooltip('family_history:N', title='Family History'),
    alt.Tooltip('count:Q', title='Count'),
    alt.Tooltip('percent:Q', title='Percent', format='.1%'),
]
_fam_title = {
    "text": "Family History (filtered by Treatment)",
    "subtitle": "Bars show count; labels show percent",
    "anchor": "start"
}

# Family-history counts for the current country, age and treatment selection.
bar_fam = (
    alt.Chart(df_a)
    .transform_filter(sel_country)
    .transform_filter(brush)
    .transform_filter(sel_treat)
    .transform_aggregate(count='count()', groupby=['family_history'])
    # No groupby: `total` spans every remaining bar, so `percent` is a share of the filtered rows.
    .transform_joinaggregate(total='sum(count)')
    .transform_calculate(percent='datum.count / datum.total')
    .mark_bar()
    .encode(
        x=alt.X('family_history:N', title='Family History'),
        y=alt.Y('count:Q', title='Count'),
        color=alt.Color('family_history:N', scale=_fam_palette, legend=None),
        tooltip=_fam_tooltip,
    )
    .properties(width=160, height=220, title=_fam_title)
)

# Percentage labels reuse the bar pipeline and are offset 7px upwards.
label_fam = bar_fam.mark_text(dy=-7, size=12).encode(
    text=alt.Text('percent:Q', format='.1%'),
    color=alt.value('black'),
)
bar_fam_with_labels = alt.layer(bar_fam, label_fam)

# Encodings are named up front so the chart chain below stays short.
_heatmap_hover = [
    alt.Tooltip('treatment:N', title='Treatment'),
    alt.Tooltip('family_history:N', title='Family History'),
    alt.Tooltip('count:Q', title='Count'),
]
_heatmap_fill = alt.Color(
    'count:Q',
    title='Count',
    scale=alt.Scale(range=['#D7EEF6', '#666BCE']),
    legend=alt.Legend(orient='right', direction='vertical'),
)
_heatmap_fade = alt.condition(sel_treat, alt.value(1), alt.value(0.3))

# Treatment x family-history cell counts for the current country and age
# selection; cells outside the treatment selection are faded.
heatmap = (
    alt.Chart(df_a, title='Treatment vs Family History Heatmap')
    .transform_filter(sel_country)
    .transform_filter(brush)
    .transform_aggregate(count='count()', groupby=['treatment', 'family_history'])
    .mark_rect()
    .encode(
        x=alt.X('treatment:N', title='Treatment'),
        y=alt.Y('family_history:N', title='Family History'),
        color=_heatmap_fill,
        tooltip=_heatmap_hover,
        opacity=_heatmap_fade,
    )
    .add_params(sel_treat)
    .properties(width=300, height=200)
)


# Zero-height, fully transparent chart whose only job is to carry the
# `sel_country` parameter.
_dropdown_host = pd.DataFrame({'x': [0]})
dropdown_bar = (
    alt.Chart(_dropdown_host)
    .mark_point(opacity=0)
    .add_params(sel_country)
    .properties(width=370, height=0)
)

# Three columns: map | count + scatter | bars over heatmap.
left_col = alt.vconcat(dropdown_bar, map_chart, spacing=8)
mid_col = alt.vconcat(count_text, scatter, spacing=10)

right_top = alt.hconcat(bar_treat_with_labels, bar_fam_with_labels)
right_col = (
    alt.vconcat(right_top, heatmap, spacing=50)
    .resolve_scale(color='independent')
    .resolve_legend(color='independent')
)

_dashboard_title = {
    "text": "INSTRUCTION: Dataset Introduction & Interactive Dashboard",
    "subtitle": [
        "1) Select a country from the map dropdown → All maps will be synchronized to that country.",
        "2) Drag a box around the middle scatter plot to select an age group → The bar chart and heat map on the right will be synchronized with the population group above.",
        "3) Click Yes/No on the Treatment column → View only the family history of that treatment; then click the blank to clear.",
        "4) Hover to view specific values; click the blank on the scatter plot to clear the age selection.",
        "Data: Mental Health in Tech Survey"
    ],
    "anchor": "start"
}
_dashboard_padding = {"top": 16, "left": 0, "right": 0, "bottom": 0}

# Title and padding are set in one `.properties` call and each `configure_*`
# is applied once; the resulting chart object is unchanged.
dashboard = (
    alt.hconcat(left_col, mid_col, right_col, spacing=80)
    .properties(title=_dashboard_title, padding=_dashboard_padding)
    .configure_title(fontSize=18, subtitleFontSize=15, anchor="start")
    .configure_axis(labelFontSize=11, titleFontSize=12, grid=True, gridOpacity=0.15)
    .configure_legend(labelFontSize=11, titleFontSize=12, orient='right')
    .configure_view(strokeWidth=0)
)

dashboard


In [36]:
import json

# Serialise the assembled dashboard to a JSON spec file next to the notebook.
spec = dashboard.to_dict()
with open('dashboard_spec.json', 'w') as spec_file:
    spec_file.write(json.dumps(spec, indent=2))