In [None]:
# Export the assembled dashboard to a standalone HTML file.
# NOTE(review): `dashboard` is only assigned by cells further down in this notebook, so
# this cell raises NameError on a fresh kernel when run top-to-bottom — consider moving
# it after the cell that builds the dashboard.
dashboard.save('final_4.0.html')

In [24]:
import pandas as pd
import numpy as np
import altair as alt
import pycountry
from vega_datasets import data

# Load the raw survey responses; the frames built below are all derived from `raw`.
raw = pd.read_csv('data/survey.csv')
def clean_gender(x):
    """Collapse a free-text gender answer into 'Female', 'Male' or None.

    The value is stringified, stripped and lower-cased; only its first
    character is inspected ('f' -> 'Female', 'm' -> 'Male'). Missing values
    and anything else (including the empty string) yield None.
    """
    if pd.isna(x):
        return None
    initial = str(x).strip().lower()[:1]
    return {'f': 'Female', 'm': 'Male'}.get(initial)

# Normalised gender label ('Female' / 'Male' / None) for every raw response.
raw['GenderClean'] = raw['Gender'].apply(clean_gender)

# df_a: respondents aged 18-100 (inclusive) with no missing value in the columns
# the charts rely on.
df_a = (
    raw[(raw['Age'] >= 18) & (raw['Age'] <= 100)]
      .dropna(subset=['Country','Age','treatment','family_history'])
      .copy()
)
# Draw the jitter from a seeded generator rather than the global, unseeded RNG so
# the column (and therefore the data embedded in the chart spec) is identical on
# every Restart & Run All.
JITTER_SEED = 42
df_a['jitter'] = np.random.default_rng(JITTER_SEED).normal(0, 0.2, len(df_a))
# Overwrite the free-text Gender column with the cleaned label.
df_a['Gender'] = df_a['GenderClean']
# df_b: every response that has a Country, with no age filter applied.
df_b = (
    raw.dropna(subset=['Country'])
       .copy()
)

def to_iso_n3(name):
    """Return the pycountry `numeric` code for `name` as an int, or None.

    An exact `pycountry.countries.lookup` is attempted first; if that raises,
    the first hit of `search_fuzzy` is used instead. If both attempts raise,
    None is returned.
    """
    strategies = (
        lambda query: pycountry.countries.lookup(query),
        lambda query: pycountry.countries.search_fuzzy(query)[0],
    )
    for resolve in strategies:
        try:
            return int(resolve(name).numeric)
        except Exception:
            # Best-effort: fall through to the next strategy (or to None).
            continue
    return None

# Attach the numeric country id to both frames and drop rows whose country could
# not be resolved. The frames are modified in place.
for df in (df_a, df_b):
    # Resolve each distinct country name once instead of once per row: to_iso_n3
    # may fall back to a fuzzy search, which is wasteful to repeat for duplicates.
    iso_by_country = {name: to_iso_n3(name) for name in df['Country'].unique()}
    # Nullable Int64 keeps unresolved countries as <NA> until they are dropped.
    df['id'] = df['Country'].map(iso_by_country).astype('Int64')
    df.dropna(subset=['id'], inplace=True)
    df['id'] = df['id'].astype(int)

# Number of responses per (Country, id) pair in df_b; used below as the lookup
# table that is joined onto the map features.
totals = (
    df_b.groupby(['Country','id'])
        .size()
        .reset_index(name='total_count')
)
# Dropdown listing 'All' followed by every country present in df_b.
country_dropdown = alt.binding_select(
    options=['All'] + sorted(df_b['Country'].unique()),
    name='Country: '
)
# Point selection on the Country field, driven by the dropdown and initialised
# to the literal value "All".
sel_country = alt.selection_point(
    name='country_select',
    fields=['Country'],
    bind=country_dropdown,
    value=[{"Country":"All"}],
    empty='all'
)
# Expression predicate used by the charts below: keep every row while the dropdown
# is on 'All', otherwise keep only rows whose Country equals the selected one.
filter_expr = "(country_select.Country=='All') || (datum.Country==country_select.Country)"

# Interval selection along x (named age_brush) and a click selection on `treatment`.
brush     = alt.selection_interval(encodings=['x'], name='age_brush', empty='all')
sel_treat = alt.selection_point(fields=['treatment'], name='treat_select', on='click', empty='all')
# Country shapes from the vega_datasets world_110m TopoJSON.
world = alt.topo_feature(data.world_110m.url, 'countries')
# 2) Dropdown placeholder: a fully transparent text mark on a one-row dummy frame
#    whose only purpose is to carry sel_country (and thus its bound dropdown).
dropdown = (
    alt.Chart(pd.DataFrame({'dummy':[0]}))
      .mark_text(text=' ', opacity=0)
      .add_params(sel_country)
      .properties(width=300, height=30)
)
# Text mark showing how many df_a rows survive the country filter and the age brush.
count_text = (
    alt.Chart(df_a)
      # Use the same 'All'-aware predicate as the other charts. Filtering on the
      # selection object itself would test Country == "All" (the selection's initial
      # value), which no row satisfies, so the counter would not reflect the full
      # data set while the dropdown is on 'All'.
      .transform_filter(filter_expr)
      .transform_filter(brush)
      .mark_text(size=14, align='center', fontWeight='bold')
      .encode(
         text=alt.Text('count():Q', title='Selected Count', format='d')
      )
      .properties(height=30, width=450, title='Selected Respondents')
)
# 3) World map: countries with at least one response are drawn in the darker colour.
map_chart = (
    alt.Chart(world).mark_geoshape(stroke='white')
      # Remap feature id 158 to 156 so that feature joins onto the `totals` row with id 156.
      .transform_calculate(
          id="""
            toNumber(datum.id) === 158
              ? 156
              : toNumber(datum.id)
          """
      )
      # Join Country / total_count from `totals` onto each map feature by id.
      .transform_lookup(
          lookup='id',
          from_=alt.LookupData(totals, key='id',
                               fields=['Country','total_count'])
      )
      # The country filter must run AFTER the lookup: the map features only gain a
      # `Country` field from the join above. Applied before it, datum.Country is
      # undefined, so choosing any specific country removed every feature and left
      # the map empty.
      .transform_filter(filter_expr)
      .encode(
          color=alt.condition(
              alt.datum.total_count > 0,
              alt.value('#18B7F6'),   # darker — has responses
              alt.value('#D7EEF6')    # lighter — no responses
          ),
          tooltip=[
              alt.Tooltip('Country:N', title='Country'),
              alt.Tooltip('total_count:Q', title='Responses')
          ],
          opacity=alt.condition(sel_country, alt.value(1), alt.value(0.7))
      )
      .add_params(sel_country)
      .properties(width=400, height=270,
                  title={
                    "text": ["Treatment Distribution", "（点击有色地图可查看该国家相关信息）"],
                    "subtitleFontSize":12,
                    "anchor":"start"
  })
)


# 4) Scatter of Age against cleaned gender, restricted by the country filter.
#    Points inside the age brush are highlighted; points outside are faded.
scatter = (
    alt.Chart(df_a).transform_filter(filter_expr)
      # NOTE(review): age_jitter is calculated here but no encoding below references
      # it (x uses the raw Age) — confirm whether it is still needed.
      .transform_calculate(
          age_jitter="datum.Age + (random() - 0.5) * 2" 
      )
      .mark_circle(size=60)
      .encode(
          x='Age:Q',
          y=alt.Y('GenderClean:N', title='Gender'),
          # yOffset='jitterY:Q',
          color=alt.condition(
              brush,
              alt.value('#666BCE'), 
              alt.value('#D7EEF6')
          ),
          tooltip=['Country','Age','GenderClean','treatment','family_history'],
          opacity=alt.condition(brush, alt.value(1), alt.value(0.2))
      )
      # The brush is attached here, so the age interval is drawn on this chart.
      .add_params(sel_country,brush)
      .properties(width=700, height=200,
                  title={
                    "text": ["Age Distribution", "（拖拽刷选年龄区间）"],
                    "subtitleFontSize":12,
                    "anchor":"start"
          })

)

# 5) Bar chart of treatment counts for the rows passing the country filter and age brush.
bar_treat = (
    alt.Chart(df_a)
      .transform_filter(filter_expr)    # <-- must be included
      .transform_filter(brush)
      # Count rows per treatment value ...
      .transform_aggregate(
          count='count()',
          groupby=['treatment']
      )
      # ... then attach the grand total to every bar so a share can be computed.
      .transform_joinaggregate(
          total='sum(count)'
      )
      .transform_calculate(
          percent='datum.count / datum.total'
      )
      .mark_bar()
      .encode(
          x=alt.X('treatment:N', title='Treatment'),
          y=alt.Y('count:Q',      title='Count'),
          color=alt.Color('treatment:N',
                          scale=alt.Scale(domain=['No','Yes'],
                                          range=['#D7EEF6','#18B7F6']),
                          legend=None),
          # Bars not matching the clicked treatment are faded.
          opacity=alt.condition(sel_treat, alt.value(1), alt.value(0.3)),
          tooltip=[
            alt.Tooltip('treatment:N', title='Treatment'),
            alt.Tooltip('count:Q',     title='Count'),
            alt.Tooltip('percent:Q',   title='Percent', format='.1%')
          ]
      )
      .add_params(sel_country, sel_treat)
      .properties(width=250, height=250, title={
          "text":["Treatment Distribution","（点击条形筛选）"],
          "subtitleFontSize":12,"anchor":"start"
      })
)
# Percentage labels drawn 7px above each bar, derived from the same chart definition.
label_treat = bar_treat.mark_text(
    dy=-7,
    size=12
).encode(
    text=alt.Text('percent:Q', format='.1%'),
    color=alt.value('black')
)
# Layer the labels on top of the bars.
bar_treat_with_labels = bar_treat + label_treat
# 6) Bar chart of family_history counts; in addition to the country filter and age
#    brush it is also restricted to the treatment clicked in the treatment charts.
bar_fam = (
    alt.Chart(df_a)
      .transform_filter(filter_expr)
      .transform_filter(brush)
      .transform_filter(sel_treat)
      # Count rows per family_history value ...
      .transform_aggregate(
          count='count()',
          groupby=['family_history']
      )
      # ... then attach the grand total to every bar so a share can be computed.
      .transform_joinaggregate(
          total='sum(count)'
      )
      .transform_calculate(
          percent='datum.count / datum.total'
      )
      .mark_bar()
      .encode(
          x=alt.X('family_history:N', title='Family History'),
          y=alt.Y('count:Q',            title='Count'),
          color=alt.Color('family_history:N',
                          scale=alt.Scale(domain=['No','Yes'],
                                          range=['#D7EEF6','#18B7F6']),
                          legend=None),
          # opacity=alt.condition(sel_treat, alt.value(1), alt.value(0.3)),
          tooltip=[
            alt.Tooltip('family_history:N', title='Family History'),
            alt.Tooltip('count:Q',            title='Count'),
            alt.Tooltip('percent:Q',          title='Percent', format='.1%')
          ]
      )
      .properties(width=250, height=250, title={
          "text":["Family History Distribution","（基于 Treatment 筛选）"],
          "subtitleFontSize":12,"anchor":"start"
      })
)
# Percentage labels drawn 7px above each bar, derived from the same chart definition.
label_fam = bar_fam.mark_text(
    dy=-7,     
    size=12
).encode(
    text=alt.Text('percent:Q', format='.1%'),
    color=alt.value('black')
)
# Layer the labels on top of the bars.
bar_fam_with_labels = bar_fam + label_fam
# 7) Heatmap of row counts per (treatment, family_history) combination for the rows
#    passing the country filter and age brush.
heatmap = (
    alt.Chart(df_a)
      .transform_filter(filter_expr)
      .transform_filter(brush)
      .transform_aggregate(
          count='count()',
          groupby=['treatment','family_history']
      )
      .mark_rect()
      .encode(
          x=alt.X('treatment:N', title='Treatment'),
          y=alt.Y('family_history:N', title='Family History'),
          color=alt.Color('count:Q', title='Count',
                          scale=alt.Scale(range=['#D7EEF6', '#666BCE']),
                          legend=alt.Legend(
                          orient='right',  
                          direction='vertical'
              )),
          tooltip=[
              alt.Tooltip('treatment:N', title='Treatment'),
              alt.Tooltip('family_history:N', title='Family History'),
              alt.Tooltip('count:Q', title='Count')
          ],
          # Cells not matching the clicked treatment are faded.
          opacity=alt.condition(sel_treat, alt.value(1), alt.value(0.3))
      )
      .add_params(sel_country,sel_treat)
      .properties(width=300, height=200)
)
# Layout: the two bar charts sit side by side; the left column stacks the dropdown
# placeholder, the counter and the map; the right column stacks the scatter, the
# bars and the heatmap.
bars = bar_treat_with_labels | bar_fam_with_labels
left_col = alt.vconcat(dropdown, count_text, map_chart, spacing=5)
right_col= alt.vconcat(scatter, bars, heatmap, spacing=10)

# NOTE(review): the three selections are added again at the top level although the
# individual charts above already add them — confirm this duplication is intended.
dashboard = alt.hconcat(
    left_col,
    right_col,
    spacing=5
).add_params(
    sel_country, brush, sel_treat 
).configure_title(fontSize=16)

# Last expression of the cell: renders the dashboard as the cell output.
dashboard


In [2]:
import json

# Dump the dashboard's dictionary form to dashboard_spec.json (2-space indented).
spec = dashboard.to_dict()
with open('dashboard_spec.json','w') as spec_file:
    spec_file.write(json.dumps(spec, indent=2))

In [7]:
# import pandas as pd
# import numpy as np
# import altair as alt
# import pycountry
# from vega_datasets import data

# raw = pd.read_csv('data/survey.csv')
# def clean_gender(x):
#     if pd.isna(x):
#         return None
#     s = str(x).strip().lower()
#     if s.startswith('f'):
#         return 'Female'
#     if s.startswith('m'):
#         return 'Male'
#     return None

# raw['GenderClean'] = raw['Gender'].apply(clean_gender)

# df_a = (
#     raw[(raw['Age'] >= 18) & (raw['Age'] <= 100)]
#       .dropna(subset=['Country','Age','treatment','family_history'])
#       .copy()
# )
# df_a['jitter'] = np.random.normal(0, 0.2, len(df_a))
# df_a['Gender'] = df_a['GenderClean'] 
# df_b = (
#     raw.dropna(subset=['Country'])
#        .copy()
# )

# def to_iso_n3(name):
#     try:
#         return int(pycountry.countries.lookup(name).numeric)
#     except Exception:
#         try:
#             return int(pycountry.countries.search_fuzzy(name)[0].numeric)
#         except Exception:
#             return None

# for df in (df_a, df_b):
#     df['id'] = df['Country'].map(to_iso_n3).astype('Int64')
#     df.dropna(subset=['id'], inplace=True)
#     df['id'] = df['id'].astype(int)

# totals = (
#     df_b.groupby(['Country','id'])
#         .size()
#         .reset_index(name='total_count')
# )

# click     = alt.selection_point(fields=['id'],
#                                 name='country_click',
#                                 on='click',
#                                 empty='all')
# brush     = alt.selection_interval(encodings=['x'],
#                                    name='age_brush',
#                                    empty='all')
# sel_treat = alt.selection_point(fields=['treatment'],
#                                 name='treat_select',
#                                 on='click',
#                                 empty='all')

# world = alt.topo_feature(data.world_110m.url, 'countries')

# country_dropdown = alt.binding_select(
#     options=['All'] + sorted(df_b['Country'].unique()), name='Country: '
# )
# sel_country = alt.selection_point(
#     fields=['Country'],
#     bind=country_dropdown,
#     name='country_select',
#     empty='all'  # “All” 时不过滤
# )

# map_chart = (
#     alt.Chart(world).mark_geoshape(stroke='white')
#       .transform_calculate(
#           id="""
#             toNumber(datum.id) === 158
#               ? 156
#               : toNumber(datum.id)
#           """
#       )
#       .transform_lookup(
#           lookup='id',
#           from_=alt.LookupData(totals, key='id',
#                                fields=['Country','total_count'])
#       )
#       .encode(
#           color=alt.condition(
#               alt.datum.total_count > 0,
#               alt.value('#18B7F6'),   # 深色—有数据
#               alt.value('#D7EEF6')    # 浅色—无数据
#           ),
#           tooltip=[
#               alt.Tooltip('Country:N', title='Country'),
#               alt.Tooltip('total_count:Q', title='Responses')
#           ],
#           opacity=alt.condition(sel_country, alt.value(1), alt.value(0.7))
#       )
#       .add_params(sel_country)
#       .properties(width=700, height=350,
#                   title={
#                     "text": ["Treatment Distribution", "（点击有色地图可查看该国家相关信息）"],
#                     "subtitleFontSize":12,
#                     "anchor":"start"
#   })
# )

# scatter = (
#     alt.Chart(df_a)
#       .transform_filter(sel_country)
#       .transform_calculate(
#           age_jitter="datum.Age + (random() - 0.5) * 2"  # ±1岁的水平抖动
#       )
#       .mark_circle(size=60)
#       .encode(
#           x='Age:Q',
#           y=alt.Y('GenderClean:N', title='Gender'),
#           # yOffset='jitterY:Q',
#           color=alt.condition(
#               brush,
#               alt.value('#666BCE'),    # 深蓝
#               alt.value('#D7EEF6')     # 刷选外的淡色可选
#           ),
#           tooltip=['Country','Age','GenderClean','treatment','family_history'],
#           opacity=alt.condition(brush, alt.value(1), alt.value(0.2))
#       )
#       .add_params(brush)
#       .properties(width=700, height=200,
#                   title={
#                     "text": ["Age Distribution", "（拖拽刷选年龄区间）"],
#                     "subtitleFontSize":12,
#                     "anchor":"start"
#           })
#       #  .facet(
#       #   row=alt.Row('GenderClean:N', title=None,
#       #               header=alt.Header(labelFontSize=12, labelPadding=4))
#       # )
# )
# count_text = (
#     alt.Chart(df_a)
#       .transform_filter(sel_country)
#       .transform_filter(brush)
#       .mark_text(size=14, align='center', fontWeight='bold')
#       .encode(
#          text=alt.Text('count():Q', title='Selected Count', format='d')
#       )
#       .properties(height=30, width=700, title='Selected Respondents')
# )
# bar_treat = (
#     alt.Chart(df_a)
#       .transform_filter(sel_country)       # 国家
#       .transform_filter(brush)       # 年龄区间
#       .transform_aggregate(
#           count='count()',
#           groupby=['treatment']
#       )
#       .transform_joinaggregate(
#           total='sum(count)'
#       )
#       .transform_calculate(
#           percent='datum.count / datum.total'
#       )
#       .mark_bar()
#       .encode(
#           x=alt.X('treatment:N', title='Treatment'),
#           y=alt.Y('count:Q',      title='Count'),
#           color=alt.Color('treatment:N',
#                           scale=alt.Scale(domain=['No','Yes'],
#                                           range=['#D7EEF6','#18B7F6']),
#                           legend=None),
#           opacity=alt.condition(sel_treat, alt.value(1), alt.value(0.3)),
#           tooltip=[
#             alt.Tooltip('treatment:N', title='Treatment'),
#             alt.Tooltip('count:Q',     title='Count'),
#             alt.Tooltip('percent:Q',   title='Percent', format='.1%')
#           ]
#       )
#       .add_params(sel_treat)
#       .properties(width=250, height=250, title={
#           "text":["Treatment Distribution","（点击条形筛选）"],
#           "subtitleFontSize":12,"anchor":"start"
#       })
# )
# label_treat = bar_treat.mark_text(
#     dy=-7,
#     size=12
# ).encode(
#     text=alt.Text('percent:Q', format='.1%'),
#     color=alt.value('black')
# )
# bar_treat_with_labels = bar_treat + label_treat
# # —— Family History 条形图 —— 
# bar_fam = (
#     alt.Chart(df_a)
#       .transform_filter(sel_country)
#       .transform_filter(brush)
#       .transform_filter(sel_treat)    # 先按 Treatment 筛选
#       .transform_aggregate(
#           count='count()',
#           groupby=['family_history']
#       )
#       .transform_joinaggregate(
#           total='sum(count)'
#       )
#       .transform_calculate(
#           percent='datum.count / datum.total'
#       )
#       .mark_bar()
#       .encode(
#           x=alt.X('family_history:N', title='Family History'),
#           y=alt.Y('count:Q',            title='Count'),
#           color=alt.Color('family_history:N',
#                           scale=alt.Scale(domain=['No','Yes'],
#                                           range=['#D7EEF6','#18B7F6']),
#                           legend=None),
#           # opacity=alt.condition(sel_treat, alt.value(1), alt.value(0.3)),
#           tooltip=[
#             alt.Tooltip('family_history:N', title='Family History'),
#             alt.Tooltip('count:Q',            title='Count'),
#             alt.Tooltip('percent:Q',          title='Percent', format='.1%')
#           ]
#       )
#       .properties(width=250, height=250, title={
#           "text":["Family History Distribution","（基于 Treatment 筛选）"],
#           "subtitleFontSize":12,"anchor":"start"
#       })
# )
# label_fam = bar_fam.mark_text(
#     dy=-7,     
#     size=12
# ).encode(
#     text=alt.Text('percent:Q', format='.1%'),
#     color=alt.value('black')
# )
# bar_fam_with_labels = bar_fam + label_fam
# heatmap = (
#     alt.Chart(df_a, title='Treatment vs Family History Heatmap')
#       .transform_filter(sel_country)
#       .transform_filter(brush)
#       .transform_aggregate(
#           count='count()',
#           groupby=['treatment','family_history']
#       )
#       .mark_rect()
#       .encode(
#           x=alt.X('treatment:N', title='Treatment'),
#           y=alt.Y('family_history:N', title='Family History'),
#           color=alt.Color('count:Q', title='Count',
#                           scale=alt.Scale(range=['#D7EEF6', '#666BCE']),
#                           legend=alt.Legend(
#                           orient='right',    # 固定到右边
#                           direction='vertical'
#               )),
#           tooltip=[
#               alt.Tooltip('treatment:N', title='Treatment'),
#               alt.Tooltip('family_history:N', title='Family History'),
#               alt.Tooltip('count:Q', title='Count')
#           ],
#           opacity=alt.condition(sel_treat, alt.value(1), alt.value(0.3))
#       )
#       .add_params(sel_treat)
#       .properties(width=300, height=200)
# )

# bars = bar_treat_with_labels | bar_fam_with_labels
# left_col = alt.vconcat(count_text,map_chart, spacing=5)
# right_col= alt.vconcat(scatter, bars, heatmap, spacing=10)

# dashboard = alt.hconcat(
#     left_col,
#     right_col,
#     spacing=5
# ).configure_title(fontSize=16)

# dashboard

In [3]:
import pandas as pd
import numpy as np
import altair as alt
import pycountry
from vega_datasets import data

# Load the raw survey responses; the frames built in this cell are derived from `raw`.
raw = pd.read_csv('data/survey.csv')
def clean_gender(x):
    """Map a free-text gender answer to 'Female', 'Male' or None.

    Missing values give None. Otherwise the value is stringified, stripped and
    lower-cased, and classified by its leading character: 'f' -> 'Female',
    'm' -> 'Male', anything else -> None.
    """
    if pd.isna(x):
        return None
    normalised = str(x).strip().lower()
    for prefix, label in (('f', 'Female'), ('m', 'Male')):
        if normalised.startswith(prefix):
            return label
    return None

# Normalised gender label ('Female' / 'Male' / None) for every raw response.
raw['GenderClean'] = raw['Gender'].apply(clean_gender)

# df_a: respondents aged 18-100 (inclusive) with no missing value in the columns
# the charts rely on.
df_a = (
    raw[(raw['Age'] >= 18) & (raw['Age'] <= 100)]
      .dropna(subset=['Country','Age','treatment','family_history'])
      .copy()
)
# df_a['jitter'] = np.random.normal(0, 0.2, len(df_a))
# Age shifted by a uniform offset in [-1, 1). The offsets come from a seeded
# generator rather than the global, unseeded RNG so the column (and therefore the
# data embedded in the chart spec) is identical on every Restart & Run All.
JITTER_SEED = 42
df_a['age_jitter'] = df_a['Age'] + np.random.default_rng(JITTER_SEED).uniform(-1, 1, len(df_a))
# Overwrite the free-text Gender column with the cleaned label.
df_a['Gender'] = df_a['GenderClean']
# df_b: every response that has a Country, with no age filter applied.
df_b = (
    raw.dropna(subset=['Country'])
       .copy()
)

def to_iso_n3(name):
    """Return the pycountry `numeric` code for `name` as an int, or None.

    An exact `pycountry.countries.lookup` is attempted first; if that raises,
    the first hit of `search_fuzzy` is used instead. If both attempts raise,
    None is returned.
    """
    strategies = (
        lambda query: pycountry.countries.lookup(query),
        lambda query: pycountry.countries.search_fuzzy(query)[0],
    )
    for resolve in strategies:
        try:
            return int(resolve(name).numeric)
        except Exception:
            # Best-effort: fall through to the next strategy (or to None).
            continue
    return None

# Attach the numeric country id to both frames and drop rows whose country could
# not be resolved. The frames are modified in place.
for df in (df_a, df_b):
    # Resolve each distinct country name once instead of once per row: to_iso_n3
    # may fall back to a fuzzy search, which is wasteful to repeat for duplicates.
    iso_by_country = {name: to_iso_n3(name) for name in df['Country'].unique()}
    # Nullable Int64 keeps unresolved countries as <NA> until they are dropped.
    df['id'] = df['Country'].map(iso_by_country).astype('Int64')
    df.dropna(subset=['id'], inplace=True)
    df['id'] = df['id'].astype(int)

# Number of responses per (Country, id) pair in df_b; used below as the lookup
# table that is joined onto the map features.
totals = (
    df_b.groupby(['Country','id'])
        .size()
        .reset_index(name='total_count')
)

# Click selection on the numeric country id.
# NOTE(review): `click` is not referenced by any chart in this cell — confirm it is
# still needed.
click     = alt.selection_point(fields=['id'],
                                name='country_click',
                                on='click',
                                empty='all')
# Interval selection along x, used as the age brush on the scatter plot.
brush     = alt.selection_interval(encodings=['x'],
                                   name='age_brush',
                                   empty='all')
# Click selection on the `treatment` field.
sel_treat = alt.selection_point(fields=['treatment'],
                                name='treat_select',
                                on='click',
                                empty='all')

# Country shapes from the vega_datasets world_110m TopoJSON.
world = alt.topo_feature(data.world_110m.url, 'countries')

# Country dropdown. The first option is None and is merely *labelled* "All":
# choosing it clears the selection, and with empty='all' a cleared selection keeps
# every row. Offering the literal string 'All' as an option value instead would make
# every `transform_filter(sel_country)` test Country == 'All', which no row
# satisfies, leaving the charts empty.
country_names = sorted(df_b['Country'].unique())
country_dropdown = alt.binding_select(
    options=[None] + country_names,
    labels=['All'] + country_names,
    name='Country: '
)
sel_country = alt.selection_point(
    fields=['Country'],
    bind=country_dropdown,
    name='country_select',
    empty='all'  # no filtering while the dropdown is on "All"
)

# World map: countries with at least one response are drawn in the darker colour.
map_chart = (
    alt.Chart(world).mark_geoshape(stroke='white')
      # Remap feature id 158 to 156 so that feature joins onto the `totals` row with id 156.
      .transform_calculate(
          id="""
            toNumber(datum.id) === 158
              ? 156
              : toNumber(datum.id)
          """
      )
      # Join Country / total_count from `totals` onto each map feature by id.
      .transform_lookup(
          lookup='id',
          from_=alt.LookupData(totals, key='id',
                               fields=['Country','total_count'])
      )
      .encode(
          color=alt.condition(
              alt.datum.total_count > 0,
              alt.value('#18B7F6'),   # darker — has responses
              alt.value('#D7EEF6')    # lighter — no responses
          ),
          tooltip=[
              alt.Tooltip('Country:N', title='Country'),
              alt.Tooltip('total_count:Q', title='Responses')
          ],
          # Features not matching the country selection are drawn slightly faded.
          opacity=alt.condition(sel_country, alt.value(1), alt.value(0.7))
      )
      # This is the only chart in the cell that registers sel_country.
      .add_params(sel_country)
      .properties(width=700, height=350,
                  title={
                    "text": ["Treatment Distribution", "（点击有色地图可查看该国家相关信息）"],
                    "subtitleFontSize":12,
                    "anchor":"start"
  })
)

# Scatter of Age against cleaned gender, restricted to the selected country.
# Points inside the age brush are highlighted; points outside are faded.
scatter = (
    alt.Chart(df_a)
      .transform_filter(sel_country)
      # NOTE(review): age_jitter is calculated here (replacing the column of the same
      # name on df_a) but no encoding below references it — x uses the raw Age.
      .transform_calculate(
          age_jitter="datum.Age + (random() - 0.5) * 2"  # horizontal jitter of ±1 year
      )
      .mark_circle(size=60)
      .encode(
          x='Age:Q',
          y=alt.Y('GenderClean:N', title='Gender'),
          # yOffset='jitterY:Q',
          color=alt.condition(
              brush,
              alt.value('#666BCE'),    # dark blue
              alt.value('#D7EEF6')     # lighter colour for points outside the brush (optional)
          ),
          tooltip=['Country','Age','GenderClean','treatment','family_history'],
          opacity=alt.condition(brush, alt.value(1), alt.value(0.2))
      )
      # The brush is attached here, so the age interval is drawn on this chart.
      .add_params(brush)
      .properties(width=700, height=200,
                  title={
                    "text": ["Age Distribution", "（拖拽刷选年龄区间）"],
                    "subtitleFontSize":12,
                    "anchor":"start"
          })
      #  .facet(
      #   row=alt.Row('GenderClean:N', title=None,
      #               header=alt.Header(labelFontSize=12, labelPadding=4))
      # )
)
# Text mark showing how many df_a rows match the country selection and the age brush.
count_text = (
    alt.Chart(df_a)
      .transform_filter(sel_country)
      .transform_filter(brush)
      .mark_text(size=14, align='center', fontWeight='bold')
      .encode(
         text=alt.Text('count():Q', title='Selected Count', format='d')
      )
      .properties(height=30, width=700, title='Selected Respondents')
)
# Bar chart of treatment counts for the rows matching the country selection and age brush.
bar_treat = (
    alt.Chart(df_a)
      .transform_filter(sel_country)       # country
      .transform_filter(brush)       # age range
      # Count rows per treatment value ...
      .transform_aggregate(
          count='count()',
          groupby=['treatment']
      )
      # ... then attach the grand total to every bar so a share can be computed.
      .transform_joinaggregate(
          total='sum(count)'
      )
      .transform_calculate(
          percent='datum.count / datum.total'
      )
      .mark_bar()
      .encode(
          x=alt.X('treatment:N', title='Treatment'),
          y=alt.Y('count:Q',      title='Count'),
          color=alt.Color('treatment:N',
                          scale=alt.Scale(domain=['No','Yes'],
                                          range=['#D7EEF6','#18B7F6']),
                          legend=None),
          # Bars not matching the clicked treatment are faded.
          opacity=alt.condition(sel_treat, alt.value(1), alt.value(0.3)),
          tooltip=[
            alt.Tooltip('treatment:N', title='Treatment'),
            alt.Tooltip('count:Q',     title='Count'),
            alt.Tooltip('percent:Q',   title='Percent', format='.1%')
          ]
      )
      .add_params(sel_treat)
      .properties(width=250, height=250, title={
          "text":["Treatment Distribution","（点击条形筛选）"],
          "subtitleFontSize":12,"anchor":"start"
      })
)
# Percentage labels drawn 7px above each bar, derived from the same chart definition.
label_treat = bar_treat.mark_text(
    dy=-7,
    size=12
).encode(
    text=alt.Text('percent:Q', format='.1%'),
    color=alt.value('black')
)
# Layer the labels on top of the bars.
bar_treat_with_labels = bar_treat + label_treat
# —— Family History bar chart ——
# Counts per family_history value for the rows matching the country selection, the
# age brush and the clicked treatment.
bar_fam = (
    alt.Chart(df_a)
      .transform_filter(sel_country)
      .transform_filter(brush)
      .transform_filter(sel_treat)    # filter by Treatment first
      # Count rows per family_history value ...
      .transform_aggregate(
          count='count()',
          groupby=['family_history']
      )
      # ... then attach the grand total to every bar so a share can be computed.
      .transform_joinaggregate(
          total='sum(count)'
      )
      .transform_calculate(
          percent='datum.count / datum.total'
      )
      .mark_bar()
      .encode(
          x=alt.X('family_history:N', title='Family History'),
          y=alt.Y('count:Q',            title='Count'),
          color=alt.Color('family_history:N',
                          scale=alt.Scale(domain=['No','Yes'],
                                          range=['#D7EEF6','#18B7F6']),
                          legend=None),
          # opacity=alt.condition(sel_treat, alt.value(1), alt.value(0.3)),
          tooltip=[
            alt.Tooltip('family_history:N', title='Family History'),
            alt.Tooltip('count:Q',            title='Count'),
            alt.Tooltip('percent:Q',          title='Percent', format='.1%')
          ]
      )
      .properties(width=250, height=250, title={
          "text":["Family History Distribution","（基于 Treatment 筛选）"],
          "subtitleFontSize":12,"anchor":"start"
      })
)
# Percentage labels drawn 7px above each bar, derived from the same chart definition.
label_fam = bar_fam.mark_text(
    dy=-7,     
    size=12
).encode(
    text=alt.Text('percent:Q', format='.1%'),
    color=alt.value('black')
)
# Layer the labels on top of the bars.
bar_fam_with_labels = bar_fam + label_fam
# Heatmap of row counts per (treatment, family_history) combination for the rows
# matching the country selection and age brush.
heatmap = (
    alt.Chart(df_a, title='Treatment vs Family History Heatmap')
      .transform_filter(sel_country)
      .transform_filter(brush)
      .transform_aggregate(
          count='count()',
          groupby=['treatment','family_history']
      )
      .mark_rect()
      .encode(
          x=alt.X('treatment:N', title='Treatment'),
          y=alt.Y('family_history:N', title='Family History'),
          color=alt.Color('count:Q', title='Count',
                          scale=alt.Scale(range=['#D7EEF6', '#666BCE']),
                          legend=alt.Legend(
                          orient='right',    # pin the legend to the right
                          direction='vertical'
              )),
          tooltip=[
              alt.Tooltip('treatment:N', title='Treatment'),
              alt.Tooltip('family_history:N', title='Family History'),
              alt.Tooltip('count:Q', title='Count')
          ],
          # Cells not matching the clicked treatment are faded.
          opacity=alt.condition(sel_treat, alt.value(1), alt.value(0.3))
      )
      .add_params(sel_treat)
      .properties(width=300, height=200)
)

# Layout: the two bar charts sit side by side; the left column stacks the counter
# and the map; the right column stacks the scatter, the bars and the heatmap.
bars = bar_treat_with_labels | bar_fam_with_labels
left_col = alt.vconcat(count_text,map_chart, spacing=5)
right_col= alt.vconcat(scatter, bars, heatmap, spacing=10)

# NOTE(review): this rebinds `dashboard`, which an earlier cell in this notebook also
# assigns — when the notebook is run top-to-bottom this version is the one left in scope.
dashboard = alt.hconcat(
    left_col,
    right_col,
    spacing=5
).configure_title(fontSize=16)

# Last expression of the cell: renders the dashboard as the cell output.
dashboard

In [4]:
import json

# Dump the dashboard's dictionary form to dashboard_spec.json (2-space indented).
spec = dashboard.to_dict()
with open('dashboard_spec.json','w') as spec_file:
    spec_file.write(json.dumps(spec, indent=2))

In [2]:
# import pandas as pd
# import numpy as np
# import altair as alt

# # 1. 清洗 & 生成 jitter （只做 y 方向微扰）
# df_a = (
#     raw[(raw['Age'] >= 18) & (raw['Age'] <= 100)]
#        .dropna(subset=['Country','Age','treatment','family_history','Gender'])
#        .copy()
# )
# df_a['Country'] = df_a['Country'].str.strip()
# def clean_gender(x):
#     if pd.isna(x): return None
#     s = str(x).strip().lower()
#     if s.startswith('f'): return 'Female'
#     if s.startswith('m'): return 'Male'
#     return None
# df_a['GenderClean'] = df_a['Gender'].apply(clean_gender)
# df_a = df_a.dropna(subset=['GenderClean'])

# # numeric gender base + y jitter
# gender_map = {'Male': 0, 'Female': 1}
# df_a['gender_num'] = df_a['GenderClean'].map(gender_map)
# df_a['y_jitter'] = df_a['gender_num'] + np.random.uniform(-0.1, 0.1, len(df_a))

# # 2. selection 定义
# country_dropdown = alt.binding_select(
#     options=['All'] + sorted(df_a['Country'].unique()), name='Country: '
# )
# sel_country = alt.selection_point(
#     fields=['Country'],
#     bind=country_dropdown,
#     name='country_select',
#     empty='all'
# )
# brush = alt.selection_interval(encodings=['x'], name='age_brush', empty='all')

# # 3. count 验证
# count_text = (
#     alt.Chart(df_a)
#       .transform_filter(sel_country)
#       .transform_filter(brush)
#       .mark_text(size=14, align='center', fontWeight='bold')
#       .encode(
#           text=alt.Text('count():Q', title='Selected Count', format='d')
#       )
#       .add_params(sel_country, brush)
#       .properties(height=40, width=400, title='Selected Respondents')
# )

# # 4. scatter：x 用原始 Age（brush 正常），y 用 jitter 避免叠
# scatter = (
#     alt.Chart(df_a)
#       .transform_filter(sel_country)
#       .transform_filter(brush)
#       .mark_circle(size=60)
#       .encode(
#           x=alt.X('Age:Q', title='Age'),  # brush 基于这个
#           y=alt.Y('y_jitter:Q',
#                   title='Gender',
#                   axis=alt.Axis(values=[0,1],
#                                 labelExpr="datum.value == 0 ? 'Male' : 'Female'")),
#           tooltip=['Country','Age','GenderClean','treatment','family_history'],
#           color=alt.value('#666BCE'),
#           opacity=alt.value(1)
#       )
#       .add_params(sel_country, brush)
#       .properties(
#           width=700, height=250,
#           title={
#               "text": ["Age Distribution", "（拖拽刷选年龄区间）"],
#               "subtitleFontSize":12,
#               "anchor":"start"
#           }
#       )
# )

# # 5. 展示
# dashboard = alt.vconcat(count_text, scatter)
# dashboard
