In [16]:
import altair as alt
import pandas as pd

# Disable the row limit for large datasets
alt.data_transformers.disable_max_rows()

# Load the dataset
df = pd.read_csv('cognitive_decline_df.csv')

# Clean column names and ensure Data_Value is numeric
df.columns = df.columns.str.strip().str.replace(' ', '_')
df['Data_Value'] = pd.to_numeric(df['Data_Value'], errors='coerce')

# Define U.S. regions based on states
regions = {
    'Northeast': ['Maine', 'New Hampshire', 'Vermont', 'Massachusetts', 'Rhode Island', 'Connecticut', 'New York', 'New Jersey', 'Pennsylvania'],
    'Midwest': ['Ohio', 'Indiana', 'Illinois', 'Michigan', 'Wisconsin', 'Minnesota', 'Iowa', 'Missouri', 'North Dakota', 'South Dakota', 'Nebraska', 'Kansas'],
    'South': ['Delaware', 'Maryland', 'District_of_Columbia', 'Virginia', 'West Virginia', 'North Carolina', 'South Carolina', 'Georgia', 'Florida', 'Kentucky', 'Tennessee', 'Mississippi', 'Alabama', 'Oklahoma', 'Texas', 'Arkansas', 'Louisiana'],
    'West': ['Idaho', 'Montana', 'Wyoming', 'Nevada', 'Utah', 'Colorado', 'Arizona', 'New Mexico', 'Alaska', 'Washington', 'Oregon', 'California', 'Hawaii']
}

# Create a reverse mapping from state to region
state_to_region = {state: region for region, states in regions.items() for state in states}

# Add a Region column to the dataframe
df['Region'] = df['LocationDesc'].map(state_to_region)

# Filter out rows where Region is not defined (in case there are missing states)
df_filtered = df.dropna(subset=['Region'])

# Replace NaN values in Data_Value with 0
df_filtered['Data_Value'] = df_filtered['Data_Value'].fillna(0)

# Step 1: Create a dropdown menu for Topic selection, initialized with the first topic
initial_topic = df_filtered['Topic'].unique()[0]  # Get the first topic
topic_dropdown = alt.binding_select(options=list(df_filtered['Topic'].unique()), name='Topic: ')
topic_selection = alt.selection_point(fields=['Topic'], bind=topic_dropdown, value=initial_topic)  # Pass the string value directly

# Step 2: Group by region to calculate the average percentage for each region
df_aggregated_region = df_filtered.groupby(['Region', 'Topic'], as_index=False).agg({'Data_Value': 'mean'})

# Group by state to calculate the average percentage for each state
df_aggregated_state = df_filtered.groupby(['LocationDesc', 'Region', 'Topic'], as_index=False).agg({'Data_Value': 'mean'})

# Step 3: Regional bar chart with topic and region selector
region_selection = alt.selection_point(fields=['Region'], bind='legend')

regional_bar = alt.Chart(df_aggregated_region).mark_bar().encode(
    x=alt.X('Region:N', title='Region'),
    y=alt.Y('Data_Value:Q', title='Avg. Engagement (%)', scale=alt.Scale(domain=[0, 50])),  # Limit y-axis to 100%
    color=alt.Color('Region:N', title='Region'),
    tooltip=[alt.Tooltip('Region:N'), alt.Tooltip('Data_Value:Q', title='Avg. Engagement (%)')]
).add_selection(
    region_selection
).transform_filter(
    topic_selection  # Filter based on selected topic
).properties(
    width=600,
    height=400,
    title="Regional Engagement in Cognitive Decline Discussions"
)

# Step 4: State-level bar charts
state_bar = alt.Chart(df_aggregated_state).mark_bar().encode(
    x=alt.X('LocationDesc:N', title='State', sort='-y'),  # Sort states by the engagement levels
    y=alt.Y('Data_Value:Q', title='Avg. Engagement (%)', scale=alt.Scale(domain=[0, 50])),  # Engagement in percentage
    color=alt.Color('Region:N', title='Region'),  # Same color as the region
    tooltip=[alt.Tooltip('LocationDesc:N', title='State'), alt.Tooltip('Data_Value:Q', title='Avg. Engagement (%)')]
).transform_filter(
    region_selection & topic_selection  # Filter based on both the region and selected topic
).properties(
    width=600,
    height=400,
    title="State-Level Engagement in Discussions by Topic"
)

# Step 5: Combine the two charts into a linked view and include the dropdown
linked_dashboard = alt.vconcat(
    regional_bar.add_selection(topic_selection),
    state_bar
)

linked_dashboard.display()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['Data_Value'] = df_filtered['Data_Value'].fillna(0)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
