# In [35]:
import pandas as pd
import altair as alt
import numpy as np

# Load the census table. This step reads the columns `educational_attainment`,
# `total_population_25_over`, `median_income` and `city`.
df = pd.read_csv("data/census_tracts.csv")

# Lift Altair's row-count check so the whole table can be passed to the chart.
alt.data_transformers.disable_max_rows()

# Degree share of the 25+ population, as a percentage rounded to 2 decimals.
# A zero denominator would produce +/-inf, which dropna() below does NOT
# remove, so non-positive denominators are blanked to NaN first; those rows
# then fall out together with the other incomplete ones.
# NOTE(review): presumably `educational_attainment` is a head count of degree
# holders -- confirm against the data dictionary.
df['college_educated_percentage'] = (
    df['educational_attainment']
    / df['total_population_25_over'].where(df['total_population_25_over'] > 0)
    * 100
).round(2)

# Non-numeric income entries become NaN, and so do negative values
# (presumably sentinel codes for "not available" -- verify with the source).
# Series.where fills with NaN, which keeps the column a plain float dtype.
df['median_income'] = pd.to_numeric(df['median_income'], errors='coerce')
df['median_income'] = df['median_income'].where(df['median_income'] >= 0)

# Keep only rows that have everything the chart needs.
df = df.dropna(subset=['college_educated_percentage', 'median_income', 'city'])


# Median-income band to study; both bounds are inclusive.
low = 40000
high = 80000

# Independent copy of the rows inside the band, so later edits to it never
# write back into `df`.
df_bh = df.loc[df['median_income'].between(low, high)].copy()

# Cities present in the income band, sorted once so the dropdown order and the
# default choice come from the same list.
city_options = sorted(df_bh['city'].unique())
if not city_options:
    # Without this guard the `[0]` lookup below fails with a bare IndexError
    # that says nothing about the real cause.
    raise ValueError(
        f"No rows with median income between {low} and {high}; "
        "cannot build the city dropdown."
    )

# Dropdown widget listing every available city.
city_dropdown = alt.binding_select(
    options=city_options,
    name='Select City: '
)

# Point selection on the `city` field, driven by the dropdown and initialised
# to the first city in sorted order.
city_selection = alt.selection_point(
    fields=['city'],
    bind=city_dropdown,
    value=city_options[0]
)

# Shared starting point for the layers: the in-band rows, narrowed to the city
# currently chosen through `city_selection`.
base = alt.Chart(df_bh).transform_filter(city_selection)

# x axis: degree share, pinned to 0-100 with no "nice" rounding of the domain.
scatter_x = alt.X(
    'college_educated_percentage:Q',
    scale=alt.Scale(domain=[0, 100], nice=False, clamp=True),
    title='Percent with 4-Year Degree (Age 25+)',
)

# y axis: median income, pinned to the same band used to filter the data.
scatter_y = alt.Y(
    'median_income:Q',
    scale=alt.Scale(domain=[low, high], nice=False, clamp=True),
    title='Median Income ($)',
)

# Fields shown when hovering over a point.
scatter_tooltip = [
    alt.Tooltip('city:N', title='City'),
    alt.Tooltip('geoid:N', title='GEOID'),
    alt.Tooltip('college_educated_percentage:Q', title='College %', format='.1f'),
    alt.Tooltip('median_income:Q', title='Median Income', format=','),
]

# Scatter layer: one outlined circle per row, all in a single fixed colour.
scatter_bh = (
    base
    .mark_circle(size=65, opacity=0.8, stroke='black', strokeWidth=0.5)
    .encode(
        x=scatter_x,
        y=scatter_y,
        color=alt.value('#648fff'),
        tooltip=scatter_tooltip,
    )
    .properties(
        title="Similar incomes regardless of education?",
        width=680,
        height=420,
    )
)

# Trend layer: a second-order polynomial fit of median income on degree share,
# computed from the same city-filtered rows as the scatter and drawn as a
# dashed gray line.
trend = (
    base
    .transform_regression(
        on='college_educated_percentage',
        regression='median_income',
        method='poly',
        order=2,
    )
    .mark_line(color='gray', strokeDash=[6, 3], opacity=0.9, size=4)
    .encode(
        alt.X('college_educated_percentage:Q'),
        alt.Y('median_income:Q'),
    )
)

# Layer the scatter and the trend line, attach the city dropdown, and enable
# pan/zoom on both axes.
# `interactive()` takes boolean flags for bind_x / bind_y; the earlier string
# values only worked because non-empty strings are truthy.
chart = (
    (scatter_bh + trend)
    .add_params(city_selection)
    .interactive(bind_x=True, bind_y=True)
    .configure_axis(labelFontSize=12, titleFontSize=16)
    .configure_title(fontSize=18)
)

# Last expression of the cell: the notebook renders the chart.
chart
