# Visual 1: Property Value Distribution Over Time (Austin vs. U.S.)

This interactive visualization shows how Austin’s housing market has shifted toward higher-priced homes over the past decade compared to national trends. Use the year slider to explore changes over time.

Your work will be continued in this pull request.

In [2]:
import re
from pathlib import Path

import altair as alt
import pandas as pd

# Read the combined Austin / U.S. property-value table from disk
prop_val_path = Path('data files/austin_property_value.csv')
prop_val = pd.read_csv(prop_val_path)

# Map the raw CSV headers onto the short names used by the rest of the cell
column_aliases = {
    'Value Bucket': 'bucket',
    'Year': 'year',
    'Place': 'place',
    'share': 'share',
}
prop_val = prop_val.rename(columns=column_aliases)

# Stop early when a column the chart depends on is absent
needed_cols = {'bucket', 'year', 'place'}
absent_cols = needed_cols.difference(prop_val.columns)
if absent_cols:
    raise ValueError(f"Missing required columns: {absent_cols} in {prop_val_path}")

# Restrict to the two places being compared and give each a short display name
place_to_region = {'Austin, TX': 'Austin', 'United States': 'U.S.'}
is_compared_place = prop_val['place'].isin(list(place_to_region))
prop_val = prop_val.loc[is_compared_place].copy()
prop_val['region'] = prop_val['place'].replace(place_to_region)

# Years: unparseable values become missing, those rows are dropped, and the
# survivors are stored as plain Python-style ints
parsed_years = pd.to_numeric(prop_val['year'], errors='coerce')
prop_val['year'] = parsed_years.astype('Int64')
prop_val = prop_val.dropna(subset=['year'])
prop_val['year'] = prop_val['year'].astype(int)

# Determine bucket order (prefer the explicit ID column if available)
_AMOUNT_RE = re.compile(r'(\d[\d,]*(?:\.\d+)?)([kKmM]?)')
_SUFFIX_SCALE = {'': 1, 'k': 1_000, 'm': 1_000_000}


def _bucket_sort_key(label):
    """Return a sort key ranking a bucket label by the first amount it mentions.

    Labels without any number sort after all numeric ones. When two labels
    start at the same amount, an open-ended lower bucket ("Less than $10,000")
    sorts before the closed range ("$10,000 to $14,999").
    """
    text = str(label).strip()
    found = _AMOUNT_RE.search(text)
    if found is None:
        return (1, 0.0, 0, text)
    digits, suffix = found.groups()
    amount = float(digits.replace(',', '')) * _SUFFIX_SCALE[suffix.lower()]
    opens_below = 0 if text.lower().startswith(('less', 'under', 'below', '<')) else 1
    return (0, amount, opens_below, text)


if 'Value Bucket ID' in prop_val.columns:
    prop_val['bucket_order'] = prop_val['Value Bucket ID']
else:
    # Fallback: order labels by dollar amount. A plain string sort would put
    # "$100,000 ..." ahead of "$20,000 ...", scrambling the stack and legend.
    # Missing labels are skipped so they cannot break the sort; those rows end
    # up with a missing bucket_order.
    ordered_labels = sorted(prop_val['bucket'].dropna().unique(), key=_bucket_sort_key)
    order_map = {b: i for i, b in enumerate(ordered_labels)}
    prop_val['bucket_order'] = prop_val['bucket'].map(order_map)

# Derive shares from a count column when the CSV offers no usable share values
share_unusable = 'share' not in prop_val.columns or prop_val['share'].isna().all()
if share_unusable:
    # First matching header wins; 'Property Value by Bucket' is a guessed name
    count_col = None
    for candidate in ('Property Value by Bucket', 'count', 'Count', 'n'):
        if candidate in prop_val.columns:
            count_col = candidate
            break
    if count_col is None:
        raise ValueError('No share column found and no recognizable count column to compute shares from.')

    # Non-numeric counts are treated as zero before normalizing
    prop_val[count_col] = pd.to_numeric(prop_val[count_col], errors='coerce').fillna(0)

    def _fraction_of_group(counts):
        # Each row's count divided by the total of its (year, region) group
        return counts / counts.sum()

    per_bar_counts = prop_val.groupby(['year', 'region'])[count_col]
    prop_val['share'] = per_bar_counts.transform(_fraction_of_group).fillna(0)

# Whatever its origin, the share column must end up numeric with no gaps
numeric_share = pd.to_numeric(prop_val['share'], errors='coerce')
prop_val['share'] = numeric_share.fillna(0)

# Year range for the slider. With no rows left, min()/max() yield NaN and the
# int() conversion below would fail with an unhelpful message, so stop here
# with an explicit one instead (same exception type).
if prop_val.empty:
    raise ValueError('No rows with a usable year remain; cannot build the year slider.')

min_year = int(prop_val['year'].min())
max_year = int(prop_val['year'].max())

# A single slider binding is shared by both API generations
year_slider = alt.binding_range(min=min_year, max=max_year, step=1)

# Selection/param for year slider – support Altair v4 and v5
use_point = hasattr(alt, 'selection_point')
if use_point:
    # Altair v5 style: point selection registered through add_params
    year_select = alt.selection_point(
        name='Year',
        fields=['year'],
        bind=year_slider,
        value={'year': min_year}
    )
    base = alt.Chart(prop_val).add_params(year_select).transform_filter(year_select)
else:
    # Altair v4 fallback: single selection registered through add_selection
    year_select = alt.selection_single(
        fields=['year'],
        bind=year_slider,
        init={'year': min_year}
    )
    base = alt.Chart(prop_val).add_selection(year_select).transform_filter(year_select)

# Palette assembled from three tiers: light blues (low) → greens/yellow (mid)
# → oranges/reds (high)
cool_tones = ['#e3f2fd', '#bbdefb', '#90caf9', '#64b5f6']
mid_tones = ['#81c784', '#aed581', '#fff176']
warm_tones = ['#ffb74d', '#ff8a65', '#f4511e', '#d32f2f', '#b71c1c']
palette = cool_tones + mid_tones + warm_tones

# Encoding channels for the stacked bars, named individually for readability
region_axis = alt.X('region:N', title='Region', axis=alt.Axis(labelAngle=0))
share_axis = alt.Y(
    'share:Q',
    title='Share of Homes',
    stack='normalize',
    axis=alt.Axis(format='%'),
)
# The legend/color domain follows bucket_order
bucket_color = alt.Color(
    'bucket:N',
    title='Home Price Ranges',
    sort=alt.SortField(field='bucket_order', order='ascending'),
    scale=alt.Scale(range=palette),
)
# The stacking sequence inside each bar also follows bucket_order
stack_sequence = alt.Order('bucket_order:Q')
hover_fields = [
    alt.Tooltip('year:O', title='Year'),
    alt.Tooltip('region:N', title='Region'),
    alt.Tooltip('bucket:N', title='Price Range'),
    alt.Tooltip('share:Q', title='Share', format='.1%'),
]

# One normalized stacked bar per region for the year chosen on the slider
bars = (
    base.mark_bar()
    .encode(
        x=region_axis,
        y=share_axis,
        color=bucket_color,
        order=stack_sequence,
        tooltip=hover_fields,
    )
    .properties(width=520, height=360)
)

# Labels for the lowest and highest bucket, shown in the first and last year only
first_year, last_year = min_year, max_year
label_years = [first_year, last_year]
bar_keys = ['year', 'region']

# Reproduce the bars' normalized stacking in pandas so each label can sit in
# the middle of its own segment. Letting the text layer stack by itself does
# not work: it holds a single row per bar, so a normalized stack puts every
# label at 100% and the low/high labels land on top of each other.
stacked = (
    prop_val[prop_val['year'].isin(label_years)]
    .sort_values(bar_keys + ['bucket_order'])
)
bar_totals = stacked.groupby(bar_keys)['share'].transform('sum')
# A bar whose shares sum to zero has nothing to show; report 0 instead of NaN
stacked['share_norm'] = (stacked['share'] / bar_totals.where(bar_totals != 0)).fillna(0)
segment_top = stacked.groupby(bar_keys)['share_norm'].cumsum()
stacked['label_y'] = segment_top - stacked['share_norm'] / 2

# Rows are sorted by bucket_order within each bar, so first/last pick the
# bottom and top segments
low_high = (
    stacked.groupby(bar_keys, as_index=False)
    .agg(low_bucket=('bucket', 'first'), high_bucket=('bucket', 'last'))
)

labels_df = stacked.merge(low_high, on=bar_keys)
labels_low = labels_df[labels_df['bucket'] == labels_df['low_bucket']].copy()
labels_high = labels_df[labels_df['bucket'] == labels_df['high_bucket']].copy()

# Only show labels when the selected year equals first or last year
label_filter = (alt.datum.year == first_year) | (alt.datum.year == last_year)


def _segment_label_layer(rows):
    """Return a text layer printing the normalized share at each row's segment midpoint.

    The layer is filtered to the label years and to the year currently
    selected on the slider. Axis titles repeat the bar layer's titles so the
    layered chart does not show merged titles.
    """
    return (
        alt.Chart(rows)
        .transform_filter(label_filter)
        .transform_filter(year_select)
        .mark_text(color='black', baseline='middle', size=11)
        .encode(
            x=alt.X('region:N', title='Region'),
            # Position is precomputed above, so stacking is switched off here
            y=alt.Y('label_y:Q', title='Share of Homes', stack=None),
            text=alt.Text('share_norm:Q', format='.0%'),
            detail='bucket:N',
        )
    )


text_low = _segment_label_layer(labels_low)
text_high = _segment_label_layer(labels_high)

# Layer the bars with both label layers and attach the headline title
chart_title = 'Share of Homes by Price Range — Austin vs. U.S. (Use slider to change year)'
chart = (bars + text_low + text_high).properties(title=chart_title)

# Bare expression left last so the cell evaluates to the chart
chart