# Accessibility Atlas: A Data-Driven Portrait of Disability

**Author**: Luke Steuber  
**Date**: February 2026  
**Data Sources**: US Census Bureau, Bureau of Labor Statistics, WebAIM, Eurostat, NCES/IDEA, and more  

This notebook explores disability prevalence, employment outcomes, web accessibility compliance, assistive technology usage, and special education trends across 25+ datasets.

---

In [None]:
import json
import csv
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from pathlib import Path
import warnings
# Silence every warning category for the rest of the session so the
# notebook output stays uncluttered.
warnings.filterwarnings('ignore')

# Shared matplotlib defaults, applied in one go to every figure below.
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'figure.dpi': 100,
    'axes.titlesize': 14,
    'axes.labelsize': 12,
    'axes.grid': True,
    'grid.alpha': 0.3,
    'font.family': 'sans-serif',
})

# All dataset files are looked up relative to the current working directory.
DATA_DIR = Path('.')
json_files = list(DATA_DIR.glob('*.json'))
csv_files = list(DATA_DIR.glob('*.csv'))
n_json = len(json_files)
n_csv = len(csv_files)
print(f'Data directory: {DATA_DIR.resolve()}')
print(f'Files: {n_json} JSON + {n_csv} CSV = {n_json + n_csv} datasets')

def read_csv_as_dicts(path):
    """Read a CSV file into a list of row dicts (no pandas needed).

    The first line of the file is used as the header; every following row
    becomes a dict mapping column name to the raw string value.

    Args:
        path: Filesystem path (str or ``Path``) of the CSV file.

    Returns:
        list[dict]: One dict per data row, in file order.
    """
    # 'utf-8-sig' decodes plain UTF-8 unchanged but also strips a leading
    # byte-order mark, so the first column name is never prefixed with U+FEFF.
    # newline='' lets the csv module handle line endings itself.
    with open(path, newline='', encoding='utf-8-sig') as f:
        return list(csv.DictReader(f))

## 1. US Disability Prevalence — National Trends (2010-2023)

Census Bureau ACS 1-year estimates from Table S1810. 13 years of data (2020 excluded due to COVID survey disruptions).

In [None]:
# Read the multi-year Census trend file (ACS table S1810).
with open(DATA_DIR / 'census_disability_trends_2010_2023.json') as f:
    census_raw = json.load(f)


def _s1810_value(record, variable):
    """Return one S1810 variable as a float; missing or empty values become 0.0."""
    return float(record.get(variable, 0) or 0)


# One list per series, filled year by year and converted to numpy below.
t_years, t_pop, t_dis, t_pct = [], [], [], []
t_u18, t_1864, t_65 = [], [], []

# Note: S1810 age-group variables changed meaning around 2015.
# Pre-2015 values (0.4-0.8%) are NOT disability rates, so those years
# are recorded as NaN instead.
age_series = (
    (t_u18, 'S1810_C03_002E'),
    (t_1864, 'S1810_C03_003E'),
    (t_65, 'S1810_C03_004E'),
)

for year in sorted(census_raw['s1810_data']):
    table = census_raw['s1810_data'][year]['data']
    # The first entry of the table holds the variable names, the second the values.
    row = dict(zip(table[0], table[1]))
    yr = int(year)

    t_years.append(yr)
    t_pop.append(_s1810_value(row, 'S1810_C01_001E'))
    t_dis.append(_s1810_value(row, 'S1810_C02_001E'))
    t_pct.append(_s1810_value(row, 'S1810_C03_001E'))

    for series, variable in age_series:
        series.append(_s1810_value(row, variable) if yr >= 2015 else np.nan)

# Convert every series to a numpy array for masking and plotting.
t_years, t_pop, t_dis, t_pct, t_u18, t_1864, t_65 = (
    np.array(seq) for seq in (t_years, t_pop, t_dis, t_pct, t_u18, t_1864, t_65)
)

first_year, last_year = t_years[0], t_years[-1]
print(f'Census disability trends: {len(t_years)} years ({first_year}-{last_year})')
print(f'Disability rate: {t_pct[0]:.1f}% ({first_year}) → {t_pct[-1]:.1f}% ({last_year})')
print(f'Population with disability: {t_dis[-1]:,.0f} in {last_year}')

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Left panel: overall national rate, with the missing 2020 survey year shaded.
ax1.plot(t_years, t_pct, 'o-', color='#2c3e50', linewidth=2.5, markersize=8)
ax1.fill_between(t_years, t_pct, alpha=0.1, color='#2c3e50')
ax1.set_title('US Disability Prevalence Rate', fontweight='bold')
ax1.set_ylabel('% of Population')
ax1.axvspan(2019.5, 2020.5, alpha=0.15, color='red', label='2020 gap (COVID)')
ax1.legend()

# Right panel: one line per age group, restricted to the years where the
# age-group series holds real values (NaN marks the years to skip).
mask = ~np.isnan(t_u18)
age_lines = (
    (t_u18, '#3498db', 'Under 18'),
    (t_1864, '#e67e22', '18-64'),
    (t_65, '#e74c3c', '65+'),
)
for series, line_color, line_label in age_lines:
    ax2.plot(t_years[mask], series[mask], 'o-', color=line_color,
             label=line_label, linewidth=2, markersize=6)

ax2.set_title('Disability Rate by Age Group', fontweight='bold')
ax2.set_ylabel('% of Age Group')
ax2.legend()

# Settings shared by both panels; each axis gets its own formatter instance.
for panel in (ax1, ax2):
    panel.set_xlabel('Year')
    panel.yaxis.set_major_formatter(mticker.FormatStrFormatter('%.1f%%'))

plt.tight_layout()
plt.show()

first_year, last_year = t_years[0], t_years[-1]
print(f'\nKey finding: Disability rate rose from {t_pct[0]:.1f}% ({first_year}) to {t_pct[-1]:.1f}% ({last_year})')
print(f'Total with disability in {last_year}: {t_dis[-1]:,.0f}')

## 2. Disability by Age and Sex (2022 Snapshot)

Detailed breakdown from Census Table B18101 showing how disability rates vary dramatically by age and sex.

In [None]:
with open(DATA_DIR / 'census_disability_by_age_sex_2022.json') as f:
    age_sex = json.load(f)

# Age brackets as keyed in the JSON, paired with their display labels.
age_brackets = [
    ('under_5', 'Under 5'),
    ('5_to_17', '5-17'),
    ('18_to_34', '18-34'),
    ('35_to_64', '35-64'),
    ('65_to_74', '65-74'),
    ('75_plus', '75+'),
]
age_groups = [key for key, _ in age_brackets]
age_labels = [label for _, label in age_brackets]

# Disability rate per age bracket, one list per sex (plain lists, no pandas).
male_rates, female_rates = (
    [age_sex[sex]['by_age'][group]['rate_pct'] for group in age_groups]
    for sex in ('male', 'female')
)

x = np.arange(len(age_labels))
width = 0.35

fig, ax = plt.subplots(figsize=(12, 6))

# Grouped bars: male shifted half a bar left, female half a bar right.
bar_specs = (
    (-width / 2, male_rates, 'Male', '#3498db'),
    (width / 2, female_rates, 'Female', '#e74c3c'),
)
bar_groups = []
for shift, rates, sex_label, bar_color in bar_specs:
    bar_groups.append(
        ax.bar(x + shift, rates, width, label=sex_label, color=bar_color, alpha=0.85)
    )

ax.set_ylabel('Disability Rate (%)')
ax.set_title('Disability Rate by Age Group and Sex (2022)', fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(age_labels)
ax.legend()
for group in bar_groups:
    ax.bar_label(group, fmt='%.1f%%', padding=3, fontsize=9)

plt.tight_layout()
plt.show()

overall_male = age_sex["male"]["rate_pct"]
overall_female = age_sex["female"]["rate_pct"]
print(f'Overall: Male {overall_male:.1f}% vs Female {overall_female:.1f}%')
# Indices 3 and 4 are the 35-64 and 65-74 brackets.
print(f'Steepest climb: 35-64 → 65-74 (male: {male_rates[3]:.1f}% → {male_rates[4]:.1f}%, female: {female_rates[3]:.1f}% → {female_rates[4]:.1f}%)')

## 3. Disability Employment Gap

Bureau of Labor Statistics data showing employment outcomes for people with and without disabilities.

In [None]:
# Load the BLS disability employment dataset; `bls` is reused by later cells.
with open(DATA_DIR / 'bls_disability_employment_2024.json') as f:
    bls = json.load(f)

# Extract historical employment-population ratio (dict keyed by year string)
emp_data = bls['historical_trends']['employment_population_ratio']['data']
years_emp = sorted(emp_data.keys())
with_dis = [emp_data[y]['with_disability'] for y in years_emp]
# .get() tolerates years without a 'without_disability' entry (-> None)
without_dis = [emp_data[y].get('without_disability') for y in years_emp]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Left: Employment-population ratio trend
ax1.plot([int(y) for y in years_emp], with_dis, 'o-', color='#e74c3c', linewidth=2, label='With disability')
# Plot without_disability where available (years whose value is None are dropped)
valid_wo = [(int(y), v) for y, v in zip(years_emp, without_dis) if v is not None]
if valid_wo:
    ax1.plot([x[0] for x in valid_wo], [x[1] for x in valid_wo], 's--', color='#2ecc71', linewidth=2, label='Without disability')
ax1.set_title('Employment-Population Ratio (2009-2024)', fontweight='bold')
ax1.set_ylabel('% Employed')
ax1.set_xlabel('Year')
ax1.legend()
# Shade the year 2020 (unlabelled, so it does not appear in the legend)
ax1.axvspan(2019.5, 2020.5, alpha=0.1, color='gray')

# Right: 2024 comparison dashboard
stats = bls['overall_statistics']
metrics = ['employment_population_ratio', 'unemployment_rate', 'part_time_workers_percent', 'self_employed_percent']
metric_labels = ['Employment Ratio', 'Unemployment Rate', 'Part-Time Workers', 'Self-Employed']
dis_vals = [stats['with_disability'][m] for m in metrics]
# A missing (None) metric for the without-disability group is drawn as a zero-length bar
nodis_vals = [stats['without_disability'][m] if stats['without_disability'][m] is not None else 0 for m in metrics]

# Paired horizontal bars: the two groups are offset above/below each tick
x = np.arange(len(metric_labels))
ax2.barh(x - 0.2, dis_vals, 0.35, label='With Disability', color='#e74c3c', alpha=0.85)
ax2.barh(x + 0.2, nodis_vals, 0.35, label='Without Disability', color='#2ecc71', alpha=0.85)
ax2.set_yticks(x)
ax2.set_yticklabels(metric_labels)
ax2.set_xlabel('Percentage (%)')
ax2.set_title('Employment Metrics (2024)', fontweight='bold')
ax2.legend()

plt.tight_layout()
plt.show()

# NOTE(review): the subtraction below assumes the without-disability employment
# ratio is never None, although the bar data above guards against None — confirm.
print(f'Employment gap: {stats["without_disability"]["employment_population_ratio"] - stats["with_disability"]["employment_population_ratio"]:.1f} percentage points')
print(f'People with disabilities employed: {bls["population_overview"]["total_employed_with_disability_thousands"]}K')

In [None]:
# Disability prevalence and unemployment broken down by race/ethnicity.
race_data = bls['by_race_ethnicity']
# The prevalence dict also carries a free-text 'note' entry; skip it.
races = [r for r in race_data['disability_prevalence'] if r != 'note']

race_labels = [r.replace('_', ' ').title() for r in races]
prevalence = [race_data['disability_prevalence'][r] for r in races]
unemp_dis = [race_data['unemployment_rate_with_disability'][r] for r in races]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Both panels share the same bar colours, label format and layout.
palette = ['#3498db', '#e74c3c', '#2ecc71', '#f39c12']
panels = (
    (ax1, prevalence, 'Prevalence (%)', 'Disability Prevalence by Race/Ethnicity'),
    (ax2, unemp_dis, 'Unemployment Rate (%)', 'Unemployment Rate (With Disability)'),
)
for panel, series, axis_label, panel_title in panels:
    drawn = panel.barh(race_labels, series, color=list(palette))
    panel.set_xlabel(axis_label)
    panel.set_title(panel_title, fontweight='bold')
    panel.bar_label(drawn, fmt='%.1f%%', padding=5)

plt.tight_layout()
plt.show()

In [None]:
# Load the FRED employment series; 'annual_data' is keyed by year string.
with open(DATA_DIR / 'fred_disability_employment.json') as f:
    fred = json.load(f)

fred_years = sorted(fred['annual_data'].keys())
fred_yr_int = [int(y) for y in fred_years]
# .get() yields None for years where a series has no value.
fred_emp_dis = [fred['annual_data'][y].get('disability_employment_ratio') for y in fred_years]
fred_emp_all = [fred['annual_data'][y].get('total_employment_ratio') for y in fred_years]
fred_gap = [fred['annual_data'][y].get('employment_gap_pp') for y in fred_years]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Left: Employment-population ratio comparison
ax1.plot(fred_yr_int, fred_emp_all, 's-', color='#2ecc71', linewidth=2.5, markersize=7, label='Total civilian')
# Keep only years with a disability value, carrying the matching total along
# so the shaded band needs no per-year index() lookup.
valid_dis = [(y, dis, total)
             for y, dis, total in zip(fred_yr_int, fred_emp_dis, fred_emp_all)
             if dis is not None]
dis_years = [p[0] for p in valid_dis]
dis_ratio = [p[1] for p in valid_dis]
total_at_dis_years = [p[2] for p in valid_dis]
ax1.plot(dis_years, dis_ratio, 'o-', color='#e74c3c', linewidth=2.5, markersize=7, label='With disability')
ax1.fill_between(dis_years, dis_ratio, total_at_dis_years, alpha=0.15, color='#e74c3c')
ax1.set_title('Employment-Population Ratio (FRED)', fontweight='bold')
ax1.set_ylabel('% Employed')
ax1.set_xlabel('Year')
# The shaded span must be added BEFORE legend() is called; otherwise its
# 'COVID' label is never picked up by the legend.
ax1.axvspan(2019.5, 2020.5, alpha=0.1, color='gray', label='COVID')
ax1.legend()

# Right: Gap over time
valid_gap = [(y, g) for y, g in zip(fred_yr_int, fred_gap) if g is not None]
gap_years = [p[0] for p in valid_gap]
gap_values = [p[1] for p in valid_gap]
ax2.bar(gap_years, gap_values, color='#e67e22', alpha=0.7)
ax2.set_title('Employment Gap (pp)', fontweight='bold')
ax2.set_ylabel('Percentage Point Gap')
ax2.set_xlabel('Year')
if gap_values:
    # Compute the average once; skip the reference line when there is no data
    # (the mean of an empty list would be NaN).
    mean_gap = np.mean(gap_values)
    ax2.axhline(y=mean_gap, color='gray', linestyle='--', label=f'Average: {mean_gap:.1f}pp')
    ax2.legend()

plt.tight_layout()
plt.show()

s = fred['summary']
# NOTE(review): the start year 2009 is hard-coded here rather than read from
# the data — confirm it matches the first year in 'annual_data'.
print(f'FRED data: {s["years_of_data"]} years (2009-{s["latest_year"]})')
print(f'Employment gap ({s["latest_year"]}): {s["employment_gap"]} pp ({s["total_employment_ratio"]}% total vs {s["disability_employment_ratio"]}% disability)')
print(f'Disability LFPR: {s["disability_lfpr"]}%')
print(f'{s["trend"]}')

### FRED: Disability Employment Gap Over Time

Complete FRED series (2008-2024) showing the employment-population ratio for people with disabilities alongside the total civilian population.

## 4. Web Accessibility — WebAIM Million Report

WAVE automated analysis of 1,000,000 website home pages. The annual state-of-the-web for accessibility.

In [None]:
# Load the WebAIM Million results; `webaim` and `trends` are reused later.
with open(DATA_DIR / 'webaim_million_2025.json') as f:
    webaim = json.load(f)

trends = webaim['yearly_trends']
years = trends['years']


def _tld_label(entry):
    """Return the entry's TLD written with exactly one leading dot (e.g. '.gov')."""
    return '.' + str(entry['tld']).lstrip('.')


fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Top-left: Failure rate trend. The y-axis is zoomed to 90-100%, so the
# shaded area is filled down to 90 rather than to zero.
ax = axes[0, 0]
failure_pct = trends['pages_with_failures_pct']
ax.plot(years, failure_pct, 'o-', color='#e74c3c', linewidth=2.5)
ax.fill_between(years, failure_pct, 90, alpha=0.1, color='#e74c3c')
ax.set_title('Pages with WCAG Failures (%)', fontweight='bold')
ax.set_ylim(90, 100)
ax.yaxis.set_major_formatter(mticker.FormatStrFormatter('%.1f%%'))
# Call out the most recent value just above its marker.
ax.annotate(f'{failure_pct[-1]}%', xy=(years[-1], failure_pct[-1]),
            fontsize=14, fontweight='bold', color='#e74c3c', ha='center', va='bottom',
            xytext=(0, 10), textcoords='offset points')

# Top-right: Error types breakdown (one line per failure type)
ax = axes[0, 1]
error_types = ['low_contrast_pct', 'missing_alt_text_pct', 'empty_links_pct',
               'missing_form_labels_pct', 'empty_buttons_pct', 'missing_language_pct']
error_labels = ['Low Contrast', 'Missing Alt Text', 'Empty Links',
                'Missing Labels', 'Empty Buttons', 'Missing Lang']
colors_err = ['#e74c3c', '#3498db', '#f39c12', '#9b59b6', '#2ecc71', '#1abc9c']
for et, label, color in zip(error_types, error_labels, colors_err):
    ax.plot(years, trends[et], 'o-', label=label, color=color, linewidth=1.5)
ax.set_title('WCAG Failure Types Over Time', fontweight='bold')
ax.set_ylabel('% of Pages Affected')
ax.legend(fontsize=9, loc='upper right')

# Bottom-left: CMS performance, coloured by average errors per page
# (green below 51, orange below 65, red otherwise).
ax = axes[1, 0]
cms_data = webaim['cms_performance']
cms_names = [c['cms'] for c in cms_data]
cms_errors = [c['avg_errors'] for c in cms_data]
bar_colors = ['#2ecc71' if e < 51 else '#f39c12' if e < 65 else '#e74c3c' for e in cms_errors]
bars = ax.barh(cms_names, cms_errors, color=bar_colors)
ax.axvline(x=51, color='gray', linestyle='--', alpha=0.5, label='Baseline (51 avg)')
ax.set_xlabel('Average Errors per Page')
ax.set_title('CMS Accessibility Performance', fontweight='bold')
ax.bar_label(bars, fmt='%.0f', padding=5)
ax.legend()

# Bottom-right: TLD performance, four colour bands at 40 / 55 / 70 errors
ax = axes[1, 1]
tld_data = webaim['tld_performance']
tld_names = [t['tld'] for t in tld_data]
tld_errors = [t['avg_errors'] for t in tld_data]
bar_colors = ['#2ecc71' if e < 40 else '#3498db' if e < 55 else '#f39c12' if e < 70 else '#e74c3c' for e in tld_errors]
bars = ax.barh(tld_names, tld_errors, color=bar_colors)
ax.set_xlabel('Average Errors per Page')
ax.set_title('Accessibility by TLD', fontweight='bold')
ax.bar_label(bars, fmt='%.0f', padding=5)

plt.tight_layout()
plt.show()

# Pick the best/worst TLD from the data itself instead of assuming that the
# list is sorted and that its first/last entries are '.gov' and '.ua'.
best_tld = min(tld_data, key=lambda t: t['avg_errors'])
worst_tld = max(tld_data, key=lambda t: t['avg_errors'])

print(f'Bottom line: {webaim["summary"]["pages_with_wcag_failures_pct"]}% of top 1M sites have WCAG failures')
print(f'Total errors detected: {webaim["summary"]["total_errors_detected"]:,}')
print(f'Best TLD: {_tld_label(best_tld)} ({best_tld["avg_errors"]} errors)  Worst: {_tld_label(worst_tld)} ({worst_tld["avg_errors"]} errors)')

## 5. Screen Reader Usage — WebAIM Survey

Survey of 1,539 screen reader users revealing technology preferences and accessibility pain points.

In [None]:
with open(DATA_DIR / 'webaim_screen_reader_survey_2024.json') as f:
    sr = json.load(f)


def _shorten(text, limit=30):
    """Clip text to `limit` characters, appending '...' when it was longer."""
    return text[:limit] + '...' if len(text) > limit else text


fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Top-left: share of respondents by primary screen reader.
ax = axes[0, 0]
readers = sr['primary_screen_reader']
sr_names = [entry['name'] for entry in readers]
sr_pcts = [entry['pct'] for entry in readers]
colors_sr = plt.cm.Set2(np.linspace(0, 1, len(sr_names)))
wedges, texts, autotexts = ax.pie(
    sr_pcts,
    labels=sr_names,
    autopct='%1.1f%%',
    colors=colors_sr,
    startangle=90,
)
ax.set_title('Primary Screen Reader (2024)', fontweight='bold')

# Top-right: the eight highest-ranked problem items, drawn in reverse list
# order so the first-ranked item ends up at the top of the chart.
ax = axes[0, 1]
problems = sr['problematic_items_ranked'][:8]
prob_names = [_shorten(entry['item']) for entry in problems]
prob_points = [entry['points'] for entry in problems]
shades = plt.cm.Reds(np.linspace(0.3, 0.9, len(problems)))
bars = ax.barh(list(reversed(prob_names)), list(reversed(prob_points)), color=shades)
ax.set_xlabel('Severity Points')
ax.set_title('Most Problematic Web Elements', fontweight='bold')

# Bottom-left: disability types reported by respondents.
ax = axes[1, 0]
dis_types = sr['demographics']['disability_types']
dt_names = [entry['type'] for entry in dis_types]
dt_pcts = [entry['pct'] for entry in dis_types]
bars = ax.barh(list(reversed(dt_names)), list(reversed(dt_pcts)), color='#3498db', alpha=0.8)
ax.set_xlabel('% of Respondents')
ax.set_title('Disability Types of Screen Reader Users', fontweight='bold')
ax.bar_label(bars, fmt='%.1f%%', padding=5)

# Bottom-right: primary mobile platform.
ax = axes[1, 1]
mobile = sr['mobile']['primary_platform']
mob_names = [entry['name'] for entry in mobile]
mob_pcts = [entry['pct'] for entry in mobile]
ax.pie(
    mob_pcts,
    labels=mob_names,
    autopct='%1.1f%%',
    colors=['#636363', '#2ecc71', '#3498db', '#95a5a6'],
    startangle=90,
)
ax.set_title('Mobile Platform (Screen Reader Users)', fontweight='bold')

plt.tight_layout()
plt.show()

top_problem = sr["problematic_items_ranked"][0]
progress = sr["web_accessibility_progress"]
print(f'Survey: {sr["respondents"]} respondents')
print(f'#1 problem: {top_problem["item"]} ({top_problem["points"]} severity points)')
print(f'Web getting better? {progress["more_accessible_pct"]}% say yes, {progress["less_accessible_pct"]}% say worse')

## 6. ADA Digital Accessibility Lawsuits

Tracking the explosion of ADA web accessibility lawsuits from 2017 to 2024.

In [None]:
with open(DATA_DIR / 'ada_digital_lawsuits.json') as f:
    ada = json.load(f)

lawsuit_years = ada['years']
lawsuit_counts = ada['total_lawsuits']

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Left: yearly lawsuit volume as bars with a trend line on top,
# each year annotated with its count.
ax1.bar(lawsuit_years, lawsuit_counts, color='#e74c3c', alpha=0.85, edgecolor='#c0392b')
ax1.plot(lawsuit_years, lawsuit_counts, 'o-', color='#2c3e50', linewidth=2)
ax1.set_title('ADA Digital Accessibility Lawsuits', fontweight='bold')
ax1.set_ylabel('Number of Lawsuits')
ax1.set_xlabel('Year')
for yr, val in zip(lawsuit_years, lawsuit_counts):
    ax1.annotate(f'{val:,}', xy=(yr, val), ha='center', va='bottom', fontsize=9, fontweight='bold')

# Right: 2024 split between federal and state court filings.
bd = ada['breakdown_2024']
labels = ['Federal\nCourt', 'State\nCourt']
values = [bd['federal_court'], bd['state_court']]
ax2.pie(
    values,
    labels=labels,
    autopct='%1.0f%%',
    colors=['#3498db', '#e67e22'],
    startangle=90,
    textprops={'fontsize': 12},
)
ax2.set_title(f'2024 Lawsuit Breakdown (n={bd["total"]:,})', fontweight='bold')

plt.tight_layout()
plt.show()

# Growth between the first and last year in the series, as a percentage.
first_count, latest_count = lawsuit_counts[0], lawsuit_counts[-1]
growth_pct = (latest_count / first_count - 1) * 100
print(f'Total growth: {first_count:,} ({lawsuit_years[0]}) → {latest_count:,} ({lawsuit_years[-1]}) = {growth_pct:.0f}% increase')
print(f'E-commerce accounts for {bd["ecommerce_pct"]}% of cases')
print(f'NY and CA combined: {bd["ny_ca_combined_pct"]}% of all lawsuits')

## 7. European Disability Data — Eurostat GALI

EU-wide disability data using the Global Activity Limitation Indicator from the EU Statistics on Income and Living Conditions survey.

In [None]:
# Load the Eurostat GALI dataset; `eu` is reused by the cross-dataset cell.
with open(DATA_DIR / 'eurostat_disability_eu.json') as f:
    eu = json.load(f)


def _gali(country):
    """Return a country's GALI indicator dict, or {} when it is missing or null."""
    return country.get('gali_indicator') or {}


# Extract country-level data (drop the EU27 / euro-area aggregate rows)
countries = [c for c in eu['countries'] if c['country_code'] not in ('EU27_2020', 'EA20')]
countries_with_data = [c for c in countries
                       if _gali(c).get('some_or_severe_limitation') is not None]

# Sort by disability rate, highest first
countries_sorted = sorted(countries_with_data,
                          key=lambda c: _gali(c)['some_or_severe_limitation'],
                          reverse=True)

top_20 = countries_sorted[:20]
names = [c['country_name'][:20] for c in top_20]
rates = [_gali(c)['some_or_severe_limitation'] for c in top_20]
# A country can have a combined rate but a null/missing severe rate; draw
# that as a zero-length bar instead of passing None to matplotlib.
severe = [_gali(c).get('severe_limitation') or 0 for c in top_20]

eu27 = eu['eu27_summary_2023']
eu27_rate = eu27['total_disability_rate_pct']

fig, ax = plt.subplots(figsize=(14, 8))
# The severe-only bar is drawn over the combined bar; lists are reversed so
# the highest rate appears at the top of the chart.
ax.barh(names[::-1], rates[::-1], color='#3498db', alpha=0.7, label='Some + Severe')
ax.barh(names[::-1], severe[::-1], color='#e74c3c', alpha=0.9, label='Severe only')
ax.axvline(x=eu27_rate, color='gray', linestyle='--',
           label=f'EU27 avg ({eu27_rate}%)')
ax.set_xlabel('Activity Limitation Rate (%)')
ax.set_title('Disability Prevalence Across Europe (GALI, 2023)', fontweight='bold')
ax.legend()

plt.tight_layout()
plt.show()

print(f'EU27 average: {eu27_rate}% activity limitation')
print(f'EU employment gap: {eu27["employment_gap_pp"]} percentage points')
print(f'Severe limitation employment gap: {eu27["severe_employment_gap_pp"]} pp')

## 8. IDEA Special Education Trends (1976-2023)

Individuals with Disabilities Education Act data showing 47 years of special education enrollment across 13 disability categories.

In [None]:
with open(DATA_DIR / 'idea_special_education_enriched.json') as f:
    idea = json.load(f)

# Historical totals are stored as a dict mapping school-year label -> count.
hist_served = idea['historical_trends_1976_2023']['total_served_by_year']
hist_years = sorted(hist_served)
hist_total = [hist_served[label] for label in hist_years]

# Plot against the starting calendar year of each label ("1976-77" -> 1976).
hist_year_nums = [int(label.partition('-')[0]) for label in hist_years]
hist_millions = [total / 1e6 for total in hist_total]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Left: total students served over time, in millions.
ax1.fill_between(hist_year_nums, hist_millions, alpha=0.2, color='#3498db')
ax1.plot(hist_year_nums, hist_millions, 'o-', color='#3498db', linewidth=2, markersize=4)
ax1.set_title('Students Served Under IDEA (1976-2023)', fontweight='bold')
ax1.set_ylabel('Students (Millions)')
ax1.set_xlabel('Year')


def _category_count(entry):
    """Return a category's student count, treating a missing count as 0."""
    return entry.get('count', 0)


# Right: the ten largest disability categories, largest at the top.
categories = idea['disability_categories_2022_23']
cat_sorted = sorted(categories, key=_category_count, reverse=True)
top_cats = cat_sorted[:10]
cat_names = [entry['category'][:25] for entry in top_cats]
cat_counts = [_category_count(entry) / 1e6 for entry in top_cats]

gradient = plt.cm.viridis(np.linspace(0.2, 0.9, len(top_cats)))
bars = ax2.barh(list(reversed(cat_names)), list(reversed(cat_counts)), color=gradient)
ax2.set_xlabel('Students (Millions)')
ax2.set_title('Students by Disability Category (2022-23)', fontweight='bold')

plt.tight_layout()
plt.show()

ns = idea['national_summary_2022_23']
print(f'Total students served (2022-23): {ns["total_students_served"]:,}')
print(f'Percent of enrollment: {ns["percent_of_public_school_enrollment"]}%')
print(f'Disability categories: {len(categories)}')

## 9. County-Level Disability Map (3,200+ Counties)

Census ACS disability rates at the county level, showing geographic variation across the US.

In [None]:
# County-level disability rates; rows with an empty 'disability_rate' are skipped.
county_rows = read_csv_as_dicts(DATA_DIR / 'census_disability_by_county_2022.csv')
county_rates = [float(r['disability_rate']) for r in county_rows if r.get('disability_rate')]
print(f'County data: {len(county_rows):,} counties')
print(f'Columns: {list(county_rows[0].keys())}')
print(f'\nDisability rate range: {min(county_rates):.1f}% - {max(county_rates):.1f}%')
# np.median averages the two middle values for an even number of counties;
# indexing the sorted list at len//2 would pick the upper-middle value instead.
county_median = float(np.median(county_rates))
county_mean = sum(county_rates) / len(county_rates)
print(f'Median: {county_median:.1f}%')
print(f'Mean: {county_mean:.1f}%')

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 5))

# Distribution
ax1.hist(county_rates, bins=50, color='#3498db', alpha=0.7, edgecolor='white')
ax1.axvline(county_median, color='#e74c3c', linestyle='--', label=f'Median: {county_median:.1f}%')
ax1.set_title('Distribution of County Disability Rates', fontweight='bold')
ax1.set_xlabel('Disability Rate (%)')
ax1.set_ylabel('Number of Counties')
ax1.legend()

# Top/bottom states by average county disability rate. This is the plain
# (unweighted) mean of each state's county rates.
state_col = 'state_name' if 'state_name' in county_rows[0] else 'state'
state_sums = {}
state_counts = {}
for r in county_rows:
    st = r.get(state_col, '')
    rate = r.get('disability_rate')
    if st and rate:
        state_sums[st] = state_sums.get(st, 0) + float(rate)
        state_counts[st] = state_counts.get(st, 0) + 1

state_avgs = {s: state_sums[s] / state_counts[s] for s in state_sums}
sorted_states = sorted(state_avgs.items(), key=lambda x: x[1], reverse=True)
top_10 = sorted_states[:10]
# Take the bottom five from what is left after the top ten, so a state can
# never appear in both groups when fewer than 15 states are present.
bottom_5 = sorted_states[10:][-5:]
combined = top_10 + bottom_5
# Reverse so the highest-rate state is drawn at the top of the chart.
names = [s[0] for s in combined][::-1]
vals = [s[1] for s in combined][::-1]
# Colours follow the reversed order: lowest-rate states first (green), then
# the highest-rate states (red); sized from the actual group lengths.
colors_tb = ['#2ecc71'] * len(bottom_5) + ['#e74c3c'] * len(top_10)
ax2.barh(names, vals, color=colors_tb)
ax2.set_xlabel('Average County Disability Rate (%)')
ax2.set_title('States: Highest & Lowest Disability Rates', fontweight='bold')

plt.tight_layout()
plt.show()

## 10. WHO Healthy Life Expectancy (HALE)

World Health Organization data on healthy life expectancy — the years lived in good health versus total life expectancy.

In [None]:
# Load the WHO HALE table and show its size, columns and a short preview.
hale_rows = read_csv_as_dicts(DATA_DIR / 'who_healthy_life_expectancy.csv')
hale_columns = list(hale_rows[0].keys())
print(f'WHO HALE data: {len(hale_rows):,} records')
print(f'Columns: {hale_columns}')
print(f'\nFirst 5 rows:')
for preview_row in hale_rows[:5]:
    print(f'  {preview_row}')

In [None]:
# Summarise each HALE column: how many distinct non-empty values it holds,
# plus up to ten of them (sorted) when the column has fewer than 20.
print('Column types and unique value counts:')
for col in hale_rows[0]:
    distinct = {row[col] for row in hale_rows if row.get(col)}
    summary = f'  {col}: {len(distinct)} unique'
    if len(distinct) < 20:
        summary += f' - {sorted(distinct)[:10]}'
    print(summary)

## 11. Section 508 Federal Compliance

GSA assessment of federal government website accessibility — a sobering look at how the government's own sites measure up.

In [None]:
with open(DATA_DIR / 'section_508_compliance_2024.json') as f:
    s508 = json.load(f)

kf = s508['key_findings']
public_pct = kf['public_websites_conforming_pct']
intranet_pct = kf['intranet_pages_conforming_pct']

labels = ['Public Websites\nConforming', 'Intranet Pages\nConforming']
values = [public_pct, intranet_pct]
# Whatever is not conforming fills each bar up to 100%.
remainder = [100 - v for v in values]

fig, ax = plt.subplots(figsize=(8, 5))

# Stacked bars: conforming share at the bottom, non-conforming share on top.
x = np.arange(len(labels))
conforming_bars = ax.bar(x, values, 0.5, label='Conforming', color='#2ecc71')
nonconforming_bars = ax.bar(x, remainder, 0.5, bottom=values, label='Non-conforming',
                            color='#e74c3c', alpha=0.7)
ax.set_ylabel('Percentage (%)')
ax.set_title(f'Federal Section 508 Compliance ({s508["year"]})', fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
ax.bar_label(conforming_bars, fmt='%d%%', label_type='center', fontsize=14,
             fontweight='bold', color='white')

plt.tight_layout()
plt.show()

print(f'Only {public_pct}% of federal public websites conform to Section 508')
print(f'{s508["reporting_entities"]} federal entities assessed across {s508["assessment_criteria"]} criteria')
print(f'Trend: {kf["conformance_trend"]}')

## 12. Disability by Race/Ethnicity and Characteristics

Census data on disability types and racial/ethnic disparities.

In [None]:
def _load_json(filename):
    """Load one JSON file from DATA_DIR and return the parsed content."""
    with open(DATA_DIR / filename) as handle:
        return json.load(handle)


race = _load_json('census_disability_by_race_2022.json')
chars = _load_json('census_disability_characteristics_2022.json')

# Preview only the first 1000 characters of each pretty-printed dataset.
previews = (
    ('Race/ethnicity data:', race),
    ('\nCharacteristics data:', chars),
)
for heading, payload in previews:
    print(heading)
    print(json.dumps(payload, indent=2)[:1000])

## 13. Sign Language & AAC Datasets

Assistive technology datasets: WLASL (sign language video index), AAC vocabulary data, and VizWiz (visual question answering annotations for blind users).

In [None]:
# WLASL - Word-Level American Sign Language: size, columns and a short preview.
wlasl_rows = read_csv_as_dicts(DATA_DIR / 'wlasl_index.csv')
wlasl_columns = list(wlasl_rows[0].keys())
print(f'WLASL dataset: {len(wlasl_rows):,} sign entries')
print(f'Columns: {wlasl_columns}')
print(f'\nFirst 5 entries:')
for entry in wlasl_rows[:5]:
    print(f'  {entry}')

In [None]:
# AAC Vocabulary Data: print the top-level structure of the file.
with open(DATA_DIR / 'aac_vocabulary_data.json') as f:
    aac = json.load(f)

aac_is_mapping = isinstance(aac, dict)
if aac_is_mapping:
    overview = list(aac.keys())
else:
    overview = "list of " + str(len(aac))
print(f'AAC dataset keys: {overview}')

# For a dict, describe each entry: element count for lists, key count for
# nested dicts, and the value itself for anything else.
for k, v in (aac.items() if aac_is_mapping else ()):
    if isinstance(v, list):
        detail = f'{len(v)} items'
    elif isinstance(v, dict):
        detail = f'{len(v)} keys'
    else:
        detail = v
    print(f'  {k}: {detail}')

In [None]:
# VizWiz - Visual Question Answering for Blind Users: size, columns and a short preview.
vizwiz_rows = read_csv_as_dicts(DATA_DIR / 'vizwiz_val_annotations.csv')
vizwiz_columns = list(vizwiz_rows[0].keys())
print(f'VizWiz dataset: {len(vizwiz_rows):,} image annotations')
print(f'Columns: {vizwiz_columns}')
print(f'\nFirst 5 entries:')
for entry in vizwiz_rows[:5]:
    print(f'  {entry}')

## 14. Dataset Inventory

All accessibility datasets in this collection.

In [None]:
from pathlib import Path


def _count_records(path):
    """Return a best-effort record count for one data file.

    JSON: the length of a top-level list; for a top-level dict, the summed
    lengths of its list/dict values plus 1 for every other value; otherwise 1.
    CSV: the number of non-empty rows after the header row.
    Any other suffix, or a file that cannot be read or parsed, yields '?'.
    """
    try:
        if path.suffix == '.json':
            with open(path, encoding='utf-8-sig') as fh:
                data = json.load(fh)
            if isinstance(data, list):
                return len(data)
            if isinstance(data, dict):
                return sum(len(v) if isinstance(v, (list, dict)) else 1 for v in data.values())
            return 1
        if path.suffix == '.csv':
            # Parse with the csv module so quoted multi-line fields count as a
            # single record; the `with` block guarantees the handle is closed.
            with open(path, newline='', encoding='utf-8-sig') as fh:
                non_empty_rows = sum(1 for row in csv.reader(fh) if row)
            return max(non_empty_rows - 1, 0)  # minus the header row
    except (OSError, ValueError, csv.Error):
        # ValueError covers both JSON decode errors and Unicode decode errors.
        return '?'
    return '?'


# Catalog all data files (visible .json / .csv / .xlsx files in DATA_DIR)
catalog = []
for path in sorted(DATA_DIR.glob('*')):
    if not path.is_file() or path.name.startswith('.'):
        continue
    if path.suffix not in ('.json', '.csv', '.xlsx'):
        continue
    catalog.append({
        'file': path.name,
        'format': path.suffix[1:].upper(),
        'size_kb': round(path.stat().st_size / 1024, 1),
        'records': _count_records(path),
    })

total_kb = sum(c['size_kb'] for c in catalog)
print(f'Total datasets: {len(catalog)}')
print(f'Total size: {total_kb:.0f} KB ({total_kb/1024:.1f} MB)\n')

# Print as formatted table, largest files first
print(f'{"File":<55} {"Format":<6} {"Size (KB)":<10} {"Records"}')
print('-' * 85)
for c in sorted(catalog, key=lambda x: x['size_kb'], reverse=True):
    print(f'{c["file"]:<55} {c["format"]:<6} {c["size_kb"]:<10} {c["records"]}')

## 15. Cross-Dataset Analysis: The Disability Landscape

Pulling threads across all datasets to paint a unified picture.

In [None]:
# Four-panel summary figure. This cell relies on variables created by earlier
# cells (t_pct, t_dis, eu, bls, ada, trends, webaim, idea, s508, sr), so those
# cells must have been run first.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# 1. US vs EU disability rate comparison
ax = axes[0, 0]
us_rate = t_pct[-1]  # last entry of the Census percentage series
eu_rate = eu['eu27_summary_2023']['total_disability_rate_pct']
ax.bar(['United States\n(Census ACS)', 'European Union\n(GALI/EU-SILC)'], 
       [us_rate, eu_rate], color=['#3498db', '#f39c12'], width=0.5)
ax.set_ylabel('Disability Prevalence (%)')
ax.set_title('US vs EU Disability Rates', fontweight='bold')
# Caveat printed inside the plot area, positioned in axes-fraction coordinates
ax.annotate('Different methodologies — not directly comparable', 
            xy=(0.5, 0.02), xycoords='axes fraction', ha='center', fontsize=9, style='italic', color='gray')
# Value label just above each bar
for i, v in enumerate([us_rate, eu_rate]):
    ax.text(i, v + 0.3, f'{v}%', ha='center', fontweight='bold', fontsize=14)

# 2. Employment gap comparison
ax = axes[0, 1]
bls_stats = bls['overall_statistics']
# US gap = without-disability ratio minus with-disability ratio (percentage points)
# NOTE(review): assumes neither ratio is None — confirm against the BLS file.
us_emp_gap = bls_stats['without_disability']['employment_population_ratio'] - bls_stats['with_disability']['employment_population_ratio']
eu_emp_gap = eu['eu27_summary_2023']['employment_gap_pp']
ax.bar(['US Employment\nGap', 'EU Employment\nGap'], [us_emp_gap, eu_emp_gap], 
       color=['#e74c3c', '#e67e22'], width=0.5)
ax.set_ylabel('Gap (Percentage Points)')
ax.set_title('Disability Employment Gap: US vs EU', fontweight='bold')
for i, v in enumerate([us_emp_gap, eu_emp_gap]):
    ax.text(i, v + 0.3, f'{v:.1f}pp', ha='center', fontweight='bold', fontsize=14)

# 3. Web accessibility vs lawsuits (dual axis)
ax = axes[1, 0]
# Only years present in both the lawsuit and the WebAIM series are plotted
# NOTE(review): the membership test requires both 'years' lists to use the same
# element type (e.g. both int) — confirm.
common_years = [y for y in ada['years'] if y in trends['years']]
ada_idx = [ada['years'].index(y) for y in common_years]
webaim_idx = [trends['years'].index(y) for y in common_years]
ax.bar(common_years, [ada['total_lawsuits'][i] for i in ada_idx], color='#e74c3c', alpha=0.6, label='Lawsuits')
# Second y-axis sharing the same x-axis for the failure-rate line
ax2_twin = ax.twinx()
ax2_twin.plot(common_years, [trends['pages_with_failures_pct'][i] for i in webaim_idx], 
              'o-', color='#3498db', linewidth=2, label='% Sites Failing')
ax.set_ylabel('Lawsuits Filed', color='#e74c3c')
ax2_twin.set_ylabel('Sites with Failures (%)', color='#3498db')
ax.set_title('Lawsuits vs Web Compliance', fontweight='bold')
# Each axis draws its own legend, placed in opposite corners
ax.legend(loc='upper left')
ax2_twin.legend(loc='upper right')

# 4. Key stats dashboard (text only, axes hidden)
ax = axes[1, 1]
ax.axis('off')
idea_ns = idea['national_summary_2022_23']
stats_text = [
    f'US disability prevalence: {us_rate}%',
    f'Americans with disabilities: {t_dis[-1]:,.0f}',
    f'IDEA students served: {idea_ns["total_students_served"]:,}',
    f'Employment rate (w/disability): {bls_stats["with_disability"]["employment_population_ratio"]}%',
    f'Web pages with WCAG failures: {webaim["summary"]["pages_with_wcag_failures_pct"]}%',
    f'Federal sites conforming: {s508["key_findings"]["public_websites_conforming_pct"]}%',
    f'ADA lawsuits (2024): {ada["total_lawsuits"][-1]:,}',
    f'Screen reader users surveyed: {sr["respondents"]:,}',
    f'#1 web problem: {sr["problematic_items_ranked"][0]["item"]}',
]
ax.set_title('Key Stats at a Glance', fontweight='bold', fontsize=14, pad=20)
# One line of text per stat, stepping down the panel in axes coordinates
for i, line in enumerate(stats_text):
    ax.text(0.05, 0.9 - i*0.1, line, fontsize=11, transform=ax.transAxes, 
            fontfamily='monospace')

plt.tight_layout()
plt.show()

---

## Data Sources & Credits

| Dataset | Source | Coverage |
|---------|--------|----------|
| Census ACS S1810/B18101 | US Census Bureau | 2010-2023 national trends |
| Census County Disability | US Census Bureau ACS 5-year | 3,200+ counties (2022) |
| BLS Employment | Bureau of Labor Statistics CPS | 2009-2024 annual |
| WebAIM Million | WebAIM.org | 2019-2025 (1M sites/year) |
| Screen Reader Survey | WebAIM Survey #10 | 1,539 respondents (2024) |
| ADA Lawsuits | UsableNet, EcomBack | 2017-2024 |
| Section 508 | GSA FY24 Assessment | 245 federal entities |
| Eurostat GALI | EU-SILC | 30+ EU/EEA countries (2023) |
| IDEA | NCES Digest of Education | 1976-2023, 13 categories |
| WHO HALE | World Health Organization | Global life expectancy |
| WLASL | Li et al. (2020) | 2,001 ASL signs |
| VizWiz | VizWiz Challenge | 4,320 image annotations |
| AAC Vocabulary | Research compilation | Communication patterns |

**Author**: Luke Steuber | **License**: CC-BY-4.0 | **Repository**: github.com/lukeslp/accessibility-atlas