# Brazil-Wide Heat Vulnerability Analysis

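Batch processing of Brazilian urban centres (by default the 200 most populous with more than 50,000 inhabitants) using Google Earth Engine map-reduce. For each city the notebook calculates the heat vulnerability ratio (the share of the population aged 0–4 or 65 and over) and summary statistics for every five-year age cohort.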

In [1]:
import ee
import pandas as pd
import numpy as np
import time
from datetime import datetime
import ipywidgets as widgets
from IPython.display import display, clear_output

# Initialize the Earth Engine client against the 'tl-cities' cloud project.
# NOTE(review): presumably requires credentials from a prior ee.Authenticate() — confirm for fresh environments.
ee.Initialize(project='tl-cities')
print('✅ GEE initialized for Brazil-wide processing')

✅ GEE initialized for Brazil-wide processing


In [2]:
# Load datasets
# allCities: urban-centre features from a project-hosted asset; later cells read
# its GC_CNT_GAD, GC_POP_TOT, GC_UCN_MAI and GC_UCA_KM2 properties.
allCities = ee.FeatureCollection('projects/tl-cities/assets/GHS_UCDB_THEME_HAZARD_RISK_GLOBE_R2024A')
# WorldPop age/sex population images (the asset ID names the 100 m product).
worldpopCollection = ee.ImageCollection('WorldPop/GP/100m/pop_age_sex')

# Get WorldPop data for 2020 (confirmed available year)
# mosaic() merges every image matching the year filter into a single image.
# This module-level image is what the per-city map function reads.
worldpop_2020 = worldpopCollection.filter(ee.Filter.eq('year', 2020)).mosaic()

# Each getInfo() below is a blocking request to Earth Engine.
print(f"Total cities globally: {allCities.size().getInfo()}")
print(f"WorldPop 2020 bands: {len(worldpop_2020.bandNames().getInfo())}")
print('✅ Datasets loaded')

Total cities globally: 11422
WorldPop 2020 bands: 37
✅ Datasets loaded


## Brazilian Cities Filtering and Preparation

In [3]:
def get_filtered_brazilian_cities(min_population=50000, max_cities=200):
    """Return the most populous Brazilian urban centres and print an overview.

    The module-level ``allCities`` collection is narrowed to features whose
    ``GC_CNT_GAD`` equals ``'Brazil'`` and whose ``GC_POP_TOT`` is strictly
    greater than ``min_population``. Matches are ordered by descending
    ``GC_POP_TOT`` and capped at ``max_cities``.

    Args:
        min_population: Exclusive lower bound applied to ``GC_POP_TOT``.
        max_cities: Maximum number of features kept after sorting.

    Returns:
        The filtered, sorted and limited feature collection.
    """
    print('🇧🇷 Filtering Brazilian cities...')

    # Build both predicates first, then chain them onto the global collection.
    in_brazil = ee.Filter.eq('GC_CNT_GAD', 'Brazil')
    over_threshold = ee.Filter.gt('GC_POP_TOT', min_population)
    largest_first = (
        allCities
        .filter(in_brazil)
        .filter(over_threshold)
        .sort('GC_POP_TOT', False)  # False = descending, largest cities first
    )
    brazilian_cities = largest_first.limit(max_cities)

    # Blocking request: fetch only the number of matching features.
    total_count = brazilian_cities.size().getInfo()

    print(f'✅ Found {total_count} Brazilian cities')
    print(f'   Minimum population: {min_population:,}')
    print(f'   Maximum cities: {max_cities}')

    # Second blocking request: pull the first ten features for a preview.
    preview = brazilian_cities.limit(10).getInfo()
    print('\n📊 Top 10 cities by population:')
    for rank, feature in enumerate(preview['features'], start=1):
        attrs = feature['properties']
        name = attrs.get('GC_UCN_MAI', 'Unknown')
        pop = attrs.get('GC_POP_TOT', 0)
        area = attrs.get('GC_UCA_KM2', 0)
        print(f'  {rank:2d}. {name:20s} - Pop: {pop:>10,.0f}, Area: {area:>6.1f} km²')

    return brazilian_cities

# Test the filtering
# Runs with the defaults (min_population=50000, max_cities=200) and keeps the
# returned collection in a module-level variable.
brazilian_cities = get_filtered_brazilian_cities()
# size().getInfo() issues one more Earth Engine request just to print the count.
print(f'\n🎯 Ready to process {brazilian_cities.size().getInfo()} Brazilian cities')

🇧🇷 Filtering Brazilian cities...
✅ Found 200 Brazilian cities
   Minimum population: 50,000
   Maximum cities: 200

📊 Top 10 cities by population:
   1. São Paulo            - Pop: 19,485,158, Area: 2111.0 km²
   2. Rio de Janeiro       - Pop:  9,853,693, Area: 1285.0 km²
   3. Belo Horizonte       - Pop:  4,376,747, Area:  609.0 km²
   4. Recife               - Pop:  3,847,558, Area:  491.0 km²
   5. Fortaleza            - Pop:  3,324,149, Area:  413.0 km²
   6. Salvador             - Pop:  3,305,396, Area:  323.0 km²
   7. Porto Alegre         - Pop:  2,763,350, Area:  599.0 km²
   8. Curitiba             - Pop:  2,512,877, Area:  566.0 km²
   9. Goiânia              - Pop:  2,457,483, Area:  531.0 km²
  10. Manaus               - Pop:  2,368,805, Area:  272.0 km²

🎯 Ready to process 200 Brazilian cities


## Map-Reduce Functions for Heat Vulnerability

In [4]:
def create_heat_vulnerability_map_function(scale=90, max_pixels=1e8):
    """Build the per-city function to pass to ``FeatureCollection.map``.

    Args:
        scale: Pixel scale in metres passed to every ``reduceRegion`` call.
            Defaults to 90, the value this notebook has always used.
            NOTE(review): the WorldPop asset ID names a 100 m product, so 90 is
            probably not the native pixel size; summing counts at a non-native
            scale may bias ``total_worldpop_population`` — confirm.
        max_pixels: ``maxPixels`` budget passed to every ``reduceRegion`` call.

    Returns:
        A function that takes one city feature and returns it with these
        extra properties:

        * ``<cohort>_{mean,median,std,min,max,q25,q75,count}`` for each age
          cohort and for ``heat_vuln_ratio`` — statistics of the per-pixel
          percentage ``cohort / (total + 0.001) * 100`` inside the city
          geometry (an unweighted pixel statistic, not a population-weighted
          city share);
        * ``total_worldpop_population`` — the sum of all age/sex bands inside
          the city geometry.

    Raises:
        ValueError: If ``scale`` or ``max_pixels`` is not positive.
    """
    if scale <= 0:
        raise ValueError(f'scale must be positive, got {scale!r}')
    if max_pixels <= 0:
        raise ValueError(f'max_pixels must be positive, got {max_pixels!r}')

    # WorldPop band names grouped per output cohort. 'heat_vuln_ratio' is a
    # derived group (ages 0-4 plus 65 and over) that reuses bands listed above.
    age_cohort_bands = {
        'age_0_4': ['M_0', 'M_1', 'F_0', 'F_1'],
        'age_5_9': ['M_5', 'F_5'],
        'age_10_14': ['M_10', 'F_10'],
        'age_15_19': ['M_15', 'F_15'],
        'age_20_24': ['M_20', 'F_20'],
        'age_25_29': ['M_25', 'F_25'],
        'age_30_34': ['M_30', 'F_30'],
        'age_35_39': ['M_35', 'F_35'],
        'age_40_44': ['M_40', 'F_40'],
        'age_45_49': ['M_45', 'F_45'],
        'age_50_54': ['M_50', 'F_50'],
        'age_55_59': ['M_55', 'F_55'],
        'age_60_64': ['M_60', 'F_60'],
        'age_65_69': ['M_65', 'F_65'],
        'age_70_74': ['M_70', 'F_70'],
        'age_75_79': ['M_75', 'F_75'],
        'age_80_plus': ['M_80', 'F_80'],
        'heat_vuln_ratio': ['M_0', 'M_1', 'F_0', 'F_1', 'M_65', 'F_65', 'M_70', 'F_70', 'M_75', 'F_75', 'M_80', 'F_80']
    }

    # Bands that make up the total population: every real cohort, skipping the
    # derived group so no band is counted twice. Order is preserved.
    total_bands = list(dict.fromkeys(
        band
        for cohort_name, bands in age_cohort_bands.items()
        if cohort_name != 'heat_vuln_ratio'
        for band in bands
    ))

    # Output property suffix -> key in the reduceRegion result. The reduced
    # images carry a single band named 'sum' (from Reducer.sum()), and the
    # combined reducer appends its own output name to that band name.
    stat_keys = {
        'mean': 'sum_mean',
        'median': 'sum_median',
        'std': 'sum_stdDev',
        'min': 'sum_min',
        'max': 'sum_max',
        'q25': 'sum_p25',
        'q75': 'sum_p75',
        'count': 'sum_count',
    }

    def map_city_heat_vulnerability(city_feature):
        """Map function: Calculate heat vulnerability statistics for one city"""
        city_geometry = city_feature.geometry()

        # worldpop_2020 is read from module scope when the function is called.
        total_pop_image = worldpop_2020.select(total_bands).reduce(ee.Reducer.sum())

        # Loop-invariant pieces, built once per city instead of once per cohort.
        # The small constant avoids division by zero on empty pixels.
        denominator = total_pop_image.add(0.001)
        summary_reducer = (
            ee.Reducer.mean().combine(ee.Reducer.median(), sharedInputs=True)
            .combine(ee.Reducer.stdDev(), sharedInputs=True)
            .combine(ee.Reducer.minMax(), sharedInputs=True)
            .combine(ee.Reducer.percentile([25, 75]), sharedInputs=True)
            .combine(ee.Reducer.count(), sharedInputs=True)
        )

        cohort_stats = {}
        for cohort_name, bands in age_cohort_bands.items():
            cohort_image = worldpop_2020.select(bands).reduce(ee.Reducer.sum())

            # Per-pixel percentage: (cohort / total) * 100
            percentage_image = cohort_image.divide(denominator).multiply(100)

            stats = percentage_image.reduceRegion(
                reducer=summary_reducer,
                geometry=city_geometry,
                scale=scale,
                maxPixels=max_pixels,
                bestEffort=True
            )

            for suffix, reducer_key in stat_keys.items():
                cohort_stats[f'{cohort_name}_{suffix}'] = stats.get(reducer_key)

        # Total population inside the city boundary
        total_pop_stats = total_pop_image.reduceRegion(
            reducer=ee.Reducer.sum(),
            geometry=city_geometry,
            scale=scale,
            maxPixels=max_pixels,
            bestEffort=True
        )
        cohort_stats['total_worldpop_population'] = total_pop_stats.get('sum')

        # Return city feature with computed statistics as properties
        return city_feature.set(cohort_stats)

    return map_city_heat_vulnerability

# Only the factory is defined in this cell; it is first called by process_cities_batch.
print('✅ Map-reduce function created')

✅ Map-reduce function created


## Batch Processing Implementation

In [5]:
def process_cities_batch(cities_collection, batch_size=10):
    """Compute statistics for one batch of cities and tabulate them.

    The heat-vulnerability map function is applied to ``cities_collection``,
    the result is fetched with ``getInfo()``, and one row per feature is built
    from its properties.

    Args:
        cities_collection: Collection of city features to process.
        batch_size: Used only in the progress message.

    Returns:
        A ``pandas.DataFrame`` with the basic city columns plus every property
        whose name starts with a known cohort prefix, or ``None`` if any
        exception was raised (the error is printed, not re-raised).
    """
    # Property-name prefixes that identify computed cohort statistics.
    cohort_prefixes = (
        'age_0_4_', 'age_5_9_', 'age_10_14_', 'age_15_19_', 'age_20_24_',
        'age_25_29_', 'age_30_34_', 'age_35_39_', 'age_40_44_', 'age_45_49_',
        'age_50_54_', 'age_55_59_', 'age_60_64_', 'age_65_69_', 'age_70_74_',
        'age_75_79_', 'age_80_plus_', 'heat_vuln_ratio_',
    )

    print(f'🔄 Processing batch of {batch_size} cities...')
    started_at = time.time()

    try:
        per_city_stats = create_heat_vulnerability_map_function()
        enriched = cities_collection.map(per_city_stats)

        # getInfo() is the blocking call that runs the server-side computation.
        print('   📊 Computing statistics...')
        payload = enriched.getInfo()

        records = []
        for feature in payload['features']:
            props = feature['properties']

            # Identification columns first, then every cohort statistic.
            record = {
                'city_name': props.get('GC_UCN_MAI', 'Unknown'),
                'country': props.get('GC_CNT_GAD', 'Unknown'),
                'population_estimate': props.get('GC_POP_TOT', 0),
                'area_km2': props.get('GC_UCA_KM2', 0),
                'total_worldpop_population': props.get('total_worldpop_population', 0)
            }
            record.update({
                key: value
                for key, value in props.items()
                if key.startswith(cohort_prefixes)
            })
            records.append(record)

        df = pd.DataFrame(records)

        elapsed = time.time() - started_at
        print(f'   ✅ Batch completed in {elapsed:.1f} seconds')
        print(f'   📊 Processed {len(df)} cities successfully')

        return df

    except Exception as e:
        # Best effort: report the failure and let the caller skip this batch.
        print(f'   ❌ Batch processing failed: {e}')
        return None

def process_all_brazilian_cities_in_batches(batch_size=10, max_batches=None):
    """Process the filtered Brazilian cities in sequential batches.

    Cities come from ``get_filtered_brazilian_cities()`` with its defaults.
    Each batch is sliced with ``toList(batch_size, offset)`` and handed to
    ``process_cities_batch``. A failed batch is reported and skipped.

    Args:
        batch_size: Number of cities per batch; must be at least 1.
        max_batches: Optional cap on the number of batches. A falsy value
            (``None`` or 0) means "process every batch".

    Returns:
        A ``pandas.DataFrame`` concatenating all successful batches, or
        ``None`` when no batch produced any rows.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # Fail early: 0 would divide by zero in the batch-count calculation and a
    # negative value would silently produce an empty run.
    if batch_size < 1:
        raise ValueError(f'batch_size must be at least 1, got {batch_size!r}')

    print(f'🚀 Starting Brazil-wide heat vulnerability analysis')
    print(f'   Batch size: {batch_size} cities')
    print(f'   Max batches: {max_batches or "All"}')
    print('='*60)

    # Get filtered cities
    cities = get_filtered_brazilian_cities()
    total_cities = cities.size().getInfo()
    total_batches = (total_cities + batch_size - 1) // batch_size  # ceiling division

    if max_batches:
        total_batches = min(total_batches, max_batches)

    print(f'\n📊 Processing plan:')
    print(f'   Total cities: {total_cities}')
    print(f'   Total batches: {total_batches}')
    print(f'   Cities per batch: {batch_size}')

    all_results = []
    start_time = datetime.now()

    for batch_idx in range(total_batches):
        start_idx = batch_idx * batch_size
        end_idx = min(start_idx + batch_size, total_cities)
        # The final batch may be shorter than batch_size; report its real size.
        cities_in_batch = end_idx - start_idx

        print(f'\n📦 Batch {batch_idx + 1}/{total_batches} (Cities {start_idx + 1}-{end_idx})')

        try:
            # Get batch of cities
            batch_cities_list = cities.toList(batch_size, start_idx)
            batch_cities = ee.FeatureCollection(batch_cities_list)

            # Process batch
            batch_df = process_cities_batch(batch_cities, cities_in_batch)

            if batch_df is not None and len(batch_df) > 0:
                all_results.append(batch_df)
                print(f'   ✅ Batch {batch_idx + 1} successful - {len(batch_df)} cities')
            else:
                print(f'   ❌ Batch {batch_idx + 1} failed or returned no data')

            # Add delay to respect GEE rate limits
            if batch_idx < total_batches - 1:  # Don't delay after last batch
                print('   ⏳ Waiting 3 seconds before next batch...')
                time.sleep(3)

        except Exception as e:
            # Best effort: one bad batch must not abort the remaining ones.
            print(f'   ❌ Batch {batch_idx + 1} error: {e}')
            continue

    if not all_results:
        print('\n❌ No successful batches - processing failed')
        return None

    # Combine all successful batches
    final_df = pd.concat(all_results, ignore_index=True)

    # Calculate processing summary
    end_time = datetime.now()
    total_time = (end_time - start_time).total_seconds()

    print(f'\n🎉 Brazil-wide processing completed!')
    print(f'   Total cities processed: {len(final_df)}')
    print(f'   Successful batches: {len(all_results)}/{total_batches}')
    print(f'   Total processing time: {total_time:.1f} seconds')
    print(f'   Average time per city: {total_time/len(final_df):.1f} seconds')

    return final_df

# Definitions only: nothing is sent to Earth Engine until one of the functions above is called.
print('✅ Batch processing functions ready')

✅ Batch processing functions ready


## Prototype Test: Process Small Batch

In [6]:
# Test with a small batch first
print('🧪 PROTOTYPE TEST: Processing 5 largest Brazilian cities')
print('='*60)

# Process just 1 batch of 5 cities for testing
prototype_results = process_all_brazilian_cities_in_batches(
    batch_size=5, 
    max_batches=1  # Only process 1 batch for testing
)

if prototype_results is not None:
    print(f'\n📊 PROTOTYPE RESULTS:')
    print(f'   Cities processed: {len(prototype_results)}')
    print(f'   Columns: {len(prototype_results.columns)}')
    
    # Show basic info
    print(f'\n🏙️ Cities processed:')
    for idx, row in prototype_results.iterrows():
        city_name = row['city_name']
        pop_est = row['population_estimate']
        worldpop_total = row['total_worldpop_population']
        heat_vuln = row.get('heat_vuln_ratio_mean')
        
        # Format the ratio on its own: a missing column yields None, which a
        # float format spec rejects. Fall back to 'N/A' instead of crashing.
        try:
            heat_vuln_text = f'{heat_vuln:>6.2f}%'
        except (TypeError, ValueError):
            heat_vuln_text = 'N/A'.rjust(7)
        
        print(f'   {idx+1}. {city_name:20s} - Pop: {pop_est:>10,.0f}, WorldPop: {worldpop_total:>10,.0f}, Heat Vuln: {heat_vuln_text}')
    
    # Show sample of heat vulnerability statistics
    heat_vuln_cols = [col for col in prototype_results.columns if 'heat_vuln_ratio_' in col]
    if heat_vuln_cols:
        print(f'\n🌡️ Heat Vulnerability Statistics:')
        for col in heat_vuln_cols:
            # Summarise across cities, ignoring cities with no value.
            values = prototype_results[col].dropna()
            if len(values) > 0:
                print(f'   {col:25s}: min={values.min():6.2f}%, max={values.max():6.2f}%, mean={values.mean():6.2f}%')
    
    print(f'\n✅ Prototype test successful! Ready for full-scale processing.')
else:
    print(f'\n❌ Prototype test failed - check configuration and retry.')

🧪 PROTOTYPE TEST: Processing 5 largest Brazilian cities
🚀 Starting Brazil-wide heat vulnerability analysis
   Batch size: 5 cities
   Max batches: 1
🇧🇷 Filtering Brazilian cities...
✅ Found 200 Brazilian cities
   Minimum population: 50,000
   Maximum cities: 200

📊 Top 10 cities by population:
   1. São Paulo            - Pop: 19,485,158, Area: 2111.0 km²
   2. Rio de Janeiro       - Pop:  9,853,693, Area: 1285.0 km²
   3. Belo Horizonte       - Pop:  4,376,747, Area:  609.0 km²
   4. Recife               - Pop:  3,847,558, Area:  491.0 km²
   5. Fortaleza            - Pop:  3,324,149, Area:  413.0 km²
   6. Salvador             - Pop:  3,305,396, Area:  323.0 km²
   7. Porto Alegre         - Pop:  2,763,350, Area:  599.0 km²
   8. Curitiba             - Pop:  2,512,877, Area:  566.0 km²
   9. Goiânia              - Pop:  2,457,483, Area:  531.0 km²
  10. Manaus               - Pop:  2,368,805, Area:  272.0 km²

📊 Processing plan:
   Total cities: 200
   Total batches: 1
   Cities per

## Full-Scale Processing Interface

In [7]:
# Create interface for full-scale processing
# Cities per batch: 5-20 in steps of 5, default 10.
batch_size_slider = widgets.IntSlider(
    value=10,
    min=5,
    max=20,
    step=5,
    description='Batch Size:',
    style={'description_width': 'initial'}
)

# Upper bound on the number of batches: 1-50, default 20. The run also stops
# once the filtered city list (default cap: 200 cities) is exhausted.
max_batches_slider = widgets.IntSlider(
    value=20,
    min=1,
    max=50,
    step=1,
    description='Max Batches:',
    style={'description_width': 'initial'}
)

process_button = widgets.Button(
    description='🇧🇷 Process Brazilian Cities',
    button_style='primary',
    layout={'width': '250px'}
)

# Starts disabled; on_process_click enables it after a successful run.
export_button = widgets.Button(
    description='💾 Export Results to CSV',
    button_style='success',
    layout={'width': '200px'},
    disabled=True
)

# Output area that on_process_click clears and writes its log into.
processing_output = widgets.Output()

# Global variable to store results
# Re-running this cell resets it to None, discarding any earlier results.
brazil_results = None

def on_process_click(button):
    """Button callback: run the batch pipeline with the slider settings.

    All output is written into ``processing_output``, which is cleared first.
    The resulting frame (or ``None`` on failure) is stored in the module-level
    ``brazil_results``. On success the ten rows with the highest
    ``heat_vuln_ratio_mean`` are listed and the export button is enabled.

    Args:
        button: The clicked widget; not used.
    """
    global brazil_results

    with processing_output:
        clear_output(wait=True)

        cities_per_batch = batch_size_slider.value
        batch_limit = max_batches_slider.value

        print('🚀 Starting full-scale Brazil processing')
        print(f'   Configuration: {cities_per_batch} cities per batch, max {batch_limit} batches')
        print(f'   Estimated cities: {cities_per_batch * batch_limit}')

        brazil_results = process_all_brazilian_cities_in_batches(
            batch_size=cities_per_batch,
            max_batches=batch_limit
        )

        # Failure path first; the export button stays as it was.
        if brazil_results is None:
            print('\n❌ Processing failed - check logs above for details')
            return

        print(f'\n🎉 SUCCESS! Processed {len(brazil_results)} Brazilian cities')

        # Show top cities by heat vulnerability
        if 'heat_vuln_ratio_mean' in brazil_results.columns:
            ranking = brazil_results.nlargest(10, 'heat_vuln_ratio_mean')
            print('\n🌡️ TOP 10 MOST HEAT VULNERABLE CITIES:')
            rows = zip(
                ranking['city_name'],
                ranking['heat_vuln_ratio_mean'],
                ranking['population_estimate'],
            )
            for city, vuln, pop in rows:
                print(f'   {city:20s}: {vuln:5.2f}% heat vulnerable (Pop: {pop:,.0f})')

        # Results exist, so exporting is now allowed.
        export_button.disabled = False

def on_export_click(button):
    """Button callback: write ``brazil_results`` to a timestamped CSV file.

    Messages are printed inside ``processing_output``, the same output widget
    that ``on_process_click`` writes to, and are appended below the
    processing log. The target directory is created if it does not exist, and
    a filesystem error is reported instead of being raised from the callback.

    Args:
        button: The clicked widget; not used.
    """
    global brazil_results

    import os  # local import: only this callback needs it

    with processing_output:
        if brazil_results is None:
            print('❌ No results to export - run processing first')
            return

        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        # NOTE(review): machine-specific absolute path — consider a configurable data directory.
        filename = f'/Users/martynclark/heatInsights-notebooks/data/brazil_heat_vulnerability_{timestamp}.csv'

        try:
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            brazil_results.to_csv(filename, index=False)
        except OSError as exc:
            print(f'❌ Export failed: {exc}')
            return

        print(f'✅ Results exported to: {filename}')
        print(f'   Cities: {len(brazil_results)}')
        print(f'   Columns: {len(brazil_results.columns)}')

# Register the callbacks on the two buttons.
process_button.on_click(on_process_click)
export_button.on_click(on_export_click)

# Display interface
# NOTE(review): the "Total Cities" help line ignores the default 200-city cap
# applied by get_filtered_brazilian_cities — consider rewording.
display(widgets.VBox([
    widgets.HTML('<h3>🇧🇷 Brazil-Wide Heat Vulnerability Processing</h3>'),
    widgets.HTML('''
    <p><strong>Batch process all major Brazilian cities to calculate heat vulnerability ratios.</strong></p>
    <p><strong>Configuration:</strong></p>
    <ul>
        <li><strong>Batch Size:</strong> Number of cities processed simultaneously (5-20)</li>
        <li><strong>Max Batches:</strong> Maximum number of batches to process (1-50)</li>
        <li><strong>Total Cities:</strong> Batch Size × Max Batches</li>
    </ul>
    <p><strong>Processing Time:</strong> ~30-60 seconds per batch depending on city sizes</p>
    '''),
    widgets.HBox([batch_size_slider, max_batches_slider]),
    widgets.HBox([process_button, export_button]),
    processing_output
]))

# Printed to the cell output, not into processing_output.
print('\n🇧🇷 Brazil-wide processing interface ready!')
print('Configure batch settings above and click "Process Brazilian Cities" to start.')

VBox(children=(HTML(value='<h3>🇧🇷 Brazil-Wide Heat Vulnerability Processing</h3>'), HTML(value='\n    <p><stro…


🇧🇷 Brazil-wide processing interface ready!
Configure batch settings above and click "Process Brazilian Cities" to start.
