# India-Wide Heat Vulnerability Analysis

Batch processing of major Indian cities (population above 100,000, capped at the 300 largest) using Google Earth Engine map-reduce to calculate heat vulnerability ratios and age-cohort demographic statistics from WorldPop 2020 data.

In [1]:
import ee
import pandas as pd
import numpy as np
import time
from datetime import datetime
import ipywidgets as widgets
from IPython.display import display, clear_output

# Initialise the Earth Engine client for the project used by this notebook
GEE_PROJECT = 'tl-cities'
ee.Initialize(project=GEE_PROJECT)
print('‚úÖ GEE initialized for India-wide processing')

‚úÖ GEE initialized for India-wide processing


In [2]:
# Source collections: the global city features and the WorldPop age/sex rasters
allCities = ee.FeatureCollection('projects/tl-cities/assets/GHS_UCDB_THEME_HAZARD_RISK_GLOBE_R2024A')
worldpopCollection = ee.ImageCollection('WorldPop/GP/100m/pop_age_sex')

# Keep only the images tagged year == 2020 and merge them into a single image
year_2020 = ee.Filter.eq('year', 2020)
worldpop_2020 = worldpopCollection.filter(year_2020).mosaic()

# Each getInfo() below is a blocking round trip that fetches a server-side value
global_city_count = allCities.size().getInfo()
print(f"Total cities globally: {global_city_count}")
worldpop_band_names = worldpop_2020.bandNames().getInfo()
print(f"WorldPop 2020 bands: {len(worldpop_band_names)}")
print('‚úÖ Datasets loaded')

Total cities globally: 11422
WorldPop 2020 bands: 37
‚úÖ Datasets loaded


## Indian Cities Filtering and Preparation

In [3]:
def get_filtered_indian_cities(min_population=100000, max_cities=300):
    """Select the most populous Indian cities from the global city collection.

    Args:
        min_population: A city is kept only when its ``GC_POP_TOT`` property
            is strictly greater than this value.
        max_cities: Upper bound on the number of cities kept.

    Returns:
        The filtered collection, sorted by ``GC_POP_TOT`` in descending order
        and limited to ``max_cities`` entries. A summary and a preview of the
        first 15 cities are printed as a side effect.
    """
    print(f'üáÆüá≥ Filtering Indian cities...')

    # Build the two filters first, then apply them, rank and truncate
    in_india = ee.Filter.eq('GC_CNT_GAD', 'India')
    above_threshold = ee.Filter.gt('GC_POP_TOT', min_population)
    ranked = allCities.filter(in_india).filter(above_threshold).sort('GC_POP_TOT', False)
    indian_cities = ranked.limit(max_cities)

    # Summary of the selection (size() is resolved with a blocking getInfo call)
    city_count = indian_cities.size().getInfo()
    print(f'‚úÖ Found {city_count} Indian cities')
    print(f'   Minimum population: {min_population:,}')
    print(f'   Maximum cities: {max_cities}')

    # Preview the head of the ranking; missing properties fall back to defaults
    preview = indian_cities.limit(15).getInfo()
    print(f'\nüìä Top 15 cities by population:')
    for rank, feature in enumerate(preview['features'], start=1):
        attrs = feature['properties']
        name = attrs.get('GC_UCN_MAI', 'Unknown')
        pop = attrs.get('GC_POP_TOT', 0)
        area = attrs.get('GC_UCA_KM2', 0)
        print(f'  {rank:2d}. {name:25s} - Pop: {pop:>12,.0f}, Area: {area:>8.1f} km¬≤')

    return indian_cities

# Run the filter once with its defaults and report how many cities were selected
indian_cities = get_filtered_indian_cities()
ready_count = indian_cities.size().getInfo()
print(f'\nüéØ Ready to process {ready_count} Indian cities')

# Explanatory notes comparing these thresholds with the Brazil analysis
threshold_notes = (
    '\nüìù Note: India has many more cities than Brazil, so we use higher thresholds:',
    '   ‚Ä¢ Minimum population: 100,000 (vs 50,000 for Brazil)',
    '   ‚Ä¢ Maximum cities: 300 (vs 200 for Brazil)',
    '   ‚Ä¢ This captures major urban centers across all Indian states',
)
print(*threshold_notes, sep='\n')

üáÆüá≥ Filtering Indian cities...
‚úÖ Found 300 Indian cities
   Minimum population: 100,000
   Maximum cities: 300

üìä Top 15 cities by population:
   1. New Delhi                 - Pop:   31,422,508, Area:   2139.0 km¬≤
   2. Kolkata                   - Pop:   23,314,585, Area:   2482.0 km¬≤
   3. Mumbai                    - Pop:   20,453,270, Area:    738.0 km¬≤
   4. Bengaluru                 - Pop:   15,178,533, Area:   1008.0 km¬≤
   5. Chennai                   - Pop:   11,466,400, Area:   1052.0 km¬≤
   6. Hajipur                   - Pop:    9,755,303, Area:   3166.0 km¬≤
   7. Hyderabad                 - Pop:    9,455,230, Area:    889.0 km¬≤
   8. Ahmedabad                 - Pop:    7,898,650, Area:    365.0 km¬≤
   9. Kozhikode                 - Pop:    7,612,130, Area:   1341.0 km¬≤
  10. Surat                     - Pop:    7,100,723, Area:    296.0 km¬≤
  11. Pune                      - Pop:    6,674,000, Area:    580.0 km¬≤
  12. Lucknow                   - Pop:    5,

## Map-Reduce Functions for Heat Vulnerability (Same as Brazil)

In [4]:
def create_heat_vulnerability_map_function():
    """Build the function that is mapped over a collection of city features.

    Returns:
        A callable that takes one city feature and returns the same feature
        with, for every age cohort and for the combined ``heat_vuln_ratio``
        group, eight summary statistics of the per-pixel population share
        (in percent), plus ``total_worldpop_population``.
    """
    # (property suffix written on the feature, key read from the reduceRegion result)
    stat_keys = (
        ('mean', 'sum_mean'),
        ('median', 'sum_median'),
        ('std', 'sum_stdDev'),
        ('min', 'sum_min'),
        ('max', 'sum_max'),
        ('q25', 'sum_p25'),
        ('q75', 'sum_p75'),
        ('count', 'sum_count'),
    )

    def map_city_heat_vulnerability(city_feature):
        """Attach cohort percentage statistics for a single city feature."""
        boundary = city_feature.geometry()

        # WorldPop band names per cohort: the youngest cohort spans the *_0 and
        # *_1 bands, every later cohort is one male and one female band.
        cohort_bands = {'age_0_4': ['M_0', 'M_1', 'F_0', 'F_1']}
        for lower in range(5, 80, 5):
            cohort_bands[f'age_{lower}_{lower + 4}'] = [f'M_{lower}', f'F_{lower}']
        cohort_bands['age_80_plus'] = ['M_80', 'F_80']

        # Total population image: every age/sex band summed exactly once.
        # It is built before the composite group below is added, so no band
        # is counted twice.
        population_bands = [band for bands in cohort_bands.values() for band in bands]
        total_population = worldpop_2020.select(population_bands).reduce(ee.Reducer.sum())

        # Heat-vulnerable group: the under-5 cohort together with all 65+ cohorts
        cohort_bands['heat_vuln_ratio'] = [
            band
            for cohort in ('age_0_4', 'age_65_69', 'age_70_74', 'age_75_79', 'age_80_plus')
            for band in cohort_bands[cohort]
        ]

        # Small constant added to the denominator to avoid division by zero
        safe_total = total_population.add(0.001)

        # One combined reducer yields all eight statistics in a single pass
        summary_reducer = (
            ee.Reducer.mean().combine(ee.Reducer.median(), sharedInputs=True)
            .combine(ee.Reducer.stdDev(), sharedInputs=True)
            .combine(ee.Reducer.minMax(), sharedInputs=True)
            .combine(ee.Reducer.percentile([25, 75]), sharedInputs=True)
            .combine(ee.Reducer.count(), sharedInputs=True)
        )

        # Region settings shared by every reduceRegion call for this city
        region_kwargs = dict(
            geometry=boundary,
            scale=90,  # labelled "WorldPop native resolution" originally — TODO confirm (dataset id says 100m)
            maxPixels=1e8,
            bestEffort=True,
        )

        properties = {}
        for cohort, bands in cohort_bands.items():
            # Per-pixel share of this cohort: (cohort / total) * 100
            cohort_population = worldpop_2020.select(bands).reduce(ee.Reducer.sum())
            share_percent = cohort_population.divide(safe_total).multiply(100)

            stats = share_percent.reduceRegion(reducer=summary_reducer, **region_kwargs)
            for suffix, result_key in stat_keys:
                properties[f'{cohort}_{suffix}'] = stats.get(result_key)

        # City-wide head count: sum of the total population image inside the boundary
        population_sum = total_population.reduceRegion(reducer=ee.Reducer.sum(), **region_kwargs)
        properties['total_worldpop_population'] = population_sum.get('sum')

        # Return the input feature with the computed values set as properties
        return city_feature.set(properties)

    return map_city_heat_vulnerability

print('‚úÖ Map-reduce function created')

‚úÖ Map-reduce function created


## Batch Processing Implementation for India

In [5]:
def process_cities_batch(cities_collection, batch_size=8):
    """Compute cohort statistics for one batch of cities.

    Args:
        cities_collection: Collection of city features to process; it must
            offer ``map`` and the mapped result must offer ``getInfo``.
        batch_size: Number shown in the progress message only.

    Returns:
        A pandas DataFrame with one row per city (identity columns followed by
        every cohort statistic found on the feature), or ``None`` when any
        exception is raised while processing the batch.
    """
    # Property-name prefixes that identify cohort statistics on a feature
    cohort_prefixes = (
        'age_0_4_', 'age_5_9_', 'age_10_14_', 'age_15_19_', 'age_20_24_',
        'age_25_29_', 'age_30_34_', 'age_35_39_', 'age_40_44_', 'age_45_49_',
        'age_50_54_', 'age_55_59_', 'age_60_64_', 'age_65_69_', 'age_70_74_',
        'age_75_79_', 'age_80_plus_', 'heat_vuln_ratio_',
    )

    print(f'üîÑ Processing batch of {batch_size} cities...')
    started = time.time()

    try:
        # Attach the statistics to every city in the batch
        mapper = create_heat_vulnerability_map_function()
        enriched = cities_collection.map(mapper)

        # getInfo() triggers the computation and downloads the result
        print('   üìä Computing statistics...')
        payload = enriched.getInfo()

        records = []
        for feature in payload['features']:
            attrs = feature['properties']

            # Identity columns first, with defaults for missing properties
            record = {
                'city_name': attrs.get('GC_UCN_MAI', 'Unknown'),
                'country': attrs.get('GC_CNT_GAD', 'Unknown'),
                'population_estimate': attrs.get('GC_POP_TOT', 0),
                'area_km2': attrs.get('GC_UCA_KM2', 0),
                'total_worldpop_population': attrs.get('total_worldpop_population', 0),
            }

            # Then every cohort statistic, in the order the feature lists them
            record.update(
                (key, value) for key, value in attrs.items()
                if key.startswith(cohort_prefixes)
            )
            records.append(record)

        df = pd.DataFrame(records)

        elapsed = time.time() - started
        print(f'   ‚úÖ Batch completed in {elapsed:.1f} seconds')
        print(f'   üìä Processed {len(df)} cities successfully')

        return df

    except Exception as e:
        # Best effort: report the failure and let the caller skip this batch
        print(f'   ‚ùå Batch processing failed: {e}')
        return None

def process_all_indian_cities_in_batches(batch_size=8, max_batches=None):
    """Process the filtered Indian cities in consecutive batches.

    Args:
        batch_size: Number of cities per batch; must be at least 1.
        max_batches: Optional cap on the number of batches to run. A falsy
            value (``None`` or 0) means every batch is processed.

    Returns:
        A pandas DataFrame concatenating every successful batch, or ``None``
        when no batch succeeded. Failed batches are reported and skipped.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # Reject unusable sizes up front, before any Earth Engine round trip; 0 would
    # otherwise fail later with ZeroDivisionError and a negative value would
    # silently schedule no batches at all.
    if batch_size < 1:
        raise ValueError(f'batch_size must be at least 1, got {batch_size}')

    print(f'üöÄ Starting India-wide heat vulnerability analysis')
    print(f'   Batch size: {batch_size} cities (smaller for Indian mega-cities)')
    print(f'   Max batches: {max_batches or "All"}')
    print('='*60)

    # Get filtered cities
    cities = get_filtered_indian_cities()
    total_cities = cities.size().getInfo()
    total_batches = (total_cities + batch_size - 1) // batch_size  # ceiling division

    if max_batches:
        total_batches = min(total_batches, max_batches)

    print(f'\nüìä Processing plan:')
    print(f'   Total cities: {total_cities}')
    print(f'   Total batches: {total_batches}')
    print(f'   Cities per batch: {batch_size}')

    all_results = []
    start_time = datetime.now()

    for batch_idx in range(total_batches):
        start_idx = batch_idx * batch_size
        end_idx = min(start_idx + batch_size, total_cities)
        # The final batch can be shorter than batch_size
        cities_in_batch = end_idx - start_idx

        print(f'\nüì¶ Batch {batch_idx + 1}/{total_batches} (Cities {start_idx + 1}-{end_idx})')

        try:
            # Slice this batch out of the ranked collection
            batch_cities_list = cities.toList(batch_size, start_idx)
            batch_cities = ee.FeatureCollection(batch_cities_list)

            # Pass the real number of cities so the progress log stays accurate
            batch_df = process_cities_batch(batch_cities, cities_in_batch)

            if batch_df is not None and len(batch_df) > 0:
                all_results.append(batch_df)
                print(f'   ‚úÖ Batch {batch_idx + 1} successful - {len(batch_df)} cities')
            else:
                print(f'   ‚ùå Batch {batch_idx + 1} failed or returned no data')

            # Pause between batches; skipped after the last one
            if batch_idx < total_batches - 1:
                print('   ‚è≥ Waiting 5 seconds before next batch (longer delay for Indian mega-cities)...')
                time.sleep(5)

        except Exception as e:
            # Best effort: one failing batch must not stop the remaining ones
            print(f'   ‚ùå Batch {batch_idx + 1} error: {e}')
            continue

    # Combine all successful batches
    if all_results:
        final_df = pd.concat(all_results, ignore_index=True)

        # Calculate processing summary
        end_time = datetime.now()
        total_time = (end_time - start_time).total_seconds()

        print(f'\nüéâ India-wide processing completed!')
        print(f'   Total cities processed: {len(final_df)}')
        print(f'   Successful batches: {len(all_results)}/{total_batches}')
        print(f'   Total processing time: {total_time:.1f} seconds')
        print(f'   Average time per city: {total_time/len(final_df):.1f} seconds')

        return final_df
    else:
        print('\n‚ùå No successful batches - processing failed')
        return None

print('‚úÖ Batch processing functions ready (optimized for Indian cities)')

‚úÖ Batch processing functions ready (optimized for Indian cities)


## Prototype Test: Process Small Batch of Indian Cities

In [6]:
# Test with a small batch first
print('üß™ PROTOTYPE TEST: Processing 4 largest Indian cities')
print('='*60)


def _format_or_na(value, spec, suffix=''):
    """Format a number with ``spec`` (plus ``suffix``), or return 'N/A' when it is missing.

    A statistic can be absent from the results or come back as None/NaN;
    applying a numeric format spec to such a value would raise, so those
    cases are rendered as the text 'N/A' instead.
    """
    if value is None or pd.isna(value):
        return 'N/A'
    return format(value, spec) + suffix


# Process just 1 batch of 4 cities for testing (smaller due to Indian mega-city sizes)
prototype_results = process_all_indian_cities_in_batches(
    batch_size=4,
    max_batches=1  # Only process 1 batch for testing
)

if prototype_results is not None:
    print(f'\nüìä PROTOTYPE RESULTS:')
    print(f'   Cities processed: {len(prototype_results)}')
    print(f'   Columns: {len(prototype_results.columns)}')

    # Show basic info; every numeric field is formatted defensively
    print(f'\nüèôÔ∏è Cities processed:')
    for idx, row in prototype_results.iterrows():
        city_name = row['city_name']
        pop_est = _format_or_na(row['population_estimate'], '>12,.0f')
        worldpop_total = _format_or_na(row['total_worldpop_population'], '>12,.0f')
        # No string default here: a missing column yields None, handled by the helper
        heat_vuln = _format_or_na(row.get('heat_vuln_ratio_mean'), '>6.2f', '%')

        print(f'   {idx+1}. {city_name:25s} - Pop: {pop_est}, WorldPop: {worldpop_total}, Heat Vuln: {heat_vuln}')

    # Show sample of heat vulnerability statistics
    heat_vuln_cols = [col for col in prototype_results.columns if 'heat_vuln_ratio_' in col]
    if heat_vuln_cols:
        print(f'\nüå°Ô∏è Heat Vulnerability Statistics:')
        for col in heat_vuln_cols:
            values = prototype_results[col].dropna()
            if len(values) > 0:
                print(f'   {col:25s}: min={values.min():6.2f}%, max={values.max():6.2f}%, mean={values.mean():6.2f}%')

    print(f'\n‚úÖ Prototype test successful! Ready for full-scale processing.')
    print(f'\nüìù India Processing Notes:')
    print(f'   ‚Ä¢ Indian cities tend to be larger and more complex than Brazilian cities')
    print(f'   ‚Ä¢ Using smaller batch sizes (4-8) and longer delays (5 seconds)')
    print(f'   ‚Ä¢ This ensures reliable processing of mega-cities like Delhi and Mumbai')
else:
    print(f'\n‚ùå Prototype test failed - check configuration and retry.')

üß™ PROTOTYPE TEST: Processing 4 largest Indian cities
üöÄ Starting India-wide heat vulnerability analysis
   Batch size: 4 cities (smaller for Indian mega-cities)
   Max batches: 1
üáÆüá≥ Filtering Indian cities...
‚úÖ Found 300 Indian cities
   Minimum population: 100,000
   Maximum cities: 300

üìä Top 15 cities by population:
   1. New Delhi                 - Pop:   31,422,508, Area:   2139.0 km¬≤
   2. Kolkata                   - Pop:   23,314,585, Area:   2482.0 km¬≤
   3. Mumbai                    - Pop:   20,453,270, Area:    738.0 km¬≤
   4. Bengaluru                 - Pop:   15,178,533, Area:   1008.0 km¬≤
   5. Chennai                   - Pop:   11,466,400, Area:   1052.0 km¬≤
   6. Hajipur                   - Pop:    9,755,303, Area:   3166.0 km¬≤
   7. Hyderabad                 - Pop:    9,455,230, Area:    889.0 km¬≤
   8. Ahmedabad                 - Pop:    7,898,650, Area:    365.0 km¬≤
   9. Kozhikode                 - Pop:    7,612,130, Area:   1341.0 km¬≤
  10. 

## Full-Scale Processing Interface for India

In [7]:
# Controls for full-scale processing
def _labelled_int_slider(label, initial, lowest, highest, increment):
    """Create an IntSlider using the description style shared by both sliders."""
    return widgets.IntSlider(
        value=initial,
        min=lowest,
        max=highest,
        step=increment,
        description=label,
        style={'description_width': 'initial'},
    )


batch_size_slider = _labelled_int_slider('Batch Size:', 8, 4, 12, 2)
max_batches_slider = _labelled_int_slider('Max Batches:', 30, 1, 75, 1)

# Starts a processing run
process_button = widgets.Button(
    description='üáÆüá≥ Process Indian Cities',
    button_style='primary',
    layout={'width': '250px'},
)

# Created disabled; the processing handler enables it once results exist
export_button = widgets.Button(
    description='üíæ Export Results to CSV',
    button_style='success',
    layout={'width': '200px'},
    disabled=True,
)

# Output area that the processing handler prints into
processing_output = widgets.Output()

# Module-level holder for the most recent results (None until a run succeeds)
india_results = None

def on_process_click(button):
    """Run the batch processing with the current slider settings.

    All output is written into ``processing_output``. The module-level
    ``india_results`` is replaced by the outcome of this run, and the export
    button is enabled only when this run produced results.

    Args:
        button: The clicked widget (unused).
    """
    global india_results

    with processing_output:
        clear_output(wait=True)

        batch_size = batch_size_slider.value
        max_batches = max_batches_slider.value

        # Invalidate the previous run first: if this run fails or raises, the
        # export button must not offer stale results as if they were new.
        india_results = None
        export_button.disabled = True

        print(f'üöÄ Starting full-scale India processing')
        print(f'   Configuration: {batch_size} cities per batch, max {max_batches} batches')
        print(f'   Estimated cities: {batch_size * max_batches}')
        print(f'   Note: Smaller batches and longer delays due to Indian mega-cities')

        india_results = process_all_indian_cities_in_batches(
            batch_size=batch_size,
            max_batches=max_batches
        )

        if india_results is None:
            print(f'\n‚ùå Processing failed - check logs above for details')
            return

        print(f'\nüéâ SUCCESS! Processed {len(india_results)} Indian cities')

        # Rankings are only shown when the heat vulnerability column is present
        if 'heat_vuln_ratio_mean' in india_results.columns:
            top_vulnerable = india_results.nlargest(10, 'heat_vuln_ratio_mean')
            print(f'\nüå°Ô∏è TOP 10 MOST HEAT VULNERABLE CITIES:')
            for idx, row in top_vulnerable.iterrows():
                city = row['city_name']
                vuln = row['heat_vuln_ratio_mean']
                pop = row['population_estimate']
                print(f'   {city:25s}: {vuln:5.2f}% heat vulnerable (Pop: {pop:,.0f})')

            # Show top cities by population for context
            top_population = india_results.nlargest(10, 'population_estimate')
            print(f'\nüèôÔ∏è TOP 10 LARGEST CITIES (by population):')
            for idx, row in top_population.iterrows():
                city = row['city_name']
                pop = row['population_estimate']
                vuln = row.get('heat_vuln_ratio_mean', 0)
                print(f'   {city:25s}: {pop:>12,.0f} people, {vuln:5.2f}% heat vulnerable')

        # Results from this run exist, so exporting is allowed again
        export_button.disabled = False

def on_export_click(button):
    """Write the most recent processing results to a timestamped CSV file.

    Messages are printed inside ``processing_output``, the same place the
    processing handler writes to. The target directory defaults to the
    notebook's original data folder and can be overridden with the
    ``HEAT_INSIGHTS_DATA_DIR`` environment variable. The directory is created
    when missing, and file-system errors are reported instead of raised.

    Args:
        button: The clicked widget (unused).
    """
    import os  # local import so this handler does not depend on another cell

    # Original hard-coded location, kept as the default for backward compatibility
    default_dir = '/Users/martynclark/heatInsights-notebooks/data'

    with processing_output:
        if india_results is None:
            print('‚ùå No results to export - run processing first')
            return

        export_dir = os.environ.get('HEAT_INSIGHTS_DATA_DIR', default_dir)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = os.path.join(export_dir, f'india_heat_vulnerability_{timestamp}.csv')

        try:
            os.makedirs(export_dir, exist_ok=True)
            india_results.to_csv(filename, index=False)
        except OSError as exc:
            # e.g. the default path does not exist / is not writable on this machine
            print(f'‚ùå Export failed: {exc}')
            return

        print(f'‚úÖ Results exported to: {filename}')
        print(f'   Cities: {len(india_results)}')
        print(f'   Columns: {len(india_results.columns)}')

# Connect each button to its click handler
for _button, _handler in (
    (process_button, on_process_click),
    (export_button, on_export_click),
):
    _button.on_click(_handler)

# Static header and configuration summary shown above the controls
title_panel = widgets.HTML('<h3>üáÆüá≥ India-Wide Heat Vulnerability Processing</h3>')
summary_panel = widgets.HTML('''
    <p><strong>Batch process all major Indian cities to calculate heat vulnerability ratios.</strong></p>
    <p><strong>India-Specific Configuration:</strong></p>
    <ul>
        <li><strong>Batch Size:</strong> 4-12 cities (smaller than Brazil due to mega-cities)</li>
        <li><strong>Max Batches:</strong> 1-75 batches (more cities than Brazil)</li>
        <li><strong>Processing Delays:</strong> 5 seconds between batches (vs 3 for Brazil)</li>
        <li><strong>Population Threshold:</strong> 100,000+ (vs 50,000+ for Brazil)</li>
        <li><strong>Expected Cities:</strong> ~300 major Indian urban centers</li>
    </ul>
    <p><strong>Processing Time:</strong> ~60-120 seconds per batch (Indian cities are larger/more complex)</p>
    <p><strong>Major Cities Expected:</strong> Delhi, Mumbai, Bangalore, Hyderabad, Chennai, Kolkata, Ahmedabad, Pune, Surat, Jaipur, and many more</p>
    ''')

# Layout: header, summary, slider row, button row, then the output area
slider_row = widgets.HBox([batch_size_slider, max_batches_slider])
button_row = widgets.HBox([process_button, export_button])
display(widgets.VBox([title_panel, summary_panel, slider_row, button_row, processing_output]))

# Usage hints printed below the interface
usage_hints = (
    '\nüáÆüá≥ India-wide processing interface ready!',
    'Configure batch settings above and click "Process Indian Cities" to start.',
    '\nüìù Recommended settings for different use cases:',
    '   ‚Ä¢ Quick test: Batch Size=4, Max Batches=2 (~8 largest cities)',
    '   ‚Ä¢ Major cities: Batch Size=8, Max Batches=15 (~120 largest cities)',
    '   ‚Ä¢ Comprehensive: Batch Size=8, Max Batches=40 (~320 cities)',
)
print(*usage_hints, sep='\n')

VBox(children=(HTML(value='<h3>üáÆüá≥ India-Wide Heat Vulnerability Processing</h3>'), HTML(value='\n    <p><stron‚Ä¶


üáÆüá≥ India-wide processing interface ready!
Configure batch settings above and click "Process Indian Cities" to start.

üìù Recommended settings for different use cases:
   ‚Ä¢ Quick test: Batch Size=4, Max Batches=2 (~8 largest cities)
   ‚Ä¢ Major cities: Batch Size=8, Max Batches=15 (~120 largest cities)
   ‚Ä¢ Comprehensive: Batch Size=8, Max Batches=40 (~320 cities)
