# Sri Lanka 2024 Census - Geospatial Analysis (Phase 4)

This notebook performs geospatial analysis by joining census data with GN Division boundaries.

## Requirements
```bash
pip install pandas geopandas matplotlib folium mapclassify
```

In [None]:
# Import Libraries
import pandas as pd
import geopandas as gpd
import folium
from folium import Choropleth
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

## 1. Load Data

In [None]:
# Read the census table that already contains the calculated ratios
census_df = pd.read_csv('GN_population_final_analysis.csv')
# Replace any newline characters inside column names with underscores
census_df.columns = census_df.columns.str.replace('\n', '_', regex=False)

# Sanity check: table dimensions and the first ten column names
print(f"Census data: {census_df.shape}")
print(f"Columns: {list(census_df.columns)[:10]}...")

In [None]:
# Read the GN Division boundary polygons (the simplified GeoJSON is used for faster loading)
gdf = gpd.read_file('geoBoundaries-LKA-ADM4_simplified.geojson')

# Sanity check: number of features and the attribute columns available for joining
print(f"GeoJSON records: {gdf.shape[0]}")
print(f"GeoJSON columns: {list(gdf.columns)}")
gdf.head()

In [None]:
# Quick visual check of the boundary polygons before joining any data
preview_ax = gdf.plot(figsize=(12, 15), edgecolor='gray', linewidth=0.1)
preview_ax.set_title('Sri Lanka GN Division Boundaries')
preview_ax.axis('off')
# Save through the figure that owns the axes, then render it inline
preview_ax.figure.savefig('gn_boundaries_preview.png', dpi=150, bbox_inches='tight')
plt.show()

## 2. Data Joining

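Join the census data to the GeoJSON boundaries on the normalized GN Division name (upper-cased, whitespace-trimmed). GN Division names are not guaranteed to be unique, so review the matched/unmatched counts printed after the join.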

In [None]:
# List the candidate join-key columns on each side of the join
gn_cols = [
    name for name in census_df.columns
    if any(token in name for token in ('GN', 'Division'))
]
print("Census GN columns:", gn_cols, sep="\n")

print("\nGeoJSON columns:", gdf.columns.tolist(), sep="\n")

In [None]:
# Prepare join - use shapeName from the GeoJSON and the GN Division name column from the census.
# Names are normalized (upper-cased, whitespace-trimmed) on both sides for better matching.
gdf['shapeName_clean'] = gdf['shapeName'].str.upper().str.strip()

# Locate the census name column explicitly so a schema change fails with a clear
# message instead of a bare IndexError.
gn_name_candidates = [c for c in census_df.columns if 'GN_Division' in c and 'Name' in c]
if not gn_name_candidates:
    raise KeyError(
        "No census column containing both 'GN_Division' and 'Name' was found; "
        f"available columns: {census_df.columns.tolist()}"
    )
gn_name_col = gn_name_candidates[0]

# Keep missing names missing: a plain astype(str) would turn NaN into the literal
# string 'NAN', which is then a real (and wrong) join key.
gn_names = census_df[gn_name_col]
census_df['GN_clean'] = gn_names.astype(str).str.upper().str.strip().where(gn_names.notna())

# pandas matches null keys to each other, so census rows without a usable name are
# left out of the right-hand side of the join.
census_keyed = census_df.dropna(subset=['GN_clean'])

# The join is by name only. Repeated names on either side make the merge fan out
# (one boundary row per matching census row), so surface that instead of hiding it.
dup_census_keys = int(census_keyed['GN_clean'].duplicated(keep=False).sum())
dup_shape_keys = int(gdf['shapeName_clean'].dropna().duplicated(keep=False).sum())
if dup_census_keys or dup_shape_keys:
    print(f"Warning: {dup_census_keys} census rows and {dup_shape_keys} boundaries share a "
          "normalized name with another record; name-only matching may duplicate or mis-assign them.")

# Perform the join (left join keeps every boundary, matched or not)
merged = gdf.merge(census_keyed, left_on='shapeName_clean', right_on='GN_clean', how='left')

# Count matches on the join key itself: a matched row can still have a missing
# Sex_Ratio, so nullness of a data column is not a reliable match indicator.
print(f"Merged records: {len(merged)}")
print(f"Matched: {merged['GN_clean'].notna().sum()}")
print(f"Unmatched: {merged['GN_clean'].isna().sum()}")

## 3. Choropleth Maps

In [None]:
# Static choropleths of the two dependency ratios, drawn side by side
fig, axes = plt.subplots(1, 2, figsize=(20, 12))

# One entry per panel, left to right: (data column, colormap, legend label, panel title)
panel_specs = [
    ('Old_Age_Dependency_Ratio', 'OrRd', 'Old-Age Dependency Ratio',
     'Aging Villages: Old-Age Dependency Ratio by GN Division'),
    ('Child_Dependency_Ratio', 'Blues', 'Child Dependency Ratio',
     'Youth Hubs: Child Dependency Ratio by GN Division'),
]

for panel_ax, (ratio_col, colormap, legend_label, panel_title) in zip(axes, panel_specs):
    # Divisions with no value for this column are drawn in light grey
    merged.plot(
        column=ratio_col,
        ax=panel_ax,
        legend=True,
        cmap=colormap,
        missing_kwds={'color': 'lightgrey'},
        legend_kwds={'label': legend_label},
    )
    panel_ax.set_title(panel_title, fontsize=14)
    panel_ax.axis('off')

plt.tight_layout()
plt.savefig('choropleth_dependency_ratios.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Static choropleth of the sex ratio on a diverging colour scale
sex_ratio_plot_kwargs = dict(
    column='Sex_Ratio',
    legend=True,
    cmap='coolwarm',
    # Symmetric limits put the midpoint of the diverging colormap at 100
    vmin=80,
    vmax=120,
    missing_kwds={'color': 'lightgrey'},
    legend_kwds={'label': 'Sex Ratio (M/F * 100)'},
)

fig, ax = plt.subplots(figsize=(12, 15))
merged.plot(ax=ax, **sex_ratio_plot_kwargs)

sex_ratio_title = 'Gender Balance: Sex Ratio by GN Division\n(Blue = Female Majority, Red = Male Majority)'
ax.set_title(sex_ratio_title, fontsize=14)
ax.axis('off')

plt.tight_layout()
plt.savefig('choropleth_sex_ratio.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Static choropleth of the cluster labels; skipped when the merged data has no 'Cluster' column
has_cluster_column = 'Cluster' in merged.columns
if has_cluster_column:
    cluster_plot_kwargs = dict(
        column='Cluster',
        legend=True,
        cmap='Set1',
        # Treat the labels as discrete categories rather than a continuous scale
        categorical=True,
        missing_kwds={'color': 'lightgrey'},
        legend_kwds={'title': 'Demographic Profile'},
    )

    fig, ax = plt.subplots(figsize=(12, 15))
    merged.plot(ax=ax, **cluster_plot_kwargs)
    ax.set_title('Demographic Clusters by GN Division', fontsize=14)
    ax.axis('off')

    plt.tight_layout()
    plt.savefig('choropleth_clusters.png', dpi=150, bbox_inches='tight')
    plt.show()

## 4. Interactive Map with Folium

In [None]:
# Create interactive map centered on Sri Lanka
m = folium.Map(location=[7.8731, 80.7718], zoom_start=8, tiles='cartodbpositron')

# Build a slim layer for the web map: only the name, the plotted value and the
# geometry are embedded in the HTML, instead of every census column.
choropleth_gdf = merged[
    ['shapeName', 'Old_Age_Dependency_Ratio', merged.geometry.name]
].reset_index(drop=True)

# Leaflet expects WGS84 longitude/latitude; reproject when the source CRS is known.
if choropleth_gdf.crs is not None:
    choropleth_gdf = choropleth_gdf.to_crs(epsg=4326)

# shapeName is not guaranteed to be unique. Folium binds data through a
# key -> value lookup, so same-named divisions would all receive one value.
# Bind on a per-feature id instead (string, to match the serialized GeoJSON property).
choropleth_gdf['feature_id'] = choropleth_gdf.index.astype(str)

# Add choropleth layer
# Note: For large GeoJSON, this may be slow. Use simplified version.
Choropleth(
    geo_data=choropleth_gdf.__geo_interface__,
    data=choropleth_gdf,
    columns=['feature_id', 'Old_Age_Dependency_Ratio'],
    key_on='feature.properties.feature_id',
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Old-Age Dependency Ratio',
    nan_fill_color='white'
).add_to(m)

m.save('interactive_map_old_age.html')
print("Interactive map saved to 'interactive_map_old_age.html'")
m

## 5. District-Level Aggregation Map

In [None]:
# District-level view: merge GN polygons per district and average the ratios
district_col = [name for name in merged.columns if 'District' in name and 'Name' in name]
if district_col:
    # Use the first matching column as the grouping key
    district_col = district_col[0]

    # Unweighted mean of each ratio across the GN divisions of a district
    ratio_aggregations = {
        'Sex_Ratio': 'mean',
        'Child_Dependency_Ratio': 'mean',
        'Old_Age_Dependency_Ratio': 'mean',
    }
    dissolved = merged.dissolve(by=district_col, aggfunc=ratio_aggregations)
    district_gdf = dissolved.reset_index()

    fig, ax = plt.subplots(figsize=(12, 15))
    district_gdf.plot(
        column='Old_Age_Dependency_Ratio',
        ax=ax,
        legend=True,
        cmap='OrRd',
        edgecolor='black',
        linewidth=0.5,
        legend_kwds={'label': 'Avg Old-Age Dependency Ratio'},
    )

    # Label each district at the centroid of its dissolved geometry
    for district_name, district_geom in zip(district_gdf[district_col], district_gdf.geometry):
        label_point = district_geom.centroid
        ax.annotate(
            district_name,
            xy=(label_point.x, label_point.y),
            fontsize=6,
            ha='center',
            va='center',
        )

    ax.set_title('District-Level Old-Age Dependency Ratio', fontsize=14)
    ax.axis('off')
    plt.tight_layout()
    plt.savefig('district_map_old_age.png', dpi=150, bbox_inches='tight')
    plt.show()

## 6. Save Merged GeoDataFrame

In [None]:
# Write the joined boundaries + census attributes to disk for future use
merged_output_path = 'GN_census_merged.geojson'
merged.to_file(merged_output_path, driver='GeoJSON')
print(f"Merged GeoDataFrame saved to '{merged_output_path}'")

---
## Summary

This geospatial analysis:
- Joined census data with GN Division boundaries
- Created choropleth maps for Dependency Ratios and Sex Ratio
- Generated an interactive HTML map of the Old-Age Dependency Ratio using Folium
- Produced a district-level aggregated map of the Old-Age Dependency Ratio

### Output Files:
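- `gn_boundaries_preview.png` - Preview of the GN Division boundaries
- `choropleth_dependency_ratios.png` - Side-by-side Old-Age and Child dependency maps
- `choropleth_sex_ratio.png` - Gender balance visualization
- `choropleth_clusters.png` - Demographic cluster map (only written when a `Cluster` column is present)
- `interactive_map_old_age.html` - Interactive web map
- `district_map_old_age.png` - District-level Old-Age Dependency Ratio map
- `GN_census_merged.geojson` - Census + geometry merged file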