In [None]:
%pip install matplotlib
%pip install geopandas

import os
import geopandas as gpd
import matplotlib.pyplot as plt

# Path to your shapefile (update filename as needed)
shapefile_path = "data/boundaries/uga_admbnda_adm2_ubos_20200824.shp"

# Fail early when the path is wrong. Listing the parent directory (sorted, so
# the output is stable between runs) makes a mistyped filename easy to spot.
if not os.path.exists(shapefile_path):
    print(f"Shapefile not found at {shapefile_path}.")
    base_dir = os.path.dirname(shapefile_path)
    if os.path.isdir(base_dir):
        print('Files in', base_dir, ':', sorted(os.listdir(base_dir)))
    raise FileNotFoundError(shapefile_path)

# Load the shapefile into a GeoDataFrame.
try:
    gdf = gpd.read_file(shapefile_path)
except Exception as e:
    # Chain explicitly so the traceback reports the reader's original error
    # as the direct cause of this RuntimeError.
    raise RuntimeError(f"Failed to read shapefile: {e}") from e

# Quick look at the loaded layer: size, schema, CRS and spatial extent.
overview = (
    ('Rows, columns:', gdf.shape),
    ('Columns:', list(gdf.columns)),
    ('CRS:', gdf.crs),
    ('Total bounds:', gdf.total_bounds),
)
for label, value in overview:
    print(label, value)

print('\nPreview:')
print(gdf.head())

# Draw every boundary polygon on an explicitly created figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 10))
gdf.plot(ax=ax, color='lightgrey', edgecolor='black')
ax.set_title('Uganda Administrative Boundaries (Level 2)')
plt.show()

# Locate the Kampala feature(s) without assuming a particular attribute schema.
# First choice: a single object-dtype column that mentions 'Kampala', so the
# filter can be attributed to one field. Fallback: match on any attribute column.


def _mentions_kampala(series):
    """Return a boolean Series flagging values that contain 'Kampala' (case-insensitive)."""
    return series.astype(str).str.contains('Kampala', case=False, na=False)


text_cols = gdf.select_dtypes(include=['object']).columns

name_col = None
kampala = None
for col in text_cols:
    col_hits = _mentions_kampala(gdf[col])
    if col_hits.any():
        name_col = col
        # Reuse the mask that identified the column instead of recomputing it.
        kampala = gdf[col_hits]
        break

if name_col is not None:
    print(f"Filtering by column: {name_col}")
else:
    # Scan every attribute column as strings. The geometry column is skipped:
    # converting each polygon to text is slow and cannot match a place name.
    geom_col = gdf.geometry.name
    mask = None
    for col in gdf.columns:
        if col == geom_col:
            continue
        try:
            hits = _mentions_kampala(gdf[col])
        except Exception as exc:
            # Best-effort scan: keep going, but say which column was skipped.
            print(f"Skipping column {col!r} while scanning for 'Kampala': {exc}")
            continue
        mask = hits if mask is None else (mask | hits)

    if mask is not None and mask.any():
        kampala = gdf[mask]
        print('Found Kampala by scanning all columns')
    else:
        print("Could not find 'Kampala' in any column. Sample unique values from first string columns:")
        for col in text_cols[:5]:
            print(col, '->', gdf[col].dropna().unique()[:10])

# Show the matched rows and draw them highlighted on top of all boundaries,
# zoomed to the Kampala extent plus a 10% margin.
if kampala is not None and len(kampala):
    print('Kampala rows:', len(kampala))
    print(kampala)

    fig, ax = plt.subplots(figsize=(8, 8))
    gdf.plot(ax=ax, color='lightgrey', edgecolor='black')
    kampala.plot(ax=ax, color='orange', edgecolor='black')
    ax.set_title('Kampala District (highlighted)')

    minx, miny, maxx, maxy = kampala.total_bounds
    # A zero-width or zero-height extent would give zero padding and identical
    # axis limits; fall back to a small fixed pad in that case.
    xpad = (maxx - minx) * 0.1 if (maxx - minx) != 0 else 0.01
    ypad = (maxy - miny) * 0.1 if (maxy - miny) != 0 else 0.01
    ax.set_xlim(minx - xpad, maxx + xpad)
    ax.set_ylim(miny - ypad, maxy + ypad)
    plt.show()
else:
    print('No Kampala geometry to plot')

## Setup: geopandas installation (recommended via conda)

If you don't have geopandas installed or pip install fails on Windows, the easiest and most reliable route is conda (Miniconda/Anaconda):

```
# Create an environment with geopandas (recommended)
conda create -n geo -c conda-forge geopandas
conda activate geo
```

If you must use pip, try installing prebuilt wheels or use the `--find-links` option, but conda-forge avoids most Windows build issues.


In [None]:
# Environment sanity check: print the geopandas and Python versions, or the
# error raised while importing geopandas.
try:
    import geopandas as gpd
    import sys

    gpd_version = getattr(gpd, '__version__', 'unknown')
    print('geopandas version:', gpd_version)
    print('python version:', sys.version)
except Exception as import_error:
    print('Failed to import geopandas:', import_error)

## Further exploration and processing

This cell computes the area in km² of each ADM2 polygon, prints summary statistics, filters Kampala, and writes a processed GeoJSON to `data/processed/` and a choropleth (plus a zoomed Kampala map, when Kampala is found) to `outputs/`.

In [None]:
import os
import geopandas as gpd
import matplotlib.pyplot as plt

# Read the ADM2 boundary shapefile; stop immediately if the path does not exist.
shp = r'data/boundaries/uga_admbnda_adm2_ubos_20200824.shp'
if os.path.exists(shp):
    gdf = gpd.read_file(shp)
else:
    raise FileNotFoundError(shp)

# Report the table size and the available attribute columns.
for label, value in (('Loaded ADM2:', gdf.shape), ('Columns:', list(gdf.columns))):
    print(label, value)

# Give the layer a CRS if the file did not declare one.
if gdf.crs is None:
    print('Input has no CRS; assuming WGS84 (EPSG:4326)')
    gdf = gdf.set_crs(epsg=4326)

# The LAEA origin (+lat_0 / +lon_0) must be given in degrees. If the layer is
# stored in a projected CRS its coordinates are not lon/lat, so derive the
# origin from a lon/lat view of the data rather than from the native coordinates.
lonlat = gdf if gdf.crs.is_geographic else gdf.to_crs(epsg=4326)

# Centre of the bounding box: cheap to compute (no dissolve of all polygons).
# Any origin near the data works because the projection is equal-area.
west, south, east, north = lonlat.total_bounds
center_lon = float((west + east) / 2)
center_lat = float((south + north) / 2)

# Build a local Lambert Azimuthal Equal-Area projection centered on the data extent
laea_crs = f"+proj=laea +lat_0={center_lat} +lon_0={center_lon} +datum=WGS84 +units=m +no_defs"
proj = gdf.to_crs(laea_crs)
proj['area_km2'] = proj.geometry.area / 1e6  # projected units are metres: m^2 -> km^2
gdf['area_km2'] = proj['area_km2']  # index-aligned copy back onto the original layer

print('Area stats (km2):')
print(gdf['area_km2'].describe())

# The name column is optional: the Kampala filter below already copes with a
# missing 'ADM2_EN', so the top-10 table must not assume it exists either.
has_name = 'ADM2_EN' in gdf.columns
top_cols = ['ADM2_EN', 'area_km2'] if has_name else ['area_km2']

# Show top 10 largest ADM2 by area
print('\nTop 10 ADM2 by area:')
print(gdf.nlargest(10, 'area_km2')[top_cols].to_string(index=False))

# Filter Kampala (try ADM2_EN first, else scan every attribute column)
if has_name:
    kampala_mask = gdf['ADM2_EN'].astype(str).str.contains('Kampala', case=False, na=False)
else:
    # Column-wise scan of the attributes only: the geometry column is dropped
    # because turning each polygon into text is slow and cannot match a name.
    attrs = gdf.drop(columns=gdf.geometry.name).astype(str)
    kampala_mask = attrs.apply(
        lambda col: col.str.contains('Kampala', case=False, na=False)
    ).any(axis=1)
kampala = gdf[kampala_mask]

print('Kampala rows found:', len(kampala))

# Write the layer (including the new area column) to data/processed/ as GeoJSON.
out_dir = 'data/processed'
processed_path = os.path.join(out_dir, 'adm2_processed.geojson')

# Create the target directory on first run; a no-op when it already exists.
os.makedirs(out_dir, exist_ok=True)
gdf.to_file(processed_path, driver='GeoJSON')
print('Saved processed GeoJSON to', processed_path)

# Choropleth of polygon area: saved under outputs/ and also shown in the notebook.
os.makedirs('outputs', exist_ok=True)
fig, ax = plt.subplots(figsize=(10, 10))
gdf.plot(column='area_km2', cmap='viridis', legend=True, edgecolor='black', ax=ax)
ax.set_title('ADM2 areas (km2)')
fig.savefig('outputs/adm2_area_choropleth.png', dpi=150, bbox_inches='tight')
plt.show()
plt.close(fig)

# When Kampala rows exist, also export a map zoomed to their extent.
if len(kampala) > 0:
    fig, ax = plt.subplots(figsize=(8, 8))
    gdf.plot(ax=ax, color='lightgrey', edgecolor='black')
    kampala.plot(ax=ax, color='red', edgecolor='black')

    minx, miny, maxx, maxy = kampala.total_bounds
    span_x = maxx - minx
    span_y = maxy - miny
    # 10% margin around the extent; a fixed 0.01 pad when the extent is degenerate.
    if span_x == 0:
        xpad = 0.01
    else:
        xpad = span_x * 0.1
    if span_y == 0:
        ypad = 0.01
    else:
        ypad = span_y * 0.1
    ax.set_xlim(minx - xpad, maxx + xpad)
    ax.set_ylim(miny - ypad, maxy + ypad)

    fig.savefig('outputs/kampala_zoom.png', dpi=150, bbox_inches='tight')
    plt.show()
    plt.close(fig)
    print('Saved outputs/kampala_zoom.png')

print('Saved outputs/adm2_area_choropleth.png')