In [4]:
import ee
import geemap
import os

import rasterio
import numpy as np
from PIL import Image

# Authenticate against Google Earth Engine and initialise the client library.
# Both calls must run before any other ee.* / geemap call in this notebook.
# NOTE(review): Authenticate() presumably opens an interactive login when no
# cached credentials are available - confirm before running unattended.
ee.Authenticate()
ee.Initialize()

In [5]:
# ==========================================
# 1. LOAD DHS CLUSTERS
# ==========================================
# Reads the DHS cluster shapefile from disk and converts it into an
# Earth Engine object (`dhs_points`) used by every later cell.

dhs_shp_path = '/Users/ruben/Desktop/Thesis/TrainingData/PH_DHS_GPS/ph_dhs_sample_50.shp'

# Fail fast with a readable message when the path is wrong, rather than
# letting the conversion step produce a less obvious error.
if not os.path.isfile(dhs_shp_path):
    raise FileNotFoundError(f"DHS shapefile not found: {dhs_shp_path}")

dhs_points = geemap.shp_to_ee(dhs_shp_path)

# NOTE(review): some geemap releases appear to print the conversion error and
# return None instead of raising - confirm for the installed version. The guard
# keeps later cells from failing with an unrelated AttributeError on None.
if dhs_points is None:
    raise RuntimeError(f"geemap.shp_to_ee returned no data for: {dhs_shp_path}")


In [6]:
# ==========================================
# 2. DEFINE BUFFER FUNCTION
# ==========================================
# We need to turn each single point into a square box for the CNN.
# 2km buffer for urban, 5km for rural

def adaptive_buffer(feature):
    """Return the bounding box of `feature` after a status-dependent buffer.

    The 'URBAN_RURA' property decides the buffer distance: features whose
    value equals 'U' are buffered by 2000 (metres, per the notebook notes),
    all other features by 5000. The buffered shape is then replaced by its
    bounds, so the result is a box instead of a circle.

    Intended to be passed to `FeatureCollection.map`, so everything here is
    expressed with server-side `ee` objects.
    """
    flag = ee.String(feature.get('URBAN_RURA'))

    # compareTo gives 0 for identical strings, so eq(0) marks the urban case
    urban = flag.compareTo('U').eq(0)

    # Server-side conditional: 2000 for urban, 5000 otherwise
    buffer_radius = ee.Number(ee.Algorithms.If(urban, 2000, 5000))

    buffered = feature.buffer(buffer_radius)
    return buffered.bounds()

# Turn every DHS point into its download square
dhs_squares = dhs_points.map(adaptive_buffer)

# Bring the cluster IDs to the client and derive one GeoTIFF name per cluster
cluster_ids = dhs_squares.aggregate_array('DHSCLUST').getInfo()
tif_filenames = [f"dhs_{c_id}.tif" for c_id in cluster_ids]

# Visual check: original points (red) on top of their buffered squares (blue)
Map = geemap.Map()
Map.centerObject(dhs_points, 6)
Map.addLayer(
    dhs_points,
    {'color': 'red'},
    'Original DHS Points',
)
Map.addLayer(
    dhs_squares,
    {'color': 'blue'},
    'Buffered Squares (Download Area)',
)
Map

Map(center=[12.154454545945676, 122.42655436359965], controls=(WidgetControl(options=['position', 'transparent…

In [8]:
# ==========================================
# 3. DOWNLOAD CENTERED CHIPS
# ==========================================
# Exports one GeoTIFF chip per buffered DHS square into `output_dir`.
# Inputs expected from earlier cells:
#   - dhs_squares : FeatureCollection of buffered cluster boxes
#   - s2_image    : the image to export (not created in this cell)
output_dir = os.path.expanduser('/Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles')

# Create the destination up front; without it every single download fails.
os.makedirs(output_dir, exist_ok=True)

# Stop immediately when the image is missing. Inside the loop the NameError
# would be swallowed by the broad except and printed once per cluster.
if 's2_image' not in globals():
    raise NameError(
        "s2_image is not defined - run the cell that builds the Sentinel-2 image first."
    )

print(f"Downloading {dhs_squares.size().getInfo()} clusters...")

print("Fetching feature list from Google Earth Engine...")
# .getInfo() pulls the FeatureCollection down as a plain Python dict, so the
# features can be iterated over locally.
features_list = dhs_squares.getInfo()['features']

print(f"Found {len(features_list)} clusters. Starting download...")

# ------------------------------------------
# Per-cluster download loop
# ------------------------------------------
failed_clusters = []

for i, feature in enumerate(features_list):
    # Defaults so the except-branch never touches an unbound or stale name
    # when the failure happens before these are assigned for this feature.
    cluster_id = f"<feature index {i}>"
    out_path = None

    try:
        # A. Cluster ID lives under feature -> 'properties' -> 'DHSCLUST'
        cluster_id = feature['properties']['DHSCLUST']

        # B. Rebuild the square geometry of this cluster as an ee object
        roi_geometry = ee.Geometry.Polygon(feature['geometry']['coordinates'])

        # C. Target file for this cluster
        filename = f"dhs_{cluster_id}.tif"
        candidate_path = os.path.join(output_dir, filename)

        # D. Resume support: keep chips that are already on disk
        if os.path.exists(candidate_path):
            print(f"Skipping {filename} (Already exists)")
            continue

        # From here on the file belongs to this attempt and may be cleaned up
        out_path = candidate_path

        # E. Download this specific chip
        geemap.download_ee_image(
            image=s2_image,
            filename=out_path,
            region=roi_geometry,
            scale=10,
            crs='EPSG:3857',
            overwrite=True
        )

        # Print progress every 10 images
        if i % 10 == 0:
            print(f"Downloaded {i}/{len(features_list)}: {filename}")

    except Exception as e:
        failed_clusters.append(cluster_id)
        print(f"Failed to download Cluster {cluster_id}: {e}")

        # A failed attempt may leave a partial file behind; remove it so the
        # "already exists" check does not treat it as finished on the next run.
        if out_path is not None and os.path.exists(out_path):
            try:
                os.remove(out_path)
            except OSError as cleanup_error:
                print(f"Could not remove partial file {out_path}: {cleanup_error}")

print("Download sequence complete.")
if failed_clusters:
    print(f"{len(failed_clusters)} cluster(s) failed: {failed_clusters}")

Downloading 50 clusters...
Fetching feature list from Google Earth Engine...
Found 50 clusters. Starting download...


  0%|          |0/1 tiles [00:00<?]

Downloaded 0/50: dhs_843.tif


  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

Downloaded 10/50: dhs_393.tif


  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

Downloaded 20/50: dhs_361.tif


  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/1 tiles [00:00<?]

  0%|          |0/12 tiles [00:00<?]

  0%|          |0/12 tiles [00:00<?]

  0%|          |0/12 tiles [00:00<?]

  0%|          |0/4 tiles [00:00<?]

  0%|          |0/4 tiles [00:00<?]

  0%|          |0/4 tiles [00:00<?]

Downloaded 30/50: dhs_895.tif


  0%|          |0/16 tiles [00:00<?]

  0%|          |0/12 tiles [00:00<?]

  0%|          |0/12 tiles [00:00<?]

  0%|          |0/12 tiles [00:00<?]

  0%|          |0/4 tiles [00:00<?]

  0%|          |0/8 tiles [00:00<?]

  0%|          |0/12 tiles [00:00<?]

  0%|          |0/12 tiles [00:00<?]

  0%|          |0/4 tiles [00:00<?]

  0%|          |0/12 tiles [00:00<?]

Downloaded 40/50: dhs_638.tif


  0%|          |0/12 tiles [00:00<?]

  0%|          |0/4 tiles [00:00<?]

  0%|          |0/12 tiles [00:00<?]

  0%|          |0/12 tiles [00:00<?]

  0%|          |0/4 tiles [00:00<?]

  0%|          |0/4 tiles [00:00<?]

  0%|          |0/4 tiles [00:00<?]

  0%|          |0/4 tiles [00:00<?]

  0%|          |0/12 tiles [00:00<?]

Download sequence complete.


In [9]:
# ==========================================
# 5. GENERATE JPG PREVIEWS
# ==========================================

# Folder that holds the downloaded GeoTIFF chips
input_dir = os.path.expanduser('/Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles')

# JPG previews go into a sub-folder next to the chips
output_dir = os.path.join(input_dir, 'previews_jpg')

# Create the preview folder the first time this cell runs
if os.path.exists(output_dir) is False:
    os.makedirs(output_dir)

# Echo both locations so the run log shows where data is read and written
print(f"Reading TIFs from: {input_dir}")
print(f"Saving JPGs to:    {output_dir}")

# 2. DEFINING THE STRETCH FUNCTION
def stretch_band(band, lower_pct=2, upper_pct=98):
    """Contrast-stretch one band to 8-bit using a percentile clip.

    Values at or below the `lower_pct` percentile map to 0 (black), values at
    or above the `upper_pct` percentile map to 255 (white), and everything in
    between is scaled linearly and truncated to uint8.

    Parameters
    ----------
    band : array-like
        Pixel values of a single band (any numeric dtype).
    lower_pct, upper_pct : float, optional
        Percentiles used as the black and white points. The defaults (2, 98)
        ignore the most extreme outliers.

    Returns
    -------
    numpy.ndarray
        uint8 array with the same shape as `band`.

    Notes
    -----
    Non-finite pixels (NaN / inf) are left out of the percentile computation
    and rendered as 0, so a few invalid pixels no longer corrupt the whole
    band. A band with no finite pixels, or with no spread between the two
    percentiles, has nothing to stretch and is returned as all zeros.
    """
    values = np.asarray(band, dtype=np.float64)
    finite = np.isfinite(values)

    # Nothing usable in the band at all
    if not finite.any():
        return np.zeros(values.shape, dtype=np.uint8)

    p_low, p_high = np.percentile(values[finite], (lower_pct, upper_pct))

    # Flat band: interpolating between two identical points is degenerate
    if not p_high > p_low:
        return np.zeros(values.shape, dtype=np.uint8)

    # Invalid pixels are pinned to the black point before interpolation so
    # no NaN reaches the integer cast.
    cleaned = np.where(finite, values, p_low)
    img_rescale = np.interp(cleaned, (p_low, p_high), (0, 255)).astype(np.uint8)
    return img_rescale

# 3. PROCESSING LOOP
# Converts every GeoTIFF chip in `input_dir` into a stretched JPG preview in
# `output_dir`. Sorted so the processing order is reproducible (os.listdir
# returns entries in arbitrary order).
tif_files = sorted(f for f in os.listdir(input_dir) if f.endswith('.tif'))

for filename in tif_files:
    input_path = os.path.join(input_dir, filename)

    # Swap only the extension; str.replace would also rewrite a '.tif' that
    # happens to occur earlier in the file name.
    stem, _ext = os.path.splitext(filename)
    output_path = os.path.join(output_dir, stem + '.jpg')

    try:
        # Keep the dataset open only while reading the pixels
        with rasterio.open(input_path) as src:
            # NOTE(review): assumes bands 1-3 are stored in R, G, B order -
            # confirm against how the exported image was built.
            r = src.read(1)
            g = src.read(2)
            b = src.read(3)

        # Check if empty (red band has no value above zero)
        if np.max(r) == 0:
            print(f"Skipping {filename}: Image is completely empty.")
            continue

        # Apply the stretch to each color channel independently
        r_str = stretch_band(r)
        g_str = stretch_band(g)
        b_str = stretch_band(b)

        # Stack into (rows, cols, 3) and save
        rgb = np.dstack((r_str, g_str, b_str))
        img = Image.fromarray(rgb)
        img.save(output_path, quality=85)

    except Exception as e:
        # Best-effort batch: report the file and carry on with the next one
        print(f"Error on {filename}: {e}")



Reading TIFs from: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles
Saving JPGs to:    /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/previews_jpg
