In [1]:
import ee
import geemap
import os

import rasterio
import numpy as np
from PIL import Image

# Earth Engine session setup: run the authentication step, then initialise the
# client library. Every later cell that touches `ee.*` or geemap's Earth Engine
# helpers relies on this having been executed first.
ee.Authenticate()
ee.Initialize()

In [None]:
# ==========================================
# 1. LOAD DHS CLUSTERS
# ==========================================

# Local shapefile holding the DHS cluster GPS points.
# NOTE(review): absolute, machine-specific path; the notebook will not run on
# another machine until this is pointed at a local copy of the shapefile.
dhs_shp_path = '/Users/ruben/Desktop/Thesis/TrainingData/PH_DHS_GPS/ph_dhs_sample_50.shp' 
# Convert the shapefile into an Earth Engine object; later cells call `.map()`
# and `.size()` on it, i.e. it is used as a feature collection.
dhs_points = geemap.shp_to_ee(dhs_shp_path)

In [None]:
# ==========================================
# 2. DEFINE BUFFER FUNCTION
# ==========================================
# Turn each cluster point into a box-shaped footprint that can be cut out as a CNN input chip.
# Buffer radius: 2 km for urban clusters, 5 km for rural clusters.

def adaptive_buffer(feature):
    """Expand a DHS cluster point into the bounding box of an urban/rural-sized buffer.

    The feature's 'URBAN_RURA' property selects the buffer radius: 2000 m when
    it equals 'U', 5000 m for any other value.

    Args:
        feature: Earth Engine feature carrying a 'URBAN_RURA' string property.

    Returns:
        The feature after buffering by the selected radius and replacing the
        result with its bounding box (`.buffer(radius).bounds()`).
    """
    # Read the flag server-side as a string.
    flag = ee.String(feature.get('URBAN_RURA'))

    # compareTo() yields 0 only on an exact match with 'U'; that drives the
    # server-side conditional picking the radius in metres.
    buffer_m = ee.Number(
        ee.Algorithms.If(flag.compareTo('U').eq(0), 2000, 5000)
    )

    # Buffer around the point, then keep just the bounding box of that buffer.
    buffered = feature.buffer(buffer_m)
    return buffered.bounds()

# Apply the buffer function to all DHS points
# (server-side map: one buffered bounding box per input feature).
dhs_squares = dhs_points.map(adaptive_buffer)

# Verify on Map
# Visual sanity check: original points in red, download footprints in blue.
Map = geemap.Map()
Map.centerObject(dhs_points, 6)  # second argument is the zoom level
Map.addLayer(dhs_points, {'color': 'red'}, 'Original DHS Points')
Map.addLayer(dhs_squares, {'color': 'blue'}, 'Buffered Squares (Download Area)')
# Bare expression as the last line so the notebook renders the map widget.
Map

Map(center=[12.154454545945676, 122.42655436359965], controls=(WidgetControl(options=['position', 'transparent…

In [None]:
# ==========================================
# 3. DOWNLOAD CENTERED CHIPS
# ==========================================
# Folder that receives the downloaded GeoTIFF chips.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
output_dir = os.path.expanduser('/Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles')

# size().getInfo() fetches the feature count from Earth Engine so it can be printed.
print(f"Downloading {dhs_squares.size().getInfo()} clusters...")

# ==========================================
# 4. PREPARE SATELLITE IMAGE
# ==========================================
# Use a relaxed cloud filter to avoid "Black Images"
def mask_s2_clouds(image):
    """Mask QA60-flagged pixels and divide the image by 10000.

    Args:
        image: Object exposing `select`, `updateMask` and `divide`; an Earth
            Engine image when this is used through `ImageCollection.map`.

    Returns:
        `image` with every pixel masked out where QA60 bit 10 or bit 11 is
        set, and all band values divided by 10000.
    """
    # Bits 10 and 11 are presumably the opaque-cloud and cirrus flags of the
    # Sentinel-2 QA60 band.
    # NOTE(review): confirm QA60 is actually populated for the acquisition
    # dates being composited before relying on this mask.
    qa_band = image.select('QA60')
    bit10_clear = qa_band.bitwiseAnd(1 << 10).eq(0)
    bit11_clear = qa_band.bitwiseAnd(1 << 11).eq(0)

    # A pixel survives only when both flags are clear.
    keep = bit10_clear.And(bit11_clear)
    masked = image.updateMask(keep)
    return masked.divide(10000)

# Cloud-masked median composite of Sentinel-2 surface reflectance for 2023,
# restricted to the DHS download footprints.
s2_image = (
    ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
    # filterDate() treats the end date as exclusive, so the upper bound must be
    # 1 Jan 2024 for scenes acquired on 31 Dec 2023 to be included.
    .filterDate('2023-01-01', '2024-01-01')  # full 2023 calendar year
    .filterBounds(dhs_squares)
    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 30))  # Allow scenes with < 30% cloud
    .map(mask_s2_clouds)  # per-scene QA60 masking + division by 10000
    .median()  # per-pixel median over the remaining scenes
    .select(['B4', 'B3', 'B2', 'B8'])  # output band order: red, green, blue, NIR
    .clip(dhs_squares)
)

# Download one GeoTIFF chip per buffered DHS box into output_dir.
# Files are named with the given prefix plus a running index (dhs_01.tif, ...).
geemap.download_ee_image_tiles_parallel(
    image=s2_image,
    out_dir=output_dir,
    features=dhs_squares,
    scale=30,  # 30 m output pixel size
    crs='EPSG:3857',  # the comma here was missing, which made the cell a SyntaxError
    prefix='dhs_',
)

Downloading 50 clusters...
Downloading 5/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_05.tif
Downloading 6/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_06.tif
Downloading 2/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_02.tif
Downloading 4/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_04.tif
Downloading 8/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_08.tif
Downloading 9/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_09.tif
Downloading 1/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_01.tif
Downloading 10/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_10.tif
Downloading 7/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_07.tif


  img = gd.download.BaseImage(image)
  img = gd.download.BaseImage(image)
  img = gd.download.BaseImage(image)
  img = gd.download.BaseImage(image)
  img = gd.download.BaseImage(image)
  img = gd.download.BaseImage(image)
  img = gd.download.BaseImage(image)
  img = gd.download.BaseImage(image)
  img = gd.download.BaseImage(image)


Downloading 3/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_03.tif


  img = gd.download.BaseImage(image)
100%|██████████|1/1 tiles [00:03<00:00]
  return STACClient().get(self.id)


Downloading 11/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_11.tif


100%|██████████|1/1 tiles [00:03<00:00]
  return STACClient().get(self.id)


Downloading 12/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_12.tif


100%|██████████|1/1 tiles [00:04<00:00]
  return STACClient().get(self.id)


Downloading 13/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_13.tif


100%|██████████|1/1 tiles [00:05<00:00]
  return STACClient().get(self.id)


Downloading 14/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_14.tif


100%|██████████|1/1 tiles [00:04<00:00]
  return STACClient().get(self.id)


Downloading 15/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_15.tif


100%|██████████|1/1 tiles [00:06<00:00]
  return STACClient().get(self.id)


Downloading 16/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_16.tif


100%|██████████|1/1 tiles [00:12<00:00]
  return STACClient().get(self.id)
100%|██████████|1/1 tiles [00:04<00:00]
  return STACClient().get(self.id)


Downloading 17/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_17.tif
Downloading 18/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_18.tif


100%|██████████|1/1 tiles [00:06<00:00]


Downloading 19/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_19.tif


100%|██████████|1/1 tiles [00:05<00:00]


Downloading 20/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_20.tif


100%|██████████|1/1 tiles [00:05<00:00]


Downloading 21/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_21.tif


100%|██████████|1/1 tiles [00:06<00:00]
  return STACClient().get(self.id)


Downloading 22/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_22.tif


100%|██████████|1/1 tiles [00:04<00:00]
  0%|          |0/1 tiles [00:00<?]

Downloading 23/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_23.tif


100%|██████████|1/1 tiles [00:07<00:00]


Downloading 24/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_24.tif


100%|██████████|1/1 tiles [00:07<00:00]


Downloading 25/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_25.tif


100%|██████████|1/1 tiles [00:05<00:00]


Downloading 26/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_26.tif


100%|██████████|1/1 tiles [00:06<00:00]
  0%|          |0/1 tiles [00:00<?]

Downloading 27/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_27.tif


100%|██████████|1/1 tiles [00:04<00:00]


Downloading 28/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_28.tif


100%|██████████|1/1 tiles [00:04<00:00]
  return STACClient().get(self.id)


Downloading 29/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_29.tif


100%|██████████|1/1 tiles [00:05<00:00]


Downloading 30/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_30.tif


100%|██████████|1/1 tiles [00:05<00:00]
  0%|          |0/1 tiles [00:00<?]

Downloading 31/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_31.tif


100%|██████████|1/1 tiles [00:04<00:00]


Downloading 32/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_32.tif


100%|██████████|1/1 tiles [00:03<00:00]


Downloading 33/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_33.tif


100%|██████████|1/1 tiles [00:03<00:00]


Downloading 34/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_34.tif


100%|██████████|1/1 tiles [00:05<00:00]
100%|██████████|1/1 tiles [00:03<00:00]


Downloading 35/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_35.tif
Downloading 36/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_36.tif


100%|██████████|1/1 tiles [00:05<00:00]


Downloading 37/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_37.tif


100%|██████████|1/1 tiles [00:04<00:00]


Downloading 38/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_38.tif


100%|██████████|1/1 tiles [00:06<00:00]


Downloading 39/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_39.tif


100%|██████████|1/1 tiles [00:06<00:00]


Downloading 40/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_40.tif


100%|██████████|1/1 tiles [00:06<00:00]


Downloading 41/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_41.tif


100%|██████████|1/1 tiles [00:07<00:00]


Downloading 42/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_42.tif


100%|██████████|1/1 tiles [00:06<00:00]


Downloading 43/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_43.tif


100%|██████████|1/1 tiles [00:05<00:00]


Downloading 44/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_44.tif


100%|██████████|1/1 tiles [00:08<00:00]
  0%|          |0/1 tiles [00:00<?]

Downloading 45/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_45.tif


100%|██████████|1/1 tiles [00:12<00:00]


Downloading 46/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_46.tif


100%|██████████|1/1 tiles [00:07<00:00]


Downloading 47/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_47.tif


100%|██████████|1/1 tiles [00:06<00:00]


Downloading 48/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_48.tif


100%|██████████|1/1 tiles [00:15<00:00]
100%|██████████|1/1 tiles [00:05<00:00]


Downloading 49/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_49.tif
Downloading 50/50: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/dhs_50.tif


100%|██████████|1/1 tiles [00:07<00:00]
100%|██████████|1/1 tiles [00:05<00:00]
100%|██████████|1/1 tiles [00:05<00:00]
100%|██████████|1/1 tiles [00:36<00:00]
100%|██████████|1/1 tiles [00:06<00:00]
100%|██████████|1/1 tiles [00:06<00:00]
100%|██████████|1/1 tiles [00:08<00:00]
100%|██████████|1/1 tiles [00:12<00:00]
100%|██████████|1/1 tiles [00:12<00:00]


Finished in 89.21064805984497 seconds.


100%|██████████|1/1 tiles [00:19<00:00]


In [None]:
# ==========================================
# 5. GENERATE JPG PREVIEWS
# ==========================================

# Folder containing the downloaded GeoTIFF chips
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
input_dir = os.path.expanduser('/Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles')

# JPG previews are written to a sub-folder of the tile directory
output_dir = os.path.join(input_dir, 'previews_jpg')

# exist_ok=True creates the folder when missing and is a no-op when it is
# already there, without a separate check-then-create step.
os.makedirs(output_dir, exist_ok=True)

print(f"Reading TIFs from: {input_dir}")
print(f"Saving JPGs to:    {output_dir}")

# 2. DEFINING THE STRETCH FUNCTION
def stretch_band(band):
    """Contrast-stretch one band to 8-bit using its 2nd-98th percentile range.

    Values at or below the 2nd percentile become 0 (black), values at or above
    the 98th percentile become 255 (white), and values in between are scaled
    linearly. Percentiles are computed from finite pixels only, so NaN / +-inf
    no-data pixels neither poison nor skew the stretch; those pixels are
    written as 0 in the output.

    Args:
        band: Numeric array (any shape) holding a single image band.

    Returns:
        np.uint8 array with the same shape as `band`. If `band` contains no
        finite value at all, an all-zero array is returned.
    """
    band = np.asarray(band)
    valid = np.isfinite(band)

    # Nothing to stretch: the whole band is no-data.
    if not valid.any():
        return np.zeros(band.shape, dtype=np.uint8)

    # Calculate the 2nd and 98th percentiles (ignores the absolute outliers)
    p2, p98 = np.percentile(band[valid], (2, 98))

    # Stretch the data so p2 becomes 0 (Black) and p98 becomes 255 (White);
    # np.interp clamps everything outside [p2, p98] to the end values.
    scaled = np.interp(band, (p2, p98), (0, 255))

    # Force no-data pixels to 0 before the cast: casting NaN to uint8 is undefined.
    return np.where(valid, scaled, 0).astype(np.uint8)

# 3. PROCESSING LOOP
# Convert every GeoTIFF chip in input_dir into a contrast-stretched RGB JPG.
# Sorted so the processing (and log) order is the same on every run;
# os.listdir() returns entries in arbitrary order.
tif_files = sorted(f for f in os.listdir(input_dir) if f.endswith('.tif'))

for filename in tif_files:
    input_path = os.path.join(input_dir, filename)
    # splitext swaps only the trailing extension; str.replace('.tif', '.jpg')
    # would also rewrite any earlier '.tif' inside the file name.
    output_path = os.path.join(output_dir, os.path.splitext(filename)[0] + '.jpg')

    try:
        with rasterio.open(input_path) as src:
            # Read RGB (Bands 1, 2, 3) in one call; the file can be closed
            # as soon as the arrays are in memory.
            r, g, b = src.read([1, 2, 3])

        # Check if empty: the red band has no finite pixel at all (pure
        # NaN / -inf no-data) or its largest finite value is zero.
        r_valid = r[np.isfinite(r)]
        if r_valid.size == 0 or r_valid.max() == 0:
            print(f"Skipping {filename}: Image is completely empty.")
            continue

        # Apply the stretch to each color channel independently
        r_str = stretch_band(r)
        g_str = stretch_band(g)
        b_str = stretch_band(b)

        # Stack into (rows, cols, 3) and save
        rgb = np.dstack((r_str, g_str, b_str))
        img = Image.fromarray(rgb)
        img.save(output_path, quality=85)

    except Exception as e:
        # Best effort: report the failing tile and carry on with the rest.
        print(f"Error on {filename}: {e}")

print("-" * 30)
print(f"Done! Check the folder: {output_dir}")

Reading TIFs from: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles
Saving JPGs to:    /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/previews_jpg
------------------------------
Done! Check the folder: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/dhs_sample_tiles/previews_jpg
