In [13]:
import ee
import geemap
import os

import rasterio
import numpy as np
from PIL import Image

ee.Authenticate()
ee.Initialize()

In [14]:
# ==========================================
# 2. LOAD POPULATION DATA (WorldPop)
# ==========================================
# WorldPop (2020) for the Philippines (PHL)
# This dataset gives the estimated number of people per pixel.
pop_image = ee.ImageCollection("WorldPop/GP/100m/pop") \
    .filter(ee.Filter.eq('country', 'PHL')) \
    .filter(ee.Filter.eq('year', 2020)) \
    .first()

# Define the Philippines boundary (to constrain sampling)
ph_boundary = ee.FeatureCollection('USDOS/LSIB_SIMPLE/2017') \
    .filter(ee.Filter.eq('country_na', 'Philippines'))

In [15]:
# ==========================================
# 3. CREATE A "HABITABLE" MASK
# ==========================================
# DHS creates clusters where people actually live.
# Map threshold: Keep pixels with > 5 people (adjust as needed).
habitable_mask = pop_image.gt(5) 

# Update the image to hide empty areas (creates transparency)
masked_pop = pop_image.updateMask(habitable_mask)

# Visualize where we will sample from
Map = geemap.Map()
Map.centerObject(ph_boundary, 6)
vis_params = {'min': 0, 'max': 50, 'palette': ['black', 'yellow', 'red']}
Map.addLayer(masked_pop.clip(ph_boundary), vis_params, 'Populated Areas')
Map # Uncomment to view

Map(center=[11.715520965690386, 122.91816679938353], controls=(WidgetControl(options=['position', 'transparent…

In [16]:
# ==========================================
# 4. GENERATE PROXY POINTS (Stratified Sample)
# ==========================================
# We want to generate, say, 1,000 random points, but ONLY on populated pixels.
# 'stratifiedSample' is great for this, or standard 'sample'.

print("Generating proxy clusters based on population...")

proxy_points = masked_pop.sample(
    region=ph_boundary,
    scale=100,             # WorldPop resolution is ~100m
    numPixels=1000,        # Number of fake clusters
    geometries=True        # Return the Lat/Lon points
)

Generating proxy clusters based on population...


In [17]:
# ==========================================
# 5. CREATE BUFFER SQUARES 
# ==========================================
# Simulate these proxy points exactly like real DHS GPS data.
# 5km buffer for context (simulating the DHS rural displacement).

def buffer_points(feature):
    return feature.buffer(5000).bounds()

proxy_squares = proxy_points.map(buffer_points)

In [12]:
# ==========================================
# 6. DISPLAY BUFFER SQUARES
# ==========================================

# Create the Map
Map = geemap.Map()

# Add a Satellite Basemap (Hybrid shows labels, Satellite is pure image)
Map.add_basemap('HYBRID')

# Define Styles
style_params = {
    'color': 'red',          # Border color
    'width': 2,              # Border thickness
    'fillColor': '00000000'  # Transparent fill
}

# Add the Clusters to the Map
Map.addLayer(proxy_squares.style(**style_params), {}, 'DHS Cluster Areas')

# Center the map on the data
Map.centerObject(proxy_squares, 6)

# Display the Map
Map

Map(center=[12.222845702621468, 122.65009285767124], controls=(WidgetControl(options=['position', 'transparent…

In [6]:
# ==========================================
# 6. DOWNLOAD (Test Run)
# ==========================================
# Define the output directory
output_dir = os.path.expanduser('/Users/ruben/Desktop/Thesis/TrainingData/Sentinel2')

# Prepare S2 Image (Simple composite)
s2_image = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
    .filterDate('2023-01-01', '2023-06-30')
    .filterBounds(proxy_squares)
    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10))
    .median()
    .select(['B4', 'B3', 'B2', 'B8'])
    .clip(proxy_squares))

print("Starting download of proxy clusters...")

geemap.download_ee_image_tiles_parallel(
    image=s2_image,
    out_dir=output_dir,
    features=proxy_squares,
    scale=60,  # Using 60m for speed since this is a test
    crs='EPSG:3857',
    prefix='proxy_cluster_'
)

Starting download of proxy clusters...
Finished in 536.0550699234009 seconds.


In [7]:
# ==========================================
# 7. GENERATE JPG PREVIEWS
# ==========================================

# Path to TIF files
input_dir = os.path.expanduser('/Users/ruben/Desktop/Thesis/TrainingData/Sentinel2')

# Output: Where you want the visible previews to go
output_dir = os.path.join(input_dir, 'previews_jpg')

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

print(f"Reading TIFs from: {input_dir}")
print(f"Saving JPGs to:    {output_dir}")

#Loop to convert TIF files
tif_files = [f for f in os.listdir(input_dir) if f.endswith('.tif')]

if not tif_files:
    print("No TIF files found! Check your input directory path.")

for filename in tif_files:
    input_path = os.path.join(input_dir, filename)
    output_filename = filename.replace('.tif', '.jpg')
    output_path = os.path.join(output_dir, output_filename)
    
    try:
        with rasterio.open(input_path) as src:
            # Check if file has at least 3 bands (RGB)
            if src.count < 3:
                print(f"Skipping {filename}: Not enough bands (needs 3, has {src.count})")
                continue

            # Read RGB bands (Assuming B4=Red, B3=Green, B2=Blue are indices 1, 2, 3)
            
            r = src.read(1)
            g = src.read(2)
            b = src.read(3)
            
            # Stack into a color image
            rgb = np.dstack((r, g, b))
            
            # Normalization:
            # Sentinel-2 values go from 0 to ~10000. 
            # Visual info is usually between 0 and 3000.
            # Clip the bright values so the dark parts become visible.
            
            max_val = 3000.0 
            rgb = np.clip(rgb, 0, max_val)
            
            # Convert to 8-bit (0-255) for JPEG
            rgb = (rgb / max_val * 255).astype(np.uint8)
            
            # Save
            img = Image.fromarray(rgb)
            img.save(output_path, quality=80)
            

    except Exception as e:
        print(f"Failed to convert {filename}: {e}")

print("-" * 30)
print(f"Done! Check the folder: {output_dir}")

Reading TIFs from: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2
Saving JPGs to:    /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/previews_jpg
------------------------------
Done! Check the folder: /Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/previews_jpg
You should be able to see these files in VS Code or Finder now.
