# Session 1: Palawan Land Cover Classification with Verified Training Data

**Duration:** 90 minutes | **Instructor:** CoPhil Advanced Training

## Learning Objectives

1. Set up and authenticate Google Earth Engine in Google Colab
2. Acquire and preprocess Sentinel-2 satellite imagery
3. Calculate spectral indices (NDVI, NDWI, NDBI, EVI)
4. Create training datasets using verified open data sources
5. Train a Random Forest classifier
6. Perform land cover classification
7. Assess accuracy and generate statistics

## Study Area: Palawan Province

- UNESCO Biosphere Reserve
- High biodiversity, critical for conservation
- Environmental challenges: deforestation, mining, and agricultural expansion

## A. Setup and Authentication

In [None]:
# Install required packages
# The leading "!" hands the line to the shell from the notebook cell; -q keeps
# pip's output quiet.
!pip install earthengine-api geemap pandas numpy matplotlib seaborn -q

In [None]:
# Import all libraries
import ee
import geemap
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Set plot style
# Applies the 'seaborn-v0_8-darkgrid' matplotlib style sheet and seaborn's
# "husl" colour palette to every plot drawn afterwards in this session.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Confirmation message for the notebook user
print("✓ Libraries imported successfully")

In [None]:
# Authenticate and initialize Earth Engine
# ee.Authenticate() runs the credential flow; ee.Initialize() then opens the
# session against the Cloud project named below.
# NOTE(review): 'gee-trainning' may be a misspelling of 'gee-training' —
# confirm the project ID. Participants presumably need to substitute a
# project they have access to.
ee.Authenticate()
ee.Initialize(project='gee-trainning')

print("✓ Earth Engine initialized successfully")

## B. Define Study Area

In [None]:
# Define Palawan boundary
# The study area is a closed lon/lat ring (first vertex repeated at the end)
# spanning 117.0–120.5 in longitude and 8.5–12.5 in latitude.
palawan_coords = [
    [lon, lat]
    for lon, lat in (
        (117.0, 8.5),    # Southwest
        (117.0, 12.5),   # Northwest
        (120.5, 12.5),   # Northeast
        (120.5, 8.5),    # Southeast
        (117.0, 8.5),    # Close polygon
    )
]

palawan = ee.Geometry.Polygon(palawan_coords)

# Calculate area
# area() is divided by 1e6 to report km²; getInfo() fetches the number from
# the server so it can be formatted locally.
area_m2 = palawan.area()
area_km2 = area_m2.divide(1e6).getInfo()
print("Study Area: Palawan Province")
print(f"Approximate Area: {area_km2:,.0f} km²")

In [None]:
# Create interactive map
# Centred on [10.5, 118.8] at zoom level 8; the study-area polygon is added
# as a red layer named 'Palawan Boundary'.
Map = geemap.Map(center=[10.5, 118.8], zoom=8, height='600px')
Map.addLayer(palawan, {'color': 'red'}, 'Palawan Boundary')
# Leaving the map object as the cell's last expression makes the notebook display it
Map

## C. Sentinel-2 Data Acquisition

In [None]:
# Define date range (using 2024 for better availability)
start_date, end_date = '2024-01-01', '2024-12-31'


# Cloud masking function
def mask_s2_clouds(image):
    """Mask flagged pixels using the QA60 band and rescale the image.

    Bits 10 and 11 of QA60 are tested (treated here as the cloud and cirrus
    flags); a pixel is kept only when both bits are 0. The masked image is
    then divided by 10000, which applies to every band, QA60 included.

    Args:
        image: Image exposing ``select``, ``updateMask`` and ``divide``
            (an ee.Image when this function is mapped over the collection).

    Returns:
        The masked image with all band values divided by 10000.

    NOTE(review): QA60 is reportedly not populated for every Sentinel-2
    processing period — confirm it is valid for the chosen date range.
    """
    qa_band = image.select('QA60')

    # Each flag is "clear" when its bit is unset.
    cloud_clear = qa_band.bitwiseAnd(1 << 10).eq(0)
    cirrus_clear = qa_band.bitwiseAnd(1 << 11).eq(0)
    clear_sky = cloud_clear.And(cirrus_clear)

    masked_image = image.updateMask(clear_sky)
    return masked_image.divide(10000)


print(f"Date range: {start_date} to {end_date}")

In [None]:
# Load Sentinel-2 collection
# filterDate() treats its end argument as exclusive, so the end is advanced by
# one day; otherwise imagery acquired on end_date itself would be dropped even
# though the printed date range presents end_date as part of the period.
# Scenes with 20% or more cloudy pixels (scene metadata) are discarded, and the
# remaining images are cloud-masked per pixel by mask_s2_clouds.
s2_collection = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
                 .filterBounds(palawan)
                 .filterDate(start_date, ee.Date(end_date).advance(1, 'day'))
                 .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
                 .map(mask_s2_clouds))

# Create median composite
# The per-pixel median across the collection is clipped to the study area.
s2_composite = s2_collection.median().clip(palawan)

# Select bands
# Only these six bands are carried forward into the indices and feature stack.
bands = ['B2', 'B3', 'B4', 'B8', 'B11', 'B12']
s2_composite = s2_composite.select(bands)

print("✓ Sentinel-2 composite created")
# size().getInfo() asks the server how many images passed the filters
print(f"Images in collection: {s2_collection.size().getInfo()}")

In [None]:
# Visualize RGB composite
# Bands B4/B3/B2 are displayed as red/green/blue, stretched over 0.0–0.3
# with a gamma of 1.4.
rgb_vis = dict(bands=['B4', 'B3', 'B2'], min=0.0, max=0.3, gamma=1.4)
Map.addLayer(s2_composite, rgb_vis, 'Sentinel-2 RGB')
Map

## D. Calculate Spectral Indices

In [None]:
# Calculate all indices
# The three normalized-difference indices share one recipe: take a band pair
# from the composite and rename the single resulting band.
ndvi, ndwi, ndbi = (
    s2_composite.normalizedDifference(band_pair).rename(index_name)
    for index_name, band_pair in (
        ('NDVI', ['B8', 'B4']),
        ('NDWI', ['B3', 'B8']),
        ('NDBI', ['B11', 'B8']),
    )
)

# EVI needs three bands, so it is written as an expression whose variables
# (NIR, RED, BLUE) are bound to B8, B4 and B2 of the composite.
evi_inputs = {
    variable: s2_composite.select(band_name)
    for variable, band_name in (('NIR', 'B8'), ('RED', 'B4'), ('BLUE', 'B2'))
}
evi_formula = '2.5 * ((NIR - RED) / (NIR + 6 * RED - 7.5 * BLUE + 1))'
evi = s2_composite.expression(evi_formula, evi_inputs).rename('EVI')

print("✓ Spectral indices calculated")

## E. Feature Stack Preparation

In [None]:
# Create feature stack
# Start from the six-band composite and append each index band in turn, so
# the final band order is: composite bands, NDVI, NDWI, NDBI, EVI.
feature_stack = s2_composite
for index_image in (ndvi, ndwi, ndbi, evi):
    feature_stack = feature_stack.addBands(index_image)

# Fetch the band names once; they are reused later as classifier inputs.
feature_names = feature_stack.bandNames().getInfo()
feature_count = len(feature_names)
print(f"✓ Feature stack created with {feature_count} features")
print(f"Features: {feature_names}")

## F. Training Data from Verified Open Sources

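Using verified datasets:
- **ESA WorldCover 2021**: 10 m resolution, 76.7% accuracy
- **Global Mangrove Watch**: 25 m resolution, 87.4% accuracy for mangroves
- **OpenStreetMap**: Urban features

> **Note:** The code cells below load only ESA WorldCover. Mangrove labels come from the WorldCover mangrove class, and the urban class is supplemented with random points placed inside buffers around known town centres. Global Mangrove Watch and OpenStreetMap are listed as reference sources and are not loaded in these cells.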

In [None]:
# Define class properties
# Class IDs are assigned 1..5 in the order listed; each entry carries the
# display name and the hex colour used for map layers.
class_info = {
    class_id: {'name': class_name, 'color': hex_color}
    for class_id, (class_name, hex_color) in enumerate(
        [
            ('Forest', '006400'),
            ('Agriculture', 'FFFF00'),
            ('Water', '0000FF'),
            ('Urban', 'FF0000'),
            ('Mangrove', '008B8B'),
        ],
        start=1,
    )
}

print("Land Cover Classes:")
for class_id, info in class_info.items():
    class_label = info['name']
    print(f"  {class_id}: {class_label}")

In [None]:
# Load ESA WorldCover 2021
# Take the first image of the v200 collection dated within 2021 and clip it
# to the study area.
worldcover_collection = ee.ImageCollection('ESA/WorldCover/v200')
worldcover = (
    worldcover_collection
    .filterDate('2021-01-01', '2022-01-01')
    .first()
    .clip(palawan)
)

# Remap to our classes
# WorldCover class value -> project class ID. 0 marks values that are not
# used, and is also the default for any value missing from the table.
worldcover_to_class = {
    10: 1,
    20: 1,
    30: 1,
    40: 2,
    50: 4,
    60: 0,
    70: 0,
    80: 3,
    90: 0,
    95: 5,
    100: 0,
}
source_values = list(worldcover_to_class.keys())
target_values = list(worldcover_to_class.values())
worldcover_remapped = worldcover.remap(source_values, target_values, 0).rename('landcover')

print("✓ ESA WorldCover loaded (10m, 76.7% accuracy)")

In [None]:
# Generate stratified random training points
# Up to 500 points are requested per value of the 'landcover' band at 30 m
# scale; the fixed seed keeps the draw repeatable.
stratified_settings = dict(
    numPoints=500,
    classBand='landcover',
    region=palawan,
    scale=30,
    seed=42,
    geometries=True,
)
all_class_samples = worldcover_remapped.stratifiedSample(**stratified_settings)

# Class 0 is the "unused" bucket of the remap, so those points are discarded.
labelled_only = ee.Filter.neq('landcover', 0)
training_points = all_class_samples.filter(labelled_only)

training_point_count = training_points.size().getInfo()
print(f"✓ Training points generated: {training_point_count} total")

In [None]:
# Add urban training points from known cities
# Each site is ([lon, lat], buffer radius passed to buffer()).
# NOTE(review): verify these coordinates against the named towns.
urban_sites = [
    ([118.7384, 9.7392], 5000),   # Puerto Princesa
    ([119.507, 10.693], 2000),    # Taytay
    ([119.408, 11.041], 2000),    # El Nido
    ([118.3707, 9.3085], 2000),   # Bataraza
]
urban_centers = [
    ee.Geometry.Point(lon_lat).buffer(radius)
    for lon_lat, radius in urban_sites
]

urban_features = [ee.Feature(center, {'landcover': 4}) for center in urban_centers]
urban_areas = ee.FeatureCollection(urban_features)

# Draw 200 random points inside the buffered areas and label each as class 4 (Urban).
random_urban_points = ee.FeatureCollection.randomPoints(
    region=urban_areas,
    points=200,
    seed=42
)
urban_points = random_urban_points.map(lambda point: point.set('landcover', 4))

# Combine all training points
all_training_points = training_points.merge(urban_points)
final_point_count = all_training_points.size().getInfo()
print(f"✓ Final training dataset: {final_point_count} points")

In [None]:
# Sample spectral values from training points
# Every training point receives the feature-stack band values at its
# location (10 m scale); only the 'landcover' label is copied across and
# point geometries are dropped.
sampling_settings = {
    'collection': all_training_points,
    'properties': ['landcover'],
    'scale': 10,
    'geometries': False,
    'tileScale': 4,
}
training_samples = feature_stack.sampleRegions(**sampling_settings)

training_sample_count = training_samples.size().getInfo()
print(f"✓ Training samples extracted: {training_sample_count} pixels")

In [None]:
# Visualize training points
# A separate map with the RGB composite as backdrop and one point layer per
# class, coloured with that class's entry in class_info.
Map_training = geemap.Map(center=[10.5, 118.8], zoom=8, height='600px')
Map_training.addLayer(s2_composite, rgb_vis, 'Sentinel-2 RGB')

for class_id, info in class_info.items():
    layer_title = f"Training: {info['name']}"
    layer_style = {'color': info['color']}
    class_points = all_training_points.filter(ee.Filter.eq('landcover', class_id))
    Map_training.addLayer(class_points, layer_style, layer_title)

Map_training

## G. Random Forest Training

In [None]:
# Train Random Forest classifier
# Hyper-parameters are collected in one place; variablesPerSplit=None leaves
# that choice to Earth Engine, and the fixed seed keeps training repeatable.
rf_settings = {
    'numberOfTrees': 100,
    'variablesPerSplit': None,
    'minLeafPopulation': 1,
    'bagFraction': 0.632,
    'seed': 42,
}
rf_classifier = ee.Classifier.smileRandomForest(**rf_settings)

# Fit on the sampled pixels: 'landcover' is the label and every band of the
# feature stack is a predictor.
training_settings = {
    'features': training_samples,
    'classProperty': 'landcover',
    'inputProperties': feature_names,
}
trained_classifier = rf_classifier.train(**training_settings)

print("✓ Random Forest training complete")

## H. Image Classification

In [None]:
# Apply classifier
classified_image = feature_stack.classify(trained_classifier)

# Define palette
# Colours are listed in ascending class-ID order so they line up with the
# min=1 … max=5 stretch used for display.
class_palette = [info_entry['color'] for _, info_entry in sorted(class_info.items())]
landcover_vis = {'min': 1, 'max': 5, 'palette': class_palette}

# Visualize
# The RGB composite is added hidden (shown=False) beneath the class map.
Map_classified = geemap.Map(center=[10.5, 118.8], zoom=8, height='700px')
Map_classified.addLayer(s2_composite, rgb_vis, 'Sentinel-2 RGB', False)
Map_classified.addLayer(classified_image, landcover_vis, 'Land Cover')

print("✓ Classification complete")
Map_classified

## I. Accuracy Assessment

In [None]:
# Create validation dataset
# Same stratified draw as for training, but with 100 points per class and a
# different seed (123) so a different set of points is drawn.
validation_settings = dict(
    numPoints=100,
    classBand='landcover',
    region=palawan,
    scale=30,
    seed=123,
    geometries=True,
)
validation_points = (
    worldcover_remapped
    .stratifiedSample(**validation_settings)
    .filter(ee.Filter.neq('landcover', 0))
)

# Attach the feature-stack band values to each validation point.
validation_samples = feature_stack.sampleRegions(
    collection=validation_points,
    properties=['landcover'],
    scale=10,
    geometries=False,
)

# Classify validation set
validation_classified = validation_samples.classify(trained_classifier)

# Generate confusion matrix
# Reference labels are in 'landcover'; predictions are in 'classification'.
confusion_matrix = validation_classified.errorMatrix('landcover', 'classification')
overall_accuracy, kappa = (
    metric.getInfo()
    for metric in (confusion_matrix.accuracy(), confusion_matrix.kappa())
)

print(f"Overall Accuracy: {overall_accuracy*100:.2f}%")
print(f"Kappa Coefficient: {kappa:.4f}")

## J. Area Statistics

In [None]:
# Calculate area per class
# pixelArea() supplies each pixel's area; stacking the classified band behind
# it lets a grouped sum reducer total that area separately per class value.
pixel_area = ee.Image.pixelArea()
area_image = pixel_area.addBands(classified_image)

area_stats = area_image.reduceRegion(
    reducer=ee.Reducer.sum().group(
        groupField=1,            # band index 1 is the classification band
        groupName='landcover'
    ),
    geometry=palawan,
    scale=10,
    maxPixels=1e10,
    tileScale=4
)

print("Calculating area statistics...")
print("This may take 2-3 minutes...")

# reduceRegion() only describes the computation; getInfo() actually runs it
# on the server and returns the grouped sums under the 'groups' key, one
# entry per class: {'landcover': <class id>, 'sum': <area in m²>}.
# NOTE(review): if this request times out at 10 m, a coarser scale may be
# needed — confirm against the study-area size.
area_groups = area_stats.getInfo().get('groups', [])

area_table = pd.DataFrame(
    [
        {
            'Class ID': int(group['landcover']),
            'Class': class_info.get(int(group['landcover']), {}).get('name', 'Unknown'),
            'Area (km²)': group['sum'] / 1e6,
        }
        for group in area_groups
    ],
    columns=['Class ID', 'Class', 'Area (km²)'],
)

if area_table.empty:
    print("No classified pixels found in the study area")
else:
    total_km2 = area_table['Area (km²)'].sum()
    area_table['Share (%)'] = 100 * area_table['Area (km²)'] / total_km2
    print(area_table.to_string(index=False, float_format=lambda value: f"{value:,.1f}"))

## K. Export Results

In [None]:
# Export classification
# `palawan` is already an ee.Geometry, which has no .geometry() method (that
# accessor exists on Feature / FeatureCollection / Image objects), so it is
# passed directly as the export region.
export_classification = ee.batch.Export.image.toDrive(
    image=classified_image,
    description='Palawan_LandCover_Classification',
    folder='EarthEngine',
    fileNamePrefix='palawan_landcover_2024',
    region=palawan,
    scale=10,
    crs='EPSG:4326',
    maxPixels=1e10,
    fileFormat='GeoTIFF'
)

# The task is only configured here; nothing is exported until .start() is called.
print("✓ Export configured")
print("To start export, run: export_classification.start()")
print("Check progress at: https://code.earthengine.google.com/tasks")