# Google Earth Engine Access

Access Sentinel-2 imagery from Google Earth Engine, add spectral indices (NDVI, NDTI), mask with the coastline buffer, and download images cropped to the sub-tile geometries.

## Setup

Files and system

In [2]:
import os
import json

Arrays and math

In [3]:
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd

In [50]:
import datetime
import calendar

Plotting

In [4]:
import matplotlib.pyplot as plt

Geometry

In [30]:
from shapely import wkt

Google Earth Engine

In [5]:
import ee

In [6]:
# Start the Earth Engine client session used by every `ee.*` call below.
# NOTE(review): presumably relies on credentials set up beforehand — confirm.
ee.Initialize()

Directories

In [7]:
# Base directories given as relative paths. DATA_DIR is where the input files
# below are read from; FIGURES_DIR is not used in the visible cells
# (presumably the output location for plots — confirm).
DATA_DIR = '../data/'
FIGURES_DIR = '../saved_figures/'

Habitat data

In [17]:
# Read the OSPAR habitats shapefile under DATA_DIR with geopandas. Later cells
# use its SurveyDate, SurveyYear, HabType, utm_tile and subtile_in columns.
habitats_shp = os.path.join(DATA_DIR, 'OSPARHabitats2022_Points_clustered/OSPAR2022Points.shp')
habitats_gdf = gpd.read_file(habitats_shp)

Sub-tile grid

In [9]:
# Load the sub-tile geometries. The export loop indexes this mapping as
# tiles_wkt_dict[utm_tile][subtile] and parses the value as a WKT string.
# The encoding is pinned to UTF-8 (the JSON standard) so decoding does not
# depend on the platform's default locale.
with open(os.path.join(DATA_DIR, 'tile_buffer_wkt.json'), 'r', encoding='utf-8') as f:
    tiles_wkt_dict = json.load(f)

## Get habitat months, tiles and subtiles

In [18]:
# Parse the survey dates and store each record's calendar month in a new
# 'SurveyMonth' column.
survey_dates = pd.to_datetime(habitats_gdf['SurveyDate'])
habitats_gdf['SurveyMonth'] = survey_dates.dt.month

In [20]:
# Keep only the two habitat types of interest. reset_index() is called without
# drop, so the previous row labels are retained as an 'index' column.
target_habitats = ('Zostera beds', 'Kelp forests')
is_target = habitats_gdf['HabType'].isin(target_habitats)
habitats_sel = habitats_gdf[is_target].reset_index()

In [22]:
# Distinct UTM tile identifiers among the selected habitat records.
tiles = habitats_sel['utm_tile'].unique()

In [24]:
# Display the distinct survey years present in the selection (inspection only;
# the result is not stored).
habitats_sel['SurveyYear'].unique()

array([2003, 2010, 2011, 2013, 2009, 2012, 2014, 2019, 2017, 2008, 2016,
       2018, 2020, 2002, 2000, 2005, 2001, 2004, 2006, 2015, 2021, 2007,
       2022, 2023], dtype=int32)

In [58]:
# Restrict the selection to surveys from 2016 onwards and renumber the rows
# from zero, discarding the old row labels.
from_2016 = habitats_sel['SurveyYear'] >= 2016
habitats_sel = habitats_sel[from_2016].reset_index(drop=True)

In [60]:
# Count habitat records per (tile, subtile, year, month) combination; each
# resulting row later drives one Sentinel-2 export.
group_keys = ['utm_tile', 'subtile_in', 'SurveyYear', 'SurveyMonth']
subsets_df = habitats_sel.groupby(group_keys).size().reset_index(name='count')

In [63]:
# Display the grouped table for inspection.
subsets_df

Unnamed: 0,utm_tile,subtile_in,SurveyYear,SurveyMonth,count
0,29UPB,19,2017,5,2
1,29UPB,19,2017,9,2
2,29UPB,19,2021,8,6
3,29UPB,20,2018,6,2
4,29UPB,25,2016,7,2
...,...,...,...,...,...
1046,32VPK,34,2018,7,6
1047,32VPK,34,2018,8,4
1048,32VPK,35,2016,10,5
1049,32VPK,7,2016,8,1


## Access Sentinel-2 from GEE

In [48]:
def mask_s2_clouds(image):
    """Mask cloudy pixels of a Sentinel-2 image using its QA60 band.

    A pixel is kept only when both the cloud flag (bit 10) and the cirrus
    flag (bit 11) of QA60 are zero. The masked image is then divided by
    10000; the division is applied to the image as a whole, not to
    selected bands.

    Args:
      image (ee.Image): A Sentinel-2 image with a 'QA60' band.

    Returns:
      ee.Image: The cloud-masked image divided by 10000.
    """
    qa_band = image.select('QA60')

    # Bit positions of the two QA60 flags, expressed as integer masks.
    opaque_cloud_bit = 2 ** 10
    cirrus_bit = 2 ** 11

    # Each test is true where the corresponding flag is not set.
    no_cloud = qa_band.bitwiseAnd(opaque_cloud_bit).eq(0)
    no_cirrus = qa_band.bitwiseAnd(cirrus_bit).eq(0)
    clear_sky = no_cloud.And(no_cirrus)

    masked = image.updateMask(clear_sky)
    return masked.divide(10000)

In [54]:
def add_indices(image):
    """Append NDVI and NDTI bands to an image.

    Both indices are normalized differences, (a - b) / (a + b):
      * NDVI uses a = B8 (NIR) and b = B4 (Red).
      * NDTI uses a = B4 (Red) and b = B11 (SWIR1).

    NOTE(review): published NDTI definitions vary (turbidity vs. tillage
    variants use other band pairs) — confirm that Red/SWIR1 is intended.

    Args:
      image (ee.Image): Image providing bands B4, B8 and B11.

    Returns:
      ee.Image: The input image with 'NDVI' and 'NDTI' bands added.
    """
    # Output band name -> [first band, second band] of the normalized difference.
    index_bands = {
        'NDVI': ['B8', 'B4'],
        'NDTI': ['B4', 'B11'],
    }
    extra_bands = [
        image.normalizedDifference(pair).rename(band_name)
        for band_name, pair in index_bands.items()
    ]
    return image.addBands(extra_bands)

In [62]:
def s2_dataset_from_region(startdate, enddate, region, cloud_threshold=20):
    """Build a cloud-masked Sentinel-2 SR collection with index bands.

    The 'COPERNICUS/S2_SR_HARMONIZED' collection is filtered by date, by
    intersection with ``region`` and by scene-level cloudiness. Every
    remaining image is then passed through ``mask_s2_clouds`` and
    ``add_indices``.

    Args:
      startdate: Start of the date window, passed to ``filterDate``.
      enddate: End of the date window, passed to ``filterDate``.
      region (ee.Geometry): Area the images must intersect.
      cloud_threshold: Upper bound (strict) on CLOUDY_PIXEL_PERCENTAGE.

    Returns:
      ee.ImageCollection: The filtered, masked and index-augmented collection.
    """
    collection = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')

    # Narrow the collection step by step: time window, footprint, cloudiness.
    collection = collection.filterDate(startdate, enddate)
    collection = collection.filterBounds(region)
    collection = collection.filter(
        ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', cloud_threshold)
    )

    # Per-image processing: pixel-level cloud mask first, then the indices.
    collection = collection.map(mask_s2_clouds)
    return collection.map(add_indices)

In [51]:
def month_date_range(year, month):
    """Return the first and last day of a month as ISO date strings.

    Args:
      year: Calendar year.
      month: Calendar month number (1-12).

    Returns:
      tuple[str, str]: ``(first_day, last_day)`` formatted as 'YYYY-MM-DD'.
      The last day is inclusive, i.e. it is the final day of the month itself.
    """
    first_day = datetime.date(year, month, 1)

    # monthrange() returns (weekday of day 1, number of days in the month).
    _, days_in_month = calendar.monthrange(year, month)
    final_day = first_day.replace(day=days_in_month)

    return first_day.isoformat(), final_day.isoformat()


In [52]:
def export_s2_image_to_drive(image, region, description, folder='EarthEngine', scale=10):
    """Create and start a Google Drive export task for a clipped image.

    The image is clipped to ``region`` and exported in EPSG:4326 with
    ``region`` as the export extent. ``description`` is used both as the
    task description and as the output file name prefix.

    Args:
      image (ee.Image): Image to export.
      region (ee.Geometry): Geometry used for clipping and as export region.
      description (str): Task description and file name prefix.
      folder (str): Drive folder passed to the export call.
      scale: Value passed as the export ``scale``.

    Returns:
      The export task, already started.
    """
    export_kwargs = {
        'image': image.clip(region),
        'description': description,
        'folder': folder,
        'fileNamePrefix': description,
        'region': region,
        'scale': scale,
        'crs': 'EPSG:4326',
        'maxPixels': 1e13,
    }
    drive_task = ee.batch.Export.image.toDrive(**export_kwargs)
    drive_task.start()
    return drive_task

In [None]:
# Queue one Drive export per (tile, subtile, year, month) row of subsets_df:
# a median composite of the month's Sentinel-2 images over the subtile.

# Bands written to every export (loop-invariant, so defined once).
bands = ['B2', 'B3', 'B4', 'B8', 'B11', 'NDVI', 'NDTI']

for _, row in subsets_df.iterrows():
    # Coerce to built-in ints so date construction and the ':02d' format
    # below do not depend on the column's numeric dtype.
    year = int(row['SurveyYear'])
    month = int(row['SurveyMonth'])
    startdate, _ = month_date_range(year, month)

    # filterDate() treats its end argument as exclusive, so the window must
    # end on the first day of the FOLLOWING month; passing the last day of
    # the month would silently drop that day's acquisitions.
    end_exclusive = datetime.date(year + month // 12, month % 12 + 1, 1).isoformat()

    wkt_string = tiles_wkt_dict[row['utm_tile']][row['subtile_in']]
    shapely_geom = wkt.loads(wkt_string)
    # Use the parsed geometry: only `wkt` is imported from shapely, so the
    # bare module name `shapely` is undefined here.
    # NOTE(review): assumes each subtile WKT is a single Polygon — confirm.
    region = ee.Geometry.Polygon(shapely_geom.__geo_interface__['coordinates'])
    s2_col = s2_dataset_from_region(startdate, end_exclusive, region)

    image = s2_col.median().select(bands)
    description = f"s2_{row['utm_tile']}_{row['subtile_in']}_{year}_{month:02d}"

    export_task = export_s2_image_to_drive(image, region, description=description)
    print(f"Export started for {description}")