# MODIS Vegetation Indices Time Series Extraction from Google Earth Engine

An all-in-one script to extract time series of the Normalized Difference Vegetation Index (NDVI), the Enhanced Vegetation Index (EVI), and the near-infrared reflectance of vegetation (NIRv = NDVI × NIR) from the MODIS MOD13Q1 product on Google Earth Engine.

In [None]:
import time
import json
import geopandas as gpd
import ee

## Supporting Tools

In [None]:
def modis_scaling(image):
    """
    Apply the 0.0001 scale factor to the integer-coded bands of a MODIS image.

    The 'NDVI', 'EVI' and 'sur_refl_b02' bands are multiplied by 0.0001; every
    other band is passed through unchanged. The scaled bands are appended after
    the untouched ones and keep their original names.
    :param image: ee.Image, must contain the 'NDVI', 'EVI' and 'sur_refl_b02' bands.
    :return: ee.Image
    """
    bands_to_modify = ['NDVI', 'EVI', 'sur_refl_b02']
    scale = ee.Number(0.0001)

    # scale all target bands in a single multi-band operation instead of
    # building a temporary ImageCollection per image; the explicit rename
    # guarantees the original band names are kept
    scaled_bands = image.select(bands_to_modify).multiply(scale).rename(bands_to_modify)

    # combine the untouched original bands with the scaled bands
    untouched_names = image.bandNames().removeAll(bands_to_modify)
    return image.select(untouched_names).addBands(scaled_bands)

def mask_clouds_mod13(image):
    """
    Mask every pixel of a MOD13 image that is not flagged as good data.

    Per the MOD13Q1 catalog entry, 'SummaryQA' holds a 2-bit quality code
    (0: good data, 1: marginal data, 2: snow/ice, 3: cloudy). Testing bit 0
    alone would let code 2 (snow/ice) through, so both bits are checked.
    :param image: ee.Image, must contain the 'SummaryQA' band.
    :return: ee.Image, with all pixels whose SummaryQA code is not 0 masked.
    """
    qa = image.select('SummaryQA')
    mask = qa.bitwiseAnd(0x03).eq(0)    # keep only good-quality (clear) pixels
    return image.updateMask(mask)

def get_time_series(roi, image_collection, date_start, date_end, target='NDVI', debug=False):
    """
    Extract the values of one vegetation index and create the time series of its mean pixel value over the
    given feature, using the longitude and latitude of the feature centroid to mark the location.

    Images are filtered by the feature bounds and the date range, then cloud-masked, rescaled and extended
    with an 'NIRv' band (NDVI * sur_refl_b02) before the regional mean is taken at a scale of 250.
    Images whose regional mean is null are dropped from the output.
    :param roi: ee.Feature, the region of interest.
    :param image_collection: ee.ImageCollection, the image collection to map over.
    :param date_start: string, the start date to search image, in format 'YYYY-MM-dd'.
    :param date_end: string, the end date to search image, in format 'YYYY-MM-dd'.
    :param target: string, the single band to extract: 'NDVI', 'EVI' or 'NIRv', default to 'NDVI'.
    :param debug: bool, if True print the filtered image count. This issues a client-side getInfo() request,
        so it must stay False whenever this function is called from inside a server-side map().
    :return: ee.ComputedObject (result of ee.Algorithms.If) resolving to an ee.FeatureCollection; each feature
        carries the 'system:index' of the source image, 'lon', 'lat' and the mean value under the key 'target'.
    """
    # function to aggregate VI for roi
    def calc_mean_vi(image):
        # reduce only the requested band; the other bands are never read
        mean_vi = image.select(target).reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=roi.geometry(),
            scale=250,
            maxPixels=1e9
        )
        return ee.Feature(None, {
            'system:index': image.get('system:index'),
            'lon': lon,
            'lat': lat,
            'target': mean_vi.get(target)
        })

    # function to calculate NIRv
    def calc_nirv(image):
        nirv = image.expression(
            'NDVI * nir', {
                'NDVI': image.select('NDVI'),
                'nir': image.select('sur_refl_b02')
            }).rename('NIRv').toFloat()
        return image.addBands(nirv)

    # function to calculate VI over all images
    def get_values_from_image_collection(collection):
        vi_features = collection.map(calc_mean_vi)
        vi_filtered = vi_features.filter(ee.Filter.notNull(['target']))   # filter out null values
        return ee.FeatureCollection(vi_filtered)

    # get centroid location of the given feature (read by calc_mean_vi through the closure)
    centroid = roi.geometry().centroid()
    lon = centroid.coordinates().get(0)
    lat = centroid.coordinates().get(1)

    # filter images by date and location, and apply pre-process on images
    ic_to_map = image_collection \
        .filterBounds(roi.geometry()) \
        .filterDate(date_start, date_end) \
        .map(mask_clouds_mod13) \
        .map(modis_scaling) \
        .map(calc_nirv)

    # optional client-side log of the filtered image count; kept opt-in because
    # getInfo() cannot be evaluated while this function runs inside a map()
    if debug:
        print(f"MOD13Q1 image count: {ic_to_map.size().getInfo()}.")

    image_count = ic_to_map.select(target).size()

    # return an empty collection when no image matches the filters
    output = ee.Algorithms.If(
        condition=image_count.gt(0),
        trueCase=get_values_from_image_collection(ic_to_map),
        falseCase=ee.FeatureCollection([])
    )

    return output

## Let's Get Things Done!

It's time to start the main process!

In [None]:
# First, authenticate with Earth Engine
ee.Authenticate()

In [None]:
# Next, initialize the Earth Engine API under the 'ee-charleshzijian' project
ee.Initialize(project='ee-charleshzijian')

In [None]:
# Then, mount Google Drive at /content/drive so the shapefile stored there can be read
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Set the parameters for the process
# Name passed as the Google Drive `folder` of the CSV export tasks
path_to_export = 'vi_ts'
# Date range for image search, in format 'YYYY-MM-dd'
start_date = '2000-01-01'
end_date = '2024-12-31'

In [None]:
# Option 1: read the shapefile from the mounted Google Drive and reproject it to EPSG:4326
path_to_shapefile = r'/content/drive/MyDrive/Mangrove/China/ChinaMangrove2020'
shapefile_to_map = (
    gpd.read_file(f'{path_to_shapefile}/ChinaMangrove2020.shp')
    .to_crs("epsg:4326")
)
# tells the later cells that the features are held in a local GeoDataFrame
source = 'drive'

In [None]:
# Or load the features from an Earth Engine asset instead of the Drive shapefile.
# Both cells assign `shapefile_to_map` and `source`, so whichever runs last wins.
mangrove_asset_id = 'projects/ee-charleshzijian/assets/China_Mangrove/Non-Protected'
shapefile_to_map = ee.FeatureCollection(mangrove_asset_id)
source = 'ee'

In [None]:
# Now load the MODIS image collection (asset 'MODIS/061/MOD13Q1') used by every extraction below
ic = ee.ImageCollection("MODIS/061/MOD13Q1")

In [None]:
# FOR DEBUG ONLY: run the extraction on a single hand-written rectangle
# (longitude ~113.9999 to 114.0453, latitude ~22.5037 to 22.5291)
test_polygon = ee.Geometry.Polygon([
    [
        [113.99995454627094, 22.529072266039357],
        [113.99995454627094, 22.503700164120257],
        [114.04527314978657, 22.503700164120257],
        [114.04527314978657, 22.529072266039357]
    ]
])

# wrap the geometry in a feature, since get_time_series calls roi.geometry()
test_roi = ee.Feature(test_polygon)

# get the time series for NIRv
result = get_time_series(test_roi, ic, start_date, end_date, target='NIRv')

# getInfo() fetches the computed result to the client so it can be printed
print("Result: ", result.getInfo())

In [None]:
# Split and retrieve vegetation indices on every 400 features each time.
# One export task per (chunk, index) pair is started and awaited before the next one is submitted.
shp_idx = 0    # index of the chunk to start from; the loop begins at feature shp_idx*step_length
total_features = len(shapefile_to_map) if source == 'drive' else shapefile_to_map.size().getInfo()
step_length = 400
for i in range(shp_idx*step_length, total_features, step_length):
    shp_idx += 1
    # 1st -- get features ready
    # -- slice the geo-dataframe (or the Earth Engine collection)
    if source == 'drive':
        gdf = shapefile_to_map.iloc[i:i+step_length]
    else:
        gdf = shapefile_to_map.toList(step_length, i)

    # # -- export the sliced geo-dataframe into a new shapefile [ONLY WHEN source == drive]
    # export_slice = f"ChinaMangrove_part{shp_idx}.shp"
    # gdf.to_file(f"{path_to_export}/{export_slice}",
    #             driver='ESRI Shapefile')

    # -- build the Earth Engine feature collection once per chunk;
    #    it is the same for every vegetation index
    if source == 'drive':
        # convert the geo-dataframe to a list of GeoJSON feature dictionaries
        gdf_json = json.loads(gdf.to_json())["features"]
        # create a list of Earth Engine features
        ee_features = [
            ee.Feature(ee.Geometry.MultiPolygon(feature['geometry']['coordinates']), feature['properties'])
            for feature in gdf_json
        ]
        # convert the list to a feature collection
        features = ee.FeatureCollection(ee_features)
    else:
        features = ee.FeatureCollection(gdf)

    # 2nd -- get indices
    for vi in ['NDVI', 'EVI', 'NIRv']:
        print(f">> Now on #{shp_idx} {vi}...")

        # get mean vegetation index for each feature
        result = features.map(lambda f: get_time_series(
            roi=f,
            image_collection=ic,
            date_start=start_date,
            date_end=end_date,
            target=vi
        )).flatten()

        # export the result to a csv file
        csv_name = f'Mean_{vi}_{shp_idx}'
        task = ee.batch.Export.table.toDrive(
            collection=result,
            description=f'MODIS_{vi}_{shp_idx}',
            folder=path_to_export,
            fileNamePrefix=csv_name,
            fileFormat='CSV'
        )
        task.start()

        print(f"-- Task submitted at {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}.")

        # check if the task is still active every 30 seconds
        while task.active():
            time.sleep(30)
            print(f".. Task is running ({time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}).")

        # an inactive task may have failed or been cancelled, so report its final state
        final_status = task.status()
        final_state = final_status.get('state')
        if final_state == 'COMPLETED':
            print(f">> Task #{shp_idx} finished on {vi} calculation.")
        else:
            print(f"!! Task #{shp_idx} on {vi} ended in state {final_state}: "
                  f"{final_status.get('error_message')}")

print(">> ALL FEATURES PROCESSED.")