# Data preprocessing and export from GEE
Nachat Jatusripitak

In [2]:
"""
--------------------------------------------------------------------------------
Set up GEE API
--------------------------------------------------------------------------------
"""
import ee
import geemap
# NOTE(review): geetools is never referenced by name in this notebook; presumably
# importing it is what registers the `ee.batch.Export.geetools` namespace used by
# the export cell -- confirm before removing it as "unused".
import geetools
import time
from pprint import pprint

# Authenticate with the notebook flow, then initialise the client against the
# 'ee-thailand-pm' Cloud project. Both calls must run before any ee.* object is built.
ee.Authenticate(auth_mode='notebook')
ee.Initialize(project='ee-thailand-pm')

# NOTE(review): imported after ee.Initialize(); presumably the helper module needs
# an initialised Earth Engine session at import time -- confirm before moving this
# import to the top of the cell.
import src.GEE_utils as GEE_utils

In [None]:
"""
--------------------------------------------------------------------------------
Preprocess data and export as images to Google Drive
--------------------------------------------------------------------------------
"""

# Date range to export. The export loop below keeps running while the start of the
# current chunk is not after END_DATE, so START_DATE == END_DATE yields one chunk.
START_DATE = ee.Date('2020-03-25')
END_DATE = ee.Date('2020-03-25')

# Number of weeks each export chunk advances from its start date.
CHUNK_SIZE = 1

# NOTE: the visualisation map is deliberately NOT created here. It needs GRID and
# ROI, which are only defined further down in this cell; referencing them at this
# point raises NameError on a fresh kernel. The map is built right after GRID.

# Source datasets: four ImageCollections plus one static terrain Image
pm25_ic = ee.ImageCollection('projects/sat-io/open-datasets/GHAP/GHAP_D1K_PM25')
weather_ic = ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR')
fire_ic = ee.ImageCollection('NASA/VIIRS/002/VNP14A1')
lc_ic = ee.ImageCollection("projects/sat-io/open-datasets/landcover/ESRI_Global-LULC_10m_TS")
terrain_img = ee.Image('CGIAR/SRTM90_V4')

# Nominal pixel scale of the weather and fire products, read from the first image
# of each collection (kept as server-side ee.Number objects)
weather_scale = weather_ic.first().projection().nominalScale()
fire_scale = fire_ic.first().projection().nominalScale()

# Target projection and the centre point of the region of interest
PROJ = ee.Projection('EPSG:32647')
CM_X, CM_Y = 580000, 2100000

# Side length of the ROI: 43 weather pixels, converted with the client-side
# value of the weather scale
SIZE = 43 * weather_scale.getInfo()

# ROI is a fixed square of side SIZE centred on (CM_X, CM_Y), expressed in PROJ
_half_side = SIZE / 2
ROI = ee.Geometry.Rectangle(
    coords=[CM_X - _half_side, CM_Y - _half_side, CM_X + _half_side, CM_Y + _half_side],
    proj=PROJ,
    evenOdd=False
)

# Grid of cells covering the ROI at the weather product's scale
GRID = ROI.coveringGrid(PROJ, weather_scale)

# Map for visualisation: show the grid and centre the view on the ROI
Map = geemap.Map()
Map.addLayer(GRID, {}, 'grid')
Map.centerObject(ROI, zoom=6)

# Collections restricted to the ROI, keyed by short name for the helper functions
ics = {
    key: collection.filterBounds(ROI)
    for key, collection in (
        ('pm25', pm25_ic),
        ('weather', weather_ic),
        ('fire', fire_ic),
        ('lc', lc_ic),
    )
}

# Walk the [START_DATE, END_DATE] range in chunks and export one image per date.
# Each chunk spans CHUNK_SIZE weeks from its start (clamped to END_DATE); the next
# chunk starts one day after the previous chunk's end.
num_images = 0
current_start = START_DATE

# END_DATE never changes, so fetch its timestamp once instead of making two
# blocking getInfo() round trips for it on every iteration.
end_millis = END_DATE.millis().getInfo()

while current_start.millis().getInfo() <= end_millis:
    current_end = current_start.advance(CHUNK_SIZE, 'week')
    if current_end.millis().getInfo() > end_millis:
        current_end = END_DATE

    # Progress line: chunk bounds and the number of images queued so far
    print(
        current_start.format('YYYY-MM-dd').getInfo(),
        current_end.format('YYYY-MM-dd').getInfo(),
        num_images
    )

    # Generate images: one per date in the chunk, keeping only those whose
    # 'num_bands' property equals 10
    dates = GEE_utils.create_date_list(current_start, current_end)

    image_list = dates.map(
        lambda date: GEE_utils.create_date_image(ics, terrain_img, weather_scale, fire_scale, date)
    ).filter(ee.Filter.eq('num_bands', 10))

    images = ee.ImageCollection.fromImages(image_list)

    # Export image batch to the 'dataset_1' Drive folder, one task per image,
    # indexed by each image's 'date' property
    tasks = ee.batch.Export.geetools.imagecollection.toDrive(
        imagecollection = images,
        index_property = 'date',
        description = 'dataset_1',
        scale = weather_scale,
        crs = PROJ.crs(),
        region = ROI,
        folder = 'dataset_1',
    )

    for task in tasks:
        # Brief pause between submissions so task starts are not fired back-to-back
        time.sleep(0.01)
        task.start()

    num_images += images.size().getInfo()

    current_start = current_end.advance(1, 'day')

# Last expression of the cell: renders the map widget in the notebook
Map

11131.949079327358
2020-03-25 2020-03-25 0
['pm25_today', 'u_wind_10m', 'v_wind_10m', 'dew_temp_2m', 'temp_2m', 'surf_pressure', 'precip_sum', 'frp', 'elevation', 'pm25_change']


Map(center=[18.99057101597028, 99.7596772096937], controls=(WidgetControl(options=['position', 'transparent_bg…