# SCRIPT 01: Pre-Processing Data with Google Earth Engine

This is the first script of the methodology. It uses the Google Earth Engine Python API to produce data from five sources (Sentinel 1, Sentinel 2 and three land use and land cover maps):

+ [Sentinel 1](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S1_GRD)
    + Bands used
        + VV
        + VH
    + Products generated
        + Annual reduction for 2020
        + Monthly reductions for each month of 2020
+ [Sentinel 2](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S2_SR)
    + Bands used
        + Band 2
        + Band 3
        + Band 4
        + Band 8
        + Band 11
        + Band 12
    + Products generated
        + Annual reduction for 2020
        + Monthly reductions for each month of 2020
+ Land Use and Land Cover Maps
    + Maps used
        + [MapBiomas](https://mapbiomas.org) (v6.0)
        + [Google Dynamic World](https://developers.google.com/earth-engine/datasets/catalog/GOOGLE_DYNAMICWORLD_V1) (v1)
        + [ESA WorldCover](https://developers.google.com/earth-engine/datasets/catalog/ESA_WorldCover_v100) (v100)
        
In the following cells, please refer to the comments in the code for further explanation of how each step works.

In [None]:
# importing packages
import ee
from tqdm.notebook import tqdm
import time

# starts Google Earth Engine connection
# Authenticate() is called first to obtain the user's credentials and
# Initialize() then sets up the client library; both must run before any
# other `ee` call in the following cells.
ee.Authenticate()
ee.Initialize()

In [None]:
# the code uses a grid defined by the script's author to create the data
# tiled for each 1 degree in geographic coordinates, with some minor overlapping.

# it is recommended to make the data for only a portion of the tiles at a time,
# for storage purposes.

# tile ids written as inclusive (first, last) runs of consecutive ids;
# the gaps between runs are ids that are not processed.
_tile_id_ranges = [
    (9, 11),
    (13, 15),
    (23, 32),
    (38, 48),
    (51, 63),
    (65, 111),
    (113, 127),
    (130, 139),
    (147, 154),
    (163, 169),
    (179, 183),
]
ids = [tile_id
       for first_id, last_id in _tile_id_ranges
       for tile_id in range(first_id, last_id + 1)]
len(ids)

In [None]:
# Dataset handles that do not depend on the tile are created once, before the
# loop, instead of being rebuilt on every iteration.

# Grid Semiarido - Tiles of 1 in 1 degrees
grid_semiarido = ee.FeatureCollection("users/brunomenini10/Grid_Semiarido")

# Sentinel 1 - SAR
# only images listing both the VV and the VH polarisations are kept, and only
# those two bands are selected
sentinel_1 = (ee.ImageCollection("COPERNICUS/S1_GRD")
              .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV'))
              .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VH'))
              .select('VV', 'VH'))

# LULC - Google Dynamic World
dw = ee.ImageCollection("GOOGLE/DYNAMICWORLD/V1")

# LULC - ESA World Cover
ESA_wc = ee.ImageCollection("ESA/WorldCover/v100")

# LULC - MapBiomas
mapbiomas = ee.Image("projects/mapbiomas-workspace/public/collection6/mapbiomas_collection60_integration_v1")

# iterates through every tile id to create its data.
for i in tqdm(ids):
    # ROI: geometry of the grid feature(s) whose 'id' property equals the tile id
    roi = grid_semiarido.filter(ee.Filter.eq('id', i)).geometry()

    # Sentinel 2 - Optical
    # created inside the loop because the name `sentinel_2` is re-bound to a
    # masked collection further down in the loop body; hoisting it would stack
    # one more mask mapping on every iteration.
    sentinel_2 = ee.ImageCollection("COPERNICUS/S2_SR")

    ########################################
    # Doing the LULC Map

    # - Google Dynamic World -
    # most frequent 'label' value of each pixel over 2020.
    # filterDate treats its end date as exclusive, so the range ends on
    # 2021-01-01 to keep the images of 2020-12-31.
    gdc_classification = dw.select('label').filterDate('2020-01-01', '2021-01-01').reduce(ee.Reducer.mode())

    # reclass Google Dynamic World data
    def reclass_google_dc(gdc):
        """Return a 0/1 image derived from a Dynamic World label image.

        Starts from a constant image of 1 and, for every class listed below,
        writes the associated flag wherever `gdc` equals that class value.
        Only class 4 (crops) is flagged 1; pixels matching none of the listed
        classes keep the initial value 1.
        """
        flag_by_class = (
            (0, 0),  # water
            (1, 0),  # trees
            (2, 0),  # grass
            (3, 0),  # flooded_vegetation
            (4, 1),  # crops
            (5, 0),  # shrub_and_scrub
            (6, 0),  # built
            (7, 0),  # bare
            (8, 0),  # snow_and_ice
        )
        reclassified = ee.Image(1)
        for class_value, flag in flag_by_class:
            reclassified = reclassified.where(gdc.eq(class_value), flag)
        return reclassified

    gdc_reclass = reclass_google_dc(gdc_classification)

    # reclass ESA World Cover
    def reclass_esa(esa):
        """Return a 0/1 image derived from an ESA World Cover 'Map' image.

        Starts from a constant image of 1 and, for every class listed below,
        writes the associated flag wherever `esa` equals that class value.
        Only class 40 (Cropland) is flagged 1; pixels matching none of the
        listed classes keep the initial value 1.
        """
        flag_by_class = (
            (10, 0),   # Trees
            (20, 0),   # Shrubland
            (30, 0),   # Grassland
            (40, 1),   # Cropland
            (50, 0),   # Built-Up
            (60, 0),   # Barren / Sparse Vegetation
            (70, 0),   # Snow and Ice
            (80, 0),   # Open water
            (90, 0),   # Herbaceous wetland
            (95, 0),   # Mangroves
            (100, 0),  # Moss and lichen
        )
        reclassified = ee.Image(1)
        for class_value, flag in flag_by_class:
            reclassified = reclassified.where(esa.eq(class_value), flag)
        return reclassified

    # the first image of the collection is used, taking its 'Map' band
    esa_reclass = reclass_esa(ESA_wc.first().select('Map'))

    # reclass MapBiomas
    def reclass_mapbiomas(map_biomas):
        """Return a 0/1 image derived from a MapBiomas classification band.

        Starts from a constant image of 1 and, for every class listed below,
        writes the associated flag wherever `map_biomas` equals that class
        value. Farming classes other than Pasture and Forest Plantation are
        flagged 1; pixels matching none of the listed classes keep the
        initial value 1.
        """
        flag_by_class = (
            (1, 0),   # 1. Forest
            (3, 0),   #   1.1. Forest Formation
            (4, 0),   #   1.2. Savanna Formation
            (5, 0),   #   1.3. Mangrove
            (49, 0),  #   1.4. Wooded Restinga
            (10, 0),  # 2. Non Forest Natural Formation
            (11, 0),  #   2.1. Wetlands
            (12, 0),  #   2.2. Grassland
            (32, 0),  #   2.3. Salt Flat
            (29, 0),  #   2.4. Rocky Outcrop
            (13, 0),  #   2.5. Other non Forest Formations
            (14, 1),  # 3. Farming
            (15, 0),  #   3.1. Pasture
            (18, 1),  #   3.2. Agriculture
            (19, 1),  #     3.2.1. Temporary Crop
            (39, 1),  #       3.2.1.1. Soybean
            (20, 1),  #       3.2.1.2. Sugar cane
            (40, 1),  #       3.2.1.3. Rice
            (41, 1),  #       3.2.1.4. Other Temporary Crops
            (36, 1),  #     3.2.2. Perennial Crop
            (46, 1),  #       3.2.2.1. Coffee
            (47, 1),  #       3.2.2.2. Citrus
            (48, 1),  #       3.2.2.3. Other Perennial Crop
            (9, 0),   #   3.3. Forest Plantation
            (21, 1),  #   3.4. Mosaic Agriculture and Pasture
            (22, 0),  # 4. Non Vegetated Area
            (23, 0),  #   4.1. Beach, Dune and Sand Spot
            (24, 0),  #   4.2. Urban Area
            (30, 0),  #   4.3. Mining
            (25, 0),  #   4.4. Other non Vegetated Areas
            (26, 0),  # 5. Water
            (33, 0),  #   5.1. River, Lake and Ocean
            (31, 0),  #   5.2. Aquaculture
            (27, 0),  # 6. Non Observed
        )
        reclassified = ee.Image(1)
        for class_value, flag in flag_by_class:
            reclassified = reclassified.where(map_biomas.eq(class_value), flag)
        return reclassified

    # the 2020 band of the MapBiomas image is the one reclassified
    mapbiomas_reclass = reclass_mapbiomas(mapbiomas.select('classification_2020'))

    # Merging maps in one file: each reclassified map becomes one named band
    # of a single byte image
    gdc_band = gdc_reclass.select('constant').rename('GDC')
    esa_band = esa_reclass.select('constant').rename('ESA')
    mapbiomas_band = mapbiomas_reclass.select('constant').rename('MapBiomas')
    data_LULC = ee.Image(gdc_band.addBands(esa_band)).addBands(mapbiomas_band).toByte()

    # Exporting LULC
    export_task = ee.batch.Export.image.toDrive(
        image = data_LULC,
        description = f'LULC - id{str(i).zfill(3)}',
        folder = 'Earth Engine',
        fileNamePrefix = f'LULC_id{str(i).zfill(3)}',
        region = roi,
        scale = 10,
        maxPixels = 10e8,
    )
    export_task.start()
    # short pause between task submissions
    time.sleep(1)

    ########################################
    # Doing the Sentinel 2 Stuff

    # function to mask out clouds and cloud shadows
    def mask_image(image):
        """Mask the pixels of a Sentinel 2 SR image flagged by its SCL band.

        Pixels whose SCL value is 3, 7, 8, 9, 10 or 11 are masked out
        (NOTE(review): per the COPERNICUS/S2_SR catalog these are cloud
        shadows, unclassified / low, medium and high probability clouds,
        cirrus and snow / ice - confirm against the catalog).

        `updateMask` is used rather than `mask` so that pixels already masked
        in the input stay masked; `mask(...)` would replace the existing mask
        instead of refining it.
        """
        keep = image.expression('!(SCL==3 || (SCL==7 || (SCL==8 || (SCL==9 || (SCL==10 || (SCL==11))))))',
                                {
                                'SCL': image.select(['SCL'])
                                })
        return image.updateMask(keep)

    # prepares the sentinel imagecollection
    sentinel_2 = sentinel_2.map(mask_image)

    # Yearly Reduction: per-band median of the collection over 2020, as int16.
    # filterDate treats its end date as exclusive, so the range ends on
    # 2021-01-01 to keep the images of 2020-12-31.
    reduction_s2_yearly = sentinel_2.filterDate('2020-01-01', '2021-01-01').reduce(ee.Reducer.median()).toInt16()

    # Exporting Yearly Reduction Sentinel 2
    export_task = ee.batch.Export.image.toDrive(folder = 'Earth Engine',
                                                description = f'Reduction - Optical - Year - id{str(i).zfill(3)}',
                                                image = reduction_s2_yearly,
                                                fileNamePrefix = f'Reduction_Optical_Year_id{str(i).zfill(3)}',
                                                region = roi,
                                                scale = 10,
                                                maxPixels = 10e8)
    export_task.start()
    # short pause between task submissions
    time.sleep(1)

    # functions to create the reductions according to the intervals.
    # The six per-band functions only differ in the selected band, so they are
    # all built by the same factory.
    def _s2_interval_reducer(band):
        """Build the function that reduces one Sentinel 2 band over an interval.

        The returned function receives a [start, end] pair, filters
        `sentinel_2` to that interval, takes the median of `band` and stores
        the pair in 'system:time_start' / 'system:time_end'.
        """
        def reduce_interval(dates):
            interval = ee.List(dates)
            interval_start = interval.get(0)
            interval_end = interval.get(1)
            band_median = (sentinel_2.filterDate(interval_start, interval_end)
                                     .select([band])
                                     .reduce(ee.Reducer.median()))
            return (band_median.set('system:time_start', interval_start)
                               .set('system:time_end', interval_end))
        return reduce_interval

    intervals_reduction_s2_B2 = _s2_interval_reducer('B2')
    intervals_reduction_s2_B3 = _s2_interval_reducer('B3')
    intervals_reduction_s2_B4 = _s2_interval_reducer('B4')
    intervals_reduction_s2_B8 = _s2_interval_reducer('B8')
    intervals_reduction_s2_B11 = _s2_interval_reducer('B11')
    intervals_reduction_s2_B12 = _s2_interval_reducer('B12')

    # list of [start, end] date pairs defining the months of 2020.
    # filterDate treats the end date as exclusive, so each interval ends on the
    # first day of the following month; ending it on the last day of the month
    # would leave that day's images out of the monthly reduction.
    month_starts = [ee.Date(f'2020-{month:02d}-01') for month in range(1, 13)]
    dates = ee.List([ee.List([month_start, month_start.advance(1, 'month')])
                     for month_start in month_starts])

    # creates one ImageCollection per band (one image per interval of `dates`),
    # sorts it by start date and stacks its images as the bands of a single
    # int16 image.
    # NOTE: a former statement that mapped `intervals_reduction_s2` over
    # `dates` was removed here: no function with that name is defined in this
    # script, so it raised a NameError, and its result was not used by any
    # of the exports below.
    reduction_s2_monthly_B2 = ee.ImageCollection(dates.map(intervals_reduction_s2_B2)).sort('system:time_start', True).toBands().toInt16()
    reduction_s2_monthly_B3 = ee.ImageCollection(dates.map(intervals_reduction_s2_B3)).sort('system:time_start', True).toBands().toInt16()
    reduction_s2_monthly_B4 = ee.ImageCollection(dates.map(intervals_reduction_s2_B4)).sort('system:time_start', True).toBands().toInt16()
    reduction_s2_monthly_B8 = ee.ImageCollection(dates.map(intervals_reduction_s2_B8)).sort('system:time_start', True).toBands().toInt16()
    reduction_s2_monthly_B11 = ee.ImageCollection(dates.map(intervals_reduction_s2_B11)).sort('system:time_start', True).toBands().toInt16()
    reduction_s2_monthly_B12 = ee.ImageCollection(dates.map(intervals_reduction_s2_B12)).sort('system:time_start', True).toBands().toInt16()

    # Exporting Monthly Reductions of Sentinel 2 data:
    # one Drive export task per band, submitted in the order listed here
    monthly_s2_by_band = (
        ('B2', reduction_s2_monthly_B2),
        ('B3', reduction_s2_monthly_B3),
        ('B4', reduction_s2_monthly_B4),
        ('B8', reduction_s2_monthly_B8),
        ('B11', reduction_s2_monthly_B11),
        ('B12', reduction_s2_monthly_B12),
    )
    for band_name, band_stack in monthly_s2_by_band:
        export_task = ee.batch.Export.image.toDrive(
            image = band_stack,
            description = f'Reduction - Optical - Months - id{str(i).zfill(3)} - {band_name}',
            folder = 'Earth Engine',
            fileNamePrefix = f'Reduction_Optical_Months_id{str(i).zfill(3)}_{band_name}',
            region = roi,
            scale = 10,
            maxPixels = 10e8,
        )
        export_task.start()
        # short pause between task submissions
        time.sleep(1)

    ########################################
    # Doing the Sentinel 1 Stuff

    # creating the yearly reduction
    # it is exported as an int16 value, after being multiplied by 1000.
    # filterDate treats its end date as exclusive, so the range ends on
    # 2021-01-01 to keep the images of 2020-12-31.
    reduction_s1_yearly = (sentinel_1.filterDate('2020-01-01', '2021-01-01')
                                     .reduce(ee.Reducer.median())
                                     .multiply(ee.Image(1000))
                                     .toInt16())

    # Exporting Yearly Reduction Sentinel 1
    export_task = ee.batch.Export.image.toDrive(folder = 'Earth Engine',
                                                description = f'Reduction - SAR - Year - id{str(i).zfill(3)}',
                                                image = reduction_s1_yearly,
                                                fileNamePrefix = f'Reduction_SAR_Year_id{str(i).zfill(3)}',
                                                region = roi,
                                                scale = 10,
                                                maxPixels = 10e8)
    export_task.start()
    # short pause between task submissions
    time.sleep(1)
    
    # creating the monthly reductions.
    # Both polarisations follow the same recipe, so their interval functions
    # come from one factory.
    def _s1_interval_reducer(polarisation):
        """Build the function that reduces one Sentinel 1 band over an interval.

        The returned function receives a [start, end] pair, filters
        `sentinel_1` to that interval, takes the median of `polarisation` and
        stores the pair in 'system:time_start' / 'system:time_end'.
        """
        def reduce_interval(dates):
            interval = ee.List(dates)
            interval_start = interval.get(0)
            interval_end = interval.get(1)
            band_median = (sentinel_1.filterDate(interval_start, interval_end)
                                     .select([polarisation])
                                     .reduce(ee.Reducer.median()))
            return (band_median.set('system:time_start', interval_start)
                               .set('system:time_end', interval_end))
        return reduce_interval

    intervals_reduction_s1_VV = _s1_interval_reducer('VV')
    intervals_reduction_s1_VH = _s1_interval_reducer('VH')

    def _s1_monthly_stack(interval_reducer):
        """Stack the per-interval medians as bands, scaled by 1000, as int16."""
        monthly = ee.ImageCollection(dates.map(interval_reducer)).sort('system:time_start', True)
        return monthly.toBands().multiply(ee.Image(1000)).toInt16()

    reduction_s1_monthly_VV = _s1_monthly_stack(intervals_reduction_s1_VV)
    reduction_s1_monthly_VH = _s1_monthly_stack(intervals_reduction_s1_VH)
    
    # Exporting Monthly Reductions for Sentinel 1 data:
    # one Drive export task per polarisation, VV first and VH second
    monthly_s1_by_band = (
        ('VV', reduction_s1_monthly_VV),
        ('VH', reduction_s1_monthly_VH),
    )
    for band_name, band_stack in monthly_s1_by_band:
        export_task = ee.batch.Export.image.toDrive(
            image = band_stack,
            description = f'Reduction - SAR - Months - id{str(i).zfill(3)} - {band_name}',
            folder = 'Earth Engine',
            fileNamePrefix = f'Reduction_SAR_Months_id{str(i).zfill(3)}_{band_name}',
            region = roi,
            scale = 10,
            maxPixels = 10e8,
        )
        export_task.start()
        # short pause between task submissions
        time.sleep(1)