# Extracting MODIS and Landsat-8 Data Using Google Earth Engine

In [1]:
import ee
import pandas as pd
ee.Authenticate()
ee.Initialize()


Successfully saved authorization token.


### Accessing the MODIS chlor_a data for the Bahamas and adding it to the modis_caribbean datafrane

In [8]:
# accessing modis data from ee
chlor_a = ee.ImageCollection("NASA/OCEANDATA/MODIS-Aqua/L3SMI").select("chlor_a")

In [14]:
years = ['2018', '2019', '2020', '2021']
months = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']

modis_car = pd.DataFrame()

for year in years:
    for month in range(len(months)-1):
        start_date = f'{year}-{months[month]}-01'
        end_date = f'{year}-{months[month+1]}-01'
        print(f'extracting data for {start_date} to {end_date}')
        
        try:
            curr_image = chlor_a.filterBounds(geometry=ee.Geometry.Rectangle([[-85, 17],[-63, 27]])).filterDate(start_date, end_date)
            curr_data = curr_image.getRegion(geometry=ee.Geometry.Rectangle([[-85, 17],[-63, 27]]), scale=10000).getInfo()
            curr_df = pd.DataFrame(curr_data[1:], columns=curr_data[0])[['longitude', 'latitude', 'time', 'chlor_a']]
            curr_df = curr_df.dropna()
            modis_car = pd.concat([modis_car, curr_df])
        except:
            continue
            
        print(f'current size = {modis_car.shape}')

extracting data for 2018-01-01 to 2018-02-01
current size = (145189, 4)
extracting data for 2018-02-01 to 2018-03-01
current size = (416755, 4)
extracting data for 2018-03-01 to 2018-04-01
current size = (634694, 4)
extracting data for 2018-04-01 to 2018-05-01
current size = (825860, 4)
extracting data for 2018-05-01 to 2018-06-01
current size = (903921, 4)
extracting data for 2018-06-01 to 2018-07-01
current size = (1011669, 4)
extracting data for 2018-07-01 to 2018-08-01
current size = (1099642, 4)
extracting data for 2018-08-01 to 2018-09-01
current size = (1255494, 4)
extracting data for 2018-09-01 to 2018-10-01
current size = (1424377, 4)
extracting data for 2018-10-01 to 2018-11-01
current size = (1658340, 4)
extracting data for 2018-11-01 to 2018-12-01
current size = (1917987, 4)
extracting data for 2019-01-01 to 2019-02-01
current size = (2193725, 4)
extracting data for 2019-02-01 to 2019-03-01
current size = (2469957, 4)
extracting data for 2019-03-01 to 2019-04-01
current siz

In [15]:
# modis_car.to_pickle('./files/MODIS/modis_car.pkl')

### Extracting chlor_a data for the Great Barrier Reef and adding it to modis_gb dataframe

In [19]:
chlor_a = ee.ImageCollection("NASA/OCEANDATA/MODIS-Aqua/L3SMI").select("chlor_a")

In [5]:
years = ['2018', '2019', '2020', '2021']
months = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']

modis_gb = pd.DataFrame()

for year in years:
    for month in range(len(months)-1):
        start_date = f'{year}-{months[month]}-01'
        end_date = f'{year}-{months[month+1]}-01'
        print(f'extracting data for {start_date} to {end_date}')
        
        try:
            curr_image = chlor_a.filterBounds(geometry=ee.Geometry.Rectangle([[142.69 , -10],[153.7, -28.7]])).filterDate(start_date, end_date)
            curr_data = curr_image.getRegion(geometry=ee.Geometry.Rectangle([[142.69 , -10],[153.7, -28.7]]), scale=100).getInfo()
            curr_df = pd.DataFrame(curr_data[1:], columns=curr_data[0])[['longitude', 'latitude', 'time', 'chlor_a']]
            curr_df = curr_df.dropna()
            modis_gb = pd.concat([modis_gb, curr_df])
        except:
            continue
            
        print(f'current size = {modis_gb.shape}')

extracting data for 2018-01-01 to 2018-02-01
extracting data for 2018-02-01 to 2018-03-01
extracting data for 2018-03-01 to 2018-04-01
extracting data for 2018-04-01 to 2018-05-01
extracting data for 2018-05-01 to 2018-06-01
extracting data for 2018-06-01 to 2018-07-01
extracting data for 2018-07-01 to 2018-08-01
extracting data for 2018-08-01 to 2018-09-01
extracting data for 2018-09-01 to 2018-10-01
extracting data for 2018-10-01 to 2018-11-01
extracting data for 2018-11-01 to 2018-12-01
extracting data for 2019-01-01 to 2019-02-01
extracting data for 2019-02-01 to 2019-03-01
extracting data for 2019-03-01 to 2019-04-01
extracting data for 2019-04-01 to 2019-05-01
extracting data for 2019-05-01 to 2019-06-01
extracting data for 2019-06-01 to 2019-07-01
extracting data for 2019-07-01 to 2019-08-01
extracting data for 2019-08-01 to 2019-09-01
extracting data for 2019-09-01 to 2019-10-01
extracting data for 2019-10-01 to 2019-11-01
extracting data for 2019-11-01 to 2019-12-01
extracting

In [18]:
# modis_gb.to_pickle('./files/MODIS/modis_gb.pkl')

### Extracting MODIS Reflectance Values for Great Barrier Reef from Earth Engine

In [4]:
from ee import EEException

bands = ['sur_refl_b09', 'sur_refl_b10', 'sur_refl_b11', 'sur_refl_b12', 'sur_refl_b12', 'sur_refl_b13', 'sur_refl_b14', 'sur_refl_b15', 'sur_refl_b16']
years = ['2018', '2019', '2020', '2021']
months = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']

for band in bands:
    print(f'Extracting {band} data')
    modis_gb = pd.DataFrame()
    img = ee.ImageCollection("MODIS/006/MYDOCGA").select(band)
    for year in years:
        for month in range(len(months)-1):
            start_date = f'{year}-{months[month]}-01'
            end_date = f'{year}-{months[month+1]}-01'
            print(f'extracting {band} data for {start_date} to {end_date}')
            
            try:
                curr_image = img.filterBounds(geometry=ee.Geometry.Rectangle([[142.69 , -10],[153.7, -28.7]])).filterDate(start_date, end_date)
                curr_data = curr_image.getRegion(geometry=ee.Geometry.Rectangle([[142.69 , -10],[153.7, -28.7]]), scale=10000).getInfo()
                curr_df = pd.DataFrame(curr_data[1:], columns=curr_data[0])[['longitude', 'latitude', 'time', band]]
                curr_df = curr_df.dropna()
                modis_gb = pd.concat([modis_gb, curr_df])
            except EEException:
                continue
    modis_gb.to_pickle(f'./files/MODIS/modis_gb_{band}.pkl')
    print(f'Saved {band} data to pickle file')


Extracting sur_refl_b09 data
extracting sur_refl_b09 data for 2018-01-01 to 2018-02-01
extracting sur_refl_b09 data for 2018-02-01 to 2018-03-01
extracting sur_refl_b09 data for 2018-03-01 to 2018-04-01
extracting sur_refl_b09 data for 2018-04-01 to 2018-05-01
extracting sur_refl_b09 data for 2018-05-01 to 2018-06-01
extracting sur_refl_b09 data for 2018-06-01 to 2018-07-01
extracting sur_refl_b09 data for 2018-07-01 to 2018-08-01
extracting sur_refl_b09 data for 2018-08-01 to 2018-09-01
extracting sur_refl_b09 data for 2018-09-01 to 2018-10-01
extracting sur_refl_b09 data for 2018-10-01 to 2018-11-01
extracting sur_refl_b09 data for 2018-11-01 to 2018-12-01
extracting sur_refl_b09 data for 2019-01-01 to 2019-02-01
extracting sur_refl_b09 data for 2019-02-01 to 2019-03-01
extracting sur_refl_b09 data for 2019-03-01 to 2019-04-01
extracting sur_refl_b09 data for 2019-04-01 to 2019-05-01
extracting sur_refl_b09 data for 2019-05-01 to 2019-06-01
extracting sur_refl_b09 data for 2019-06-01

In [5]:

bands = ['sur_refl_b08', 'sur_refl_b09', 'sur_refl_b10', 'sur_refl_b11', 'sur_refl_b12', 'sur_refl_b12', 'sur_refl_b13', 'sur_refl_b14', 'sur_refl_b15', 'sur_refl_b16']
years = ['2018', '2019', '2020', '2021']
months = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']

for band in bands:
    img = ee.ImageCollection("MODIS/006/MYDOCGA").select(band)
    print(f'Extracting {band} data')
    modis_car = pd.DataFrame()
    for year in years:
        for month in range(len(months)-1):
            start_date = f'{year}-{months[month]}-01'
            end_date = f'{year}-{months[month+1]}-01'
            print(f'extracting {band} data for {start_date} to {end_date}')
            
            try:
                curr_image = img.filterBounds(geometry=ee.Geometry.Rectangle([[-85, 17],[-63, 27]])).filterDate(start_date, end_date)
                curr_data = curr_image.getRegion(geometry=ee.Geometry.Rectangle([[-85, 17],[-63, 27]]), scale=10000).getInfo()
                curr_df = pd.DataFrame(curr_data[1:], columns=curr_data[0])[['longitude', 'latitude', 'time', f'{band}']]
                curr_df = curr_df.dropna()
                modis_car = pd.concat([modis_car, curr_df])
            except:
                continue
    modis_car.to_pickle(f'./files/MODIS/modis_car_{band}.pkl')
    print(f'Saved {band} data to pickle file')


Extracting sur_refl_b08 data
extracting sur_refl_b08 data for 2018-01-01 to 2018-02-01
extracting sur_refl_b08 data for 2018-02-01 to 2018-03-01
extracting sur_refl_b08 data for 2018-03-01 to 2018-04-01
extracting sur_refl_b08 data for 2018-04-01 to 2018-05-01
extracting sur_refl_b08 data for 2018-05-01 to 2018-06-01
extracting sur_refl_b08 data for 2018-06-01 to 2018-07-01
extracting sur_refl_b08 data for 2018-07-01 to 2018-08-01
extracting sur_refl_b08 data for 2018-08-01 to 2018-09-01
extracting sur_refl_b08 data for 2018-09-01 to 2018-10-01
extracting sur_refl_b08 data for 2018-10-01 to 2018-11-01
extracting sur_refl_b08 data for 2018-11-01 to 2018-12-01
extracting sur_refl_b08 data for 2019-01-01 to 2019-02-01
extracting sur_refl_b08 data for 2019-02-01 to 2019-03-01
extracting sur_refl_b08 data for 2019-03-01 to 2019-04-01
extracting sur_refl_b08 data for 2019-04-01 to 2019-05-01
extracting sur_refl_b08 data for 2019-05-01 to 2019-06-01
extracting sur_refl_b08 data for 2019-06-01

### Merging Modis Data into a Common Dataframe

In [13]:
import os

modis_gb_all = pd.DataFrame()
modis_car_all = pd.DataFrame()

for file in os.listdir('./files/MODIS'):
    if file.startswith('modis_gb'):
        if modis_gb_all.shape[0] == 0:
            modis_gb_all = pd.read_pickle(f'./files/MODIS/{file}')
        else:
            modis_gb_all = modis_gb_all.merge(pd.read_pickle(f'./files/MODIS/{file}'), on=['longitude', 'latitude', 'time'], how='inner')   

    elif file.startswith('modis_car'):
        if modis_car_all.shape[0] == 0:
            modis_car_all = pd.read_pickle(f'./files/MODIS/{file}')
        else:
            modis_car_all = modis_car_all.merge(pd.read_pickle(f'./files/MODIS/{file}'), on=['longitude', 'latitude', 'time'], how='inner')