# Pulling Landsat and MODIS Data for the Points in the Coral Dataset

In [250]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import swifter
import datetime
import geopandas as gpd

In [308]:
# loading in coral data
# NOTE: unpickling can execute arbitrary code embedded in the file, so only
# load this pickle if it comes from a trusted source.
coral_data = pd.read_pickle('./landsat_allen_50k.pkl')
# Last expression of the cell: preview the first rows of the loaded frame.
coral_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,class,geometry,centroid,long,lat,Blue,Green,Red,Near Infrared,Shortwave Infrared 1,Shortwave Infrared 2,QA_PIXEL,datetime_landsat,datetime_landsat_offset
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Coral,1532057,Coral,"POLYGON ((143.853963811 -9.738995321, 143.8540...",POINT (143.85399910162243 -9.739062695),143.853999,-9.739063,8154.0,8031.0,7140.0,7297.0,7878.0,7867.0,21952.0,2022-09-10 20:33:33.778,9 days 20:33:33.778000
Coral,1227915,Coral,"POLYGON ((149.059431388 -20.248834988, 149.059...",POINT (149.0594763035 -20.248879903499994),149.059476,-20.24888,8557.0,8085.0,7199.0,7224.0,7448.0,7476.0,21952.0,2022-09-07 20:05:25.681,6 days 20:05:25.681000
Coral,1543871,Coral,"POLYGON ((143.137961613 -9.350429045, 143.1380...",POINT (143.13809431861569 -9.350418837214875),143.138094,-9.350419,8736.0,9595.0,7848.0,7227.0,7819.0,7821.0,21952.0,2022-09-10 20:33:09.844,9 days 20:33:09.844000
Coral,1653812,Coral,"POLYGON ((143.80222085 -9.609997246, 143.80231...",POINT (143.80228822356247 -9.610154451187496),143.802288,-9.610154,8215.0,8166.0,7250.0,7433.0,8095.0,8037.0,21952.0,2022-09-10 20:33:09.844,9 days 20:33:09.844000
Coral,2765285,Coral,"POLYGON ((144.928977711 -14.455150563, 144.929...",POINT (144.929022627 -14.455195478500002),144.929023,-14.455195,7975.0,7811.0,7004.0,7164.0,7753.0,7756.0,21952.0,2022-09-12 20:22:24.767,11 days 20:22:24.767000


In [310]:
# Clean the Landsat acquisition timestamp and derive zero-padded date parts.
#
# Written as a single non-mutating chain so the cell can be re-run safely:
#   1. literal 'NaT' strings anywhere in the frame become real missing values;
#   2. 'datetime_landsat' is coerced to datetime64 (unparseable values -> NaT),
#      which guarantees the `.dt` accessor below works even if the column was
#      stored with object dtype;
#   3. rows without a Landsat timestamp are dropped;
#   4. 'day' / 'month' / 'year' are added as zero-padded strings
#      (e.g. '09', '09', '2022'), the form used to build 'YYYY-MM-DD' dates.
# Using .assign on the chained result (instead of column assignment on the
# output of dropna) avoids SettingWithCopyWarning on older pandas versions.
coral_data = (
    coral_data
    .replace('NaT', np.nan)
    .assign(
        datetime_landsat=lambda df: pd.to_datetime(df['datetime_landsat'], errors='coerce')
    )
    .dropna(subset=['datetime_landsat'])
    .assign(
        day=lambda df: df['datetime_landsat'].dt.day.astype(str).str.zfill(2),
        month=lambda df: df['datetime_landsat'].dt.month.astype(str).str.zfill(2),
        year=lambda df: df['datetime_landsat'].dt.year.astype(str).str.zfill(4),
    )
)

## Adding MODIS Surface Reflectance Data from Google Earth Engine

In [253]:
import ee
import pandas as pd
# Authenticate against Earth Engine, then initialise the client used by the
# ee.* calls in the cells below. With quiet=True this run printed an auth URL
# and an `earthengine authenticate` command instead of completing silently
# (see the cell output).
# NOTE(review): the saved output of this cell contains the auth request id and
# code verifier -- consider clearing the output before sharing the notebook.
ee.Authenticate(quiet=True)
ee.Initialize()

Paste the following address into a web browser:

    https://code.earthengine.google.com/client-auth?scopes=https%3A//www.googleapis.com/auth/earthengine%20https%3A//www.googleapis.com/auth/devstorage.full_control&request_id=VhilS8W7-silwzwL7zsOExLjwnmDhB1sgHZzzKYuocs&tc=vN4CwLV73SpEXzaFHIwRa_hjkg8OhEKn_zbrbzBgoDI&cc=iOr1tnlGsOShL8JZ8kM_ddK6u2tdsS4mVKFKE38VJ2c

On the web page, please authorize access to your Earth Engine account and copy the authentication code. Next authenticate with the following command:

    earthengine authenticate --code-verifier=VhilS8W7-silwzwL7zsOExLjwnmDhB1sgHZzzKYuocs:kaxwdOZEZru4iq39CYxOWZaGyZYX6DsMRACl1F8-eVY:3ukXuw2_ZzvcDhq9VBUZVi_EOrCHoCTiWREspwFFNzc --authorization-code=PLACE_AUTH_CODE_HERE


Successfully saved authorization token.


In [315]:
from ee import EEException

def getModisData(row):
    """Attach MODIS surface-reflectance bands 8-16 to one row of the coral frame.

    Queries the Earth Engine collection "MODIS/006/MYDOCGA" at the row's
    point location for the single day given by the row's date parts and
    writes the band values back onto the row.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series from ``DataFrame.apply(axis=1)``)
        Must provide 'long', 'lat', 'year', 'month' and 'day'. The date parts
        are interpolated into a 'year-month-day' string for ``ee.Date``.

    Returns
    -------
    The same ``row`` object with 'sur_refl_b08' ... 'sur_refl_b16' set.
    When one or more records come back, the values of the first record are
    used. When no record comes back, or Earth Engine raises ``EEException``,
    every band is set to NaN.

    Notes
    -----
    Each call performs a blocking ``getInfo()`` round trip.
    Any ``EEException`` is converted to NaNs, so a systematic failure would
    show up as an all-NaN result rather than an error.
    NOTE(review): Earth Engine also publishes newer versions of its MODIS
    collections; confirm that the 006 collection is still the intended source.
    """
    bands = [f'sur_refl_b{number:02d}' for number in range(8, 17)]

    try:
        # Build the point once; it is used for both the spatial filter and
        # the region sample.
        point = ee.Geometry.Point([row['long'], row['lat']])
        date = ee.Date(f"{row['year']}-{row['month']}-{row['day']}")
        images = (
            ee.ImageCollection("MODIS/006/MYDOCGA")
            .select(bands)
            .filterBounds(geometry=point)
            .filterDate(date, date.advance(1, 'day'))
        )
        data = images.getRegion(geometry=point, scale=1000).getInfo()

        # First element is the header row; the rest are the sampled records.
        df = pd.DataFrame(data[1:], columns=data[0])
        if df.shape[0] == 0:
            values = dict.fromkeys(bands, np.nan)
        else:
            first_record = df.iloc[0]
            values = {band: first_record[band] for band in bands}
    except EEException:
        values = dict.fromkeys(bands, np.nan)

    for band, value in values.items():
        row[band] = value
    return row

In [316]:
# Row-wise apply: getModisData is called once per row and each call performs
# its own Earth Engine request. The progress output below records roughly
# 1h07m for 24,268 rows (~6 rows/s).
coral_data = coral_data.swifter.apply(getModisData, axis=1)

Pandas Apply: 100%|██████████| 24268/24268 [1:07:13<00:00,  6.02it/s]


In [317]:
# Persist the frame (now including the sur_refl_b08..b16 columns) to disk.
coral_data.to_pickle('./landsat_allen_modis_50k_1.pkl')

### Adding MODIS Chlorophyll-a Concentration and Sea Surface Temperature Data from Earth Engine

In [81]:
from ee import EEException
def getChlorData(row):
    """Attach 'chlor_a' and 'sst' from Earth Engine to one row.

    Queries the collection "NASA/OCEANDATA/MODIS-Aqua/L3SMI" at the row's
    point location for the single day given by the row's date parts and
    writes the two values back onto the row.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series from ``DataFrame.apply(axis=1)``)
        Must provide 'year', 'month' and 'day', plus the coordinates as either
        'Longitude_Degrees' / 'Latitude_Degrees' (checked first) or
        'long' / 'lat' (the names used by the frame loaded at the top of this
        notebook and by ``getModisData``).

    Returns
    -------
    The same ``row`` object with 'chlor_a' and 'sst' set. When one or more
    records come back, the values of the first record are used. When no
    record comes back, or Earth Engine raises ``EEException``, both are NaN.

    Raises
    ------
    KeyError
        If neither coordinate naming is present on the row (this is not
        caught, matching the behaviour for any other missing key).
    """
    bands = ['chlor_a', 'sst']

    def _coordinate(*names):
        # Return the value of the first of `names` present on the row.
        for name in names:
            if name in row:
                return row[name]
        raise KeyError(names[0])

    try:
        # Build the point once; it is used for both the spatial filter and
        # the region sample.
        point = ee.Geometry.Point([
            _coordinate('Longitude_Degrees', 'long'),
            _coordinate('Latitude_Degrees', 'lat'),
        ])
        date = ee.Date(f"{row['year']}-{row['month']}-{row['day']}")
        images = (
            ee.ImageCollection("NASA/OCEANDATA/MODIS-Aqua/L3SMI")
            .select(bands)
            .filterBounds(geometry=point)
            .filterDate(date, date.advance(1, 'day'))
        )
        data = images.getRegion(geometry=point, scale=5000).getInfo()

        # First element is the header row; the rest are the sampled records.
        df = pd.DataFrame(data[1:], columns=data[0])
        if df.shape[0] == 0:
            values = dict.fromkeys(bands, np.nan)
        else:
            first_record = df.iloc[0]
            values = {band: first_record[band] for band in bands}
    except EEException:
        values = dict.fromkeys(bands, np.nan)

    for band, value in values.items():
        row[band] = value
    return row

In [83]:
# Row-wise apply of getChlorData; the progress output below records about
# 38 minutes for 13,462 rows.
# NOTE(review): the row count (13,462) and execution counter (83) differ from
# the surface-reflectance apply earlier in the notebook (24,268 rows, 316), so
# this cell appears to have been run against a different frame or session --
# confirm before relying on a top-to-bottom re-run.
coral_data = coral_data.swifter.apply(getChlorData, axis=1)

Pandas Apply: 100%|██████████| 13462/13462 [37:56<00:00,  5.91it/s]  


In [84]:
# Write the frame to CSV; index=False leaves the index out of the file.
coral_data.to_csv('./coral_data_with_modis_features.csv', index=False)

In [202]:
# Count rows per value of 'coral_presence'.
# NOTE(review): 'coral_presence' is not among the columns shown in the head()
# preview of landsat_allen_50k.pkl at the top of the notebook -- confirm which
# frame this cell is meant to run on.
coral_data['coral_presence'].value_counts()

1    7329
0    6133
Name: coral_presence, dtype: int64

### Combining With Landsat Features