In [1]:
import ee
import eemont
import numpy as np
import pandas as pd
import swifter

from datetime import datetime
from ee import EEException
from tqdm import tqdm

In [2]:
# Authenticate the Earth Engine client with a Google account. With quiet=True
# the recorded output below shows an authorization URL and a CLI command
# being printed for the user to follow.
# NOTE(review): that recorded output embeds one-time request identifiers --
# consider clearing the cell output before sharing the notebook.
ee.Authenticate(quiet=True)
# Initialize the Earth Engine client using the saved authentication credentials.
ee.Initialize()

Paste the following address into a web browser:

    https://code.earthengine.google.com/client-auth?scopes=https%3A//www.googleapis.com/auth/earthengine%20https%3A//www.googleapis.com/auth/devstorage.full_control&request_id=ENuIu4ATP4ae4PnQjJ7cFPSI_NAcp4CAlv7q-KhU-SE&tc=G4_3Xd8SZb1vI6NGIW0VNF9dyFvOCQIPIch2yfdYeK0&cc=QugSCUS-Xpr3TPuoTmOEG6A8g7P-lAbeHJ5cVmm-lcw

On the web page, please authorize access to your Earth Engine account and copy the authentication code. Next authenticate with the following command:

    earthengine authenticate --code-verifier=ENuIu4ATP4ae4PnQjJ7cFPSI_NAcp4CAlv7q-KhU-SE:a1XNXoOQKbUcmy0PSf8mmOWhzpk1qb_gCUeifhn_2ZE:3puaPKscWgSAi2JinfzEglJGt2KbzSjVDsspLkhLfFE --authorization-code=PLACE_AUTH_CODE_HERE


Successfully saved authorization token.


Read in the prepared Allen Coral Atlas file.

In [3]:
# Load the prepared Allen Coral Atlas sample; the 'long' and 'lat' columns of
# this frame are what the satellite queries below read for each row.
# NOTE(review): read_pickle can execute arbitrary code -- only load trusted files.
allen_sample = pd.read_pickle('./files/northern_carribean_subset.pkl')
allen_sample

Unnamed: 0_level_0,Unnamed: 1_level_0,class,geometry,centroid_column,long,lat
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Coral/Algae,2971790,Coral/Algae,"POLYGON ((-78.61004 26.52420, -78.60991 26.524...",POINT (-78.60970 26.52371),-78.609698,26.523708
Coral/Algae,2118004,Coral/Algae,"POLYGON ((-78.59468 26.51512, -78.59455 26.515...",POINT (-78.59475 26.51495),-78.594745,26.514949
Coral/Algae,2118268,Coral/Algae,"POLYGON ((-78.58709 26.52909, -78.58705 26.529...",POINT (-78.58707 26.52907),-78.587069,26.529069
Coral/Algae,2097168,Coral/Algae,"POLYGON ((-78.61278 26.51422, -78.61269 26.514...",POINT (-78.61274 26.51420),-78.612738,26.514202
Coral/Algae,2111207,Coral/Algae,"POLYGON ((-78.61112 26.51481, -78.61108 26.514...",POINT (-78.61110 26.51479),-78.611099,26.514786
...,...,...,...,...,...,...
Non-Coral,2100310,Non-Coral,"POLYGON ((-78.59024 26.51607, -78.59015 26.516...",POINT (-78.59025 26.51595),-78.590252,26.515946
Non-Coral,2971962,Non-Coral,"POLYGON ((-78.58413 26.52096, -78.58404 26.520...",POINT (-78.58420 26.52069),-78.584200,26.520686
Non-Coral,2108374,Non-Coral,"POLYGON ((-78.58049 26.52770, -78.58040 26.527...",POINT (-78.58045 26.52764),-78.580451,26.527640
Non-Coral,2115402,Non-Coral,"POLYGON ((-78.59320 26.51409, -78.59316 26.514...",POINT (-78.59318 26.51407),-78.593178,26.514068


# Collect Satellite Data

Queries Google Earth Engine to get data from either Landsat 8 or MODIS. Multiple satellite data points can be returned from a single query, so the data point that occurred the earliest is used. Arguments that need to be specified are satellite collection, start date, end date, spectral bands, and scale. Only Landsat can calculate spectral indices, so that argument is None for MODIS.

In [4]:
def noBandsFound(collection, bands, spec_idxs, lat, lon):
    """Return a placeholder row for a location where no satellite data was found.

    The previous implementation assigned scalars into a zero-row DataFrame,
    which adds columns but no rows, so the transposed/squeezed result was an
    empty frame that carried neither the coordinates nor the NaN placeholders.
    The row is now built directly as a Series.

    Parameters
    ----------
    collection : str
        Earth Engine collection id; also used to name the
        ``'datetime_' + collection`` entry.
    bands : list of str
        Band names that were requested.
    spec_idxs : list of str or None
        Spectral index names. Only used when ``collection`` is
        ``'LANDSAT/LC08/C02/T1_L2'``; ``None`` is treated as an empty list.
    lat, lon : float
        Coordinates of the queried location.

    Returns
    -------
    pandas.Series
        Indexed by ``'lat'``, ``'long'``, the band names, the spectral index
        names (Landsat only) and ``'datetime_' + collection``. Every entry is
        NaN except ``'lat'`` and ``'long'``.
    """
    columns = ['lat', 'long', *bands]
    # Spectral indices are only produced for the Landsat collection.
    if collection == 'LANDSAT/LC08/C02/T1_L2':
        columns.extend(spec_idxs or [])
    columns.append('datetime_' + collection)

    new_row = pd.Series(np.nan, index=columns)
    new_row['lat'] = lat
    new_row['long'] = lon
    return new_row

In [5]:
def getSatelliteData(row, collection, start_date, end_date, bands, spec_idxs, scale):
    """Query Earth Engine for one location and return a single observation.

    Parameters
    ----------
    row : pandas.Series
        Must provide ``'lat'`` and ``'long'`` entries for the point to query.
    collection : str
        Earth Engine image collection id.
    start_date : str
        Start of the query window, formatted ``'%Y-%m-%d'``. When several
        observations are returned, the one closest in time to this date is
        kept.
    end_date
        End of the query window, passed straight to ``filterDate``.
    bands : list of str
        Bands to select from the collection.
    spec_idxs : list of str or None
        Spectral indices to compute; only applied when ``collection`` is
        ``'LANDSAT/LC08/C02/T1_L2'``.
    scale
        Passed straight to ``getRegion``.

    Returns
    -------
    pandas.Series
        The selected observation without the ``id``, ``time``, ``longitude``
        and ``latitude`` fields, plus ``'datetime_' + collection``, ``'long'``
        and ``'lat'``. When the query raises ``EEException`` or yields no
        complete rows, the placeholder from ``noBandsFound`` is returned.
    """
    row = np.squeeze(row)
    # Read the coordinates once. The fallback paths previously looked up
    # row['lon'], a key that does not exist (the column is 'long').
    lat, lon = row['lat'], row['long']

    try:
        img_col = ee.ImageCollection(collection).select(bands)
        if collection == 'LANDSAT/LC08/C02/T1_L2':
            img_col = img_col.spectralIndices(spec_idxs)

        point = ee.Geometry.Point(lon, lat)
        data = (
            img_col
            .filterBounds(geometry=point)
            .filterDate(start_date, end_date)
            .getRegion(geometry=point, scale=scale)
            .getInfo()
        )
    except EEException:
        # Best-effort: a failed query yields a placeholder row instead of
        # aborting the whole apply().
        return noBandsFound(collection, bands, spec_idxs, lat, lon)

    # First entry of the response is the header, the rest are observations.
    df = pd.DataFrame(data[1:], columns=data[0]).dropna()
    if df.empty:
        return noBandsFound(collection, bands, spec_idxs, lat, lon)

    dt_col = 'datetime_' + collection
    # 'time' is in milliseconds since the epoch.
    df[dt_col] = pd.to_datetime(df['time'], unit='ms')
    df['long'] = lon
    df['lat'] = lat

    nearest = 0
    if len(df) > 1:
        # Keep the observation closest in time to start_date (first one wins ties).
        deltas = (df[dt_col] - datetime.strptime(start_date, '%Y-%m-%d')).abs()
        nearest = int(np.argmin(deltas.to_numpy()))

    return df.iloc[nearest].drop(labels=['id', 'time', 'longitude', 'latitude'])
        

## MODIS
Collected in 2-day intervals since MODIS covers the Earth every 1 to 2 days.

In [6]:
# Query window: two days starting at start_date.
start_date = '2022-03-01'
end_date = ee.Date(start_date).advance(2, 'day')
scale = 1000
# Bands sur_refl_b08 through sur_refl_b16.
bands = [f'sur_refl_b{band_no:02d}' for band_no in range(8, 17)]

# No spectral indices are requested for MODIS, so spec_idxs is None.
modis_args = ('MODIS/006/MYDOCGA', start_date, end_date, bands, None, scale)
modis_df = allen_sample.swifter.apply(getSatelliteData, axis=1, args=modis_args)

Pandas Apply:   0%|          | 0/1550 [00:00<?, ?it/s]

In [7]:
modis_df

Unnamed: 0_level_0,Unnamed: 1_level_0,sur_refl_b08,sur_refl_b09,sur_refl_b10,sur_refl_b11,sur_refl_b12,sur_refl_b13,sur_refl_b14,sur_refl_b15,sur_refl_b16,datetime_MODIS/006/MYDOCGA,long,lat
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Coral/Algae,2971790,885,963,1135,1262,1293,642,640,-100,-100,2022-03-01,-78.609698,26.523708
Coral/Algae,2118004,886,824,757,651,629,462,457,491,501,2022-03-01,-78.594745,26.514949
Coral/Algae,2118268,667,811,1053,1275,-100,-100,-100,-100,-100,2022-03-01,-78.587069,26.529069
Coral/Algae,2097168,885,963,1135,1262,1293,642,640,-100,-100,2022-03-01,-78.612738,26.514202
Coral/Algae,2111207,885,963,1135,1262,1293,642,640,-100,-100,2022-03-01,-78.611099,26.514786
...,...,...,...,...,...,...,...,...,...,...,...,...,...
Non-Coral,2100310,886,824,757,651,629,462,457,491,501,2022-03-01,-78.590252,26.515946
Non-Coral,2971962,169,172,243,217,229,-65,-65,-44,13,2022-03-01,-78.584200,26.520686
Non-Coral,2108374,-100,-100,-100,-100,-100,-100,-100,-100,-100,2022-03-01,-78.580451,26.527640
Non-Coral,2115402,886,824,757,651,629,462,457,491,501,2022-03-01,-78.593178,26.514068


In [8]:
# Cache the MODIS samples on disk; the merge cell has a commented-out
# read_pickle for this same path.
modis_df.to_pickle('./files/modis_caribbean.pkl')

## LANDSAT
Needs to be collected monthly due to 16-day orbit cycles.

In [9]:
# Query window: one month starting at start_date.
start_date = '2022-03-01'
end_date = ee.Date(start_date).advance(1, 'month')
scale = 100
bands = ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7', 'QA_PIXEL']
# Spectral indices computed on top of the selected bands.
spec_idxs = [
    'AWEInsh', 'AWEIsh', 'LSWI', 'MBWI', 'MLSWI26', 'MLSWI27',
    'MNDWI', 'MuWIR', 'NDVIMNDWI', 'NDWI', 'NDWIns', 'NWI',
    'SWM', 'WI1', 'WI2', 'WRI',
]

landsat_args = ('LANDSAT/LC08/C02/T1_L2', start_date, end_date, bands, spec_idxs, scale)
landsat_df = allen_sample.swifter.apply(getSatelliteData, axis=1, args=landsat_args)


Pandas Apply:   0%|          | 0/1550 [00:00<?, ?it/s]

In [10]:
landsat_df

Unnamed: 0_level_0,Unnamed: 1_level_0,SR_B2,SR_B3,SR_B4,SR_B5,SR_B6,SR_B7,QA_PIXEL,AWEInsh,AWEIsh,LSWI,...,NDWI,NDWIns,NWI,SWM,WI1,WI2,WRI,datetime_LANDSAT/LC08/C02/T1_L2,long,lat
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Coral/Algae,2971790,9099.0,11563.0,10505.0,19179.0,13803.0,11156.0,21824.0,16924.25,-14255.50,0.162998,...,-0.247739,0.313743,-0.658170,0.626463,0.017915,-0.101555,0.669092,2022-03-06 15:43:51.238,-78.609698,26.523708
Coral/Algae,2118004,8188.0,8388.0,7116.0,7318.0,7884.0,7869.0,23888.0,21826.25,4387.75,-0.037232,...,0.068127,0.487470,-0.476119,1.090383,0.031925,0.019867,1.019866,2022-03-06 15:43:51.238,-78.594745,26.514949
Coral/Algae,2118268,10641.0,11581.0,7328.0,6948.0,7950.0,7982.0,23888.0,34737.50,15251.00,-0.067257,...,0.250040,0.587522,-0.365114,1.491610,0.183970,0.142780,1.269231,2022-03-06 15:43:51.238,-78.587069,26.529069
Coral/Algae,2097168,10813.0,11173.0,7394.0,7147.0,7848.0,7880.0,21952.0,33183.25,14283.00,-0.046749,...,0.219760,0.570868,-0.358050,1.466222,0.172834,0.156904,1.238213,2022-03-06 15:43:51.238,-78.612738,26.514202
Coral/Algae,2111207,11185.0,11798.0,7587.0,7167.0,7889.0,7913.0,21952.0,35605.00,16117.75,-0.047954,...,0.244187,0.584303,-0.345025,1.526501,0.197098,0.171327,1.287527,2022-03-06 15:43:51.238,-78.611099,26.514786
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Non-Coral,2100310,8233.0,8194.0,6910.0,7195.0,7938.0,7913.0,23888.0,20986.00,4040.25,-0.049098,...,0.064916,0.485704,-0.473577,1.085508,0.017446,0.019819,0.998084,2022-03-06 15:43:51.238,-78.590252,26.515946
Non-Coral,2971962,8060.0,8525.0,6884.0,7102.0,7913.0,7929.0,23888.0,22477.25,4867.75,-0.054013,...,0.091060,0.500083,-0.480067,1.104562,0.036222,0.008193,1.026240,2022-03-06 15:43:51.238,-78.584200,26.520686
Non-Coral,2108374,9341.0,11160.0,7524.0,7868.0,9040.0,9136.0,23888.0,31637.00,9595.00,-0.069316,...,0.173008,0.545155,-0.472036,1.212503,0.099724,0.011095,1.105039,2022-03-06 15:43:51.238,-78.580451,26.527640
Non-Coral,2115402,8403.0,8023.0,7093.0,7339.0,7940.0,7934.0,23888.0,20315.75,3558.50,-0.039335,...,0.044525,0.474489,-0.468434,1.075070,0.005577,0.028708,0.989332,2022-03-06 15:43:51.238,-78.593178,26.514068


In [11]:
# Replace the Landsat band codes with descriptive column names.
landsat_band_names = {
    'SR_B2': 'Blue',
    'SR_B3': 'Green',
    'SR_B4': 'Red',
    'SR_B5': 'Near Infrared',
    'SR_B6': 'Shortwave Infrared 1',
    'SR_B7': 'Shortwave Infrared 2',
}
landsat_df = landsat_df.rename(columns=landsat_band_names)

In [12]:
landsat_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Blue,Green,Red,Near Infrared,Shortwave Infrared 1,Shortwave Infrared 2,QA_PIXEL,AWEInsh,AWEIsh,LSWI,...,NDWI,NDWIns,NWI,SWM,WI1,WI2,WRI,datetime_LANDSAT/LC08/C02/T1_L2,long,lat
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Coral/Algae,2971790,9099.0,11563.0,10505.0,19179.0,13803.0,11156.0,21824.0,16924.25,-14255.50,0.162998,...,-0.247739,0.313743,-0.658170,0.626463,0.017915,-0.101555,0.669092,2022-03-06 15:43:51.238,-78.609698,26.523708
Coral/Algae,2118004,8188.0,8388.0,7116.0,7318.0,7884.0,7869.0,23888.0,21826.25,4387.75,-0.037232,...,0.068127,0.487470,-0.476119,1.090383,0.031925,0.019867,1.019866,2022-03-06 15:43:51.238,-78.594745,26.514949
Coral/Algae,2118268,10641.0,11581.0,7328.0,6948.0,7950.0,7982.0,23888.0,34737.50,15251.00,-0.067257,...,0.250040,0.587522,-0.365114,1.491610,0.183970,0.142780,1.269231,2022-03-06 15:43:51.238,-78.587069,26.529069
Coral/Algae,2097168,10813.0,11173.0,7394.0,7147.0,7848.0,7880.0,21952.0,33183.25,14283.00,-0.046749,...,0.219760,0.570868,-0.358050,1.466222,0.172834,0.156904,1.238213,2022-03-06 15:43:51.238,-78.612738,26.514202
Coral/Algae,2111207,11185.0,11798.0,7587.0,7167.0,7889.0,7913.0,21952.0,35605.00,16117.75,-0.047954,...,0.244187,0.584303,-0.345025,1.526501,0.197098,0.171327,1.287527,2022-03-06 15:43:51.238,-78.611099,26.514786
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Non-Coral,2100310,8233.0,8194.0,6910.0,7195.0,7938.0,7913.0,23888.0,20986.00,4040.25,-0.049098,...,0.064916,0.485704,-0.473577,1.085508,0.017446,0.019819,0.998084,2022-03-06 15:43:51.238,-78.590252,26.515946
Non-Coral,2971962,8060.0,8525.0,6884.0,7102.0,7913.0,7929.0,23888.0,22477.25,4867.75,-0.054013,...,0.091060,0.500083,-0.480067,1.104562,0.036222,0.008193,1.026240,2022-03-06 15:43:51.238,-78.584200,26.520686
Non-Coral,2108374,9341.0,11160.0,7524.0,7868.0,9040.0,9136.0,23888.0,31637.00,9595.00,-0.069316,...,0.173008,0.545155,-0.472036,1.212503,0.099724,0.011095,1.105039,2022-03-06 15:43:51.238,-78.580451,26.527640
Non-Coral,2115402,8403.0,8023.0,7093.0,7339.0,7940.0,7934.0,23888.0,20315.75,3558.50,-0.039335,...,0.044525,0.474489,-0.468434,1.075070,0.005577,0.028708,0.989332,2022-03-06 15:43:51.238,-78.593178,26.514068


In [13]:
# Cache the Landsat samples on disk; the merge cell has a commented-out
# read_pickle for this same path.
landsat_df.to_pickle('./files/landsat_caribbean.pkl')

## VIIRS
Note: VIIRS was not used in this project, but it could be used in the future.

In [14]:
# Query window: one day starting at start_date.
start_date = '2022-03-01'
end_date = ee.Date(start_date).advance(1, 'day')
scale = 1000
bands = [f'M{band_no}' for band_no in (1, 2, 3, 4, 5, 7)] + ['I1', 'I2']

# No spectral indices are requested for VIIRS, so spec_idxs is None.
viirs_args = ('NOAA/VIIRS/001/VNP09GA', start_date, end_date, bands, None, scale)
viirs_df = allen_sample.swifter.apply(getSatelliteData, axis=1, args=viirs_args)

Pandas Apply:   0%|          | 0/1550 [00:00<?, ?it/s]

In [15]:
viirs_df

Unnamed: 0_level_0,Unnamed: 1_level_0,M1,M2,M3,M4,M5,M7,I1,I2,datetime_NOAA/VIIRS/001/VNP09GA,long,lat
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Coral/Algae,2971790,1785,1804,1851,1986,1777,2457,3061,3588,2022-03-01,-78.609698,26.523708
Coral/Algae,2118004,2559,2575,2670,2737,2397,2463,1375,1422,2022-03-01,-78.594745,26.514949
Coral/Algae,2118268,1976,1960,1955,2084,1840,2354,4893,5260,2022-03-01,-78.587069,26.529069
Coral/Algae,2097168,3092,3124,3258,3305,3113,3293,2756,2959,2022-03-01,-78.612738,26.514202
Coral/Algae,2111207,3092,3124,3258,3305,3113,3293,2723,2857,2022-03-01,-78.611099,26.514786
...,...,...,...,...,...,...,...,...,...,...,...,...
Non-Coral,2100310,2131,2166,2335,2305,2161,2153,3979,4234,2022-03-01,-78.590252,26.515946
Non-Coral,2971962,7016,7301,7590,7909,8218,8565,8342,8826,2022-03-01,-78.584200,26.520686
Non-Coral,2108374,5343,5466,5635,5801,5824,6216,5315,5561,2022-03-01,-78.580451,26.527640
Non-Coral,2115402,2131,2166,2335,2305,2161,2153,3979,4234,2022-03-01,-78.593178,26.514068


In [16]:
# Cache the VIIRS samples on disk; the merge cell has a commented-out
# read_pickle for this same path.
viirs_df.to_pickle('./files/viirs_caribbean.pkl')

## Merging
Join satellite data on longitude and latitude and then join with coral data.

In [17]:
from functools import reduce

# To skip the Earth Engine queries, reload the cached frames instead:
#   modis_df = pd.read_pickle('./files/modis_caribbean.pkl')
#   landsat_df = pd.read_pickle('./files/landsat_caribbean.pkl')
#   viirs_df = pd.read_pickle('./files/viirs_caribbean.pkl')

merge_keys = ['lat', 'long']


def _merge_on_location(left, right):
    """Join two frames on their shared 'lat'/'long' columns."""
    return pd.merge(left, right, on=merge_keys)


# Fold every satellite frame into a single table, one pairwise join at a time.
data_frames = [modis_df, landsat_df]
all_satellite = reduce(_merge_on_location, data_frames)

# Attach the satellite measurements to the Allen Coral Atlas rows.
data = allen_sample.merge(all_satellite, on=merge_keys)

In [18]:
data

Unnamed: 0,class,geometry,centroid_column,long,lat,sur_refl_b08,sur_refl_b09,sur_refl_b10,sur_refl_b11,sur_refl_b12,...,MuWIR,NDVIMNDWI,NDWI,NDWIns,NWI,SWM,WI1,WI2,WRI,datetime_LANDSAT/LC08/C02/T1_L2
0,Coral/Algae,"POLYGON ((-78.61004 26.52420, -78.60991 26.524...",POINT (-78.60970 26.52371),-78.609698,26.523708,885,963,1135,1262,1293,...,0.105669,0.380518,-0.247739,0.313743,-0.658170,0.626463,0.017915,-0.101555,0.669092,2022-03-06 15:43:51.238
1,Coral/Algae,"POLYGON ((-78.59468 26.51512, -78.59455 26.515...",POINT (-78.59475 26.51495),-78.594745,26.514949,886,824,757,651,629,...,0.217392,-0.016979,0.068127,0.487470,-0.476119,1.090383,0.031925,0.019867,1.019866,2022-03-06 15:43:51.238
2,Coral/Algae,"POLYGON ((-78.58709 26.52909, -78.58705 26.529...",POINT (-78.58707 26.52907),-78.587069,26.529069,667,811,1053,1275,-100,...,0.851313,-0.212528,0.250040,0.587522,-0.365114,1.491610,0.183970,0.142780,1.269231,2022-03-06 15:43:51.238
3,Coral/Algae,"POLYGON ((-78.61278 26.51422, -78.61269 26.514...",POINT (-78.61274 26.51420),-78.612738,26.514202,885,963,1135,1262,1293,...,0.675876,-0.191793,0.219760,0.570868,-0.358050,1.466222,0.172834,0.156904,1.238213,2022-03-06 15:43:51.238
4,Coral/Algae,"POLYGON ((-78.61112 26.51481, -78.61108 26.514...",POINT (-78.61110 26.51479),-78.611099,26.514786,885,963,1135,1262,1293,...,0.790700,-0.227024,0.244187,0.584303,-0.345025,1.526501,0.197098,0.171327,1.287527,2022-03-06 15:43:51.238
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1545,Non-Coral,"POLYGON ((-78.59024 26.51607, -78.59015 26.516...",POINT (-78.59025 26.51595),-78.590252,26.515946,886,824,757,651,629,...,0.139359,0.004337,0.064916,0.485704,-0.473577,1.085508,0.017446,0.019819,0.998084,2022-03-06 15:43:51.238
1546,Non-Coral,"POLYGON ((-78.58413 26.52096, -78.58404 26.520...",POINT (-78.58420 26.52069),-78.584200,26.520686,169,172,243,217,229,...,0.329484,-0.021644,0.091060,0.500083,-0.480067,1.104562,0.036222,0.008193,1.026240,2022-03-06 15:43:51.238
1547,Non-Coral,"POLYGON ((-78.58049 26.52770, -78.58040 26.527...",POINT (-78.58045 26.52764),-78.580451,26.527640,-100,-100,-100,-100,-100,...,0.795424,-0.082601,0.173008,0.545155,-0.472036,1.212503,0.099724,0.011095,1.105039,2022-03-06 15:43:51.238
1548,Non-Coral,"POLYGON ((-78.59320 26.51409, -78.59316 26.514...",POINT (-78.59318 26.51407),-78.593178,26.514068,886,824,757,651,629,...,0.002470,0.011846,0.044525,0.474489,-0.468434,1.075070,0.005577,0.028708,0.989332,2022-03-06 15:43:51.238


In [19]:
data.columns

Index(['class', 'geometry', 'centroid_column', 'long', 'lat', 'sur_refl_b08',
       'sur_refl_b09', 'sur_refl_b10', 'sur_refl_b11', 'sur_refl_b12',
       'sur_refl_b13', 'sur_refl_b14', 'sur_refl_b15', 'sur_refl_b16',
       'datetime_MODIS/006/MYDOCGA', 'Blue', 'Green', 'Red', 'Near Infrared',
       'Shortwave Infrared 1', 'Shortwave Infrared 2', 'QA_PIXEL', 'AWEInsh',
       'AWEIsh', 'LSWI', 'MBWI', 'MLSWI26', 'MLSWI27', 'MNDWI', 'MuWIR',
       'NDVIMNDWI', 'NDWI', 'NDWIns', 'NWI', 'SWM', 'WI1', 'WI2', 'WRI',
       'datetime_LANDSAT/LC08/C02/T1_L2'],
      dtype='object')

In [20]:
# Persist the merged Allen Coral Atlas + satellite table.
data.to_pickle('./files/merged_satellite_allen_northern_caribbean.pkl')

# Bleaching Features
This section queries Google Earth Engine for chlor_a, nflh, poc, and sst data. The returned data is then used to build more features that are essential to our bleaching model.

In [21]:
# Load the GCBD dataset (latitudes and longitudes for both the Great Barrier
# Reef and the Caribbean) and rename its coordinate columns to 'long'/'lat'.
df_GCBD = (
    pd.read_csv('./MeshedGCBD.csv', index_col=0)
    .rename(columns={'Longitude_Degrees': 'long', 'Latitude_Degrees': 'lat'})
)
df_GCBD

Unnamed: 0,index,lat,long,Percent_Bleached_Sum,day,month,year,lat_Rnd,lon_Rnd
0,0,24.366926,124.003372,75.00,8,9,2016,24.5,124.5
1,1,24.454807,124.109802,75.00,2,9,2016,24.5,124.5
2,3,24.463401,123.802185,32.85,5,8,2008,24.5,123.5
3,4,24.482618,124.125423,75.00,2,9,2016,24.5,124.5
4,5,24.483712,124.123192,75.00,1,10,2016,24.5,124.5
...,...,...,...,...,...,...,...,...,...
22950,38043,12.028167,-61.783111,0.00,18,3,2015,12.5,-61.5
22951,38044,12.045639,-61.764917,0.00,26,3,2015,12.5,-61.5
22952,38045,12.045639,-61.764917,0.00,26,3,2015,12.5,-61.5
22953,38046,12.041444,-61.770944,0.00,11,3,2015,12.5,-61.5


In [22]:
def cumulativeLimit(x, lim):
    """Running sum of ``x`` that resets to zero whenever it drops below ``lim``.

    NaN entries are treated as 0. The input is copied first: the previous
    version zeroed the NaNs directly in the caller's array, which silently
    modified the caller's data and fails on read-only arrays.

    Parameters
    ----------
    x : array-like
        One-dimensional numeric values to accumulate.
    lim : float
        Threshold; whenever the running total is below it, the total is reset
        to 0 before being stored.

    Returns
    -------
    numpy.ndarray
        Array with the same shape and dtype as ``x`` holding the running total
        after each element.
    """
    values = np.array(x)  # np.array copies, so the caller's data is untouched
    values[np.isnan(values)] = 0
    result = np.empty_like(values)
    total = 0.
    for i, y in enumerate(values):
        total += y
        if total < lim:
            total = 0.
        result[i] = total
    return result

In [23]:
# Summer-average SST lookup table; extendSST joins against it on 'Lon'/'Lat'
# and reads its 'sst_Summer' column.
# NOTE(review): this path uses './Files/' while the pickle paths elsewhere in
# the notebook use './files/' -- on a case-sensitive filesystem these are
# different directories; confirm which one is intended.
SummerAvgSST = pd.read_csv('./Files/SummerAvgSST.csv')

def extendSST(df):
    """Add SST-derived features to a frame of MODIS observations.

    Reads the module-level ``SummerAvgSST`` table (columns ``Lon``, ``Lat``
    and ``sst_Summer``). The input frame is copied, so the caller's data is
    not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'sst'``, ``'lon_Rnd'`` and ``'lat_Rnd'`` columns.

    Returns
    -------
    pandas.DataFrame
        ``df`` left-joined with ``SummerAvgSST`` plus, in this order:

        * ``sst_SumComp`` - ``sst`` minus ``sst_Summer`` (0 where unknown)
        * ``sst_sd``, ``sst_mean``, ``sst_cv`` - rolling 14-row standard
          deviation, mean, and coefficient of variation in percent
        * ``sst_streak`` - ceil of ``sst_SumComp``, values below 1 replaced
          by -1, divided by 7
        * ``sst_streak_min`` - rolling 7-row minimum of floor of
          ``sst_SumComp``, values below 1 replaced by -1, divided by 7,
          with the leading incomplete windows set to 0
        * ``DHW`` - ``cumulativeLimit`` of ``sst_streak_min`` with limit 0
    """
    # Work on a copy so interpolating 'sst' does not alter the caller's frame.
    df = df.copy()
    df['sst'] = df['sst'].interpolate()
    dfExt = pd.merge(df, SummerAvgSST, how='left',
                     left_on=['lon_Rnd', 'lat_Rnd'], right_on=['Lon', 'Lat'])

    # Difference from the summertime average; 0 where no average was matched.
    dfExt['sst_SumComp'] = (dfExt['sst'] - dfExt['sst_Summer']).fillna(0)

    # Rolling variability over the last 14 rows.
    dfExt['sst_sd'] = dfExt['sst'].rolling(14).std()
    dfExt['sst_mean'] = dfExt['sst'].rolling(14).mean()
    dfExt['sst_cv'] = dfExt['sst_sd'] * 100 / dfExt['sst_mean']

    # Per-row streak value based on the rounded-up excess over summer.
    streak = dfExt['sst_SumComp'].apply(np.ceil)
    dfExt['sst_streak'] = streak.mask(streak < 1, -1) / 7

    # Streak value based on the weakest rounded-down excess in the last 7 rows.
    streak_min = dfExt['sst_SumComp'].apply(np.floor).rolling(7).min()
    streak_min = streak_min.mask(streak_min < 1, -1) / 7
    # Incomplete leading windows are NaN; store them as 0 explicitly instead
    # of relying on cumulativeLimit zeroing them through a shared array.
    dfExt['sst_streak_min'] = streak_min.fillna(0)

    # DHW was previously computed twice from the same column; once suffices.
    dfExt['DHW'] = cumulativeLimit(dfExt['sst_streak_min'].to_numpy(copy=True), 0)

    return dfExt

In [24]:
# Names of every feature getModisBleachingFeatures adds to a row, in the order
# they are written.
_BLEACHING_FEATURES = (
    'chlor_max', 'chlor_min', 'chlor_avg', 'chlor_change',
    'nflh_max', 'nflh_min', 'nflh_avg', 'nflh_change',
    'poc_max', 'poc_min', 'poc_avg', 'poc_change',
    'sst_day_of_study', 'sst_max', 'sst_summer_max', 'sst_cv_max',
    'sst_cv_cnt', 'sst_abv_summer', 'sst_abv_summer_cumulative',
    'sst_cv_cnt_SumComp', 'sst_cv_max_SumComp', 'sst_dhw', 'sst_dhw_age',
)


def _setMissingBleachingFeatures(row):
    """Set every bleaching feature on ``row`` to NaN and return the row."""
    for feature in _BLEACHING_FEATURES:
        row[feature] = np.nan
    return row


def getModisBleachingFeatures(row):
    """Add chlorophyll, fluorescence, POC and SST features to one GCBD record.

    Queries the ``NASA/OCEANDATA/MODIS-Aqua/L3SMI`` collection at the row's
    location for the 1080 days before the row's date, then summarises the
    last 90 complete observations.

    Parameters
    ----------
    row : pandas.Series
        Must provide ``'year'``, ``'month'``, ``'day'``, ``'long'``,
        ``'lat'``, ``'lat_Rnd'`` and ``'lon_Rnd'``. It is modified in place.

    Returns
    -------
    pandas.Series
        ``row`` with every name in ``_BLEACHING_FEATURES`` added. All of them
        are NaN when the query raises ``EEException`` or returns no complete
        observations.
    """
    try:
        # Zero-padded so the date string is valid ISO 8601 for any month/day.
        end_date = ee.Date(
            f"{int(row['year']):04d}-{int(row['month']):02d}-{int(row['day']):02d}"
        )
        point = ee.Geometry.Point(row['long'], row['lat'])
        img_col = (
            ee.ImageCollection("NASA/OCEANDATA/MODIS-Aqua/L3SMI")
            .select(['chlor_a', 'nflh', 'poc', 'sst'])
            .filterBounds(geometry=point)
            .filterDate(end_date.advance(-1080, 'day'), end_date)
            .getRegion(geometry=point, scale=1000)
            .getInfo()
        )
    except EEException:
        # Best-effort: a failed query yields NaN features for this record.
        return _setMissingBleachingFeatures(row)

    # First entry of the response is the header, the rest are observations.
    df = pd.DataFrame(img_col[1:], columns=img_col[0]).dropna()
    if df.empty:
        return _setMissingBleachingFeatures(row)

    df['lat_Rnd'] = row['lat_Rnd']
    # Previously assigned row['lat_Rnd'] here, so the join against the summer
    # SST table inside extendSST used the latitude as the longitude key.
    df['lon_Rnd'] = row['lon_Rnd']

    df = extendSST(df)
    # Statistics are taken over the last 90 rows (observations).
    recent = df.tail(90)

    for source, prefix in (('chlor_a', 'chlor'), ('nflh', 'nflh'), ('poc', 'poc')):
        series = recent[source]
        row[f'{prefix}_max'] = series.max()
        row[f'{prefix}_min'] = series.min()
        row[f'{prefix}_avg'] = series.mean()
        # Change between the first and last observation of the window.
        row[f'{prefix}_change'] = float(series.iloc[-1]) - float(series.iloc[0])

    above_summer = recent['sst_SumComp'] > 1
    # Above the summer average and above the rolling mean at the same time.
    warm_and_rising = (recent['sst_SumComp'] > 0) & (recent['sst'] > recent['sst_mean'])

    row['sst_day_of_study'] = df['sst'].iloc[-1]
    row['sst_max'] = recent['sst'].max()
    row['sst_summer_max'] = recent['sst_SumComp'].max()
    row['sst_cv_max'] = recent['sst_cv'].max()
    row['sst_cv_cnt'] = recent.loc[recent['sst_cv'] >= 1.9, 'sst_cv'].count()
    row['sst_abv_summer'] = recent.loc[above_summer, 'sst_SumComp'].count()
    row['sst_abv_summer_cumulative'] = recent.loc[above_summer, 'sst_SumComp'].sum()
    row['sst_cv_cnt_SumComp'] = recent.loc[warm_and_rising, 'sst_cv'].count()
    row['sst_cv_max_SumComp'] = recent.loc[warm_and_rising, 'sst_cv'].max()
    row['sst_dhw'] = df['DHW'].max()
    # Position of the first DHW maximum relative to the end of the series.
    row['sst_dhw_age'] = int(df['DHW'].idxmax()) - len(df)

    return row

In [None]:
# Compute the bleaching features for every GCBD record; each row triggers its
# own Earth Engine query inside getModisBleachingFeatures.
df_bleaching = df_GCBD.swifter.apply(getModisBleachingFeatures, axis = 1)
df_bleaching

In [None]:
# Persist the GCBD records together with their computed bleaching features.
df_bleaching.to_pickle('./files/gcbd_meshed_bleaching_features.pkl')