In [2]:
import pandas as pd
import numpy as np
import geopandas
import xarray as xr 
import netCDF4 as nc
import ee

from matplotlib import pyplot as plt 
from matplotlib.colors import LinearSegmentedColormap 
np.warnings.filterwarnings('ignore')

import urllib.request 
from urllib.error import HTTPError

from IPython.display import Image

In [2]:
# Trigger the authentication flow.
# This step is interactive: the recorded output of this cell shows it asking
# for a verification code and then saving an authorization token.
ee.Authenticate()

# Initialize the library.
# Must run before any ee.* object in the cells that follow is used.
ee.Initialize()

Enter verification code: <redacted>

Successfully saved authorization token.


In [3]:
# define location
# Corner coordinates of a bounding box around the site. In the visible cells
# they are referenced only by the commented-out Rectangle attempt below; the
# region actually used is the buffered point.
lat_start = 26.6
lat_stop = 26.48
lon_start = -78.88
lon_stop = -78.66

# poi = ee.Geometry.Rectangle(lat_start, lon_start, lat_stop, lon_stop) # rectangle doesn't work for some reason...
# NOTE(review): the attempt above passes latitude before longitude and
# lat_start > lat_stop; ee.Geometry.Rectangle presumably expects
# (xMin, yMin, xMax, yMax), i.e. longitude first and min before max, which
# would explain the failure - confirm against the Earth Engine docs.
# The point's coordinates fall inside the lon/lat ranges defined above
# (first value in the longitude range, second in the latitude range).
poi = ee.Geometry.Point(-78.75, 26.5) 

roi = poi.buffer(1e4) # 10km coverage

scale = 4616 # in meters - change based on coverage

# Initial date of interest (inclusive).
i_date = '2021-03-01'

# Final date of interest (exclusive).
f_date = '2021-10-01'

In [None]:
def ee_array_to_df(arr, list_of_bands):
    """Transforms client-side ee.Image.getRegion array to pandas.DataFrame.

    Parameters
    ----------
    arr : list of lists
        Table whose first row holds the column names and whose remaining
        rows hold the values. It must contain a ``time`` column expressed in
        milliseconds since the Unix epoch.
    list_of_bands : iterable of str or None
        Names of the band columns to convert to a numeric dtype. Names that
        are not present in the table are skipped, so a band list that does
        not match the table never raises.

    Returns
    -------
    pandas.DataFrame
        One column per header entry (all original columns are kept) plus a
        ``datetime`` column derived from ``time``.
    """
    df = pd.DataFrame(arr)

    # Rearrange the header.
    headers = df.iloc[0]
    df = pd.DataFrame(df.values[1:], columns=headers)

    # Rebuilding the frame from a mixed-type array leaves every column with
    # object dtype; make the requested bands numeric so that downstream
    # arithmetic and plotting work. Missing values become NaN.
    for band in list_of_bands or ():
        if band in df.columns:
            df[band] = pd.to_numeric(df[band], errors='coerce')

    # Convert the time field into a datetime.
    df['datetime'] = pd.to_datetime(df['time'], unit='ms')

    return df

In [None]:
# Earth Engine collection IDs and, for each, the bands of interest.
gcom_sst = "JAXA/GCOM-C/L3/OCEAN/SST/V2"
gcom_sst_bands = ['SST_AVE', 'SST_QA_flag']

gcom_chl = "JAXA/GCOM-C/L3/OCEAN/CHLA/V2"
gcom_chl_bands = ['CHLA_AVE', 'CHLA_QA_flag']

modis_aqua = "NASA/OCEANDATA/MODIS-Aqua/L3SMI"
modis_bands = ["chlor_a", 'nflh', 'poc', 'Rrs_531']

# The three lists below are iterated in parallel by the export loop: entry i
# of each list describes the same dataset, so order and length must match.
save_file_names = ['gcom_sst', 'gcom_chl', 'modis_bands']

# gcom_sst used to appear twice here, which shifted every later pairing by one
# (SST data saved as gcom_chl, GCOM chlorophyll saved as modis_bands) and
# meant the MODIS-Aqua collection was never reached.
data_set = [gcom_sst, gcom_chl, modis_aqua]
bands_list = [gcom_sst_bands, gcom_chl_bands, modis_bands]

# zip() silently truncates to the shortest list, so fail loudly on a mismatch.
assert len(data_set) == len(bands_list) == len(save_file_names), \
    "data_set, bands_list and save_file_names must have the same length"

In [None]:
# Export every configured dataset to its own CSV file under ../data/.
for data_source, bands, file_name in zip(data_set, bands_list, save_file_names):
    print(data_source, bands)

    # Collection limited to the configured date window.
    data = ee.ImageCollection(data_source).filterDate(i_date, f_date)

    # Pull the values inside the region of interest to the client.
    arr = data.getRegion(roi, scale).getInfo()

    # Tabulate and write to disk without the row index.
    df = ee_array_to_df(arr, bands)
    df.to_csv(f"../data/{file_name}.csv", index=False)

In [None]:
# HYCOM sea temperature and salinity collection, limited to the date window
nopp_sst_sal = ee.ImageCollection("HYCOM/sea_temp_salinity").filterDate(i_date, f_date)

# GCOM-C sea surface temperature (SST) collection - not salinity
sst = ee.ImageCollection("JAXA/GCOM-C/L3/OCEAN/SST/V2").filterDate(i_date, f_date)

# MODIS-Aqua collection; the source of the particulate organic carbon (poc) band
poc = ee.ImageCollection("NASA/OCEANDATA/MODIS-Aqua/L3SMI").filterDate(i_date, f_date)

In [11]:
# NOTE: these two assignments replace the i_date / f_date values set in the
# earlier configuration cell (f_date is one month earlier here), so every cell
# executed after this one uses the shorter window.
# Initial date of interest (inclusive).
i_date = '2021-03-01'

# Final date of interest (exclusive).
f_date = '2021-09-01'

# ee data
data = ee.ImageCollection("NASA/OCEANDATA/MODIS-Aqua/L3SMI").filterDate(i_date, f_date)

# get array 
arr = data.getRegion(roi, scale).getInfo()

# convert to dataframe
df = pd.DataFrame(arr)

# Rearrange the header: the first row carries the column names. Rows missing
# any of chlor_a, poc or sst are dropped and the index is renumbered.
headers = df.iloc[0]
df = pd.DataFrame(df.values[1:], columns=headers).dropna(subset=['chlor_a','poc','sst']).reset_index(drop=True)

# 'time' is in milliseconds since the epoch.
df['datetime'] = pd.to_datetime(df['time'], unit='ms')

# DataFrame.info() prints its report itself and returns None, so wrapping it
# in display() only added a stray "None" to the output.
df.info()
display(df.tail())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211 entries, 0 to 210
Data columns (total 19 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   id         211 non-null    object        
 1   longitude  211 non-null    object        
 2   latitude   211 non-null    object        
 3   time       211 non-null    object        
 4   chlor_a    211 non-null    object        
 5   nflh       110 non-null    object        
 6   poc        211 non-null    object        
 7   Rrs_412    211 non-null    object        
 8   Rrs_443    211 non-null    object        
 9   Rrs_469    211 non-null    object        
 10  Rrs_488    211 non-null    object        
 11  Rrs_531    211 non-null    object        
 12  Rrs_547    211 non-null    object        
 13  Rrs_555    211 non-null    object        
 14  Rrs_645    211 non-null    object        
 15  Rrs_667    211 non-null    object        
 16  Rrs_678    211 non-null    object        
 1

None

Unnamed: 0,id,longitude,latitude,time,chlor_a,nflh,poc,Rrs_412,Rrs_443,Rrs_469,Rrs_488,Rrs_531,Rrs_547,Rrs_555,Rrs_645,Rrs_667,Rrs_678,sst,datetime
206,A2021074,-78.806577,26.559123,1615766700000,0.172681,0.00226,66.599998,0.010106,0.008796,0.008494,0.007704,0.00387,0.00311,0.002614,0.000288,0.00021,0.000188,25.549999,2021-03-15 00:05:00
207,A2021076,-78.806577,26.559123,1615940701000,0.115833,,43.0,0.00881,0.008002,0.007316,0.006254,0.0025,0.001936,0.001574,9.6e-05,0.0001,0.000106,24.594999,2021-03-17 00:25:01
208,A2021083,-78.806577,26.559123,1616544001000,0.132648,-0.023325,49.200001,0.008978,0.0078,0.007176,0.006258,0.002774,0.002126,0.001722,0.000166,8.6e-05,6e-05,25.275,2021-03-24 00:00:01
209,A2021131,-78.806577,26.559123,1620693001000,0.081649,0.00192,37.400002,0.011648,0.009844,0.008534,0.007112,0.00274,0.00207,0.0017,4.4e-05,0.0001,9e-05,28.359999,2021-05-11 00:30:01
210,A2021147,-78.806577,26.559123,1622075401000,0.091756,0.01229,41.200001,0.011026,0.009592,0.008296,0.006986,0.002904,0.0022,0.001798,7.4e-05,9.2e-05,9e-05,27.295,2021-05-27 00:30:01


In [12]:
# Save the current df without its row index. This is the same path the export
# loop writes for its 'modis_bands' entry, so that file is replaced.
df.to_csv("../data/modis_bands.csv", index=False)