<a href="https://colab.research.google.com/github/larissavaladao/py6s_harmonize_sample/blob/main/curuai_sample_py6s.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install and import packages

In [1]:
#import packages used
import ee
import pandas as pd
import geemap
import geopandas as gpd
import matplotlib.pyplot as plt
import json
import math
import geemap
import os

In [3]:
# Authenticate with Google Earth Engine and initialise the client against the
# 'ee-curuai' project (also necessary for geemap).
ee.Authenticate()
ee.Initialize(project = 'ee-curuai')

In [4]:
# Mount Google Drive at /content/drive (Colab only); the dataset path and the
# CSV export paths used in this notebook live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Import Curuai dataset

In [5]:
# Read the 'data' sheet of the Curuai workbook and keep index labels up to and
# including 383 (.loc slicing is label-based and inclusive).
# For a local run, point the path at 'Dataset_CFP.xlsx' instead.
dataset = pd.read_excel(
    '/content/drive/MyDrive/CURUAI_PROCESS/Dataset_CFP.xlsx',
    sheet_name='data',
    na_values='NaN',
).loc[:383]

# Sequential identifier for every sample row.
dataset['ID'] = range(len(dataset))

# Duplicate each column whose header contains a space under a space-free name
# (surrounding blanks dropped, inner blanks replaced by '_'); note that the
# original 'ODO ' header carries a trailing blank.
for source_name in (
    'Local Time',
    'TIME UTC',
    'DEPTH CLASS',
    'SAMPLE SITE',
    'WATER PERIOD',
    'TOTAL DEPTH',
    'SAMPLING DEPTH',
    'CHLOROPHYLL A',
    'CHLOROPHYLL B',
    'ODO ',
):
    dataset[source_name.strip().replace(' ', '_')] = dataset[source_name]

In [6]:
# Work on an independent copy restricted to the identifier, date, position
# and mission columns.
columns_of_interest = ['ID', 'DATE', 'LATITUDE', 'LONGITUDE', 'MISSION']
dataset_att = dataset[columns_of_interest].copy()
dataset_att.head()

Unnamed: 0,ID,DATE,LATITUDE,LONGITUDE,MISSION
0,0,2013-03-08 00:00:00,-2.25127,-55.14622,I
1,1,2013-03-08 00:00:00,-2.25127,-55.14622,I
2,2,2013-03-09 00:00:00,-2.28422,-55.22023,I
3,3,2013-03-09 00:00:00,-2.19696,-55.29953,I
4,4,2013-03-09 00:00:00,-2.221738,-55.270194,I


In [7]:
# Count the non-null MISSION entries per mission, i.e. the rows sampled in each field campaign.
dataset_att.groupby(['MISSION'])['MISSION'].count()

Unnamed: 0_level_0,MISSION
MISSION,Unnamed: 1_level_1
I,71
II,73
III,74
IV,36
IX,24
V,26
VI,25
VII,28
VIII,27


In [8]:
# List the columns kept in dataset_att.
dataset_att.columns

Index(['ID', 'DATE', 'LATITUDE', 'LONGITUDE', 'MISSION'], dtype='object')

In [9]:
# Build one point geometry per row from the longitude/latitude columns and
# wrap the table in a GeoDataFrame declared as EPSG:4326.
point_geometry = gpd.points_from_xy(dataset_att['LONGITUDE'], dataset_att['LATITUDE'])
gdf = gpd.GeoDataFrame(dataset_att, geometry=point_geometry, crs="EPSG:4326")
gdf.head()

Unnamed: 0,ID,DATE,LATITUDE,LONGITUDE,MISSION,geometry
0,0,2013-03-08 00:00:00,-2.25127,-55.14622,I,POINT (-55.14622 -2.25127)
1,1,2013-03-08 00:00:00,-2.25127,-55.14622,I,POINT (-55.14622 -2.25127)
2,2,2013-03-09 00:00:00,-2.28422,-55.22023,I,POINT (-55.22023 -2.28422)
3,3,2013-03-09 00:00:00,-2.19696,-55.29953,I,POINT (-55.29953 -2.19696)
4,4,2013-03-09 00:00:00,-2.221738,-55.270194,I,POINT (-55.27019 -2.22174)


In [10]:
# Cast the DATE column to string so the GeoDataFrame can be serialised to JSON.
gdf['DATE'] = gdf['DATE'].astype("str")

In [None]:
# gdf.to_file("/content/drive/MyDrive/CURUAI_PROCESS/points_curuai.shp")

In [11]:
# Serialise the GeoDataFrame to a JSON string so it can be handed to Earth Engine.
dataset_json = gdf.to_json()

In [12]:
# Parse the JSON string back into Python objects and keep only its 'features' list.
data_points = json.loads(dataset_json)
data_points = data_points['features']
# data_points

In [13]:
# Build an Earth Engine FeatureCollection from the feature list and print its
# size (getInfo() fetches the value from the server).
roi_points = ee.FeatureCollection(data_points)
print(roi_points.size().getInfo())

384


In [14]:
def insert_date(feat):
    """Return `feat` with a 'system:time_start' property derived from 'DATE'.

    The 'DATE' property is parsed with ee.Date.parse using the pattern
    'YYYY-MM-dd HH:mm:ss'; the resulting ee.Date is stored on the feature.
    """
    parsed_date = ee.Date.parse('YYYY-MM-dd HH:mm:ss', feat.get('DATE'))
    return feat.set('system:time_start', parsed_date)

In [15]:
# Map insert_date over every point so each feature gains a 'system:time_start'
# property, then print the first feature to check the result.
roi_points = roi_points.map(insert_date)
print(roi_points.first().getInfo())

{'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [-55.14622, -2.25127]}, 'id': '0', 'properties': {'DATE': '2013-03-08 00:00:00', 'ID': 0, 'LATITUDE': -2.25127, 'LONGITUDE': -55.14622, 'MISSION': 'I', 'system:time_start': {'type': 'Date', 'value': 1362700800000}}}


In [16]:
# Load the region-of-interest polygon around the floodplain area from the
# project's Earth Engine assets.
roi_poly = ee.FeatureCollection('projects/ee-curuai/assets/roi_poly_curuai')

# Import GEE images

## Define date ranges for filtering images

In [17]:
def date_mission(mission,time):
  """Return the date of the first or last field point of a mission.

  Args:
    mission: value matched against the 'MISSION' property of roi_points.
    time: ascending flag forwarded to sort(); True yields the earliest
      'system:time_start' of the mission, False the latest.

  Returns:
    ee.Date built from the selected point's 'system:time_start'.
  """
  mission_points = roi_points.filter(ee.Filter.eq('MISSION', mission))
  edge_point = mission_points.sort('system:time_start', time).first()
  return ee.Date(edge_point.get('system:time_start'))

In [18]:
# Field mission to process (Roman numeral matched against the MISSION property)
# and the number of days added before/after the mission's field dates when
# building the image-search window.
mission = 'VI'
advance = 16

In [19]:
# Translate the Roman-numeral mission label into its ordinal number (I -> 1 ... IX -> 9).
# An unknown label only prints a message and leaves `col` unassigned.
_MISSION_ORDER = ('I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX')
if mission in _MISSION_ORDER:
  col = _MISSION_ORDER.index(mission) + 1
else:
  print('Mission value error')

In [20]:
# Image-search window: `advance` days before the earliest field point of the
# selected mission through `advance` days after the latest one.
initial_date = date_mission(mission,True).advance(-advance, 'day')
end_date = date_mission(mission,False).advance(advance, 'day')

# Print both bounds. The labels are Portuguese for "initial/final collection
# date"; the printed values already include the +/- `advance` padding.
print('Data inicial de coleta: ',initial_date.format().getInfo())
print('Data final de coleta: ',end_date.format().getInfo())

Data inicial de coleta:  2016-03-27T00:00:00
Data final de coleta:  2016-05-07T00:00:00


## Import image collections

### Landsat 7 - PY6S

In [57]:
# Landsat 7 images read from the project's Py6S asset collection
# (presumably already atmospherically corrected with 6S -- confirm).
# Only a date filter (initial_date .. end_date) and a band selection are
# applied here; this cell does no spatial filtering and no cloud masking.
landsat7 = ee.ImageCollection("projects/ee-curuai/assets/Py6S/LD7/ld7_py6s")\
            .filterDate(initial_date,end_date)\
            .select([ 'B1', 'B2', 'B3', 'B4', 'B5', 'B7'])
# Acquisition time of the first image in the collection, then of the latest one.
print(ee.Date(landsat7.first().get('system:time_start')).format().getInfo())
print(ee.Date(landsat7.sort('system:time_start',False).first().get('system:time_start')).format().getInfo())

2016-04-09T13:56:12
2016-04-25T13:56:44


In [58]:
# Quick inspection of the filtered Landsat 7 collection: image count,
# projection and nominal scale of band B4 of the first image, and that
# image's band names.
print('collection size',landsat7.size().getInfo())
print('projection',landsat7.first().select('B4').projection().getInfo())
print('spatial resolution',landsat7.first().select('B4').projection().nominalScale().getInfo())
print('bands',landsat7.first().bandNames().getInfo())


collection size 4
projection {'type': 'Projection', 'crs': 'EPSG:32721', 'transform': [30, 0, 610350, 0, -30, 9780030]}
spatial resolution 30
bands ['B1', 'B2', 'B3', 'B4', 'B5', 'B7']


### Landsat 8 - PY6S

In [70]:
# Landsat 8 images read from the project's Py6S asset collection
# (presumably already atmospherically corrected with 6S -- confirm).
# Only a date filter (initial_date .. end_date) and a band selection are
# applied here; this cell does no spatial filtering and no cloud masking.
landsat8 = (ee.ImageCollection("projects/ee-curuai/assets/Py6S/LD8/ld8_py6s")
            .filterDate(initial_date,end_date)
            .select(['B2', 'B3', 'B4', 'B5', 'B6', 'B7']))
# Acquisition time of the first image in the collection, then of the latest one.
print(ee.Date(landsat8.first().get('system:time_start')).format().getInfo())
print(ee.Date(landsat8.sort('system:time_start',False).first().get('system:time_start')).format().getInfo())

2016-04-26T13:47:44
2016-04-26T13:47:44


In [69]:
# Quick inspection of the filtered Landsat 8 collection: image count,
# projection and nominal scale of band B4 of the first image, and that
# image's band names.
print('collection size',landsat8.size().getInfo())
print('projection',landsat8.first().select('B4').projection().getInfo())
print('spatial resolution',landsat8.first().select('B4').projection().nominalScale().getInfo())
print('bands',landsat8.first().bandNames().getInfo())

collection size 1
projection {'type': 'Projection', 'crs': 'EPSG:32721', 'transform': [30, 0, 610350, 0, -30, 9780030]}
spatial resolution 30
bands ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']


### Sentinel 2 - PY6S

In [None]:
# Sentinel-2 images read from the project's Py6S asset collection
# (presumably already atmospherically corrected with 6S -- confirm),
# restricted to the initial_date .. end_date window. No band selection here.
sentinel2 = ee.ImageCollection("projects/ee-curuai/assets/Py6S/S2/s2_py6s")\
            .filterDate(initial_date,end_date)
# Acquisition time of the first image in the collection, then of the latest one.
print(ee.Date(sentinel2.first().get('system:time_start')).format().getInfo())
print(ee.Date(sentinel2.sort('system:time_start',False).first().get('system:time_start')).format().getInfo())

In [None]:
# Image count of the filtered Sentinel-2 collection and band names of its first image.
print('collection size',sentinel2.size().getInfo())
print('bands',sentinel2.first().bandNames().getInfo())

### Visualize

In [61]:
Map = geemap.Map(basemap='HYBRID')
Map.centerObject(roi_points,10)

# The stretch keys must be the strings 'min'/'max'. Written bare, min and max
# are the Python built-in functions and Earth Engine never sees the range.
# NOTE(review): the per-point map later in the notebook uses max 0.05 for the
# same collections -- confirm which stretch is intended now that it is applied.
vis_range = {'min': 0, 'max': 0.00002}

# True-colour composites. Landsat 7 (ETM+) is red/green/blue = B3/B2/B1.
# Landsat 8 (OLI) is B4/B3/B2, and `landsat8` holds no 'B1' after its band
# selection, so requesting B3/B2/B1 there cannot render.
Map.addLayer(landsat7.first(), {'bands':['B3','B2','B1'], **vis_range}, str(landsat7.first().get('LANDSAT_PRODUCT_ID').getInfo()))
Map.addLayer(landsat8.first(), {'bands':['B4','B3','B2'], **vis_range}, str(landsat8.first().get('LANDSAT_PRODUCT_ID').getInfo()))

Map.addLayer(roi_points, {'color':'darkred'}, 'Data Points');
# Map.addLayer(roi_poly, {'color':'darkred'}, 'Data polygon');
Map

Map(center=[-2.1837863104242503, -55.48976569658883], controls=(WidgetControl(options=['position', 'transparen…

## Standardize band names

In [62]:
# Common band names given to the six bands of every sensor, in selection order.
name_bands = ['blue','green','red','nir','swir1','swir2']

### Landsat 7

In [63]:
# Rename the bands of every Landsat 7 image to the common names in name_bands.

ld7 = landsat7.map(lambda img: img.rename(name_bands))
ld7.first()

### Landsat 8

In [72]:
# Rename the bands of every Landsat 8 image to the common names in name_bands.
ld8 = landsat8.map(lambda img: img.rename(name_bands))
ld8.first()

### Sentinel 2

In [None]:
# Select bands 'b1'..'b6' of every Sentinel-2 image and rename them to name_bands.
# NOTE(review): these lowercase band ids differ from the 'B*' ids used for the
# Landsat assets -- confirm they match the band names stored in the S2 asset.
s2 = sentinel2.map(lambda img: img.select(['b1','b2','b3','b4','b5','b6'])\
                        .rename(name_bands))

# Sample data points pixel values

Filter the images that fall within a ±16-day window around each field point's date and intersect the point (at most 10 images are kept per point).

In [73]:
def imgs_points(collection, window_days=16):
    """Build a mapper that pairs one field point with the images close to it in time.

    Args:
        collection: ee.ImageCollection to search.
        window_days: number of days before and after the point date that
            bound the search window. Defaults to 16, the value that used to be
            hard-coded, so existing calls behave as before.

    Returns:
        A function `wrap(feat)` meant to be mapped over the point collection.
        For a feature carrying 'system:time_start' and 'ID' it returns an
        ee.ImageCollection of at most 10 images that fall inside the window
        and intersect the feature geometry. Every image gains:
          * 'dif_date_point': image time minus point time, in days (signed);
          * 'ID': copied from the feature.
    """
    def wrap(feat):
        date_point = ee.Date(feat.get('system:time_start'))

        # Window bounds are formatted as day-resolution strings before being
        # handed to filterDate.
        window_start = date_point.advance(-window_days, 'day').format('yyyy-MM-dd')
        window_end = date_point.advance(window_days, 'day').format('yyyy-MM-dd')

        def tag_image(img):
            # Signed offset in days between the acquisition and the field sample.
            offset = ee.Date(img.get('system:time_start')).difference(date_point, 'day')
            return img.set({'dif_date_point': offset}).copyProperties(feat, ['ID'])

        matches = (collection
                   .filterDate(window_start, window_end)
                   .filterBounds(feat.geometry())
                   .map(tag_image))

        return ee.ImageCollection(matches.limit(10))

    return wrap

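Compute statistics (mean, median, min/max and valid-pixel count) at each field point over a window of roughly 3 × 3 pixels (45 m buffer at 30 m resolution), and discard samples with 3 or fewer valid (unmasked) pixels in that window.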

In [74]:
# Projection of the first renamed Landsat 8 image; its CRS and nominal scale
# are reused for every region reduction in sample_point.
prj = ld8.first().projection()
prj

In [75]:
def sample_point(img):
  """Summarise the pixels of `img` around the field point it was paired with.

  The image must carry the 'ID' of its field point and bands named 'blue',
  'green', 'red', 'nir', 'swir1' and 'swir2'. The function relies on the
  module-level `roi_points` (field points) and `prj` (projection supplying
  the CRS and scale of the reductions).

  Args:
    img: ee.Image with the 'ID' and 'dif_date_point' properties set.

  Returns:
    The matching field-point feature with extra properties: image metadata
    ('SATELLITE_PRODUCT_ID', 'CLOUD_COVER', 'img_date', 'dif_date_point'),
    '<band>_mean', '<band>_median', '<band>_min' and '<band>_max' for each of
    the six bands, and 'count_pixel' (number of unmasked 'red' pixels).
  """
  feat = roi_points.filter(ee.Filter.eq('ID',img.get('ID'))).first()
  # Bounding box of a 45 m buffer around the point.
  geom = feat.geometry().buffer(45).bounds()

  def reduce_with(reducer):
    # Every statistic uses the same region, scale and CRS.
    return img.reduceRegion(geometry=geom,
                            scale=prj.nominalScale(),
                            crs=prj.crs(),
                            reducer=reducer)

  mean = reduce_with(ee.Reducer.mean())
  median = reduce_with(ee.Reducer.median())
  minMax = reduce_with(ee.Reducer.minMax())
  count = reduce_with(ee.Reducer.count())

  properties = {
      "SATELLITE_PRODUCT_ID": img.get('SATELLITE_PRODUCT_ID'),
      'CLOUD_COVER':img.get('CLOUD_COVER'),
      'img_date':ee.Date(img.get('system:time_start')).format(),
      'dif_date_point': img.get('dif_date_point'),
      "count_pixel":count.get('red'),
  }
  # Building the per-band keys in a loop keeps them uniform. The hand-written
  # list had the key 'green_meadin', which is now 'green_median' -- any
  # downstream reader of the CSV that used the misspelt column must be updated.
  for band in ('blue', 'green', 'red', 'nir', 'swir1', 'swir2'):
    properties[band + '_mean'] = mean.get(band)
    properties[band + '_median'] = median.get(band)
    properties[band + '_min'] = minMax.get(band + '_min')
    properties[band + '_max'] = minMax.get(band + '_max')

  return feat.set(properties)

# Keep only samples whose 'count_pixel' property (unmasked 'red' pixels in the
# sampling window) is greater than 3.
filter_count = ee.Filter.gt('count_pixel',3)




### Landsat 7

In [76]:
# Pair every field point with its Landsat 7 images and flatten the result into
# one image collection. The renamed collection `ld7` is used (not `landsat7`)
# because sample_point and the per-point map read the common band names
# ('blue', 'red', ...), which the un-renamed B1..B7 bands do not provide.
img_pointsLD7 = ee.ImageCollection(roi_points.map(imgs_points(ld7)).flatten().toList(2000))

In [77]:
img_pointsLD7.size()


In [78]:
img_pointsLD7.aggregate_count_distinct('ID')

In [79]:
img_pointsLD7.limit(5)

In [80]:
# Summarise every point/image pair with sample_point and drop the samples
# rejected by filter_count.
reduced_LD7 = ee.FeatureCollection(img_pointsLD7.map(sample_point)).filter(filter_count)

In [81]:
reduced_LD7.limit(2)

In [82]:
reduced_LD7.size()

In [None]:
reduced_LD7.aggregate_count_distinct('ID')

### Landsat 8

In [None]:
# Pair every field point with its Landsat 8 images and flatten the result into
# one image collection. `landsat8_rs` is not defined anywhere in this notebook;
# the renamed collection `ld8` carries the common band names sample_point reads.
img_pointsLD8 = ee.ImageCollection(roi_points.map(imgs_points(ld8)).flatten().toList(2000))

In [None]:
img_pointsLD8.size()


In [None]:
img_pointsLD8.aggregate_count_distinct('ID')

In [None]:
img_pointsLD8.limit(5)

In [None]:
# Summarise every point/image pair with sample_point and drop the samples
# rejected by filter_count.
reduced_LD8 = ee.FeatureCollection(img_pointsLD8.map(sample_point)).filter(filter_count)

In [None]:
reduced_LD8.size()


In [None]:
reduced_LD8.aggregate_count_distinct('ID')

In [None]:
reduced_LD8.limit(2)

### Sentinel 2

In [None]:
# Pair every field point with its Sentinel-2 images and flatten the result into
# one image collection. `sentinel2_rs` is not defined anywhere in this notebook;
# the renamed collection `s2` carries the common band names sample_point reads.
img_pointsS2 = ee.ImageCollection(roi_points.map(imgs_points(s2)).flatten().toList(2000))

In [None]:
img_pointsS2.size()

In [None]:
img_pointsS2.aggregate_count_distinct('ID')

In [None]:
# Summarise every point/image pair with sample_point and drop the samples
# rejected by filter_count.
reduced_S2 = ee.FeatureCollection(img_pointsS2.map(sample_point)).filter(filter_count)

In [None]:
reduced_S2.size()


In [None]:
reduced_S2.aggregate_count_distinct('ID')

In [None]:
reduced_S2.limit(2)

## Visualize sampled image by defining field point ID

In [None]:
Map = geemap.Map()
Map.centerObject(roi_points,10)

# ID of the field point to inspect. Named `point_id` so the Python built-in
# id() is not shadowed for the rest of the session.
point_id = 302
point_filter = ee.Filter.eq('ID', point_id)
vis_params={'bands':['red','green','blue'],'min':0,'max':0.05}

# For each sensor, show the first image paired with the point when sorted by
# ascending 'dif_date_point'. The offset is signed, so this is the image with
# the most negative offset, not necessarily the one closest in time.
for layer_name, paired_images in (('landsat7', img_pointsLD7),
                                  ('landsat8', img_pointsLD8),
                                  ('sentinel2', img_pointsS2)):
  Map.addLayer(paired_images.filter(point_filter).sort('dif_date_point').first(), vis_params, layer_name)

Map.addLayer(roi_points.filter(point_filter), {'color':'darkred'}, 'Data Points');
Map

Map(center=[-2.1837863104242503, -55.48976569658883], controls=(WidgetControl(options=['position', 'transparen…

# Export data as CSV file

In [None]:
# Export tables
# Landsat 7 samples -> CSV on the mounted Drive, prefixed with the mission label.
# NOTE(review): the file suffix says 'ACOLLITE' although the collections are
# read from the Py6S assets -- confirm the intended file name.
geemap.ee_to_csv(reduced_LD7, '/content/drive/MyDrive/CURUAI_PROCESS/'+mission+'_LD7_ACOLLITE_data.csv', selectors=None, verbose=True, timeout=300, proxies=None)

Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/earthengine-legacy/tables/d8b42fafaf915a7dc6c4fa3405ee958d-f66e441ccbbddab9e570994ea2529232:getFeatures
Please wait ...
Data downloaded to /content/drive/MyDrive/CURUAI_PROCESS/V_LD7_ACOLLITE_data.csv


In [None]:
# Landsat 8 samples -> CSV on the mounted Drive, prefixed with the mission label.
# NOTE(review): the file suffix says 'ACOLLITE' although the collections are
# read from the Py6S assets -- confirm the intended file name.
geemap.ee_to_csv(reduced_LD8, '/content/drive/MyDrive/CURUAI_PROCESS/'+mission+'_LD8_ACOLLITE_data.csv', selectors=None, verbose=True, timeout=300, proxies=None)

Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/earthengine-legacy/tables/3380d1b0a060f28645c4461a9fb0f3bc-98d0a95ab91b1984ce280694be16294b:getFeatures
Please wait ...
Data downloaded to /content/drive/MyDrive/CURUAI_PROCESS/V_LD8_ACOLLITE_data.csv


In [None]:
# Sentinel-2 samples -> CSV on the mounted Drive, prefixed with the mission label.
# The sensor tag is 'S2': writing to the '_LD7_' name would overwrite the
# Landsat 7 export made for the same mission.
# NOTE(review): the 'ACOLLITE' suffix is kept for consistency with the Landsat
# exports although the collections are read from the Py6S assets -- confirm.
geemap.ee_to_csv(reduced_S2, '/content/drive/MyDrive/CURUAI_PROCESS/'+mission+'_S2_ACOLLITE_data.csv', selectors=None, verbose=True, timeout=300, proxies=None)

Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/earthengine-legacy/tables/4f86bba985360aa467fb0cf2f6017150-dae5fb1f94758da695727c685facea14:getFeatures
Please wait ...
Data downloaded to /content/drive/MyDrive/CURUAI_PROCESS/VI_S2_DOS_data.csv


CORREÇÃO ATMOSFÉRICA:


1.   ACOLITE https://github.com/acolite/acolite
2.   6S com imagem da Dinha e parâmetros do artigo... talvez seja necessária mais uma correção para isso



